# Truncate and Load 
### Create Database and Table in Databricks
###   One-Time Setup: Database & Table Creation

In [0]:
# Define Catalog & Database
catalog_name = "podlakehousedemo"
database_name = "Aqualake_truncate_load"
table_name = "Customers"

# Ensure database exists
spark.sql(f"CREATE DATABASE IF NOT EXISTS {catalog_name}.{database_name}")

# Create Delta table (if it doesn’t exist)
spark.sql(f"""
    CREATE TABLE IF NOT EXISTS {catalog_name}.{database_name}.{table_name} (
        CustomerID STRING,
        FirstName STRING,
        MiddleInitial STRING,
        LastName STRING,
        CityID STRING,
        Address STRING
    ) USING DELTA
""")
print(f"✅ Table {catalog_name}.{database_name}.{table_name} is ready!")


✅ Table podlakehousedemo.Aqualake_truncate_load.Customers is ready!


### Configure Azure Blob Storage Access (SAS Token)

In [0]:
# Set up Azure Blob Storage access using SAS Token
spark.conf.set(
    "fs.azure.account.auth.type.demolakehouse.dfs.core.windows.net", 
    "SAS"
)
spark.conf.set(
    "fs.azure.sas.token.provider.type.demolakehouse.dfs.core.windows.net", 
    "org.apache.hadoop.fs.azurebfs.sas.FixedSASTokenProvider"
)
spark.conf.set(
    "fs.azure.sas.fixed.token.demolakehouse.dfs.core.windows.net", 
    "sv=2024-11-04&ss=bfqt&srt=sco&sp=rwdlacupyx&se=2025-03-25T19:05:29Z&st=2025-03-25T11:05:29Z&spr=https&sig=UIVR2nhtuNzAMsqJktqn68NLFuRYUwVNqwjK5gH85ts%3D"
)
print("✅ Azure Storage SAS Token Configured")

# Define the Azure Blob Storage path using abfss protocol
adls_path = "abfss://poddemo@demolakehouse.dfs.core.windows.net/Aqualake_Sourcefiles_TL/"

# Read data from the specified path
try:
    df = spark.read.format("csv").option("header", "true").load(adls_path)
    print("✅ Data read successfully!")
    display(df)
except Exception as e:
    print("❌ Error reading data:", str(e))

✅ Azure Storage SAS Token Configured
✅ Data read successfully!


CustomerID,FirstName,MiddleInitial,LastName,CityID,Address
2194,Walter,F,Sellers,63,22 Hague Street
2195,Marie,F,Sharp,16,20 Oak Street
2196,Kari,C,Cain,28,99 South Rocky Second Road
2197,Tricia,F,Sheppard,44,80 Cowley Parkway
2198,Brendan,F,Glenn,55,40 Rocky Old Street
2199,Gerard,I,Conrad,49,202 Cowley Drive
2200,Jill,K,Sherman,81,66 North Green Hague Avenue
2201,Theresa,T,Barker,85,36 Oak Street
2202,Valerie,K,Sanford,43,12 Clarendon Way
2203,Alison,T,Guzman,4,154 North Milton Freeway


### Read Data from Azure Blob Storage

In [0]:
# Read the CSV file from Azure Blob Storage
df = spark.read.format("csv").option("header", "true").load(adls_path)

# Write to Delta Table
df.write.format("delta").mode("append").saveAsTable(f"{catalog_name}.{database_name}.{table_name}")

print(f"✅ Data loaded into {catalog_name}.{database_name}.{table_name} successfully!")


✅ Data loaded into podlakehousedemo.Aqualake_truncate_load.Customers successfully!


### Truncate Existing Data Before Loading New Data

In [0]:
# Truncate the table
spark.sql(f"TRUNCATE TABLE {catalog_name}.{database_name}.{table_name}")
print(f"✅ {table_name} table truncated successfully!")


✅ Customers table truncated successfully!


In [0]:
%sql
select count(*) from aqualake_truncate_load.customers

count(1)
2880


In [0]:
# Databricks Recurring Steps Script

# Step 1: Configure Azure Blob Storage Access using SAS Token
spark.conf.set(
    "fs.azure.account.auth.type.demolakehouse.dfs.core.windows.net", 
    "SAS"
)
spark.conf.set(
    "fs.azure.sas.token.provider.type.demolakehouse.dfs.core.windows.net", 
    "org.apache.hadoop.fs.azurebfs.sas.FixedSASTokenProvider"
)
spark.conf.set(
    "fs.azure.sas.fixed.token.demolakehouse.dfs.core.windows.net", 
    "sv=2024-11-04&ss=bfqt&srt=sco&sp=rwdlacupyx&se=2025-03-25T19:05:29Z&st=2025-03-25T11:05:29Z&spr=https&sig=UIVR2nhtuNzAMsqJktqn68NLFuRYUwVNqwjK5gH85ts%3D"
)
print("✅ Azure Storage SAS Token Configured")

# Step 2: Define Azure Blob Storage Path
adls_path = "abfss://poddemo@demolakehouse.dfs.core.windows.net/Aqualake_Sourcefiles_TL/"

# Step 3: Read Data from Azure Blob Storage
try:
    df = spark.read.format("csv").option("header", "true").load(adls_path)
    source_count = df.count()
    print(f"✅ Data read successfully! Source Count: {source_count}")
    display(df)  # Display first few rows
except Exception as e:
    print("❌ Error reading data:", str(e))

# Step 4: Truncate Existing Data Before Loading New Data
spark.sql(f"TRUNCATE TABLE {catalog_name}.{database_name}.{table_name}")
print(f"✅ {table_name} table truncated successfully!")

# Step 5: Load Fresh Data into Delta Table
df.write.format("delta").mode("append").saveAsTable(f"{catalog_name}.{database_name}.{table_name}")

# Step 6: Validate the Load
target_count = spark.sql(f"SELECT COUNT(*) FROM {catalog_name}.{database_name}.{table_name}").collect()[0][0]
print(f"✅ Data loaded into {catalog_name}.{database_name}.{table_name} successfully!")
print(f"🔄 Validation: Source Count = {source_count}, Target Count = {target_count}")

# Step 7: Display Sample Records from Target Table
spark.sql(f"SELECT * FROM {catalog_name}.{database_name}.{table_name} LIMIT 10").show()


✅ Azure Storage SAS Token Configured
✅ Data read successfully! Source Count: 2880


CustomerID,FirstName,MiddleInitial,LastName,CityID,Address
2194,Walter,F,Sellers,63,22 Hague Street
2195,Marie,F,Sharp,16,20 Oak Street
2196,Kari,C,Cain,28,99 South Rocky Second Road
2197,Tricia,F,Sheppard,44,80 Cowley Parkway
2198,Brendan,F,Glenn,55,40 Rocky Old Street
2199,Gerard,I,Conrad,49,202 Cowley Drive
2200,Jill,K,Sherman,81,66 North Green Hague Avenue
2201,Theresa,T,Barker,85,36 Oak Street
2202,Valerie,K,Sanford,43,12 Clarendon Way
2203,Alison,T,Guzman,4,154 North Milton Freeway


✅ Customers table truncated successfully!
✅ Data loaded into podlakehousedemo.Aqualake_truncate_load.Customers successfully!
🔄 Validation: Source Count = 2880, Target Count = 2880
+----------+---------+-------------+--------+------+--------------------+
|CustomerID|FirstName|MiddleInitial|LastName|CityID|             Address|
+----------+---------+-------------+--------+------+--------------------+
|      2194|   Walter|            F| Sellers|    63|     22 Hague Street|
|      2195|    Marie|            F|   Sharp|    16|       20 Oak Street|
|      2196|     Kari|            C|    Cain|    28|99 South Rocky Se...|
|      2197|   Tricia|            F|Sheppard|    44|   80 Cowley Parkway|
|      2198|  Brendan|            F|   Glenn|    55| 40 Rocky Old Street|
|      2199|   Gerard|            I|  Conrad|    49|    202 Cowley Drive|
|      2200|     Jill|            K| Sherman|    81|66 North Green Ha...|
|      2201|  Theresa|            T|  Barker|    85|       36 Oak Street|
|     