In [0]:
%sql
-- Create the Unity Catalog volume that holds the raw input files.
-- IF NOT EXISTS makes this cell a no-op when the volume is already present,
-- so the notebook can be re-run from the top.
CREATE VOLUME IF NOT EXISTS main.default.raw_data;


In [0]:
%sql
-- List the volumes in the main.default schema to confirm raw_data is there.
SHOW VOLUMES IN main.default;


database,volume_name
default,raw_data
default,sales_volume


In [0]:
%sql
-- Show the volume's metadata (owner, storage location, volume type, ...).
DESCRIBE VOLUME main.default.raw_data;


name,catalog,database,owner,storage_location,volume_type,comment,securable_type,securable_kind
raw_data,main,default,testwest1221@gmail.com,,MANAGED,,VOLUME,VOLUME_DB_STORAGE


In [0]:
# Remove every widget currently defined on the notebook so the following cell
# starts from a clean slate. Kept in its own cell, separate from the cell that
# creates the widgets.
dbutils.widgets.removeAll()


In [0]:
# --- Bronze: land the raw CSV file as a Delta table --------------------------

# Notebook parameters: the input file and the table it is loaded into.
dbutils.widgets.text("source_path", "/Volumes/main/default/raw_data/sales.csv")
dbutils.widgets.text("bronze_table", "main.default.bronze_sales")

source_path = dbutils.widgets.get("source_path")
bronze_table = dbutils.widgets.get("bronze_table")

# The first line of the file is used as column names. No schema (and no
# inferSchema option) is supplied to the reader.
csv_reader = spark.read.option("header", True)
df = csv_reader.csv(source_path)

# "overwrite" replaces the table contents on every run.
bronze_writer = df.write.format("delta").mode("overwrite")
bronze_writer.saveAsTable(bronze_table)

print("✅ Bronze layer completed")


✅ Bronze layer completed


In [0]:
%sql
-- Spot-check the rows written by the Bronze step.
-- NOTE: the table name is hard-coded to the default value of the bronze_table
-- widget; it does not follow the widget if a different table is selected.
SELECT * FROM main.default.bronze_sales;


date,order_id,customer_id,category,price,quantity,revenue
2023-01-01,20230101-489713,8726,Toys,63.95,4,255.8
2023-01-01,20230101-505413,3044,Beauty,27.82,2,55.64
2023-01-01,20230101-427515,5200,Electronics,80.12,1,80.12
2023-01-01,20230101-516646,2169,Home,83.68,3,251.04
2023-01-01,20230101-315601,7141,Electronics,82.78,2,165.56
2023-01-01,20230101-320932,2030,Beauty,47.07,1,47.07
2023-01-01,20230101-412748,4930,Electronics,146.22,4,584.88
2023-01-01,20230101-476162,8325,Electronics,45.91,1,45.91
2023-01-01,20230101-547596,7266,Home,43.17,3,129.51
2023-01-01,20230101-787805,9721,Toys,12.23,1,12.23


In [0]:
# --- Silver: de-duplicate and null-filter the Bronze data --------------------
#
# The widgets are declared here WITHOUT a preceding dbutils.widgets.removeAll():
#   * Databricks documents that a widget must not be created in the same cell
#     that removes widgets; the removal belongs in a separate cell.
#   * Wiping the widgets at this point would reset `bronze_table` to its
#     default, so this stage could read a different table than the one the
#     Bronze stage was told to write.
# Run the standalone removeAll() cell if a full widget reset is wanted.

# Create widgets (input table, output table)
dbutils.widgets.text(
    "bronze_table",
    "main.default.bronze_sales"
)

dbutils.widgets.text(
    "silver_table",
    "main.default.silver_sales"
)

# Read widget values
bronze_table = dbutils.widgets.get("bronze_table")
silver_table = dbutils.widgets.get("silver_table")

# Read data from Bronze
df = spark.read.table(bronze_table)

# ---- Silver Transformations ----
# 1. Drop rows that are exact duplicates across all columns
# 2. Drop rows that contain a null in any column
df_clean = df.dropDuplicates().dropna()

# Write to Silver table; "overwrite" replaces the contents on every run
df_clean.write.format("delta") \
    .mode("overwrite") \
    .saveAsTable(silver_table)

print("✅ Silver layer completed:", silver_table)


✅ Silver layer completed: main.default.silver_sales


In [0]:
%sql
-- Row count of the Silver table after de-duplication and null removal.
-- NOTE: the table name is hard-coded to the default value of the silver_table
-- widget; it does not follow the widget if a different table is selected.
SELECT COUNT(*) FROM main.default.silver_sales;


COUNT(*)
129675


In [0]:
# --- Gold: aggregate the Silver data for reporting ---------------------------
#
# The widgets are declared here WITHOUT a preceding dbutils.widgets.removeAll():
#   * Databricks documents that a widget must not be created in the same cell
#     that removes widgets; the removal belongs in a separate cell.
#   * Wiping the widgets at this point would reset `silver_table` to its
#     default, so this stage could read a different table than the one the
#     Silver stage was told to write.
# Run the standalone removeAll() cell if a full widget reset is wanted.

# Create widgets (input table, output table)
dbutils.widgets.text(
    "silver_table",
    "main.default.silver_sales"
)

dbutils.widgets.text(
    "gold_table",
    "main.default.gold_sales"
)

# Read widget values
silver_table = dbutils.widgets.get("silver_table")
gold_table = dbutils.widgets.get("gold_table")

# Read Silver data
df = spark.read.table(silver_table)

# ---- Gold Transformations ----
# Example business logic:
# number of Silver rows per category (result columns: category, count)
df_gold = df.groupBy("category").count()

# Write to Gold table; "overwrite" replaces the contents on every run
df_gold.write.format("delta") \
    .mode("overwrite") \
    .saveAsTable(gold_table)

print("✅ Gold layer completed:", gold_table)


✅ Gold layer completed: main.default.gold_sales


In [0]:
%sql
-- Inspect the per-category row counts produced by the Gold step.
-- NOTE: the table name is hard-coded to the default value of the gold_table
-- widget; it does not follow the widget if a different table is selected.
SELECT * FROM main.default.gold_sales;


category,count
Toys,20881
Home,28576
Beauty,23232
Grocery,20765
Electronics,36221
