In [0]:
dbutils.fs.rm("dbfs:/user/hive/warehouse/global_retail_silver.db/silver_products", recurse=True)

Out[1]: True

In [0]:
%sql
select * FROM global_retail_bronze.bronze_products limit 5

brand,category,is_active,name,price,product_id,rating,stock_quantity,ingestion_timestamp
BeautyGlow,Toys,True,Product 1,995.73,1,3.5,989,2025-04-29T11:49:54.382+0000
GardenMaster,Garden,True,Product 2,497.76,2,3.8,495,2025-04-29T11:49:54.382+0000
BeautyGlow,Electronics,True,Product 3,331.63,3,4.6,10,2025-04-29T11:49:54.382+0000
TechPro,Beauty,False,Product 4,798.83,4,4.7,683,2025-04-29T11:49:54.382+0000
HomeSmart,Automotive,False,Product 5,-454.98,5,4.4,719,2025-04-29T11:49:54.382+0000


In [0]:
%sql

USE global_retail_silver;
CREATE TABLE IF NOT EXISTS silver_products (
  brand STRING,
  category STRING,
  is_active BOOLEAN,
  name STRING,
  price DOUBLE,
  product_id STRING,
  rating DOUBLE,
  stock_quantity INT,
  price_category STRING,
  stock_status STRING,
  last_updated TIMESTAMP
) USING DELTA;

In [0]:
%sql
show tables

database,tableName,isTemporary
global_retail_silver,silver_customers,False
global_retail_silver,silver_products,False


In [0]:
# Get the last processed timestamp from silver layer
last_processed_df = spark.sql("SELECT MAX(last_updated) as last_processed FROM silver_products")
last_processed_timestamp = last_processed_df.collect()[0]['last_processed']

if last_processed_timestamp is None:
    last_processed_timestamp = "1900-01-01T00:00:00.000+00:00"

In [0]:
# Create a temporary view of incremental bronze data
spark.sql(f"""
CREATE OR REPLACE TEMPORARY VIEW bronze_incremental_products AS
SELECT *
FROM global_retail_bronze.bronze_products p where  p.ingestion_timestamp > '{last_processed_timestamp}'
""")

Out[6]: DataFrame[]

In [0]:
%sql
select * from bronze_incremental_products limit 10

brand,category,is_active,name,price,product_id,rating,stock_quantity,ingestion_timestamp
BeautyGlow,Toys,True,Product 1,995.73,1,3.5,989,2025-04-29T11:49:54.382+0000
GardenMaster,Garden,True,Product 2,497.76,2,3.8,495,2025-04-29T11:49:54.382+0000
BeautyGlow,Electronics,True,Product 3,331.63,3,4.6,10,2025-04-29T11:49:54.382+0000
TechPro,Beauty,False,Product 4,798.83,4,4.7,683,2025-04-29T11:49:54.382+0000
HomeSmart,Automotive,False,Product 5,-454.98,5,4.4,719,2025-04-29T11:49:54.382+0000
BookWorm,Electronics,False,Product 6,645.3,6,2.2,823,2025-04-29T11:49:54.382+0000
FashionX,Automotive,False,Product 7,549.08,7,1.1,999,2025-04-29T11:49:54.382+0000
TechPro,Books,False,Product 8,982.36,8,2.4,542,2025-04-29T11:49:54.382+0000
FashionX,Toys,True,Product 9,307.14,9,1.0,671,2025-04-29T11:49:54.382+0000
BeautyGlow,Garden,False,Product 10,871.38,10,3.4,975,2025-04-29T11:49:54.382+0000


In [0]:
%sql
select max(stock_quantity),avg(stock_quantity),min(stock_quantity) from bronze_incremental_products
-- select price from bronze_incremental_products where price<0

max(stock_quantity),avg(stock_quantity),min(stock_quantity)
999,493.472,1


### Transformations
- Price normalization- setting negatives to zero
- stock quantity normalization - setting negatives to zero
- rating normalization - clamping between 0 and 5
- price categorization - premium, standard, budget
- stock status calculation - out of stock, low stock, moderate stock, sufficient stock

In [0]:
%sql
CREATE OR REPLACE TEMPORARY VIEW silver_incremental_products AS(
SELECT
  product_id,
  name,
  category,
  brand,
  is_active,
  CASE 
    WHEN price < 0 THEN 0
    ELSE price 
  END AS price,
  CASE 
    WHEN stock_quantity < 0 THEN 0
    ELSE stock_quantity 
  END AS stock_quantity,
  CASE 
    WHEN rating < 0 THEN 0
    WHEN rating > 5 THEN 5
    ELSE rating
  END AS rating,
  CASE 
    WHEN price >700 THEN 'Premium'
    WHEN price >400 THEN 'Standard'
    ELSE 'Budget' 
  END AS price_category,
  CASE 
    WHEN stock_quantity =0 THEN 'Out of Stock'
    WHEN stock_quantity >50 THEN 'Low Stock'
    WHEN stock_quantity >300 THEN 'Moderate stock'
    ELSE 'Sufficient Stock' 
  END AS stock_status,
  CURRENT_TIMESTAMP AS last_updated
FROM bronze_incremental_products
WHERE name IS NOT NULL and category IS NOT NULL
)

In [0]:
%sql
SELECT * FROM silver_incremental_products limit 10

product_id,name,category,brand,is_active,price,stock_quantity,rating,price_category,stock_status,last_updated
1,Product 1,Toys,BeautyGlow,True,995.73,989,3.5,Premium,Low Stock,2025-04-29T13:55:45.196+0000
2,Product 2,Garden,GardenMaster,True,497.76,495,3.8,Standard,Low Stock,2025-04-29T13:55:45.196+0000
3,Product 3,Electronics,BeautyGlow,True,331.63,10,4.6,Budget,Sufficient Stock,2025-04-29T13:55:45.196+0000
4,Product 4,Beauty,TechPro,False,798.83,683,4.7,Premium,Low Stock,2025-04-29T13:55:45.196+0000
5,Product 5,Automotive,HomeSmart,False,0.0,719,4.4,Budget,Low Stock,2025-04-29T13:55:45.196+0000
6,Product 6,Electronics,BookWorm,False,645.3,823,2.2,Standard,Low Stock,2025-04-29T13:55:45.196+0000
7,Product 7,Automotive,FashionX,False,549.08,999,1.1,Standard,Low Stock,2025-04-29T13:55:45.196+0000
8,Product 8,Books,TechPro,False,982.36,542,2.4,Premium,Low Stock,2025-04-29T13:55:45.196+0000
9,Product 9,Toys,FashionX,True,307.14,671,1.0,Budget,Low Stock,2025-04-29T13:55:45.196+0000
10,Product 10,Garden,BeautyGlow,False,871.38,975,3.4,Premium,Low Stock,2025-04-29T13:55:45.196+0000


In [0]:
%sql
MERGE INTO silver_products AS target
USING silver_incremental_products AS source
ON target.product_id = source.product_id
WHEN MATCHED THEN
  UPDATE SET *
WHEN NOT MATCHED THEN
  INSERT *

num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
1000,0,0,1000


In [0]:
%sql
select * from silver_products limit 10

brand,category,is_active,name,price,product_id,rating,stock_quantity,price_category,stock_status,last_updated
BeautyGlow,Toys,True,Product 1,995.73,1,3.5,989,Premium,Low Stock,2025-04-29T11:55:39.559+0000
GardenMaster,Garden,True,Product 2,497.76,2,3.8,495,Standard,Low Stock,2025-04-29T11:55:39.559+0000
BeautyGlow,Electronics,True,Product 3,331.63,3,4.6,10,Budget,Sufficient Stock,2025-04-29T11:55:39.559+0000
TechPro,Beauty,False,Product 4,798.83,4,4.7,683,Premium,Low Stock,2025-04-29T11:55:39.559+0000
HomeSmart,Automotive,False,Product 5,0.0,5,4.4,719,Budget,Low Stock,2025-04-29T11:55:39.559+0000
BookWorm,Electronics,False,Product 6,645.3,6,2.2,823,Standard,Low Stock,2025-04-29T11:55:39.559+0000
FashionX,Automotive,False,Product 7,549.08,7,1.1,999,Standard,Low Stock,2025-04-29T11:55:39.559+0000
TechPro,Books,False,Product 8,982.36,8,2.4,542,Premium,Low Stock,2025-04-29T11:55:39.559+0000
FashionX,Toys,True,Product 9,307.14,9,1.0,671,Budget,Low Stock,2025-04-29T11:55:39.559+0000
BeautyGlow,Garden,False,Product 10,871.38,10,3.4,975,Premium,Low Stock,2025-04-29T11:55:39.559+0000
