In [0]:
dbutils.fs.rm("dbfs:/user/hive/warehouse/global_retail_silver.db/silver_orders", recurse=True)

Out[28]: True

In [0]:
%sql
select * FROM global_retail_bronze.bronze_transactions limit 5

transaction_id,customer_id,product_id,quantity,total_amount,transaction_date,payment_method,store_type,ingestion_timestamp
TRX000001,802,425,1,363.4,2020-07-27T00:00:00.000+0000,Debit Card,Physical Store,2025-04-29T11:50:29.359+0000
TRX000002,858,280,6,758.18,2022-08-10T00:00:00.000+0000,Credit Card,Physical Store,2025-04-29T11:50:29.359+0000
TRX000003,658,694,9,748.66,2020-05-22T00:00:00.000+0000,Bank Transfer,Online,2025-04-29T11:50:29.359+0000
TRX000004,516,930,4,933.78,,Bank Transfer,Physical Store,2025-04-29T11:50:29.359+0000
TRX000005,368,104,10,137.28,2022-06-24T00:00:00.000+0000,PayPal,Physical Store,2025-04-29T11:50:29.359+0000


In [0]:
%sql

USE global_retail_silver;
CREATE TABLE IF NOT EXISTS silver_orders (
  transaction_id STRING,
  customer_id STRING,
  product_id STRING,
  quantity INT,
  total_amount DOUBLE,
  transaction_date DATE,
  payment_method STRING,
  store_type STRING,
  order_status STRING,
  last_updated TIMESTAMP
) USING DELTA;

In [0]:
%sql
show tables

database,tableName,isTemporary
global_retail_silver,silver_customers,False
global_retail_silver,silver_orders,False
global_retail_silver,silver_products,False
,bronze_incremental_orders,True
,silver_incremental_orders,True


In [0]:
# Get the last processed timestamp from silver layer
last_processed_df = spark.sql("SELECT MAX(last_updated) as last_processed FROM silver_orders")
last_processed_timestamp = last_processed_df.collect()[0]['last_processed']

if last_processed_timestamp is None:
    last_processed_timestamp = "1900-01-01T00:00:00.000+00:00"

In [0]:
# Create a temporary view of incremental bronze data
spark.sql(f"""
CREATE OR REPLACE TEMPORARY VIEW bronze_incremental_orders AS
SELECT *
FROM global_retail_bronze.bronze_transactions p where  p.ingestion_timestamp > '{last_processed_timestamp}'
""")

Out[38]: DataFrame[]

In [0]:
%sql
select * from bronze_incremental_orders limit 10

transaction_id,customer_id,product_id,quantity,total_amount,transaction_date,payment_method,store_type,ingestion_timestamp
TRX000001,802,425,1,363.4,2020-07-27T00:00:00.000+0000,Debit Card,Physical Store,2025-04-29T11:50:29.359+0000
TRX000002,858,280,6,758.18,2022-08-10T00:00:00.000+0000,Credit Card,Physical Store,2025-04-29T11:50:29.359+0000
TRX000003,658,694,9,748.66,2020-05-22T00:00:00.000+0000,Bank Transfer,Online,2025-04-29T11:50:29.359+0000
TRX000004,516,930,4,933.78,,Bank Transfer,Physical Store,2025-04-29T11:50:29.359+0000
TRX000005,368,104,10,137.28,2022-06-24T00:00:00.000+0000,PayPal,Physical Store,2025-04-29T11:50:29.359+0000
TRX000006,606,409,6,556.88,2020-10-14T00:00:00.000+0000,Credit Card,Online,2025-04-29T11:50:29.359+0000
TRX000007,535,487,7,246.29,2023-08-19T00:00:00.000+0000,Credit Card,Physical Store,2025-04-29T11:50:29.359+0000
TRX000008,87,630,8,-416.82,2020-04-18T00:00:00.000+0000,Credit Card,Physical Store,2025-04-29T11:50:29.359+0000
TRX000009,383,617,10,405.05,2021-02-13T00:00:00.000+0000,Cash,Physical Store,2025-04-29T11:50:29.359+0000
TRX000010,725,938,1,410.03,2020-03-18T00:00:00.000+0000,PayPal,Online,2025-04-29T11:50:29.359+0000


In [0]:
%sql
SELECT MIN(QUANTITY),AVG(QUANTITY),MAX(QUANTITY) FROM bronze_incremental_orders

min(QUANTITY),avg(QUANTITY),max(QUANTITY)
1,5.4578,10


### Transformations
- Quantity and total_amount normalization (setting negatives to zero)
- Date casting to endsure consistent date format
- Order status derivation based on quantity and total_amount
- filter out records with null transaction date, customer_id, or product_id

In [0]:
%sql
CREATE OR REPLACE TEMPORARY VIEW silver_incremental_orders AS(
SELECT
  transaction_id,
  customer_id,
  product_id,
  CASE
    WHEN quantity < 0 THEN 0
    ELSE quantity
  END AS quantity,
  CASE
    WHEN total_amount < 0 THEN 0
    ELSE total_amount
  END AS total_amount,
  CAST(transaction_date AS DATE) AS transaction_date,
  payment_method,
  store_type,
  CASE
    WHEN (quantity = 0 OR total_amount = 0) THEN 'Cancelled'
    ELSE 'Completed'
  END AS order_status,
  CURRENT_TIMESTAMP() AS last_updated 
FROM bronze_incremental_orders
WHERE product_id IS NOT NULL AND
      customer_id IS NOT NULL AND
      transaction_date IS NOT NULL
)

In [0]:
%sql
SELECT * FROM silver_incremental_orders limit 10

transaction_id,customer_id,product_id,quantity,total_amount,transaction_date,payment_method,store_type,order_status,last_updated
TRX000001,802,425,1,363.4,2020-07-27,Debit Card,Physical Store,Completed,2025-04-29T13:56:30.297+0000
TRX000002,858,280,6,758.18,2022-08-10,Credit Card,Physical Store,Completed,2025-04-29T13:56:30.297+0000
TRX000003,658,694,9,748.66,2020-05-22,Bank Transfer,Online,Completed,2025-04-29T13:56:30.297+0000
TRX000005,368,104,10,137.28,2022-06-24,PayPal,Physical Store,Completed,2025-04-29T13:56:30.297+0000
TRX000006,606,409,6,556.88,2020-10-14,Credit Card,Online,Completed,2025-04-29T13:56:30.297+0000
TRX000007,535,487,7,246.29,2023-08-19,Credit Card,Physical Store,Completed,2025-04-29T13:56:30.297+0000
TRX000008,87,630,8,0.0,2020-04-18,Credit Card,Physical Store,Completed,2025-04-29T13:56:30.297+0000
TRX000009,383,617,10,405.05,2021-02-13,Cash,Physical Store,Completed,2025-04-29T13:56:30.297+0000
TRX000010,725,938,1,410.03,2020-03-18,PayPal,Online,Completed,2025-04-29T13:56:30.297+0000
TRX000011,687,457,8,269.88,2020-06-27,Credit Card,Physical Store,Completed,2025-04-29T13:56:30.297+0000


In [0]:
%sql
select * from silver_incremental_orders where order_status = 'Cancelled'

transaction_id,customer_id,product_id,quantity,total_amount,transaction_date,payment_method,store_type,order_status,last_updated


In [0]:
%sql
MERGE INTO silver_orders AS target
USING silver_incremental_orders AS source
ON target.transaction_id = source.transaction_id
WHEN MATCHED THEN
  UPDATE SET *
WHEN NOT MATCHED THEN
  INSERT *

num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
9714,0,0,9714


In [0]:
%sql
select * from silver_orders limit 10

transaction_id,customer_id,product_id,quantity,total_amount,transaction_date,payment_method,store_type,order_status,last_updated
TRX000001,802,425,1,363.4,2020-07-27,Debit Card,Physical Store,Completed,2025-04-29T12:55:40.294+0000
TRX000002,858,280,6,758.18,2022-08-10,Credit Card,Physical Store,Completed,2025-04-29T12:55:40.294+0000
TRX000003,658,694,9,748.66,2020-05-22,Bank Transfer,Online,Completed,2025-04-29T12:55:40.294+0000
TRX000005,368,104,10,137.28,2022-06-24,PayPal,Physical Store,Completed,2025-04-29T12:55:40.294+0000
TRX000006,606,409,6,556.88,2020-10-14,Credit Card,Online,Completed,2025-04-29T12:55:40.294+0000
TRX000007,535,487,7,246.29,2023-08-19,Credit Card,Physical Store,Completed,2025-04-29T12:55:40.294+0000
TRX000008,87,630,8,0.0,2020-04-18,Credit Card,Physical Store,Completed,2025-04-29T12:55:40.294+0000
TRX000009,383,617,10,405.05,2021-02-13,Cash,Physical Store,Completed,2025-04-29T12:55:40.294+0000
TRX000010,725,938,1,410.03,2020-03-18,PayPal,Online,Completed,2025-04-29T12:55:40.294+0000
TRX000011,687,457,8,269.88,2020-06-27,Credit Card,Physical Store,Completed,2025-04-29T12:55:40.294+0000
