In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *

In [0]:
%sql
-- Set the current catalog to amazonretail and the current schema to bronze.
USE CATALOG amazonretail;
USE SCHEMA bronze;

### Reading Product file

In [0]:
# Load the product dataset from the bronze container.
# "header" and "inferSchema" are CSV reader options; Parquet files carry
# their own schema, so those options are not passed here.
df_product = (
    spark.read.format("parquet")
    .load("abfss://bronze@amazonprimeadls.dfs.core.windows.net/product")
)
display(df_product)

product_id,product_name,category,price
1,Wireless Mouse,Electronics,799.99
2,Bluetooth Speaker,Electronics,1299.49
3,Yoga Mat,Fitness,499.0
4,Laptop Stand,Accessories,999.99
5,Notebook Set,Stationery,149.0
6,Water Bottle,Fitness,299.0
7,Smartwatch,Electronics,4999.0
8,Desk Organizer,Accessories,399.0
9,Dumbbell Set,Fitness,1999.0
10,Pen Drive 32GB,Electronics,599.0


### Reading Customer file

In [0]:
# Load the customer dataset from the bronze container.
# "header" and "inferSchema" are CSV reader options; Parquet files carry
# their own schema, so those options are not passed here.
df_customer = (
    spark.read.format("parquet")
    .load("abfss://bronze@amazonprimeadls.dfs.core.windows.net/customer")
)
display(df_customer)

customer_id,first_name,last_name,email,phone,city,registration_date
101,Ravi,Yadav,user101@example.com,9887654321,Delhi,2023-09-14
102,Nina,Joshi,user102@example.com,9876543210,Mumbai,2024-01-21
103,Sonal,Sharma,user103@example.com,9865432109,Bangalore,2023-07-10
104,Karan,Patel,user104@example.com,9854321098,Hyderabad,2024-02-05
105,Riya,Singh,user105@example.com,9843210987,Chennai,2023-06-28
106,Ajay,Mishra,user106@example.com,9832109876,Pune,2024-03-10
107,Priya,Kapoor,user107@example.com,9821098765,Ahmedabad,2023-05-12
108,Rahul,Verma,user108@example.com,9810987654,Kolkata,2023-08-19
109,Pooja,Mehta,user109@example.com,9809876543,Delhi,2024-04-01
110,Deepak,Nair,user110@example.com,9798765432,Mumbai,2023-10-14


### Reading Transaction file

In [0]:
# Load the transaction dataset from the bronze container.
# "header" and "inferSchema" are CSV reader options; Parquet files carry
# their own schema, so those options are not passed here.
df_transaction = (
    spark.read.format("parquet")
    .load("abfss://bronze@amazonprimeadls.dfs.core.windows.net/transaction")
)
display(df_transaction)

transaction_id,customer_id,product_id,store_id,quantity,transaction_date
1,127,8,4,4,2025-03-31
2,105,3,4,5,2024-11-12
3,116,2,2,3,2025-05-01
4,120,8,1,1,2024-11-02
5,105,5,2,1,2025-03-17
6,110,7,3,5,2025-01-04
7,110,7,2,5,2025-01-01
8,126,7,5,2,2025-06-08
9,123,1,3,2,2024-10-08
10,124,2,2,5,2024-08-27


### Reading Store file

In [0]:
# Load the store dataset from the bronze container.
# "header" and "inferSchema" are CSV reader options; Parquet files carry
# their own schema, so those options are not passed here.
df_store = (
    spark.read.format("parquet")
    .load("abfss://bronze@amazonprimeadls.dfs.core.windows.net/store")
)
display(df_store)

store_id,store_name,location
1,City Mall Store,Mumbai
2,High Street Store,Delhi
3,Tech World Outlet,Bangalore
4,Downtown Mini Store,Pune
5,Mega Plaza,Chennai


### Writing all the files to Bronze Layer

In [0]:
def write_to_bronze_layer(df, table_name: str, catalog: str = "amazonretail", schema: str = "bronze") -> None:
    """Save ``df`` as a Delta table, replacing any existing data and schema.

    The table is written to ``{catalog}.{schema}.{table_name}`` using
    ``mode("overwrite")`` with the ``overwriteSchema`` option enabled.

    Args:
        df: Object exposing a ``.write`` writer (a Spark DataFrame in this
            notebook).
        table_name: Unqualified name of the target table.
        catalog: Catalog part of the table identifier. Defaults to
            ``"amazonretail"``.
        schema: Schema part of the table identifier. Defaults to ``"bronze"``.

    Raises:
        ValueError: If ``table_name`` is ``None``, empty, or only whitespace.
    """
    # Fail early: a blank name would otherwise build a malformed identifier
    # such as "amazonretail.bronze." (or "...None" for a None argument).
    if table_name is None or not str(table_name).strip():
        raise ValueError("table_name must be a non-empty string")

    target_table = f"{catalog}.{schema}.{table_name}"
    (
        df.write.mode("overwrite")
        .format("delta")
        .option("overwriteSchema", "true")
        .saveAsTable(target_table)
    )

In [0]:
# Persist each loaded DataFrame under its bronze table name, in the same
# order as before: product, customer, transaction, store.
bronze_targets = [
    (df_product, "bronze_product"),
    (df_customer, "bronze_customer"),
    (df_transaction, "bronze_transaction"),
    (df_store, "bronze_store"),
]
for source_df, bronze_table in bronze_targets:
    write_to_bronze_layer(source_df, bronze_table)