#Bronze layer

###Loading raw data from csv and storing them in dataframes for further use

In [0]:
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DoubleType, DateType

# File path inside your volume
raw_path = "/Volumes/workspace/default/sales_data/sales_data_sample_de.csv"

# Bronze Delta path
bronze_table = "bronze_sales"

# Define schema (adjust based on your CSV columns)
schema = StructType([
    StructField("ORDERNUMBER", IntegerType(), True),
    StructField("QUANTITYORDERED", IntegerType(), True),
    StructField("PRICEEACH", DoubleType(), True),
    StructField("ORDERLINENUMBER", IntegerType(), True),
    StructField("SALES", DoubleType(), True),
    StructField("ORDERDATE", StringType(), True),
    StructField("STATUS", StringType(), True),
    StructField("QTR_ID", IntegerType(), True),
    StructField("MONTH_ID", IntegerType(), True),
    StructField("YEAR_ID", IntegerType(), True),
    StructField("PRODUCTLINE", StringType(), True),
    StructField("MSRP", IntegerType(), True),
    StructField("PRODUCTCODE", StringType(), True),
    StructField("CUSTOMERNAME", StringType(), True),
    StructField("PHONE", StringType(), True),
    StructField("ADDRESSLINE1", StringType(), True),
    StructField("CITY", StringType(), True),
    StructField("STATE", StringType(), True),
    StructField("POSTALCODE", StringType(), True),
    StructField("COUNTRY", StringType(), True),
    StructField("TERRITORY", StringType(), True),
    StructField("CONTACTLASTNAME", StringType(), True),
    StructField("CONTACTFIRSTNAME", StringType(), True),
    StructField("DEALSIZE", StringType(), True)
])

# Read raw CSV into Bronze
bronze_df = (
    spark.read.format("csv")
    .option("header", "true")
    .schema(schema)
    .load(raw_path)
)

# Write to Delta (Bronze Layer)
bronze_df.write.format("delta").mode("overwrite").saveAsTable(bronze_table)

print('number of rows in the dataset---------------')
print(bronze_df.count())
display(bronze_df.limit(5))

number of rows in the dataset---------------
2823


ORDERNUMBER,QUANTITYORDERED,PRICEEACH,ORDERLINENUMBER,SALES,ORDERDATE,STATUS,QTR_ID,MONTH_ID,YEAR_ID,PRODUCTLINE,MSRP,PRODUCTCODE,CUSTOMERNAME,PHONE,ADDRESSLINE1,CITY,STATE,POSTALCODE,COUNTRY,TERRITORY,CONTACTLASTNAME,CONTACTFIRSTNAME,DEALSIZE
10107,30,95.7,2,2871.0,2/24/2003 0:00,Shipped,1,2,2003,Motorcycles,95,S10_1678,Land of Toys Inc.,2125557818,897 Long Airport Avenue,,NYC,NY,10022.0,USA,,Yu,Kwai
10121,34,81.35,5,2765.9,5/7/2003 0:00,Shipped,2,5,2003,Motorcycles,95,S10_1678,Reims Collectables,26.47.1555,59 rue de l'Abbaye,,Reims,,51100.0,France,EMEA,Henriot,Paul
10134,41,94.74,2,3884.34,7/1/2003 0:00,Shipped,3,7,2003,Motorcycles,95,S10_1678,Lyon Souveniers,+33 1 46 62 7555,27 rue du Colonel Pierre Avia,,Paris,,75508.0,France,EMEA,Da Cunha,Daniel
10145,45,83.26,6,3746.7,8/25/2003 0:00,Shipped,3,8,2003,Motorcycles,95,S10_1678,Toys4GrownUps.com,6265557265,78934 Hillside Dr.,,Pasadena,CA,90003.0,USA,,Young,Julie
10159,49,100.0,14,5205.27,10/10/2003 0:00,Shipped,4,10,2003,Motorcycles,95,S10_1678,Corporate Gift Ideas Co.,6505551386,7734 Strong St.,,San Francisco,CA,,USA,,Brown,Julie
