### Import Required Libraries and Spark SQL Functions

In [0]:
import os
from pyspark.sql.functions import *

### Define Raw (Source) and Bronze Layer Base Paths

In [0]:
# Root folders for the two layers this notebook touches: the raw folder the
# CSV files are listed from, and the bronze folder the Delta tables are
# written to.
raw_base, bronze_base = (
    "/Volumes/adventure_works_lakehouse/adventure_works/lakehouse/raw",
    "/Volumes/adventure_works_lakehouse/adventure_works/lakehouse/bronze",
)


### List the CSV Source Files in the Raw Data Layer

In [0]:
# Everything found directly under the raw folder.
files = dbutils.fs.ls(raw_base)

# Keep only the entries whose name ends in ".csv" (the match is case-sensitive).
csv_files = list(filter(lambda entry: entry.name.endswith(".csv"), files))

### Read Raw CSV Files and Ingest into Bronze Layer

In [0]:
# Ingest every raw CSV file into its own Delta table under the bronze layer.
# The table is named "bronze_<lower-cased file name without extension>".

for file in csv_files:
    # Strip only the trailing extension. str.replace(".csv", "") would also
    # delete a ".csv" that appears in the middle of a file name.
    file_name = os.path.splitext(file.name)[0].lower()

    bronze_table_name = f"bronze_{file_name}"
    bronze_path = f"{bronze_base}/{bronze_table_name}"

    print(f"Processing {file.name} → {bronze_table_name}")

    # Treat the first line as the header, let Spark infer the column types,
    # and add an "ing_ts" column populated from current_timestamp().
    df = (
        spark.read
        .option("header", True)
        .option("inferSchema", True)
        .csv(file.path)
        .withColumn("ing_ts", current_timestamp())
    )

    # Write Delta: each run overwrites whatever is already at bronze_path,
    # and overwriteSchema lets the stored schema be replaced as well.
    df.write.format("delta") \
        .mode("overwrite") \
        .option("overwriteSchema", "true") \
        .save(bronze_path)


Processing AdventureWorks_Calendar.csv → bronze_adventureworks_calendar
Processing AdventureWorks_Customers.csv → bronze_adventureworks_customers
Processing AdventureWorks_Product_Categories.csv → bronze_adventureworks_product_categories
Processing AdventureWorks_Product_Subcategories.csv → bronze_adventureworks_product_subcategories
Processing AdventureWorks_Products.csv → bronze_adventureworks_products
Processing AdventureWorks_Returns.csv → bronze_adventureworks_returns
Processing AdventureWorks_Sales_2015.csv → bronze_adventureworks_sales_2015
Processing AdventureWorks_Sales_2016.csv → bronze_adventureworks_sales_2016
Processing AdventureWorks_Sales_2017.csv → bronze_adventureworks_sales_2017
Processing AdventureWorks_Territories.csv → bronze_adventureworks_territories


### Verify the Bronze Layer Write

In [0]:
# List what now exists under the bronze folder. The names are shown exactly as
# dbutils.fs.ls reports them (directories carry a trailing "/").
# No ".csv" stripping is applied here: that would only alter an entry whose
# name contains ".csv" and so misreport it.
file_names = [f.name for f in dbutils.fs.ls(bronze_base)]
file_names

['bronze_adventureworks_calendar/',
 'bronze_adventureworks_customers/',
 'bronze_adventureworks_product_categories/',
 'bronze_adventureworks_product_subcategories/',
 'bronze_adventureworks_products/',
 'bronze_adventureworks_returns/',
 'bronze_adventureworks_sales_2015/',
 'bronze_adventureworks_sales_2016/',
 'bronze_adventureworks_sales_2017/',
 'bronze_adventureworks_territories/']

In [0]:
# Read the 2015 sales Delta table back from the bronze folder for a spot check.
df_sales_2015 = (
    spark.read
    .format("delta")
    .load(f"{bronze_base}/bronze_adventureworks_sales_2015")
)

In [0]:
# Render a 7-row preview of the 2015 sales table.
display(df_sales_2015.limit(7))

OrderDate,StockDate,OrderNumber,ProductKey,CustomerKey,TerritoryKey,OrderLineItem,OrderQuantity,ing_ts
2015-01-01,2001-09-21,SO45080,332,14657,1,1,1,2026-01-13T11:06:00.153Z
2015-01-01,2001-12-05,SO45079,312,29255,4,1,1,2026-01-13T11:06:00.153Z
2015-01-01,2001-10-29,SO45082,350,11455,9,1,1,2026-01-13T11:06:00.153Z
2015-01-01,2001-11-16,SO45081,338,26782,6,1,1,2026-01-13T11:06:00.153Z
2015-01-02,2001-12-15,SO45083,312,14947,10,1,1,2026-01-13T11:06:00.153Z
2015-01-02,2001-10-12,SO45084,310,29143,4,1,1,2026-01-13T11:06:00.153Z
2015-01-02,2001-12-18,SO45086,314,18747,9,1,1,2026-01-13T11:06:00.153Z


In [0]:
# Print each column's name, data type and nullability for the loaded table.
df_sales_2015.printSchema()

root
 |-- OrderDate: date (nullable = true)
 |-- StockDate: date (nullable = true)
 |-- OrderNumber: string (nullable = true)
 |-- ProductKey: integer (nullable = true)
 |-- CustomerKey: integer (nullable = true)
 |-- TerritoryKey: integer (nullable = true)
 |-- OrderLineItem: integer (nullable = true)
 |-- OrderQuantity: integer (nullable = true)
 |-- ing_ts: timestamp (nullable = true)

