In [0]:
from pyspark.sql.functions import col
from pyspark.sql import functions as F

# Create the bronze schemas if they don't exist.
# `fmcg.bronze` is the schema this notebook originally created. The bronze loads
# in this notebook write to `main_fmcg.bronze.*`, so that schema has to exist
# before any table is saved into it.
# NOTE(review): if only one of the two catalogs is really intended, drop the
# other entry here and align the table names used by the loads.
for bronze_schema in ("fmcg.bronze", "main_fmcg.bronze"):
    spark.sql(f"CREATE SCHEMA IF NOT EXISTS {bronze_schema}")

In [0]:
#Create a Reusable Function
def full_load_to_bronze(
    source_path: str,
    table_name: str,
    table_description: str,
    cast_columns: dict = None
) -> None:
    """Full-load CSV data from ``source_path`` into a bronze Delta table.

    The CSV is read with a header row and an inferred schema. Three audit
    columns are appended after the source columns: ``read_timestamp`` (the
    current timestamp at read time) plus ``file_name`` and ``file_size`` taken
    from the reader's ``_metadata`` column. The target table is overwritten,
    including its schema, on every call.

    Args:
        source_path: File or folder path handed to the CSV reader.
        table_name: Name of the table to overwrite, passed to ``saveAsTable``.
        table_description: Human-readable description stored as the table
            comment. An empty value leaves the table comment untouched.
        cast_columns: Optional mapping of column name -> target data type;
            each listed column is cast before the write.
    """
    print(f"Reading data from {source_path}")

    df = (
        spark.read.format("csv")
            .option("header", True)
            .option("inferSchema", True)
            .load(source_path)
            .withColumn("read_timestamp", F.current_timestamp())
            .select("*", "_metadata.file_name", "_metadata.file_size")
    )

    # Apply column casting if provided
    if cast_columns:
        for column_name, data_type in cast_columns.items():
            df = df.withColumn(column_name, col(column_name).cast(data_type))

    print(f"Writing to bronze table: {table_name}")

    # NOTE(review): "description" is kept for backward compatibility, but it is
    # not a writer option documented to set the table comment, so the comment
    # is also applied explicitly after the write.
    df.write.format("delta") \
        .mode("overwrite") \
        .option("overwriteSchema", "true") \
        .option("description", table_description) \
        .saveAsTable(table_name)

    if table_description:
        # Escape backslashes first, then single quotes, so the description
        # cannot terminate the SQL string literal early.
        escaped_description = (
            table_description.replace("\\", "\\\\").replace("'", "\\'")
        )
        spark.sql(f"COMMENT ON TABLE {table_name} IS '{escaped_description}'")

    print(f"Successfully loaded {table_name}")




In [0]:
# Full-load every child-company source into its bronze table.
# Master data comes from single CSV files; orders are read from a landing
# folder that holds multiple files. Entries run in the order listed.
bronze_full_loads = [
    # Customers
    (
        "/Volumes/fmcg/default/raw_0_data/2_child_company/full_load/customers/customers.csv",
        "main_fmcg.bronze.customers",
        "Customer data from raw layer",
    ),
    # Products
    (
        "/Volumes/fmcg/default/raw_0_data/2_child_company/full_load/products/products.csv",
        "main_fmcg.bronze.products",
        "Products data from raw layer",
    ),
    # Gross Price
    (
        "/Volumes/fmcg/default/raw_0_data/2_child_company/full_load/gross_price/gross_price.csv",
        "main_fmcg.bronze.gross_price",
        "Gross price data from raw layer",
    ),
    # Orders (multiple files in folder)
    (
        "/Volumes/fmcg/default/raw_0_data/2_child_company/full_load/orders/landing/",
        "main_fmcg.bronze.orders",
        "Orders full load from landing folder",
    ),
]

for load_source, load_table, load_description in bronze_full_loads:
    full_load_to_bronze(
        source_path=load_source,
        table_name=load_table,
        table_description=load_description,
    )