# BRONZE LAYER

### Necessary Imports

In [1]:
import importlib
import os
import urllib
import urllib.parse
from datetime import datetime
from pathlib import Path

import polars as pl
from dotenv import load_dotenv
from sqlalchemy import create_engine

import config

# Load variables from the .env file into the process environment.
# override=True lets values from .env replace variables that are already set.
load_dotenv(override=True) 
# Re-execute the already-imported config module so that edits made to
# config.py are picked up without restarting the kernel.
importlib.reload(config)

<module 'config' from '/home/sapna.choudhary/Data-Engineering-Training/Polars_DWH/bronze_layer/config.py'>

In [2]:
# Root directory that holds every parquet layer. It is read from the
# environment (populated from .env by load_dotenv in the imports cell).
# Fail fast with an explicit message: Path(None) would otherwise raise an
# opaque TypeError, and an empty value would silently point at the current dir.
_parquet_root = os.getenv("PARQUET_FILES_DIR")
if not _parquet_root:
    raise RuntimeError(
        "PARQUET_FILES_DIR is not set; define it in .env or in the environment."
    )
PARQUET_FILES_DIR = Path(_parquet_root)

# Layer directories: bronze is this notebook's output, raw is its input.
bronze_parquet = PARQUET_FILES_DIR / "bronze_layer"
raw_parquet = PARQUET_FILES_DIR / "raw_layer"

# Create both directories (and any missing parents); no-op if they exist.
for _layer_dir in (bronze_parquet, raw_parquet):
    _layer_dir.mkdir(parents=True, exist_ok=True)

### Set up the DB connection

In [3]:
# Connection settings are read from the environment (populated from .env by
# load_dotenv in the imports cell) so that no credentials live in the notebook.
# SECURITY: a password used to be hardcoded in this cell. Treat it as leaked
# and rotate it; keep the new one only in .env (quote the value there if it
# contains characters such as '#').
DB_DRIVER = os.getenv("DB_DRIVER", "ODBC Driver 17 for SQL Server")
DB_SERVER = os.getenv("DB_SERVER", "associatetraining.database.windows.net,1433")
DB_NAME = os.getenv("DB_NAME", "associatetraining")
DB_USER = os.getenv("DB_USER", "training")
DB_PASSWORD = os.getenv("DB_PASSWORD")  # deliberately no default
if not DB_PASSWORD:
    raise RuntimeError(
        "DB_PASSWORD is not set; define it in .env or in the environment."
    )

# Build the raw ODBC connection string and URL-encode it so it can be passed
# through SQLAlchemy's `odbc_connect` query parameter unchanged.
params = urllib.parse.quote_plus(
    f"DRIVER={{{DB_DRIVER}}};"
    f"SERVER={DB_SERVER};"
    f"DATABASE={DB_NAME};"
    f"UID={DB_USER};"
    f"PWD={DB_PASSWORD};"
)

engine = create_engine(f"mssql+pyodbc:///?odbc_connect={params}")

### Delete all "non-hist" parquet files of the Bronze layer

In [4]:
# Lazily pick every bronze parquet file that is NOT a "*_hist.parquet" file;
# the history files are kept, everything else is removed.
non_hist_files = (
    candidate
    for candidate in bronze_parquet.glob("*.parquet")
    if not candidate.name.endswith("_hist.parquet")
)

for file in non_hist_files:
    print(f"Deleting: {file}")
    os.remove(file)

Deleting: /home/sapna.choudhary/Data-Engineering-Training/Polars_DWH/parquet_files/bronze_layer/bronze_product.parquet
Deleting: /home/sapna.choudhary/Data-Engineering-Training/Polars_DWH/parquet_files/bronze_layer/bronze_shipping_type.parquet
Deleting: /home/sapna.choudhary/Data-Engineering-Training/Polars_DWH/parquet_files/bronze_layer/bronze_orders.parquet
Deleting: /home/sapna.choudhary/Data-Engineering-Training/Polars_DWH/parquet_files/bronze_layer/bronze_customer.parquet


### Load all tables & Save as parquet files

In [5]:
def load_bronze_tables(
    bronze_cfg: dict,
    src_dir: Path,
    dest_dir: Path,
    engine,
    schema_name: str,
    load_timestamp: "datetime | None" = None,
) -> dict[str, pl.DataFrame]:
    """Build each configured bronze table from its raw parquet file and persist it.

    For every entry in ``bronze_cfg`` the raw parquet file is read, all columns
    are cast to strings, a ``load_timestamp`` column is appended, and the result
    is written to a parquet file in ``dest_dir`` and to the database table
    ``<schema_name>.<table>`` (an existing table is replaced).

    Args:
        bronze_cfg: Mapping of table name to a properties mapping. Each
            properties mapping must contain ``"src_file"`` and ``"dest_file"``,
            which are joined onto ``src_dir`` and ``dest_dir`` respectively.
        src_dir: Directory containing the raw parquet files.
        dest_dir: Directory the bronze parquet files are written to.
        engine: Connection object handed to ``DataFrame.write_database``
            (the notebook passes a SQLAlchemy engine).
        schema_name: Database schema used as the prefix of each table name.
        load_timestamp: Optional fixed value for the ``load_timestamp`` column.
            Pass one value to stamp every table of a run identically or to make
            a run reproducible. When omitted, each table is stamped with
            ``datetime.now()`` at the moment it is processed (the previous
            behaviour).

    Returns:
        Mapping of table name to the bronze ``pl.DataFrame`` that was written.

    Raises:
        KeyError: If a properties mapping lacks ``"src_file"`` or ``"dest_file"``.
    """
    dfs = {}
    for tbl, props in bronze_cfg.items():
        src_file = src_dir / props["src_file"]
        dest_file = dest_dir / props["dest_file"]

        df_raw = pl.read_parquet(src_file)

        # Use the caller's timestamp when given; otherwise stamp per table.
        stamp = datetime.now() if load_timestamp is None else load_timestamp

        # Bronze keeps every source column as a string and adds load metadata.
        df_bronze = df_raw.select(pl.all().cast(pl.Utf8)).with_columns(
            pl.lit(stamp).alias("load_timestamp")
        )
        dfs[tbl] = df_bronze

        # Save to parquet (local bronze layer)
        df_bronze.write_parquet(dest_file)
        print(f"Created {tbl}: {df_bronze.shape} at {dest_file}")

        # Save to SQL Server (bronze schema in DB); replaces an existing table.
        df_bronze.write_database(
            table_name=f"{schema_name}.{tbl}",
            connection=engine,
            if_table_exists="replace",
        )
        print(f"Saved {tbl} into DB schema {schema_name}")

    return dfs


In [6]:
# Run the bronze load: raw parquet files in, bronze parquet files and DB tables out.
dfs = load_bronze_tables(
    bronze_cfg=config.BRONZE_CONFIG,
    src_dir=raw_parquet,
    dest_dir=bronze_parquet,
    engine=engine,
    schema_name='bronze',
)

Created customer: (500, 16) at /home/sapna.choudhary/Data-Engineering-Training/Polars_DWH/parquet_files/bronze_layer/bronze_customer.parquet
Saved customer into DB schema bronze
Created shipping_type: (5, 4) at /home/sapna.choudhary/Data-Engineering-Training/Polars_DWH/parquet_files/bronze_layer/bronze_shipping_type.parquet
Saved shipping_type into DB schema bronze
Created product: (50000, 12) at /home/sapna.choudhary/Data-Engineering-Training/Polars_DWH/parquet_files/bronze_layer/bronze_product.parquet
Saved product into DB schema bronze
Created orders: (3002, 20) at /home/sapna.choudhary/Data-Engineering-Training/Polars_DWH/parquet_files/bronze_layer/bronze_orders.parquet
Saved orders into DB schema bronze


### Verify Results

In [None]:
# Read one of the freshly written tables back from the database so the load
# can be inspected (column names, dtypes and a preview of the rows).
schema_name, tbl = 'bronze', 'orders'
table_name = f"{schema_name}.{tbl}"
query = f"SELECT * FROM {table_name}"

df_bronze = pl.read_database(query=query, connection=engine)
df_bronze

orders_id,customer_id,product_id,payment_src,shipping_type,lead_type,order_status,order_date,shipping_date,expected_delivery_date,delivery_date,return_date,refund_date,quantity,unit_price,is_gift,gift_message,has_coupon,coupon_code,load_timestamp
str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,datetime[μs]
"""ord_000000001""","""1306""","""Prd_00000005""","""upi""","""One-Day Delivery""","""retail""","""""","""2024-06-22""","""2024-06-23""","""2024-07-01""","""2024-07-05""","""""","""""","""6""","""2989.39990234375""","""0""","""""","""0""","""""",2025-09-16 23:07:01.447
"""ord_000000001""","""1306""","""Prd_00000002""","""upi""","""One-Day Delivery""","""retail""","""Delivered""","""2024-06-22""","""2024-06-23""","""2024-07-01""","""2024-07-05""","""""","""""","""6""","""2989.39990234375""","""0""","""""","""0""","""""",2025-09-16 23:07:01.447
"""ord_000000002""","""1168""","""Prd_00000003""","""Credit card""","""Express""","""website""","""Shipped""","""2024-03-12""","""2024-03-12""","""""","""2024-03-17""","""""","""""","""4""","""34031.6015625""","""0""","""""","""1""","""CUPN2674""",2025-09-16 23:07:01.447
"""ord_000000003""","""1490""","""Prd_00000004""","""Net banking""","""""","""retail""","""Processing""","""2024-05-17""","""""","""2024-05-18""","""2024-05-29""","""""","""""","""2""","""55208.54296875""","""1""","""Best Wishes!""","""0""","""""",2025-09-16 23:07:01.447
"""ord_000000004""","""1441""","""Prd_00000005""","""Cash!""","""Express""","""retail""","""Processing""","""2024-03-12""","""2024-03-19""","""2024-03-22""","""2024-03-23""","""""","""""","""10""","""37024.3984375""","""1""","""Happy Birthday!""","""0""","""""",2025-09-16 23:07:01.447
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""ord_000002996""","""1470""","""prd_3523""",""" Debit Card ""","""One-Day Delivery""","""website""","""Returned""","""2024-05-25""","""2024-05-25""","""2024-06-01""","""2024-06-08""","""""","""""","""9""","""32450.0""","""1""","""Enjoy!""","""0""","""""",2025-09-16 23:07:01.447
"""ord_000002997""","""1078""","""prd_9944""","""upi""","""Express""","""website""","""Processing""","""2024-04-29""","""2024-04-29""","""2024-05-06""","""2024-05-11""","""""","""""","""9""","""98695.0546875""","""0""","""""","""1""","""CUPN5515""",2025-09-16 23:07:01.447
"""ord_000002998""","""1335""","""prd_3620""","""Cash!""","""FAST Delivery""","""mobile app""","""Returned""","""2024-05-22""","""2024-05-25""","""2024-05-28""","""2024-06-01""","""""","""""","""9""","""58800.87109375""","""0""","""""","""1""","""CUPN8586""",2025-09-16 23:07:01.447
"""ord_000002999""","""1075""","""prd_0450""",""" Debit Card ""","""Normal""","""website""","""Shipped""","""2024-06-17""","""2024-06-18""","""2024-06-22""","""2024-06-29""","""""","""""","""2""","""59807.12890625""","""0""","""""","""0""","""""",2025-09-16 23:07:01.447
