# STAGING LAYER

### Necessary Imports

In [16]:
import importlib
import os
import urllib
import urllib.parse
from pathlib import Path

import polars as pl
from dotenv import load_dotenv
from sqlalchemy import create_engine

import config

# Load the .env file into the process environment; override=True lets values from .env
# replace variables that are already set.
load_dotenv(override=True) 
# Re-import config so that edits to config.py take effect without restarting the kernel.
# NOTE(review): this runs after load_dotenv, presumably so config sees the refreshed environment — confirm.
importlib.reload(config)

<module 'config' from '/home/sapna.choudhary/Data-Engineering-Training/Polars_DWH/staging_layer/config.py'>

In [17]:
# Execute the transformations notebook so the names it defines are available in this notebook.
# load_staging_tables resolves each configured transform function by name through globals(),
# so this cell has to run before the load.
%run ./transformations.ipynb

In [18]:
# Root directory for every Parquet layer, taken from the PARQUET_FILES_DIR environment variable.
# Fail with an explicit message when the variable is missing instead of the opaque
# TypeError that Path(None) would raise.
_parquet_root = os.getenv("PARQUET_FILES_DIR")
if _parquet_root is None:
    raise RuntimeError(
        "PARQUET_FILES_DIR is not set; define it in the environment or in the .env file"
    )
PARQUET_FILES_DIR = Path(_parquet_root)

# Bronze layer: the directory the staging load reads its source files from.
bronze_parquet = PARQUET_FILES_DIR / "bronze_layer"
bronze_parquet.mkdir(parents=True, exist_ok=True)

# Staging layer: the directory the staging Parquet files are written to.
staging_parquet = PARQUET_FILES_DIR / "staging_layer"
staging_parquet.mkdir(parents=True, exist_ok=True)

### Set up the database connection

In [19]:
# Connection settings are read from the environment (load_dotenv in the imports cell fills it
# from .env) so that no secret is stored in the notebook.
# SECURITY: the password that used to be hard-coded in this cell must be treated as exposed
# and should be rotated.
db_server = os.getenv("DB_SERVER", "associatetraining.database.windows.net,1433")
db_name = os.getenv("DB_NAME", "associatetraining")
db_user = os.getenv("DB_USER", "training")
db_password = os.getenv("DB_PASSWORD")  # deliberately no default: never commit the secret
if db_password is None:
    raise RuntimeError(
        "DB_PASSWORD is not set; define it in the environment or in the .env file"
    )

# Assemble the ODBC connection string ("KEY=value;" pairs, in this order) ...
odbc_settings = {
    "DRIVER": "{ODBC Driver 17 for SQL Server}",
    "SERVER": db_server,
    "DATABASE": db_name,
    "UID": db_user,
    "PWD": db_password,
}
odbc_connect = "".join(f"{key}={value};" for key, value in odbc_settings.items())

# ... and URL-encode it, because it is passed to SQLAlchemy inside the odbc_connect query parameter.
params = urllib.parse.quote_plus(odbc_connect)

engine = create_engine(f"mssql+pyodbc:///?odbc_connect={params}")

### Delete all "non-hist" parquet files of the Staging layer

In [20]:
# Remove the current staging snapshots; every "*_hist.parquet" file is left in place.
stale_files = [
    candidate
    for candidate in staging_parquet.glob("*.parquet")
    if not candidate.name.endswith("_hist.parquet")
]
for file in stale_files:
    print(f"Deleting: {file}")
    file.unlink()

Deleting: /home/sapna.choudhary/Data-Engineering-Training/Polars_DWH/parquet_files/staging_layer/staging_orders.parquet
Deleting: /home/sapna.choudhary/Data-Engineering-Training/Polars_DWH/parquet_files/staging_layer/staging_product.parquet
Deleting: /home/sapna.choudhary/Data-Engineering-Training/Polars_DWH/parquet_files/staging_layer/staging_shipping_type.parquet
Deleting: /home/sapna.choudhary/Data-Engineering-Training/Polars_DWH/parquet_files/staging_layer/staging_customer.parquet


### Load all tables, save them as Parquet files, and write them to the database

In [21]:
def load_staging_tables(staging_cfg: dict, src_dir: Path, dest_dir: Path, engine, schema_name: str) -> dict[str, pl.DataFrame]:
    """Build every configured staging table and persist it to Parquet and to the database.

    For each entry of ``staging_cfg`` the source file path is handed to the
    transform function whose name is stored under ``"transform_fn"``; the
    function is looked up by name in this module's globals. When no global of
    that name exists, a message is printed and the source Parquet file is read
    unchanged. The resulting frame is written to ``dest_dir`` and then to the
    database table ``"<schema_name>.<table>"``, replacing an existing table.

    Args:
        staging_cfg: Maps a table name to a dict with the keys ``"src_file"``,
            ``"dest_file"`` and ``"transform_fn"``.
        src_dir: Directory that holds the source files.
        dest_dir: Directory that receives the staging Parquet files.
        engine: Connection passed to ``DataFrame.write_database``.
        schema_name: Database schema used as the table-name prefix.

    Returns:
        A dict mapping each table name to the frame that was staged for it.
    """
    staged = {}
    for tbl, props in staging_cfg.items():
        bronze_path = src_dir / props["src_file"]
        staging_path = dest_dir / props["dest_file"]

        # Resolve the transform by name; a missing name falls back to a plain read.
        fn_name = props["transform_fn"]
        transform = globals().get(fn_name)
        if transform is not None:
            frame = transform(bronze_path)
        else:
            print(f"Transform function {fn_name} not found, using default passthrough")
            frame = pl.read_parquet(bronze_path)
        staged[tbl] = frame

        # Local copy in the staging layer.
        frame.write_parquet(staging_path)
        print(f"Created {tbl}: {frame.shape} at {staging_path}")

        # Database copy; an existing table of the same name is replaced.
        frame.write_database(
            table_name=f"{schema_name}.{tbl}",
            connection=engine,
            if_table_exists="replace",
        )
        print(f"Saved {tbl} into DB schema {schema_name}")

    return staged


In [22]:
# Run the full staging load: bronze Parquet in, staging Parquet + "staging" DB schema out.
dfs = load_staging_tables(
    staging_cfg=config.STAGING_CONFIG,
    src_dir=bronze_parquet,
    dest_dir=staging_parquet,
    engine=engine,
    schema_name="staging",
)

Created customer: (499, 16) at /home/sapna.choudhary/Data-Engineering-Training/Polars_DWH/parquet_files/staging_layer/staging_customer.parquet


Saved customer into DB schema staging
Created shipping_type: (4, 4) at /home/sapna.choudhary/Data-Engineering-Training/Polars_DWH/parquet_files/staging_layer/staging_shipping_type.parquet
Saved shipping_type into DB schema staging
Created product: (49997, 12) at /home/sapna.choudhary/Data-Engineering-Training/Polars_DWH/parquet_files/staging_layer/staging_product.parquet
Saved product into DB schema staging
Created orders: (3000, 20) at /home/sapna.choudhary/Data-Engineering-Training/Polars_DWH/parquet_files/staging_layer/staging_orders.parquet
Saved orders into DB schema staging


In [23]:
# Spot-check: read one staged table back from the database and display it.
schema_name, tbl = "staging", "orders"
table_name = ".".join((schema_name, tbl))
query = "SELECT * FROM " + table_name

df_staging = pl.read_database(query=query, connection=engine)
df_staging

orders_id,customer_id,product_id,payment_source,shipping_type,lead_type,order_status,order_date,shipping_date,expected_delivery_date,delivery_date,return_date,refund_date,quantity,unit_price,is_gift,gift_message,has_coupon,coupon_code,load_timestamp
str,i64,str,str,str,str,str,datetime[μs],datetime[μs],datetime[μs],datetime[μs],datetime[μs],datetime[μs],i64,f64,i64,str,i64,str,datetime[μs]
"""Ord_000002467""",1466,"""Prd_4755""","""Net Banking""","""One-Day Delivery""","""Retail""","""Processing""",2024-02-15 00:00:00,2024-02-22 00:00:00,2024-02-23 00:00:00,2024-02-15 00:00:00,2024-03-02 00:00:00,,2,39381.300781,0,"""""",0,"""""",2025-09-16 23:18:26.130
"""Ord_000002798""",1276,"""Prd_2055""","""Cash""","""One-Day Delivery""","""Mobile App""","""Cancelled""",2024-03-26 00:00:00,2024-03-29 00:00:00,,2024-03-31 00:00:00,,,5,97318.0,1,""" """,0,"""""",2025-09-16 23:18:26.130
"""Ord_000000766""",1354,"""Prd_7139""","""Net Banking""","""Normal""","""Mobile App""","""Returned""",2024-01-08 00:00:00,1900-01-01 00:00:00,2024-01-08 00:00:00,2024-01-18 00:00:00,,,3,88883.0,0,"""""",1,"""CUPN1761""",2025-09-16 23:18:26.130
"""Ord_000001246""",1325,"""Prd_8262""","""Credit Card""","""Fast Delivery""","""Retail""","""Processing""",2024-04-16 00:00:00,2024-04-18 00:00:00,2024-04-17 00:00:00,2024-04-27 00:00:00,,,2,40932.921875,0,"""""",0,"""""",2025-09-16 23:18:26.130
"""Ord_000000195""",1028,"""Prd_7588""","""Net Banking""","""Normal""","""Retail""","""Processing""",2024-05-27 00:00:00,2024-05-30 00:00:00,2024-05-29 00:00:00,2024-06-10 00:00:00,,,1,79921.0,1,"""Happy Birthday!""",1,"""CUPN5278""",2025-09-16 23:18:26.130
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""Ord_000001434""",1074,"""Prd_9872""","""Cash""","""Fast Delivery""","""Retail""","""Cancelled""",2024-06-12 00:00:00,1900-01-01 00:00:00,2024-06-16 00:00:00,2024-06-20 00:00:00,,,3,42640.910156,1,""" """,1,"""CUPN5807""",2025-09-16 23:18:26.130
"""Ord_000001681""",1051,"""Prd_6586""","""Debit Card""","""Fast Delivery""","""Website""","""Delivered""",2024-02-24 00:00:00,2024-02-27 00:00:00,2024-02-29 00:00:00,2024-02-27 00:00:00,,,5,61213.0,1,"""Best Wishes!""",0,"""""",2025-09-16 23:18:26.130
"""Ord_000001167""",1358,"""Prd_1650""","""Paypal""","""One-Day Delivery""","""Mobile App""","""Returned""",2024-02-18 00:00:00,2024-02-22 00:00:00,2024-02-23 00:00:00,2024-03-01 00:00:00,,,1,22137.0,0,"""""",1,"""CUPN1787""",2025-09-16 23:18:26.130
"""Ord_000002764""",1002,"""Prd_1515""","""Cash""","""Normal""","""Retail""","""Delivered""",2024-06-27 00:00:00,2024-06-29 00:00:00,2024-07-06 00:00:00,2024-07-10 00:00:00,,,5,27194.669922,0,"""""",1,"""CUPN2056""",2025-09-16 23:18:26.130
