# STAGING LAYER

### Necessary Imports

In [7]:
import os, sys
import logging
import importlib
from pathlib import Path
from dotenv import load_dotenv

import polars as pl
from sqlalchemy.engine import Engine

import config

# Make the parent of the current working directory importable. Path() is the
# CWD, so this assumes the notebook is executed from its own folder — TODO confirm.
top_level = Path().resolve().parent
sys.path.append(str(top_level))
# Must stay after the sys.path tweak above; presumably db_utils lives in that
# parent directory — verify.
from db_utils import engine


# Re-import config so edits made to it are picked up without restarting the kernel.
importlib.reload(config)

# Emit INFO-level (and above) log records; `logger` is used by the cells below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load variables from the .env file, letting them override values already set
# in the process environment.
load_dotenv(override=True)

True

In [8]:
# Execute the sibling notebook in this kernel. Presumably it defines the transform
# functions that load_staging_tables later looks up by name via globals() — verify.
%run ./df_transformations.ipynb

In [9]:
# Root directory for all parquet layers, read from the environment (.env).
# Fail fast with a clear message: Path(None) raises an opaque TypeError, and an
# empty value would silently resolve to the current working directory.
_parquet_root = os.getenv("PARQUET_FILES_DIR")
if not _parquet_root:
    raise RuntimeError(
        "Environment variable PARQUET_FILES_DIR is not set; "
        "define it in the environment or in the .env file."
    )
PARQUET_FILES_DIR = Path(_parquet_root)

# Source layer read by the staging load; created if it does not exist yet.
bronze_parquet = PARQUET_FILES_DIR / "bronze_layer"
bronze_parquet.mkdir(parents=True, exist_ok=True)

# Destination layer written by the staging load; created if it does not exist yet.
staging_parquet = PARQUET_FILES_DIR / "staging_layer"
staging_parquet.mkdir(parents=True, exist_ok=True)

### Delete all non-history parquet files from the staging layer (files ending in `_hist.parquet` are kept)

In [10]:
# Remove the current snapshot files from the staging layer, keeping every file
# whose name ends in "_hist.parquet".
non_hist_files = (
    candidate
    for candidate in staging_parquet.glob("*.parquet")
    if not candidate.name.endswith("_hist.parquet")
)
for file in non_hist_files:
    logger.info(f"Deleting: {file}")
    file.unlink()

### Load all tables & Save as parquet files

In [11]:
def load_staging_tables(
    staging_cfg: dict,
    src_dir: Path,
    dest_dir: Path,
    engine: Engine,
    schema_name: str,
) -> dict[str, pl.DataFrame]:
    """Build each configured staging table, save it as parquet and load it into the database.

    For every entry in ``staging_cfg`` the source parquet file is handed to the
    configured transform function, which is looked up by name in this module's
    globals. When no usable transform is available the source file is read
    unchanged (passthrough). The resulting frame is written to ``dest_dir`` and
    then to ``<schema_name>.<table>`` in the database, replacing an existing table.

    Args:
        staging_cfg: Mapping of table name -> properties. Each properties mapping
            must provide ``"src_file"`` and ``"dest_file"`` (file names relative
            to ``src_dir`` / ``dest_dir``) and may provide ``"transform_fn"``,
            the name of a callable that accepts the source path and returns a
            DataFrame.
        src_dir: Directory containing the source parquet files.
        dest_dir: Directory the staging parquet files are written to.
        engine: SQLAlchemy engine used for the database write.
        schema_name: Database schema that receives the tables.

    Returns:
        Mapping of table name -> the DataFrame that was written.

    Raises:
        KeyError: If an entry lacks ``"src_file"`` or ``"dest_file"``.
    """
    dfs: dict[str, pl.DataFrame] = {}
    for tbl, props in staging_cfg.items():
        src_file = src_dir / props["src_file"]
        dest_file = dest_dir / props["dest_file"]

        # Resolve the transform by name. A missing key is treated the same as
        # "no transform configured" instead of aborting the whole load.
        transform_fn_name = props.get("transform_fn")
        transform_fn = globals().get(transform_fn_name) if transform_fn_name else None

        if callable(transform_fn):
            df_staging = transform_fn(src_file)
        else:
            if transform_fn_name:
                # A name was configured but is unknown or not callable: most
                # likely a typo, so make the fallback clearly visible.
                logger.warning(
                    "Transform function %s not found, using default passthrough",
                    transform_fn_name,
                )
            else:
                logger.info(
                    "No transform function configured for %s, using default passthrough",
                    tbl,
                )
            df_staging = pl.read_parquet(src_file)

        dfs[tbl] = df_staging

        # Save to parquet (local staging layer)
        df_staging.write_parquet(dest_file)
        logger.info("Created %s: %s at %s", tbl, df_staging.shape, dest_file)

        # Save to SQL Server (staging schema in DB); an existing table is replaced.
        table_name = f"{schema_name}.{tbl}"
        df_staging.write_database(
            table_name=table_name,
            connection=engine,
            if_table_exists="replace",
        )
        logger.info("Saved %s into DB schema %s", tbl, schema_name)

    return dfs


In [12]:
# Run the staging load: bronze parquet files in, staging parquet files and
# tables in the 'staging' schema out.
dfs = load_staging_tables(
    staging_cfg=config.STAGING_CONFIG,
    src_dir=bronze_parquet,
    dest_dir=staging_parquet,
    engine=engine,
    schema_name="staging",
)

INFO:__main__:Created shipping_type: (4, 4) at /home/sapna.choudhary/Data-Engineering-Training/Polars_DWH/parquet_files/staging_layer/staging_shipping_type.parquet


INFO:__main__:Saved shipping_type into DB schema staging


### Verify Results

In [13]:
# Spot-check a single staging table by reading it back from the database.
schema_name = "staging"
tbl = "customer"

# Schema-qualified table name and the query that fetches every row of it.
table_name = f"{schema_name}.{tbl}"
query = f"SELECT * FROM {table_name}"

df_staging = pl.read_database(query, engine)
df_staging

customer_id,signup_date,gender,customer_dob,customer_name,marital_status,email,phone,customer_type,account_status,country,state,city,postal_code,region,load_timestamp
i64,datetime[μs],str,date,str,str,str,i64,str,str,str,str,str,str,str,datetime[μs]
1342,2024-02-17 00:00:00,"""Female""",1992-11-04,"""Erin""","""Unknown""",,672513670,"""Non-prime""","""Active""","""Nigeria""","""Lagos""","""Ikeja""","""100001""","""Africa""",2025-09-19 16:03:07.637
1207,2024-09-08 00:00:00,"""Male""",,"""Melissa Kim""","""Single""","""melissa.kim@yahoo.com""",657815616,"""Unknown""","""Unknown""","""Japan""","""Osaka""","""Kita""","""5300001""","""Asia""",2025-09-19 16:03:07.637
1146,2024-08-31 00:00:00,"""Unknown""",1991-09-21,"""Courtney Baker Singh""","""Married""",,793828100,"""Unknown""","""Unknown""","""Japan""","""Tokyo""","""Shinjuku""","""1600022""","""Asia""",2025-09-19 16:03:07.637
1205,2025-04-19 00:00:00,"""Male""",1961-06-11,"""Jermaine Vance""","""Single""",,537354472,"""Non-prime""","""Unknown""","""India""","""Maharashtra""","""Mumbai""","""400001""","""Asia""",2025-09-19 16:03:07.637
1280,2024-01-07 00:00:00,"""Male""",1966-06-27,"""Kathryn Myers""","""Single""","""kathryn.myers@gmail.com""",61747720,"""Non-prime""","""Unknown""","""Germany""","""Bavaria""","""Munich""","""80331""","""Europe""",2025-09-19 16:03:07.637
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
1496,2025-04-16 00:00:00,"""Female""",1962-07-22,"""Amber Garca""","""Single""",,293658369,"""Prime""","""Inactive""","""Germany""","""Bavaria""","""Munich""","""80331""","""Europe""",2025-09-19 16:03:07.637
1066,2025-04-25 00:00:00,"""Unknown""",1959-01-09,"""Bianca""","""Single""",,688384532,"""Non-prime""","""Suspended""","""Nigeria""","""Lagos""","""Ikeja""","""100001""","""Africa""",2025-09-19 16:03:07.637
1126,2023-09-03 00:00:00,"""Female""",,"""Jennifer Jackson""","""Married""","""jennifer.jackson@yahoo.com""",593386179,"""Prime""","""Unknown""","""Nigeria""","""Lagos""","""Ikeja""","""100001""","""Africa""",2025-09-19 16:03:07.637
1414,2024-05-15 00:00:00,"""Male""",1963-08-06,"""Cindy""","""Married""","""cindy@yahoo.com""",966172203,"""Non-prime""","""Suspended""","""Nigeria""","""Lagos""","""Ikeja""","""100001""","""Africa""",2025-09-19 16:03:07.637
