# RAW LAYER

### Necessary Imports

In [8]:
import importlib
import os
import urllib
import urllib.parse
from pathlib import Path

import polars as pl
from dotenv import load_dotenv
from sqlalchemy import create_engine
from sqlalchemy.engine import Engine

import config

# Load variables from the .env file into the process environment;
# override=True lets .env values replace variables that are already set.
load_dotenv(override=True) 
# Re-execute the config module, which was first imported above before
# load_dotenv ran.
# NOTE(review): presumably so config sees the freshly loaded environment
# and any edits to config.py — confirm.
importlib.reload(config)

<module 'config' from '/home/sapna.choudhary/Data-Engineering-Training/Polars_DWH/raw_layer/config.py'>

In [9]:
# Locations come from the environment (populated from .env by load_dotenv).
# os.environ[...] is used instead of os.getenv(...) so that a missing variable
# fails with a KeyError naming the variable, rather than an opaque
# TypeError from Path(None).
RAW_DATASET = Path(os.environ["RAW_DATASET"])
PARQUET_FILES_DIR = Path(os.environ["PARQUET_FILES_DIR"])

# Directory that receives the raw-layer parquet files; created on first run.
raw_parquet = PARQUET_FILES_DIR / 'raw_layer'
raw_parquet.mkdir(parents=True, exist_ok=True)

### Delete all parquet files of the Raw layer

In [10]:
# Remove the parquet files sitting directly in the raw-layer directory,
# printing each path before it is deleted.
for stale_parquet in raw_parquet.glob("*.parquet"):
    print(f"Deleting: {stale_parquet}")
    stale_parquet.unlink()

Deleting: /home/sapna.choudhary/Data-Engineering-Training/Polars_DWH/parquet_files/raw_layer/raw_orders.parquet


### Set up the database connection

In [11]:
# Connection settings are read from the environment (populated from .env by
# load_dotenv) so that no credentials are stored in the notebook.
# NOTE(review): the password that was previously hardcoded in this cell has
# been exposed wherever the notebook was shared and should be rotated.
db_server = os.getenv("DB_SERVER", "associatetraining.database.windows.net,1433")
db_name = os.getenv("DB_NAME", "associatetraining")
db_user = os.environ["DB_USER"]          # required: KeyError if unset
db_password = os.environ["DB_PASSWORD"]  # required: KeyError if unset

# The raw ODBC connection string is URL-encoded and handed to pyodbc
# unchanged through the odbc_connect query parameter.
params = urllib.parse.quote_plus(
    "DRIVER={ODBC Driver 17 for SQL Server};"
    f"SERVER={db_server};"
    f"DATABASE={db_name};"
    f"UID={db_user};"
    f"PWD={db_password};"
)

engine = create_engine(f"mssql+pyodbc:///?odbc_connect={params}")

### Load all tables and save them as Parquet files

In [12]:
def load_raw_tables(raw_cfg: dict[str, dict], dest_dir: Path, engine: Engine, schema_name: str) -> dict[str, pl.DataFrame]:
    """Read every configured table from the database and save it as parquet.

    For each key in ``raw_cfg`` the table ``<schema_name>.<key>`` is read in
    full (``SELECT *``) and written to ``dest_dir / props["dest_file"]``.

    Args:
        raw_cfg: Mapping of table name -> properties dict. Each properties
            dict must contain a ``"dest_file"`` entry (the parquet file name).
        dest_dir: Directory the parquet files are written to; created if it
            does not exist yet.
        engine: Database connection handed straight to ``pl.read_database``
            (the notebook passes the SQLAlchemy engine built above).
        schema_name: Database schema that holds the tables.

    Returns:
        Mapping of table name -> the DataFrame read for that table.

    Raises:
        KeyError: If a table's properties have no ``"dest_file"`` key.
    """
    # Make the function usable on its own rather than relying on the caller
    # having created the destination directory beforehand.
    dest_dir.mkdir(parents=True, exist_ok=True)

    dfs = {}
    for tbl, props in raw_cfg.items():
        # Schema and table names are interpolated into the SQL text verbatim,
        # so they must come from trusted configuration, never from user input.
        table_name = f"{schema_name}.{tbl}"
        query = f"SELECT * FROM {table_name}"

        dest_file = dest_dir / props["dest_file"]

        df_raw = pl.read_database(query, engine)
        dfs[tbl] = df_raw

        df_raw.write_parquet(dest_file)
        print(f"Created {tbl}: {df_raw.shape} at {dest_file}")

    return dfs


In [13]:

# Re-execute config so RAW_CONFIG reflects the current config.py, then read
# every configured table of the 'raw' schema and write it to raw_parquet.
importlib.reload(config)
dfs = load_raw_tables(
    raw_cfg=config.RAW_CONFIG,
    dest_dir=raw_parquet,
    schema_name='raw',
    engine=engine,
)

Created customer: (500, 15) at /home/sapna.choudhary/Data-Engineering-Training/Polars_DWH/parquet_files/raw_layer/raw_customer.parquet
Created shipping_type: (5, 3) at /home/sapna.choudhary/Data-Engineering-Training/Polars_DWH/parquet_files/raw_layer/raw_shipping_type.parquet
Created product: (50000, 11) at /home/sapna.choudhary/Data-Engineering-Training/Polars_DWH/parquet_files/raw_layer/raw_product.parquet
Created orders: (3002, 19) at /home/sapna.choudhary/Data-Engineering-Training/Polars_DWH/parquet_files/raw_layer/raw_orders.parquet


In [14]:
# Spot check: read the raw.orders table straight from the database and
# display the resulting frame.
schema_name, tbl = 'raw', 'orders'
table_name = ".".join((schema_name, tbl))
query = "SELECT * FROM " + table_name

df_bronze = pl.read_database(query, engine)
df_bronze

orders_id,customer_id,product_id,payment_src,shipping_type,lead_type,order_status,order_date,shipping_date,expected_delivery_date,delivery_date,return_date,refund_date,quantity,unit_price,is_gift,gift_message,has_coupon,coupon_code
str,i64,str,str,str,str,str,str,str,str,str,str,str,i64,f64,i64,str,i64,str
"""ord_000000001""",1306,"""Prd_00000005""","""upi""","""One-Day Delivery""","""retail""","""""","""2024-06-22""","""2024-06-23""","""2024-07-01""","""2024-07-05""","""""","""""",6,2989.399902,0,"""""",0,""""""
"""ord_000000001""",1306,"""Prd_00000002""","""upi""","""One-Day Delivery""","""retail""","""Delivered""","""2024-06-22""","""2024-06-23""","""2024-07-01""","""2024-07-05""","""""","""""",6,2989.399902,0,"""""",0,""""""
"""ord_000000002""",1168,"""Prd_00000003""","""Credit card""","""Express""","""website""","""Shipped""","""2024-03-12""","""2024-03-12""","""""","""2024-03-17""","""""","""""",4,34031.601562,0,"""""",1,"""CUPN2674"""
"""ord_000000003""",1490,"""Prd_00000004""","""Net banking""","""""","""retail""","""Processing""","""2024-05-17""","""""","""2024-05-18""","""2024-05-29""","""""","""""",2,55208.542969,1,"""Best Wishes!""",0,""""""
"""ord_000000004""",1441,"""Prd_00000005""","""Cash!""","""Express""","""retail""","""Processing""","""2024-03-12""","""2024-03-19""","""2024-03-22""","""2024-03-23""","""""","""""",10,37024.398438,1,"""Happy Birthday!""",0,""""""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""ord_000002996""",1470,"""prd_3523""",""" Debit Card ""","""One-Day Delivery""","""website""","""Returned""","""2024-05-25""","""2024-05-25""","""2024-06-01""","""2024-06-08""","""""","""""",9,32450.0,1,"""Enjoy!""",0,""""""
"""ord_000002997""",1078,"""prd_9944""","""upi""","""Express""","""website""","""Processing""","""2024-04-29""","""2024-04-29""","""2024-05-06""","""2024-05-11""","""""","""""",9,98695.054688,0,"""""",1,"""CUPN5515"""
"""ord_000002998""",1335,"""prd_3620""","""Cash!""","""FAST Delivery""","""mobile app""","""Returned""","""2024-05-22""","""2024-05-25""","""2024-05-28""","""2024-06-01""","""""","""""",9,58800.871094,0,"""""",1,"""CUPN8586"""
"""ord_000002999""",1075,"""prd_0450""",""" Debit Card ""","""Normal""","""website""","""Shipped""","""2024-06-17""","""2024-06-18""","""2024-06-22""","""2024-06-29""","""""","""""",2,59807.128906,0,"""""",0,""""""
