# Part 3 - Loading Data

In [1]:
import os

import numpy as np
import pandas as pd
from sqlalchemy import create_engine, Column, Integer, String, Float, ForeignKey, Date, Boolean
from sqlalchemy.orm import declarative_base, relationship, sessionmaker


# Raw wide-format inputs: one CSV per grain (product-level and warehouse-level).
product_df = pd.read_csv(filepath_or_buffer="../data/warehouse_products.csv")
operations_df = pd.read_csv(filepath_or_buffer="../data/warehouse_daily.csv")

## Wide to Long

In [2]:
def select_one_strategy(df, underscore_strategy, shared_columns, strategy):
    """Extract one strategy's columns from a wide frame and label the rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide frame whose strategy-specific columns end with a suffix
        such as ``"_weekly"``. Column labels must be strings.
    underscore_strategy : str
        Suffix (including the leading underscore) that marks the columns
        belonging to the wanted strategy.
    shared_columns : sequence of str
        Columns that are not strategy-specific and are carried over as-is.
    strategy : str
        Value written to the new ``"strategy"`` column on every row.

    Returns
    -------
    pandas.DataFrame
        A new frame with the shared columns, the strategy's columns with the
        suffix removed, and a constant ``"strategy"`` column. ``df`` itself
        is not modified.
    """
    suffix_columns = [column for column in df.columns if column.endswith(underscore_strategy)]

    # Strip only the trailing suffix. ``str.replace`` would also delete the
    # same text from the middle of a name (``a_jit_b_jit`` -> ``a_b``).
    # Slicing with an explicit end index also stays correct for an empty suffix.
    renamed_columns = {
        column: column[: len(column) - len(underscore_strategy)]
        for column in suffix_columns
    }

    # list(...) lets callers pass any sequence (tuple, Index), not only a list.
    single_strat_df = df[list(shared_columns) + suffix_columns].rename(columns=renamed_columns)
    single_strat_df["strategy"] = strategy

    return single_strat_df

In [3]:
# Product-table columns carried unchanged into every per-strategy frame.
shared_columns_pro = [
    "date",
    "product_id",
    "warehouse_id",
    "demand",
]

In [4]:
# Wide -> long for the product table: one frame per strategy, then stack them
# on top of each other with a fresh 0..n-1 index.
weekly_pro_df = select_one_strategy(
    df=product_df,
    underscore_strategy="_weekly",
    shared_columns=shared_columns_pro,
    strategy="weekly",
)
jit_pro_df = select_one_strategy(
    df=product_df,
    underscore_strategy="_jit",
    shared_columns=shared_columns_pro,
    strategy="JIT",
)
combined_product_df = pd.concat(
    [weekly_pro_df, jit_pro_df],
    ignore_index=True,
)

In [5]:
# Operations-table columns carried unchanged into every per-strategy frame.
shared_columns_op = [
    "date",
    "warehouse_id",
]

In [6]:
# Wide -> long for the operations table, same recipe as for the product table.
weekly_op_df = select_one_strategy(
    df=operations_df,
    underscore_strategy="_weekly",
    shared_columns=shared_columns_op,
    strategy="weekly",
)
jit_op_df = select_one_strategy(
    df=operations_df,
    underscore_strategy="_jit",
    shared_columns=shared_columns_op,
    strategy="JIT",
)
combined_operations_df = pd.concat(
    [weekly_op_df, jit_op_df],
    ignore_index=True,
)

## Schema

<center><img src="../schema.svg"></center>
<center><i>Schema created with <a href="https://dbdiagram.io/">dbdiagram.io</a></i></center>

In [7]:
# Shared declarative base: each ORM class that subclasses it registers its
# table on base.metadata.
base = declarative_base()

class DimWarehouse(base):
    """Warehouse dimension table, keyed by ``warehouse_id``."""
    __tablename__ = "dim_warehouse"
    warehouse_id = Column(String, primary_key=True)
    warehouse_name = Column(String)
    region = Column(String)
    capacity = Column(Integer)  # unit is not stated in this file — TODO confirm

class DimDate(base):
    """Date dimension table, keyed by the calendar ``date`` itself."""
    __tablename__ = "dim_date"
    date = Column(Date, primary_key=True)
    # Calendar attributes stored alongside the date
    year = Column(Integer)
    month = Column(Integer)
    month_name = Column(String)
    day = Column(Integer)
    weekday = Column(String)  # stored as text, not as a weekday number

class DimProducts(base):
    """Product dimension table, keyed by ``product_id``."""
    __tablename__ = "dim_products"
    product_id = Column(String, primary_key=True)
    product_name = Column(String)
    # Monetary attributes; currency is not stated in this file — TODO confirm
    wholesale_cost = Column(Float)
    retail_price = Column(Float)
    storage_cost_per_day = Column(Float)
    product_class = Column(String)
        
class DailyOperations(base):
    """Warehouse-level daily fact table.

    Composite primary key: (``date``, ``warehouse_id``, ``strategy``).
    ``date`` and ``warehouse_id`` are foreign keys into the date and
    warehouse dimension tables.
    """
    __tablename__ = "daily_operations"
    date = Column(Date, ForeignKey("dim_date.date"), primary_key=True)
    warehouse_id = Column(String, ForeignKey("dim_warehouse.warehouse_id"), primary_key=True)
    # Strategy label; this notebook writes "weekly" or "JIT"
    strategy = Column(String, primary_key=True)
    inbound_units = Column(Integer)
    orders_fulfilled = Column(Integer)
    inventory_level = Column(Integer)
    missed_sales = Column(Integer)
    outbound_shipments = Column(Integer)
    van_utilization = Column(Float)
    inbound_shipments = Column(Integer)
    truck_utilization = Column(Float)
    warehouse_utilization = Column(Float)
    staff_count = Column(Integer)
    errors = Column(Integer)

class DailyProducts(base):
    """Product-level daily fact table.

    Composite primary key: (``date``, ``product_id``, ``warehouse_id``,
    ``strategy``). ``date``, ``product_id`` and ``warehouse_id`` are foreign
    keys into the corresponding dimension tables.
    """
    __tablename__ = "daily_products"
    date = Column(Date, ForeignKey("dim_date.date"), primary_key=True)
    product_id = Column(String, ForeignKey("dim_products.product_id"), primary_key=True)
    warehouse_id = Column(String, ForeignKey("dim_warehouse.warehouse_id"), primary_key=True)
    # Strategy label; this notebook writes "weekly" or "JIT"
    strategy = Column(String, primary_key=True)
    demand = Column(Integer)
    inbound_units = Column(Integer)
    actual_outbound = Column(Integer)
    inventory_level = Column(Integer)
    unmet_demand = Column(Integer)
    stockout_flag = Column(Boolean)

PostgreSQL is used as the target database because Power BI supports it as a data source.

In [8]:
# Connection URL for the target PostgreSQL database.
# It is read from the WAREHOUSE_DB_URL environment variable so that real
# credentials do not have to be written into the notebook. The fallback is the
# local development database this notebook was originally written against.
DATABASE_URL = os.environ.get(
    "WAREHOUSE_DB_URL",
    "postgresql+psycopg2://my_user:Password@localhost:5432/warehouse_db",
)
engine = create_engine(DATABASE_URL)

# Emit CREATE TABLE for every table registered on `base`.
base.metadata.create_all(engine)

# One module-level session, shared by all loading cells that follow.
Session = sessionmaker(bind=engine)
session = Session()

In [9]:
# Dimension tables are loaded from their own CSV files.
warehouse_dim_df = pd.read_csv(filepath_or_buffer="../data/dim_warehouse.csv")
date_dim_df = pd.read_csv(filepath_or_buffer="../data/dim_date.csv")
product_dim_df = pd.read_csv(filepath_or_buffer="../data/dim_products.csv")


In [10]:
# Build one DimWarehouse object per row of the warehouse dimension frame.
warehouse_objects = [
    DimWarehouse(
        warehouse_id=row["warehouse_id"],
        warehouse_name=row["warehouse_name"],
        region=row["region"],
        capacity=row["capacity"]
    )
    for _, row in warehouse_dim_df.iterrows()
]

# merge() reconciles each object with any existing row that has the same
# primary key, so the cell can be re-run without inserting duplicates.
try:
    for obj in warehouse_objects:
        session.merge(obj)
    session.commit()
except Exception:
    # The session is shared by later cells: roll back so it stays usable,
    # then re-raise so the failure is still visible.
    session.rollback()
    raise

In [11]:
# Build one DimDate object per row of the date dimension frame.
# NOTE(review): date_dim_df is read without parse_dates, so `date` is
# presumably a plain string here and the Date column relies on the
# driver/database to coerce it — confirm.
date_objects = [
    DimDate(
        date=row["date"],
        year=row["year"],
        month=row["month"],
        month_name=row["month_name"],
        day=row["day"],
        weekday=row["weekday"]
    )
    for _, row in date_dim_df.iterrows()
]

# merge() reconciles each object with any existing row that has the same
# primary key, so the cell can be re-run without inserting duplicates.
try:
    for obj in date_objects:
        session.merge(obj)
    session.commit()
except Exception:
    # The session is shared by later cells: roll back so it stays usable,
    # then re-raise so the failure is still visible.
    session.rollback()
    raise

In [12]:
# Build one DimProducts object per row of the product dimension frame.
# NOTE(review): the CSV key is "storage_costs_per_day" (plural "costs") while
# the model attribute is storage_cost_per_day — verify the CSV header.
product_objects = [
    DimProducts(
        product_id=row["product_id"],
        product_name=row["product_name"],
        wholesale_cost=row["wholesale_cost"],
        retail_price=row["retail_price"],
        storage_cost_per_day=row["storage_costs_per_day"],
        product_class=row["product_class"])

    for _, row in product_dim_df.iterrows()
]

# merge() reconciles each object with any existing row that has the same
# primary key, so the cell can be re-run without inserting duplicates.
try:
    for obj in product_objects:
        session.merge(obj)
    session.commit()
except Exception:
    # The session is shared by later cells: roll back so it stays usable,
    # then re-raise so the failure is still visible.
    session.rollback()
    raise
        
        

In [13]:
# Build one DailyOperations object per row of the long-format operations frame.
# NOTE(review): `date` is taken straight from the CSV-derived frame, so it is
# presumably a string that the driver/database coerces to a date — confirm.
operations_objects = [
    DailyOperations(
        date=row["date"],
        warehouse_id=row["warehouse_id"],
        strategy=row["strategy"],
        inbound_units=row["inbound_units"],
        orders_fulfilled=row["orders_fulfilled"],
        inventory_level=row["inventory_level"],
        missed_sales=row["missed_sales"],
        outbound_shipments=row["outbound_shipments"],
        van_utilization=row["van_utilization"],
        inbound_shipments=row["inbound_shipments"],
        truck_utilization=row["truck_utilization"],
        warehouse_utilization=row["warehouse_utilization"],
        staff_count=row["staff_count"],
        errors=row["errors"]
    )
    for _, row in combined_operations_df.iterrows()
]

# merge() reconciles each object with any existing row that has the same
# composite primary key, so the cell can be re-run without inserting duplicates.
try:
    for obj in operations_objects:
        session.merge(obj)
    session.commit()
except Exception:
    # The session is shared by later cells: roll back so it stays usable,
    # then re-raise so the failure is still visible.
    session.rollback()
    raise

In [14]:
# Build one DailyProducts object per row of the long-format product frame.
# NOTE(review): `date` is taken straight from the CSV-derived frame, so it is
# presumably a string that the driver/database coerces to a date — confirm.
daily_products_objects = [
    DailyProducts(
        date=row["date"],
        product_id=row["product_id"],
        warehouse_id=row["warehouse_id"],
        strategy=row["strategy"],
        demand=row["demand"],
        inbound_units=row["inbound_units"],
        actual_outbound=row["actual_outbound"],
        inventory_level=row["inventory_level"],
        unmet_demand=row["unmet_demand"],
        stockout_flag=row["stockout_flag"]
    )
    for _, row in combined_product_df.iterrows()
]

# merge() reconciles each object with any existing row that has the same
# composite primary key, so the cell can be re-run without inserting duplicates.
try:
    for obj in daily_products_objects:
        session.merge(obj)
    session.commit()
except Exception:
    # The session is shared by later cells: roll back so it stays usable,
    # then re-raise so the failure is still visible.
    session.rollback()
    raise