#### Deliverable 7: Inventory Fact Table (Daily Level) Implementation

In [1]:
import pandas as pd
import sqlite3 as lite

In [2]:
# Directory prefix for exported files (not referenced by the cells shown here).
output_file_path = "./output/"

# Single connection to store1.db: the dimension tables are read from it and
# the inventory fact table is written back to it further down.
conn = lite.connect("store1.db")

In [3]:
# Pull just the lookup columns needed from each dimension table:
#   date_dim : DateKey + Date   (maps a calendar date to its surrogate key)
#   products : ProductKey + SKU (maps a SKU to its surrogate key)
#   stores   : StoreKey
date_dim = pd.read_sql(sql="SELECT DateKey, Date FROM DateDimension", con=conn)
products = pd.read_sql(sql="SELECT ProductKey, SKU FROM ProductDimension", con=conn)
stores = pd.read_sql(sql="SELECT StoreKey FROM StoreDimension", con=conn)

In [4]:
# One entry per store, keyed by display name ("Store 1" .. "Store 4").
# Each entry names the SQLite file that holds the store's data and the
# table inside it that contains the sales transactions.
store_databases = {
    f"Store {store_number}": {
        "database_name": f"store{store_number}.db",
        "transactions_table_name": "sales_transactions",
    }
    for store_number in range(1, 5)
}

In [5]:
# Peek at the first five product rows to check the key/SKU columns.
products.head(n=5)

Unnamed: 0,ProductKey,sku
0,1,42081001
1,2,42082001
2,3,42083001
3,4,42084001
4,5,42085001


In [6]:
def load_all_transactions(store):
    """Read every row of one store's transactions table into a DataFrame.

    Parameters
    ----------
    store : dict
        Store configuration with the keys ``'database_name'`` (path of the
        SQLite file to open) and ``'transactions_table_name'`` (name of the
        table to read).

    Returns
    -------
    pandas.DataFrame
        All columns and all rows of the requested table.

    Raises
    ------
    KeyError
        If ``store`` lacks one of the two required keys.
    Exception
        Whatever ``pd.read_sql`` raises (e.g. when the table does not exist);
        the connection is closed before the error propagates.
    """
    # The table name is interpolated into the SQL text (identifiers cannot be
    # bound as parameters), so it must come from trusted configuration.
    table_name = store['transactions_table_name']
    transactions_query = f"select * from {table_name}"

    conn = lite.connect(store['database_name'])
    try:
        return pd.read_sql(transactions_query, conn)
    finally:
        # Always release the database handle, even when the query fails.
        conn.close()

In [8]:
# Business rules used to derive what the store paid from the retail price.
COST_TO_RETAIL_RATIO = 0.7  # store cost is taken as 70% of the retail price
ITEMS_PER_CASE = 12         # a case holds 12 items

# Parse the dimension's Date column once instead of on every loop iteration,
# and keep it under the same name as the join column built for transactions.
date_lookup = (
    date_dim
    .assign(temp_date=pd.to_datetime(date_dim['Date']))
    .drop(columns=['Date'])
)

# One aggregated (DateKey, ProductKey, StoreKey) frame per store.
all_inventory_data = []

for store_name, config in store_databases.items():
    print(f"Processing {store_name}...")

    # Load the store's raw transactions; the store key is the number in the
    # store's display name ("Store 3" -> 3), stored as an int so the key
    # column is numeric from the start.
    transactions = load_all_transactions(config)
    transactions['StoreKey'] = int(store_name.split(" ")[1])

    # Resolve each transaction date to its DateKey (left join: unmatched
    # dates keep the row but get a missing DateKey).
    transactions = (
        transactions
        .assign(temp_date=pd.to_datetime(transactions['date']))
        .merge(date_lookup, on='temp_date', how='left')
        .drop(columns=['temp_date'])
    )

    # Convert SKU to a nullable integer so it matches the product dimension;
    # values that are not numeric become <NA>.
    transactions['sku'] = pd.to_numeric(
        transactions['sku'], errors='coerce').astype('Int64')

    # Resolve each SKU to its ProductKey.
    transactions = transactions.merge(products, on='sku', how='left')

    # groupby() below discards rows whose key is missing, so report them
    # instead of losing them silently.
    unmatched = transactions[['DateKey', 'ProductKey']].isna().any(axis=1).sum()
    if unmatched:
        print(f"  warning: {unmatched} rows without a matching "
              f"DateKey/ProductKey are excluded")

    # Calculate inventory metrics.
    transactions['CostToStore_itemLevel'] = (
        transactions['salesPrice'] * COST_TO_RETAIL_RATIO).round(2)
    transactions['CostToStore_caseLevel'] = (
        transactions['CostToStore_itemLevel'] * ITEMS_PER_CASE).round(2)

    # Collapse to one row per day / product / store.
    # "last" and "first" depend on the row order returned by the query.
    # NOTE(review): in the displayed sample every row satisfies
    # #Available + #CasesPurchasedToDate / 7 == 84, which suggests
    # cases_ordered may already be a running total repeated on each
    # transaction row. If so, "sum" over-counts and "max"/"last" is the right
    # aggregate - confirm against the source schema before changing it.
    daily_inventory = transactions.groupby(['DateKey', "ProductKey", "StoreKey"]).agg({
        "items_left": "last",
        'cases_ordered': "sum",
        "CostToStore_itemLevel": "first",
        "CostToStore_caseLevel": "first"
    }).reset_index()

    daily_inventory = daily_inventory.rename(columns={
        "items_left": "#Available",
        "cases_ordered": "#CasesPurchasedToDate"
    })

    all_inventory_data.append(daily_inventory)

Processing Store 1...
Processing Store 2...
Processing Store 3...
Processing Store 4...


In [9]:
# Combine all stores' data into a single frame. Each per-store frame carries
# its own 0..n index, so ignore_index=True is used to give the combined frame
# unique row labels instead of repeating them once per store.
inventory_fact = pd.concat(all_inventory_data, ignore_index=True)

In [10]:
# Show the first 20 combined rows as a sanity check.
inventory_fact.head(n=20)

Unnamed: 0,DateKey,ProductKey,StoreKey,#Available,#CasesPurchasedToDate,CostToStore_itemLevel,CostToStore_caseLevel
0,1,1,1,72,84,1.92,23.04
1,1,2,1,66,126,1.38,16.56
2,1,3,1,73,77,3.07,36.84
3,1,4,1,73,77,3.07,36.84
4,1,5,1,71,91,2.3,27.6
5,1,6,1,67,119,1.54,18.48
6,1,7,1,66,126,8.92,107.04
7,1,8,1,66,126,3.84,46.08
8,1,9,1,70,98,2.76,33.12
9,1,10,1,69,105,3.69,44.28


In [11]:
# A fact row is only usable when all three dimension keys are present;
# discard any row where at least one of them is missing.
required_keys = ['DateKey', 'ProductKey', 'StoreKey']
inventory_fact = inventory_fact.dropna(axis=0, how='any', subset=required_keys)

In [12]:
# Cast the three key columns to plain integers in one pass.
inventory_fact = inventory_fact.astype({
    'DateKey': int,
    'ProductKey': int,
    'StoreKey': int,
})

In [13]:
# Schema of the daily inventory fact table: one row per
# (DateKey, ProductKey, StoreKey), with foreign keys to the three dimensions.
# Note: SQLite only enforces FOREIGN KEY clauses when
# `PRAGMA foreign_keys = ON` has been issued on the connection.
create_inventory_fact_sql = """
    CREATE TABLE InventoryFact_DailyLevel (
        DateKey INT NOT NULL,
        ProductKey INT NOT NULL,
        StoreKey INT NOT NULL,
        "#Available" INT NOT NULL,
        "CostToStore_itemLevel" REAL NOT NULL,
        "CostToStore_caseLevel" REAL NOT NULL,
        "#CasesPurchasedToDate" INT NOT NULL,
        PRIMARY KEY (DateKey, ProductKey, StoreKey),
        FOREIGN KEY (DateKey) REFERENCES DateDimension(DateKey),
        FOREIGN KEY (ProductKey) REFERENCES ProductDimension(ProductKey),
        FOREIGN KEY (StoreKey) REFERENCES StoreDimension(StoreKey)
    );
"""

# Drop any earlier copy of the table first so this cell can be re-run.
curr = conn.cursor()
curr.execute("DROP TABLE IF EXISTS InventoryFact_DailyLevel")
curr.execute(create_inventory_fact_sql)

<sqlite3.Cursor at 0x24e1178b7c0>

In [14]:
# Insert the rows into the existing InventoryFact_DailyLevel table.
# if_exists='append' keeps the schema created by the CREATE TABLE statement;
# if_exists='replace' would drop that table and rebuild it from the
# DataFrame's dtypes, discarding the primary key, NOT NULL and foreign key
# definitions. pandas inserts by column name, so the DataFrame's column order
# does not have to match the table's.
# Re-run the CREATE TABLE cell before re-running this one; otherwise the
# primary key rejects the duplicate rows.
inventory_fact.to_sql("InventoryFact_DailyLevel", conn,
                      if_exists='append', index=False)

1469153

In [15]:
# Read a handful of rows back from the database to confirm the load.
preview_query = "SELECT * FROM InventoryFact_DailyLevel LIMIT 5"

print("\ninventory fact table:")
inventory_df = pd.read_sql(preview_query, conn)
inventory_df


inventory fact table:


Unnamed: 0,DateKey,ProductKey,StoreKey,#Available,#CasesPurchasedToDate,CostToStore_itemLevel,CostToStore_caseLevel
0,1,1,1,72,84,1.92,23.04
1,1,2,1,66,126,1.38,16.56
2,1,3,1,73,77,3.07,36.84
3,1,4,1,73,77,3.07,36.84
4,1,5,1,71,91,2.3,27.6


In [16]:
# Release the cursor first, then the connection it was created from.
for handle in (curr, conn):
    handle.close()