#### Deliverables 8-11: Quarterly Inventory Snapshots Implementation

In [1]:
import pandas as pd
import sqlite3 as lite
from datetime import datetime

In [2]:
conn = lite.connect("store1.db")
output_file_path = "./output/"

In [3]:
inventory_daily = pd.read_sql("select * from InventoryFact_DailyLevel", conn)
sales_daily = pd.read_sql("select * from SalesFact_DailyLevel", conn)
date_dim = pd.read_sql("select DateKey, Date, Quarter, Year from DateDimension", conn)

In [4]:
inventory_daily.sample(5)

Unnamed: 0,DateKey,ProductKey,StoreKey,#Available,#CasesPurchasedToDate,CostToStore_itemLevel,CostToStore_caseLevel
2646026,216,223,4,62,8866,0.89,10.68
401541,200,474,1,33,5346,1.69,20.28
1487995,7,3,3,21,408,3.35,40.2
1144544,202,1477,2,23,11928,0.38,4.56
2321584,55,1089,4,66,2856,0.69,8.28


In [5]:
sales_daily.sample(5)

Unnamed: 0,DateKey,ProductKey,StoreKey,#SoldToday,SalesTotal,CostOfItemsSold,GrossProfit,DailyCustomerCount
1100480,180,1804,2,114192,398530.08,278971.06,119559.02,26
2884851,334,560,4,194724,810051.84,567036.29,243015.55,27
1577276,51,544,3,32292,205054.2,143537.94,61516.26,23
1170501,215,1140,2,151728,132003.36,92402.35,39601.01,29
1104001,182,1284,2,480,446.4,312.48,133.92,2


In [6]:
date_dim.sample(5)

Unnamed: 0,DateKey,Date,Quarter,Year
19,20,2024-01-20,1,2024
302,303,2024-10-29,4,2024
153,154,2024-06-02,2,2024
275,276,2024-10-02,4,2024
262,263,2024-09-19,3,2024


In [7]:
# Merge date information with inventory date
inventory_with_dates = inventory_daily.merge(
    date_dim,
    on="DateKey",
    how='left'
)

inventory_with_dates.sample(5)

Unnamed: 0,DateKey,ProductKey,StoreKey,#Available,#CasesPurchasedToDate,CostToStore_itemLevel,CostToStore_caseLevel,Date,Quarter,Year
2706743,246,292,4,31,5759,2.05,24.6,2024-09-02,3,2024
1072951,167,481,2,30,8575,1.61,19.32,2024-06-15,2,2024
652023,324,1082,1,42,7939,1.39,16.68,2024-11-19,4,2024
1008124,135,213,2,30,6528,5.87,70.44,2024-05-14,2,2024
2088757,305,2004,3,42,21111,2.18,26.16,2024-10-31,4,2024


In [8]:
# merge date information with sales date
sales_with_dates = sales_daily.merge(
    date_dim,
    on="DateKey",
    how='left'
)

sales_with_dates.sample(5)

Unnamed: 0,DateKey,ProductKey,StoreKey,#SoldToday,SalesTotal,CostOfItemsSold,GrossProfit,DailyCustomerCount,Date,Quarter,Year
2816115,300,551,4,195228,1001519.64,701063.75,300455.89,29,2024-10-26,4,2024
2004326,264,196,3,155496,324986.64,227490.65,97495.99,22,2024-09-20,3,2024
824804,44,439,2,27456,123552.0,86486.4,37065.6,26,2024-02-13,1,2024
1227458,243,1664,2,184512,579367.68,405557.38,173810.3,29,2024-08-30,3,2024
1690768,108,1298,3,89652,123719.76,86603.83,37115.93,30,2024-04-17,2,2024


#### Defining quarters

In [9]:
current_year = datetime.now().year - 1
quarters = [1, 2, 3, 4]

quarterly_snapshots = []

In [10]:
current_year

2024

In [11]:
for quarter in quarters:
    print(f"Processing Q{quarter} {current_year}...")

    # Filter for the quarter
    quarter_mask = (inventory_with_dates['Quarter'] == quarter) & (
        inventory_with_dates['Year'] == current_year)
    current_quarter_date = inventory_with_dates[quarter_mask]

    last_day_of_quarter = current_quarter_date['Date'].max()
    snapshot_date = current_quarter_date[current_quarter_date['Date']
                                         == last_day_of_quarter]

    # calculate quarterly metrics
    quarterly_metrics = snapshot_date.groupby(['ProductKey', "StoreKey", "Quarter", "Year"]).agg({
        "#Available": "last",
        '#CasesPurchasedToDate': 'last',
        'CostToStore_caseLevel': 'first'
    }).reset_index()

    # calculate cases purchased this quarter
    if quarter == 1:
        quarterly_metrics['#CasesPurchasedThisQuarter'] = quarterly_metrics['#CasesPurchasedToDate']
    else:
        prev_quarter_end = inventory_with_dates[
            (inventory_with_dates['Quarter'] == quarter-1) &
            (inventory_with_dates['Year'] == current_year)]['Date'].max()

        prev_quarter_data = inventory_with_dates[
            (inventory_with_dates['Date'] == prev_quarter_end)]

        prev_cases = prev_quarter_data.groupby(['ProductKey', 'StoreKey'])[
            '#CasesPurchasedToDate'].last().reset_index()

        quarterly_metrics = quarterly_metrics.merge(
            prev_cases,
            on=['ProductKey', 'StoreKey'],
            how='left',
            suffixes=('', '_prev')
        )

        quarterly_metrics['#CasesPurchasedThisQuarter'] = (
            quarterly_metrics['#CasesPurchasedToDate'] -
            quarterly_metrics['#CasesPurchasedToDate_prev']).round(2)

        quarterly_metrics.drop(
            '#CasesPurchasedToDate_prev', axis=1, inplace=True)

    # Calculate cases on hand (convert items to cases)
    quarterly_metrics['#CasesOnHand'] = quarterly_metrics['#Available'] // 12

    # Calculate sales metrics for the quarter
    quarter_sales = sales_with_dates[
        (sales_with_dates['Quarter'] == quarter) &
        (sales_with_dates['Year'] == current_year)]

    sales_metrics = quarter_sales.groupby(['ProductKey', 'StoreKey']).agg({
        'SalesTotal': 'sum',
        'CostOfItemsSold': 'sum',
        '#SoldToday': 'sum'
    }).reset_index()

    # Format monetary values to 2 decimal places
    sales_metrics['SalesTotal'] = sales_metrics['SalesTotal'].round(2)
    sales_metrics['CostOfItemsSold'] = sales_metrics['CostOfItemsSold'].round(
        2)

    # Merge sales metrics with inventory metrics
    quarterly_metrics = quarterly_metrics.merge(
        sales_metrics,
        on=['ProductKey', 'StoreKey'],
        how='left'
    )

    if quarter > 1:
        ytd_sales = sales_with_dates[
            (sales_with_dates['Quarter'] <= quarter) &
            (sales_with_dates['Year'] == current_year)]

        ytd_metrics = ytd_sales.groupby(['ProductKey', 'StoreKey']).agg({
            'SalesTotal': 'sum',
            'CostOfItemsSold': 'sum'
        }).reset_index()

        # Format YTD values to 2 decimal places
        ytd_metrics['SalesTotal'] = ytd_metrics['SalesTotal'].round(2)
        ytd_metrics['CostOfItemsSold'] = ytd_metrics['CostOfItemsSold'].round(
            2)

        quarterly_metrics = quarterly_metrics.merge(
            ytd_metrics,
            on=['ProductKey', 'StoreKey'],
            how='left',
            suffixes=('', '_YTD')
        )
    else:
        quarterly_metrics['SalesTotal_YTD'] = quarterly_metrics['SalesTotal'].round(
            2)
        quarterly_metrics['CostOfItemsSold_YTD'] = quarterly_metrics['CostOfItemsSold'].round(
            2)

    # Rename columns to match required schema
    quarterly_metrics = quarterly_metrics.rename(columns={
        'Quarter': 'Quarter',
        'Year': 'Year',
        '#SoldToday': 'TotalSoldByStoreThisQuarter',
        'CostOfItemsSold': 'TotalCostToStoreThisQuarter',
        'SalesTotal_YTD': 'TotalSoldByStoreYTD',
        'CostOfItemsSold_YTD': 'TotalCostToStoreYTD'
    })

    # Ensure all monetary values have exactly 2 decimal places
    monetary_columns = [
        'TotalCostToStoreThisQuarter',
        'TotalSoldByStoreThisQuarter',
        'TotalCostToStoreYTD',
        'TotalSoldByStoreYTD'
    ]

    for col in monetary_columns:
        quarterly_metrics[col] = quarterly_metrics[col].round(2)

    # Add quarter-year string
    quarterly_metrics['Quarter and Year'] = f'Q{quarter} {current_year}'

    final_columns = [
        'ProductKey', 'StoreKey', 'Quarter and Year', 'Quarter', 'Year',
        '#CasesPurchasedToDate', '#CasesPurchasedThisQuarter', '#CasesOnHand',
        'TotalCostToStoreThisQuarter', 'TotalSoldByStoreThisQuarter',
        'TotalCostToStoreYTD', 'TotalSoldByStoreYTD'
    ]

    quarterly_snapshots.append(quarterly_metrics[final_columns])

Processing Q1 2024...
Processing Q2 2024...
Processing Q3 2024...
Processing Q4 2024...


In [12]:
# Combine all quarterly snapshots
all_quarterly_snapshots = pd.concat(quarterly_snapshots)
all_quarterly_snapshots.head(5)

Unnamed: 0,ProductKey,StoreKey,Quarter and Year,Quarter,Year,#CasesPurchasedToDate,#CasesPurchasedThisQuarter,#CasesOnHand,TotalCostToStoreThisQuarter,TotalSoldByStoreThisQuarter,TotalCostToStoreYTD,TotalSoldByStoreYTD
0,1,1,Q1 2024,1,2024,3082,3082.0,1,2578022.18,1344120,2578022.18,3682888.8
1,1,2,Q1 2024,1,2024,6878,6878.0,0,4465480.33,2444160,4465480.33,6379257.6
2,1,3,Q1 2024,1,2024,6270,6270.0,3,6590639.35,3148896,6590639.35,9415199.04
3,1,4,Q1 2024,1,2024,2934,2934.0,4,3596937.16,1931760,3596937.16,5138481.6
4,2,1,Q1 2024,1,2024,1820,1820.0,2,1744324.67,1264920,1744324.67,2491892.4


In [13]:
all_quarterly_snapshots.shape

(31855, 12)

In [14]:
curr = conn.cursor()

curr.execute("Drop table if exists InventoryFact_QuarterlySnapshot")
curr.execute("""
    CREATE TABLE InventoryFact_QuarterlySnapshot (
        ProductKey INT NOT NULL,
        StoreKey INT NOT NULL,
        "Quarter and Year" TEXT NOT NULL,
        Quarter INT NOT NULL,
        Year INT NOT NULL,
        "#CasesPurchasedToDate" INT NOT NULL,
        "#CasesPurchasedThisQuarter" INT NOT NULL,
        "#CasesOnHand" INT NOT NULL,
        "TotalCostToStoreThisQuarter" REAL NOT NULL,
        "TotalSoldByStoreThisQuarter" REAL NOT NULL,
        "TotalCostToStoreYTD" REAL NOT NULL,
        "TotalSoldByStoreYTD" REAL NOT NULL,
        PRIMARY KEY (ProductKey, StoreKey, Quarter, Year),
        FOREIGN KEY (ProductKey) REFERENCES ProductDimension(ProductKey),
        FOREIGN KEY (StoreKey) REFERENCES StoreDimension(StoreKey)
    );
""")

<sqlite3.Cursor at 0x1d5e14c2440>

In [15]:
all_quarterly_snapshots.to_sql("InventoryFact_QuarterlySnapshot", conn, if_exists='replace', index=False)

31855

In [16]:
print("\nSample data from quarterly inventory snapshots:")
inventory_df = pd.read_sql("SELECT * FROM InventoryFact_QuarterlySnapshot LIMIT 5", conn)
inventory_df


Sample data from quarterly inventory snapshots:


Unnamed: 0,ProductKey,StoreKey,Quarter and Year,Quarter,Year,#CasesPurchasedToDate,#CasesPurchasedThisQuarter,#CasesOnHand,TotalCostToStoreThisQuarter,TotalSoldByStoreThisQuarter,TotalCostToStoreYTD,TotalSoldByStoreYTD
0,1,1,Q1 2024,1,2024,3082,3082.0,1,2578022.18,1344120,2578022.18,3682888.8
1,1,2,Q1 2024,1,2024,6878,6878.0,0,4465480.33,2444160,4465480.33,6379257.6
2,1,3,Q1 2024,1,2024,6270,6270.0,3,6590639.35,3148896,6590639.35,9415199.04
3,1,4,Q1 2024,1,2024,2934,2934.0,4,3596937.16,1931760,3596937.16,5138481.6
4,2,1,Q1 2024,1,2024,1820,1820.0,2,1744324.67,1264920,1744324.67,2491892.4


In [17]:
curr.close()
conn.close()

In [19]:
for i in quarters:
    quarter_data = all_quarterly_snapshots[all_quarterly_snapshots['Quarter'] == i]
    print(f"Quarter: {i}")
    quarter_data.to_csv(f"{output_file_path}InventoryFact_Quarter_{i}.csv")

Quarter: 1
Quarter: 2
Quarter: 3
Quarter: 4


In [20]:
all_quarterly_snapshots[all_quarterly_snapshots['Quarter'] == 2].shape

(8058, 12)

In [21]:
all_quarterly_snapshots[all_quarterly_snapshots['Quarter'] == 3].shape

(8067, 12)

In [22]:
all_quarterly_snapshots[all_quarterly_snapshots['Quarter'] == 4].shape

(7630, 12)

In [24]:
all_quarterly_snapshots.to_csv(f"{output_file_path}InventoryFact_QuarterlySnapshot.csv", index=False)