#### Deliverables 8-11: Quarterly Inventory Snapshots Implementation

In [1]:
import pandas as pd
import sqlite3 as lite
from datetime import datetime

In [2]:
conn = lite.connect("store1.db")

In [3]:
inventory_daily = pd.read_sql("select * from InventoryFact_DailyLevel", conn)
sales_daily = pd.read_sql("select * from SalesFact_DailyLevel", conn)
date_dim = pd.read_sql("select DateKey, Date, Quarter, Year from DateDimension", conn)

In [4]:
inventory_daily.sample(5)

Unnamed: 0,DateKey,ProductKey,StoreKey,#Available,#CasesPurchasedToDate,CostToStore_itemLevel,CostToStore_caseLevel
49280,22,1317,1,32,6,16.93,203.16
3218575,312,150,4,72,10963,1.25,15.0
772522,339,407,1,59,6578,0.99,11.88
2100117,189,31,3,44,11704,1.13,13.56
1177449,150,738,2,56,5776,2.05,24.6


In [5]:
sales_daily.sample(5)

Unnamed: 0,DateKey,ProductKey,StoreKey,#SoldToday,SalesTotal,CostOfItemsSold,GrossProfit,DailyCustomerCount
134071,59,1580,1,18816,92951.04,65065.728,27885.312,15
3079062,251,359,4,130176,834428.16,584099.712,250328.448,23
1222717,170,279,2,110808,929679.12,650775.384,278903.736,27
312940,137,2318,1,59064,163607.28,114525.096,49082.184,22
2135208,204,871,3,171264,1806835.2,1264784.64,542050.56,32


In [6]:
date_dim.sample(5)

Unnamed: 0,DateKey,Date,Quarter,Year
222,223,2024-08-10 00:00:00,3,2024
248,249,2024-09-05 00:00:00,3,2024
353,354,2024-12-19 00:00:00,4,2024
158,159,2024-06-07 00:00:00,2,2024
64,65,2024-03-05 00:00:00,1,2024


In [7]:
# Merge date information with inventory date
inventory_with_dates = inventory_daily.merge(
    date_dim,
    on="DateKey",
    how='left'
)

inventory_with_dates.sample(5)

Unnamed: 0,DateKey,ProductKey,StoreKey,#Available,#CasesPurchasedToDate,CostToStore_itemLevel,CostToStore_caseLevel,Date,Quarter,Year
1713873,18,2266,3,28,1107,1.67,20.04,2024-01-18 00:00:00,1,2024
2382887,312,1818,3,37,27183,2.48,29.76,2024-11-07 00:00:00,4,2024
2791474,125,1245,4,65,4767,1.22,14.64,2024-05-04 00:00:00,2,2024
1828255,70,100,3,31,56,14.27,171.24,2024-03-10 00:00:00,1,2024
1297468,202,1881,2,42,8589,1.81,21.72,2024-07-20 00:00:00,3,2024


In [8]:
# merge date information with sales date
sales_with_dates = sales_daily.merge(
    date_dim,
    on="DateKey",
    how='left'
)

sales_with_dates.sample(5)

Unnamed: 0,DateKey,ProductKey,StoreKey,#SoldToday,SalesTotal,CostOfItemsSold,GrossProfit,DailyCustomerCount,Date,Quarter,Year
1069657,103,353,2,27048,62210.4,43547.28,18663.12,23,2024-04-12 00:00:00,2,2024
418938,184,908,1,45024,148128.96,103690.272,44438.688,14,2024-07-02 00:00:00,3,2024
782868,343,1654,1,91620,553384.8,387369.36,166015.44,15,2024-12-08 00:00:00,4,2024
2304118,278,723,3,214020,716967.0,501876.9,215090.1,29,2024-10-04 00:00:00,4,2024
2937061,189,336,4,86940,352976.4,247083.48,105892.92,21,2024-07-07 00:00:00,3,2024


#### Defining quarters

In [9]:
current_year = datetime.now().year - 1
quarters = [1, 2, 3, 4]

quarterly_snapshots = []

In [10]:
current_year

2024

In [11]:
for quarter in quarters:
    print(f"Processing Q{quarter} {current_year}...")

    # Filter for the quarter
    quarter_mask = (inventory_with_dates['Quarter'] == quarter) & (
        inventory_with_dates['Year'] == current_year)
    current_quarter_date = inventory_with_dates[quarter_mask]

    last_day_of_quarter = current_quarter_date['Date'].max()
    snapshot_date = current_quarter_date[current_quarter_date['Date']
                                         == last_day_of_quarter]

    # calculate quarterly metrics
    quarterly_metrics = snapshot_date.groupby(['ProductKey', "StoreKey", "Quarter", "Year"]).agg({
        "#Available": "last",
        '#CasesPurchasedToDate': 'last',
        'CostToStore_caseLevel': 'first'
    }).reset_index()

    # calculate cases purchased this quarter
    if quarter == 1:
        quarterly_metrics['#CasesPurchasedThisQuarter'] = quarterly_metrics['#CasesPurchasedToDate']
    else:
        prev_quarter_end = inventory_with_dates[
            (inventory_with_dates['Quarter'] == quarter-1) &
            (inventory_with_dates['Year'] == current_year)]['Date'].max()

        prev_quarter_data = inventory_with_dates[
            (inventory_with_dates['Date'] == prev_quarter_end)]

        prev_cases = prev_quarter_data.groupby(['ProductKey', 'StoreKey'])[
            '#CasesPurchasedToDate'].last().reset_index()

        quarterly_metrics = quarterly_metrics.merge(
            prev_cases,
            on=['ProductKey', 'StoreKey'],
            how='left',
            suffixes=('', '_prev')
        )

        quarterly_metrics['#CasesPurchasedThisQuarter'] = (
            quarterly_metrics['#CasesPurchasedToDate'] -
            quarterly_metrics['#CasesPurchasedToDate_prev']).round(2)

        quarterly_metrics.drop(
            '#CasesPurchasedToDate_prev', axis=1, inplace=True)

    # Calculate cases on hand (convert items to cases)
    quarterly_metrics['#CasesOnHand'] = quarterly_metrics['#Available'] // 12

    # Calculate sales metrics for the quarter
    quarter_sales = sales_with_dates[
        (sales_with_dates['Quarter'] == quarter) &
        (sales_with_dates['Year'] == current_year)]

    sales_metrics = quarter_sales.groupby(['ProductKey', 'StoreKey']).agg({
        'SalesTotal': 'sum',
        'CostOfItemsSold': 'sum',
        '#SoldToday': 'sum'
    }).reset_index()

    # Format monetary values to 2 decimal places
    sales_metrics['SalesTotal'] = sales_metrics['SalesTotal'].round(2)
    sales_metrics['CostOfItemsSold'] = sales_metrics['CostOfItemsSold'].round(
        2)

    # Merge sales metrics with inventory metrics
    quarterly_metrics = quarterly_metrics.merge(
        sales_metrics,
        on=['ProductKey', 'StoreKey'],
        how='left'
    )

    if quarter > 1:
        ytd_sales = sales_with_dates[
            (sales_with_dates['Quarter'] <= quarter) &
            (sales_with_dates['Year'] == current_year)]

        ytd_metrics = ytd_sales.groupby(['ProductKey', 'StoreKey']).agg({
            'SalesTotal': 'sum',
            'CostOfItemsSold': 'sum'
        }).reset_index()

        # Format YTD values to 2 decimal places
        ytd_metrics['SalesTotal'] = ytd_metrics['SalesTotal'].round(2)
        ytd_metrics['CostOfItemsSold'] = ytd_metrics['CostOfItemsSold'].round(
            2)

        quarterly_metrics = quarterly_metrics.merge(
            ytd_metrics,
            on=['ProductKey', 'StoreKey'],
            how='left',
            suffixes=('', '_YTD')
        )
    else:
        quarterly_metrics['SalesTotal_YTD'] = quarterly_metrics['SalesTotal'].round(
            2)
        quarterly_metrics['CostOfItemsSold_YTD'] = quarterly_metrics['CostOfItemsSold'].round(
            2)

    # Rename columns to match required schema
    quarterly_metrics = quarterly_metrics.rename(columns={
        'Quarter': 'Quarter',
        'Year': 'Year',
        '#SoldToday': 'TotalSoldByStoreThisQuarter',
        'CostOfItemsSold': 'TotalCostToStoreThisQuarter',
        'SalesTotal_YTD': 'TotalSoldByStoreYTD',
        'CostOfItemsSold_YTD': 'TotalCostToStoreYTD'
    })

    # Ensure all monetary values have exactly 2 decimal places
    monetary_columns = [
        'TotalCostToStoreThisQuarter',
        'TotalSoldByStoreThisQuarter',
        'TotalCostToStoreYTD',
        'TotalSoldByStoreYTD'
    ]

    for col in monetary_columns:
        quarterly_metrics[col] = quarterly_metrics[col].round(2)

    # Add quarter-year string
    quarterly_metrics['Quarter and Year'] = f'Q{quarter} {current_year}'

    final_columns = [
        'ProductKey', 'StoreKey', 'Quarter and Year', 'Quarter', 'Year',
        '#CasesPurchasedToDate', '#CasesPurchasedThisQuarter', '#CasesOnHand',
        'TotalCostToStoreThisQuarter', 'TotalSoldByStoreThisQuarter',
        'TotalCostToStoreYTD', 'TotalSoldByStoreYTD'
    ]

    quarterly_snapshots.append(quarterly_metrics[final_columns])

Processing Q1 2024...
Processing Q2 2024...
Processing Q3 2024...
Processing Q4 2024...


In [12]:
# Combine all quarterly snapshots
all_quarterly_snapshots = pd.concat(quarterly_snapshots)
all_quarterly_snapshots.head(5)

Unnamed: 0,ProductKey,StoreKey,Quarter and Year,Quarter,Year,#CasesPurchasedToDate,#CasesPurchasedThisQuarter,#CasesOnHand,TotalCostToStoreThisQuarter,TotalSoldByStoreThisQuarter,TotalCostToStoreYTD,TotalSoldByStoreYTD
0,1,1,Q1 2024,1,2024,3082,3082.0,1,2578022.16,1344120,2578022.16,3682888.8
1,1,2,Q1 2024,1,2024,6878,6878.0,0,4465480.32,2444160,4465480.32,6379257.6
2,1,3,Q1 2024,1,2024,6270,6270.0,3,6590639.33,3148896,6590639.33,9415199.04
3,1,4,Q1 2024,1,2024,2934,2934.0,4,3596937.12,1931760,3596937.12,5138481.6
4,2,1,Q1 2024,1,2024,1820,1820.0,2,1744324.68,1264920,1744324.68,2491892.4


In [13]:
all_quarterly_snapshots.shape

(36133, 12)

In [14]:
curr = conn.cursor()

curr.execute("Drop table if exists InventoryFact_QuarterlySnapshot")
curr.execute("""
    CREATE TABLE InventoryFact_QuarterlySnapshot (
        ProductKey INT NOT NULL,
        StoreKey INT NOT NULL,
        "Quarter and Year" TEXT NOT NULL,
        Quarter INT NOT NULL,
        Year INT NOT NULL,
        "#CasesPurchasedToDate" INT NOT NULL,
        "#CasesPurchasedThisQuarter" INT NOT NULL,
        "#CasesOnHand" INT NOT NULL,
        "TotalCostToStoreThisQuarter" REAL NOT NULL,
        "TotalSoldByStoreThisQuarter" REAL NOT NULL,
        "TotalCostToStoreYTD" REAL NOT NULL,
        "TotalSoldByStoreYTD" REAL NOT NULL,
        PRIMARY KEY (ProductKey, StoreKey, Quarter, Year),
        FOREIGN KEY (ProductKey) REFERENCES ProductDimension(ProductKey),
        FOREIGN KEY (StoreKey) REFERENCES StoreDimension(StoreKey)
    );
""")

<sqlite3.Cursor at 0x227e2df60c0>

In [15]:
all_quarterly_snapshots.to_sql("InventoryFact_QuarterlySnapshot", conn, if_exists='replace', index=False)

36133

In [16]:
print("\nSample data from quarterly inventory snapshots:")
inventory_df = pd.read_sql("SELECT * FROM InventoryFact_QuarterlySnapshot LIMIT 5", conn)
inventory_df


Sample data from quarterly inventory snapshots:


Unnamed: 0,ProductKey,StoreKey,Quarter and Year,Quarter,Year,#CasesPurchasedToDate,#CasesPurchasedThisQuarter,#CasesOnHand,TotalCostToStoreThisQuarter,TotalSoldByStoreThisQuarter,TotalCostToStoreYTD,TotalSoldByStoreYTD
0,1,1,Q1 2024,1,2024,3082,3082.0,1,2578022.16,1344120,2578022.16,3682888.8
1,1,2,Q1 2024,1,2024,6878,6878.0,0,4465480.32,2444160,4465480.32,6379257.6
2,1,3,Q1 2024,1,2024,6270,6270.0,3,6590639.33,3148896,6590639.33,9415199.04
3,1,4,Q1 2024,1,2024,2934,2934.0,4,3596937.12,1931760,3596937.12,5138481.6
4,2,1,Q1 2024,1,2024,1820,1820.0,2,1744324.68,1264920,1744324.68,2491892.4


In [17]:
curr.close()
conn.close()

In [30]:
for i in quarters:
    quarter_data = all_quarterly_snapshots[all_quarterly_snapshots['Quarter'] == i]
    print(f"Quarter: {i}")
    quarter_data.to_csv(f"InventoryFact_Quarter_{i}.csv")

Quarter: 1
Quarter: 2
Quarter: 3
Quarter: 4


In [22]:
all_quarterly_snapshots[all_quarterly_snapshots['Quarter'] == 2].shape

(9134, 12)

In [23]:
all_quarterly_snapshots[all_quarterly_snapshots['Quarter'] == 3].shape

(9143, 12)

In [24]:
all_quarterly_snapshots[all_quarterly_snapshots['Quarter'] == 4].shape

(8680, 12)

In [18]:
all_quarterly_snapshots.to_csv("InventoryFact_QuarterlySnapshot.csv", index=False)