#### Deliverables 8-11: Quarterly Inventory Snapshots Implementation

In [24]:
import pandas as pd
import sqlite3 as lite
from datetime import datetime

In [25]:
conn = lite.connect("store1.db")
output_file_path = "./output/"

In [26]:
inventory_daily = pd.read_sql("select * from InventoryFact_DailyLevel", conn)
sales_daily = pd.read_sql("select * from SalesFact_DailyLevel", conn)
date_dim = pd.read_sql("select DateKey, Date, Quarter, Year from DateDimension", conn)

In [27]:
inventory_daily.head(5)

Unnamed: 0,DateKey,ProductKey,StoreKey,#Available,#CasesPurchasedToDate,CostToStore_itemLevel,CostToStore_caseLevel
0,1,1,1,65,133,1.92,23.04
1,1,2,1,73,77,1.38,16.56
2,1,3,1,71,91,3.07,36.84
3,1,4,1,64,140,3.07,36.84
4,1,5,1,76,56,2.3,27.6


In [28]:
sales_daily.head(5)

Unnamed: 0,DateKey,ProductKey,StoreKey,#SoldToday,SalesTotal,CostOfItemsSold,GrossProfit,DailyCustomerCount
0,1,1,1,1596,4373.04,3061.13,1311.91,19
1,1,2,1,924,1820.28,1274.2,546.08,11
2,1,3,1,1092,4793.88,3355.72,1438.16,12
3,1,4,1,1680,7375.2,5162.64,2212.56,20
4,1,5,1,672,2210.88,1547.62,663.26,8


In [29]:
date_dim.head(5)

Unnamed: 0,DateKey,Date,Quarter,Year
0,1,2024-01-01,1,2024
1,2,2024-01-02,1,2024
2,3,2024-01-03,1,2024
3,4,2024-01-04,1,2024
4,5,2024-01-05,1,2024


In [30]:
# Merge date information with inventory date
inventory_with_dates = inventory_daily.merge(
    date_dim,
    on="DateKey",
    how='left'
)

inventory_with_dates.sample(5)

Unnamed: 0,DateKey,ProductKey,StoreKey,#Available,#CasesPurchasedToDate,CostToStore_itemLevel,CostToStore_caseLevel,Date,Quarter,Year
174620,87,1322,1,59,1935,4.22,50.64,2024-03-27,1,2024
2624505,208,885,3,48,12250,2.93,35.16,2024-07-26,3,2024
748538,372,1116,1,42,8720,0.69,8.28,2025-01-06,1,2025
1147009,23,541,2,28,28,8.08,96.96,2024-01-23,1,2024
3066266,427,550,3,32,35424,2.68,32.16,2025-03-02,1,2025


In [31]:
# merge date information with sales date
sales_with_dates = sales_daily.merge(
    date_dim,
    on="DateKey",
    how='left'
)

sales_with_dates.sample(5)

Unnamed: 0,DateKey,ProductKey,StoreKey,#SoldToday,SalesTotal,CostOfItemsSold,GrossProfit,DailyCustomerCount,Date,Quarter,Year
3392338,41,1474,4,20160,42940.8,30058.56,12882.24,21,2024-02-10,1,2024
2402964,98,1560,3,89088,302899.2,212029.44,90869.76,32,2024-04-07,2,2024
4003376,344,402,4,163680,278256.0,194779.2,83476.8,22,2024-12-09,4,2024
1367528,132,1222,2,360,637.2,446.04,191.16,2,2024-05-11,2,2024
201214,100,1698,1,26208,25683.84,17978.69,7705.15,14,2024-04-09,2,2024


#### Defining quarters

In [32]:
# i want to create quarterly snapshot for all the year in database

current_years = [2024, 2025]

quarters = [1, 2, 3, 4]

quarterly_snapshots = []

In [33]:
current_years

[2024, 2025]

In [34]:
current_years = [2024, 2025]
quarters = [1, 2, 3, 4]
quarterly_snapshots = []

for year in current_years:
    for quarter in quarters:
        print(f"Processing Q{quarter} {year}...")

        # Filter for the quarter
        quarter_mask = (inventory_with_dates['Quarter'] == quarter) & (
            inventory_with_dates['Year'] == year)
        current_quarter_date = inventory_with_dates[quarter_mask]

        # Handle case where quarter might be empty
        if current_quarter_date.empty:
            print(f"No data found for Q{quarter} {year}")
            continue

        last_day_of_quarter = current_quarter_date['Date'].max()
        snapshot_date = current_quarter_date[current_quarter_date['Date']
                                             == last_day_of_quarter]

        # Calculate quarterly metrics
        quarterly_metrics = snapshot_date.groupby(['ProductKey', "StoreKey", "Quarter", "Year"]).agg({
            "#Available": "last",
            '#CasesPurchasedToDate': 'last',
            'CostToStore_caseLevel': 'first'
        }).reset_index()

        # Calculate cases purchased this quarter
        if quarter == 1:
            quarterly_metrics['#CasesPurchasedThisQuarter'] = quarterly_metrics['#CasesPurchasedToDate']
        else:
            prev_quarter_end = inventory_with_dates[
                (inventory_with_dates['Quarter'] == quarter-1) &
                (inventory_with_dates['Year'] == year)
            ]['Date'].max()

            prev_quarter_data = inventory_with_dates[
                (inventory_with_dates['Date'] == prev_quarter_end)
            ]

            prev_cases = prev_quarter_data.groupby(['ProductKey', 'StoreKey'])[
                '#CasesPurchasedToDate'].last().reset_index()

            quarterly_metrics = quarterly_metrics.merge(
                prev_cases,
                on=['ProductKey', 'StoreKey'],
                how='left',
                suffixes=('', '_prev')
            )

            quarterly_metrics['#CasesPurchasedThisQuarter'] = (
                quarterly_metrics['#CasesPurchasedToDate'] -
                quarterly_metrics['#CasesPurchasedToDate_prev']
            ).fillna(0).round(2)  # Added fillna(0) to handle missing values

            quarterly_metrics.drop(
                '#CasesPurchasedToDate_prev', axis=1, inplace=True)

        # Calculate cases on hand (convert items to cases)
        quarterly_metrics['#CasesOnHand'] = (
            quarterly_metrics['#Available'] / 12).round(2)

        # Calculate sales metrics for the quarter
        quarter_sales = sales_with_dates[
            (sales_with_dates['Quarter'] == quarter) &
            (sales_with_dates['Year'] == year)
        ]

        sales_metrics = quarter_sales.groupby(['ProductKey', 'StoreKey']).agg({
            'SalesTotal': 'sum',
            'CostOfItemsSold': 'sum',
            '#SoldToday': 'sum'
        }).reset_index()

        # Format monetary values to 2 decimal places
        sales_metrics['SalesTotal'] = sales_metrics['SalesTotal'].round(2)
        sales_metrics['CostOfItemsSold'] = sales_metrics['CostOfItemsSold'].round(
            2)

        # Merge sales metrics with inventory metrics
        quarterly_metrics = quarterly_metrics.merge(
            sales_metrics,
            on=['ProductKey', 'StoreKey'],
            how='left'
        ).fillna(0)  # Fill NA values with 0 for cases with no sales

        # Calculate YTD metrics
        ytd_sales = sales_with_dates[
            (sales_with_dates['Quarter'] <= quarter) &
            (sales_with_dates['Year'] == year)
        ]

        ytd_metrics = ytd_sales.groupby(['ProductKey', 'StoreKey']).agg({
            'SalesTotal': 'sum',
            'CostOfItemsSold': 'sum'
        }).reset_index()

        # Format YTD values to 2 decimal places
        ytd_metrics['SalesTotal'] = ytd_metrics['SalesTotal'].round(2)
        ytd_metrics['CostOfItemsSold'] = ytd_metrics['CostOfItemsSold'].round(
            2)

        quarterly_metrics = quarterly_metrics.merge(
            ytd_metrics,
            on=['ProductKey', 'StoreKey'],
            how='left',
            suffixes=('', '_YTD')
        ).fillna(0)  # Fill NA values with 0 for cases with no YTD sales

        # Rename columns to match required schema
        quarterly_metrics = quarterly_metrics.rename(columns={
            '#SoldToday': 'TotalSoldByStoreThisQuarter',
            'CostOfItemsSold': 'TotalCostToStoreThisQuarter',
            'SalesTotal_YTD': 'TotalSoldByStoreYTD',
            'CostOfItemsSold_YTD': 'TotalCostToStoreYTD'
        })

        # Add quarter-year string
        quarterly_metrics['Quarter and Year'] = f'Q{quarter} {year}'

        final_columns = [
            'ProductKey', 'StoreKey', 'Quarter and Year', 'Quarter', 'Year',
            '#CasesPurchasedToDate', '#CasesPurchasedThisQuarter', '#CasesOnHand',
            'TotalCostToStoreThisQuarter', 'TotalSoldByStoreThisQuarter',
            'TotalCostToStoreYTD', 'TotalSoldByStoreYTD'
        ]

        # Ensure all required columns are present
        for col in final_columns:
            if col not in quarterly_metrics.columns:
                quarterly_metrics[col] = 0  # Initialize missing columns with 0

        quarterly_snapshots.append(quarterly_metrics[final_columns])

# Concatenate all quarterly snapshots if needed
if quarterly_snapshots:
    final_result = pd.concat(quarterly_snapshots, ignore_index=True)
else:
    final_result = pd.DataFrame(columns=final_columns)

Processing Q1 2024...
Processing Q2 2024...
Processing Q3 2024...
Processing Q4 2024...
Processing Q1 2025...
Processing Q2 2025...
Processing Q3 2025...
No data found for Q3 2025
Processing Q4 2025...
No data found for Q4 2025


In [35]:
# Combine all quarterly snapshots
all_quarterly_snapshots = pd.concat(quarterly_snapshots)
all_quarterly_snapshots.head(5)

Unnamed: 0,ProductKey,StoreKey,Quarter and Year,Quarter,Year,#CasesPurchasedToDate,#CasesPurchasedThisQuarter,#CasesOnHand,TotalCostToStoreThisQuarter,TotalSoldByStoreThisQuarter,TotalCostToStoreYTD,TotalSoldByStoreYTD
0,1,1,Q1 2024,1,2024,2565,2565.0,4.0,2576065.79,1343100,2576065.79,3680094.0
1,1,2,Q1 2024,1,2024,4393,4393.0,4.08,4833145.76,2645400,4833145.76,6904494.0
2,1,3,Q1 2024,1,2024,6235,6235.0,2.58,7090924.94,3387924,7090924.94,10129892.76
3,1,4,Q1 2024,1,2024,2771,2771.0,3.92,3619772.73,1944024,3619772.73,5171103.84
4,2,1,Q1 2024,1,2024,3216,3216.0,4.25,1819088.57,1319136,1819088.57,2598697.92


In [36]:
all_quarterly_snapshots.shape

(48511, 12)

In [37]:
all_quarterly_snapshots[all_quarterly_snapshots['Quarter and Year'] == 'Q1 2025'].head(5)

Unnamed: 0,ProductKey,StoreKey,Quarter and Year,Quarter,Year,#CasesPurchasedToDate,#CasesPurchasedThisQuarter,#CasesOnHand,TotalCostToStoreThisQuarter,TotalSoldByStoreThisQuarter,TotalCostToStoreYTD,TotalSoldByStoreYTD
0,1,1,Q1 2025,1,2025,11898,11898.0,1.25,21221810.72,11064552,21221810.72,30316872.48
1,1,2,Q1 2025,1,2025,17499,17499.0,1.33,39374342.01,21551364,39374342.01,56249060.04
2,1,3,Q1 2025,1,2025,38554,38554.0,0.0,57756201.05,27594936,57756201.05,82508858.64
3,1,4,Q1 2025,1,2025,21476,21476.0,2.25,33466932.55,17973648,33466932.55,47809903.68
4,2,1,Q1 2025,1,2025,7348,7348.0,3.17,15316282.67,11106804,15316282.67,21880403.88


In [38]:
curr = conn.cursor()

curr.execute("Drop table if exists InventoryFact_QuarterlySnapshot")
curr.execute("""
    CREATE TABLE InventoryFact_QuarterlySnapshot (
        ProductKey INT NOT NULL,
        StoreKey INT NOT NULL,
        "Quarter and Year" TEXT NOT NULL,
        Quarter INT NOT NULL,
        Year INT NOT NULL,
        "#CasesPurchasedToDate" INT NOT NULL,
        "#CasesPurchasedThisQuarter" INT NOT NULL,
        "#CasesOnHand" INT NOT NULL,
        "TotalCostToStoreThisQuarter" REAL NOT NULL,
        "TotalSoldByStoreThisQuarter" REAL NOT NULL,
        "TotalCostToStoreYTD" REAL NOT NULL,
        "TotalSoldByStoreYTD" REAL NOT NULL,
        PRIMARY KEY (ProductKey, StoreKey, Quarter, Year),
        FOREIGN KEY (ProductKey) REFERENCES ProductDimension(ProductKey),
        FOREIGN KEY (StoreKey) REFERENCES StoreDimension(StoreKey)
    );
""")

<sqlite3.Cursor at 0x1c9f8a417c0>

In [39]:
all_quarterly_snapshots.to_sql("InventoryFact_QuarterlySnapshot", conn, if_exists='replace', index=False)

48511

In [40]:
print("\nSample data from quarterly inventory snapshots:")
inventory_df = pd.read_sql("SELECT * FROM InventoryFact_QuarterlySnapshot LIMIT 5", conn)
inventory_df


Sample data from quarterly inventory snapshots:


Unnamed: 0,ProductKey,StoreKey,Quarter and Year,Quarter,Year,#CasesPurchasedToDate,#CasesPurchasedThisQuarter,#CasesOnHand,TotalCostToStoreThisQuarter,TotalSoldByStoreThisQuarter,TotalCostToStoreYTD,TotalSoldByStoreYTD
0,1,1,Q1 2024,1,2024,2565,2565.0,4.0,2576065.79,1343100,2576065.79,3680094.0
1,1,2,Q1 2024,1,2024,4393,4393.0,4.08,4833145.76,2645400,4833145.76,6904494.0
2,1,3,Q1 2024,1,2024,6235,6235.0,2.58,7090924.94,3387924,7090924.94,10129892.76
3,1,4,Q1 2024,1,2024,2771,2771.0,3.92,3619772.73,1944024,3619772.73,5171103.84
4,2,1,Q1 2024,1,2024,3216,3216.0,4.25,1819088.57,1319136,1819088.57,2598697.92


In [41]:
curr.close()
conn.close()

In [42]:
for i in quarters:
    quarter_data = all_quarterly_snapshots[all_quarterly_snapshots['Quarter'] == i]
    print(f"Quarter: {i}")
    quarter_data.to_csv(f"{output_file_path}InventoryFact_Quarter_{i}.csv")

Quarter: 1
Quarter: 2
Quarter: 3
Quarter: 4


In [43]:
all_quarterly_snapshots[all_quarterly_snapshots['Quarter'] == 2].shape

(16204, 12)

In [44]:
all_quarterly_snapshots[all_quarterly_snapshots['Quarter'] == 3].shape

(8061, 12)

In [45]:
all_quarterly_snapshots[all_quarterly_snapshots['Quarter'] == 4].shape

(8080, 12)

In [46]:
all_quarterly_snapshots.to_csv(f"{output_file_path}InventoryFact_QuarterlySnapshot.csv", index=False)