#### Deliverables 8-11: Quarterly Inventory Snapshots Implementation

In [1]:
import pandas as pd
import sqlite3 as lite
from datetime import datetime

In [2]:
conn = lite.connect("store1.db")
output_file_path = "./output/"

In [3]:
inventory_daily = pd.read_sql("select * from InventoryFact_DailyLevel", conn)
sales_daily = pd.read_sql("select * from SalesFact_DailyLevel", conn)
date_dim = pd.read_sql("select DateKey, Date, Quarter, Year from DateDimension", conn)

In [4]:
inventory_daily.sample(5)

Unnamed: 0,DateKey,ProductKey,StoreKey,#Available,#CasesPurchasedToDate,CostToStore_itemLevel,CostToStore_caseLevel
606757,119,1844,2,33,6669,2.34,28.08
714078,173,161,2,39,8064,2.27,27.24
82544,41,2009,1,61,1856,2.3,27.6
1283202,90,1950,4,34,13,0.82,9.84
209693,105,163,1,53,3586,5.92,71.04


In [5]:
sales_daily.sample(5)

Unnamed: 0,DateKey,ProductKey,StoreKey,#SoldToday,SalesTotal,CostOfItemsSold,GrossProfit,DailyCustomerCount
1098739,181,1249,3,504,781.2,546.84,234.36,2
417190,25,2019,2,14076,20550.96,14385.67,6165.29,22
423745,29,415,2,16560,64418.4,45092.88,19325.52,23
1306924,102,1453,4,56100,329307.0,230514.9,98792.1,25
1063880,164,674,3,147456,706314.24,494419.97,211894.27,32


In [6]:
date_dim.sample(5)

Unnamed: 0,DateKey,Date,Quarter,Year
221,222,2024-08-09,3,2024
84,85,2024-03-25,1,2024
174,175,2024-06-23,2,2024
285,286,2024-10-12,4,2024
33,34,2024-02-03,1,2024


In [7]:
# Merge date information with inventory date
inventory_with_dates = inventory_daily.merge(
    date_dim,
    on="DateKey",
    how='left'
)

inventory_with_dates.sample(5)

Unnamed: 0,DateKey,ProductKey,StoreKey,#Available,#CasesPurchasedToDate,CostToStore_itemLevel,CostToStore_caseLevel,Date,Quarter,Year
846971,56,1925,3,28,3082,2.21,26.52,2024-02-25,1,2024
377075,6,217,2,76,288,5.87,70.44,2024-01-06,1,2024
155572,78,451,1,19,2553,1.69,20.28,2024-03-18,1,2024
810831,39,41,3,63,2090,1.19,14.28,2024-02-08,1,2024
249207,124,1431,1,58,2805,1.69,20.28,2024-05-03,2,2024


In [8]:
# merge date information with sales date
sales_with_dates = sales_daily.merge(
    date_dim,
    on="DateKey",
    how='left'
)

sales_with_dates.sample(5)

Unnamed: 0,DateKey,ProductKey,StoreKey,#SoldToday,SalesTotal,CostOfItemsSold,GrossProfit,DailyCustomerCount,Date,Quarter,Year
278851,139,775,1,74880,139276.8,97493.76,41783.04,29,2024-05-18,2,2024
1124527,12,741,4,7200,18216.0,12751.2,5464.8,25,2024-01-12,1,2024
1370894,134,771,4,69000,242880.0,170016.0,72864.0,22,2024-05-13,2,2024
707302,169,1509,2,93564,735413.04,514789.13,220623.91,23,2024-06-17,2,2024
35635,18,1443,1,3264,17919.36,12543.55,5375.81,16,2024-01-18,1,2024


#### Defining quarters

In [9]:
current_year = datetime.now().year - 1
quarters = [1, 2, 3, 4]

quarterly_snapshots = []

In [10]:
current_year

2024

In [11]:
for quarter in quarters:
    print(f"Processing Q{quarter} {current_year}...")

    # Filter for the quarter
    quarter_mask = (inventory_with_dates['Quarter'] == quarter) & (
        inventory_with_dates['Year'] == current_year)
    current_quarter_date = inventory_with_dates[quarter_mask]

    last_day_of_quarter = current_quarter_date['Date'].max()
    snapshot_date = current_quarter_date[current_quarter_date['Date']
                                         == last_day_of_quarter]

    # calculate quarterly metrics
    quarterly_metrics = snapshot_date.groupby(['ProductKey', "StoreKey", "Quarter", "Year"]).agg({
        "#Available": "last",
        '#CasesPurchasedToDate': 'last',
        'CostToStore_caseLevel': 'first'
    }).reset_index()

    # calculate cases purchased this quarter
    if quarter == 1:
        quarterly_metrics['#CasesPurchasedThisQuarter'] = quarterly_metrics['#CasesPurchasedToDate']
    else:
        prev_quarter_end = inventory_with_dates[
            (inventory_with_dates['Quarter'] == quarter-1) &
            (inventory_with_dates['Year'] == current_year)]['Date'].max()

        prev_quarter_data = inventory_with_dates[
            (inventory_with_dates['Date'] == prev_quarter_end)]

        prev_cases = prev_quarter_data.groupby(['ProductKey', 'StoreKey'])[
            '#CasesPurchasedToDate'].last().reset_index()

        quarterly_metrics = quarterly_metrics.merge(
            prev_cases,
            on=['ProductKey', 'StoreKey'],
            how='left',
            suffixes=('', '_prev')
        )

        quarterly_metrics['#CasesPurchasedThisQuarter'] = (
            quarterly_metrics['#CasesPurchasedToDate'] -
            quarterly_metrics['#CasesPurchasedToDate_prev']).round(2)

        quarterly_metrics.drop(
            '#CasesPurchasedToDate_prev', axis=1, inplace=True)

    # Calculate cases on hand (convert items to cases)
    quarterly_metrics['#CasesOnHand'] = quarterly_metrics['#Available'] // 12

    # Calculate sales metrics for the quarter
    quarter_sales = sales_with_dates[
        (sales_with_dates['Quarter'] == quarter) &
        (sales_with_dates['Year'] == current_year)]

    sales_metrics = quarter_sales.groupby(['ProductKey', 'StoreKey']).agg({
        'SalesTotal': 'sum',
        'CostOfItemsSold': 'sum',
        '#SoldToday': 'sum'
    }).reset_index()

    # Format monetary values to 2 decimal places
    sales_metrics['SalesTotal'] = sales_metrics['SalesTotal'].round(2)
    sales_metrics['CostOfItemsSold'] = sales_metrics['CostOfItemsSold'].round(
        2)

    # Merge sales metrics with inventory metrics
    quarterly_metrics = quarterly_metrics.merge(
        sales_metrics,
        on=['ProductKey', 'StoreKey'],
        how='left'
    )

    if quarter > 1:
        ytd_sales = sales_with_dates[
            (sales_with_dates['Quarter'] <= quarter) &
            (sales_with_dates['Year'] == current_year)]

        ytd_metrics = ytd_sales.groupby(['ProductKey', 'StoreKey']).agg({
            'SalesTotal': 'sum',
            'CostOfItemsSold': 'sum'
        }).reset_index()

        # Format YTD values to 2 decimal places
        ytd_metrics['SalesTotal'] = ytd_metrics['SalesTotal'].round(2)
        ytd_metrics['CostOfItemsSold'] = ytd_metrics['CostOfItemsSold'].round(
            2)

        quarterly_metrics = quarterly_metrics.merge(
            ytd_metrics,
            on=['ProductKey', 'StoreKey'],
            how='left',
            suffixes=('', '_YTD')
        )
    else:
        quarterly_metrics['SalesTotal_YTD'] = quarterly_metrics['SalesTotal'].round(
            2)
        quarterly_metrics['CostOfItemsSold_YTD'] = quarterly_metrics['CostOfItemsSold'].round(
            2)

    # Rename columns to match required schema
    quarterly_metrics = quarterly_metrics.rename(columns={
        'Quarter': 'Quarter',
        'Year': 'Year',
        '#SoldToday': 'TotalSoldByStoreThisQuarter',
        'CostOfItemsSold': 'TotalCostToStoreThisQuarter',
        'SalesTotal_YTD': 'TotalSoldByStoreYTD',
        'CostOfItemsSold_YTD': 'TotalCostToStoreYTD'
    })

    # Ensure all monetary values have exactly 2 decimal places
    monetary_columns = [
        'TotalCostToStoreThisQuarter',
        'TotalSoldByStoreThisQuarter',
        'TotalCostToStoreYTD',
        'TotalSoldByStoreYTD'
    ]

    for col in monetary_columns:
        quarterly_metrics[col] = quarterly_metrics[col].round(2)

    # Add quarter-year string
    quarterly_metrics['Quarter and Year'] = f'Q{quarter} {current_year}'

    final_columns = [
        'ProductKey', 'StoreKey', 'Quarter and Year', 'Quarter', 'Year',
        '#CasesPurchasedToDate', '#CasesPurchasedThisQuarter', '#CasesOnHand',
        'TotalCostToStoreThisQuarter', 'TotalSoldByStoreThisQuarter',
        'TotalCostToStoreYTD', 'TotalSoldByStoreYTD'
    ]

    quarterly_snapshots.append(quarterly_metrics[final_columns])

Processing Q1 2024...
Processing Q2 2024...
Processing Q3 2024...
Processing Q4 2024...


In [12]:
# Combine all quarterly snapshots
all_quarterly_snapshots = pd.concat(quarterly_snapshots)
all_quarterly_snapshots.head(5)

Unnamed: 0,ProductKey,StoreKey,Quarter and Year,Quarter,Year,#CasesPurchasedToDate,#CasesPurchasedThisQuarter,#CasesOnHand,TotalCostToStoreThisQuarter,TotalSoldByStoreThisQuarter,TotalCostToStoreYTD,TotalSoldByStoreYTD
0,1,1,Q1 2024,1,2024,2312,2312.0,4,2605595.27,1358496,2605595.27,3722279.04
1,1,2,Q1 2024,1,2024,5394,5394.0,3,4630940.77,2534724,4630940.77,6615629.64
2,1,3,Q1 2024,1,2024,8988,8988.0,3,6998020.87,3343536,6998020.87,9997172.64
3,1,4,Q1 2024,1,2024,4872,4872.0,2,3879208.89,2083356,3879208.89,5541726.96
4,2,1,Q1 2024,1,2024,2800,2800.0,4,1966795.99,1426248,1966795.99,2809708.56


In [13]:
all_quarterly_snapshots.shape

(16151, 12)

In [14]:
curr = conn.cursor()

curr.execute("Drop table if exists InventoryFact_QuarterlySnapshot")
curr.execute("""
    CREATE TABLE InventoryFact_QuarterlySnapshot (
        ProductKey INT NOT NULL,
        StoreKey INT NOT NULL,
        "Quarter and Year" TEXT NOT NULL,
        Quarter INT NOT NULL,
        Year INT NOT NULL,
        "#CasesPurchasedToDate" INT NOT NULL,
        "#CasesPurchasedThisQuarter" INT NOT NULL,
        "#CasesOnHand" INT NOT NULL,
        "TotalCostToStoreThisQuarter" REAL NOT NULL,
        "TotalSoldByStoreThisQuarter" REAL NOT NULL,
        "TotalCostToStoreYTD" REAL NOT NULL,
        "TotalSoldByStoreYTD" REAL NOT NULL,
        PRIMARY KEY (ProductKey, StoreKey, Quarter, Year),
        FOREIGN KEY (ProductKey) REFERENCES ProductDimension(ProductKey),
        FOREIGN KEY (StoreKey) REFERENCES StoreDimension(StoreKey)
    );
""")

<sqlite3.Cursor at 0x2015fc7dec0>

In [15]:
all_quarterly_snapshots.to_sql("InventoryFact_QuarterlySnapshot", conn, if_exists='replace', index=False)

16151

In [16]:
print("\nSample data from quarterly inventory snapshots:")
inventory_df = pd.read_sql("SELECT * FROM InventoryFact_QuarterlySnapshot LIMIT 5", conn)
inventory_df


Sample data from quarterly inventory snapshots:


Unnamed: 0,ProductKey,StoreKey,Quarter and Year,Quarter,Year,#CasesPurchasedToDate,#CasesPurchasedThisQuarter,#CasesOnHand,TotalCostToStoreThisQuarter,TotalSoldByStoreThisQuarter,TotalCostToStoreYTD,TotalSoldByStoreYTD
0,1,1,Q1 2024,1,2024,2312,2312.0,4,2605595.27,1358496,2605595.27,3722279.04
1,1,2,Q1 2024,1,2024,5394,5394.0,3,4630940.77,2534724,4630940.77,6615629.64
2,1,3,Q1 2024,1,2024,8988,8988.0,3,6998020.87,3343536,6998020.87,9997172.64
3,1,4,Q1 2024,1,2024,4872,4872.0,2,3879208.89,2083356,3879208.89,5541726.96
4,2,1,Q1 2024,1,2024,2800,2800.0,4,1966795.99,1426248,1966795.99,2809708.56


In [17]:
curr.close()
conn.close()

In [18]:
for i in quarters:
    quarter_data = all_quarterly_snapshots[all_quarterly_snapshots['Quarter'] == i]
    print(f"Quarter: {i}")
    quarter_data.to_csv(f"{output_file_path}InventoryFact_Quarter_{i}.csv")

Quarter: 1
Quarter: 2
Quarter: 3
Quarter: 4


In [19]:
all_quarterly_snapshots[all_quarterly_snapshots['Quarter'] == 2].shape

(8068, 12)

In [20]:
all_quarterly_snapshots[all_quarterly_snapshots['Quarter'] == 3].shape

(0, 12)

In [21]:
all_quarterly_snapshots[all_quarterly_snapshots['Quarter'] == 4].shape

(0, 12)

In [22]:
all_quarterly_snapshots.to_csv(f"{output_file_path}InventoryFact_QuarterlySnapshot.csv", index=False)