In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

In [None]:
# Directory holding the competition CSV files that are read below.
INPUT_BASE_PATH = "/kaggle/input/m5-forecasting-accuracy"
# Directory for notebook outputs.
OUTPUT_BASE_PATH = "/kaggle/working"
# Misspelled legacy name, kept as an alias so existing references keep working.
OUTPUT_BASE_BATH = OUTPUT_BASE_PATH

# Raw input tables, loaded once; the upper-case names mark them as inputs
# that later cells should copy rather than modify.
CALENDAR_DATA = pd.read_csv(f"{INPUT_BASE_PATH}/calendar.csv")
SELL_PRICES = pd.read_csv(f"{INPUT_BASE_PATH}/sell_prices.csv")
SALES_TRAIN_EVALUATION = pd.read_csv(f"{INPUT_BASE_PATH}/sales_train_evaluation.csv")
SALES_TRAIN_VALIDATION = pd.read_csv(f"{INPUT_BASE_PATH}/sales_train_validation.csv")
SAMPLE_SUBMISSION = pd.read_csv(f"{INPUT_BASE_PATH}/sample_submission.csv")

In [None]:
def get_product_id_mappings(df: pd.DataFrame):
    """Build forward and reverse lookups between ``df["id"]`` values and integer codes.

    Each distinct value of the ``id`` column receives a code, counting up
    from 1 in the order the values are returned by ``Series.unique()``.

    Args:
        df: Frame that contains an ``id`` column.

    Returns:
        A tuple ``(id_to_int, int_to_id)``: the first dict maps each id to
        its code, the second is the inverse mapping.
    """
    distinct_ids = df["id"].unique()

    id_to_int = {}
    for code, product_id in enumerate(distinct_ids, start=1):
        id_to_int[product_id] = code

    # Invert the forward mapping: codes become keys, ids become values.
    int_to_id = dict(zip(id_to_int.values(), id_to_int.keys()))
    return id_to_int, int_to_id


def flatten_sales_df(df: pd.DataFrame, timestamp_cols=None) -> pd.DataFrame:
    """Reshape a wide sales frame (one column per timestamp) into long format.

    Every input row contributes ``len(timestamp_cols)`` consecutive output
    rows, one per timestamp column, in the order the columns are listed.
    Input rows are emitted in their original order.

    Args:
        df: Wide frame with a ``product_id`` column and one column per
            entry of ``timestamp_cols``.
        timestamp_cols: Names of the columns to unpivot. When omitted, the
            columns ``d_1`` ... ``d_1913`` are used, which is what this
            function previously hard-coded.

    Returns:
        A frame with the columns ``timestamp_id`` (source column name),
        ``count`` (cell value) and ``product_id``. The index restarts at 0
        for every input row, matching the result of concatenating one
        per-product frame after another. An input without rows yields an
        empty frame with the same three columns.

    Raises:
        KeyError: if ``product_id`` or any requested timestamp column is
            missing from ``df``.
    """
    if timestamp_cols is None:
        timestamp_cols = [f"d_{i}" for i in range(1, 1914)]
    timestamp_cols = list(timestamp_cols)

    n_rows = len(df)
    n_timestamps = len(timestamp_cols)

    # Row-major flattening puts all timestamps of the first row first, then
    # all timestamps of the second row, and so on. This replaces a
    # per-product boolean filter that rescanned the whole frame for every id.
    counts = df[timestamp_cols].to_numpy().reshape(-1)

    flat = pd.DataFrame(
        {
            "timestamp_id": np.tile(np.asarray(timestamp_cols, dtype=object), n_rows),
            "count": counts,
            "product_id": np.repeat(df["product_id"].to_numpy(), n_timestamps),
        }
    )
    # Keep the repeating 0..n_timestamps-1 index that earlier callers received.
    flat.index = np.tile(np.arange(n_timestamps), n_rows)
    return flat
    

In [None]:
# Names of the per-timestamp columns used below: d_1 through d_1913.
timestamp_cols = [f"d_{day}" for day in range(1, 1914)]

# Replace the string ids with compact integer codes. The raw frame is left
# untouched; train_df is an independent copy with one extra column.
(product_id_to_int, int_to_product_id) = get_product_id_mappings(SALES_TRAIN_VALIDATION)
train_df = SALES_TRAIN_VALIDATION.copy(deep=True)
product_codes = train_df["id"].map(product_id_to_int)
train_df["product_id"] = product_codes

# SALES BY STORE

In [None]:
store_ids = list(train_df["store_id"].unique())

# Number of trailing timestamp columns shown in every panel.
n_samples = 500

# Size the grid from the number of stores instead of assuming exactly ten;
# ten stores still give the 5x2 layout at figsize (15, 10).
n_plot_cols = 2
n_plot_rows = max(1, -(-len(store_ids) // n_plot_cols))  # ceiling division

fig, ax = plt.subplots(
    n_plot_rows,
    n_plot_cols,
    sharex=True,
    sharey=True,
    figsize=(15, 2 * n_plot_rows),
    squeeze=False,
)
ax = ax.flatten()
for i, store_id in enumerate(store_ids):
    # Selecting columns and summing already produces a new object, so the
    # filtered frame does not need to be deep-copied first.
    store_df = train_df[train_df["store_id"] == store_id]
    total_sales = store_df[timestamp_cols].sum(axis=0)
    total_sales = total_sales.iloc[-n_samples:]
    # Use the real length so the x axis still matches when fewer than
    # n_samples timestamp columns are available.
    ax[i].plot(np.arange(len(total_sales)), total_sales.values, label=store_id)
    ax[i].set_ylabel("total sales")
    ax[i].legend()

# Label the shared x axis on the bottom row only.
for bottom_ax in ax[-n_plot_cols:]:
    bottom_ax.set_xlabel("timestamp index (most recent last)")

# Hide panels that have no store assigned (odd number of stores).
for unused_ax in ax[len(store_ids):]:
    unused_ax.set_visible(False)

fig.tight_layout();

# SALES BY CATEGORY

# SALES BY DEPARTMENT