In [0]:
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pandas.plotting import autocorrelation_plot
from statsforecast import StatsForecast
from utilsforecast.losses import mse
warnings.filterwarnings('ignore')


In [0]:
# --- Load Data ---
# The CSV location can be overridden through the SALES_DATA_PATH environment variable so
# the notebook can run on machines other than the original author's. The original
# absolute path is kept as the fallback, so existing setups behave exactly as before.
data_path = os.environ.get(
    "SALES_DATA_PATH",
    "/Users/pju307/retail-forecasting/data/sales_data.csv",
)
# Read the CSV and parse the 'Date' column into datetimes at load time.
df = pd.read_csv(data_path, parse_dates=['Date'])

In [0]:
# Preview the first five rows of the freshly loaded frame.
df.head(n=5)

In [0]:
# Order rows by Date, discard rows that have no Demand value, and cast the two grouping
# columns to str — written as a single chain that rebinds `df`.
df = (
    df.sort_values('Date')
      .dropna(subset=['Demand'])
      .assign(
          Category=lambda frame: frame['Category'].astype(str),
          Region=lambda frame: frame['Region'].astype(str),
      )
)

In [0]:
# Configure the seaborn theme first, then the default matplotlib figure size
# (same order as before, so the figure size is applied after the theme).
sns.set(palette="muted", style="whitegrid")
plt.rcParams.update({"figure.figsize": (8, 4)})


In [0]:
# Horizontal bar chart: Demand summed per Category, rows sorted by total in descending order.
category_totals = (
    df.groupby('Category', as_index=False)['Demand']
      .sum()
      .sort_values('Demand', ascending=False)
)
plt.figure(figsize=(8, 4))
ax = sns.barplot(data=category_totals, x='Demand', y='Category', palette='viridis')
ax.set(title="Total Demand by Category", xlabel="Total Demand", ylabel="Category")
plt.show()


In [0]:
# Horizontal bar chart: Demand summed per Region, rows sorted by total in descending order.
region_totals = (
    df.groupby('Region', as_index=False)['Demand']
      .sum()
      .sort_values('Demand', ascending=False)
)
plt.figure(figsize=(8, 4))
ax = sns.barplot(data=region_totals, x='Demand', y='Region', palette='magma')
ax.set(title="Total Demand by Region", xlabel="Total Demand", ylabel="Region")
plt.show()


In [0]:
# Line chart of demand per calendar day.
# The frame also carries Category, Region and Product ID columns, so a single Date may
# appear on several rows. Demand is therefore summed per Date before plotting; drawing the
# raw rows would produce one point per row instead of one per day. When every Date occurs
# only once, the plotted values are the same as before.
daily_demand = df.groupby('Date')['Demand'].sum()

plt.figure(figsize=(12,5))
plt.plot(daily_demand.index, daily_demand.values, color='orange')
plt.title("Daily Demand Over Time")
plt.xlabel("Date")
plt.ylabel("Demand")
plt.show()


In [0]:
# Scatter of Demand against Inventory Level, drawn semi-transparent (alpha=0.3).
plt.figure(figsize=(6, 5))
ax = sns.scatterplot(x='Inventory Level', y='Demand', data=df, color='gold', alpha=0.3)
ax.set_title("Inventory Level vs Demand")
plt.show()


In [0]:
# Box plot comparing the Demand distribution for each value of the Promotion flag.
plt.figure(figsize=(6, 5))
ax = sns.boxplot(x='Promotion', y='Demand', data=df, palette='Set2')
ax.set(
    title="Demand Distribution: Promotion vs No Promotion",
    xlabel="Promotion (0 = No, 1 = Yes)",
)
plt.show()


In [0]:
# Box plot of Demand for each Weather Condition; x tick labels are tilted by 15 degrees.
plt.figure(figsize=(6, 5))
ax = sns.boxplot(x='Weather Condition', y='Demand', data=df, palette='coolwarm')
ax.set(title="Demand by Weather Condition", xlabel="Weather Condition")
plt.xticks(rotation=15)
plt.show()


In [0]:
# Six-panel (2 x 3) dashboard that gathers the headline views of the data in one figure.
fig, axes = plt.subplots(2, 3, figsize=(18,10))

# [0,0] Demand summed per Category, sorted by total (descending)
category_totals = (
    df.groupby('Category', as_index=False)['Demand'].sum()
      .sort_values('Demand', ascending=False)
)
sns.barplot(ax=axes[0,0], data=category_totals, x='Demand', y='Category', palette='viridis')
axes[0,0].set_title("Total Demand by Category")

# [0,1] Demand summed per Region, sorted by total (descending)
region_totals = (
    df.groupby('Region', as_index=False)['Demand'].sum()
      .sort_values('Demand', ascending=False)
)
sns.barplot(ax=axes[0,1], data=region_totals, x='Demand', y='Region', palette='magma')
axes[0,1].set_title("Total Demand by Region")

# [0,2] Demand per calendar day.
# A Date may appear on several rows (the frame also has Category / Region columns), so
# Demand is summed per Date first; plotting raw rows would give one point per row rather
# than one per day. With a single row per Date the plotted values are unchanged.
daily_demand = df.groupby('Date')['Demand'].sum()
axes[0,2].plot(daily_demand.index, daily_demand.values, color='orange')
axes[0,2].set_title("Daily Demand Over Time")

# [1,0] Inventory Level against Demand
sns.scatterplot(ax=axes[1,0], data=df, x='Inventory Level', y='Demand', alpha=0.3, color='gold')
axes[1,0].set_title("Inventory Level vs Demand")

# [1,1] Demand distribution per Promotion value
sns.boxplot(ax=axes[1,1], data=df, x='Promotion', y='Demand', palette='Set2')
axes[1,1].set_title("Demand Distribution: Promotion vs No Promotion")

# [1,2] Demand distribution per Weather Condition, with tilted x tick labels
sns.boxplot(ax=axes[1,2], data=df, x='Weather Condition', y='Demand', palette='coolwarm')
axes[1,2].set_title("Demand by Weather Condition")
axes[1,2].tick_params(axis='x', rotation=15)

# Leave the top 3% of the figure free for the overall title.
plt.suptitle("🛒 Retail Store Inventory & Demand Dashboard", fontsize=16, fontweight='bold')
plt.tight_layout(rect=[0,0,1,0.97])
plt.show()


In [0]:
# The ten Product IDs with the highest mean Demand, drawn as a vertical bar chart.
top_products = df.groupby('Product ID')['Demand'].mean().nlargest(10)
ax = top_products.plot.bar(figsize=(8, 4))
ax.set(title="Top 10 Products by Average Demand", ylabel="Average Demand")
plt.show()


In [0]:
# Row counts per Region, split into one bar per Category.
plt.figure(figsize=(10, 5))
ax = sns.countplot(x='Region', hue='Category', data=df)
ax.set_title("Category Mix by Region")
plt.show()


In [0]:
# Scatter of Demand against Discount, drawn semi-transparent (alpha=0.3).
plt.figure(figsize=(6, 5))
ax = sns.scatterplot(x='Discount', y='Demand', data=df, color='purple', alpha=0.3)
ax.set_title("Discount vs Demand")
plt.show()


In [0]:
# Scatter of Demand against Price; no explicit figure is created, so the default size applies.
ax = sns.scatterplot(x='Price', y='Demand', data=df, color='teal', alpha=0.4)
ax.set_title("Price vs Demand")
plt.show()


In [0]:
# Scatter of Demand against Competitor Pricing; no explicit figure is created, so the default size applies.
ax = sns.scatterplot(x='Competitor Pricing', y='Demand', data=df, color='crimson', alpha=0.4)
ax.set_title("Competitor Pricing vs Demand")
plt.show()


In [0]:
# Mean Demand per Weather Condition, sorted ascending and drawn as a vertical bar chart.
weather_mean_demand = df.groupby('Weather Condition')['Demand'].mean().sort_values()
ax = weather_mean_demand.plot.bar(figsize=(8, 4))
ax.set(title="Average Demand by Weather Condition", ylabel="Mean Demand")
plt.show()


In [0]:
# Box plot of the Demand distribution for each value of the Epidemic column.
ax = sns.boxplot(x='Epidemic', y='Demand', data=df)
ax.set_title("Demand Distribution During Epidemic vs Normal Times")
plt.show()


In [0]:
# Add a per-row utilization ratio: Units Sold divided by (Inventory Level + 1).
# NOTE(review): the +1 presumably keeps the denominator non-zero when Inventory Level is 0 — confirm.
df['Inventory_Utilization'] = df['Units Sold'].div(df['Inventory Level'].add(1))

# Histogram of the new ratio using 30 bins.
ax = sns.histplot(df['Inventory_Utilization'], color='orange', bins=30)
ax.set_title("Inventory Utilization Ratio Distribution")
plt.show()


In [0]:
# Scatter of Units Ordered against Inventory Level, drawn semi-transparent (alpha=0.3).
ax = sns.scatterplot(x='Inventory Level', y='Units Ordered', data=df, alpha=0.3)
ax.set_title("Inventory Level vs Units Ordered")
plt.show()

In [0]:
# Pairwise correlation matrix of the selected numeric columns, rendered as an annotated heatmap.
num_cols = [
    'Demand',
    'Inventory Level',
    'Units Sold',
    'Units Ordered',
    'Price',
    'Discount',
    'Competitor Pricing',
]
corr = df.loc[:, num_cols].corr()

plt.figure(figsize=(8, 6))
ax = sns.heatmap(corr, cmap='coolwarm', annot=True)
ax.set_title("Feature Correlation Heatmap")
plt.show()

In [0]:
# Grid of pairwise scatter plots for five columns, with KDE curves on the diagonal.
pair_cols = ['Demand', 'Price', 'Discount', 'Inventory Level', 'Competitor Pricing']
sns.pairplot(df[pair_cols], diag_kind='kde')
# y=1.02 places the overall title slightly above the top of the figure.
plt.suptitle("Pairwise Relationships", y=1.02)
plt.show()


In [0]:
# Autocorrelation of demand across calendar days.
# `autocorrelation_plot` is imported in the notebook's top import cell.
# A Date may appear on several rows (the frame also has Category / Region / Product ID
# columns), so lags measured over raw rows would not correspond to time steps. Demand is
# summed per Date first (groupby returns the dates in sorted order), which makes each lag
# one date apart. With a single row per Date the input series holds the same values as before.
daily_demand = df.groupby('Date')['Demand'].sum()
autocorrelation_plot(daily_demand)
plt.title("Autocorrelation of Demand")
plt.show()

In [0]:
# Row-wise rolling mean over 7 consecutive rows. This column is left exactly as it was
# because code elsewhere in the notebook may read it. NOTE(review): when a Date spans
# several rows this is a 7-row window, not a 7-day one.
df['rolling_7'] = df['Demand'].rolling(7).mean()

# For the chart, collapse to one value per Date first so that a window of 7 covers seven
# dates, matching the "7-Day Rolling Average" label. This assumes the dates are
# consecutive; gaps in the calendar would stretch the window — TODO confirm.
# With a single row per Date the plotted values are the same as before.
daily_demand = df.groupby('Date')['Demand'].sum()
daily_rolling_7 = daily_demand.rolling(7).mean()

plt.figure(figsize=(12,4))
plt.plot(daily_demand.index, daily_demand.values, alpha=0.4, label='Daily Demand')
plt.plot(daily_rolling_7.index, daily_rolling_7.values, color='red', label='7-Day Rolling Average')
plt.title("Demand Trend with 7-Day Rolling Mean")
plt.legend()
plt.show()