# 01 – Data Loading & EDA
**Author:** Mousumi Paul | Feb 2025

## 1. Imports

In [None]:
# Third-party analysis / plotting stack used throughout the notebook.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys
# Extend the module search path with '../src' before importing the project
# helpers below. The path is relative, so it resolves against the current
# working directory of the kernel.
# NOTE(review): presumably demand_forecasting lives in ../src — confirm.
sys.path.append('../src')
from demand_forecasting import load_sales, pivot_monthly
# Visual confirmation that every import above succeeded.
print('✅ Ready')

## 2. Load Raw Sales Data

In [None]:
# Load the raw sales file through the project helper. `df` is the object
# every later cell in this notebook reads from.
# NOTE(review): load_sales is defined in demand_forecasting; presumably it
# returns a pandas DataFrame (the code below uses .shape and .head) — confirm.
df = load_sales('../data/raw/sales_data_2024.csv')
print(f'Shape: {df.shape}')
# Trailing bare expression: the notebook renders the first 10 rows.
df.head(10)

## 3. Data Quality

In [None]:
# Quick data-quality pass: missing-value counts per column, then the distinct
# values of the two grouping columns used later in the notebook.
print('Missing Values:')
print(df.isnull().sum())
print('\nCategories:', df['Category'].unique())
# Drop missing months before sorting: sorted() raises TypeError when NaN is
# mixed with string values. Nothing is hidden by this, because the count of
# missing 'Month' entries is already printed by the isnull() summary above.
print('Months covered:', sorted(df['Month'].dropna().unique()))

## 4. Monthly Totals by Category

In [None]:
# Reshape the sales data with the project helper.
# NOTE(review): the output layout is defined by pivot_monthly in
# demand_forecasting; per the section heading it is presumably monthly totals
# per category — confirm against the helper.
pivot = pivot_monthly(df)
print('Pivot shape:', pivot.shape)
# Trailing bare expression: the notebook renders the pivot table.
pivot

## 5. Descriptive Stats

In [None]:
# Per-category summary of Units_Sold (mean, std, min, max), rounded to one
# decimal place. Kept as a single bare expression so the notebook displays it.
(
    df.groupby('Category')['Units_Sold']
    .agg(['mean', 'std', 'min', 'max'])
    .round(1)
)

## 6. Sales Trend Chart

In [None]:
# Plotting helper from the project module (imported here rather than in the
# imports cell at the top of the notebook).
from demand_forecasting import plot_all_actuals
# Pass the loaded sales data and a target path for the chart image.
# NOTE(review): the chart contents and whether the output directory is created
# are determined inside plot_all_actuals — confirm against the helper.
plot_all_actuals(df, save_path='../outputs/charts/all_categories_trend.png')

## 7. Seasonality Check

In [None]:
# Mean units sold per calendar month, pooled across all categories.
# Reindexing to 1..12 keeps the series aligned with the 12 tick labels below
# even when some months are absent from the data; without it, ax.bar raises a
# shape-mismatch ValueError. Absent months become NaN and simply draw no bar.
# NOTE(review): assumes Month_Num is numeric 1–12 (Jan = 1) — confirm against
# load_sales.
seasonal = df.groupby('Month_Num')['Units_Sold'].mean().reindex(range(1, 13))
months = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']

fig, ax = plt.subplots(figsize=(10,4))
ax.bar(months, seasonal.values, color='#2E75B6', alpha=0.85)
ax.set_title('Average Monthly Demand (All Categories) – Seasonality')
ax.set_ylabel('Avg Units Sold')
ax.grid(axis='y', alpha=0.3)
plt.tight_layout()
# Save before show(): some backends clear the figure once it has been shown.
plt.savefig('../outputs/charts/seasonality_pattern.png', dpi=150)
plt.show()