# 🏪 Retail Demand Forecasting & Dynamic Replenishment
## AI-Powered Inventory Optimization for Retail Operations

This notebook demonstrates a production-ready forecasting system specifically designed for retail businesses.

**Key Features:**
- Multi-store, multi-SKU forecasting across 5 locations
- Retail-specific patterns (promotions, holidays, seasonality)
- 8 major retail categories with realistic behavior
- Real holiday calendar (Black Friday, Christmas, Back to School, etc.)
- Store location and type variations (Urban, Suburban, Mall, Outlet)
- Dynamic reorder point calculation with safety stock

**Learning Objectives:**
- Generate realistic retail sales data with multiple factors
- Train ML models for demand forecasting
- Calculate dynamic reorder recommendations
- Visualize forecasts and inventory decisions
- Launch interactive dashboard for real-time analysis

In [None]:
# %% Setup and Imports
import sys
sys.path.insert(0, '..')

# Import our modules
from src.retail_data_generator import RetailDataGenerator
from src.forecasting_agent import DemandForecastingAgent
from src.model_loader import ModelLoader
from src.dashboard import ForecastingDashboard

# Standard libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Configure plotting
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (16, 10)
%matplotlib inline

print(" All modules imported successfully!")
print(f" Pandas version: {pd.__version__}")
print(f" NumPy version: {np.__version__}")

In [None]:
# %% Generate Realistic Retail Sales Data
# Build the synthetic sales history used by every later cell:
# - 5 stores across different locations
# - 20 SKUs across 8 retail categories
# - Realistic patterns: seasonality, promotions, holidays

# Generator is created with a fixed seed (42).
generator = RetailDataGenerator(seed=42)

print(" Generating retail sales data...")
print("  Configuration: 2 years, 20 SKUs, 5 stores")
print("  This may take 2-3 minutes...\n")

# Generation settings, unpacked as keyword arguments below.
generation_settings = {
    'start_date': '2022-01-01',
    'periods': 730,   # 2 years
    'n_skus': 20,     # 20 products
    'n_stores': 5,    # 5 store locations
}
sales_df = generator.generate_retail_sales_data(**generation_settings)

# Headline counts for the generated frame.
print(f"\n Generated {len(sales_df):,} sales records")
for count_label, count_column in (
    (" Stores", 'store_id'),
    (" SKUs", 'sku_id'),
    (" Categories", 'category'),
):
    print(f"{count_label}: {sales_df[count_column].nunique()}")
print(f" Date range: {sales_df['date'].min()} to {sales_df['date'].max()}")

# Show the first ten rows.
print("\n Sample Sales Records:\n")
display(sales_df.head(10))

In [None]:
# %% Generate Supporting Data
# Derive an inventory snapshot from the sales history and print a summary report.

# Inventory snapshot derived from the sales frame
print(" Generating inventory snapshot...")
inventory_df = generator.generate_retail_inventory_snapshot(sales_df)

print(f"\n Generated inventory for {len(inventory_df)} SKU-Store combinations")
print("\nInventory Status Distribution:")
print(inventory_df['stock_status'].value_counts())

# Summary statistics, read below by key
summary = generator.generate_summary_statistics(sales_df)

rule = "=" * 70
print("\n" + rule)
print(" RETAIL SALES SUMMARY")
print(rule)

# Report layout: (section heading, rows); each row is
# (text before the value, summary key, format spec, text after the value).
report_sections = (
    ("\n REVENUE METRICS:", (
        ("   Total Revenue: $", 'total_revenue', ",.2f", ""),
        ("   Avg Daily Revenue: $", 'avg_daily_revenue', ",.2f", ""),
        ("   Avg Transaction Value: $", 'avg_transaction_value', ".2f", ""),
    )),
    ("\n OPERATIONS:", (
        ("   Total Units Sold: ", 'total_units_sold', ",", ""),
        ("   Stockout Rate: ", 'stockout_rate', ".2f", "%"),
        ("   Promotion Rate: ", 'promotion_rate', ".2f", "%"),
    )),
    ("\n TOP PERFORMERS:", (
        ("   Best Category (Revenue): ", 'top_category_by_revenue', "", ""),
        ("   Best Category (Units): ", 'top_category_by_units', "", ""),
    )),
)

for section_heading, section_rows in report_sections:
    print(section_heading)
    for lead_text, summary_key, value_spec, trail_text in section_rows:
        print(f"{lead_text}{summary[summary_key]:{value_spec}}{trail_text}")

In [None]:
# %% Exploratory Data Analysis - Revenue Breakdown
# Four-panel overview of revenue and unit volume: by category (revenue and
# units), by store type, and promotion vs regular-price revenue share.

fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# 1. Revenue by Category (series sorted by total revenue, descending)
category_revenue = sales_df.groupby('category')['revenue'].sum().sort_values(ascending=False)
axes[0, 0].barh(category_revenue.index, category_revenue.values, color='steelblue')
axes[0, 0].set_xlabel('Total Revenue ($)', fontsize=12, fontweight='bold')
axes[0, 0].set_title('Revenue by Category', fontsize=14, fontweight='bold')
axes[0, 0].grid(axis='x', alpha=0.3)

# 2. Units Sold by Category (series sorted by total units, descending)
category_units = sales_df.groupby('category')['units_sold'].sum().sort_values(ascending=False)
axes[0, 1].barh(category_units.index, category_units.values, color='coral')
axes[0, 1].set_xlabel('Total Units Sold', fontsize=12, fontweight='bold')
axes[0, 1].set_title('Units Sold by Category', fontsize=14, fontweight='bold')
axes[0, 1].grid(axis='x', alpha=0.3)

# 3. Revenue by Store Type
# Bars are placed at integer positions and then labelled with the store types
# so the tick labels can be rotated.
store_revenue = sales_df.groupby('store_type')['revenue'].sum().sort_values(ascending=False)
bar_positions = range(len(store_revenue))
axes[1, 0].bar(bar_positions, store_revenue.values, color='green', alpha=0.7)
axes[1, 0].set_xticks(bar_positions)
axes[1, 0].set_xticklabels(store_revenue.index, rotation=45, ha='right')
axes[1, 0].set_ylabel('Total Revenue ($)', fontsize=12, fontweight='bold')
axes[1, 0].set_title('Revenue by Store Type', fontsize=14, fontweight='bold')
axes[1, 0].grid(axis='y', alpha=0.3)

# 4. Promotion vs Regular Sales
promo_comparison = sales_df.groupby('promotion_active')['revenue'].sum()
# Derive each wedge's label and colour from its own group key instead of by
# position. A fixed two-item label list fails in pie() when the data contains
# only one of the two groups, and would mislabel the wedge in that case.
# assumes promotion_active is boolean or 0/1 - TODO confirm against the generator
promo_flags = [bool(flag) for flag in promo_comparison.index]
labels = ['On Promotion' if on_promo else 'Regular Price' for on_promo in promo_flags]
colors = ['orange' if on_promo else 'lightblue' for on_promo in promo_flags]
axes[1, 1].pie(promo_comparison.values, labels=labels, colors=colors,
               autopct='%1.1f%%', startangle=90, textprops={'fontsize': 12, 'fontweight': 'bold'})
axes[1, 1].set_title('Revenue: Promotion vs Regular', fontsize=14, fontweight='bold')

plt.tight_layout()
plt.show()

print(" Revenue analysis complete!")

In [None]:
# %% Time Series Analysis
# Plot chain-wide revenue and units per date, each with a 30-row rolling mean.

# Sum both measures per date across all rows of the sales frame.
daily_sales = (
    sales_df.groupby('date')
    .agg(dict.fromkeys(['revenue', 'units_sold'], 'sum'))
    .reset_index()
)

fig, axes = plt.subplots(2, 1, figsize=(16, 10))

# One entry per panel:
# (column, raw-series legend label, rolling-mean colour, y-axis label, title)
panel_specs = (
    ('revenue', 'Daily Revenue', 'red',
     'Daily Revenue ($)', 'Daily Revenue Trend (2 Years)'),
    ('units_sold', 'Daily Units', 'green',
     'Daily Units Sold', 'Daily Units Sold Trend (2 Years)'),
)

for panel, (column, raw_label, mean_color, y_label, panel_title) in zip(axes, panel_specs):
    values = daily_sales[column]
    # Raw series, drawn thin and semi-transparent
    panel.plot(daily_sales['date'], values,
               linewidth=1, alpha=0.6, label=raw_label)
    # Rolling mean over a 30-row window (the first 29 points are NaN)
    panel.plot(daily_sales['date'], values.rolling(30).mean(),
               color=mean_color, linewidth=2, label='30-Day Moving Average')
    panel.set_xlabel('Date', fontsize=12, fontweight='bold')
    panel.set_ylabel(y_label, fontsize=12, fontweight='bold')
    panel.set_title(panel_title, fontsize=14, fontweight='bold')
    panel.legend(fontsize=11)
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print(" Time series analysis complete!")

In [None]:
# %% Prepare Data for Forecasting Agent
# Build the two frames passed to the agent in later cells:
#   sales_agg   - one row per (date, sku_id, category), summed across stores
#   external_df - one row per date with holiday and promotion indicators

print(" Preparing data for forecasting agent...")

# Aggregate sales by date and SKU (across all stores):
# units are summed, price and cost are averaged, and stockout is flagged
# if any contributing row reported one.
sales_agg = sales_df.groupby(['date', 'sku_id', 'category']).agg({
    'units_sold': 'sum',
    'unit_price': 'mean',
    'cost': 'mean',
    'stockout': 'max'
}).reset_index()

# Rename for agent compatibility
sales_agg = sales_agg.rename(columns={'units_sold': 'sales', 'unit_price': 'price'})

# Create external factors DataFrame.
# Aggregate straight from the full sales frame. De-duplicating rows first
# would collapse each date to its distinct flag combinations, so the
# promotion mean could only be 0, 0.5 or 1 instead of the share of that
# date's records that were on promotion.
external_df = sales_df.groupby('date').agg({
    'is_holiday': 'max',
    'promotion_active': 'mean'
}).reset_index()

print(f" Aggregated sales data: {len(sales_agg):,} records")
print(f" External factors: {len(external_df)} days")

display(sales_agg.head())

In [None]:
# %% Initialize Forecasting Agent
# Instantiate the forecasting agent with its default configuration.

agent = DemandForecastingAgent()

# Capability list as described by this notebook; nothing here inspects the agent.
agent_banner = (
    " Demand Forecasting Agent initialized",
    "   - Gradient Boosting Regressor",
    "   - Feature engineering with lag and rolling statistics",
    "   - Continuous learning capabilities",
)
for banner_line in agent_banner:
    print(banner_line)

In [None]:
# %% Train Forecasting Models
# Fit one model per SKU for the first five SKU ids (kept small for the demo).

skus = sales_agg['sku_id'].unique()[:5]
n_models = len(skus)

print("üéì Training forecasting models...\n")

for i, sku in enumerate(skus, start=1):
    # Progress prefix stays on the same line as the result that follows.
    print(f"[{i}/{n_models}] Training model for {sku}...", end=" ")

    # Best-effort training: a failure for one SKU is reported and the loop
    # continues with the next one.
    try:
        model, metrics = agent.train_model(sku, sales_agg, external_df)
        outcome = f" MAPE: {metrics['mape']:.2f}%, R¬≤: {metrics['test_score']:.2%}"
    except Exception as err:
        outcome = f" Error: {err}"
    print(outcome)

print("\n Model training complete")

# Performance table as reported by the agent
performance_df = agent.get_model_performance()
print("\n Model Performance Summary:\n")
display(performance_df)

In [None]:
# %% Generate 30-Day Forecast
# Produce a 30-day demand forecast for one SKU and print headline figures.

# Select a SKU for detailed analysis
sku = skus[0]
print(f" Generating 30-day forecast for {sku}...")

# Get product info (first sales row for this SKU)
sku_info = sales_df[sales_df['sku_id'] == sku].iloc[0]
print(f"   Product: {sku_info['product_name']}")
print(f"   Category: {sku_info['category']}")
print(f"   Brand: {sku_info['brand']}")

# Generate forecast.
# Anchor the horizon to the day after the last observed date rather than to
# the wall clock. A wall-clock start carries a time-of-day component, changes
# on every run, and leaves a gap between the history and the forecast that
# are plotted on one axis in the next cell.
last_observed_date = pd.to_datetime(sales_agg['date']).max().normalize()
future_dates = pd.date_range(
    start=last_observed_date + pd.Timedelta(days=1), periods=30, freq='D'
)
forecast_df = agent.predict_demand(sku, future_dates, external_df)

print("\n Forecast generated")
print(f"   Avg predicted demand: {forecast_df['predicted_demand'].mean():.1f} units/day")
print(f"   Peak demand: {forecast_df['predicted_demand'].max()} units")
print(f"   Total 30-day demand: {forecast_df['predicted_demand'].sum():,} units")

display(forecast_df.head(10))

In [None]:
# %% Visualize Forecast
# Top panel: the last 90 history rows for the SKU plus the forecast and its band.
# Bottom panel: the forecast alone, with stock and reorder-point reference lines.

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 10))

# Forecast series shared by both panels
forecast_dates = forecast_df['date']
predicted = forecast_df['predicted_demand']
band_low = forecast_df['lower_bound']
band_high = forecast_df['upper_bound']

# Panel 1: history followed by the forecast
sku_history = sales_agg[sales_agg['sku_id'] == sku].tail(90)
ax1.plot(sku_history['date'], sku_history['sales'], 'o-', linewidth=2,
         label='Historical Sales', color='steelblue', markersize=4)
ax1.plot(forecast_dates, predicted, 's-', linewidth=2,
         label='Predicted Demand', color='green', markersize=6)
ax1.fill_between(forecast_dates, band_low, band_high,
                 alpha=0.2, color='green', label='95% Confidence Interval')

# Panel 2: forecast detail
ax2.plot(forecast_dates, predicted, 'o-', linewidth=2,
         label='Predicted Demand', color='darkgreen', markersize=6)
ax2.fill_between(forecast_dates, band_low, band_high,
                 alpha=0.3, color='green')

# Reference lines: stock summed over the SKU's inventory rows, reorder point
# averaged over them. Skipped when the snapshot has no rows for this SKU.
inv_info = inventory_df[inventory_df['sku_id'] == sku]
if not inv_info.empty:
    current_stock = inv_info['current_stock'].sum()
    reorder_point = inv_info['reorder_point'].mean()

    ax2.axhline(y=current_stock, color='blue', linestyle='--', linewidth=2,
                label=f'Current Stock ({int(current_stock)} units)')
    ax2.axhline(y=reorder_point, color='red', linestyle=':', linewidth=2,
                label=f'Reorder Point ({int(reorder_point)} units)')

# Axis labels, title, legend and grid are the same for both panels apart
# from the title text.
panel_titles = (
    (ax1, f'{sku} - Historical Sales (Last 90 Days) + 30-Day Forecast'),
    (ax2, f'{sku} - 30-Day Demand Forecast with Inventory Levels'),
)
for panel, panel_title in panel_titles:
    panel.set_xlabel('Date', fontsize=12, fontweight='bold')
    panel.set_ylabel('Units', fontsize=12, fontweight='bold')
    panel.set_title(panel_title, fontsize=14, fontweight='bold')
    panel.legend(loc='best', fontsize=11)
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print(" Forecast visualization complete!")

In [None]:
# %% Calculate Dynamic Reorder Recommendation
# Ask the agent for a reorder recommendation for the selected SKU, using the
# forecast plus the stock and lead-time figures from the inventory snapshot.

# Inventory rows for the selected SKU
inv_info = inventory_df[inventory_df['sku_id'] == sku]

if inv_info.empty:
    print(f" No inventory data available for {sku}")
else:
    total_stock = inv_info['current_stock'].sum()
    # int() truncates a fractional mean lead time toward zero.
    # NOTE(review): rounding up may be the safer choice for replenishment - confirm.
    avg_lead_time = int(inv_info['lead_time_days'].mean())

    reorder_info = agent.calculate_dynamic_reorder(
        sku, forecast_df, total_stock, avg_lead_time
    )

    divider = "=" * 70

    # Report header
    print("\n" + divider)
    print(f" REORDER RECOMMENDATION - {sku}")
    print(f"    {sku_info['product_name']} ({sku_info['category']})")
    print(divider)

    print("\n CURRENT STATUS:")
    print(f"   Total Stock Across Stores: {reorder_info['current_stock']:,} units")
    print(f"   Reorder Point: {reorder_info['reorder_point']:,} units")
    print(f"   Safety Stock: {reorder_info['safety_stock']:,} units")
    print(f"   Lead Time: {avg_lead_time} days")

    print("\n FORECAST INSIGHTS:")
    print(f"   Expected Demand (Lead Time): {reorder_info['lead_time_demand']:,} units")
    print(f"   Days Until Stockout: {reorder_info['days_until_stockout']} days")
    print(f"   Avg Daily Demand (Forecast): {forecast_df['predicted_demand'].mean():.1f} units")

    print("\n RECOMMENDATION:")
    print(f"   Urgency Level: {reorder_info['urgency']}")
    needs_reorder = reorder_info['needs_reorder']
    reorder_answer = 'YES ‚ö†Ô∏è' if needs_reorder else 'NO ‚úì'
    print(f"   Reorder Needed: {reorder_answer}")
    print(f"   Recommended Order Quantity: {reorder_info['reorder_quantity']:,} units")

    if needs_reorder:
        print("   ‚ö° ACTION REQUIRED: Place order immediately!")

    print("\n" + divider)

In [None]:
# %% Launch Interactive Dashboard
# Roll the inventory snapshot up to one row per SKU, print usage notes, then
# start the dashboard. The printed notes say the run call blocks this cell.

# Per-SKU roll-up: stock is summed; reorder point, reorder quantity and lead
# time are averaged; cost, price and category take the first row per SKU.
sku_rollup = {
    'current_stock': 'sum',
    **dict.fromkeys(['reorder_point', 'reorder_quantity', 'lead_time_days'], 'mean'),
    **dict.fromkeys(['unit_cost', 'unit_price', 'category'], 'first'),
}
inv_summary = inventory_df.groupby('sku_id').agg(sku_rollup).reset_index()

divider = "=" * 70
launch_notes = (
    " Launching Interactive Dashboard...",
    "\n" + divider,
    "DASHBOARD INFORMATION",
    divider,
    "\n URL: http://127.0.0.1:8050",
    "\n Features:",
    "   ‚Ä¢ Select any SKU from dropdown",
    "   ‚Ä¢ Adjust forecast horizon (7-90 days)",
    "   ‚Ä¢ View historical sales patterns",
    "   ‚Ä¢ See reorder recommendations",
    "   ‚Ä¢ Check model performance metrics",
    "\n  Note: Dashboard runs in blocking mode",
    "   Press Ctrl+C in terminal to stop",
    divider + "\n",
)
for note in launch_notes:
    print(note)

# Create and launch dashboard
# NOTE(review): debug=True may enable a code reloader that misbehaves inside a
# notebook kernel - confirm against ForecastingDashboard.run.
dashboard = ForecastingDashboard(agent, sales_agg, external_df, inv_summary)
dashboard.run(host='127.0.0.1', port=8050, debug=True)

In [None]:
# %% Save Results
# Write the generated sales history and inventory snapshot to CSV files
# under ../data (relative to the notebook's working directory).

from pathlib import Path

print(" Saving data to files...")

# Create the output directory first: to_csv does not create missing parent
# directories and would fail on a fresh checkout without a data/ folder.
output_dir = Path('..') / 'data'
output_dir.mkdir(parents=True, exist_ok=True)

sales_df.to_csv(output_dir / 'retail_sales_full.csv', index=False)
inventory_df.to_csv(output_dir / 'retail_inventory.csv', index=False)

print("\n Files saved:")
print("   ‚Ä¢ data/retail_sales_full.csv")
print("   ‚Ä¢ data/retail_inventory.csv")

print("\n Analysis complete!")