# 09 - Model Evaluation & Comparison

## Purpose
Comprehensive evaluation and comparison of all ML models developed for the Smart Inventory Manager.

## Models Evaluated
1. Demand Forecasting Models
2. ABC Classification
3. Dead Stock Detection
4. Customer Segmentation (RFM)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.metrics import precision_score, recall_score, f1_score
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# Plot appearance: matplotlib style sheet first, then the seaborn palette
# (the order of these two calls is kept as-is).
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette('husl')

# Processed CSV exports, addressed relative to the notebook's working directory
DATA_DIR = Path('../..').joinpath('ml', 'data', 'processed')

products, inventory, orders, order_items, customers = (
    pd.read_csv(DATA_DIR / csv_name)
    for csv_name in (
        'products.csv',
        'inventory.csv',
        'orders.csv',
        'order_items.csv',
        'customers.csv',
    )
)

# Parse order timestamps once so every later cell can use the .dt accessor
orders['OrderDate'] = pd.to_datetime(orders['OrderDate'])

# One row per order line, enriched with the order header and product attributes
full_orders = (
    orders
    .merge(order_items, on='OrderID')
    .merge(products, on='ProductID')
)

print("Data loaded successfully")

## 1. Demand Forecasting Model Evaluation

In [None]:
# Units sold per (calendar day, product)
order_day = full_orders['OrderDate'].dt.date
daily_demand = (
    full_orders
    .groupby([order_day, 'ProductID'])['Quantity']
    .sum()
    .reset_index()
)
daily_demand.columns = ['Date', 'ProductID', 'Quantity']
# .dt.date yields plain date objects; convert back to datetime64 for date math
daily_demand['Date'] = pd.to_datetime(daily_demand['Date'])

# The 20 products with the highest total units sold
units_per_product = full_orders.groupby('ProductID')['Quantity'].sum()
top_products = units_per_product.nlargest(20).index.tolist()

def evaluate_forecasting_models(product_id, test_days=30):
    """Evaluate the baseline forecasting models on one product's hold-out window.

    The product's rows in the notebook-level ``daily_demand`` frame are
    expanded to a gap-free daily series (zero demand on days without sales)
    and the final ``test_days`` days are held out for scoring.

    Models:
        * ``MA(7)``     - mean of the 7 days preceding the forecast day.
        * ``ES(0.3)``   - simple exponential smoothing level (alpha = 0.3)
          as of the day before the forecast day.
        * ``LinearReg`` - linear trend on the day index, fitted on the
          training window only and clipped at zero.

    MA and ES are one-step-ahead forecasts: they may use observations up to
    the day before the one being predicted, but never that day's own demand.
    LinearReg forecasts the whole test window from the training data alone.

    Args:
        product_id: Value matched against ``daily_demand['ProductID']``.
        test_days: Number of trailing days held out for evaluation.

    Returns:
        A list of ``{'Model', 'MAE', 'ProductID'}`` dicts (one per model that
        could be scored), or ``None`` when the product has fewer than 60 days
        with sales or ``test_days`` does not leave a non-empty train and test
        window.
    """
    ma_window = 7
    alpha = 0.3

    prod_data = daily_demand[daily_demand['ProductID'] == product_id].sort_values('Date')

    if len(prod_data) < 60:
        return None

    # Fill missing dates with zero demand. Only Quantity is reindexed so the
    # ProductID column keeps the real id instead of the fill value.
    date_range = pd.date_range(prod_data['Date'].min(), prod_data['Date'].max())
    quantity = prod_data.set_index('Date')['Quantity'].reindex(date_range, fill_value=0)
    prod_data = pd.DataFrame({
        'Date': date_range,
        'ProductID': product_id,
        'Quantity': quantity.values,
    })

    # Both windows must be non-empty, otherwise the slices below degenerate.
    if not 0 < test_days < len(prod_data):
        return None

    # Split
    train = prod_data.iloc[:-test_days]
    test = prod_data.iloc[-test_days:]
    actual = test['Quantity'].values

    results = []

    # 1. Moving Average (7-day)
    # shift(1) makes the forecast for day t the mean of days t-7..t-1; without
    # it the window would contain day t itself, i.e. the value being predicted.
    ma_pred = (
        prod_data['Quantity']
        .rolling(ma_window)
        .mean()
        .shift(1)
        .iloc[-test_days:]
        .values
    )
    mask = ~np.isnan(ma_pred)
    if mask.sum() > 0:
        mae = mean_absolute_error(actual[mask], ma_pred[mask])
        results.append({'Model': 'MA(7)', 'MAE': mae, 'ProductID': product_id})

    # 2. Exponential Smoothing
    values = prod_data['Quantity'].values
    level = values[0]
    levels = [level]
    for observed in values[1:]:
        level = alpha * observed + (1 - alpha) * level
        levels.append(level)
    # The forecast for day t is the level after day t-1, so take the window
    # that ends one position before the end of the series.
    es_pred = levels[-test_days - 1:-1]
    mae = mean_absolute_error(actual, es_pred)
    results.append({'Model': 'ES(0.3)', 'MAE': mae, 'ProductID': product_id})

    # 3. Linear Regression
    origin = train['Date'].min()  # day 0 for both train and test indices
    X_train = (train['Date'] - origin).dt.days.values.reshape(-1, 1)
    y_train = train['Quantity'].values
    X_test = (test['Date'] - origin).dt.days.values.reshape(-1, 1)

    model = LinearRegression()
    model.fit(X_train, y_train)
    lr_pred = np.maximum(model.predict(X_test), 0)  # demand cannot be negative
    mae = mean_absolute_error(actual, lr_pred)
    results.append({'Model': 'LinearReg', 'MAE': mae, 'ProductID': product_id})

    return results

# Score the ten best-selling products; products the evaluator skips
# (it returns None for them) contribute no rows.
per_product = (evaluate_forecasting_models(pid) for pid in top_products[:10])
all_results = [row for rows in per_product if rows for row in rows]

forecast_eval = pd.DataFrame(all_results)

# One row per model: mean / spread / range of MAE across products
mae_by_model = forecast_eval.groupby('Model')['MAE']
model_summary = mae_by_model.agg(['mean', 'std', 'min', 'max']).round(4)
print("\n=== Forecasting Model Performance ===")
display(model_summary)

In [None]:
# Forecasting performance: distribution (left) and average (right) of MAE
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
box_ax, bar_ax = axes

# Left panel: spread of per-product MAE for each model
forecast_eval.boxplot(column='MAE', by='Model', ax=box_ax)
box_ax.set_title('MAE Distribution by Model')
box_ax.set_xlabel('Model')
box_ax.set_ylabel('Mean Absolute Error')
plt.suptitle('')  # Remove automatic title

# Right panel: models ordered by their average MAE
avg_mae = forecast_eval.groupby('Model')['MAE'].mean().sort_values()
bar_ax.barh(avg_mae.index, avg_mae.values)
bar_ax.set_title('Average MAE by Model')
bar_ax.set_xlabel('Mean Absolute Error')

plt.tight_layout()
plt.show()

# Model with the lowest average MAE; reused by the summary cells
best_forecast_model = avg_mae.idxmin()
print(f"\nBest Forecasting Model: {best_forecast_model} (Avg MAE: {avg_mae.min():.4f})")

## 2. ABC Classification Evaluation

In [None]:
# Revenue and unit totals per product, ranked from highest to lowest revenue.
# NOTE(review): this sums TotalAmount over order *lines*; if TotalAmount is an
# order-level total it is counted once per line item - confirm against the schema.
product_metrics = (
    full_orders
    .groupby('ProductID', as_index=False)
    .agg(Revenue=('TotalAmount', 'sum'), Units=('Quantity', 'sum'))
    .sort_values('Revenue', ascending=False)
    .reset_index(drop=True)
)

# Running share of total revenue, in percent, following the revenue ranking
total_revenue = product_metrics['Revenue'].sum()
product_metrics['CumulativeRevenue'] = product_metrics['Revenue'].cumsum()
product_metrics['CumulativePct'] = (
    product_metrics['CumulativeRevenue'] / total_revenue * 100
)

def assign_abc(pct, a_cutoff=80, b_cutoff=95):
    """Map a cumulative revenue percentage to an ABC class.

    Args:
        pct: Cumulative share of revenue in percent.
        a_cutoff: Largest percentage (inclusive) still classed as 'A'.
        b_cutoff: Largest percentage (inclusive) still classed as 'B'.

    Returns:
        'A' when ``pct <= a_cutoff``, 'B' when ``pct <= b_cutoff``, otherwise
        'C'. A value for which both comparisons are false (e.g. NaN) is 'C'.
    """
    if pct <= a_cutoff:
        return 'A'
    if pct <= b_cutoff:
        return 'B'
    return 'C'

product_metrics['ABC_Class'] = product_metrics['CumulativePct'].map(assign_abc)

# Per-class product count, revenue and units
abc_validation = (
    product_metrics
    .groupby('ABC_Class', as_index=False)
    .agg(
        Products=('ProductID', 'count'),
        Revenue=('Revenue', 'sum'),
        Units=('Units', 'sum'),
    )
    .rename(columns={'ABC_Class': 'Class'})
)

# Each class's share of total revenue and of the product count, in percent
class_revenue = abc_validation['Revenue']
class_products = abc_validation['Products']
abc_validation['Revenue_Pct'] = class_revenue / class_revenue.sum() * 100
abc_validation['Product_Pct'] = class_products / class_products.sum() * 100

print("\n=== ABC Classification Validation ===")
display(abc_validation)

# Pareto validation (80/20 rule): revenue share of class A per unit of product share
a_class = abc_validation.loc[abc_validation['Class'] == 'A']
a_revenue_share = a_class['Revenue_Pct'].values[0]
a_product_share = a_class['Product_Pct'].values[0]
pareto_ratio = a_revenue_share / a_product_share
print(f"\nPareto Ratio (A-class): {pareto_ratio:.2f}")
print("(Higher ratio = better concentration of value in fewer products)")

## 3. Dead Stock Detection Evaluation

In [None]:
# Dead stock detection: products that hold stock but have not sold for
# 90+ days (or have never sold at all).
last_sale = full_orders.groupby('ProductID')['OrderDate'].max().reset_index()
last_sale.columns = ['ProductID', 'LastSale']

# Ages are measured against the most recent order in the data set
reference_date = orders['OrderDate'].max()
last_sale['DaysSinceSale'] = (reference_date - last_sale['LastSale']).dt.days

# Left joins keep every product; products without an inventory row or
# without any sale end up with NaN in the joined columns.
inventory_status = products.merge(inventory, on='ProductID', how='left')
inventory_status = inventory_status.merge(last_sale, on='ProductID', how='left')

# Classify dead stock (90+ days without sale, or never sold) among stocked items
has_stock = inventory_status['Current_Stock'] > 0
days_since_sale = inventory_status['DaysSinceSale']
no_recent_sale = (days_since_sale >= 90) | days_since_sale.isna()
inventory_status['IsDeadStock'] = no_recent_sale & has_stock

n_stocked = has_stock.sum()
n_dead = inventory_status['IsDeadStock'].sum()

# Share of *stocked* products that are dead stock. This uses the same base as
# the "Total Products with Stock" metric and the per-category breakdown;
# averaging the flag over every catalogue row would dilute the rate with
# products that hold no stock and can never be flagged.
dead_stock_pct = n_dead / n_stocked * 100 if n_stocked else 0.0

# Capital tied up in dead stock; a missing cost price counts as 0
dead_rows = inventory_status[inventory_status['IsDeadStock']]
dead_stock_value = (dead_rows['Current_Stock'] * dead_rows['Cost_Price'].fillna(0)).sum()

# NOTE(review): unlike IsDeadStock, this count is not restricted to products
# that currently hold stock - confirm that is intended.
n_at_risk = ((days_since_sale >= 60) & (days_since_sale < 90)).sum()

dead_stock_summary = pd.DataFrame({
    'Metric': ['Total Products with Stock', 'Dead Stock Products', 'Dead Stock %', 
               'Dead Stock Value ($)', 'At-Risk Products (60-90 days)'],
    'Value': [
        n_stocked,
        n_dead,
        f"{dead_stock_pct:.1f}%",
        f"${dead_stock_value:,.2f}",
        n_at_risk,
    ]
})

print("\n=== Dead Stock Detection Results ===")
display(dead_stock_summary)

In [None]:
# Dead stock rate per category: flagged products over stocked products
is_dead = inventory_status['IsDeadStock']
is_stocked = inventory_status['Current_Stock'] > 0

dead_by_category = inventory_status.loc[is_dead].groupby('Category').size()
total_by_category = inventory_status.loc[is_stocked].groupby('Category').size()

# Division aligns on the category index; highest rate first
dead_pct_by_category = (dead_by_category / total_by_category * 100).sort_values(ascending=False)

plt.figure(figsize=(12, 6))
category_ax = dead_pct_by_category.plot(kind='barh')
category_ax.set_title('Dead Stock Percentage by Category')
category_ax.set_xlabel('Dead Stock %')
plt.tight_layout()
plt.show()

## 4. Customer Segmentation (RFM) Evaluation

In [None]:
# RFM Analysis
# Recency is measured from the day after the last order, so the most recent
# customer has a recency of at least 1 day.
reference_date = orders['OrderDate'].max() + pd.Timedelta(days=1)

# NOTE(review): Monetary sums TotalAmount over order *lines*; if TotalAmount is
# an order-level total it is counted once per line item - confirm against the schema.
rfm = full_orders.groupby('CustomerID').agg({
    'OrderDate': lambda x: (reference_date - x.max()).days,
    'OrderID': 'nunique',
    'TotalAmount': 'sum'
}).reset_index()
rfm.columns = ['CustomerID', 'Recency', 'Frequency', 'Monetary']

# RFM Scoring (quintiles, 5 = best)
# All three scores are cut on rank(method='first') rather than on the raw
# values: ranks are unique, so heavily tied values (many customers sharing the
# same recency in days, or the same order count) cannot produce duplicate bin
# edges, which would make pd.qcut raise "Bin edges must be unique".
# Recency labels are reversed because a smaller recency is better.
rfm['R_Score'] = pd.qcut(rfm['Recency'].rank(method='first'), q=5, labels=[5, 4, 3, 2, 1]).astype(int)
rfm['F_Score'] = pd.qcut(rfm['Frequency'].rank(method='first'), q=5, labels=[1, 2, 3, 4, 5]).astype(int)
rfm['M_Score'] = pd.qcut(rfm['Monetary'].rank(method='first'), q=5, labels=[1, 2, 3, 4, 5]).astype(int)
rfm['RFM_Score'] = rfm['R_Score'] + rfm['F_Score'] + rfm['M_Score']

# Segment customers
def segment(row):
    """Return the customer segment name for one row of R/F/M scores.

    ``row`` must support key lookup for 'R_Score', 'F_Score' and 'M_Score'.
    Rules are checked in order and the first match wins:

        Champions - R >= 4, F >= 4 and M >= 4
        Loyal     - R >= 4 and F >= 3
        At Risk   - R <= 2 and F >= 3
        Lost      - R <= 2 and F <= 2
        Others    - everything else
    """
    recency = row['R_Score']
    frequency = row['F_Score']
    monetary = row['M_Score']

    is_recent = recency >= 4
    is_lapsed = recency <= 2

    if is_recent and frequency >= 4 and monetary >= 4:
        return 'Champions'
    if is_recent and frequency >= 3:
        return 'Loyal'
    if is_lapsed and frequency >= 3:
        return 'At Risk'
    if is_lapsed and frequency <= 2:
        return 'Lost'
    return 'Others'

# Label every customer using the rule-based segment() function
rfm['Segment'] = rfm.apply(segment, axis=1)

# Segment evaluation: size, revenue and purchase frequency per segment,
# largest revenue contributor first
segment_eval = (
    rfm
    .groupby('Segment')
    .agg(
        Customers=('CustomerID', 'count'),
        Total_Revenue=('Monetary', 'sum'),
        Avg_Revenue=('Monetary', 'mean'),
        Avg_Frequency=('Frequency', 'mean'),
    )
    .round(2)
    .sort_values('Total_Revenue', ascending=False)
)

print("\n=== Customer Segmentation Results ===")
display(segment_eval)

In [None]:
# Segment charts: share of customers (left) and revenue (right)
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
share_ax, revenue_ax = axes

# Customer distribution
segment_counts = rfm['Segment'].value_counts()
share_ax.pie(segment_counts, labels=segment_counts.index, autopct='%1.1f%%')
share_ax.set_title('Customer Distribution by Segment')

# Revenue distribution, plotted in thousands; ascending sort puts the
# largest segment at the top of the horizontal bar chart
segment_revenue = rfm.groupby('Segment')['Monetary'].sum().sort_values(ascending=True)
revenue_in_thousands = segment_revenue.values / 1000
revenue_ax.barh(segment_revenue.index, revenue_in_thousands)
revenue_ax.set_title('Revenue by Segment')
revenue_ax.set_xlabel('Revenue ($K)')

plt.tight_layout()
plt.show()

## 5. Overall Model Comparison

In [None]:
print("\n" + "="*70)
print("COMPREHENSIVE MODEL EVALUATION SUMMARY")
print("="*70)

print("\n=== 1. DEMAND FORECASTING ===")
print(f"Best Model: {best_forecast_model}")
print(f"Average MAE: {avg_mae.min():.4f}")
print("Recommendation: Use for inventory replenishment planning")

print("\n=== 2. ABC CLASSIFICATION ===")
a_data = abc_validation[abc_validation['Class'] == 'A']
print(f"A-Class Products: {a_data['Products'].values[0]} ({a_data['Product_Pct'].values[0]:.1f}%)")
print(f"A-Class Revenue: ${a_data['Revenue'].values[0]:,.2f} ({a_data['Revenue_Pct'].values[0]:.1f}%)")
print(f"Pareto Efficiency: {pareto_ratio:.2f}x")
print("Recommendation: Prioritize A-class items for inventory optimization")

print("\n=== 3. DEAD STOCK DETECTION ===")
dead_count = inventory_status['IsDeadStock'].sum()
dead_pct = inventory_status['IsDeadStock'].mean() * 100
print(f"Dead Stock Items: {dead_count} ({dead_pct:.1f}%)")
print("Recommendation: Implement clearance strategies for dead stock")

print("\n=== 4. CUSTOMER SEGMENTATION ===")
champions = len(rfm[rfm['Segment'] == 'Champions'])
at_risk = len(rfm[rfm['Segment'] == 'At Risk'])
print(f"Champions: {champions} customers")
print(f"At Risk: {at_risk} customers")
print("Recommendation: Focus retention efforts on At-Risk segment")

In [None]:
# Summary dashboard: one panel per evaluation, laid out on a 2x2 grid
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
forecast_ax, abc_ax, dead_ax, segment_ax = axes.ravel()

# 1. Forecasting model comparison - the lowest-MAE bar is highlighted in green
lowest_mae = avg_mae.min()
bar_colors = ['green' if mae == lowest_mae else 'gray' for mae in avg_mae]
avg_mae.plot(kind='bar', ax=forecast_ax, color=bar_colors)
forecast_ax.set_title('Forecasting Model MAE')
forecast_ax.set_ylabel('MAE')
forecast_ax.tick_params(axis='x', rotation=0)

# 2. ABC distribution - product share next to revenue share for each class
abc_validation.plot(x='Class', y=['Product_Pct', 'Revenue_Pct'], kind='bar', ax=abc_ax)
abc_ax.set_title('ABC Distribution (Products vs Revenue)')
abc_ax.set_ylabel('Percentage')
abc_ax.legend(['Products %', 'Revenue %'])
abc_ax.tick_params(axis='x', rotation=0)

# 3. Dead stock by category (top 5 of the already-sorted series)
worst_categories = dead_pct_by_category.head(5)
worst_categories.plot(kind='bar', ax=dead_ax, color='red', alpha=0.7)
dead_ax.set_title('Top 5 Categories - Dead Stock %')
dead_ax.set_ylabel('Dead Stock %')
dead_ax.tick_params(axis='x', rotation=45)

# 4. Customer segments - number of customers in each segment
segment_counts.plot(kind='bar', ax=segment_ax)
segment_ax.set_title('Customer Segments')
segment_ax.set_ylabel('Count')
segment_ax.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

In [None]:
print("\n" + "="*70)
print("ACTIONABLE INSIGHTS")
print("="*70)

print("""
1. INVENTORY OPTIMIZATION:
   - Use Linear Regression / Moving Average for demand forecasting
   - Focus on A-class products for safety stock optimization
   - Review and liquidate dead stock to free up capital

2. CUSTOMER RETENTION:
   - Prioritize Champions segment for loyalty programs
   - Re-engage At-Risk customers with targeted promotions
   - Monitor Loyal customers for potential upgrades

3. PRODUCT STRATEGY:
   - Invest in A-class product availability
   - Consider discontinuing chronic C-class dead stock
   - Review B-class products for potential A-class promotion

4. OPERATIONAL IMPROVEMENTS:
   - Implement automated reorder alerts for A-class items
   - Weekly dead stock review process
   - Monthly customer segment analysis
""")