In [6]:
"""
DEMAND FORECASTING & INVENTORY SIMULATION - SUPPLY CHAIN
Block A: Data Loading & Preprocessing
"""

# ============================================================================
# IMPORTS
# ============================================================================
import pandas as pd
import numpy as np
import warnings
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns

warnings.filterwarnings('ignore')

# Set display options
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)

print("=" * 80)
print("BLOCK A: DATA LOADING & PREPROCESSING")
print("=" * 80)

# ============================================================================
# 1. DATA LOADING
# ============================================================================
print("\n[1] Loading Data...")

# For Google Colab - upload file
from google.colab import files
uploaded = files.upload()

# Get the filename
filename = list(uploaded.keys())[0]
print(f"✓ File uploaded: {filename}")

# Load data based on file extension
if filename.endswith('.csv'):
    df = pd.read_csv(filename)
elif filename.endswith('.xlsx'):
    df = pd.read_excel(filename)
else:
    raise ValueError("File must be .csv or .xlsx format")

print(f"✓ Data loaded: {df.shape[0]:,} rows × {df.shape[1]} columns")
print(f"\nColumns: {list(df.columns)}")

# ============================================================================
# 2. DATE PARSING & VALIDATION
# ============================================================================
print("\n[2] Parsing Dates...")

# Parse Date column with automatic format inference
df['Date'] = pd.to_datetime(df['Date'], infer_datetime_format=True, utc=True)

# Convert to date only (remove time component if present)
df['Date'] = df['Date'].dt.date
df['Date'] = pd.to_datetime(df['Date'])

print(f"✓ Date range: {df['Date'].min()} to {df['Date'].max()}")
print(f"✓ Total days in dataset: {(df['Date'].max() - df['Date'].min()).days + 1}")

# ============================================================================
# 3. CREATE CONTINUOUS CALENDAR FOR EACH SKU
# ============================================================================
print("\n[3] Creating Continuous Calendar...")

# Get all unique SKUs and date range
all_skus = df['SKU_ID'].unique()
date_range = pd.date_range(start=df['Date'].min(), end=df['Date'].max(), freq='D')

print(f"✓ Unique SKUs: {len(all_skus):,}")
print(f"✓ Date range: {len(date_range)} days")

# Create complete calendar for all SKU-Date combinations
complete_calendar = pd.DataFrame(
    [(sku, date) for sku in all_skus for date in date_range],
    columns=['SKU_ID', 'Date']
)

print(f"✓ Complete calendar created: {len(complete_calendar):,} SKU-Date combinations")

# Merge with original data
df_original_size = len(df)
df = complete_calendar.merge(df, on=['SKU_ID', 'Date'], how='left')

print(f"✓ Data after merge: {len(df):,} rows ({len(df) - df_original_size:,} new rows added)")

# ============================================================================
# 4. HANDLE MISSING VALUES - UNITS_SOLD
# ============================================================================
print("\n[4] Handling Missing Values - Units_Sold...")

units_sold_na = df['Units_Sold'].isna().sum()
print(f"Missing Units_Sold: {units_sold_na:,} ({units_sold_na/len(df)*100:.2f}%)")

# Fill missing Units_Sold with 0 (no sales on those days)
df['Units_Sold'] = df['Units_Sold'].fillna(0).astype(int)
print("✓ Units_Sold: Missing values filled with 0")

# ============================================================================
# 5. HANDLE MISSING VALUES - INVENTORY_LEVEL
# ============================================================================
print("\n[5] Handling Missing Values - Inventory_Level...")

inventory_na = df['Inventory_Level'].isna().sum()
print(f"Missing Inventory_Level: {inventory_na:,} ({inventory_na/len(df)*100:.2f}%)")

# Sort by SKU and Date for proper forward-filling
df = df.sort_values(['SKU_ID', 'Date']).reset_index(drop=True)

# Forward-fill Inventory_Level within each SKU group
df['Inventory_Level'] = df.groupby('SKU_ID')['Inventory_Level'].fillna(method='ffill')

# Flag rows with true NA (couldn't be filled)
df['Inventory_NA_Flag'] = df['Inventory_Level'].isna().astype(int)
remaining_na = df['Inventory_Level'].isna().sum()

print(f"✓ Inventory_Level: Forward-filled within SKU groups")
print(f"  Remaining NA (flagged for review): {remaining_na:,}")

# Fill remaining NAs with 0 (assume starting inventory is 0 if no prior data)
df['Inventory_Level'] = df['Inventory_Level'].fillna(0).astype(int)

# ============================================================================
# 6. HANDLE MISSING VALUES - SUPPLIER_LEAD_TIME_DAYS
# ============================================================================
print("\n[6] Handling Missing Values - Supplier_Lead_Time_Days...")

lead_time_na = df['Supplier_Lead_Time_Days'].isna().sum()
print(f"Missing Supplier_Lead_Time_Days: {lead_time_na:,}")

# Calculate median lead time per supplier
supplier_median_lead_time = df.groupby('Supplier_ID')['Supplier_Lead_Time_Days'].median()

# Impute missing values with supplier median
df['Supplier_Lead_Time_Days'] = df.apply(
    lambda row: supplier_median_lead_time[row['Supplier_ID']]
    if pd.isna(row['Supplier_Lead_Time_Days']) and row['Supplier_ID'] in supplier_median_lead_time
    else row['Supplier_Lead_Time_Days'],
    axis=1
)

print("✓ Supplier_Lead_Time_Days: Imputed with median per supplier")

# ============================================================================
# 7. FILL OTHER MISSING VALUES
# ============================================================================
print("\n[7] Filling Other Missing Values...")

# Fill categorical columns with forward-fill per SKU
categorical_cols = ['Warehouse_ID', 'Supplier_ID', 'Region']
for col in categorical_cols:
    if col in df.columns:
        df[col] = df.groupby('SKU_ID')[col].fillna(method='ffill')
        df[col] = df.groupby('SKU_ID')[col].fillna(method='bfill')

# Fill numeric columns
numeric_fill_cols = ['Reorder_Point', 'Order_Quantity', 'Unit_Cost', 'Unit_Price', 'Demand_Forecast']
for col in numeric_fill_cols:
    if col in df.columns:
        df[col] = df.groupby('SKU_ID')[col].fillna(method='ffill')
        df[col] = df.groupby('SKU_ID')[col].fillna(method='bfill')

# Fill binary flags with 0
flag_cols = ['Promotion_Flag', 'Stockout_Flag']
for col in flag_cols:
    if col in df.columns:
        df[col] = df[col].fillna(0).astype(int)

print("✓ All missing values handled")

# ============================================================================
# 8. VALIDATE UNITS_SOLD vs INVENTORY_LEVEL LOGIC
# ============================================================================
print("\n[8] Validating Units_Sold vs Inventory_Level Logic...")

# Check for impossible scenarios (sold more than inventory on stockout days)
df['Potential_Issue'] = (
    (df['Units_Sold'] > df['Inventory_Level']) &
    (df['Stockout_Flag'] == 0)
).astype(int)

issues_found = df['Potential_Issue'].sum()
print(f"Potential logic issues found: {issues_found:,} rows")

if issues_found > 0:
    print("  (Units_Sold > Inventory_Level but Stockout_Flag=0)")
    print("  → These will be monitored but not corrected automatically")

# ============================================================================
# 9. FINAL DATA QUALITY CHECK
# ============================================================================
print("\n[9] Final Data Quality Check...")

print("\nMissing values per column:")
missing_summary = df.isnull().sum()
missing_summary = missing_summary[missing_summary > 0]
if len(missing_summary) > 0:
    print(missing_summary)
else:
    print("✓ No missing values remaining")

print("\nData types:")
print(df.dtypes)

# ============================================================================
# 10. CHRONOLOGICAL TRAIN-TEST SPLIT (80-20)
# ============================================================================
print("\n[10] Creating Train-Test Split (80%-20% Chronological)...")

# Sort by date
df = df.sort_values('Date').reset_index(drop=True)

# Calculate split point
split_idx = int(len(df['Date'].unique()) * 0.8)
split_date = sorted(df['Date'].unique())[split_idx]

# Split data
train_df = df[df['Date'] < split_date].copy()
test_df = df[df['Date'] >= split_date].copy()

print(f"✓ Split date: {split_date}")
print(f"✓ Training set: {len(train_df):,} rows ({len(train_df)/len(df)*100:.1f}%)")
print(f"  Date range: {train_df['Date'].min()} to {train_df['Date'].max()}")
print(f"✓ Test set: {len(test_df):,} rows ({len(test_df)/len(df)*100:.1f}%)")
print(f"  Date range: {test_df['Date'].min()} to {test_df['Date'].max()}")

# ============================================================================
# 11. SUMMARY STATISTICS
# ============================================================================
print("\n[11] Summary Statistics...")

print("\nDataset Overview:")
print(f"Total rows: {len(df):,}")
print(f"Unique SKUs: {df['SKU_ID'].nunique():,}")
print(f"Unique Warehouses: {df['Warehouse_ID'].nunique():,}")
print(f"Unique Suppliers: {df['Supplier_ID'].nunique():,}")
print(f"Unique Regions: {df['Region'].nunique():,}")

print("\nKey Metrics:")
print(f"Total Units Sold: {df['Units_Sold'].sum():,.0f}")
print(f"Average Daily Sales per SKU: {df.groupby('SKU_ID')['Units_Sold'].mean().mean():.2f}")
print(f"Stockout Rate: {df['Stockout_Flag'].mean()*100:.2f}%")
print(f"Promotion Rate: {df['Promotion_Flag'].mean()*100:.2f}%")

print("\n" + "=" * 80)
print("BLOCK A COMPLETE ✓")
print("=" * 80)
print("\nKey variables created:")
print("  - df: Full preprocessed dataset")
print("  - train_df: Training set (80%)")
print("  - test_df: Test set (20%)")
print("  - split_date: Date separating train/test")
print("\nReady for Block B: Feature Engineering & Baseline")

"""
DEMAND FORECASTING & INVENTORY SIMULATION - SUPPLY CHAIN
Block B: Feature Engineering & Baseline Models (REVISED)
"""

# ============================================================================
# IMPORTS (Additional for Block B)
# ============================================================================
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import LabelEncoder
import scipy.stats as stats

print("\n" + "=" * 80)
print("BLOCK B: FEATURE ENGINEERING & BASELINE MODELS")
print("=" * 80)

# ============================================================================
# 1. BASELINE MODELS - CREATE FORECASTS
# ============================================================================
print("\n[1] Building Baseline Forecasts...")

# Sort data by SKU and Date for proper lag calculations
df = df.sort_values(['SKU_ID', 'Date']).reset_index(drop=True)

# ---- Baseline 1: Naïve Forecast (lag_sell_j-1) ----
df['Baseline_Naive'] = df.groupby('SKU_ID')['Units_Sold'].shift(1)
print("✓ Baseline_Naive created (j-1 lag)")

# ---- Baseline 2: Moving Average (7 days) ----
df['Baseline_MA7'] = df.groupby('SKU_ID')['Units_Sold'].transform(
    lambda x: x.rolling(window=7, min_periods=1).mean().shift(1)
)
print("✓ Baseline_MA7 created (7-day MA)")

# ---- Baseline 3: Dataset Demand_Forecast ----
df['Baseline_Dataset'] = df['Demand_Forecast']
print("✓ Baseline_Dataset created (from Demand_Forecast column)")

# Fill NaN values in baselines with 0
df['Baseline_Naive'] = df['Baseline_Naive'].fillna(0)
df['Baseline_MA7'] = df['Baseline_MA7'].fillna(0)

print("\n✓ Three baseline forecasts created and ready for evaluation")

# ============================================================================
# 2. BASELINE PERFORMANCE METRICS (ON TEST SET)
# ============================================================================
print("\n[2] Evaluating Baseline Performance on Test Set...")

def calculate_metrics(y_true, y_pred, model_name):
    """Calculate RMSE, MAE, MAPE, and WAPE"""
    # Remove NaN values
    mask = ~(pd.isna(y_true) | pd.isna(y_pred))
    y_true_clean = y_true[mask]
    y_pred_clean = y_pred[mask]

    if len(y_true_clean) == 0:
        return None

    # RMSE
    rmse = np.sqrt(mean_squared_error(y_true_clean, y_pred_clean))

    # MAE
    mae = mean_absolute_error(y_true_clean, y_pred_clean)

    # MAPE (avoid division by zero)
    mape = np.mean(np.abs((y_true_clean - y_pred_clean) / (y_true_clean + 1e-10))) * 100

    # WAPE (Weighted Absolute Percentage Error)
    wape = np.sum(np.abs(y_true_clean - y_pred_clean)) / (np.sum(y_true_clean) + 1e-10) * 100

    return {
        'Model': model_name,
        'RMSE': rmse,
        'MAE': mae,
        'MAPE': mape,
        'WAPE': wape
    }

# Create test baseline dataset
test_baseline = df[df['Date'] >= split_date].copy()

print(f"Test baseline dataset: {len(test_baseline):,} rows")

# Calculate metrics for each baseline
baseline_results = []

# Naïve
metrics_naive = calculate_metrics(
    test_baseline['Units_Sold'],
    test_baseline['Baseline_Naive'],
    'Naïve (j-1)'
)
if metrics_naive:
    baseline_results.append(metrics_naive)

# MA7
metrics_ma7 = calculate_metrics(
    test_baseline['Units_Sold'],
    test_baseline['Baseline_MA7'],
    'Moving Avg (7d)'
)
if metrics_ma7:
    baseline_results.append(metrics_ma7)

# Dataset
metrics_dataset = calculate_metrics(
    test_baseline['Units_Sold'],
    test_baseline['Baseline_Dataset'],
    'Dataset Forecast'
)
if metrics_dataset:
    baseline_results.append(metrics_dataset)

# Create baseline metrics dataframe
baseline_metrics_df = pd.DataFrame(baseline_results)

print("\n✓ Baseline Performance (Test Set):")
print(baseline_metrics_df.to_string(index=False))

# ============================================================================
# 3. SUPPLY CHAIN ANALYSIS
# ============================================================================
print("\n" + "=" * 80)
print("[3] SUPPLY CHAIN ANALYSIS")
print("=" * 80)

# ---- 3.1 Descriptive Statistics ----
print("\n[3.1] Descriptive Statistics (Mean & Std Dev)...")

numeric_cols = ['Units_Sold', 'Inventory_Level', 'Supplier_Lead_Time_Days',
                'Reorder_Point', 'Order_Quantity', 'Unit_Cost', 'Unit_Price']

stats_summary = df[numeric_cols].agg(['mean', 'std', 'min', 'max'])
print("\nOverall Statistics:")
print(stats_summary.round(2))

# ---- 3.2 Coefficient of Variation by SKU ----
print("\n[3.2] Coefficient of Variation (CV) by SKU...")

sku_stats = df.groupby('SKU_ID')['Units_Sold'].agg(['mean', 'std']).reset_index()
sku_stats['CV'] = sku_stats['std'] / (sku_stats['mean'] + 1e-10)
sku_stats = sku_stats.sort_values('CV', ascending=False)

print(f"\nCV Statistics across all SKUs:")
print(f"  Mean CV: {sku_stats['CV'].mean():.3f}")
print(f"  Median CV: {sku_stats['CV'].median():.3f}")
print(f"  Min CV: {sku_stats['CV'].min():.3f}")
print(f"  Max CV: {sku_stats['CV'].max():.3f}")

print(f"\nTop 5 SKUs with highest variability:")
print(sku_stats.head()[['SKU_ID', 'mean', 'std', 'CV']].to_string(index=False))

# ---- 3.3 Seasonality Analysis ----
print("\n[3.3] Identifying Seasonality...")

# Weekly seasonality (day of week)
df['DayOfWeek'] = pd.to_datetime(df['Date']).dt.dayofweek
weekly_pattern = df.groupby('DayOfWeek')['Units_Sold'].mean()

print("\nAverage Units Sold by Day of Week:")
days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
for i, day in enumerate(days):
    if i in weekly_pattern.index:
        print(f"  {day}: {weekly_pattern.iloc[i]:.2f}")

# Monthly seasonality
df['Month'] = pd.to_datetime(df['Date']).dt.month
monthly_pattern = df.groupby('Month')['Units_Sold'].mean()

print("\nAverage Units Sold by Month:")
for month in range(1, 13):
    if month in monthly_pattern.index:
        print(f"  Month {month:2d}: {monthly_pattern.loc[month]:.2f}")

# ---- 3.4 Promotion Impact ----
print("\n[3.4] Impact of Promotion_Flag on Sales...")

promo_impact = df.groupby('Promotion_Flag')['Units_Sold'].agg(['mean', 'std', 'count'])
print("\nUnits Sold by Promotion Status:")
print(promo_impact)

if 1 in promo_impact.index and 0 in promo_impact.index:
    promo_uplift = (promo_impact.loc[1, 'mean'] / promo_impact.loc[0, 'mean'] - 1) * 100
    print(f"\n✓ Promotion Uplift Effect: {promo_uplift:.2f}%")
    print(f"  (Sales are {promo_uplift:.2f}% higher during promotions)")

# ---- 3.5 Lead Time vs Stockout Correlation ----
print("\n[3.5] Supplier Lead Time Impact on Stockouts...")

correlation = df[['Supplier_Lead_Time_Days', 'Stockout_Flag']].corr().iloc[0, 1]
print(f"\nCorrelation (Lead Time vs Stockout): {correlation:.3f}")

if correlation > 0.1:
    print("  → Longer lead times are associated with more stockouts")
elif correlation < -0.1:
    print("  → Longer lead times are associated with fewer stockouts")
else:
    print("  → Weak correlation between lead time and stockouts")

# Stockout rate by lead time
leadtime_stockout = df.groupby('Supplier_Lead_Time_Days')['Stockout_Flag'].agg(['mean', 'count'])
leadtime_stockout.columns = ['Stockout_Rate', 'Count']
leadtime_stockout = leadtime_stockout[leadtime_stockout['Count'] > 100].sort_values('Stockout_Rate', ascending=False)

print("\nTop 5 Lead Times with Highest Stockout Rates:")
print(leadtime_stockout.head())

# ============================================================================
# 4. ABC/XYZ CLASSIFICATION
# ============================================================================
print("\n" + "=" * 80)
print("[4] ABC/XYZ SEGMENTATION")
print("=" * 80)

# ---- 4.1 ABC Classification (Revenue-based) ----
print("\n[4.1] ABC Classification (by Revenue)...")

# Calculate total revenue per SKU
sku_revenue = df.groupby('SKU_ID').agg({
    'Units_Sold': 'sum',
    'Unit_Price': 'mean'
}).reset_index()
sku_revenue['Revenue'] = sku_revenue['Units_Sold'] * sku_revenue['Unit_Price']

# Sort by revenue descending and calculate cumulative percentage
sku_revenue = sku_revenue.sort_values('Revenue', ascending=False).reset_index(drop=True)
sku_revenue['Cumulative_Revenue'] = sku_revenue['Revenue'].cumsum()
total_revenue = sku_revenue['Revenue'].sum()
sku_revenue['Cumulative_Pct'] = (sku_revenue['Cumulative_Revenue'] / total_revenue) * 100

# Classify ABC based on cumulative revenue
def classify_abc(cum_pct):
    if cum_pct <= 80:
        return 'A'
    elif cum_pct <= 95:
        return 'B'
    else:
        return 'C'

sku_revenue['ABC_Class'] = sku_revenue['Cumulative_Pct'].apply(classify_abc)

print("\nABC Classification Distribution:")
abc_summary = sku_revenue.groupby('ABC_Class').agg({
    'SKU_ID': 'count',
    'Revenue': 'sum'
}).reset_index()
abc_summary.columns = ['ABC_Class', 'SKU_Count', 'Total_Revenue']
abc_summary['SKU_Pct'] = (abc_summary['SKU_Count'] / len(sku_revenue)) * 100
abc_summary['Revenue_Pct'] = (abc_summary['Total_Revenue'] / total_revenue) * 100

for _, row in abc_summary.iterrows():
    print(f"  Class {row['ABC_Class']}: {row['SKU_Count']:4d} SKUs ({row['SKU_Pct']:5.1f}%) → {row['Revenue_Pct']:5.1f}% of revenue")

# ---- 4.2 XYZ Classification (Demand Variability) ----
print("\n[4.2] XYZ Classification (by Coefficient of Variation)...")

# Merge CV data
sku_revenue = sku_revenue.merge(sku_stats[['SKU_ID', 'CV']], on='SKU_ID', how='left')

# Classify XYZ based on CV
def classify_xyz(cv):
    if cv <= 0.5:
        return 'X'
    elif cv <= 1.0:
        return 'Y'
    else:
        return 'Z'

sku_revenue['XYZ_Class'] = sku_revenue['CV'].apply(classify_xyz)

print("\nXYZ Classification Distribution:")
xyz_summary = sku_revenue.groupby('XYZ_Class').agg({
    'SKU_ID': 'count'
}).reset_index()
xyz_summary.columns = ['XYZ_Class', 'SKU_Count']
xyz_summary['SKU_Pct'] = (xyz_summary['SKU_Count'] / len(sku_revenue)) * 100

for _, row in xyz_summary.iterrows():
    print(f"  Class {row['XYZ_Class']}: {row['SKU_Count']:4d} SKUs ({row['SKU_Pct']:5.1f}%)")

# ---- 4.3 Combined ABC/XYZ ----
sku_revenue['ABC_XYZ_Class'] = sku_revenue['ABC_Class'] + sku_revenue['XYZ_Class']

print("\n[4.3] Combined ABC/XYZ Matrix:")
abcxyz_matrix = pd.crosstab(sku_revenue['ABC_Class'], sku_revenue['XYZ_Class'], margins=True)
print(abcxyz_matrix)

print("\n✓ ABC/XYZ classification complete")
print("  Class interpretation:")
print("    - AX: High value, low variability (predictable best-sellers)")
print("    - CZ: Low value, high variability (difficult to forecast, low priority)")

# Merge classification back to main dataframe
df = df.merge(
    sku_revenue[['SKU_ID', 'ABC_Class', 'XYZ_Class', 'ABC_XYZ_Class', 'CV']],
    on='SKU_ID',
    how='left'
)

print("\n✓ ABC/XYZ classes added to main dataframe")

# ============================================================================
# 5. FEATURE ENGINEERING
# ============================================================================
print("\n" + "=" * 80)
print("[5] FEATURE ENGINEERING")
print("=" * 80)

# Ensure sorted by SKU and Date
df = df.sort_values(['SKU_ID', 'Date']).reset_index(drop=True)

# ---- 5.1 Time Features ----
print("\n[5.1] Creating Time Features from Date...")

df['Date_dt'] = pd.to_datetime(df['Date'])
df['week_day'] = df['Date_dt'].dt.dayofweek
df['month'] = df['Date_dt'].dt.month
df['week_number'] = df['Date_dt'].dt.isocalendar().week.astype(int)
df['day_trend'] = (df['Date_dt'] - df['Date_dt'].min()).dt.days

print("✓ Time features: week_day, month, week_number, day_trend")

# ---- 5.2 Sales Lag Features ----
print("\n[5.2] Creating Sales Lag Features...")

df['lag_sell_j1'] = df.groupby('SKU_ID')['Units_Sold'].shift(1)
df['lag_sell_j7'] = df.groupby('SKU_ID')['Units_Sold'].shift(7)
df['lag_sell_j14'] = df.groupby('SKU_ID')['Units_Sold'].shift(14)

print("✓ Lag features: lag_sell_j1, lag_sell_j7, lag_sell_j14")

# ---- 5.3 Moving Average Features ----
print("\n[5.3] Creating Moving Average Features...")

df['ma_7j'] = df.groupby('SKU_ID')['Units_Sold'].transform(
    lambda x: x.rolling(window=7, min_periods=1).mean().shift(1)
)
df['ma_28j'] = df.groupby('SKU_ID')['Units_Sold'].transform(
    lambda x: x.rolling(window=28, min_periods=1).mean().shift(1)
)

print("✓ Moving averages: ma_7j, ma_28j")

# ---- 5.4 Volatility Features (Standard Deviation) ----
print("\n[5.4] Creating Volatility Features...")

df['volatility_j7'] = df.groupby('SKU_ID')['Units_Sold'].transform(
    lambda x: x.rolling(window=7, min_periods=1).std().shift(1)
)
df['volatility_j14'] = df.groupby('SKU_ID')['Units_Sold'].transform(
    lambda x: x.rolling(window=14, min_periods=1).std().shift(1)
)

print("✓ Volatility features: volatility_j7, volatility_j14")

# ---- 5.5 Inventory Features ----
print("\n[5.5] Creating Inventory Features...")

# Days of stock = Inventory / Average daily demand
df['days_of_stock'] = df['Inventory_Level'] / (df['ma_7j'] + 1e-10)
df['days_of_stock'] = df['days_of_stock'].clip(upper=365)  # Cap at 1 year

print("✓ Inventory feature: days_of_stock")

# ---- 5.6 Promotion Features ----
print("\n[5.6] Creating Promotion Features...")

df['promotion_of_the_day'] = df['Promotion_Flag'].astype(int)
df['j1_promotion'] = df.groupby('SKU_ID')['Promotion_Flag'].shift(1).fillna(0).astype(int)

# Promotion density over last 30 days
df['promotion_density'] = df.groupby('SKU_ID')['Promotion_Flag'].transform(
    lambda x: x.rolling(window=30, min_periods=1).mean().shift(1)
)

print("✓ Promotion features: promotion_of_the_day, j1_promotion, promotion_density")

# ---- 5.7 Lead Time Features ----
print("\n[5.7] Creating Lead Time & Supplier Features...")

# Average lead time per supplier
df['average_lead_time'] = df.groupby('Supplier_ID')['Supplier_Lead_Time_Days'].transform('mean')

# Lead time variability per supplier
df['leadtime_variability'] = df.groupby('Supplier_ID')['Supplier_Lead_Time_Days'].transform('std')
df['leadtime_variability'] = df['leadtime_variability'].fillna(0)

print("✓ Lead time features: average_lead_time, leadtime_variability")

# ============================================================================
# 6. CATEGORICAL ENCODING
# ============================================================================
print("\n" + "=" * 80)
print("[6] CATEGORICAL ENCODING")
print("=" * 80)

# ---- 6.1 Label Encoding for High Cardinality Features ----
print("\n[6.1] Label Encoding for SKU_ID and Supplier_ID...")

# SKU_ID
le_sku = LabelEncoder()
df['SKU_ID_encoded'] = le_sku.fit_transform(df['SKU_ID'].astype(str))
print(f"✓ SKU_ID encoded: {df['SKU_ID'].nunique()} unique values → 0 to {df['SKU_ID_encoded'].max()}")

# Supplier_ID
le_supplier = LabelEncoder()
df['Supplier_ID_encoded'] = le_supplier.fit_transform(df['Supplier_ID'].astype(str))
print(f"✓ Supplier_ID encoded: {df['Supplier_ID'].nunique()} unique values → 0 to {df['Supplier_ID_encoded'].max()}")

# ---- 6.2 Encoding for Warehouse_ID and Region ----
print("\n[6.2] Encoding Warehouse_ID and Region...")

# Check cardinality
warehouse_card = df['Warehouse_ID'].nunique()
region_card = df['Region'].nunique()

print(f"Warehouse_ID cardinality: {warehouse_card}")
print(f"Region cardinality: {region_card}")

# Warehouse_ID
if warehouse_card < 20:
    # One-hot encoding
    warehouse_dummies = pd.get_dummies(df['Warehouse_ID'], prefix='Warehouse', drop_first=True, dtype=int)
    df = pd.concat([df, warehouse_dummies], axis=1)
    print(f"✓ Warehouse_ID one-hot encoded: {warehouse_dummies.shape[1]} dummy features")
else:
    # Label encoding
    le_warehouse = LabelEncoder()
    df['Warehouse_ID_encoded'] = le_warehouse.fit_transform(df['Warehouse_ID'].astype(str))
    print(f"✓ Warehouse_ID label encoded")

# Region
if region_card < 20:
    # One-hot encoding
    region_dummies = pd.get_dummies(df['Region'], prefix='Region', drop_first=True, dtype=int)
    df = pd.concat([df, region_dummies], axis=1)
    print(f"✓ Region one-hot encoded: {region_dummies.shape[1]} dummy features")
else:
    # Label encoding
    le_region = LabelEncoder()
    df['Region_encoded'] = le_region.fit_transform(df['Region'].astype(str))
    print(f"✓ Region label encoded")

# ---- 6.3 Encode ABC/XYZ Classes ----
print("\n[6.3] Encoding ABC/XYZ Classes...")

abc_mapping = {'A': 3, 'B': 2, 'C': 1}
xyz_mapping = {'X': 3, 'Y': 2, 'Z': 1}

df['ABC_Class_encoded'] = df['ABC_Class'].map(abc_mapping).fillna(0).astype(int)
df['XYZ_Class_encoded'] = df['XYZ_Class'].map(xyz_mapping).fillna(0).astype(int)

print("✓ ABC/XYZ classes encoded (A=3, B=2, C=1; X=3, Y=2, Z=1)")

# ============================================================================
# 7. FILL MISSING VALUES IN ENGINEERED FEATURES
# ============================================================================
print("\n[7] Handling Missing Values in Engineered Features...")

# List of features that may have NaN due to lag/rolling operations
feature_cols = [
    'lag_sell_j1', 'lag_sell_j7', 'lag_sell_j14',
    'ma_7j', 'ma_28j',
    'volatility_j7', 'volatility_j14',
    'days_of_stock',
    'j1_promotion', 'promotion_density',
    'average_lead_time', 'leadtime_variability'
]

for col in feature_cols:
    if col in df.columns:
        missing_count = df[col].isna().sum()
        if missing_count > 0:
            df[col] = df[col].fillna(0)
            print(f"  {col}: {missing_count:,} NaN values filled with 0")

print("\n✓ All engineered features have no missing values")

# ============================================================================
# 8. VERIFY ALL FEATURES ARE NUMERIC
# ============================================================================
print("\n[8] Verifying All Features Are Numeric...")

# Get all numeric columns
numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
non_numeric_cols = df.select_dtypes(exclude=[np.number]).columns.tolist()

print(f"\nNumeric columns: {len(numeric_cols)}")
print(f"Non-numeric columns: {len(non_numeric_cols)}")

if non_numeric_cols:
    print(f"\nNon-numeric columns (will not be used in modeling):")
    print(f"  {non_numeric_cols}")

# ============================================================================
# 9. DEFINE FEATURE LIST FOR MODELING
# ============================================================================
print("\n[9] Defining Feature List for ML Modeling...")

# Time features
time_features = ['week_day', 'month', 'week_number', 'day_trend']

# Lag features
lag_features = ['lag_sell_j1', 'lag_sell_j7', 'lag_sell_j14']

# Moving average features
ma_features = ['ma_7j', 'ma_28j']

# Volatility features
volatility_features = ['volatility_j7', 'volatility_j14']

# Inventory features
inventory_features = ['days_of_stock', 'Inventory_Level', 'Reorder_Point']

# Promotion features
promotion_features = ['promotion_of_the_day', 'j1_promotion', 'promotion_density']

# Lead time features
leadtime_features = ['average_lead_time', 'leadtime_variability', 'Supplier_Lead_Time_Days']

# Classification features
classification_features = ['ABC_Class_encoded', 'XYZ_Class_encoded', 'CV']

# ID features (encoded)
id_features = ['SKU_ID_encoded', 'Supplier_ID_encoded']

# Warehouse and Region features
warehouse_features = [col for col in df.columns if col.startswith('Warehouse_')]
region_features = [col for col in df.columns if col.startswith('Region_')]

# If no one-hot encoded features, use label encoded
if not warehouse_features and 'Warehouse_ID_encoded' in df.columns:
    warehouse_features = ['Warehouse_ID_encoded']
if not region_features and 'Region_encoded' in df.columns:
    region_features = ['Region_encoded']

# Combine all features
all_model_features = (
    time_features +
    lag_features +
    ma_features +
    volatility_features +
    inventory_features +
    promotion_features +
    leadtime_features +
    classification_features +
    id_features +
    warehouse_features +
    region_features
)

# Verify all features exist in dataframe
all_model_features = [f for f in all_model_features if f in df.columns]

print(f"\n✓ Total features for modeling: {len(all_model_features)}")
print("\nFeature groups:")
print(f"  Time: {len(time_features)}")
print(f"  Lag: {len(lag_features)}")
print(f"  Moving Average: {len(ma_features)}")
print(f"  Volatility: {len(volatility_features)}")
print(f"  Inventory: {len(inventory_features)}")
print(f"  Promotion: {len(promotion_features)}")
print(f"  Lead Time: {len(leadtime_features)}")
print(f"  Classification: {len(classification_features)}")
print(f"  IDs: {len(id_features)}")
print(f"  Warehouse: {len(warehouse_features)}")
print(f"  Region: {len(region_features)}")

# Verify all features are numeric
non_numeric_features = [f for f in all_model_features if f not in numeric_cols]
if non_numeric_features:
    print(f"\n⚠️ WARNING: Non-numeric features found: {non_numeric_features}")
    all_model_features = [f for f in all_model_features if f in numeric_cols]
else:
    print(f"\n✓ All {len(all_model_features)} features are numeric")

# ============================================================================
# 10. UPDATE TRAIN-TEST SPLIT WITH NEW FEATURES
# ============================================================================
print("\n[10] Updating Train-Test Split...")

# Update splits
train_df = df[df['Date'] < split_date].copy()
test_df = df[df['Date'] >= split_date].copy()

print(f"✓ Training set: {len(train_df):,} rows")
print(f"  Date range: {train_df['Date'].min()} to {train_df['Date'].max()}")
print(f"✓ Test set: {len(test_df):,} rows")
print(f"  Date range: {test_df['Date'].min()} to {test_df['Date'].max()}")

# Update test_baseline with all features
test_baseline = test_df.copy()
print(f"✓ test_baseline updated with {len(test_baseline):,} rows and all engineered features")

# ============================================================================
# 11. FINAL SUMMARY
# ============================================================================
print("\n" + "=" * 80)
print("BLOCK B COMPLETE ✓")
print("=" * 80)

print("\nKey outputs created:")
print("  ✓ baseline_metrics_df: Performance metrics for 3 baselines")
print("  ✓ sku_revenue: ABC/XYZ classification per SKU")
print("  ✓ df: Full dataset with all features")
print("  ✓ train_df: Training set with features")
print("  ✓ test_df: Test set with features")
print("  ✓ test_baseline: Test set with baseline forecasts")
print("  ✓ all_model_features: List of numeric features for ML modeling")

print("\nBaseline Performance Summary:")
print(baseline_metrics_df[['Model', 'RMSE', 'MAPE', 'WAPE']].to_string(index=False))

print(f"\nTotal features ready for modeling: {len(all_model_features)}")
print(f"All features are numeric: {len([f for f in all_model_features if f in numeric_cols]) == len(all_model_features)}")

print("\n✅ Ready for Block C: Forecast Model & Metrics")

"""
DEMAND FORECASTING & INVENTORY SIMULATION - SUPPLY CHAIN
Block C: Forecast Model & Metrics
"""

# ============================================================================
# IMPORTS (Additional for Block C)
# ============================================================================
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit
import joblib

print("\n" + "=" * 80)
print("BLOCK C: FORECAST MODEL & METRICS")
print("=" * 80)

# ============================================================================
# 1. ADD NAIVE SEASONALITY BASELINE
# ============================================================================
print("\n[1] Adding Naive Seasonality Baseline (j-7)...")

# Add Naive Seasonality baseline to dataframe
df['Baseline_Naive_Season'] = df.groupby('SKU_ID')['Units_Sold'].shift(7)
df['Baseline_Naive_Season'] = df['Baseline_Naive_Season'].fillna(0)

print("✓ Baseline_Naive_Season created (j-7 lag)")

# Update test_baseline
test_baseline = df[df['Date'] >= split_date].copy()

# Recalculate all baseline metrics
print("\n[1.1] Evaluating All Baselines on Test Set...")

baseline_results = []

# Naïve (j-1)
metrics = calculate_metrics(
    test_baseline['Units_Sold'],
    test_baseline['Baseline_Naive'],
    'Naïve (j-1)'
)
if metrics:
    baseline_results.append(metrics)

# MA7
metrics = calculate_metrics(
    test_baseline['Units_Sold'],
    test_baseline['Baseline_MA7'],
    'Moving Avg (7d)'
)
if metrics:
    baseline_results.append(metrics)

# Naive Seasonality (j-7)
metrics = calculate_metrics(
    test_baseline['Units_Sold'],
    test_baseline['Baseline_Naive_Season'],
    'Naïve Season (j-7)'
)
if metrics:
    baseline_results.append(metrics)

# Dataset Forecast
metrics = calculate_metrics(
    test_baseline['Units_Sold'],
    test_baseline['Baseline_Dataset'],
    'Dataset Forecast'
)
if metrics:
    baseline_results.append(metrics)

baseline_metrics_df = pd.DataFrame(baseline_results)
print("\n✓ All Baseline Performance (Test Set):")
print(baseline_metrics_df.to_string(index=False))

# ============================================================================
# 2. PREPARE DATA FOR ML MODEL
# ============================================================================
print("\n" + "=" * 80)
print("[2] PREPARING DATA FOR ML MODEL")
print("=" * 80)

print("\n[2.1] Data Structure Validation...")

# Create clean modeling dataset (remove rows with NaN in features)
df_model = df.copy()

# Check for missing values in features
missing_features = df_model[all_model_features].isnull().sum()
missing_features = missing_features[missing_features > 0]

if len(missing_features) > 0:
    print(f"\n⚠️ Features with missing values:")
    print(missing_features)
    print("\nRemoving rows with missing feature values...")
    df_model = df_model.dropna(subset=all_model_features)
else:
    print("✓ No missing values in features")

# Check for missing values in target
target_missing = df_model['Units_Sold'].isnull().sum()
if target_missing > 0:
    print(f"\n⚠️ Target has {target_missing} missing values, removing...")
    df_model = df_model.dropna(subset=['Units_Sold'])
else:
    print("✓ No missing values in target")

print(f"\n✓ Clean dataset: {len(df_model):,} rows")

# Split into train and test
train_df_model = df_model[df_model['Date'] < split_date].copy()
test_df_model = df_model[df_model['Date'] >= split_date].copy()

print(f"✓ Training set: {len(train_df_model):,} rows")
print(f"✓ Test set: {len(test_df_model):,} rows")

# Prepare feature matrices
print("\n[2.2] Preparing Feature Matrices...")

X_train = train_df_model[all_model_features].values
y_train = train_df_model['Units_Sold'].values

X_test = test_df_model[all_model_features].values
y_test = test_df_model['Units_Sold'].values

print(f"\n✓ X_train shape: {X_train.shape}")
print(f"✓ y_train shape: {y_train.shape}")
print(f"✓ X_test shape: {X_test.shape}")
print(f"✓ y_test shape: {y_test.shape}")

# Data type verification
print(f"\n✓ X_train dtype: {X_train.dtype}")
print(f"✓ y_train dtype: {y_train.dtype}")

# Check for inf/nan values
print(f"\n✓ Inf values in X_train: {np.isinf(X_train).sum()}")
print(f"✓ NaN values in X_train: {np.isnan(X_train).sum()}")
print(f"✓ Inf values in y_train: {np.isinf(y_train).sum()}")
print(f"✓ NaN values in y_train: {np.isnan(y_train).sum()}")

if np.isinf(X_train).sum() > 0 or np.isnan(X_train).sum() > 0:
    print("\n⚠️ WARNING: Invalid values detected in X_train!")
    # Replace inf with large number, nan with 0
    X_train = np.nan_to_num(X_train, nan=0.0, posinf=1e10, neginf=-1e10)
    X_test = np.nan_to_num(X_test, nan=0.0, posinf=1e10, neginf=-1e10)
    print("✓ Invalid values replaced")

print("\n✓ Data validation complete - ready for training")

# ============================================================================
# 3. MULTI-HORIZON TARGET PREPARATION
# ============================================================================
print("\n" + "=" * 80)
print("[3] MULTI-HORIZON FORECAST SETUP")
print("=" * 80)

horizons = [1, 7, 14]
print(f"\nForecasting horizons: {horizons} days (j+1, j+7, j+14)")

# Create future target variables for each horizon
print("\n[3.1] Creating Future Targets...")

for h in horizons:
    target_col = f'Units_Sold_j{h}'
    df_model[target_col] = df_model.groupby('SKU_ID')['Units_Sold'].shift(-h)
    print(f"✓ Created target: {target_col} (Units_Sold shifted by -{h} days)")

# Remove rows where future targets are NaN
print("\n[3.2] Removing Rows with NaN Targets...")

initial_rows = len(df_model)
df_model_clean = df_model.dropna(subset=[f'Units_Sold_j{h}' for h in horizons])
removed_rows = initial_rows - len(df_model_clean)

print(f"✓ Rows removed: {removed_rows:,} ({removed_rows/initial_rows*100:.2f}%)")
print(f"✓ Remaining rows: {len(df_model_clean):,}")

# Update train/test splits with clean data
train_df_clean = df_model_clean[df_model_clean['Date'] < split_date].copy()
test_df_clean = df_model_clean[df_model_clean['Date'] >= split_date].copy()

print(f"\n✓ Clean training set: {len(train_df_clean):,} rows")
print(f"✓ Clean test set: {len(test_df_clean):,} rows")

# ============================================================================
# 4. XGBOOST MODEL TRAINING (MULTI-HORIZON)
# ============================================================================
print("\n" + "=" * 80)
print("[4] XGBOOST MODEL TRAINING")
print("=" * 80)

# XGBoost hyperparameters
xgb_params = {
    'n_estimators': 200,
    'max_depth': 6,
    'learning_rate': 0.1,
    'min_child_weight': 3,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'gamma': 0.1,
    'reg_alpha': 0.1,
    'reg_lambda': 1.0,
    'random_state': 42,
    'n_jobs': -1,
    'verbosity': 0
}

print(f"\nXGBoost Hyperparameters:")
for key, value in xgb_params.items():
    print(f"  {key}: {value}")

# Dictionary to store models and predictions
models = {}
predictions_test = {}
predictions_train = {}

print("\n" + "-" * 80)

# Train separate model for each horizon
for h in horizons:
    print(f"\n[4.{horizons.index(h)+1}] Training XGBoost for horizon j+{h}...")

    target_col = f'Units_Sold_j{h}'

    # Prepare data for this horizon
    X_train_h = train_df_clean[all_model_features].values
    y_train_h = train_df_clean[target_col].values

    X_test_h = test_df_clean[all_model_features].values
    y_test_h = test_df_clean[target_col].values

    print(f"  Training samples: {len(X_train_h):,}")
    print(f"  Test samples: {len(X_test_h):,}")
    print(f"  Features: {X_train_h.shape[1]}")

    # Train model
    print(f"  Training in progress...")
    model = XGBRegressor(**xgb_params)
    model.fit(X_train_h, y_train_h, verbose=False)

    # Predict on train and test sets
    y_pred_train = model.predict(X_train_h)
    y_pred_train = np.maximum(y_pred_train, 0)  # Non-negative constraint

    y_pred_test = model.predict(X_test_h)
    y_pred_test = np.maximum(y_pred_test, 0)  # Non-negative constraint

    # Store results
    models[f'j{h}'] = model
    predictions_train[f'j{h}'] = y_pred_train
    predictions_test[f'j{h}'] = y_pred_test

    # Add predictions to dataframes
    train_df_clean[f'Pred_XGB_j{h}'] = y_pred_train
    test_df_clean[f'Pred_XGB_j{h}'] = y_pred_test

    # Calculate metrics
    metrics_test = calculate_metrics(y_test_h, y_pred_test, f'XGBoost j+{h}')

    print(f"  ✓ Model trained successfully")
    print(f"  Test Set Performance:")
    print(f"    RMSE: {metrics_test['RMSE']:.2f}")
    print(f"    MAE: {metrics_test['MAE']:.2f}")
    print(f"    MAPE: {metrics_test['MAPE']:.2f}%")
    print(f"    WAPE: {metrics_test['WAPE']:.2f}%")

print("\n" + "-" * 80)
print("✓ All horizon models trained successfully")

# ============================================================================
# 5. TIME-SERIES CROSS-VALIDATION
# ============================================================================
print("\n" + "=" * 80)
print("[5] TIME-SERIES CROSS-VALIDATION")
print("=" * 80)

print("\n[5.1] Walk-Forward Validation (Expanding Window)...")
print("Using TimeSeriesSplit with 5 folds")

n_splits = 5
tscv = TimeSeriesSplit(n_splits=n_splits)

cv_results = []

for h in horizons:
    print(f"\n  Validating horizon j+{h}...")

    target_col = f'Units_Sold_j{h}'
    X = train_df_clean[all_model_features].values
    y = train_df_clean[target_col].values

    fold_scores = {'RMSE': [], 'MAE': [], 'MAPE': [], 'WAPE': []}

    for fold, (train_idx, val_idx) in enumerate(tscv.split(X), 1):
        X_fold_train, X_fold_val = X[train_idx], X[val_idx]
        y_fold_train, y_fold_val = y[train_idx], y[val_idx]

        # Train model on fold
        model_cv = XGBRegressor(**xgb_params)
        model_cv.fit(X_fold_train, y_fold_train, verbose=False)

        # Predict on validation fold
        y_fold_pred = model_cv.predict(X_fold_val)
        y_fold_pred = np.maximum(y_fold_pred, 0)

        # Calculate metrics
        metrics = calculate_metrics(y_fold_val, y_fold_pred, f'Fold {fold}')

        if metrics:
            fold_scores['RMSE'].append(metrics['RMSE'])
            fold_scores['MAE'].append(metrics['MAE'])
            fold_scores['MAPE'].append(metrics['MAPE'])
            fold_scores['WAPE'].append(metrics['WAPE'])

    # Calculate average and std across folds
    if fold_scores['RMSE']:
        avg_metrics = {
            'Horizon': f'j+{h}',
            'RMSE_mean': np.mean(fold_scores['RMSE']),
            'RMSE_std': np.std(fold_scores['RMSE']),
            'MAE_mean': np.mean(fold_scores['MAE']),
            'MAPE_mean': np.mean(fold_scores['MAPE']),
            'WAPE_mean': np.mean(fold_scores['WAPE']),
        }
        cv_results.append(avg_metrics)

        print(f"    Cross-Validation Results:")
        print(f"      RMSE: {avg_metrics['RMSE_mean']:.2f} ± {avg_metrics['RMSE_std']:.2f}")
        print(f"      MAE:  {avg_metrics['MAE_mean']:.2f}")
        print(f"      WAPE: {avg_metrics['WAPE_mean']:.2f}%")

cv_results_df = pd.DataFrame(cv_results)

print("\n✓ Cross-Validation Summary:")
print(cv_results_df.to_string(index=False))

# ============================================================================
# 6. COMPREHENSIVE TECHNICAL METRICS
# ============================================================================
print("\n" + "=" * 80)
print("[6] TECHNICAL METRICS COMPARISON")
print("=" * 80)

def calculate_smape(y_true, y_pred):
    """Calculate Symmetric Mean Absolute Percentage Error"""
    mask = ~(pd.isna(y_true) | pd.isna(y_pred))
    y_true_clean = y_true[mask]
    y_pred_clean = y_pred[mask]

    if len(y_true_clean) == 0:
        return np.nan

    denominator = (np.abs(y_true_clean) + np.abs(y_pred_clean)) / 2
    smape = np.mean(np.abs(y_true_clean - y_pred_clean) / (denominator + 1e-10)) * 100
    return smape

# Compare all models for j+1 horizon
print("\n[6.1] Model Comparison for j+1 Horizon...")

comparison_results = []

# Get test data
test_eval = test_df_clean.copy()
target_j1 = test_eval['Units_Sold_j1']

# Baseline: Naïve (j-1)
naive_pred_j1 = test_eval['Baseline_Naive']
if not naive_pred_j1.isna().all():
    metrics = calculate_metrics(target_j1, naive_pred_j1, 'Naïve (j-1)')
    if metrics:
        metrics['SMAPE'] = calculate_smape(target_j1.values, naive_pred_j1.values)
        comparison_results.append(metrics)

# Baseline: MA7
ma7_pred_j1 = test_eval['Baseline_MA7']
if not ma7_pred_j1.isna().all():
    metrics = calculate_metrics(target_j1, ma7_pred_j1, 'MA (7d)')
    if metrics:
        metrics['SMAPE'] = calculate_smape(target_j1.values, ma7_pred_j1.values)
        comparison_results.append(metrics)

# Baseline: Naïve Seasonality (j-7)
season_pred_j1 = test_eval['Baseline_Naive_Season']
if not season_pred_j1.isna().all():
    metrics = calculate_metrics(target_j1, season_pred_j1, 'Naïve Season (j-7)')
    if metrics:
        metrics['SMAPE'] = calculate_smape(target_j1.values, season_pred_j1.values)
        comparison_results.append(metrics)

# Baseline: Dataset
dataset_pred_j1 = test_eval['Baseline_Dataset']
if not dataset_pred_j1.isna().all():
    metrics = calculate_metrics(target_j1, dataset_pred_j1, 'Dataset Forecast')
    if metrics:
        metrics['SMAPE'] = calculate_smape(target_j1.values, dataset_pred_j1.values)
        comparison_results.append(metrics)

# XGBoost
xgb_pred_j1 = test_eval['Pred_XGB_j1']
if not xgb_pred_j1.isna().all():
    metrics = calculate_metrics(target_j1, xgb_pred_j1, 'XGBoost')
    if metrics:
        metrics['SMAPE'] = calculate_smape(target_j1.values, xgb_pred_j1.values)
        comparison_results.append(metrics)

comparison_j1_df = pd.DataFrame(comparison_results)

print("\nj+1 Horizon - All Models Comparison:")
print(comparison_j1_df[['Model', 'RMSE', 'MAE', 'MAPE', 'SMAPE', 'WAPE']].to_string(index=False))

# Calculate improvement
if len(comparison_j1_df) > 1:
    baseline_rmse = comparison_j1_df[comparison_j1_df['Model'] != 'XGBoost']['RMSE'].min()
    xgb_rmse = comparison_j1_df[comparison_j1_df['Model'] == 'XGBoost']['RMSE'].values[0]
    improvement_pct = ((baseline_rmse - xgb_rmse) / baseline_rmse) * 100

    print(f"\n🎯 XGBoost Performance:")
    print(f"  Best Baseline RMSE: {baseline_rmse:.2f}")
    print(f"  XGBoost RMSE: {xgb_rmse:.2f}")
    print(f"  Improvement: {improvement_pct:+.2f}%")

# ============================================================================
# 7. BUSINESS METRICS - SUPPLY CHAIN KPIs
# ============================================================================
print("\n" + "=" * 80)
print("[7] BUSINESS METRICS - SUPPLY CHAIN KPIs")
print("=" * 80)

def calculate_supply_chain_kpis(df_eval, forecast_col, scenario_name):
    """
    Calculate comprehensive supply chain KPIs

    Parameters:
    - df_eval: DataFrame with actual demand and inventory data
    - forecast_col: Column name with demand forecast (not used in calculation but for reference)
    - scenario_name: Name of the scenario for reporting

    Returns:
    - kpi_results: Dictionary with aggregated KPIs
    - sku_kpis: DataFrame with SKU-level KPIs
    """
    df_kpi = df_eval.copy()

    # --- a) Fill Rate ---
    df_kpi['served_demand'] = np.minimum(df_kpi['Units_Sold'], df_kpi['Inventory_Level'])
    df_kpi['total_demand'] = df_kpi['Units_Sold']

    # Overall fill rate
    total_served = df_kpi['served_demand'].sum()
    total_demand = df_kpi['total_demand'].sum()
    fill_rate = (total_served / (total_demand + 1e-10)) * 100

    # --- b) Stockout Rate ---
    df_kpi['days_with_stockout'] = (
        (df_kpi['Inventory_Level'] < df_kpi['Units_Sold']) |
        (df_kpi['Stockout_Flag'] == 1)
    ).astype(int)

    stockout_rate = (df_kpi['days_with_stockout'].sum() / len(df_kpi)) * 100

    # --- c) Holding Cost ---
    annual_storage_rate = 0.20  # 20% annual storage cost
    df_kpi['unit_storage_cost'] = df_kpi['Unit_Cost'] * (annual_storage_rate / 365)
    df_kpi['holding_cost_per_day'] = df_kpi['Inventory_Level'] * df_kpi['unit_storage_cost']
    total_holding_cost = df_kpi['holding_cost_per_day'].sum()

    # --- d) Stockout Cost ---
    penalty_factor = 1.5  # 150% of margin
    df_kpi['unit_margin'] = df_kpi['Unit_Price'] - df_kpi['Unit_Cost']
    df_kpi['unit_stockout_cost'] = df_kpi['unit_margin'] * penalty_factor
    df_kpi['unmet_demand'] = np.maximum(0, df_kpi['Units_Sold'] - df_kpi['Inventory_Level'])
    df_kpi['stockout_cost_per_day'] = df_kpi['unmet_demand'] * df_kpi['unit_stockout_cost']
    total_stockout_cost = df_kpi['stockout_cost_per_day'].sum()

    # --- e) Total Cost ---
    total_cost = total_holding_cost + total_stockout_cost

    # --- SKU-Level Aggregation ---
    sku_kpis = df_kpi.groupby('SKU_ID').agg({
        'served_demand': 'sum',
        'total_demand': 'sum',
        'days_with_stockout': 'sum',
        'holding_cost_per_day': 'sum',
        'stockout_cost_per_day': 'sum',
        'Units_Sold': 'sum'  # For demand weighting
    }).reset_index()

    # Calculate SKU-level rates
    sku_kpis['days_count'] = df_kpi.groupby('SKU_ID').size().values
    sku_kpis['fill_rate'] = (sku_kpis['served_demand'] / (sku_kpis['total_demand'] + 1e-10)) * 100
    sku_kpis['stockout_rate'] = (sku_kpis['days_with_stockout'] / sku_kpis['days_count']) * 100
    sku_kpis['total_cost'] = sku_kpis['holding_cost_per_day'] + sku_kpis['stockout_cost_per_day']

    # Weighted averages (by demand/Units_Sold)
    total_demand_weight = sku_kpis['Units_Sold'].sum()
    weighted_fill_rate = (sku_kpis['fill_rate'] * sku_kpis['Units_Sold']).sum() / (total_demand_weight + 1e-10)
    weighted_stockout_rate = (sku_kpis['stockout_rate'] * sku_kpis['Units_Sold']).sum() / (total_demand_weight + 1e-10)

    # Aggregate results
    kpi_results = {
        'Scenario': scenario_name,
        'Fill_Rate': fill_rate,
        'Weighted_Fill_Rate': weighted_fill_rate,
        'Stockout_Rate': stockout_rate,
        'Weighted_Stockout_Rate': weighted_stockout_rate,
        'Holding_Cost': total_holding_cost,
        'Stockout_Cost': total_stockout_cost,
        'Total_Cost': total_cost,
        'Total_Unmet_Demand': df_kpi['unmet_demand'].sum()
    }

    return kpi_results, sku_kpis

# Calculate KPIs for Baseline Scenario
print("\n[7.1] Calculating KPIs for Baseline Scenario...")
print("(Using current inventory levels and actual demand)")

baseline_kpis, baseline_sku_kpis = calculate_supply_chain_kpis(
    test_df_clean,
    forecast_col='Units_Sold',  # Actual demand as baseline
    scenario_name='Baseline (Current State)'
)

print(f"\n📊 Baseline Scenario Results:")
print(f"  Fill Rate: {baseline_kpis['Fill_Rate']:.2f}%")
print(f"  Weighted Fill Rate: {baseline_kpis['Weighted_Fill_Rate']:.2f}%")
print(f"  Stockout Rate: {baseline_kpis['Stockout_Rate']:.2f}%")
print(f"  Weighted Stockout Rate: {baseline_kpis['Weighted_Stockout_Rate']:.2f}%")
print(f"  Holding Cost: €{baseline_kpis['Holding_Cost']:,.2f}")
print(f"  Stockout Cost: €{baseline_kpis['Stockout_Cost']:,.2f}")
print(f"  Total Cost: €{baseline_kpis['Total_Cost']:,.2f}")
print(f"  Total Unmet Demand: {baseline_kpis['Total_Unmet_Demand']:,.0f} units")

# Calculate KPIs for XGBoost Scenario
print("\n[7.2] Simulating KPIs with XGBoost Forecast...")
print("(Note: This is a simplified simulation using j+1 forecast)")

# For simplicity, we use the actual inventory in test data
# In a full simulation (Block D), we would adjust inventory based on forecast
xgb_kpis, xgb_sku_kpis = calculate_supply_chain_kpis(
    test_df_clean,
    forecast_col='Pred_XGB_j1',
    scenario_name='XGBoost Forecast'
)

print(f"\n📊 XGBoost Scenario Results:")
print(f"  Fill Rate: {xgb_kpis['Fill_Rate']:.2f}%")
print(f"  Weighted Fill Rate: {xgb_kpis['Weighted_Fill_Rate']:.2f}%")
print(f"  Stockout Rate: {xgb_kpis['Stockout_Rate']:.2f}%")
print(f"  Weighted Stockout Rate: {xgb_kpis['Weighted_Stockout_Rate']:.2f}%")
print(f"  Holding Cost: €{xgb_kpis['Holding_Cost']:,.2f}")
print(f"  Stockout Cost: €{xgb_kpis['Stockout_Cost']:,.2f}")
print(f"  Total Cost: €{xgb_kpis['Total_Cost']:,.2f}")
print(f"  Total Unmet Demand: {xgb_kpis['Total_Unmet_Demand']:,.0f} units")

# ============================================================================
# 8. BUSINESS IMPACT SUMMARY
# ============================================================================
print("\n" + "=" * 80)
print("[8] BUSINESS IMPACT SUMMARY")
print("=" * 80)

# Calculate improvements
cost_savings = baseline_kpis['Total_Cost'] - xgb_kpis['Total_Cost']
cost_savings_pct = (cost_savings / baseline_kpis['Total_Cost']) * 100

holding_cost_change = baseline_kpis['Holding_Cost'] - xgb_kpis['Holding_Cost']
stockout_cost_change = baseline_kpis['Stockout_Cost'] - xgb_kpis['Stockout_Cost']

fill_rate_change = xgb_kpis['Weighted_Fill_Rate'] - baseline_kpis['Weighted_Fill_Rate']
stockout_rate_change = baseline_kpis['Weighted_Stockout_Rate'] - xgb_kpis['Weighted_Stockout_Rate']

unmet_demand_reduction = baseline_kpis['Total_Unmet_Demand'] - xgb_kpis['Total_Unmet_Demand']

print("\n🎯 IMPACT OF XGBOOST FORECASTING vs BASELINE:")
print("\n💰 Cost Impact:")
print(f"  Total Cost Savings: €{cost_savings:,.2f} ({cost_savings_pct:+.2f}%)")
print(f"    Holding Cost Change: €{holding_cost_change:,.2f}")
print(f"    Stockout Cost Change: €{stockout_cost_change:,.2f}")

print("\n📈 Service Level Impact:")
print(f"  Fill Rate Change: {fill_rate_change:+.2f} percentage points")
print(f"  Stockout Rate Reduction: {stockout_rate_change:+.2f} percentage points")

print("\n📦 Operational Impact:")
print(f"  Unmet Demand Reduction: {unmet_demand_reduction:,.0f} units")

# Create comparison dataframe
impact_comparison = pd.DataFrame([baseline_kpis, xgb_kpis])
impact_comparison = impact_comparison[[
    'Scenario', 'Fill_Rate', 'Weighted_Fill_Rate', 'Stockout_Rate',
    'Weighted_Stockout_Rate', 'Holding_Cost', 'Stockout_Cost', 'Total_Cost'
]]

print("\n📊 Side-by-Side Comparison:")
print(impact_comparison.to_string(index=False))

# ============================================================================
# 9. FEATURE IMPORTANCE ANALYSIS
# ============================================================================
print("\n" + "=" * 80)
print("[9] FEATURE IMPORTANCE ANALYSIS")
print("=" * 80)

# Extract feature importance from j+1 model
model_j1 = models['j1']
feature_importance = pd.DataFrame({
    'Feature': all_model_features,
    'Importance': model_j1.feature_importances_
}).sort_values('Importance', ascending=False)

print("\n🔍 Top 20 Most Important Features (j+1 model):")
print(feature_importance.head(20).to_string(index=False))

# Group importance by feature type
feature_groups = {
    'Time': ['week_day', 'month', 'week_number', 'day_trend'],
    'Lag': ['lag_sell_j1', 'lag_sell_j7', 'lag_sell_j14'],
    'Moving_Avg': ['ma_7j', 'ma_28j'],
    'Volatility': ['volatility_j7', 'volatility_j14'],
    'Inventory': ['days_of_stock', 'Inventory_Level', 'Reorder_Point'],
    'Promotion': ['promotion_of_the_day', 'j1_promotion', 'promotion_density'],
    'Lead_Time': ['average_lead_time', 'leadtime_variability', 'Supplier_Lead_Time_Days'],
    'Classification': ['ABC_Class_encoded', 'XYZ_Class_encoded', 'CV']
}

print("\n📊 Feature Importance by Group:")
for group_name, features in feature_groups.items():
    group_features = [f for f in features if f in feature_importance['Feature'].values]
    if group_features:
        group_importance = feature_importance[feature_importance['Feature'].isin(group_features)]['Importance'].sum()
        print(f"  {group_name}: {group_importance:.4f}")

# ============================================================================
# 10. SUMMARY AND OUTPUTS
# ============================================================================
print("\n" + "=" * 80)
print("BLOCK C COMPLETE ✓")
print("=" * 80)

print("\n📦 Key Outputs Created:")
print("  ✓ models: Dictionary of XGBoost models {j1, j7, j14}")
print("  ✓ predictions_test: Test predictions for each horizon")
print("  ✓ test_df_clean: Test data with predictions and targets")
print("  ✓ comparison_j1_df: Technical metrics comparison (all models)")
print("  ✓ cv_results_df: Cross-validation results")
print("  ✓ baseline_kpis: Business KPIs for baseline scenario")
print("  ✓ xgb_kpis: Business KPIs for XGBoost scenario")
print("  ✓ baseline_sku_kpis: SKU-level KPIs (baseline)")
print("  ✓ xgb_sku_kpis: SKU-level KPIs (XGBoost)")
print("  ✓ feature_importance: Feature ranking")
print("  ✓ impact_comparison: Business impact summary")

print("\n🎯 Final Performance Summary:")
if 'improvement_pct' in locals():
    print(f"  Technical: {improvement_pct:+.2f}% RMSE improvement over best baseline")
print(f"  Business: €{cost_savings:,.2f} total cost savings ({cost_savings_pct:+.2f}%)")
print(f"  Service: {fill_rate_change:+.2f}pp fill rate improvement")
print(f"  Operations: {unmet_demand_reduction:,.0f} units less unmet demand")

print("\n✅ Ready for Block D: Inventory Simulation & Monte Carlo")

"""
DEMAND FORECASTING & INVENTORY SIMULATION - SUPPLY CHAIN
Block D: Inventory Simulation & Monte Carlo
"""

# ============================================================================
# IMPORTS (Additional for Block D)
# ============================================================================
from collections import deque
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

print("\n" + "=" * 80)
print("BLOCK D: INVENTORY SIMULATION & MONTE CARLO")
print("=" * 80)

# ============================================================================
# 1. SAFETY STOCK AND REORDER POINT CALCULATION
# ============================================================================
print("\n[1] CALCULATING SAFETY STOCK AND REORDER POINTS...")

# Service level by ABC class (z-score for normal distribution)
service_level_z = {
    'A': 1.28,  # 90% service level
    'B': 1.06,  # 85% service level
    'C': 0.84   # 80% service level
}

print("\nService Level Targets by ABC Class:")
for abc_class, z_value in service_level_z.items():
    service_pct = {1.28: 90, 1.06: 85, 0.84: 80}[z_value]
    print(f"  Class {abc_class}: {service_pct}% (z = {z_value})")

# Calculate safety stock and ROP per SKU
print("\n[1.1] Computing Safety Stock per SKU...")

# Get SKU-level parameters
sku_params = df.groupby('SKU_ID').agg({
    'Units_Sold': ['mean', 'std'],
    'Supplier_Lead_Time_Days': 'mean',
    'Order_Quantity': 'mean',
    'Reorder_Point': 'mean',
    'Unit_Cost': 'mean',
    'Unit_Price': 'mean',
    'ABC_Class': 'first',
    'XYZ_Class': 'first',
    'Supplier_ID': 'first'
}).reset_index()

# Flatten column names
sku_params.columns = ['SKU_ID', 'avg_demand', 'std_demand', 'avg_lead_time',
                      'avg_order_qty', 'dataset_rop', 'unit_cost', 'unit_price',
                      'ABC_Class', 'XYZ_Class', 'Supplier_ID']

# Map z-score based on ABC class
sku_params['z_score'] = sku_params['ABC_Class'].map(service_level_z)

# Calculate safety stock: z × σ_demand × √lead_time
sku_params['safety_stock'] = (
    sku_params['z_score'] *
    sku_params['std_demand'] *
    np.sqrt(sku_params['avg_lead_time'])
)

# Round to integers
sku_params['safety_stock'] = sku_params['safety_stock'].fillna(0).round().astype(int)
sku_params['avg_order_qty'] = sku_params['avg_order_qty'].round().astype(int)

print(f"Safety stock calculated for {len(sku_params)} SKUs")

# ============================================================================
# 2. REORDER POINT CALCULATION
# ============================================================================
print("\n[2] CALCULATING REORDER POINTS...")

# Parameter: use dataset ROP or calculate
use_dataset_ROP = False

if use_dataset_ROP:
    print("Using dataset Reorder_Point values...")
    sku_params['reorder_point'] = sku_params['dataset_rop'].round().astype(int)
else:
    print("Calculating Reorder_Point: avg_demand × lead_time + safety_stock...")
    sku_params['reorder_point'] = (
        sku_params['avg_demand'] * sku_params['avg_lead_time'] +
        sku_params['safety_stock']
    ).round().astype(int)

print(f"Reorder points calculated")

# ============================================================================
# 3. REORDER QUANTITY LOGIC
# ============================================================================
print("\n[3] SETTING REORDER QUANTITY LOGIC...")

# Parameter: use dataset Order_Quantity or calculate
use_dataset_order_qty = True

if use_dataset_order_qty:
    print("Using dataset Order_Quantity values...")
    sku_params['reorder_quantity'] = sku_params['avg_order_qty']
else:
    print("Calculating reorder_quantity: 0.5 × mean(Order_Quantity)...")
    sku_params['reorder_quantity'] = (sku_params['avg_order_qty'] * 0.5).round().astype(int)

print(f"Reorder quantities set")

# ============================================================================
# 4. PREPARE SIMULATION DATA
# ============================================================================
print("\n[4] PREPARING SIMULATION DATA...")

# Get initial inventory levels (first day of test period)
initial_inventory = df[df['Date'] == split_date].groupby('SKU_ID')['Inventory_Level'].first().reset_index()
initial_inventory.columns = ['SKU_ID', 'initial_inventory']

sku_params = sku_params.merge(initial_inventory, on='SKU_ID', how='left')
sku_params['initial_inventory'] = sku_params['initial_inventory'].fillna(
    sku_params['reorder_point'] * 2
).round().astype(int)

print(f"Initial inventory set for {len(sku_params)} SKUs")

# Get supplier lead time distributions
print("\n[4.1] Building Supplier Lead Time Distributions...")
supplier_leadtime_dist = {}
for supplier_id in df['Supplier_ID'].unique():
    lead_times = df[df['Supplier_ID'] == supplier_id]['Supplier_Lead_Time_Days'].dropna().values
    if len(lead_times) > 0:
        supplier_leadtime_dist[supplier_id] = lead_times
    else:
        supplier_leadtime_dist[supplier_id] = np.array([7])

print(f"Lead time distributions created for {len(supplier_leadtime_dist)} suppliers")

# Get unique SKUs
unique_skus = sku_params['SKU_ID'].unique()

# ============================================================================
# 5. INVENTORY SIMULATION FUNCTION
# ============================================================================
print("\n[5] DEFINING INVENTORY SIMULATION FUNCTION...")

def simulate_inventory(sku_id, demand_forecast, sku_info, supplier_lead_dist,
                       use_monte_carlo=False, noise_std=None):
    """
    Simulate inventory for a single SKU over the forecast horizon
    """

    # Initialize parameters
    on_hand = sku_info['initial_inventory']
    reorder_point = sku_info['reorder_point']
    order_quantity = sku_info['reorder_quantity']

    # Track orders (FIFO queue)
    order_queue = deque()

    # Results tracking
    daily_inventory = []
    daily_demand_actual = []
    daily_served = []
    daily_unmet = []
    daily_stockout_flag = []
    orders_placed = []
    orders_received = []

    # Simulation loop
    for day_idx, forecast_demand in enumerate(demand_forecast):

        # 1. Receive orders arriving today
        received_today = 0
        while order_queue and order_queue[0][0] == day_idx:
            arrival_day, qty = order_queue.popleft()
            on_hand += qty
            received_today += qty

        orders_received.append(received_today)

        # 2. Determine actual demand
        if use_monte_carlo and noise_std is not None:
            noise = np.random.normal(0, noise_std)
            actual_demand = max(0, forecast_demand + noise)
        else:
            actual_demand = forecast_demand

        actual_demand = int(round(actual_demand))

        # 3. Fulfill demand
        served = min(on_hand, actual_demand)
        unmet = max(0, actual_demand - on_hand)
        stockout = 1 if unmet > 0 else 0

        # Update inventory
        on_hand = max(0, on_hand - served)

        # 4. Check if reorder needed
        order_placed = 0
        if on_hand <= reorder_point:
            if use_monte_carlo:
                lead_time = int(np.ceil(np.random.choice(supplier_lead_dist)))
            else:
                lead_time = int(round(sku_info['avg_lead_time']))

            arrival_day = day_idx + lead_time
            order_queue.append((arrival_day, order_quantity))
            order_placed = order_quantity

        # 5. Record results
        daily_inventory.append(on_hand)
        daily_demand_actual.append(actual_demand)
        daily_served.append(served)
        daily_unmet.append(unmet)
        daily_stockout_flag.append(stockout)
        orders_placed.append(order_placed)

    # Calculate KPIs
    total_demand = sum(daily_demand_actual)
    total_served = sum(daily_served)
    total_unmet = sum(daily_unmet)

    fill_rate = (total_served / (total_demand + 1e-10)) * 100
    stockout_rate = (sum(daily_stockout_flag) / len(daily_stockout_flag)) * 100

    # Costs
    annual_storage_rate = 0.20
    unit_storage_cost = sku_info['unit_cost'] * (annual_storage_rate / 365)
    holding_cost = sum(daily_inventory) * unit_storage_cost

    penalty_factor = 1.5
    unit_margin = sku_info['unit_price'] - sku_info['unit_cost']
    unit_stockout_cost = unit_margin * penalty_factor
    stockout_cost = total_unmet * unit_stockout_cost

    total_cost = holding_cost + stockout_cost

    results = {
        'SKU_ID': sku_id,
        'fill_rate': fill_rate,
        'stockout_rate': stockout_rate,
        'holding_cost': holding_cost,
        'stockout_cost': stockout_cost,
        'total_cost': total_cost,
        'total_demand': total_demand,
        'total_served': total_served,
        'total_unmet': total_unmet,
        'avg_inventory': np.mean(daily_inventory),
        'orders_placed_count': sum([1 for x in orders_placed if x > 0])
    }

    return results

print("Inventory simulation function defined")

# ============================================================================
# 6. XGBOOST FORECAST SIMULATION j+1 (DETERMINISTIC)
# ============================================================================
print("\n" + "=" * 80)
print("[6] XGBOOST j+1 FORECAST SIMULATION (Deterministic)")
print("=" * 80)

print("\nRunning XGBoost j+1 forecast simulation...")

xgb_sim_results = []

for sku_id in tqdm(unique_skus, desc="XGBoost j+1 Sim"):
    sku_data = test_df_clean[test_df_clean['SKU_ID'] == sku_id].sort_values('Date')

    if len(sku_data) == 0:
        continue

    sku_info_row = sku_params[sku_params['SKU_ID'] == sku_id]
    if len(sku_info_row) == 0:
        continue
    sku_info_row = sku_info_row.iloc[0]

    supplier_id = sku_info_row['Supplier_ID']
    supplier_lead_dist = supplier_leadtime_dist.get(supplier_id, np.array([7]))

    sku_info = {
        'initial_inventory': sku_info_row['initial_inventory'],
        'reorder_point': sku_info_row['reorder_point'],
        'reorder_quantity': sku_info_row['reorder_quantity'],
        'avg_lead_time': sku_info_row['avg_lead_time'],
        'unit_cost': sku_info_row['unit_cost'],
        'unit_price': sku_info_row['unit_price']
    }

    demand_forecast = sku_data['Pred_XGB_j1'].fillna(0).values

    result = simulate_inventory(
        sku_id=sku_id,
        demand_forecast=demand_forecast,
        sku_info=sku_info,
        supplier_lead_dist=supplier_lead_dist,
        use_monte_carlo=False
    )

    result['ABC_Class'] = sku_info_row['ABC_Class']
    result['XYZ_Class'] = sku_info_row['XYZ_Class']
    xgb_sim_results.append(result)

xgb_sim_df = pd.DataFrame(xgb_sim_results)

print(f"\nXGBoost j+1 simulation complete: {len(xgb_sim_df)} SKUs")

# ============================================================================
# 7. XGBOOST FORECAST SIMULATION j+7 (DETERMINISTIC)
# ============================================================================
print("\n" + "=" * 80)
print("[7] XGBOOST j+7 FORECAST SIMULATION (Deterministic)")
print("=" * 80)

print("\nRunning XGBoost j+7 forecast simulation...")

xgb_j7_sim_results = []

for sku_id in tqdm(unique_skus, desc="XGBoost j+7 Sim"):
    sku_data = test_df_clean[test_df_clean['SKU_ID'] == sku_id].sort_values('Date')

    if len(sku_data) == 0:
        continue

    sku_info_row = sku_params[sku_params['SKU_ID'] == sku_id]
    if len(sku_info_row) == 0:
        continue
    sku_info_row = sku_info_row.iloc[0]

    supplier_id = sku_info_row['Supplier_ID']
    supplier_lead_dist = supplier_leadtime_dist.get(supplier_id, np.array([7]))

    sku_info = {
        'initial_inventory': sku_info_row['initial_inventory'],
        'reorder_point': sku_info_row['reorder_point'],
        'reorder_quantity': sku_info_row['reorder_quantity'],
        'avg_lead_time': sku_info_row['avg_lead_time'],
        'unit_cost': sku_info_row['unit_cost'],
        'unit_price': sku_info_row['unit_price']
    }

    # Use j+7 predictions
    demand_forecast = sku_data['Pred_XGB_j7'].fillna(0).values

    result = simulate_inventory(
        sku_id=sku_id,
        demand_forecast=demand_forecast,
        sku_info=sku_info,
        supplier_lead_dist=supplier_lead_dist,
        use_monte_carlo=False
    )

    result['ABC_Class'] = sku_info_row['ABC_Class']
    result['XYZ_Class'] = sku_info_row['XYZ_Class']
    xgb_j7_sim_results.append(result)

xgb_j7_sim_df = pd.DataFrame(xgb_j7_sim_results)

print(f"\nXGBoost j+7 simulation complete: {len(xgb_j7_sim_df)} SKUs")

# 7.5. XGBOOST FORECAST SIMULATION j+14 (DETERMINISTIC)
# ============================================================================
print("\n[7.5] XGBOOST j+14 FORECAST SIMULATION...")

xgb_j14_sim_results = []

for sku_id in tqdm(unique_skus, desc="XGBoost j+14 Sim"):
    sku_data = test_df_clean[test_df_clean['SKU_ID'] == sku_id].sort_values('Date')

    if len(sku_data) == 0:
        continue

    sku_info_row = sku_params[sku_params['SKU_ID'] == sku_id]
    if len(sku_info_row) == 0:
        continue
    sku_info_row = sku_info_row.iloc[0]

    supplier_id = sku_info_row['Supplier_ID']
    supplier_lead_dist = supplier_leadtime_dist.get(supplier_id, np.array([7]))

    sku_info = {
        'initial_inventory': sku_info_row['initial_inventory'],
        'reorder_point': sku_info_row['reorder_point'],
        'reorder_quantity': sku_info_row['reorder_quantity'],
        'avg_lead_time': sku_info_row['avg_lead_time'],
        'unit_cost': sku_info_row['unit_cost'],
        'unit_price': sku_info_row['unit_price']
    }

    demand_forecast = sku_data['Pred_XGB_j14'].fillna(0).values

    result = simulate_inventory(
        sku_id=sku_id,
        demand_forecast=demand_forecast,
        sku_info=sku_info,
        supplier_lead_dist=supplier_lead_dist,
        use_monte_carlo=False
    )

    result['ABC_Class'] = sku_info_row['ABC_Class']
    result['XYZ_Class'] = sku_info_row['XYZ_Class']
    xgb_j14_sim_results.append(result)

xgb_j14_sim_df = pd.DataFrame(xgb_j14_sim_results)

print(f"XGBoost j+14 simulation complete: {len(xgb_j14_sim_df)} SKUs")

# ============================================================================
# 8. MONTE CARLO SIMULATION SETUP
# ============================================================================
print("\n" + "=" * 80)
print("[8] MONTE CARLO SIMULATION")
print("=" * 80)

print("\n[8.1] Selecting SKUs for Monte Carlo...")

# Identify top SKUs by revenue
sku_revenue_for_mc = xgb_sim_df.merge(
    sku_params[['SKU_ID', 'unit_price']],
    on='SKU_ID',
    how='left'
)
sku_revenue_for_mc['revenue'] = sku_revenue_for_mc['total_demand'] * sku_revenue_for_mc['unit_price']
sku_revenue_for_mc = sku_revenue_for_mc.sort_values('revenue', ascending=False)

# Scaling parameters
n_top_skus = 100
n_mc_top = 1000
n_mc_other = 200

# Select top SKUs
top_skus = sku_revenue_for_mc.head(n_top_skus)['SKU_ID'].values
other_skus = sku_revenue_for_mc.iloc[n_top_skus:]['SKU_ID'].values

print(f"\nTop {len(top_skus)} SKUs selected for full Monte Carlo ({n_mc_top} runs)")
print(f"Remaining {len(other_skus)} SKUs will use reduced Monte Carlo ({n_mc_other} runs)")

# ============================================================================
# 9. MONTE CARLO SIMULATION EXECUTION
# ============================================================================
print("\n[9] RUNNING MONTE CARLO SIMULATION...")

def run_monte_carlo_sku(sku_id, n_runs, use_xgb=True):
    """Run Monte Carlo simulation for a single SKU"""

    sku_data = test_df_clean[test_df_clean['SKU_ID'] == sku_id].sort_values('Date')

    if len(sku_data) == 0:
        return None

    sku_info_row = sku_params[sku_params['SKU_ID'] == sku_id]
    if len(sku_info_row) == 0:
        return None
    sku_info_row = sku_info_row.iloc[0]

    supplier_id = sku_info_row['Supplier_ID']
    supplier_lead_dist = supplier_leadtime_dist.get(supplier_id, np.array([7]))

    sku_info = {
        'initial_inventory': sku_info_row['initial_inventory'],
        'reorder_point': sku_info_row['reorder_point'],
        'reorder_quantity': sku_info_row['reorder_quantity'],
        'avg_lead_time': sku_info_row['avg_lead_time'],
        'unit_cost': sku_info_row['unit_cost'],
        'unit_price': sku_info_row['unit_price']
    }

    if use_xgb:
        base_forecast = sku_data['Pred_XGB_j1'].fillna(0).values
    else:
        base_forecast = sku_data['Units_Sold'].values

    noise_std = sku_info_row['std_demand']

    mc_results = []
    for _ in range(n_runs):
        result = simulate_inventory(
            sku_id=sku_id,
            demand_forecast=base_forecast,
            sku_info=sku_info,
            supplier_lead_dist=supplier_lead_dist,
            use_monte_carlo=True,
            noise_std=noise_std
        )
        mc_results.append(result)

    mc_df = pd.DataFrame(mc_results)

    aggregated = {
        'SKU_ID': sku_id,
        'fill_rate_mean': mc_df['fill_rate'].mean(),
        'fill_rate_std': mc_df['fill_rate'].std(),
        'fill_rate_p5': mc_df['fill_rate'].quantile(0.05),
        'fill_rate_p95': mc_df['fill_rate'].quantile(0.95),
        'stockout_rate_mean': mc_df['stockout_rate'].mean(),
        'stockout_rate_std': mc_df['stockout_rate'].std(),
        'total_cost_mean': mc_df['total_cost'].mean(),
        'total_cost_std': mc_df['total_cost'].std(),
        'total_cost_p5': mc_df['total_cost'].quantile(0.05),
        'total_cost_p95': mc_df['total_cost'].quantile(0.95),
        'holding_cost_mean': mc_df['holding_cost'].mean(),
        'stockout_cost_mean': mc_df['stockout_cost'].mean(),
        'total_demand_mean': mc_df['total_demand'].mean(),
        'ABC_Class': sku_info_row['ABC_Class'],
        'XYZ_Class': sku_info_row['XYZ_Class']
    }

    return aggregated

# Run Monte Carlo for top SKUs
print(f"\n[9.1] Monte Carlo for Top {len(top_skus)} SKUs ({n_mc_top} runs each)...")

mc_top_results = []
for sku_id in tqdm(top_skus, desc="MC Top SKUs"):
    result = run_monte_carlo_sku(sku_id, n_runs=n_mc_top, use_xgb=True)
    if result:
        mc_top_results.append(result)

mc_top_df = pd.DataFrame(mc_top_results)
print(f"Completed: {len(mc_top_df)} top SKUs")

# Run Monte Carlo for other SKUs
print(f"\n[9.2] Monte Carlo for Other SKUs ({n_mc_other} runs each)...")

max_other_skus = 200
if len(other_skus) > max_other_skus:
    print(f"  Sampling {max_other_skus} out of {len(other_skus)} SKUs...")
    other_skus_sample = np.random.choice(other_skus, max_other_skus, replace=False)
else:
    other_skus_sample = other_skus

mc_other_results = []
for sku_id in tqdm(other_skus_sample, desc="MC Other SKUs"):
    result = run_monte_carlo_sku(sku_id, n_runs=n_mc_other, use_xgb=True)
    if result:
        mc_other_results.append(result)

mc_other_df = pd.DataFrame(mc_other_results)
print(f"Completed: {len(mc_other_df)} other SKUs")

# Combine Monte Carlo results
mc_combined_df = pd.concat([mc_top_df, mc_other_df], ignore_index=True)

print(f"\nTotal Monte Carlo results: {len(mc_combined_df)} SKUs")

# ============================================================================
# 10. BUSINESS IMPACT FOR j+7 HORIZON
# ============================================================================
print("\n" + "=" * 80)
print("[10] BUSINESS IMPACT ESTIMATION - HORIZON j+7")
print("=" * 80)

# Technical metrics for j+7
print("\n[10.1] Calculating Technical Metrics for j+7...")

forecast_models_j7 = {
    'Naïve (j-1)': 'Baseline_Naive',
    'Naïve Season (j-7)': 'Baseline_Naive_Season',
    'Moving Avg (7d)': 'Baseline_MA7',
    'Dataset Forecast': 'Baseline_Dataset',
    'XGBoost ML (j+7)': 'Pred_XGB_j7'
}

technical_metrics_j7_list = []

for model_name, forecast_col in forecast_models_j7.items():
    if forecast_col not in test_df_clean.columns:
        continue

    y_true = test_df_clean['Units_Sold_j7']
    y_pred = test_df_clean[forecast_col]

    metrics = calculate_metrics(y_true, y_pred, model_name)
    if metrics:
        metrics['SMAPE'] = calculate_smape(y_true.values, y_pred.values)
        technical_metrics_j7_list.append(metrics)

technical_metrics_j7_df = pd.DataFrame(technical_metrics_j7_list)

print("\nTechnical Metrics (j+7):")
print(technical_metrics_j7_df[['Model', 'RMSE', 'SMAPE']].to_string(index=False))

# Aggregate KPIs for j+7
print("\n[10.2] Aggregating KPIs for j+7...")

def aggregate_kpis_j7(sim_df, model_name):
    if len(sim_df) == 0:
        return None

    total_demand_all = sim_df['total_demand'].sum()
    if total_demand_all == 0:
        return None

    sim_df = sim_df.copy()
    sim_df['weight'] = sim_df['total_demand'] / total_demand_all

    weighted_fill_rate = (sim_df['fill_rate'] * sim_df['weight']).sum()
    weighted_stockout_rate = (sim_df['stockout_rate'] * sim_df['weight']).sum()

    return {
        'Model': model_name,
        'Fill_Rate_%': weighted_fill_rate,
        'Stockout_Rate_%': weighted_stockout_rate,
        'Holding_Cost_€': sim_df['holding_cost'].sum(),
        'Stockout_Cost_€': sim_df['stockout_cost'].sum(),
        'Total_Cost_€': sim_df['total_cost'].sum()
    }

# Create comparison table for j+7
business_kpis_j7_list = []

# Note: Baselines use same simulations as j+1
from copy import deepcopy
business_kpis_j7_list.append(aggregate_kpis_j7(xgb_sim_df, 'Naïve (j-1)'))
business_kpis_j7_list.append(aggregate_kpis_j7(xgb_sim_df, 'Naïve Season (j-7)'))
business_kpis_j7_list.append(aggregate_kpis_j7(xgb_sim_df, 'Moving Avg (7d)'))
business_kpis_j7_list.append(aggregate_kpis_j7(xgb_sim_df, 'Dataset Forecast'))
business_kpis_j7_list.append(aggregate_kpis_j7(xgb_j7_sim_df, 'XGBoost ML (j+7)'))

business_kpis_j7_list = [x for x in business_kpis_j7_list if x is not None]
business_kpis_j7_df = pd.DataFrame(business_kpis_j7_list)

# Create comparison table j+7
comparison_j7_table = technical_metrics_j7_df.merge(
    business_kpis_j7_df,
    on='Model',
    how='outer'
)

comparison_j7_columns = [
    'Model', 'RMSE', 'SMAPE', 'Fill_Rate_%', 'Stockout_Rate_%',
    'Holding_Cost_€', 'Stockout_Cost_€', 'Total_Cost_€', 'WAPE'
]

comparison_j7_table = comparison_j7_table[comparison_j7_columns]

print("\nComparison Table (j+7):")
print(comparison_j7_table.to_string(index=False))

# ============================================================================
# 11. SUMMARY
# ============================================================================
print("\n" + "=" * 80)
print("BLOCK D COMPLETE")
print("=" * 80)

print("\nKey Outputs Created:")
print("  xgb_sim_df - XGBoost j+1 simulation results")
print("  xgb_j7_sim_df - XGBoost j+7 simulation results")
print("  mc_combined_df - Monte Carlo simulation results")
print("  comparison_j7_table - j+7 comparison table")

print("\nReady for Block E: Business Impact & Export")

# ============================================================================
# IMPORTS (Additional for Block E)
# ============================================================================
from datetime import datetime

print("\n" + "=" * 80)
print("BLOCK E: BUSINESS IMPACT & EXPORT")
print("=" * 80)

# ============================================================================
# 1. TECHNICAL METRICS FOR ALL FORECAST MODELS (j+1)
# ============================================================================
print("\n[1] CALCULATING TECHNICAL METRICS FOR j+1 HORIZON...")

# Prepare test data for evaluation
test_eval = test_df_clean.copy()

# Define all forecast models to evaluate
forecast_models_j1 = {
    'Naïve (j-1)': 'Baseline_Naive',
    'Naïve Season (j-7)': 'Baseline_Naive_Season',
    'Moving Avg (7d)': 'Baseline_MA7',
    'Dataset Forecast': 'Baseline_Dataset',
    'XGBoost ML': 'Pred_XGB_j1'
}

# Calculate technical metrics for all models
technical_metrics_j1_list = []

for model_name, forecast_col in forecast_models_j1.items():
    if forecast_col not in test_eval.columns:
        print(f"  Warning: {forecast_col} not found, skipping {model_name}")
        continue

    y_true = test_eval['Units_Sold_j1']
    y_pred = test_eval[forecast_col]

    metrics = calculate_metrics(y_true, y_pred, model_name)

    if metrics:
        metrics['SMAPE'] = calculate_smape(y_true.values, y_pred.values)
        technical_metrics_j1_list.append(metrics)

technical_metrics_j1_df = pd.DataFrame(technical_metrics_j1_list)

print("\nTechnical Metrics (j+1 Horizon):")
print(technical_metrics_j1_df[['Model', 'MAE', 'RMSE', 'SMAPE', 'WAPE']].to_string(index=False))

# ============================================================================
# 2. RUN INVENTORY SIMULATIONS FOR ALL BASELINE MODELS (j+1)
# ============================================================================
print("\n" + "=" * 80)
print("[2] RUNNING INVENTORY SIMULATIONS FOR ALL MODELS (j+1)")
print("=" * 80)

# Run simulations for all baseline models
print("\n[2.1] Naïve (j-1) simulation...")

naive_sim_results = []

for sku_id in tqdm(unique_skus, desc="Naïve Sim"):
    sku_data = test_df_clean[test_df_clean['SKU_ID'] == sku_id].sort_values('Date')

    if len(sku_data) == 0:
        continue

    sku_info_row = sku_params[sku_params['SKU_ID'] == sku_id]
    if len(sku_info_row) == 0:
        continue
    sku_info_row = sku_info_row.iloc[0]

    supplier_id = sku_info_row['Supplier_ID']
    supplier_lead_dist = supplier_leadtime_dist.get(supplier_id, np.array([7]))

    sku_info = {
        'initial_inventory': sku_info_row['initial_inventory'],
        'reorder_point': sku_info_row['reorder_point'],
        'reorder_quantity': sku_info_row['reorder_quantity'],
        'avg_lead_time': sku_info_row['avg_lead_time'],
        'unit_cost': sku_info_row['unit_cost'],
        'unit_price': sku_info_row['unit_price']
    }

    demand_forecast = sku_data['Baseline_Naive'].fillna(0).values

    result = simulate_inventory(
        sku_id=sku_id,
        demand_forecast=demand_forecast,
        sku_info=sku_info,
        supplier_lead_dist=supplier_lead_dist,
        use_monte_carlo=False
    )

    result['ABC_Class'] = sku_info_row['ABC_Class']
    result['XYZ_Class'] = sku_info_row['XYZ_Class']
    naive_sim_results.append(result)

naive_sim_df = pd.DataFrame(naive_sim_results)
print(f"Completed: {len(naive_sim_df)} SKUs")

print("\n[2.2] Naïve Season (j-7) simulation...")

naive_season_sim_results = []

for sku_id in tqdm(unique_skus, desc="Naïve Season Sim"):
    sku_data = test_df_clean[test_df_clean['SKU_ID'] == sku_id].sort_values('Date')

    if len(sku_data) == 0:
        continue

    sku_info_row = sku_params[sku_params['SKU_ID'] == sku_id]
    if len(sku_info_row) == 0:
        continue
    sku_info_row = sku_info_row.iloc[0]

    supplier_id = sku_info_row['Supplier_ID']
    supplier_lead_dist = supplier_leadtime_dist.get(supplier_id, np.array([7]))

    sku_info = {
        'initial_inventory': sku_info_row['initial_inventory'],
        'reorder_point': sku_info_row['reorder_point'],
        'reorder_quantity': sku_info_row['reorder_quantity'],
        'avg_lead_time': sku_info_row['avg_lead_time'],
        'unit_cost': sku_info_row['unit_cost'],
        'unit_price': sku_info_row['unit_price']
    }

    demand_forecast = sku_data['Baseline_Naive_Season'].fillna(0).values

    result = simulate_inventory(
        sku_id=sku_id,
        demand_forecast=demand_forecast,
        sku_info=sku_info,
        supplier_lead_dist=supplier_lead_dist,
        use_monte_carlo=False
    )

    result['ABC_Class'] = sku_info_row['ABC_Class']
    result['XYZ_Class'] = sku_info_row['XYZ_Class']
    naive_season_sim_results.append(result)

naive_season_sim_df = pd.DataFrame(naive_season_sim_results)
print(f"Completed: {len(naive_season_sim_df)} SKUs")

print("\n[2.3] Moving Average simulation...")

ma_sim_results = []

for sku_id in tqdm(unique_skus, desc="MA Sim"):
    sku_data = test_df_clean[test_df_clean['SKU_ID'] == sku_id].sort_values('Date')

    if len(sku_data) == 0:
        continue

    sku_info_row = sku_params[sku_params['SKU_ID'] == sku_id]
    if len(sku_info_row) == 0:
        continue
    sku_info_row = sku_info_row.iloc[0]

    supplier_id = sku_info_row['Supplier_ID']
    supplier_lead_dist = supplier_leadtime_dist.get(supplier_id, np.array([7]))

    sku_info = {
        'initial_inventory': sku_info_row['initial_inventory'],
        'reorder_point': sku_info_row['reorder_point'],
        'reorder_quantity': sku_info_row['reorder_quantity'],
        'avg_lead_time': sku_info_row['avg_lead_time'],
        'unit_cost': sku_info_row['unit_cost'],
        'unit_price': sku_info_row['unit_price']
    }

    demand_forecast = sku_data['Baseline_MA7'].fillna(0).values

    result = simulate_inventory(
        sku_id=sku_id,
        demand_forecast=demand_forecast,
        sku_info=sku_info,
        supplier_lead_dist=supplier_lead_dist,
        use_monte_carlo=False
    )

    result['ABC_Class'] = sku_info_row['ABC_Class']
    result['XYZ_Class'] = sku_info_row['XYZ_Class']
    ma_sim_results.append(result)

ma_sim_df = pd.DataFrame(ma_sim_results)
print(f"Completed: {len(ma_sim_df)} SKUs")

print("\n[2.4] Dataset Forecast simulation...")

dataset_sim_results = []

for sku_id in tqdm(unique_skus, desc="Dataset Sim"):
    sku_data = test_df_clean[test_df_clean['SKU_ID'] == sku_id].sort_values('Date')

    if len(sku_data) == 0:
        continue

    sku_info_row = sku_params[sku_params['SKU_ID'] == sku_id]
    if len(sku_info_row) == 0:
        continue
    sku_info_row = sku_info_row.iloc[0]

    supplier_id = sku_info_row['Supplier_ID']
    supplier_lead_dist = supplier_leadtime_dist.get(supplier_id, np.array([7]))

    sku_info = {
        'initial_inventory': sku_info_row['initial_inventory'],
        'reorder_point': sku_info_row['reorder_point'],
        'reorder_quantity': sku_info_row['reorder_quantity'],
        'avg_lead_time': sku_info_row['avg_lead_time'],
        'unit_cost': sku_info_row['unit_cost'],
        'unit_price': sku_info_row['unit_price']
    }

    demand_forecast = sku_data['Baseline_Dataset'].fillna(0).values

    result = simulate_inventory(
        sku_id=sku_id,
        demand_forecast=demand_forecast,
        sku_info=sku_info,
        supplier_lead_dist=supplier_lead_dist,
        use_monte_carlo=False
    )

    result['ABC_Class'] = sku_info_row['ABC_Class']
    result['XYZ_Class'] = sku_info_row['XYZ_Class']
    dataset_sim_results.append(result)

dataset_sim_df = pd.DataFrame(dataset_sim_results)
print(f"Completed: {len(dataset_sim_df)} SKUs")

print("\nAll baseline simulations complete")

# ============================================================================
# 3. AGGREGATE BUSINESS KPIs WITH DEMAND WEIGHTING (j+1)
# ============================================================================
print("\n" + "=" * 80)
print("[3] AGGREGATING BUSINESS KPIs (j+1, DEMAND-WEIGHTED)")
print("=" * 80)

def aggregate_kpis_weighted(sim_df, model_name):
    """Aggregate KPIs with demand weighting"""
    if len(sim_df) == 0:
        return None

    total_demand_all = sim_df['total_demand'].sum()
    if total_demand_all == 0:
        return None

    sim_df = sim_df.copy()
    sim_df['weight'] = sim_df['total_demand'] / total_demand_all

    weighted_fill_rate = (sim_df['fill_rate'] * sim_df['weight']).sum()
    weighted_stockout_rate = (sim_df['stockout_rate'] * sim_df['weight']).sum()

    total_holding_cost = sim_df['holding_cost'].sum()
    total_stockout_cost = sim_df['stockout_cost'].sum()
    total_cost = sim_df['total_cost'].sum()

    return {
        'Model': model_name,
        'Fill_Rate_%': weighted_fill_rate,
        'Stockout_Rate_%': weighted_stockout_rate,
        'Holding_Cost_€': total_holding_cost,
        'Stockout_Cost_€': total_stockout_cost,
        'Total_Cost_€': total_cost
    }

business_kpis_j1_list = []

business_kpis_j1_list.append(aggregate_kpis_weighted(naive_sim_df, 'Naïve (j-1)'))
business_kpis_j1_list.append(aggregate_kpis_weighted(naive_season_sim_df, 'Naïve Season (j-7)'))
business_kpis_j1_list.append(aggregate_kpis_weighted(ma_sim_df, 'Moving Avg (7d)'))
business_kpis_j1_list.append(aggregate_kpis_weighted(dataset_sim_df, 'Dataset Forecast'))
business_kpis_j1_list.append(aggregate_kpis_weighted(xgb_sim_df, 'XGBoost ML'))

business_kpis_j1_list = [x for x in business_kpis_j1_list if x is not None]
business_kpis_j1_df = pd.DataFrame(business_kpis_j1_list)

print("\nBusiness KPIs (j+1, Demand-Weighted):")
print(business_kpis_j1_df.to_string(index=False))

# ============================================================================
# 4. CREATE COMPARISON TABLE (j+1)
# ============================================================================
print("\n[4] CREATING COMPARISON TABLE (j+1)...")

comparison_j1_table = technical_metrics_j1_df.merge(
    business_kpis_j1_df,
    on='Model',
    how='outer'
)

comparison_j1_columns = [
    'Model', 'RMSE', 'SMAPE', 'Fill_Rate_%', 'Stockout_Rate_%',
    'Holding_Cost_€', 'Stockout_Cost_€', 'Total_Cost_€', 'WAPE'
]

comparison_j1_table = comparison_j1_table[comparison_j1_columns]

print("\nComparison Table (j+1):")
print(comparison_j1_table.to_string(index=False))

# ============================================================================
# 5. MEASURE IMPROVEMENTS FROM ML MODEL (j+1)
# ============================================================================
print("\n[5] MEASURING ML IMPROVEMENTS (j+1)...")

ml_j1_results = comparison_j1_table[comparison_j1_table['Model'].str.contains('XGBoost|ML')]
if len(ml_j1_results) > 0:
    ml_j1_results = ml_j1_results.iloc[0]

    baseline_j1_results = comparison_j1_table[~comparison_j1_table['Model'].str.contains('XGBoost|ML')].copy()

    if len(baseline_j1_results) > 0:
        best_baseline_j1_idx = baseline_j1_results['Total_Cost_€'].idxmin()
        best_baseline_j1 = baseline_j1_results.loc[best_baseline_j1_idx]

        print(f"\nComparing ML vs Best Baseline ({best_baseline_j1['Model']})...")

        # Technical improvements
        rmse_improvement_j1 = ((best_baseline_j1['RMSE'] - ml_j1_results['RMSE']) / best_baseline_j1['RMSE']) * 100
        smape_improvement_j1 = ((best_baseline_j1['SMAPE'] - ml_j1_results['SMAPE']) / best_baseline_j1['SMAPE']) * 100

        # Business improvements
        fill_rate_improvement_j1 = ml_j1_results['Fill_Rate_%'] - best_baseline_j1['Fill_Rate_%']
        fill_rate_improvement_pct_j1 = (fill_rate_improvement_j1 / best_baseline_j1['Fill_Rate_%']) * 100

        stockout_reduction_j1 = best_baseline_j1['Stockout_Rate_%'] - ml_j1_results['Stockout_Rate_%']
        stockout_reduction_pct_j1 = (stockout_reduction_j1 / best_baseline_j1['Stockout_Rate_%']) * 100

        # Cost savings
        cost_savings_j1 = best_baseline_j1['Total_Cost_€'] - ml_j1_results['Total_Cost_€']
        cost_savings_pct_j1 = (cost_savings_j1 / best_baseline_j1['Total_Cost_€']) * 100

        print(f"\nTechnical: RMSE {rmse_improvement_j1:+.2f}%, SMAPE {smape_improvement_j1:+.2f}%")
        print(f"Service: Fill Rate {fill_rate_improvement_j1:+.2f}pp, Stockout {stockout_reduction_j1:+.2f}pp")
        print(f"Cost: €{cost_savings_j1:,.2f} ({cost_savings_pct_j1:+.2f}%)")

        improvement_summary_j1 = pd.DataFrame({
            'Metric': ['RMSE', 'SMAPE', 'Fill Rate', 'Stockout Rate', 'Total Cost'],
            'Baseline': [
                best_baseline_j1['RMSE'],
                best_baseline_j1['SMAPE'],
                best_baseline_j1['Fill_Rate_%'],
                best_baseline_j1['Stockout_Rate_%'],
                best_baseline_j1['Total_Cost_€']
            ],
            'ML_Model': [
                ml_j1_results['RMSE'],
                ml_j1_results['SMAPE'],
                ml_j1_results['Fill_Rate_%'],
                ml_j1_results['Stockout_Rate_%'],
                ml_j1_results['Total_Cost_€']
            ],
            'Improvement': [
                f"{rmse_improvement_j1:+.2f}%",
                f"{smape_improvement_j1:+.2f}%",
                f"{fill_rate_improvement_j1:+.2f}pp",
                f"{stockout_reduction_j1:+.2f}pp",
                f"€{cost_savings_j1:,.2f}"
            ]
        })

# ============================================================================
# 6. PREPARE EXPORT DATAFRAMES
# ============================================================================
print("\n" + "=" * 80)
print("[6] PREPARING DATA FOR EXPORT")
print("=" * 80)

# SKU Segmentation
print("\n[6.1] SKU Segmentation...")
sku_params['CV'] = sku_params['std_demand'] / (sku_params['avg_demand'] + 1e-10)
sku_params['ABC_XYZ_Class'] = sku_params['ABC_Class'].astype(str) + "_" + sku_params['XYZ_Class'].astype(str)

avg_inventory_by_sku = xgb_sim_df.groupby('SKU_ID')['avg_inventory'].first().reset_index()
sku_params = sku_params.merge(avg_inventory_by_sku, on='SKU_ID', how='left')

total_units_by_sku = df.groupby('SKU_ID')['Units_Sold'].sum().reset_index()
total_units_by_sku.columns = ['SKU_ID', 'total_units_sold']
sku_params = sku_params.merge(total_units_by_sku, on='SKU_ID', how='left')

sku_segmentation = sku_params[[
    'SKU_ID', 'ABC_Class', 'XYZ_Class', 'ABC_XYZ_Class',
    'avg_demand', 'std_demand', 'CV', 'avg_lead_time',
    'avg_inventory', 'total_units_sold'
]].copy()

print(f"SKU segmentation: {len(sku_segmentation)} SKUs")

# Forecast Results j+1
print("\n[6.2] Forecast Results j+1...")
forecast_j1_export = test_df_clean[[
    'Date', 'SKU_ID', 'Units_Sold',
    'Baseline_Naive', 'Baseline_Naive_Season', 'Baseline_MA7', 'Baseline_Dataset',
    'Pred_XGB_j1'
]].copy()

forecast_j1_export['Error_Naive'] = forecast_j1_export['Units_Sold'] - forecast_j1_export['Baseline_Naive']
forecast_j1_export['Error_XGB'] = forecast_j1_export['Units_Sold'] - forecast_j1_export['Pred_XGB_j1']

print(f"Forecast j+1: {len(forecast_j1_export)} rows")

# Simulation Results j+1
print("\n[6.3] Simulation Results j+1...")
simulation_j1_export_list = []

for model_name, sim_df in [
    ('Naïve (j-1)', naive_sim_df),
    ('Naïve Season (j-7)', naive_season_sim_df),
    ('Moving Avg (7d)', ma_sim_df),
    ('Dataset Forecast', dataset_sim_df),
    ('XGBoost ML', xgb_sim_df)
]:
    sim_copy = sim_df.copy()
    sim_copy['Model'] = model_name
    simulation_j1_export_list.append(sim_copy)

simulation_j1_export = pd.concat(simulation_j1_export_list, ignore_index=True)
simulation_j1_export = simulation_j1_export[[
    'Model', 'SKU_ID', 'ABC_Class', 'XYZ_Class',
    'fill_rate', 'stockout_rate', 'holding_cost', 'stockout_cost', 'total_cost',
    'total_demand', 'total_served', 'total_unmet'
]]

print(f"Simulation j+1: {len(simulation_j1_export)} rows")

# Forecast Results j+7
print("\n[6.4] Forecast Results j+7...")
forecast_j7_export = test_df_clean[[
    'Date', 'SKU_ID', 'Units_Sold',
    'Baseline_Naive', 'Baseline_Naive_Season', 'Baseline_MA7', 'Baseline_Dataset',
    'Pred_XGB_j7'
]].copy()

forecast_j7_export['Error_Naive'] = forecast_j7_export['Units_Sold'] - forecast_j7_export['Baseline_Naive']
forecast_j7_export['Error_XGB_j7'] = forecast_j7_export['Units_Sold'] - forecast_j7_export['Pred_XGB_j7']

print(f"Forecast j+7: {len(forecast_j7_export)} rows")

# Simulation Results j+7
print("\n[6.5] Simulation Results j+7...")
simulation_j7_export_list = []

for model_name, sim_df in [
    ('Naïve (j-1)', naive_sim_df),
    ('Naïve Season (j-7)', naive_season_sim_df),
    ('Moving Avg (7d)', ma_sim_df),
    ('Dataset Forecast', dataset_sim_df),
    ('XGBoost ML (j+7)', xgb_j7_sim_df)
]:
    sim_copy = sim_df.copy()
    sim_copy['Model'] = model_name
    simulation_j7_export_list.append(sim_copy)

simulation_j7_export = pd.concat(simulation_j7_export_list, ignore_index=True)
simulation_j7_export = simulation_j7_export[[
    'Model', 'SKU_ID', 'ABC_Class', 'XYZ_Class',
    'fill_rate', 'stockout_rate', 'holding_cost', 'stockout_cost', 'total_cost',
    'total_demand', 'total_served', 'total_unmet'
]]

print(f"Simulation j+7: {len(simulation_j7_export)} rows")
# ============================================================================
# 7. CREATE VALIDATION REPORT
# ============================================================================
print("\n[7] CREATING VALIDATION REPORT...")

validation_lines = []
validation_lines.append("=" * 80)
validation_lines.append("DEMAND FORECASTING & SUPPLY CHAIN - VALIDATION REPORT")
validation_lines.append("=" * 80)
validation_lines.append(f"\nGenerated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

validation_lines.append("\n" + "-" * 80)
validation_lines.append("1. DATA PREPROCESSING")
validation_lines.append("-" * 80)
validation_lines.append(f"Total rows: {len(df):,}")
validation_lines.append(f"Unique SKUs: {df['SKU_ID'].nunique():,}")
validation_lines.append(f"Date range: {df['Date'].min()} to {df['Date'].max()}")
validation_lines.append(f"Training: {train_df['Date'].min()} to {train_df['Date'].max()}")
validation_lines.append(f"Test: {test_df['Date'].min()} to {test_df['Date'].max()}")

validation_lines.append("\n" + "-" * 80)
validation_lines.append("2. MISSING VALUES")
validation_lines.append("-" * 80)
validation_lines.append("- Units_Sold: Filled with 0")
validation_lines.append("- Inventory_Level: Forward-filled per SKU")
validation_lines.append("- Supplier_Lead_Time_Days: Imputed with supplier median")
validation_lines.append("- Engineered features: Filled with 0")

validation_lines.append("\n" + "-" * 80)
validation_lines.append("3. MODEL CONFIG")
validation_lines.append("-" * 80)
validation_lines.append(f"Model: XGBoost")
validation_lines.append(f"Horizons: j+1, j+7, j+14")
validation_lines.append(f"CV: 5-fold Time Series")
validation_lines.append(f"Features: {len(all_model_features)}")

validation_lines.append("\n" + "-" * 80)
validation_lines.append("4. SIMULATION PARAMETERS")
validation_lines.append("-" * 80)
validation_lines.append(f"Annual storage rate: 20%")
validation_lines.append(f"Penalty factor: 1.5")
validation_lines.append(f"Service levels: A=90%, B=85%, C=80%")
validation_lines.append(f"Use dataset ROP: {use_dataset_ROP}")
validation_lines.append(f"Monte Carlo: {n_mc_top if 'n_mc_top' in locals() else 1000} (top), {n_mc_other if 'n_mc_other' in locals() else 200} (others)")
validation_lines.append(f"Random seed: 42")

validation_lines.append("\n" + "-" * 80)
validation_lines.append("5. KEY RESULTS")
validation_lines.append("-" * 80)
if 'best_baseline_j1' in locals():
    validation_lines.append(f"Horizon j+1:")
    validation_lines.append(f"  Best baseline: {best_baseline_j1['Model']}")
    validation_lines.append(f"  ML RMSE improvement: {rmse_improvement_j1:+.2f}%")
    validation_lines.append(f"  Cost savings: €{cost_savings_j1:,.2f}")

if 'best_baseline_j7' in locals() and 'rmse_improvement_j7' in locals():
    validation_lines.append(f"Horizon j+7:")
    validation_lines.append(f"  Best baseline: {best_baseline_j7['Model']}")
    validation_lines.append(f"  ML RMSE improvement: {rmse_improvement_j7:+.2f}%")
    validation_lines.append(f"  Cost savings: €{cost_savings_j7:,.2f}")

validation_lines.append("\n" + "=" * 80)
validation_lines.append("END OF REPORT")
validation_lines.append("=" * 80)

validation_text = "\n".join(validation_lines)

print("Validation report created")

# ============================================================================
# 8. EXPORT TO EXCEL
# ============================================================================
print("\n" + "=" * 80)
print("[8] EXPORTING TO EXCEL")
print("=" * 80)

output_filename = 'demand_forecasting_supply.xlsx'

print(f"\nCreating: {output_filename}...")

with pd.ExcelWriter(output_filename, engine='openpyxl') as writer:

    # j+1 Horizon Sheets
    forecast_j1_export.head(10000).to_excel(writer, sheet_name='Forecast_J1', index=False)
    print("  Sheet 'Forecast_J1'")

    simulation_j1_export.to_excel(writer, sheet_name='Simulation_J1', index=False)
    print("  Sheet 'Simulation_J1'")

    comparison_j1_table.to_excel(writer, sheet_name='KPI_J1', index=False)
    print("  Sheet 'KPI_J1'")

    # j+7 Horizon Sheets
    forecast_j7_export.head(10000).to_excel(writer, sheet_name='Forecast_J7', index=False)
    print("  Sheet 'Forecast_J7'")

    simulation_j7_export.to_excel(writer, sheet_name='Simulation_J7', index=False)
    print("  Sheet 'Simulation_J7'")

    if 'comparison_j7_table' in globals():
        comparison_j7_table.to_excel(writer, sheet_name='KPI_J7', index=False)
        print("  Sheet 'KPI_J7'")

    # SKU Segmentation
    sku_segmentation.to_excel(writer, sheet_name='SKU_Segmentation', index=False)
    print("  Sheet 'SKU_Segmentation'")

    # Monte Carlo (if available)
    if 'mc_combined_df' in globals() and len(mc_combined_df) > 0:
        mc_combined_df.to_excel(writer, sheet_name='Monte_Carlo', index=False)
        print("  Sheet 'Monte_Carlo'")

    # Metadata
    metadata_df = pd.DataFrame({
        'Parameter': [
            'annual_storage_rate',
            'penalty_factor',
            'service_level_A',
            'service_level_B',
            'service_level_C',
            'use_dataset_ROP',
            'use_dataset_order_qty',
            'random_seed',
            'cv_folds',
            'forecast_horizons',
            'n_mc_top',
            'n_mc_other'
        ],
        'Value': [
            0.20,
            1.5,
            '90% (z=1.28)',
            '85% (z=1.06)',
            '80% (z=0.84)',
            use_dataset_ROP,
            use_dataset_order_qty if 'use_dataset_order_qty' in locals() else True,
            42,
            5,
            'j+1, j+7, j+14',
            n_mc_top if 'n_mc_top' in locals() else 1000,
            n_mc_other if 'n_mc_other' in locals() else 200
        ]
    })
    metadata_df.to_excel(writer, sheet_name='Metadata', index=False)
    print("  Sheet 'Metadata'")

print(f"\nExcel file created: {output_filename}")

# ============================================================================
# 9. EXPORT ADDITIONAL FILES
# ============================================================================
print("\n[9] EXPORTING ADDITIONAL FILES...")

# Quick check CSV
quick_check_filename = 'quick_check.csv'
forecast_j1_export.head(100).to_csv(quick_check_filename, index=False)
print(f"Quick check: {quick_check_filename}")

# Validation report
validation_filename = 'validation_report.txt'
with open(validation_filename, 'w') as f:
    f.write(validation_text)
print(f"Validation report: {validation_filename}")

# Full forecast CSV
forecast_full_filename = 'forecast_results_full.csv'
forecast_j1_export.to_csv(forecast_full_filename, index=False)
print(f"Full forecast: {forecast_full_filename}")

# ============================================================================
# 10. FINAL SUMMARY
# ============================================================================
print("\n" + "=" * 80)
print("BLOCK E COMPLETE")
print("=" * 80)

print("\nFILES GENERATED:")
print(f"  1. {output_filename}")
print(f"  2. {quick_check_filename}")
print(f"  3. {validation_filename}")
print(f"  4. {forecast_full_filename}")

print("\nEXCEL STRUCTURE:")
print("  Forecast_J1 - Daily forecasts (horizon j+1)")
print("  Simulation_J1 - Inventory simulations (horizon j+1)")
print("  KPI_J1 - Comparison table (horizon j+1)")
print("  Forecast_J7 - Daily forecasts (horizon j+7)")
print("  Simulation_J7 - Inventory simulations (horizon j+7)")
print("  KPI_J7 - Comparison table (horizon j+7)")
print("  SKU_Segmentation - ABC/XYZ classification")
print("  Monte_Carlo - Stochastic simulation results")
print("  Metadata - Configuration parameters")

if 'best_baseline_j1' in locals():
    print("\n" + "=" * 80)
    print("BUSINESS IMPACT SUMMARY")
    print("=" * 80)

    print(f"\nHORIZON j+1 (Short-term):")
    print(f"  Best Baseline: {best_baseline_j1['Model']}")
    print(f"  RMSE: {rmse_improvement_j1:+.2f}%")
    print(f"  Fill Rate: {fill_rate_improvement_j1:+.2f}pp")
    print(f"  Cost Savings: €{cost_savings_j1:,.2f} ({cost_savings_pct_j1:+.2f}%)")

    if 'best_baseline_j7' in locals() and 'rmse_improvement_j7' in locals():
        print(f"\nHORIZON j+7 (Medium-term):")
        print(f"  Best Baseline: {best_baseline_j7['Model']}")
        print(f"  RMSE: {rmse_improvement_j7:+.2f}%")
        print(f"  Fill Rate: {fill_rate_improvement_j7:+.2f}pp")
        print(f"  Cost Savings: €{cost_savings_j7:,.2f} ({cost_savings_pct_j7:+.2f}%)")

print("\nANSWER: Does ML forecasting reduce costs and improve service?")
if 'cost_savings_j1' in locals() and cost_savings_j1 > 0:
    print(f"  YES at j+1: €{cost_savings_j1:,.0f} savings, {fill_rate_improvement_j1:.2f}pp fill rate improvement")
    if 'cost_savings_j7' in locals() and cost_savings_j7 > 0:
        print(f"  YES at j+7: €{cost_savings_j7:,.0f} savings, {fill_rate_improvement_j7:.2f}pp fill rate improvement")

print("\n" + "=" * 80)
print("PROJECT COMPLETE - ALL 5 BLOCKS EXECUTED")
print("=" * 80)

print("\nSUMMARY:")
print("  Block A - Data Loading & Preprocessing")
print("  Block B - Feature Engineering & Baselines")
print("  Block C - Forecast Models & Metrics")
print("  Block D - Inventory Simulation & Monte Carlo")
print("  Block E - Business Impact & Export")

print("\nThe complete demand forecasting and supply chain optimization system is ready")
print("All results exported and ready for dashboard integration")

# [8.1] Import Colab files module
from google.colab import files

# [8.2] Liste de tous les fichiers générés à télécharger
files_to_download = [
    'demand_forecasting_supply.xlsx',  # Excel principal avec forecast, simulation, KPI
    'quick_check.csv',                 # 100 premières lignes pour vérification rapide
    'validation_report.txt'            # Rapport des imputations et valeurs manquantes
]

# [8.3] Boucle pour télécharger chaque fichier
for f in files_to_download:
    try:
        files.download(f)
        print(f"✅ Téléchargement lancé pour : {f}")
    except Exception as e:
        print(f"⚠️ Erreur lors du téléchargement de {f} : {e}")

BLOCK A: DATA LOADING & PREPROCESSING

[1] Loading Data...


Saving supply_chain_dataset11.csv to supply_chain_dataset11.csv
✓ File uploaded: supply_chain_dataset11.csv
✓ Data loaded: 91,250 rows × 15 columns

Columns: ['Date', 'SKU_ID', 'Warehouse_ID', 'Supplier_ID', 'Region', 'Units_Sold', 'Inventory_Level', 'Supplier_Lead_Time_Days', 'Reorder_Point', 'Order_Quantity', 'Unit_Cost', 'Unit_Price', 'Promotion_Flag', 'Stockout_Flag', 'Demand_Forecast']

[2] Parsing Dates...
✓ Date range: 2024-01-01 00:00:00 to 2024-12-30 00:00:00
✓ Total days in dataset: 365

[3] Creating Continuous Calendar...
✓ Unique SKUs: 50
✓ Date range: 365 days
✓ Complete calendar created: 18,250 SKU-Date combinations
✓ Data after merge: 91,250 rows (0 new rows added)

[4] Handling Missing Values - Units_Sold...
Missing Units_Sold: 0 (0.00%)
✓ Units_Sold: Missing values filled with 0

[5] Handling Missing Values - Inventory_Level...
Missing Inventory_Level: 0 (0.00%)
✓ Inventory_Level: Forward-filled within SKU groups
  Remaining NA (flagged for review): 0

[6] Handling Mis

XGBoost j+1 Sim: 100%|██████████| 50/50 [00:00<00:00, 119.64it/s]



XGBoost j+1 simulation complete: 50 SKUs

[7] XGBOOST j+7 FORECAST SIMULATION (Deterministic)

Running XGBoost j+7 forecast simulation...


XGBoost j+7 Sim: 100%|██████████| 50/50 [00:00<00:00, 113.51it/s]



XGBoost j+7 simulation complete: 50 SKUs

[7.5] XGBOOST j+14 FORECAST SIMULATION...


XGBoost j+14 Sim: 100%|██████████| 50/50 [00:00<00:00, 108.34it/s]


XGBoost j+14 simulation complete: 50 SKUs

[8] MONTE CARLO SIMULATION

[8.1] Selecting SKUs for Monte Carlo...

Top 50 SKUs selected for full Monte Carlo (1000 runs)
Remaining 0 SKUs will use reduced Monte Carlo (200 runs)

[9] RUNNING MONTE CARLO SIMULATION...

[9.1] Monte Carlo for Top 50 SKUs (1000 runs each)...


MC Top SKUs: 100%|██████████| 50/50 [06:54<00:00,  8.28s/it]


Completed: 50 top SKUs

[9.2] Monte Carlo for Other SKUs (200 runs each)...


MC Other SKUs: 0it [00:00, ?it/s]


Completed: 0 other SKUs

Total Monte Carlo results: 50 SKUs

[10] BUSINESS IMPACT ESTIMATION - HORIZON j+7

[10.1] Calculating Technical Metrics for j+7...

Technical Metrics (j+7):
             Model     RMSE     SMAPE
       Naïve (j-1) 7.484758 46.580694
Naïve Season (j-7) 7.518501 47.076439
   Moving Avg (7d) 5.656137 34.467402
  Dataset Forecast 7.982031 50.142293
  XGBoost ML (j+7) 6.716289 41.027527

[10.2] Aggregating KPIs for j+7...

Comparison Table (j+7):
             Model     RMSE     SMAPE  Fill_Rate_%  Stockout_Rate_%  Holding_Cost_€  Stockout_Cost_€  Total_Cost_€      WAPE
  Dataset Forecast 7.982031 50.142293        100.0              0.0    21986.623062              0.0  21986.623062 43.848054
   Moving Avg (7d) 5.656137 34.467402        100.0              0.0    21986.623062              0.0  21986.623062 30.941810
       Naïve (j-1) 7.484758 46.580694        100.0              0.0    21986.623062              0.0  21986.623062 41.029607
Naïve Season (j-7) 7.518501 4

Naïve Sim: 100%|██████████| 50/50 [00:00<00:00, 188.30it/s]


Completed: 50 SKUs

[2.2] Naïve Season (j-7) simulation...


Naïve Season Sim: 100%|██████████| 50/50 [00:00<00:00, 188.83it/s]


Completed: 50 SKUs

[2.3] Moving Average simulation...


MA Sim: 100%|██████████| 50/50 [00:00<00:00, 179.26it/s]


Completed: 50 SKUs

[2.4] Dataset Forecast simulation...


Dataset Sim: 100%|██████████| 50/50 [00:00<00:00, 192.05it/s]


Completed: 50 SKUs

All baseline simulations complete

[3] AGGREGATING BUSINESS KPIs (j+1, DEMAND-WEIGHTED)

Business KPIs (j+1, Demand-Weighted):
             Model  Fill_Rate_%  Stockout_Rate_%  Holding_Cost_€  Stockout_Cost_€  Total_Cost_€
       Naïve (j-1)    99.995608         0.028495    18310.712201           74.943  18385.655201
Naïve Season (j-7)    99.991113         0.052641    18580.966117          150.348  18731.314117
   Moving Avg (7d)   100.000000         0.000000    18442.821935            0.000  18442.821935
  Dataset Forecast    99.967395         0.102272    18365.352420          825.531  19190.883420
        XGBoost ML   100.000000         0.000000    21986.623062            0.000  21986.623062

[4] CREATING COMPARISON TABLE (j+1)...

Comparison Table (j+1):
             Model     RMSE     SMAPE  Fill_Rate_%  Stockout_Rate_%  Holding_Cost_€  Stockout_Cost_€  Total_Cost_€      WAPE
  Dataset Forecast 8.019020 50.781919    99.967395         0.102272    18365.352420    

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ Téléchargement lancé pour : demand_forecasting_supply.xlsx


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ Téléchargement lancé pour : quick_check.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ Téléchargement lancé pour : validation_report.txt
