In [1]:
"""
CRITICAL EDA: WHY DOES EVERY EXPERIMENT FAIL?
Stop guessing. Analyze the actual data to find the truth.
"""

import numpy as np
import pandas as pd
from datetime import timedelta

# ----------------------------------------------------------------------------
# Load the raw delivery log and the prediction mapping, then normalise them.
# ----------------------------------------------------------------------------
receivals = pd.read_csv('./Project_materials/data/kernel/receivals.csv')
prediction_mapping = pd.read_csv('./Project_materials/data/prediction_mapping.csv')

# Parse arrivals as UTC first, then strip the timezone to get naive timestamps.
arrival_utc = pd.to_datetime(receivals['date_arrival'], utc=True)
receivals['date_arrival'] = arrival_utc.dt.tz_localize(None)

for date_col in ('forecast_start_date', 'forecast_end_date'):
    prediction_mapping[date_col] = pd.to_datetime(prediction_mapping[date_col])

# Keep only rows with a strictly positive net weight and a known rm_id.
has_positive_weight = receivals['net_weight'] > 0
has_rm_id = receivals['rm_id'].notna()
receivals = receivals[has_positive_weight & has_rm_id]

banner = "=" * 80
print(banner)
print("CRITICAL ANALYSIS: WHY EVERY EXPERIMENT FAILS")
print(banner)

# ============================================================================
# 1. WHAT ACTUALLY HAPPENS IN TEST PERIOD?
# ============================================================================
print("\n[1] WHAT WILL ACTUALLY HAPPEN IN JAN-MAY 2025?")
print("-"*80)

# The forecast target is Jan-May 2025, so inspect what was delivered in
# Jan-May of each earlier year as a reference.

test_rms = prediction_mapping['rm_id'].unique()
# Every rm_id with at least one receival in 2024 (NOT restricted to test RMs).
active_2024_rms = receivals[receivals['date_arrival'].dt.year == 2024]['rm_id'].unique()

# Split the *test* RMs by explicit membership. Subtracting len(active_2024_rms)
# from len(test_rms) is only correct if every active RM is also a test RM,
# which nothing here guarantees.
test_rm_is_active = pd.Series(test_rms).isin(active_2024_rms)

print(f"Test RMs: {len(test_rms)}")
print(f"Active in 2024: {int(test_rm_is_active.sum())}")
print(f"Inactive in 2024: {int((~test_rm_is_active).sum())}")

# For active_2024 RMs, what did they deliver in Jan-May historically?
print("\n" + "="*80)
print("JAN-MAY ACTUAL DELIVERIES (Active 2024 RMs only)")
print("="*80)

active_data = receivals[receivals['rm_id'].isin(active_2024_rms)]
jan_may_data = active_data[active_data['date_arrival'].dt.month.isin([1,2,3,4,5])]

# One row per calendar year: total delivered weight and number of distinct RMs.
jan_may_by_year = jan_may_data.groupby(jan_may_data['date_arrival'].dt.year).agg({
    'net_weight': 'sum',
    'rm_id': 'nunique'
})
jan_may_by_year.columns = ['total_weight', 'num_rms']
print(jan_may_by_year)

# Derive the year range from the data instead of hard-coding it in the label.
first_year = jan_may_by_year.index.min()
last_year = jan_may_by_year.index.max()
print(f"\nAverage Jan-May delivery ({first_year}-{last_year}): {jan_may_by_year['total_weight'].mean():,.0f} kg")
print(f"2024 Jan-May: {jan_may_by_year.loc[2024, 'total_weight']:,.0f} kg")

# ============================================================================
# 2. WHAT DOES STEP 5 ACTUALLY PREDICT FOR JAN-MAY 2025?
# ============================================================================
print("\n[2] WHAT DOES STEP 5 PREDICT?")
print("-"*80)

# The Step 5 submission scored 6236; inspect what it predicted, if the
# submission file is available in the working directory.
import os

STEP5_PATH = 'lightgbm_step5_no_calibration.csv'

if not os.path.exists(STEP5_PATH):
    print("Step 5 predictions file not found")
else:
    step5_preds = pd.read_csv(STEP5_PATH)

    # Attach rm_id and forecast dates; horizon is the inclusive day count.
    step5_with_info = step5_preds.merge(prediction_mapping, on='ID')
    window_length = step5_with_info['forecast_end_date'] - step5_with_info['forecast_start_date']
    step5_with_info['horizon'] = window_length.dt.days + 1

    # Predicted weight summed over every horizon, one row per RM.
    step5_by_rm = step5_with_info.groupby('rm_id', as_index=False)['predicted_weight'].sum()
    rm_is_active = step5_by_rm['rm_id'].isin(active_2024_rms)

    print("\nStep 5 predictions by RM group:")

    # --- RMs with deliveries in 2024 ---
    active_preds = step5_by_rm[rm_is_active]
    active_total = active_preds['predicted_weight'].sum()
    active_mean = active_preds['predicted_weight'].mean()
    print(f"\nActive 2024 RMs ({len(active_preds)} RMs):")
    print(f"  Total predicted for all horizons: {active_total:,.0f} kg")
    print(f"  Average per RM: {active_mean:,.0f} kg")

    # The 151-day horizon is the full Jan 1 - May 31 2025 window.
    is_full_window = step5_with_info['horizon'] == 151
    is_active_row = step5_with_info['rm_id'].isin(active_2024_rms)
    step5_jan_may = step5_with_info[is_full_window & is_active_row]
    jan_may_predicted = step5_jan_may['predicted_weight'].sum()
    print(f"\n  Jan-May 2025 prediction (151d horizon): {jan_may_predicted:,.0f} kg")

    # --- RMs without deliveries in 2024 ---
    inactive_preds = step5_by_rm[~rm_is_active]
    inactive_total = inactive_preds['predicted_weight'].sum()
    print(f"\nInactive RMs ({len(inactive_preds)} RMs):")
    print(f"  Total predicted: {inactive_total:,.0f} kg")
    print("  (Should be ~0 due to guardrails)")

    # --- Whole submission ---
    all_predictions = step5_preds['predicted_weight']
    print("\nOverall Step 5 predictions:")
    print(f"  Mean prediction: {all_predictions.mean():,.0f} kg")
    print(f"  Predictions > 0: {(all_predictions > 0).sum()}")
    print(f"  Total weight predicted: {all_predictions.sum():,.0f} kg")

# ============================================================================
# 3. VALIDATION PERIOD ANALYSIS
# ============================================================================
print("\n" + "="*80)
print("[3] VALIDATION PERIOD ANALYSIS")
print("="*80)

# Candidate validation windows as (label, first day, last day), both inclusive.
validation_periods = [
    ('Jul-Aug 2024', '2024-07-01', '2024-08-31'),
    ('Aug-Sep 2024', '2024-08-01', '2024-09-30'),
    ('Aug-Nov 2024', '2024-08-01', '2024-11-30'),
    ('Sep-Nov 2024', '2024-09-01', '2024-11-30'),
    ('Oct-Nov 2024', '2024-10-01', '2024-11-30'),
    ('Nov 2024', '2024-11-01', '2024-11-30'),
]

print("\nDeliveries by validation period (Active 2024 RMs):")
print("-"*80)

# Reference daily rate for Jan-May 2024. It does not depend on the period, so
# compute it once. The day count is derived from the calendar: 2024 is a leap
# year, so Jan 1 - May 31 spans 152 days, not 151.
jan_may_2024_weight = jan_may_by_year.loc[2024, 'total_weight']
jan_may_2024_days = (pd.Timestamp('2024-05-31') - pd.Timestamp('2024-01-01')).days + 1
jan_may_daily = jan_may_2024_weight / jan_may_2024_days

for period_name, start, end in validation_periods:
    period_start = pd.Timestamp(start)
    # Half-open interval [start, end + 1 day): comparing `<= end` against a
    # midnight timestamp would drop every delivery that arrived later on the
    # final day of the window.
    period_end_exclusive = pd.Timestamp(end) + pd.Timedelta(days=1)

    period_data = active_data[
        (active_data['date_arrival'] >= period_start) &
        (active_data['date_arrival'] < period_end_exclusive)
    ]

    total_weight = period_data['net_weight'].sum()
    num_deliveries = len(period_data)
    num_days = (period_end_exclusive - period_start).days

    print(f"\n{period_name} ({num_days} days):")
    print(f"  Total weight: {total_weight:,.0f} kg")
    print(f"  Deliveries: {num_deliveries}")
    print(f"  Daily rate: {total_weight/num_days:,.0f} kg/day")

    # How this window's daily rate compares to the Jan-May 2024 daily rate.
    ratio = (total_weight / num_days) / jan_may_daily
    print(f"  Ratio to Jan-May 2024 daily rate: {ratio:.3f}x")

# ============================================================================
# 4. MONTH-BY-MONTH BREAKDOWN
# ============================================================================
print("\n" + "="*80)
print("[4] MONTH-BY-MONTH PATTERN (Active 2024 RMs)")
print("="*80)

# Restrict to 2024 once, then total the delivered weight per calendar month.
deliveries_2024 = active_data[active_data['date_arrival'].dt.year == 2024]
month_of_arrival = deliveries_2024['date_arrival'].dt.month
monthly_2024 = deliveries_2024.groupby(month_of_arrival)[['net_weight']].sum()

print("\n2024 Monthly deliveries:")
month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
for month, label in enumerate(month_names, start=1):
    # Months without any delivery are absent from the index; skip them.
    if month not in monthly_2024.index:
        continue
    weight = monthly_2024.loc[month, 'net_weight']
    print(f"  {label}: {weight:,.0f} kg")

# ============================================================================
# 5. THE CRITICAL QUESTION: WHAT'S THE GROUND TRUTH?
# ============================================================================
print("\n" + "="*80)
print("[5] THE CRITICAL QUESTION")
print("="*80)

# Jan-May 2024 actuals serve as the stand-in for the unknown 2025 outcome.
jan_may_2024_actual = jan_may_by_year.loc[2024, 'total_weight']

print("\nWe're trying to predict Jan-May 2025.")
print("But we don't know what will actually happen in Jan-May 2025.")
print("\nOur best proxy is Jan-May 2024:")
print(f"  Jan-May 2024 actual: {jan_may_2024_actual:,.0f} kg")

# step5_jan_may only exists when the Step 5 file was found earlier, so the
# same existence check guards its use here.
if os.path.exists('lightgbm_step5_no_calibration.csv'):
    step5_jan_may_total = step5_jan_may['predicted_weight'].sum()
    print(f"\nStep 5 predicted for Jan-May 2025: {step5_jan_may_total:,.0f} kg")

    # Absolute and relative gap between the prediction and the 2024 proxy.
    diff = step5_jan_may_total - jan_may_2024_actual
    pct_diff = (diff / jan_may_2024_actual) * 100

    print(f"Difference: {diff:,.0f} kg ({pct_diff:+.1f}%)")

    direction = "OVER" if pct_diff > 0 else "UNDER"
    print(f"\n⚠️  Step 5 {direction}-predicts by {abs(pct_diff):.1f}%")

# ============================================================================
# 6. WHY EXPERIMENTS FAIL
# ============================================================================
# Sections 6 and 7 only print a fixed narrative; nothing is computed here.
print("\n" + "="*80)
print("[6] WHY EVERY EXPERIMENT FAILS")
print("="*80)

print("\nHypothesis 1: You're already close to optimal")
print("  - Step 5 score: 6,236")
print("  - Every change makes it worse")
print("  - Maybe 6,236 is near the ceiling for this approach")

print("\nHypothesis 2: Test set has different patterns than 2024")
print("  - Jan-May 2025 might not follow Jan-May 2024 patterns")
print("  - Validation on any 2024 period won't help")
print("  - You need to predict 2025 behavior, not 2024 behavior")

# With under-prediction weighted 0.2 and over-prediction weighted 0.8, the
# pinball loss is minimised at the 20th percentile of the target distribution
# (over-predicting is the expensive mistake), not the 80th.
print("\nHypothesis 3: The quantile 0.2 loss is tricky")
print("  - Under-predictions cost 0.2x")
print("  - Over-predictions cost 0.8x")
print("  - You want to predict ~20th percentile, not mean")
print("  - Alpha=0.10 might be too conservative")

print("\nHypothesis 4: Inactive RMs might not all be zero")
print("  - 143 inactive RMs, 21,450 predictions")
print("  - If even a few reactivate, you miss huge points")
print("  - Forcing all to 0 might be too aggressive")

# ============================================================================
# 7. WHAT TO TRY NEXT
# ============================================================================
print("\n" + "="*80)
print("[7] DATA-DRIVEN RECOMMENDATIONS")
print("="*80)

print("\n1. ACCEPT THAT STEP 5 IS NEAR-OPTIMAL")
print("   - Score: 6,236")
print("   - Every tweak makes it worse")
print("   - Stop trying to 'optimize' based on validation")
print("   - Focus on small, safe improvements")

print("\n2. CHECK IF ANY INACTIVE RMs SHOULD BE NON-ZERO")
print("   - Look at purchase orders for 2025")
print("   - See if any inactive RMs have planned deliveries")
print("   - Add small predictions for those only")

# Alpha=0.10 targets the 10th percentile, which sits below the 20th percentile
# that the 0.2 quantile loss rewards; hence the advice to raise alpha.
print("\n3. TRY ALPHA=0.15 or 0.20 (HIGHER, NOT LOWER)")
print("   - Quantile 0.2 loss wants ~20th percentile")
print("   - Alpha=0.10 might be too conservative")
print("   - Try higher alpha (MORE aggressive)")

print("\n4. ENSEMBLE STEP 5 WITH SIMPLE BASELINE")
print("   - Baseline: Jan-May 2024 actual * (horizon/151)")
print("   - Blend: 0.5 * Step5 + 0.5 * Baseline")
print("   - Hedge your bets")

print("\n" + "="*80)

CRITICAL ANALYSIS: WHY EVERY EXPERIMENT FAILS

[1] WHAT WILL ACTUALLY HAPPEN IN JAN-MAY 2025?
--------------------------------------------------------------------------------
Test RMs: 203
Active in 2024: 60
Inactive in 2024: 143

JAN-MAY ACTUAL DELIVERIES (Active 2024 RMs only)
              total_weight  num_rms
date_arrival                       
2012            18986515.0       15
2013            18139187.0       16
2014            23033698.0       17
2015            21527267.0       17
2016            22518814.0       16
2017            21989492.0       13
2018            22652949.0       15
2019            22950723.0       15
2020            19613386.0       17
2021            32420129.0       22
2022            36181591.0       27
2023            33655564.0       35
2024            37542429.0       46

Average Jan-May delivery (2012-2024): 25,477,826 kg
2024 Jan-May: 37,542,429 kg

[2] WHAT DOES STEP 5 PREDICT?
--------------------------------------------------------------------