In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import timedelta
import warnings
warnings.filterwarnings('ignore')

# Opening banner for the diagnostic run.
print("=" * 80, "EMERGENCY DIAGNOSTIC: WHY DID TEST SCORE BLOW UP?", "=" * 80, sep="\n")

# ----------------------------------------------------------------------------
# LOAD DATA
# Reads the four CSV inputs, normalises the arrival timestamps and drops
# receival rows that cannot be used (non-positive weight or missing rm_id).
# ----------------------------------------------------------------------------
print("\n[1] LOADING DATA")
receivals = pd.read_csv('./Project_materials/data/kernel/receivals.csv')
prediction_mapping = pd.read_csv('./Project_materials/data/prediction_mapping.csv')
submission = pd.read_csv('submission_hybrid.csv')
train_df = pd.read_csv('training_data_hybrid.csv')

# Parse arrivals as UTC, then strip the timezone so they can be compared with
# the timezone-naive forecast dates used further down.
arrival_utc = pd.to_datetime(receivals['date_arrival'], utc=True)
receivals['date_arrival'] = arrival_utc.dt.tz_localize(None)

# Keep rows with a strictly positive net weight and a non-null rm_id.
has_positive_weight = receivals['net_weight'] > 0
has_rm_id = receivals['rm_id'].notna()
receivals = receivals[has_positive_weight & has_rm_id]

# Row counts of every loaded table.
for table_label, table in (
    ("Receivals", receivals),
    ("Prediction mapping", prediction_mapping),
    ("Submission", submission),
    ("Training data", train_df),
):
    print(f"{table_label}: {len(table)}")

# ----------------------------------------------------------------------------
# CHECK 1: PREDICTION MAPPING FORMAT
# Prints the columns, the first rows and the distinct rm_ids of the mapping so
# its layout can be inspected by eye.
# ----------------------------------------------------------------------------
print("\n" + "=" * 80, "CHECK 1: PREDICTION MAPPING STRUCTURE", "=" * 80, sep="\n")

print("\nPrediction mapping columns:")
print(list(prediction_mapping.columns))

print("\nFirst 10 rows:")
print(prediction_mapping.iloc[:10])

mapping_rm_ids = prediction_mapping['rm_id']
print("\nUnique RM_IDs in prediction mapping:")
print(f"Count: {mapping_rm_ids.nunique()}")
print(f"RM_IDs: {sorted(mapping_rm_ids.unique())}")

# ============================================================================
# CHECK 2: HORIZON DISTRIBUTION
# ============================================================================
# Derives each test row's horizon (days from the forecast start to
# forecast_end_date) and compares the test horizons with the horizons present
# in the training data. Besides the upper bound, it also reports test horizons
# that are shorter than anything in training or that never occur in training,
# because a max-only comparison stays silent in both of those cases.
print("\n" + "="*80)
print("CHECK 2: HORIZON DISTRIBUTION (CRITICAL!)")
print("="*80)

# Calculate horizons from prediction_mapping
# NOTE(review): the start date is hard-coded; the mapping also has a
# forecast_start_date column - confirm the two agree.
forecast_start = pd.to_datetime('2025-01-01')
prediction_mapping['forecast_end'] = pd.to_datetime(prediction_mapping['forecast_end_date'])
prediction_mapping['horizon_calculated'] = (prediction_mapping['forecast_end'] - forecast_start).dt.days

print("\nHorizon distribution in test set:")
print(prediction_mapping['horizon_calculated'].value_counts().sort_index())

print("\nHorizon statistics:")
print(prediction_mapping['horizon_calculated'].describe())

print("\nCompare to TRAINING horizons: [7, 30, 60, 90, 150]")
train_horizons = train_df['forecast_horizon'].unique()
print(f"Training horizons: {sorted(train_horizons)}")

print("\n⚠️  CRITICAL CHECK:")
max_test_horizon = prediction_mapping['horizon_calculated'].max()
max_train_horizon = train_df['forecast_horizon'].max()
min_test_horizon = prediction_mapping['horizon_calculated'].min()
min_train_horizon = train_df['forecast_horizon'].min()
print(f"Max test horizon: {max_test_horizon}")
print(f"Max train horizon: {max_train_horizon}")
print(f"Min test horizon: {min_test_horizon}")
print(f"Min train horizon: {min_train_horizon}")

if max_test_horizon > max_train_horizon:
    print("🚨 EXTRAPOLATION ALERT: Test horizons exceed training horizons!")
    print("   Model is extrapolating beyond what it was trained on!")

# Lower-bound counterpart of the check above.
if min_test_horizon < min_train_horizon:
    print("🚨 EXTRAPOLATION ALERT: Test horizons start below the shortest training horizon!")
    print("   Model is extrapolating beyond what it was trained on!")

# Horizons inside the training range can still be values the model never saw.
test_horizon_values = prediction_mapping['horizon_calculated'].dropna()
unseen_mask = ~test_horizon_values.isin(train_horizons)
unseen_horizons = sorted(int(h) for h in test_horizon_values[unseen_mask].unique())
if unseen_horizons:
    print(f"⚠️  {len(unseen_horizons)} distinct test horizons never appear in training "
          f"({unseen_mask.mean()*100:.1f}% of test rows)")
    print(f"   First unseen horizons: {unseen_horizons[:10]}...")

# ----------------------------------------------------------------------------
# CHECK 3: RM_ID OVERLAP
# Compares the set of rm_ids in the training data with the set in the
# prediction mapping and reports how many are shared or exclusive.
# ----------------------------------------------------------------------------
print("\n" + "=" * 80, "CHECK 3: RM_ID OVERLAP BETWEEN TRAIN AND TEST", "=" * 80, sep="\n")

train_rm_ids = set(train_df['rm_id'].unique())
test_rm_ids = set(prediction_mapping['rm_id'].unique())

overlap = train_rm_ids.intersection(test_rm_ids)
only_test = test_rm_ids.difference(train_rm_ids)
only_train = train_rm_ids.difference(test_rm_ids)

# The first line carries the leading blank line of the original report.
overlap_report = (
    ("\nRM_IDs in training", train_rm_ids),
    ("RM_IDs in test", test_rm_ids),
    ("RM_IDs in BOTH", overlap),
    ("RM_IDs ONLY in test", only_test),
    ("RM_IDs ONLY in train", only_train),
)
for report_label, id_set in overlap_report:
    print(f"{report_label}: {len(id_set)}")

# Any rm_id the model never saw during training is worth calling out.
if only_test:
    print(f"\n⚠️  Test has {len(only_test)} RM_IDs NOT seen in training!")
    print(f"These are: {sorted(only_test)[:10]}...")

# ============================================================================
# CHECK 4: FEATURE DISTRIBUTION COMPARISON
# ============================================================================
# Rebuilds three trailing-365-day activity features for the test set and
# prints their mean/median next to the same columns of the training rows
# dated on or after 2024-09-01.
print("\n" + "="*80)
print("CHECK 4: FEATURE DISTRIBUTION (TRAIN VS TEST)")
print("="*80)

# Load test features (need to regenerate with diagnostics)
print("\nRegenerating test features for comparison...")

forecast_start = pd.to_datetime('2025-01-01')
cutoff_365 = forecast_start - timedelta(days=365)

# The window is the same for every rm_id, so filter the receivals only once.
recent_window = receivals[
    (receivals['date_arrival'] >= cutoff_365) &
    (receivals['date_arrival'] < forecast_start)
]

# The features depend only on rm_id, not on the horizon. Compute them once per
# distinct rm_id instead of over the first N mapping rows: consecutive mapping
# rows can share an rm_id, so a head() sample may describe a single rm_id.
feature_columns = ['days_since_last', 'count_365d', 'total_weight_365d']
rm_feature_rows = []
for rm_id in prediction_mapping['rm_id'].unique():
    recent_365 = recent_window[recent_window['rm_id'] == rm_id]

    if len(recent_365) > 0:
        days_since = (forecast_start - recent_365['date_arrival'].max()).days
        count_365 = len(recent_365)
        total_365 = recent_365['net_weight'].sum()
    else:
        # Sentinel for "no arrival inside the window".
        # NOTE(review): older arrivals are ignored here - confirm this matches
        # how days_since_last was built for the training data.
        days_since = 999
        count_365 = 0
        total_365 = 0

    rm_feature_rows.append({
        'rm_id': rm_id,
        'days_since_last': days_since,
        'count_365d': count_365,
        'total_weight_365d': total_365
    })

rm_features = pd.DataFrame(rm_feature_rows, columns=['rm_id'] + feature_columns)

# Broadcast the per-rm_id features to every prediction row so the statistics
# are weighted the way the test set is.
test_feat_df = prediction_mapping[['rm_id']].merge(rm_features, on='rm_id', how='left')[feature_columns]

print("\nKey feature comparison (sample):")
print("\nTRAIN (validation set):")
# NOTE(review): train_date is compared with a date string - presumably it holds
# ISO-formatted dates; confirm.
val_data = train_df[train_df['train_date'] >= '2024-09-01']
for column in feature_columns:
    print(f"  {column}: mean={val_data[column].mean():.1f}, median={val_data[column].median():.1f}")

print("\nTEST:")
for column in feature_columns:
    print(f"  {column}: mean={test_feat_df[column].mean():.1f}, median={test_feat_df[column].median():.1f}")

# ----------------------------------------------------------------------------
# CHECK 5: SUBMISSION SANITY CHECKS
# Summarises the submitted predictions, compares their mean with the mean
# validation target and counts predictions per magnitude bucket.
# ----------------------------------------------------------------------------
print("\n" + "=" * 80, "CHECK 5: SUBMISSION SANITY CHECKS", "=" * 80, sep="\n")

submitted_weights = submission['predicted_weight']

print("\nSubmission statistics:")
print(submitted_weights.describe())

validation_target_mean = val_data['target'].mean()
submission_mean = submitted_weights.mean()

print("\nSubmission vs validation predictions:")
print(f"Val mean: {validation_target_mean:,.0f} kg")
print(f"Submission mean: {submission_mean:,.0f} kg")
print(f"Ratio: {submission_mean / validation_target_mean:.2f}x")

is_zero = submitted_weights == 0

print("\nPrediction distribution:")
print(f"  Zeros: {is_zero.sum()} ({is_zero.mean()*100:.1f}%)")
print(f"  < 1000: {(submitted_weights < 1000).sum()}")
# Half-open buckets [low, high).
for low, high in ((1000, 10000), (10000, 100000)):
    in_bucket = (submitted_weights >= low) & (submitted_weights < high)
    print(f"  {low}-{high}: {in_bucket.sum()}")
print(f"  > 100000: {(submitted_weights >= 100000).sum()}")

# ----------------------------------------------------------------------------
# CHECK 6: SAMPLE PREDICTIONS
# Joins the submission onto the prediction mapping by ID and prints the
# predictions per horizon next to the training targets per horizon.
# ----------------------------------------------------------------------------
print("\n" + "=" * 80, "CHECK 6: SAMPLE PREDICTIONS INSPECTION", "=" * 80, sep="\n")

# Merge submission with prediction_mapping
merged = pd.merge(prediction_mapping, submission, on='ID')
merged_end_dates = pd.to_datetime(merged['forecast_end_date'])
merged['horizon'] = (merged_end_dates - forecast_start).dt.days

print("\nSample predictions (first 20):")
preview_columns = ['ID', 'rm_id', 'horizon', 'predicted_weight']
print(merged[preview_columns].head(20))

# The same summary statistics are used for both tables.
summary_stats = ['count', 'mean', 'median', 'min', 'max']

print("\nPredictions by horizon (test set):")
predictions_by_horizon = merged.groupby('horizon')['predicted_weight']
horizon_stats = predictions_by_horizon.agg(summary_stats)
print(horizon_stats)

print("\nCompare to TRAINING by horizon:")
targets_by_horizon = train_df.groupby('forecast_horizon')['target']
train_horizon_stats = targets_by_horizon.agg(summary_stats)
print(train_horizon_stats)

# ----------------------------------------------------------------------------
# HYPOTHESIS GENERATION
# Turns the quantities gathered by the checks above into a list of likely
# causes, prints it, and closes the report.
# ----------------------------------------------------------------------------
print("\n" + "=" * 80, "LIKELY CAUSES OF SCORE EXPLOSION", "=" * 80, sep="\n")

days_since_ratio = test_feat_df['days_since_last'].mean() / val_data['days_since_last'].mean()
zero_share = (submission['predicted_weight'] == 0).mean()
pred_ratio = submission['predicted_weight'].mean() / val_data['target'].mean()

# Each entry pairs a trigger condition with the message reported when it holds.
candidate_issues = (
    (
        max_test_horizon > max_train_horizon,
        f"1. EXTRAPOLATION: Test horizons ({max_test_horizon}) > Training horizons ({max_train_horizon})",
    ),
    (
        len(only_test) > 0,
        f"2. NEW RM_IDs: {len(only_test)} RM_IDs in test but not in training",
    ),
    (
        days_since_ratio > 2,
        f"3. STALE DATA: Test RM_IDs much more inactive (days_since ratio: {days_since_ratio:.1f}x)",
    ),
    (
        zero_share < 0.30,
        f"4. NOT ENOUGH ZEROS: Only {zero_share*100:.1f}% zeros (expected >30%)",
    ),
    (
        pred_ratio > 1.5,
        f"5. OVERESTIMATING: Test predictions {pred_ratio:.1f}x higher than validation targets",
    ),
)
issues = [message for triggered, message in candidate_issues if triggered]

if issues:
    print("\n🚨 ISSUES FOUND:", *(f"  {issue}" for issue in issues), sep="\n")
else:
    print("\n✓ No obvious issues found - deeper investigation needed")

print("\n" + "=" * 80, "DIAGNOSTIC COMPLETE - REVIEW RESULTS ABOVE", "=" * 80, sep="\n")

EMERGENCY DIAGNOSTIC: WHY DID TEST SCORE BLOW UP?

[1] LOADING DATA
Receivals: 122383
Prediction mapping: 30450
Submission: 30450
Training data: 2725

CHECK 1: PREDICTION MAPPING STRUCTURE

Prediction mapping columns:
['ID', 'rm_id', 'forecast_start_date', 'forecast_end_date']

First 10 rows:
   ID  rm_id forecast_start_date forecast_end_date
0   1    365          2025-01-01        2025-01-02
1   2    365          2025-01-01        2025-01-03
2   3    365          2025-01-01        2025-01-04
3   4    365          2025-01-01        2025-01-05
4   5    365          2025-01-01        2025-01-06
5   6    365          2025-01-01        2025-01-07
6   7    365          2025-01-01        2025-01-08
7   8    365          2025-01-01        2025-01-09
8   9    365          2025-01-01        2025-01-10
9  10    365          2025-01-01        2025-01-11

Unique RM_IDs in prediction mapping:
Count: 203
RM_IDs: [342, 343, 345, 346, 347, 348, 353, 354, 355, 357, 358, 360, 362, 364, 365, 366, 367, 36