# In [5]:
# ==============================================================================
# DEBUG & QUICK TEST NOTEBOOK
# ==============================================================================
# Purpose: Validate data pipeline and test model training before full experiments
# 
# This notebook will:
# 1. Validate data preparation and availability
# 2. Test library compatibility and model training
# 3. Run quick model trials (5 each for linear and tree-based)
# 4. Identify potential issues before full modeling
# 5. Provide quick MAPE estimates
# ==============================================================================

import warnings
warnings.filterwarnings('ignore')

# Essential imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import pickle
import json
import gc
import time
from datetime import datetime

# ML imports with error handling
try:
    import lightgbm as lgb
    LGB_AVAILABLE = True
except ImportError:
    LGB_AVAILABLE = False
    print("⚠️ LightGBM not available")

try:
    import xgboost as xgb
    XGB_AVAILABLE = True
except ImportError:
    XGB_AVAILABLE = False
    print("⚠️ XGBoost not available")

try:
    import catboost as cb
    CB_AVAILABLE = True
except ImportError:
    CB_AVAILABLE = False
    print("⚠️ CatBoost not available")

# Sklearn imports
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge, Lasso
from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error, r2_score
from sklearn.model_selection import KFold

# Startup banner: title, launch timestamp, and whether each optional
# gradient-boosting library could be imported by the guarded imports above.
print("🔧 DEBUG & QUICK TEST NOTEBOOK")
print("=" * 60)
print("⏰ Started at: " + datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
print("📚 Library Status:")
for _library, _available in (
    ("LightGBM", LGB_AVAILABLE),
    ("XGBoost", XGB_AVAILABLE),
    ("CatBoost", CB_AVAILABLE),
):
    _mark = '✅' if _available else '❌'
    print(f"   {_library}: {_mark}")

# ==============================================================================
# 1. DATA AVAILABILITY VALIDATION
# ==============================================================================

print("\n" + "="*50)
print("1️⃣ DATA AVAILABILITY VALIDATION")
print("="*50)

# Input/output locations, resolved relative to the current working directory.
BASE_DIR = Path('../data')
DATA_DIR = BASE_DIR
ENGINEERED_DIR = DATA_DIR / "feature_engineered"
RESULTS_DIR = Path('../results')
MODELS_DIR = Path('../models')

# Output directories are created on demand; the input directories checked
# below are only verified, never created.
RESULTS_DIR.mkdir(parents=True, exist_ok=True)
MODELS_DIR.mkdir(parents=True, exist_ok=True)

# Report every required input directory, then abort if any is missing.
print("📁 DIRECTORY STRUCTURE CHECK:")
required_dirs = [BASE_DIR, DATA_DIR, ENGINEERED_DIR]
for dir_path in required_dirs:
    status = "✅" if dir_path.exists() else "❌"
    print(f"   {status} {dir_path}")

if not all(d.exists() for d in required_dirs):
    print("❌ ERROR: Missing required directories")
    print("Please run the preprocessing pipeline first")
    # `exit()` is the interactive-shell helper (installed by `site`, replaced
    # by IPython); raising SystemExit stops execution in every environment.
    raise SystemExit(1)

# Each entry is (file name inside ENGINEERED_DIR, human-readable description).
print("\n📄 REQUIRED FILES CHECK:")
required_files = [
    ("X_train_eng.npy", "Training features"),
    ("X_val_eng.npy", "Validation features"),
    ("X_test_eng.npy", "Test features"),
    ("y_train_eng.npy", "Training targets"),
    ("y_val_eng.npy", "Validation targets"),
    ("feature_metadata.json", "Feature metadata"),
    ("preprocessing_objects.pkl", "Preprocessing objects")
]

# List all files (with on-disk size) before deciding, so a single run shows
# everything that is missing rather than only the first gap.
files_ok = True
for filename, description in required_files:
    file_path = ENGINEERED_DIR / filename
    if file_path.exists():
        size_mb = file_path.stat().st_size / (1024 * 1024)
        print(f"   ✅ {filename:<25} ({description}, {size_mb:.1f}MB)")
    else:
        print(f"   ❌ {filename:<25} (MISSING - {description})")
        files_ok = False

if not files_ok:
    print("❌ ERROR: Missing required files")
    print("Please run the preprocessing pipeline first")
    raise SystemExit(1)

print("✅ All required files found")

# ==============================================================================
# 2. DATA LOADING AND VALIDATION
# ==============================================================================

print("\n" + "="*50)
print("2️⃣ DATA LOADING AND VALIDATION")
print("="*50)

# Load every engineered array (cast to float32) plus the feature-name list.
# Any failure here is fatal: nothing below can run without the data.
try:
    print("📊 Loading data arrays...")
    X_train = np.load(ENGINEERED_DIR / "X_train_eng.npy").astype(np.float32)
    X_val = np.load(ENGINEERED_DIR / "X_val_eng.npy").astype(np.float32)
    X_test = np.load(ENGINEERED_DIR / "X_test_eng.npy").astype(np.float32)
    y_train = np.load(ENGINEERED_DIR / "y_train_eng.npy").astype(np.float32)
    y_val = np.load(ENGINEERED_DIR / "y_val_eng.npy").astype(np.float32)

    with open(ENGINEERED_DIR / "feature_metadata.json", 'r') as f:
        feature_metadata = json.load(f)
    feature_names = feature_metadata['final_feature_names']

    print("✅ Data loading successful")

except Exception as e:
    print(f"❌ ERROR loading data: {e}")
    # `exit()` is the interactive-shell helper (installed by `site`, replaced
    # by IPython); raising SystemExit stops execution in every environment.
    raise SystemExit(1)

# Shapes of the three feature matrices and the number of known feature names.
print("\n📊 DATA VALIDATION:")
print(f"   Training set: {X_train.shape[0]:,} samples × {X_train.shape[1]} features")
print(f"   Validation set: {X_val.shape[0]:,} samples × {X_val.shape[1]} features")
print(f"   Test set: {X_test.shape[0]:,} samples × {X_test.shape[1]} features")
print(f"   Feature names: {len(feature_names)} available")

# NaN counts per array; these names are reused by later sections.
train_missing = np.isnan(X_train).sum()
val_missing = np.isnan(X_val).sum()
test_missing = np.isnan(X_test).sum()
y_train_missing = np.isnan(y_train).sum()
y_val_missing = np.isnan(y_val).sum()

print("\n🔍 MISSING VALUE CHECK:")
print(f"   X_train missing: {train_missing}")
print(f"   X_val missing: {val_missing}")
print(f"   X_test missing: {test_missing}")
print(f"   y_train missing: {y_train_missing}")
print(f"   y_val missing: {y_val_missing}")

missing_total = (
    train_missing + val_missing + test_missing + y_train_missing + y_val_missing
)
if missing_total > 0:
    print("⚠️ WARNING: Missing values detected - may cause model failures")
else:
    print("✅ No missing values - data ready for modeling")

# All splits must share the column count announced by the metadata file.
print("\n🏷️ FEATURE CONSISTENCY:")
print(f"   X_train features: {X_train.shape[1]}")
print(f"   X_val features: {X_val.shape[1]}")
print(f"   X_test features: {X_test.shape[1]}")
print(f"   Expected features: {len(feature_names)}")

if X_train.shape[1] == X_val.shape[1] == X_test.shape[1] == len(feature_names):
    print("✅ Feature consistency verified")
else:
    print("❌ Feature inconsistency detected")

# Range and mean of the targets in both labelled splits.
print("\n🎯 TARGET VARIABLE ANALYSIS:")
print(f"   y_train range: ₹{y_train.min():,.0f} - ₹{y_train.max():,.0f}")
print(f"   y_train mean: ₹{y_train.mean():,.0f}")
print(f"   y_val range: ₹{y_val.min():,.0f} - ₹{y_val.max():,.0f}")
print(f"   y_val mean: ₹{y_val.mean():,.0f}")

# In-memory footprint of the five loaded arrays, in MiB.
total_memory = sum(arr.nbytes for arr in [X_train, X_val, X_test, y_train, y_val]) / (1024**2)
print(f"\n💾 MEMORY USAGE: {total_memory:.1f} MB total")

# ==============================================================================
# 3. LIBRARY COMPATIBILITY TESTING
# ==============================================================================

print("\n" + "="*50)
print("3️⃣ LIBRARY COMPATIBILITY TESTING")
print("="*50)

def test_model_training(model_name, model_class, model_params=None):
    """Test if a model can be trained without errors"""
    if model_params is None:
        model_params = {}
    
    print(f"\n🧪 Testing {model_name}...")
    
    try:
        # Create small test dataset
        X_test_small = X_train[:100].copy()
        y_test_small = y_train[:100].copy()
        
        # Create model
        model = model_class(**model_params)
        
        # Test basic training
        start_time = time.time()
        model.fit(X_test_small, y_test_small)
        train_time = time.time() - start_time
        
        # Test prediction
        y_pred = model.predict(X_test_small[:10])
        
        print(f"   ✅ {model_name}: Training OK ({train_time:.2f}s)")
        print(f"      Predictions range: {y_pred.min():.0f} - {y_pred.max():.0f}")
        
        return True, model, None
        
    except Exception as e:
        print(f"   ❌ {model_name}: FAILED - {str(e)[:100]}")
        return False, None, str(e)

# Smoke-test the scikit-learn estimators first; each call yields
# (ok flag, fitted model or None, error text or None).
print("🔧 BASIC MODEL COMPATIBILITY:")

ridge_ok, ridge_model, ridge_error = test_model_training(
    "Ridge Regression",
    Ridge,
    model_params=dict(alpha=1.0, random_state=42),
)

lasso_ok, lasso_model, lasso_error = test_model_training(
    "Lasso Regression",
    Lasso,
    model_params=dict(alpha=1.0, random_state=42, max_iter=2000),
)

rf_ok, rf_model, rf_error = test_model_training(
    "Random Forest",
    RandomForestRegressor,
    model_params=dict(n_estimators=50, random_state=42, n_jobs=-1),
)

# Optional gradient-boosting libraries: only exercised when their import
# succeeded, otherwise recorded as unavailable.
print("\n🚀 GRADIENT BOOSTING COMPATIBILITY:")

if LGB_AVAILABLE:
    lgb_ok, lgb_model, lgb_error = test_model_training(
        "LightGBM",
        lgb.LGBMRegressor,
        model_params=dict(n_estimators=50, random_state=42, verbose=-1),
    )
else:
    lgb_ok, lgb_model, lgb_error = False, None, "Not available"

if XGB_AVAILABLE:
    xgb_ok, xgb_model, xgb_error = test_model_training(
        "XGBoost",
        xgb.XGBRegressor,
        model_params=dict(n_estimators=50, random_state=42, verbosity=0),
    )
else:
    xgb_ok, xgb_model, xgb_error = False, None, "Not available"

if CB_AVAILABLE:
    cb_ok, cb_model, cb_error = test_model_training(
        "CatBoost",
        cb.CatBoostRegressor,
        model_params=dict(
            iterations=50,
            random_seed=42,
            verbose=False,
            allow_writing_files=False,
        ),
    )
else:
    cb_ok, cb_model, cb_error = False, None, "Not available"

# Header for the early-stopping checks that follow.
print("\n🛑 EARLY STOPPING COMPATIBILITY:")

def test_early_stopping():
    """Check that each available gradient-boosting library can early-stop.

    Fits every library that passed the basic compatibility test on the first
    200 training rows, evaluates on the first 50 validation rows, and asks
    for early stopping after 10 rounds without improvement.

    Returns:
        dict mapping ``'XGBoost'``, ``'LightGBM'`` and ``'CatBoost'`` to a
        status string: ``"✅ Early stopping OK"``, a truncated failure
        message, or a "not available" notice.
    """
    fit_X = X_train[:200].copy()
    fit_y = y_train[:200].copy()
    eval_X = X_val[:50].copy()
    eval_y = y_val[:50].copy()

    results = {}

    # --- XGBoost ---
    if XGB_AVAILABLE and xgb_ok:
        try:
            # From xgboost 1.6 on, `eval_metric` and `early_stopping_rounds`
            # belong on the estimator; passing them to fit() is deprecated
            # and rejected by newer releases. Older releases only accept
            # them in fit().
            try:
                major, minor = (int(part) for part in xgb.__version__.split('.')[:2])
            except ValueError:
                major, minor = 2, 0  # unparseable version: assume current API

            if (major, minor) >= (1, 6):
                booster = xgb.XGBRegressor(
                    n_estimators=100, random_state=42, verbosity=0,
                    eval_metric='mape', early_stopping_rounds=10,
                )
                booster.fit(
                    fit_X, fit_y,
                    eval_set=[(eval_X, eval_y)],
                    verbose=False
                )
            else:
                booster = xgb.XGBRegressor(n_estimators=100, random_state=42, verbosity=0)
                booster.fit(
                    fit_X, fit_y,
                    eval_set=[(eval_X, eval_y)],
                    eval_metric='mape',
                    early_stopping_rounds=10,
                    verbose=False
                )
            results['XGBoost'] = "✅ Early stopping OK"
        except Exception as e:
            results['XGBoost'] = f"❌ Early stopping failed: {str(e)[:50]}"
    else:
        results['XGBoost'] = "⚠️ XGBoost not available"

    # --- LightGBM: early stopping is requested through a callback ---
    if LGB_AVAILABLE and lgb_ok:
        try:
            booster = lgb.LGBMRegressor(n_estimators=100, random_state=42, verbose=-1)
            booster.fit(
                fit_X, fit_y,
                eval_set=[(eval_X, eval_y)],
                callbacks=[lgb.early_stopping(10, verbose=False)]
            )
            results['LightGBM'] = "✅ Early stopping OK"
        except Exception as e:
            results['LightGBM'] = f"❌ Early stopping failed: {str(e)[:50]}"
    else:
        results['LightGBM'] = "⚠️ LightGBM not available"

    # --- CatBoost: early stopping is a fit() argument ---
    if CB_AVAILABLE and cb_ok:
        try:
            booster = cb.CatBoostRegressor(
                iterations=100, random_seed=42, verbose=False, allow_writing_files=False
            )
            booster.fit(
                fit_X, fit_y,
                eval_set=(eval_X, eval_y),
                early_stopping_rounds=10,
                verbose=False
            )
            results['CatBoost'] = "✅ Early stopping OK"
        except Exception as e:
            results['CatBoost'] = f"❌ Early stopping failed: {str(e)[:50]}"
    else:
        results['CatBoost'] = "⚠️ CatBoost not available"

    return results

# Run the early-stopping checks and report one line per library. The status
# text alone does not always say which library it belongs to, so the name is
# prefixed unless the message already contains it.
early_stopping_results = test_early_stopping()
for model_name, result in early_stopping_results.items():
    if model_name in result:
        print(f"   {result}")
    else:
        print(f"   {model_name}: {result}")

# ==============================================================================
# 4. QUICK MODEL TRIALS
# ==============================================================================

print("\n" + "="*50)
print("4️⃣ QUICK MODEL TRIALS (5 EACH)")
print("="*50)

def calculate_mape(y_true, y_pred):
    """Return the mean absolute percentage error, in percent.

    Samples whose true value is exactly zero are excluded, because their
    percentage error is undefined.

    Args:
        y_true: Array-like of true target values.
        y_pred: Array-like of predictions with the same number of elements;
            a column vector of shape ``(n, 1)`` is accepted and flattened.

    Returns:
        MAPE over the non-zero targets multiplied by 100, or ``nan`` when
        no target is non-zero.

    Raises:
        ValueError: If the two inputs hold a different number of elements.
    """
    # Flatten both sides so an (n, 1) prediction cannot broadcast against an
    # (n,) target into an (n, n) error matrix.
    actual = np.asarray(y_true, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()
    if actual.shape != predicted.shape:
        raise ValueError(
            f"y_true and y_pred must have the same length, "
            f"got {actual.size} and {predicted.size}"
        )

    nonzero = actual != 0
    if not nonzero.any():
        # Nothing to average: report "undefined" explicitly.
        return float("nan")

    relative_error = np.abs((actual[nonzero] - predicted[nonzero]) / actual[nonzero])
    return np.mean(relative_error) * 100

def run_model_trials(model_class, model_name, base_params, param_variations, n_trials=5):
    """Fit ``model_class`` ``n_trials`` times and score each fit on validation.

    Trial ``k`` (0-based) uses ``base_params`` overlaid with
    ``param_variations[k]`` when that entry exists. Otherwise it uses a copy
    of ``base_params`` whose ``random_state`` (or, failing that,
    ``random_seed``) is set to ``42 + k``. Models are fitted on the
    module-level training arrays and scored on the validation arrays.

    Args:
        model_class: Estimator class providing ``fit`` and ``predict``.
        model_name: Label used in the printed output.
        base_params: Dict of constructor keyword arguments shared by all trials.
        param_variations: Sequence of dicts with per-trial overrides.
        n_trials: Number of trials to run.

    Returns:
        List with one dict per successful trial, holding the keys ``trial``,
        ``mape``, ``mae``, ``r2``, ``train_time`` and ``params``. Failed
        trials are printed and omitted.
    """
    print(f"\n🏃 Running {n_trials} trials for {model_name}:")

    results = []

    for trial_idx in range(n_trials):
        trial_no = trial_idx + 1
        try:
            if trial_idx < len(param_variations):
                trial_params = {**base_params, **param_variations[trial_idx]}
            else:
                # Out of explicit variations: only the seed changes.
                trial_params = base_params.copy()
                for seed_key in ('random_state', 'random_seed'):
                    if seed_key in trial_params:
                        trial_params[seed_key] = 42 + trial_idx
                        break

            # Time the constructor plus the fit.
            tic = time.time()
            estimator = model_class(**trial_params)
            estimator.fit(X_train, y_train)
            elapsed = time.time() - tic

            val_pred = estimator.predict(X_val)

            mape_score = calculate_mape(y_val, val_pred)
            mae_score = mean_absolute_error(y_val, val_pred)
            r2_value = r2_score(y_val, val_pred)

            results.append({
                'trial': trial_no,
                'mape': mape_score,
                'mae': mae_score,
                'r2': r2_value,
                'train_time': elapsed,
                'params': trial_params
            })

            print(f"   Trial {trial_no}: MAPE {mape_score:6.2f}% | MAE ₹{mae_score:>8,.0f} | R² {r2_value:.3f} | Time {elapsed:4.1f}s")

        except Exception as exc:
            print(f"   Trial {trial_no}: ❌ FAILED - {str(exc)[:60]}")

    if not results:
        return results

    # Spread of MAPE across the successful trials.
    mape_values = [entry['mape'] for entry in results]
    print(f"   📊 Summary: MAPE {np.mean(mape_values):.2f}% ± {np.std(mape_values):.2f}% (min: {np.min(mape_values):.2f}%, max: {np.max(mape_values):.2f}%)")

    # Lowest-MAPE trial.
    winner = min(results, key=lambda entry: entry['mape'])
    print(f"   🏆 Best: Trial {winner['trial']} with {winner['mape']:.2f}% MAPE")

    return results

# Quick trials for the two linear models. Each result list stays empty when
# the corresponding compatibility test failed.
print("📈 LINEAR MODEL TRIALS:")

ridge_results = []
lasso_results = []

if ridge_ok:
    # First trial uses the estimator's default alpha, the rest sweep it.
    ridge_variations = [{}] + [
        {"alpha": alpha} for alpha in (0.1, 10.0, 100.0, 1000.0)
    ]
    ridge_results = run_model_trials(
        Ridge,
        "Ridge Regression",
        base_params=dict(random_state=42),
        param_variations=ridge_variations,
    )

if lasso_ok:
    lasso_variations = [{}] + [
        {"alpha": alpha} for alpha in (0.01, 0.1, 10.0, 100.0)
    ]
    lasso_results = run_model_trials(
        Lasso,
        "Lasso Regression",
        base_params=dict(random_state=42, max_iter=2000),
        param_variations=lasso_variations,
    )

# Quick trials for the tree-based models. Each result list stays empty when
# the corresponding compatibility test failed or the library is missing.
print("\n🌲 TREE-BASED MODEL TRIALS:")

rf_results = []
lgb_results = []
xgb_results = []
cb_results = []

if rf_ok:
    # Default settings first, then two forest sizes and two depth limits.
    rf_variations = [
        dict(),
        dict(n_estimators=100),
        dict(n_estimators=200),
        dict(max_depth=10),
        dict(max_depth=20),
    ]
    rf_results = run_model_trials(
        RandomForestRegressor,
        "Random Forest",
        base_params=dict(n_estimators=50, random_state=42, n_jobs=-1),
        param_variations=rf_variations,
    )

if lgb_ok:
    lgb_variations = [
        dict(),
        dict(learning_rate=0.01),
        dict(learning_rate=0.1),
        dict(num_leaves=20),
        dict(num_leaves=50),
    ]
    lgb_results = run_model_trials(
        lgb.LGBMRegressor,
        "LightGBM",
        base_params=dict(n_estimators=100, random_state=42, verbose=-1),
        param_variations=lgb_variations,
    )

if xgb_ok:
    xgb_variations = [
        dict(),
        dict(learning_rate=0.01),
        dict(learning_rate=0.1),
        dict(max_depth=3),
        dict(max_depth=8),
    ]
    xgb_results = run_model_trials(
        xgb.XGBRegressor,
        "XGBoost",
        base_params=dict(n_estimators=100, random_state=42, verbosity=0),
        param_variations=xgb_variations,
    )

if cb_ok:
    cb_variations = [
        dict(),
        dict(learning_rate=0.01),
        dict(learning_rate=0.1),
        dict(depth=4),
        dict(depth=8),
    ]
    cb_results = run_model_trials(
        cb.CatBoostRegressor,
        "CatBoost",
        base_params=dict(
            iterations=100,
            random_seed=42,
            verbose=False,
            allow_writing_files=False,
        ),
        param_variations=cb_variations,
    )

# ==============================================================================
# 5. RESULTS SUMMARY AND RECOMMENDATIONS
# ==============================================================================

print("\n" + "=" * 50)
print("5️⃣ RESULTS SUMMARY AND RECOMMENDATIONS")
print("=" * 50)

# One summary dict per model family is appended here.
all_results = []

def add_model_results(results_list, model_name):
    """Append a MAPE summary for one model to the module-level ``all_results``.

    Args:
        results_list: Trial dicts, each holding a ``'mape'`` entry. When the
            list is empty or otherwise falsy, nothing is appended.
        model_name: Label stored under the ``'model'`` key.
    """
    if not results_list:
        return

    mape_values = [entry['mape'] for entry in results_list]
    summary = {
        'model': model_name,
        'best_mape': min(mape_values),
        'mean_mape': np.mean(mape_values),
        'std_mape': np.std(mape_values),
        'trials': len(results_list)
    }
    all_results.append(summary)

# Summarise every model family that produced at least one successful trial.
for _trial_results, _display_name in (
    (ridge_results, "Ridge Regression"),
    (lasso_results, "Lasso Regression"),
    (rf_results, "Random Forest"),
    (lgb_results, "LightGBM"),
    (xgb_results, "XGBoost"),
    (cb_results, "CatBoost"),
):
    add_model_results(_trial_results, _display_name)

if all_results:
    # Rank ascending by the best MAPE any trial of that model reached.
    all_results.sort(key=lambda summary: summary['best_mape'])

    table_rule = "=" * 80
    print("🏆 MODEL PERFORMANCE RANKING:")
    print(table_rule)
    print(f"{'Rank':<5} {'Model':<20} {'Best MAPE':<12} {'Mean MAPE':<12} {'Std':<8} {'Status'}")
    print(table_rule)

    for i, result in enumerate(all_results):
        # Status bands: under 18% meets the target, under 25% is good.
        if result['best_mape'] < 18:
            status = "🎉 Target!"
        elif result['best_mape'] < 25:
            status = "📈 Good"
        else:
            status = "⚠️ High"
        print(f"{i+1:<5} {result['model']:<20} {result['best_mape']:<11.2f}% "
              f"{result['mean_mape']:<11.2f}% {result['std_mape']:<7.2f}% {status}")

    # After sorting, the first entry is the overall winner.
    best_model = all_results[0]
    if best_model['best_mape'] < 18:
        target_status = '🎉 ACHIEVED'
    else:
        target_status = '📈 NEEDS IMPROVEMENT'
    print("\n🥇 BEST PERFORMING MODEL:")
    print(f"   Model: {best_model['model']}")
    print(f"   Best MAPE: {best_model['best_mape']:.2f}%")
    print(f"   Mean MAPE: {best_model['mean_mape']:.2f}% (±{best_model['std_mape']:.2f}%)")
    print(f"   Target status: {target_status}")

# System readiness assessment: collect blocking and non-blocking issues, print
# a status line per area, then a final recommendation.
print("\n🔧 SYSTEM READINESS ASSESSMENT:")

ready_for_modeling = True
issues = []

# Data quality: NaNs in any feature matrix OR target vector are blocking.
total_missing = (
    train_missing + val_missing + test_missing + y_train_missing + y_val_missing
)
if total_missing > 0:
    issues.append("Missing values detected in data")
    ready_for_modeling = False

# Feature consistency: every split must match the metadata column count.
features_consistent = (
    X_train.shape[1] == X_val.shape[1] == X_test.shape[1] == len(feature_names)
)
if not features_consistent:
    issues.append("Feature dimension mismatch")
    ready_for_modeling = False

# Model availability: at least three of the six models must have trained.
working_models = sum([ridge_ok, lasso_ok, rf_ok, lgb_ok, xgb_ok, cb_ok])
if working_models < 3:
    issues.append(f"Only {working_models} models working - need at least 3")
    ready_for_modeling = False

# Early stopping: count the libraries whose check reported success.
early_stopping_working = sum(
    "Early stopping OK" in str(result)
    for result in early_stopping_results.values()
)

# Non-blocking: recorded as an issue but does not clear ready_for_modeling.
if early_stopping_working == 0 and (lgb_ok or xgb_ok or cb_ok):
    issues.append("Early stopping not working for any gradient boosting model")

# The data-quality line uses the same total as the issue check above, so the
# two can no longer disagree when only the targets contain NaNs.
print(f"   Data quality: {'✅' if total_missing == 0 else '⚠️'}")
print(f"   Feature consistency: {'✅' if features_consistent else '❌'}")
print(f"   Working models: {working_models}/6")
print(f"   Early stopping: {early_stopping_working}/3 gradient boosting models")
print(f"   Memory usage: {'✅' if total_memory < 500 else '⚠️'} ({total_memory:.1f} MB)")

print("\n🎯 FINAL RECOMMENDATION:")
if ready_for_modeling and not issues:
    print("✅ SYSTEM READY FOR FULL MODELING EXPERIMENTS")
    print("   All checks passed - proceed with confidence!")
    if all_results and best_model['best_mape'] < 20:
        print(f"   Early results show promise ({best_model['best_mape']:.1f}% MAPE)")
    print("\n🚀 Recommended next steps:")
    print("   1. Run full hyperparameter optimization")
    print("   2. Implement ensemble methods")
    print("   3. Generate final predictions")
else:
    print("⚠️ ISSUES DETECTED - ADDRESS BEFORE FULL MODELING:")
    for issue in issues:
        print(f"   ❌ {issue}")
    print("\n🔧 Required actions:")
    # One action per recorded issue, including the early-stopping one.
    for issue in issues:
        if "Missing values" in issue:
            print("   • Re-run preprocessing pipeline with proper imputation")
        elif "Feature dimension" in issue:
            print("   • Check feature engineering consistency")
        elif "models working" in issue:
            print("   • Install missing libraries or fix compatibility issues")
        elif "Early stopping" in issue:
            print("   • Update gradient boosting libraries to compatible versions")
    if ready_for_modeling:
        # Only non-blocking issues were found.
        print("   • Minor issues detected - modeling possible with reduced functionality")

# Translate the best quick-trial MAPE into an outlook for the full experiments.
if all_results:
    print("\n📊 PERFORMANCE EXPECTATIONS:")
    best_quick_mape = best_model['best_mape']

    # Bands are checked in ascending order; the first ceiling the score falls
    # under decides the message, anything else is "challenging".
    for _ceiling, _outlook in (
        (15, "   🎉 Excellent potential - likely to achieve <18% target with optimization"),
        (20, "   📈 Good potential - target achievable with proper hyperparameter tuning"),
        (25, "   🔍 Moderate potential - will need advanced techniques and ensembles"),
    ):
        if best_quick_mape < _ceiling:
            print(_outlook)
            break
    else:
        print("   ⚠️ Challenging - may need additional feature engineering or data")

    # Projected range: 70% to 90% of the quick-trial score.
    projected_low = best_quick_mape * 0.7
    projected_high = best_quick_mape * 0.9
    print(f"   Expected final MAPE range: {projected_low:.1f}% - {projected_high:.1f}%")

# ==============================================================================
# 6. ADVANCED DEBUGGING & EDGE CASE TESTING
# ==============================================================================

print("\n" + "=" * 50)
print("6️⃣ ADVANCED DEBUGGING & EDGE CASE TESTING")
print("=" * 50)

# Header for the per-feature and target distribution checks below.
print("🔍 DATA EDGE CASE ANALYSIS:")

# Check for extreme values
def analyze_extreme_values(arr, name):
    """Print a range summary of ``arr`` and count values far outside its bulk.

    A value is "extreme low" when it lies well below the 1st percentile and
    "extreme high" when it lies well above the 99th percentile. The cut-offs
    scale the percentile away from the bulk of the data, so they stay
    meaningful for negative percentiles (e.g. standardized features):

    * low cut-off:  ``p1 * 0.1`` if ``p1 >= 0`` else ``p1 * 10``
    * high cut-off: ``p99 * 2``  if ``p99 >= 0`` else ``p99 * 0.5``

    Args:
        arr: Numeric array to analyse.
        name: Label printed above the summary.

    Returns:
        Number of extreme-low plus extreme-high values.
    """
    p1, p99 = np.percentile(arr, [1, 99])

    # Multiplying a negative percentile by 0.1 (or 2) would move the cut-off
    # towards the data instead of away from it, so the factor is inverted.
    low_cutoff = p1 * 0.1 if p1 >= 0 else p1 * 10
    high_cutoff = p99 * 2 if p99 >= 0 else p99 * 0.5

    extreme_low = np.sum(arr < low_cutoff)
    extreme_high = np.sum(arr > high_cutoff)

    print(f"   {name}:")
    print(f"      Range: {arr.min():.0f} - {arr.max():.0f}")
    print(f"      Q1-Q99: {p1:.0f} - {p99:.0f}")
    print(f"      Extreme values: {extreme_low} low, {extreme_high} high")

    return extreme_low + extreme_high

# Count extreme values over the first five feature columns (or fewer).
extreme_features = 0
print("\n📊 Feature value distributions:")
for i in range(min(5, X_train.shape[1])):
    feature_name = feature_names[i] if i < len(feature_names) else f"Feature_{i}"
    extremes = analyze_extreme_values(X_train[:, i], feature_name)
    extreme_features += extremes

print("\nTarget variable distribution:")
target_extremes = analyze_extreme_values(y_train, "Target (y_train)")

# --- Data leakage indicators ---
print("\n🔒 DATA LEAKAGE DETECTION:")
# The former check correlated the two scalar target means; np.corrcoef of two
# single values is NaN, so it could never flag anything. The name is kept
# bound (to NaN) only so that code reading it later does not break.
train_val_correlation = float("nan")
try:
    # Rows are compared byte-for-byte: a validation row that also appears
    # verbatim in the training matrix is a direct leakage signal.
    train_row_keys = {row.tobytes() for row in np.ascontiguousarray(X_train)}
    duplicated_val_rows = sum(
        row.tobytes() in train_row_keys for row in np.ascontiguousarray(X_val)
    )
    del train_row_keys
    print(f"   Train-Val duplicate rows: {'⚠️ Suspicious' if duplicated_val_rows > 0 else '✅ Normal'} "
          f"({duplicated_val_rows:,} of {X_val.shape[0]:,} validation rows also in training)")
except MemoryError:
    print("   Train-Val duplicate rows: ⚠️ Could not perform (not enough memory)")

# Two-sample Kolmogorov-Smirnov test on the target distributions.
try:
    from scipy.stats import ks_2samp
    ks_stat, p_value = ks_2samp(y_train, y_val)
    print(f"   Target distribution KS test: {'✅ Similar' if p_value > 0.05 else '⚠️ Different'} (p={p_value:.3f})")
except Exception as e:
    print(f"   Target distribution test: ⚠️ Could not perform ({str(e)[:60]})")

# --- Memory stress test: hold three small forests in memory at once ---
print("\n💾 MEMORY STRESS TEST:")
if rf_ok:
    try:
        test_models = []
        for i in range(3):
            model = RandomForestRegressor(n_estimators=10, random_state=42)
            model.fit(X_train[:100], y_train[:100])
            test_models.append(model)

        print("   Model memory test: ✅ Can handle multiple models")
        del test_models
    except MemoryError:
        print("   Model memory test: ⚠️ Memory constraints detected")
    except Exception as e:
        # A non-memory failure must not be reported as a memory problem.
        print(f"   Model memory test: ⚠️ Test failed ({str(e)[:60]})")
else:
    # Nothing was trained, so do not claim success.
    print("   Model memory test: ⚠️ Skipped (Random Forest not working)")

# --- Reproducibility: two identical Ridge fits must predict identically ---
print("\n🔄 PREDICTION CONSISTENCY TEST:")
if ridge_ok:
    try:
        model1 = Ridge(alpha=1.0, random_state=42)
        model2 = Ridge(alpha=1.0, random_state=42)

        model1.fit(X_train[:500], y_train[:500])
        model2.fit(X_train[:500], y_train[:500])

        pred1 = model1.predict(X_val[:10])
        pred2 = model2.predict(X_val[:10])

        consistency = np.allclose(pred1, pred2, rtol=1e-10)
        print(f"   Model reproducibility: {'✅ Consistent' if consistency else '⚠️ Inconsistent'}")

    except Exception as e:
        print(f"   Model reproducibility: ⚠️ Test failed ({str(e)[:60]})")
# ==============================================================================
# 7. PERFORMANCE BENCHMARKING & OPTIMIZATION HINTS
# ==============================================================================

print("\n" + "=" * 50)
print("7️⃣ PERFORMANCE BENCHMARKING & OPTIMIZATION HINTS")
print("=" * 50)

# --- Wall-clock cost of the quick trials, per model family ---
print("⏱️ TRAINING TIME ANALYSIS:")
if all_results:
    timing_analysis = {}

    for model_type, results_list in (
        ("Ridge", ridge_results),
        ("Lasso", lasso_results),
        ("RandomForest", rf_results),
        ("LightGBM", lgb_results),
        ("XGBoost", xgb_results),
        ("CatBoost", cb_results),
    ):
        if not results_list:
            continue
        times = [trial['train_time'] for trial in results_list]
        timing_analysis[model_type] = dict(
            mean_time=np.mean(times),
            min_time=np.min(times),
            max_time=np.max(times),
        )

    if timing_analysis:
        print("\n   Training speed ranking (faster is better):")
        # Ascending by mean training time.
        sorted_timing = sorted(
            timing_analysis.items(),
            key=lambda entry: entry[1]['mean_time'],
        )
        for rank, (model, times) in enumerate(sorted_timing, start=1):
            print(f"   {rank}. {model:<15}: {times['mean_time']:.2f}s avg ({times['min_time']:.1f}-{times['max_time']:.1f}s)")

# --- Feature importance from a fresh 50-tree random forest ---
print("\n🏷️ FEATURE IMPORTANCE QUICK CHECK:")
if rf_ok and rf_results:
    try:
        rf_temp = RandomForestRegressor(n_estimators=50, random_state=42)
        rf_temp.fit(X_train, y_train)

        importances = rf_temp.feature_importances_
        # Indices of the five largest importances, largest first.
        top_indices = np.argsort(importances)[::-1][:5]

        print("   Top 5 most important features:")
        for rank, idx in enumerate(top_indices, start=1):
            if idx < len(feature_names):
                feature_name = feature_names[idx]
            else:
                feature_name = f"Feature_{idx}"
            print(f"   {rank}. {feature_name}: {importances[idx]:.4f}")

        # One dominant feature hints at leakage; uniformly tiny importances
        # hint at weak features.
        max_importance = np.max(importances)
        if max_importance > 0.5:
            dominance_note = "   ⚠️ Single feature dominance detected - check for data leakage"
        elif max_importance < 0.01:
            dominance_note = "   ⚠️ Very low feature importance - check feature engineering"
        else:
            dominance_note = "   ✅ Balanced feature importance distribution"
        print(dominance_note)

    except Exception as e:
        print(f"   Feature importance check failed: {e}")

# --- How much the boosting models' MAPE moved across the trial parameters ---
print("\n🎛️ HYPERPARAMETER SENSITIVITY:")
for model_type, results_list in (
    ("LightGBM", lgb_results),
    ("XGBoost", xgb_results),
    ("CatBoost", cb_results),
):
    if not results_list or len(results_list) <= 1:
        continue

    mapes = [trial['mape'] for trial in results_list]
    # Relative spread: (max - min) / mean.
    sensitivity = (np.max(mapes) - np.min(mapes)) / np.mean(mapes)

    if sensitivity > 0.2:
        print(f"   {model_type}: 🎯 High sensitivity - careful hyperparameter tuning needed")
    elif sensitivity > 0.1:
        print(f"   {model_type}: ⚖️ Moderate sensitivity - standard tuning sufficient")
    else:
        print(f"   {model_type}: 😌 Low sensitivity - robust to parameter changes")

# ==============================================================================
# 8. PRODUCTION READINESS CHECKLIST
# ==============================================================================

print("\n" + "="*50)
print("8️⃣ PRODUCTION READINESS CHECKLIST")
print("="*50)

# Each entry is (check name, passed?). The names are matched by substring in
# the quick-fix section, so they must not be reworded.
checklist_items = []

# --- Data quality ---
# NaNs in the targets count as well as NaNs in the features, and the column
# count must also match the feature-name list, mirroring the earlier checks.
data_is_complete = (
    train_missing + val_missing + test_missing + y_train_missing + y_val_missing
) == 0
features_match = (
    X_train.shape[1] == X_val.shape[1] == X_test.shape[1] == len(feature_names)
)
checklist_items.append(("Data loading", bool(data_is_complete)))
checklist_items.append(("Feature consistency", bool(features_match)))
checklist_items.append(("Reasonable data size", X_train.shape[0] > 1000 and X_val.shape[0] > 100))
checklist_items.append(("Memory efficiency", total_memory < 1000))  # Under 1GB

# --- Model availability ---
checklist_items.append(("Linear models working", ridge_ok and lasso_ok))
checklist_items.append(("Tree models working", rf_ok))
checklist_items.append(("Gradient boosting available", lgb_ok or xgb_ok or cb_ok))
checklist_items.append(("Multiple model types", sum([ridge_ok, lasso_ok, rf_ok, lgb_ok, xgb_ok, cb_ok]) >= 4))

# --- Performance indicators (only when at least one model produced results) ---
if all_results:
    best_mape = min(r['best_mape'] for r in all_results)
    checklist_items.append(("Reasonable baseline performance", best_mape < 30))
    checklist_items.append(("Promising performance", best_mape < 25))
    checklist_items.append(("Target-achievable performance", best_mape < 22))

# --- Technical requirements ---
# Use the outcome of the reproducibility test when it ran; `consistency` is
# only bound if that test completed, so fall back to "passed" otherwise.
try:
    prediction_consistent = bool(consistency)
except NameError:
    prediction_consistent = True

checklist_items.append(("Early stopping works", early_stopping_working > 0))
checklist_items.append(("Prediction consistency", prediction_consistent))
checklist_items.append(("No extreme outliers", extreme_features < X_train.shape[0] * 0.01))

print("📋 PRODUCTION READINESS SCORE:")
print("="*60)

passed_checks = 0
total_checks = len(checklist_items)

for item_name, status in checklist_items:
    status_symbol = "✅" if status else "❌"
    print(f"   {status_symbol} {item_name}")
    if status:
        passed_checks += 1

# Percentage of passed checks decides the overall verdict.
readiness_score = (passed_checks / total_checks) * 100
print(f"\n📊 OVERALL READINESS: {readiness_score:.1f}% ({passed_checks}/{total_checks} checks passed)")

if readiness_score >= 90:
    print("🎉 EXCELLENT - System fully ready for production modeling")
elif readiness_score >= 75:
    print("✅ GOOD - System ready with minor optimizations needed")
elif readiness_score >= 60:
    print("⚠️ MODERATE - Address key issues before full modeling")
else:
    print("❌ POOR - Significant issues need resolution")

# ==============================================================================
# 9. QUICK FIX SUGGESTIONS
# ==============================================================================

print("\n" + "="*50)
print("9️⃣ QUICK FIX SUGGESTIONS")
print("="*50)

print("🔧 IMMEDIATE ACTIONS:")

# Names of the checklist entries that did not pass, in checklist order.
failed_checks = [item for item, status in checklist_items if not status]

# (substring of the check name, advice). The first matching entry wins.
fix_suggestions = (
    ("Data loading", "   • Re-run preprocessing pipeline to fix missing values"),
    ("Feature consistency", "   • Check feature engineering - ensure same transformations on all sets"),
    ("data size", "   • Verify preprocessing didn't over-filter the data"),
    ("Memory efficiency", "   • Consider feature selection or data type optimization"),
    ("Linear models", "   • Check sklearn installation and version compatibility"),
    ("Tree models", "   • Verify sklearn RandomForestRegressor is working"),
    ("Gradient boosting", "   • Install LightGBM, XGBoost, or CatBoost libraries"),
    ("Multiple model types", "   • Install missing ML libraries for better model diversity"),
    ("baseline performance", "   • Check feature engineering quality and target variable preprocessing"),
    ("Early stopping", "   • Update gradient boosting libraries to compatible versions"),
    ("extreme outliers", "   • Review outlier removal in preprocessing pipeline"),
)

if not failed_checks:
    print("   🎉 No issues detected - you're ready to go!")
else:
    # Iterate the failed names directly instead of rebuilding the failed
    # list for every checklist entry.
    for item_name in failed_checks:
        for keyword, advice in fix_suggestions:
            if keyword in item_name:
                print(advice)
                break
        else:
            # Checks without a dedicated suggestion are still surfaced, so a
            # failure never yields an empty action list.
            print(f"   • Review failed check: {item_name}")
# Performance optimization suggestions
if all_results:
    best_model = all_results[0]
    
    print(f"\n🚀 PERFORMANCE OPTIMIZATION:")
    print(f"   Current best: {best_model['model']} with {best_model['best_mape']:.2f}% MAPE")
    
    if best_model['best_mape'] > 25:
        print("   • Focus on feature engineering - performance suggests weak features")
        print("   • Consider additional data sources or domain expertise")
        print("   • Review target variable transformation (log, sqrt, etc.)")
        
    elif best_model['best_mape'] > 20:
        print("   • Implement hyperparameter optimization (Optuna recommended)")
        print("   • Try ensemble methods with top-performing models")
        print("   • Consider feature selection to reduce noise")
        
    elif best_model['best_mape'] > 18:
        print("   • Fine-tune hyperparameters of best-performing models")
        print("   • Implement stacking ensemble with diverse base models")
        print("   • Consider advanced feature engineering techniques")
        
    else:
        print("   • You're very close! Focus on ensemble methods")
        print("   • Fine-tune the top 2-3 models and create weighted ensemble")
        print("   • Consider cross-validation for more robust estimates")

# ==============================================================================
# 10. SAVE DEBUG RESULTS
# ==============================================================================

print("\n" + "="*50)
print("🔟 SAVING DEBUG RESULTS")
print("="*50)


def _json_default(value):
    """Fallback encoder for objects the json module cannot serialize natively.

    NumPy scalars become the matching Python scalar and NumPy arrays become
    nested lists, so numeric results stay numeric in the report instead of
    being written as strings. Anything else is stringified.
    """
    if isinstance(value, np.generic):
        native = value.item()
        # Guard against dtypes whose .item() hands back another NumPy scalar.
        return str(value) if isinstance(native, np.generic) else native
    if isinstance(value, np.ndarray):
        return value.tolist()
    return str(value)


# Create debug results summary
debug_results = {
    'timestamp': datetime.now().isoformat(),
    'data_info': {
        'train_shape': X_train.shape,
        'val_shape': X_val.shape,
        'test_shape': X_test.shape,
        'feature_count': len(feature_names),
        'total_memory_mb': round(total_memory, 1)
    },
    'data_quality': {
        'missing_values': int(train_missing + val_missing + test_missing + y_train_missing + y_val_missing),
        'extreme_features': int(extreme_features),
        'target_range': [float(y_train.min()), float(y_train.max())]
    },
    'model_compatibility': {
        'ridge': ridge_ok,
        'lasso': lasso_ok,
        'random_forest': rf_ok,
        'lightgbm': lgb_ok,
        'xgboost': xgb_ok,
        'catboost': cb_ok
    },
    'early_stopping': early_stopping_results,
    'performance_results': all_results,
    'readiness_score': round(readiness_score, 1),
    'recommendations': {
        'ready_for_modeling': readiness_score >= 75,
        # Projected range: 70%-90% of the quick-test best MAPE.
        'expected_final_mape': f"{best_model['best_mape'] * 0.7:.1f}-{best_model['best_mape'] * 0.9:.1f}%" if all_results else "Unknown",
        'priority_fixes': [item for item, status in checklist_items if not status]
    }
}

# Save debug results (best effort: a failure is reported, not raised)
debug_file = RESULTS_DIR / "debug_results.json"
try:
    with open(debug_file, 'w', encoding='utf-8') as f:
        json.dump(debug_results, f, indent=2, default=_json_default)
    print(f"✅ Debug results saved: {debug_file}")
except Exception as e:
    print(f"⚠️ Could not save debug results: {e}")

# Create a simple KEY=VALUE status file for automation
status_file = RESULTS_DIR / "system_status.txt"
try:
    with open(status_file, 'w', encoding='utf-8') as f:
        f.write(f"SYSTEM_STATUS={'READY' if readiness_score >= 75 else 'NOT_READY'}\n")
        f.write(f"READINESS_SCORE={readiness_score:.1f}\n")
        f.write(f"BEST_MAPE={best_model['best_mape']:.2f}\n" if all_results else "BEST_MAPE=UNKNOWN\n")
        f.write(f"TIMESTAMP={datetime.now().isoformat()}\n")
    print(f"✅ Status file saved: {status_file}")
except Exception as e:
    print(f"⚠️ Could not save status file: {e}")

print(f"\n⏰ Complete debug finished at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("="*60)

# ==============================================================================
# 11. FINAL SUMMARY DASHBOARD
# ==============================================================================

print(f"\n🎯 FINAL SUMMARY DASHBOARD")
print("=" * 60)

# Traffic-light status derived from the readiness score (>=75, >=50, below).
if readiness_score >= 75:
    dashboard_status = '🟢 READY'
elif readiness_score >= 50:
    dashboard_status = '🟡 ISSUES'
else:
    dashboard_status = '🔴 NOT READY'
print(f"📊 SYSTEM STATUS: {dashboard_status}")

# Performance, target and speed lines all depend on having model results.
if all_results:
    print(f"📈 PERFORMANCE: {best_model['model']} @ {best_model['best_mape']:.2f}% MAPE")

    dashboard_target = '🎉 ACHIEVABLE' if best_model['best_mape'] < 22 else '📈 CHALLENGING'
    print(f"🎯 TARGET: {dashboard_target}")

    # Count results whose model name mentions LightGBM or XGBoost.
    dashboard_fast_models = sum(
        1 for r in all_results
        if 'LightGBM' in r['model'] or 'XGBoost' in r['model']
    )
    print(f"⚡ SPEED: {dashboard_fast_models} fast models available")
else:
    print("No performance data")
    print(f"🎯 TARGET: {'UNKNOWN'}")
    print("Speed unknown")

dashboard_working_libs = sum((ridge_ok, lasso_ok, rf_ok, lgb_ok, xgb_ok, cb_ok))
print(f"🧠 LIBRARIES: {dashboard_working_libs}/6 working")

dashboard_memory_flag = '✅' if total_memory < 500 else '⚠️'
print(f"💾 MEMORY: {total_memory:.0f}MB ({dashboard_memory_flag})")

# Closing verdict: "ready" needs both model results and a readiness score >= 75.
if not (all_results and readiness_score >= 75):
    print(f"\n🔧 FIXES NEEDED: Address {total_checks - passed_checks} issues before full modeling")
else:
    print(f"\n🚀 YOU'RE READY! Expected final MAPE: {best_model['best_mape'] * 0.7:.1f}% - {best_model['best_mape'] * 0.9:.1f}%")

print("=" * 60)

🔧 DEBUG & QUICK TEST NOTEBOOK
⏰ Started at: 2025-09-05 20:53:30
📚 Library Status:
   LightGBM: ✅
   XGBoost: ✅
   CatBoost: ✅

1️⃣ DATA AVAILABILITY VALIDATION
📁 DIRECTORY STRUCTURE CHECK:
   ✅ ..\data
   ✅ ..\data
   ✅ ..\data\feature_engineered

📄 REQUIRED FILES CHECK:
   ✅ X_train_eng.npy           (Training features, 16.2MB)
   ✅ X_val_eng.npy             (Validation features, 4.0MB)
   ✅ X_test_eng.npy            (Test features, 3.8MB)
   ✅ y_train_eng.npy           (Training targets, 0.2MB)
   ✅ y_val_eng.npy             (Validation targets, 0.0MB)
   ✅ feature_metadata.json     (Feature metadata, 0.0MB)
   ✅ preprocessing_objects.pkl (Preprocessing objects, 0.0MB)
✅ All required files found

2️⃣ DATA LOADING AND VALIDATION
📊 Loading data arrays...
✅ Data loading successful

📊 DATA VALIDATION:
   Training set: 42,436 samples × 100 features
   Validation set: 10,610 samples × 100 features
   Test set: 10,000 samples × 100 features
   Feature names: 100 available

🔍 MISSING VALUE C

In [6]:
# ==============================================================================
# 10. CROSS-VALIDATION TESTING & VALIDATION
# ==============================================================================

print("\n" + "=" * 50)
print("🔄 CROSS-VALIDATION TESTING & VALIDATION")
print("=" * 50)

def test_cv_implementation():
    """Cross-check a hand-written 3-fold CV loop against sklearn's cross_val_score.

    Both runs fit Ridge(alpha=1.0) on X_train / y_train using the same
    shuffled KFold configuration and are scored with calculate_mape.

    Returns:
        (manual_mean, sklearn_mean): mean MAPE of each approach, or
        (None, None) when `ridge_ok` is falsy and the test is skipped.
    """
    print("🔍 TESTING CROSS-VALIDATION IMPLEMENTATION:")

    from sklearn.model_selection import KFold, cross_val_score

    # Nothing to compare if Ridge did not pass the earlier compatibility check.
    if not ridge_ok:
        return None, None

    print("\n   Testing CV with Ridge Regression...")

    # --- Approach 1: explicit loop over the folds -------------------------
    splitter = KFold(n_splits=3, shuffle=True, random_state=42)
    manual_cv_scores = []

    for fold_number, (fit_rows, held_rows) in enumerate(splitter.split(X_train), start=1):
        # A brand-new estimator per fold, so no state carries over between folds.
        ridge = Ridge(alpha=1.0, random_state=42)
        ridge.fit(X_train[fit_rows], y_train[fit_rows])

        fold_mape = calculate_mape(y_train[held_rows], ridge.predict(X_train[held_rows]))
        manual_cv_scores.append(fold_mape)
        print(f"      Manual Fold {fold_number}: {fold_mape:.3f}% MAPE")

    manual_cv_mean = np.mean(manual_cv_scores)
    manual_cv_std = np.std(manual_cv_scores)

    # --- Approach 2: sklearn's cross_val_score ----------------------------
    def negated_mape(estimator, X_val, y_val):
        # sklearn maximizes scores, so hand it the negative MAPE.
        return -calculate_mape(y_val, estimator.predict(X_val))

    negated_scores = cross_val_score(
        Ridge(alpha=1.0, random_state=42),
        X_train,
        y_train,
        cv=KFold(n_splits=3, shuffle=True, random_state=42),
        scoring=negated_mape,
    )
    sklearn_cv_scores = -negated_scores  # flip the sign back to a positive MAPE

    sklearn_cv_mean = np.mean(sklearn_cv_scores)
    sklearn_cv_std = np.std(sklearn_cv_scores)

    print(f"\n   📊 CV Results Comparison:")
    print(f"      Manual CV:  {manual_cv_mean:.3f}% ± {manual_cv_std:.3f}%")
    print(f"      Sklearn CV: {sklearn_cv_mean:.3f}% ± {sklearn_cv_std:.3f}%")

    # The two means should agree to within 0.1 MAPE points.
    cv_difference = abs(manual_cv_mean - sklearn_cv_mean)
    if not cv_difference < 0.1:
        print(f"      ⚠️ CV difference: {cv_difference:.3f}% - check implementation")
    else:
        print("      ✅ CV implementations consistent")

    return manual_cv_mean, sklearn_cv_mean

# Run CV testing
manual_cv, sklearn_cv = test_cv_implementation()

def test_gradient_boosting_cv():
    """Run a quick 3-fold CV smoke test on every usable gradient boosting library.

    Each model flagged as working (lgb_ok / xgb_ok / cb_ok) is built with
    50 boosting rounds and cross-validated on the first 1000 training rows.
    A failure in one model is printed and does not stop the others.

    Returns:
        dict mapping model name to {'mean', 'std', 'scores'} (fold MAPEs)
        for the models whose CV completed.
    """
    print("\n🚀 GRADIENT BOOSTING CV TESTING:")

    def fixed_cv_test(model, X, y, cv_folds=3):
        """Return the per-fold MAPE of fresh copies of `model` under shuffled KFold."""
        fold_scores = []
        splitter = KFold(n_splits=cv_folds, shuffle=True, random_state=42)

        for fit_rows, held_rows in splitter.split(X):
            X_fit, X_held = X[fit_rows], X[held_rows]
            y_fit, y_held = y[fit_rows], y[held_rows]

            # Rebuild the estimator for every fold so no fitted state is reused.
            if not hasattr(model, 'get_params'):
                from sklearn.base import clone
                fold_model = clone(model)
            else:
                fold_model = type(model)(**model.get_params())

            fold_model.fit(X_fit, y_fit)
            fold_scores.append(calculate_mape(y_held, fold_model.predict(X_held)))

        return fold_scores

    # Only instantiate models whose library passed the compatibility checks.
    candidates = []
    if lgb_ok:
        candidates.append(("LightGBM", lgb.LGBMRegressor(n_estimators=50, random_state=42, verbose=-1)))
    if xgb_ok:
        candidates.append(("XGBoost", xgb.XGBRegressor(n_estimators=50, random_state=42, verbosity=0)))
    if cb_ok:
        candidates.append(("CatBoost", cb.CatBoostRegressor(iterations=50, random_seed=42, verbose=False, allow_writing_files=False)))

    cv_results = {}

    for model_name, model in candidates:
        print(f"\n   Testing {model_name} CV...")

        try:
            # A 1000-row slice keeps the smoke test fast.
            cv_scores = fixed_cv_test(model, X_train[:1000], y_train[:1000])
            cv_mean = np.mean(cv_scores)
            cv_std = np.std(cv_scores)

            print(f"      {model_name} CV: {cv_mean:.3f}% ± {cv_std:.3f}%")
            print(f"      Individual folds: {[f'{score:.3f}%' for score in cv_scores]}")

            # Sanity band: mean MAPE under 50 and fold spread under 10.
            looks_reasonable = cv_mean < 50 and cv_std < 10
            if looks_reasonable:
                print(f"      ✅ {model_name} CV results look reasonable")
            else:
                print(f"      ⚠️ {model_name} CV results may be problematic")

            cv_results[model_name] = {'mean': cv_mean, 'std': cv_std, 'scores': cv_scores}

        except Exception as e:
            print(f"      ❌ {model_name} CV failed: {str(e)[:100]}")

    return cv_results

# Test gradient boosting CV
gb_cv_results = test_gradient_boosting_cv()

# ==============================================================================
# 11. OPTUNA OPTIMIZATION TESTING
# ==============================================================================

print("\n" + "="*50)
print("🎯 OPTUNA OPTIMIZATION TESTING")
print("="*50)

try:
    import optuna
    OPTUNA_AVAILABLE = True
    print("✅ Optuna available for hyperparameter optimization")
except ImportError:
    OPTUNA_AVAILABLE = False
    print("❌ Optuna not available - install with: pip install optuna")

def test_optuna_optimization():
    """Smoke-test Optuna with a 5-trial study on a small data slice.

    The objective tunes LightGBM when `lgb_ok` is set, otherwise a
    RandomForestRegressor when `rf_ok` is set, fitting on the first 2000
    training rows and scoring MAPE on the first 500 validation rows.

    Returns:
        The optuna Study when at least one trial produced a finite MAPE;
        None when Optuna is unavailable, no suitable model is available,
        every trial failed, or the study itself raised.
    """
    if not OPTUNA_AVAILABLE:
        print("⚠️ Skipping Optuna test - library not available")
        return None

    # Without a model every trial would just return inf and the study would
    # still look "successful", so skip explicitly instead.
    if not (lgb_ok or rf_ok):
        print("⚠️ Skipping Optuna test - no LightGBM or Random Forest model available")
        return None

    print("\n🔍 TESTING OPTUNA OPTIMIZATION:")

    # Use small dataset for speed
    X_small = X_train[:2000]
    y_small = y_train[:2000]
    X_val_small = X_val[:500]
    y_val_small = y_val[:500]

    def objective(trial):
        # Test with LightGBM if available, otherwise Random Forest
        if lgb_ok:
            model = lgb.LGBMRegressor(
                n_estimators=trial.suggest_int('n_estimators', 50, 200),
                learning_rate=trial.suggest_float('learning_rate', 0.01, 0.2),
                num_leaves=trial.suggest_int('num_leaves', 10, 100),
                random_state=42,
                verbose=-1
            )
        else:
            model = RandomForestRegressor(
                n_estimators=trial.suggest_int('n_estimators', 50, 200),
                max_depth=trial.suggest_int('max_depth', 5, 20),
                min_samples_split=trial.suggest_int('min_samples_split', 2, 10),
                random_state=42,
                n_jobs=1  # Single job for speed
            )

        try:
            model.fit(X_small, y_small)
            y_pred = model.predict(X_val_small)
            return calculate_mape(y_val_small, y_pred)
        except Exception as e:
            # Keep the study going, but surface the failure instead of hiding it.
            print(f"      ⚠️ Trial {trial.number} failed: {str(e)[:100]}")
            return float('inf')

    try:
        # Create study with minimal trials for testing
        study = optuna.create_study(direction='minimize', sampler=optuna.samplers.TPESampler(seed=42))

        print("   Running 5 Optuna trials for testing...")
        study.optimize(objective, n_trials=5, timeout=60, show_progress_bar=False)

        # Only finite objective values count as real results; inf marks a
        # failed trial and would otherwise distort best value and range.
        finite_values = [
            t.value for t in study.trials
            if t.value is not None and np.isfinite(t.value)
        ]
        if not finite_values:
            print("   ❌ Optuna test failed: no trial produced a finite MAPE")
            return None

        print(f"   ✅ Optuna test completed")
        print(f"      Best MAPE: {study.best_value:.3f}%")
        print(f"      Best params: {study.best_params}")
        print(f"      Trials completed: {len(study.trials)}")

        # Check if optimization is working
        if len(finite_values) > 1:
            improvement = max(finite_values) - min(finite_values)
            if improvement > 1.0:  # At least 1% MAPE improvement
                print(f"      ✅ Optimization working - {improvement:.2f}% MAPE range")
            else:
                print(f"      ⚠️ Limited optimization range - {improvement:.2f}% MAPE range")

        return study

    except Exception as e:
        print(f"   ❌ Optuna test failed: {str(e)[:100]}")
        return None

# Test Optuna
optuna_study = test_optuna_optimization()

def analyze_cv_stability():
    """Print a stability verdict for every model in `gb_cv_results`.

    Stability is judged by the coefficient of variation (std / mean of the
    fold MAPEs); a non-positive mean is treated as an infinite coefficient.
    Prints only the section header when `gb_cv_results` is empty.
    """
    print("\n📊 CV STABILITY ANALYSIS:")

    if not gb_cv_results:
        return

    # (exclusive upper bound on the coefficient, verdict line), strictest first.
    stability_bands = (
        (0.1, "         ✅ Very stable"),
        (0.2, "         ✅ Stable"),
        (0.3, "         ⚠️ Moderate stability"),
    )
    unstable_line = "         ❌ Unstable - high variance"

    print("\n   CV Stability by Model:")
    for model_name, results in gb_cv_results.items():
        if results['mean'] > 0:
            cv_coefficient = results['std'] / results['mean']
        else:
            cv_coefficient = float('inf')

        print(f"      {model_name}:")
        print(f"         Mean: {results['mean']:.3f}%")
        print(f"         Std:  {results['std']:.3f}%")
        print(f"         CV:   {cv_coefficient:.3f}")

        # First band the coefficient falls under; otherwise the unstable verdict.
        verdict_line = next(
            (line for bound, line in stability_bands if cv_coefficient < bound),
            unstable_line,
        )
        print(verdict_line)

analyze_cv_stability()

# ==============================================================================
# 12. OPTIMIZATION READINESS ASSESSMENT
# ==============================================================================

print("\n" + "="*50)
print("🚀 OPTIMIZATION READINESS ASSESSMENT")
print("="*50)

# List of (check name, passed) pairs that drives the checklist and the score.
optimization_readiness = []

# CV Implementation Check
# cv_consistent is bound unconditionally so the recommendation section below
# never hits an undefined (or stale notebook) name when the CV test was skipped.
cv_test_ran = manual_cv is not None and sklearn_cv is not None
cv_consistent = cv_test_ran and abs(manual_cv - sklearn_cv) < 0.5
optimization_readiness.append(("CV Implementation", cv_consistent))

# Gradient Boosting CV Check: at least one model ran and every mean MAPE is under 50
gb_cv_working = len(gb_cv_results) > 0 and all(r['mean'] < 50 for r in gb_cv_results.values())
optimization_readiness.append(("Gradient Boosting CV", gb_cv_working))

# Optuna Availability
optimization_readiness.append(("Optuna Available", OPTUNA_AVAILABLE))

# Optuna Functionality
optuna_working = optuna_study is not None and len(optuna_study.trials) > 0
optimization_readiness.append(("Optuna Working", optuna_working))

# Model Diversity: at least three of the four tree/boosting families usable
diverse_models = sum([lgb_ok, xgb_ok, cb_ok, rf_ok]) >= 3
optimization_readiness.append(("Model Diversity", diverse_models))

# Data Size Adequacy
adequate_data = X_train.shape[0] > 5000 and X_val.shape[0] > 1000
optimization_readiness.append(("Adequate Data Size", adequate_data))

print("📋 OPTIMIZATION READINESS CHECKLIST:")
print("="*45)

opt_passed = 0
opt_total = len(optimization_readiness)

for check_name, status in optimization_readiness:
    status_symbol = "✅" if status else "❌"
    print(f"   {status_symbol} {check_name}")
    if status:
        opt_passed += 1

opt_readiness_score = (opt_passed / opt_total) * 100
print(f"\n📊 OPTIMIZATION READINESS: {opt_readiness_score:.1f}% ({opt_passed}/{opt_total} checks passed)")

if opt_readiness_score >= 80:
    print("🎉 EXCELLENT - Ready for full hyperparameter optimization")
elif opt_readiness_score >= 60:
    print("✅ GOOD - Ready for optimization with minor limitations")
else:
    print("⚠️ LIMITED - Address key issues before optimization")

# Recommendations
print(f"\n💡 OPTIMIZATION RECOMMENDATIONS:")

if not OPTUNA_AVAILABLE:
    print("   • Install Optuna: pip install optuna")

if not gb_cv_working:
    print("   • Fix gradient boosting CV implementation")

if not cv_test_ran:
    # The comparison never produced numbers, so "inconsistent" would mislead.
    print("   • CV implementation check could not run - verify Ridge regression works")
elif not cv_consistent:
    print("   • Debug CV implementation - results inconsistent")

if not diverse_models:
    print("   • Install more gradient boosting libraries for better optimization")

if opt_readiness_score >= 60:
    print("   • Start with LightGBM or XGBoost optimization")
    print("   • Use 50-100 Optuna trials for initial runs")
    print("   • Focus on n_estimators, learning_rate, max_depth parameters")

print()


🔄 CROSS-VALIDATION TESTING & VALIDATION
🔍 TESTING CROSS-VALIDATION IMPLEMENTATION:

   Testing CV with Ridge Regression...
      Manual Fold 1: 30.829% MAPE
      Manual Fold 2: 30.618% MAPE
      Manual Fold 3: 30.697% MAPE

   📊 CV Results Comparison:
      Manual CV:  30.715% ± 0.087%
      Sklearn CV: 30.715% ± 0.087%
      ✅ CV implementations consistent

🚀 GRADIENT BOOSTING CV TESTING:

   Testing LightGBM CV...
      LightGBM CV: 30.677% ± 0.676%
      Individual folds: ['30.865%', '31.395%', '29.770%']
      ✅ LightGBM CV results look reasonable

   Testing XGBoost CV...
      XGBoost CV: 31.762% ± 0.909%
      Individual folds: ['30.876%', '31.400%', '33.011%']
      ✅ XGBoost CV results look reasonable

   Testing CatBoost CV...
      CatBoost CV: 30.942% ± 1.516%
      Individual folds: ['33.014%', '30.384%', '29.428%']
      ✅ CatBoost CV results look reasonable

🎯 OPTUNA OPTIMIZATION TESTING


[I 2025-09-05 20:58:08,623] A new study created in memory with name: no-name-0e82c52e-8989-44c2-9c50-14a7cc801fca


✅ Optuna available for hyperparameter optimization

🔍 TESTING OPTUNA OPTIMIZATION:
   Running 5 Optuna trials for testing...


[I 2025-09-05 20:58:09,133] Trial 0 finished with value: 31.08011963517963 and parameters: {'n_estimators': 106, 'learning_rate': 0.19063571821788408, 'num_leaves': 76}. Best is trial 0 with value: 31.08011963517963.
[I 2025-09-05 20:58:09,359] Trial 1 finished with value: 29.93761089844685 and parameters: {'n_estimators': 140, 'learning_rate': 0.039643541684062936, 'num_leaves': 24}. Best is trial 1 with value: 29.93761089844685.
[I 2025-09-05 20:58:09,593] Trial 2 finished with value: 30.35769204127513 and parameters: {'n_estimators': 58, 'learning_rate': 0.1745734676972377, 'num_leaves': 64}. Best is trial 1 with value: 29.93761089844685.
[I 2025-09-05 20:58:10,248] Trial 3 finished with value: 29.9210115270211 and parameters: {'n_estimators': 156, 'learning_rate': 0.013911053916202464, 'num_leaves': 98}. Best is trial 3 with value: 29.9210115270211.
[I 2025-09-05 20:58:10,533] Trial 4 finished with value: 29.73229978552449 and parameters: {'n_estimators': 175, 'learning_rate': 0.05

   ✅ Optuna test completed
      Best MAPE: 29.732%
      Best params: {'n_estimators': 175, 'learning_rate': 0.05034443102887247, 'num_leaves': 26}
      Trials completed: 5
      ✅ Optimization working - 1.35% MAPE range

📊 CV STABILITY ANALYSIS:

   CV Stability by Model:
      LightGBM:
         Mean: 30.677%
         Std:  0.676%
         CV:   0.022
         ✅ Very stable
      XGBoost:
         Mean: 31.762%
         Std:  0.909%
         CV:   0.029
         ✅ Very stable
      CatBoost:
         Mean: 30.942%
         Std:  1.516%
         CV:   0.049
         ✅ Very stable

🚀 OPTIMIZATION READINESS ASSESSMENT
📋 OPTIMIZATION READINESS CHECKLIST:
   ✅ CV Implementation
   ✅ Gradient Boosting CV
   ✅ Optuna Available
   ✅ Optuna Working
   ✅ Model Diversity
   ✅ Adequate Data Size

📊 OPTIMIZATION READINESS: 100.0% (6/6 checks passed)
🎉 EXCELLENT - Ready for full hyperparameter optimization

💡 OPTIMIZATION RECOMMENDATIONS:
   • Start with LightGBM or XGBoost optimization
   • Use