# 📊 Simulation 3: Model + Kalibrasyon

DataFrame tabanlı kalibrasyon ile model eğitimi.

In [None]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

# Import the pipeline from the installed package; if that import fails, add
# the parent directory to sys.path and import it from the ``src`` tree instead.
try:
    from risk_pipeline.pipeline16 import RiskModelPipeline, Config
except ImportError:
    # Only a failed import should trigger the fallback. A bare ``except``
    # would also swallow KeyboardInterrupt/SystemExit and unrelated errors.
    import sys
    sys.path.append('..')
    from src.risk_pipeline.pipeline16 import RiskModelPipeline, Config

print("✅ Ready")

In [None]:
# Build the synthetic datasets
np.random.seed(456)


def create_data(n, prefix, target_rate=0.15):
    """Generate a synthetic credit-application dataset.

    Every random column is drawn from NumPy's global random state, so the
    result is reproducible only when that state is seeded by the caller.

    Args:
        n: Number of rows to generate.
        prefix: Text placed before the zero-padded row number in ``app_id``.
        target_rate: Success probability of the binomial draw for ``target``.

    Returns:
        A ``pandas.DataFrame`` with ``n`` rows and the columns ``app_id``,
        ``app_dt`` (daily dates starting 2023-01-01), ``target``, ``age``,
        ``income``, ``score``, ``debt``, ``employment_years``, ``education``,
        ``marital`` and ``region``.
    """
    ids = [f'{prefix}_{row:05d}' for row in range(n)]
    dates = pd.date_range('2023-01-01', periods=n, freq='D')

    # The draws are made in column order; reordering them would change the
    # values produced for a given seed.
    target = np.random.binomial(1, target_rate, n)
    age = np.random.randint(18, 70, n)
    income = np.random.lognormal(10, 0.5, n)
    score = np.clip(np.random.normal(650, 100, n), 300, 850)
    debt = np.random.exponential(20000, n)
    employment_years = np.clip(np.random.exponential(5, n), 0, 30)
    education = np.random.choice(['HS', 'BSc', 'MSc'], n)
    marital = np.random.choice(['Single', 'Married', 'Other'], n)
    region = np.random.choice(['North', 'South', 'East', 'West'], n)

    columns = {
        'app_id': ids,
        'app_dt': dates,
        'target': target,
        'age': age,
        'income': income,
        'score': score,
        'debt': debt,
        'employment_years': employment_years,
        'education': education,
        'marital': marital,
        'region': region,
    }
    return pd.DataFrame(columns)

# Training sample
train_df = create_data(n=3000, prefix='TRAIN', target_rate=0.15)
print(f"Training: {train_df.shape}, Target rate: {train_df['target'].mean():.2%}")

# Calibration sample, generated with a higher default rate than training
cal_df = create_data(n=1000, prefix='CAL', target_rate=0.18)
print(f"Calibration: {cal_df.shape}, Target rate: {cal_df['target'].mean():.2%}")

In [None]:
# Pipeline configuration with calibration enabled
cfg = Config(
    # Column roles
    id_col='app_id', time_col='app_dt', target_col='target',
    # Calibration: the DataFrame is passed in directly;
    # 'sigmoid' is the alternative noted for calibration_method
    calibration_df=cal_df, calibration_method='isotonic',
    # Model settings
    cv_folds=3, hpo_timeout=60, hpo_n_trials=10,
    # Output
    output_folder='outputs_calibrated',
    output_excel_path='calibrated_model_report.xlsx',
)

print("Config with calibration ready")

In [None]:
# Run the pipeline on the training data
pipeline = RiskModelPipeline(cfg)
pipeline.run(train_df)

# Completion banner: the same three lines, emitted by a single print call
print("\n" + "="*60, "✅ Pipeline Complete", "="*60, sep="\n")

In [None]:
# Calibration check
print("Calibration Check:")
# Test with ``is not None`` here too, so the reported type cannot disagree
# with the branch below for a calibrator object that evaluates as falsy.
print(f"Calibrator type: {type(pipeline.calibrator_).__name__ if pipeline.calibrator_ is not None else 'None'}")

# Test data for a before/after calibration comparison
if pipeline.calibrator_ is not None:
    # Create test data
    test_df = create_data(500, 'TEST', 0.20)

    # Apply the WOE transformation (simplified)
    # In a real application, use the pipeline's WOE mapping

    print("\n✅ Calibration applied successfully!")
    print("Calibrator will adjust model probabilities for better calibration.")

In [None]:
# Model results
print("\nModel Results:")
print(f"Best Model: {pipeline.best_model_name_}")
print(f"Final Features: {len(pipeline.final_vars_)}")

if pipeline.models_summary_ is not None:
    # Select the summary row(s) whose 'model' value matches the best model
    best_rows = pipeline.models_summary_[pipeline.models_summary_['model'] == pipeline.best_model_name_]
    if best_rows.empty:
        # ``.iloc[0]`` on an empty selection raises IndexError; report the
        # missing row instead of crashing the cell.
        print(f"\nNo summary row found for model: {pipeline.best_model_name_}")
    else:
        best = best_rows.iloc[0]
        print("\nPerformance:")
        # ``get`` falls back to 'N/A' when a metric column is absent
        print(f"  AUC (OOT): {best.get('auc_oot', 'N/A')}")
        print(f"  Gini (OOT): {best.get('gini_oot', 'N/A')}")