# Risk Model Pipeline - Notebook Usage Examples

Bu notebook risk-model-pipeline paketinin farklı kullanım senaryolarını gösterir.

## 1. Paketi Yükleme

In [None]:
# Paketi GitHub'dan yükleme
!pip install git+https://github.com/selimoksuz/risk-model-pipeline.git

# veya local olarak clone'ladıysanız
# !pip install -e /path/to/risk-model-pipeline

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime
import joblib
import json

# Pipeline imports
from risk_pipeline.pipeline16 import Config, RiskModelPipeline
from risk_pipeline.utils.scoring import load_model_artifacts, score_data
from risk_pipeline.utils.pipeline_runner import run_pipeline_from_dataframe, get_full_config

## 2. Senaryo 1: Sadece Model Eğitimi (Kalibrasyon Yok, Skorlama Yok)

In [None]:
# Build a synthetic training DataFrame (replace this with your own data).
# A seeded Generator keeps the sample, and therefore the printed default
# rate, identical across notebook runs.
rng = np.random.default_rng(42)
n_train = 1000

train_df = pd.DataFrame({
    'app_id': range(n_train),
    'app_dt': pd.date_range('2024-01-01', periods=n_train, freq='D'),
    'target': rng.binomial(1, 0.2, n_train),  # ~20% default rate
    'age': rng.integers(18, 70, n_train),  # upper bound is exclusive
    'income': rng.lognormal(10, 0.5, n_train),
    'credit_score': rng.integers(300, 850, n_train),
    'region': rng.choice(['A', 'B', 'C'], n_train),
})

print(f"Training data shape: {train_df.shape}")
print(f"Default rate: {train_df['target'].mean():.2%}")

In [None]:
# Model training - MODEL ONLY (no calibration, no scoring)
config = Config(
    id_col="app_id",
    time_col="app_dt",
    target_col="target",
    
    # Performance settings
    hpo_trials=5,  # kept low for a quick test
    hpo_timeout_sec=30,
    
    # NO calibration
    calibration_data_path=None,
    
    random_state=42
)

# Run the pipeline; `pipeline` and `results` are reused by later cells
pipeline = RiskModelPipeline(config)
results = pipeline.run(train_df)

print(f"\nBest Model: {pipeline.best_model_name_}")
print(f"Final Features: {pipeline.final_vars_}")
# Falls back to 'N/A' when the results dict has no 'best_auc' entry
print(f"Model AUC: {results.get('best_auc', 'N/A')}")

In [None]:
# Persist the best model under a timestamped file name
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
model_path = f"model_{timestamp}.pkl"
best_model = pipeline.models_[pipeline.best_model_name_]
joblib.dump(best_model, model_path)
print(f"Model saved: {model_path}")

## 3. Senaryo 2: Model + Kalibrasyon (Skorlama Yok)

In [None]:
# Build a synthetic calibration sample.
# Seeded so that the CSV written below is identical across notebook runs.
rng = np.random.default_rng(43)
n_cal = 500

calibration_df = pd.DataFrame({
    'app_id': range(2000, 2000 + n_cal),
    'app_dt': pd.date_range('2024-06-01', periods=n_cal, freq='D'),
    'target': rng.binomial(1, 0.25, n_cal),
    'age': rng.integers(18, 70, n_cal),  # upper bound is exclusive
    'income': rng.lognormal(10, 0.5, n_cal),
    'credit_score': rng.integers(300, 850, n_cal),
    'region': rng.choice(['A', 'B', 'C'], n_cal),
})

# Save the calibration data; later cells pass this path to the pipeline
calibration_df.to_csv('calibration_data.csv', index=False)
print(f"Calibration data: {calibration_df.shape}")

In [None]:
# Model + calibration
config_with_cal = Config(
    id_col="app_id",
    time_col="app_dt",
    target_col="target",
    
    # Calibration ON: points at the CSV written by the previous cell
    calibration_data_path="calibration_data.csv",
    calibration_method="isotonic",  # or "sigmoid"
    
    hpo_trials=5,
    hpo_timeout_sec=30,
    random_state=42
)

pipeline_cal = RiskModelPipeline(config_with_cal)
results_cal = pipeline_cal.run(train_df)

print(f"\nModel with calibration trained!")
# Prints the calibrator's type, or the text 'None' when the pipeline object
# has no `calibrator_` attribute at all
print(f"Calibrator: {type(pipeline_cal.calibrator_) if hasattr(pipeline_cal, 'calibrator_') else 'None'}")

## 4. Senaryo 3: Sadece Skorlama (Önceden Eğitilmiş Model)

In [None]:
# Build a synthetic scoring sample.
# Seeded so the scored population is identical across notebook runs.
rng = np.random.default_rng(44)
n_scoring = 1000
n_without_target = 600  # the first 60% of rows carry no target

scoring_df = pd.DataFrame({
    'app_id': range(3000, 3000 + n_scoring),
    'app_dt': pd.date_range('2024-08-01', periods=n_scoring, freq='D'),
    'target': [np.nan] * n_without_target
              + list(rng.binomial(1, 0.3, n_scoring - n_without_target)),
    'age': rng.integers(18, 70, n_scoring),  # upper bound is exclusive
    'income': rng.lognormal(10, 0.5, n_scoring),
    'credit_score': rng.integers(300, 850, n_scoring),
    'region': rng.choice(['A', 'B', 'C'], n_scoring),
})

print(f"Scoring data: {scoring_df.shape}")
print(f"With target: {(~scoring_df['target'].isna()).sum()}")
print(f"Without target: {scoring_df['target'].isna().sum()}")

In [None]:
# Load the artifacts of the earlier training run
output_folder = 'outputs'
run_id = results['run_id']  # taken from the previous training results

artifacts = load_model_artifacts(output_folder, run_id)
model, final_features, woe_mapping, calibrator = artifacts

model_kind = type(model).__name__
calibrator_status = 'Available' if calibrator else 'Not available'
print(f"Model loaded: {model_kind}")
print(f"Calibrator: {calibrator_status}")

In [None]:
# Run scoring
from risk_pipeline.utils.scoring import apply_woe_transform

# Training scores for PSI (optional)
train_woe = apply_woe_transform(train_df, woe_mapping)
# ... feature mapping logic ...
# NOTE(review): train_woe is not used again in this cell; it marks the spot
# where training scores would be derived if PSI is wanted.
training_scores = None  # may be left as None if you do not need PSI

# Scoring
scoring_results = score_data(
    scoring_df=scoring_df,
    model=model,
    final_features=final_features,
    woe_mapping=woe_mapping,
    calibrator=calibrator,  # used when available
    training_scores=training_scores,  # for PSI
    feature_mapping=None
)

print(f"\nScoring Results:")
print(f"Total scored: {scoring_results['n_total']}")
print(f"With target: {scoring_results['n_with_target']}")
print(f"Without target: {scoring_results['n_without_target']}")

# Performance metrics are reported only when the result contains a
# 'with_target' section
if 'with_target' in scoring_results:
    print(f"\nPerformance (with target):")
    print(f"  AUC: {scoring_results['with_target']['auc']:.3f}")
    print(f"  Gini: {scoring_results['with_target']['gini']:.3f}")

## 5. Senaryo 4: Sonradan Kalibrasyon Ekleme

In [None]:
# A trained model already exists; fit a calibrator for it after the fact.
from sklearn.isotonic import IsotonicRegression
from sklearn.calibration import CalibratedClassifierCV

# Build the calibration sample (seeded so the fitted calibrator is
# reproducible across notebook runs)
rng = np.random.default_rng(45)
n_cal_rows = 300

cal_df = pd.DataFrame({
    'app_id': range(5000, 5000 + n_cal_rows),
    'app_dt': pd.date_range('2024-09-01', periods=n_cal_rows, freq='D'),
    'target': rng.binomial(1, 0.22, n_cal_rows),
    'age': rng.integers(18, 70, n_cal_rows),  # upper bound is exclusive
    'income': rng.lognormal(10, 0.5, n_cal_rows),
    'credit_score': rng.integers(300, 850, n_cal_rows),
    'region': rng.choice(['A', 'B', 'C'], n_cal_rows),
})

# Apply the WOE transformation; keep only the final features when they are
# given as a list, otherwise use the whole transformed frame
cal_woe = apply_woe_transform(cal_df, woe_mapping)
X_cal = cal_woe[final_features] if isinstance(final_features, list) else cal_woe
y_cal = cal_df['target'].values

# Raw model output: positive-class probability when the model exposes
# predict_proba, otherwise whatever predict returns
raw_scores = model.predict_proba(X_cal)[:, 1] if hasattr(model, 'predict_proba') else model.predict(X_cal)

# Fit the calibrator on 1-D scores. Isotonic regression is univariate, and a
# flat array is accepted by every scikit-learn release, whereas an (n, 1)
# column is rejected by older ones.
calibrator = IsotonicRegression(out_of_bounds='clip')
calibrator.fit(np.asarray(raw_scores).ravel(), y_cal)

# Save the calibrator next to the run it belongs to
joblib.dump(calibrator, f'calibrator_{run_id}.pkl')
print("Calibrator trained and saved!")

## 6. Senaryo 5: Tümleşik Kullanım (Helper Function ile)

In [None]:
# Easiest usage: the helper function
from risk_pipeline.utils.pipeline_runner import run_pipeline_from_dataframe

# Everything in a single call.
# NOTE: this rebinds `results`, replacing the dict returned by the first
# training run earlier in the notebook.
results = run_pipeline_from_dataframe(
    df=train_df,
    id_col="app_id",
    time_col="app_dt",
    target_col="target",
    
    # Optional parameters
    calibration_data_path="calibration_data.csv",  # if you want calibration
    scoring_df=scoring_df,  # if you want scoring
    
    # Performance
    hpo_trials=10,
    hpo_timeout_sec=60,
    
    # Outputs
    output_folder="my_outputs",
    output_excel="my_report.xlsx"
)

print(f"\nPipeline Results:")
print(f"Best Model: {results['best_model']}")
print(f"Final Features: {results['final_features']}")
print(f"Files saved to: {results['output_folder']}")

## 7. Özel Kullanım: Kendi Modelinizi Entegre Etme

In [None]:
# If you want to plug in your own model
from sklearn.ensemble import GradientBoostingClassifier

# Take the WOE mapping and final feature list from the trained pipeline.
# NOTE: this rebinds `woe_mapping` and `final_features`, which an earlier cell
# set from load_model_artifacts.
# NOTE(review): `woe_map` has no trailing underscore, unlike the other
# pipeline attributes used in this notebook - confirm the attribute name.
woe_mapping = pipeline.woe_map
final_features = pipeline.final_vars_

# Apply the WOE transformation
from risk_pipeline.stages.woe import apply_woe
train_woe = apply_woe(train_df, woe_mapping)
X_train = train_woe[final_features]
y_train = train_df['target']

# Train your own model
my_model = GradientBoostingClassifier(n_estimators=100, random_state=42)
my_model.fit(X_train, y_train)

print("Custom model trained!")

# It can then be used for scoring
scoring_results = score_data(
    scoring_df=scoring_df,
    model=my_model,  # your own model
    final_features=final_features,
    woe_mapping=woe_mapping,
    calibrator=None,  # add one if you like
    training_scores=None
)

## 8. Batch Scoring Örneği

In [None]:
# Batch scoring for large data
def batch_score(df, model, features, woe_mapping, batch_size=1000, calibrator=None):
    """Score a large DataFrame in consecutive row batches.

    Each batch is passed to ``score_data`` and the values under the
    ``'scores'`` key of its result are collected in row order.

    Args:
        df: DataFrame to score; batches are taken positionally with ``iloc``.
        model: Model object forwarded to ``score_data``.
        features: Final feature list forwarded as ``final_features``.
        woe_mapping: WOE mapping forwarded to ``score_data``.
        batch_size: Number of rows per batch; must be positive.
        calibrator: Optional calibrator forwarded to ``score_data``
            (default ``None``, i.e. no calibrator is passed).

    Returns:
        numpy array with the collected scores of all batches; empty when
        ``df`` has no rows.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    # A negative step would make the range below empty and silently return
    # no scores at all, so reject it up front.
    if batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    all_scores = []

    for batch_no, start in enumerate(range(0, len(df), batch_size), start=1):
        batch = df.iloc[start:start + batch_size]

        # Score this batch
        batch_results = score_data(
            scoring_df=batch,
            model=model,
            final_features=features,
            woe_mapping=woe_mapping,
            calibrator=calibrator,
            training_scores=None
        )

        all_scores.extend(batch_results['scores'])
        print(f"Batch {batch_no}: {len(batch)} records scored")

    return np.array(all_scores)

# Usage
# NOTE: concatenating copies repeats the original index labels; batch_score
# slices by position, so that does not affect the batching.
large_df = pd.concat([scoring_df] * 10)  # 10,000 rows
scores = batch_score(large_df, model, final_features, woe_mapping)
print(f"\nTotal scored: {len(scores)}")

## 9. Model Performans Monitoring

In [None]:
# Track PSI over time
def monitor_psi(model, features, woe_mapping, training_scores, new_data_list,
                stable_threshold=0.1, drift_threshold=0.25):
    """Score each period's data and collect the reported PSI per period.

    Args:
        model: Model object forwarded to ``score_data``.
        features: Final feature list forwarded as ``final_features``.
        woe_mapping: WOE mapping forwarded to ``score_data``.
        training_scores: Reference scores forwarded to ``score_data``.
        new_data_list: Iterable of DataFrames, one per period; the period
            number is the position in this iterable, starting at 0.
        stable_threshold: PSI below this value is labelled ``'Stable'``.
        drift_threshold: PSI below this value (and at or above
            ``stable_threshold``) is labelled ``'Drift'``; anything else is
            ``'Significant Drift'``.

    Returns:
        DataFrame with columns ``period``, ``psi`` and ``status``, one row
        per period whose scoring result contains a ``'psi_score'``. The
        columns are present even when no period produced a PSI.
    """
    columns = ['period', 'psi', 'status']
    psi_history = []

    for period, new_df in enumerate(new_data_list):
        results = score_data(
            scoring_df=new_df,
            model=model,
            final_features=features,
            woe_mapping=woe_mapping,
            calibrator=None,
            training_scores=training_scores
        )

        psi = results.get('psi_score')
        # Compare against None explicitly: a PSI of exactly 0.0 is a valid
        # (perfectly stable) result and must not be dropped as falsy.
        if psi is None:
            continue

        if psi < stable_threshold:
            status = 'Stable'
        elif psi < drift_threshold:
            status = 'Drift'
        else:
            status = 'Significant Drift'

        psi_history.append({'period': period, 'psi': psi, 'status': status})

    # Passing the column list keeps the schema stable for an empty history
    return pd.DataFrame(psi_history, columns=columns)

# Simulation
# NOTE(review): `training_scores` was set to None in the scoring cell above;
# presumably no PSI is computed in that case, so psi_df may come back empty -
# supply real training scores to see PSI rows.
monthly_data = [scoring_df.sample(100) for _ in range(6)]  # 6 months of data
psi_df = monitor_psi(model, final_features, woe_mapping, training_scores, monthly_data)
print(psi_df)