# 🎯 Simulation 4: Sadece Skorlama

Bu not defteri, önceden eğitilmiş bir modelle yeni verinin nasıl skorlanacağını gösterir.

In [None]:
import pandas as pd
import numpy as np
import joblib
import json
import warnings
warnings.filterwarnings('ignore')

# Import the scoring helpers from the installed package when available;
# otherwise add the parent directory to sys.path and import them via `src`.
try:
    from risk_pipeline.utils.scoring import score_data, load_model_artifacts
except ImportError:
    # Only an import failure should trigger the fallback. A bare `except:`
    # would also swallow KeyboardInterrupt/SystemExit and hide unrelated
    # errors raised while the package is being imported.
    import sys
    sys.path.append('..')
    from src.risk_pipeline.utils.scoring import score_data, load_model_artifacts

print("✅ Ready for scoring")

In [None]:
# Train a small throwaway model first (for demo purposes only)
import os

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Synthetic demo data: 1000 rows, 5 standard-normal features.
# The draw order (features first, then label noise) is kept fixed so the
# seeded random stream yields the same data on every run.
np.random.seed(789)
X = np.random.randn(1000, 5)
label_noise = np.random.randn(1000) * 0.5
# Binary label driven by the first two features plus noise
y = (X[:, 0] + X[:, 1] * 0.5 + label_noise > 0).astype(int)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the model
model = RandomForestClassifier(n_estimators=10, random_state=42).fit(X_train, y_train)

# Persist the model
os.makedirs('outputs_scoring', exist_ok=True)
joblib.dump(model, 'outputs_scoring/demo_model.pkl')

# Persist the feature list (one name per column of X)
features = [f'feature_{i}' for i in range(X.shape[1])]
with open('outputs_scoring/features.json', 'w') as f:
    f.write(json.dumps(features))

test_accuracy = model.score(X_test, y_test)
print("Demo model trained and saved")
print(f"Accuracy: {test_accuracy:.3f}")

In [None]:
# Build the scoring dataset; part of the rows deliberately has no target.
n_score = 2000

# 60% of the rows are unlabeled. The split is derived from n_score with
# integer arithmetic (2000 -> exactly 1200 / 800) so every column keeps the
# same length when n_score changes; the previously hard-coded 1200/800 made
# the DataFrame constructor fail for any other n_score.
n_without_target = n_score * 60 // 100
n_with_target = n_score - n_without_target

scoring_df = pd.DataFrame({
    'app_id': [f'SCORE_{i:05d}' for i in range(n_score)],
    'app_dt': pd.date_range('2024-01-01', periods=n_score, freq='D'),
    # NaN marks a missing target; the labeled tail is drawn from
    # Bernoulli(0.25).
    'target': [np.nan] * n_without_target
              + list(np.random.binomial(1, 0.25, n_with_target)),
    'feature_0': np.random.randn(n_score),
    'feature_1': np.random.randn(n_score),
    'feature_2': np.random.randn(n_score),
    'feature_3': np.random.randn(n_score),
    'feature_4': np.random.randn(n_score)
})

print(f"Scoring data: {scoring_df.shape}")
print(f"With target: {(~scoring_df['target'].isna()).sum()}")
print(f"Without target: {scoring_df['target'].isna().sum()}")

In [None]:
# Load the persisted model and its feature list back from disk.
# NOTE: joblib.load unpickles the file, so only load artifacts from a
# trusted source.
model = joblib.load('outputs_scoring/demo_model.pkl')

with open('outputs_scoring/features.json', 'r') as f:
    features = json.loads(f.read())

model_class_name = model.__class__.__name__
print(f"Model loaded: {model_class_name}")
print(f"Features: {features}")

In [None]:
# Plain scoring (no WOE mapping applied): feed the raw feature columns to the
# model and keep the predicted probability of the positive class.
X_score = scoring_df[features].values
scores = model.predict_proba(X_score)[:, 1]

# Attach the results to the DataFrame
scoring_df['score'] = scores
# Quantile-based risk bands, numbered from 1. When the scores take only a few
# distinct values (as a small tree ensemble typically produces), several
# decile edges coincide and pd.qcut raises "Bin edges must be unique".
# duplicates='drop' merges the repeated edges instead: fewer than 10 bands may
# result, but records with equal scores always share a band.
scoring_df['risk_band'] = pd.qcut(scores, 10, labels=False, duplicates='drop') + 1

print("\nScoring Complete!")
print(f"Score range: [{scores.min():.3f}, {scores.max():.3f}]")
print(f"Mean score: {scores.mean():.3f}")

In [None]:
# Discrimination metrics on the records that carry a target
from sklearn.metrics import roc_auc_score, classification_report

# Boolean mask of labeled rows
has_target = scoring_df['target'].notna()
if has_target.sum() > 0:
    labeled_rows = scoring_df.loc[has_target, ['target', 'score']]
    y_true = labeled_rows['target']
    y_score = labeled_rows['score']

    # Gini is the AUC rescaled as 2 * AUC - 1
    auc = roc_auc_score(y_true, y_score)
    gini = 2 * auc - 1

    print(f"\nPerformance (on {has_target.sum()} records with target):")
    print(f"AUC: {auc:.4f}")
    print(f"Gini: {gini:.4f}")

In [None]:
# Risk band analysis
print("\nRisk Band Analysis:")
print("="*60)

# Restrict to labeled records and summarize each band: volume, number of
# defaults, default rate, and the score range it covers.
with_target_df = scoring_df.loc[has_target].copy()
band_analysis = with_target_df.groupby('risk_band').agg(
    Count=('target', 'count'),
    Defaults=('target', 'sum'),
    Default_Rate=('target', 'mean'),
    Min_Score=('score', 'min'),
    Max_Score=('score', 'max'),
    Avg_Score=('score', 'mean'),
)
print(band_analysis)

# Monotonicity check: the default rate must not decrease from one band to
# the next (bands are in ascending order of band number).
default_rates = band_analysis['Default_Rate'].values
is_monotonic = all(
    lower <= upper for lower, upper in zip(default_rates, default_rates[1:])
)
print(f"\nMonotonic trend: {'✅ Yes' if is_monotonic else '❌ No'}")

In [None]:
# Persist the scored records as CSV (row index omitted)
output_file = 'outputs_scoring/scored_data.csv'
scoring_df.to_csv(path_or_buf=output_file, index=False)

print(f"\n✅ Scored data saved to: {output_file}")
print(f"Total records: {len(scoring_df):,}")