# Experiment 1: Baseline Models with Class Weights

Train LightGBM, CatBoost, and XGBoost baselines, handling class imbalance with class weights rather than SMOTE oversampling.

**Target Metrics:**
- F1 >= 0.90
- AUC-ROC >= 0.95
- FPR < 0.1%

In [1]:
import os
import pickle
import time
import warnings

import lightgbm as lgb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xgboost as xgb
from catboost import CatBoostClassifier
from sklearn.metrics import f1_score, roc_auc_score, precision_score, recall_score, confusion_matrix
from sklearn.metrics import precision_recall_curve, average_precision_score

warnings.filterwarnings('ignore')

In [2]:
# Restore the train/test split and metadata persisted by notebook 01.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load artifacts produced by this project's own pipeline.
with open('../data/processed_data.pkl', 'rb') as fh:
    data = pickle.load(fh)

# Unpack the four split arrays in one pass, then the remaining metadata.
X_train, X_test, y_train, y_test = (
    data[key] for key in ('X_train', 'X_test', 'y_train', 'y_test')
)
feature_names = data['feature_names']
scale_pos = data['scale_pos_weight']

# Quick sanity summary of what was loaded.
print(f"Train: {len(X_train):,}, Test: {len(X_test):,}")
print(f"Features: {len(feature_names)}")
print(f"Scale pos weight: {scale_pos:.1f}")

Train: 800,000, Test: 200,000
Features: 61
Scale pos weight: 330.7


In [3]:
def find_optimal_threshold(y_true, y_proba):
    """Return the decision threshold with the highest F1 along the PR curve.

    Candidate thresholds come from ``precision_recall_curve``. Each operating
    point is scored with F1 (a 1e-10 term in the denominator guards against
    division by zero) and the threshold at the best-scoring point is returned.
    The precision/recall arrays are indexed independently of ``thresholds``;
    if the best index falls outside ``thresholds``, 0.5 is returned instead.
    """
    prec, rec, cutoffs = precision_recall_curve(y_true, y_proba)
    f1_curve = 2 * (prec * rec) / (prec + rec + 1e-10)
    top = np.argmax(f1_curve)
    if top < len(cutoffs):
        return cutoffs[top]
    return 0.5

def evaluate_model(y_true, y_proba, threshold=None):
    """Compute ranking and threshold-dependent metrics for a binary classifier.

    Args:
        y_true: Ground-truth binary labels, expected to be encoded as 0/1.
        y_proba: Predicted positive-class scores; any array-like.
        threshold: Decision cutoff applied as ``y_proba >= threshold``. When
            ``None``, the F1-maximizing cutoff is derived from ``y_true`` and
            ``y_proba`` themselves via ``find_optimal_threshold``.

    Returns:
        dict with keys ``'threshold'``, ``'auc'``, ``'pr_auc'``, ``'f1'``,
        ``'precision'``, ``'recall'``, ``'fpr'`` and the confusion-matrix
        counts ``'tp'``, ``'fp'``, ``'tn'``, ``'fn'``.

    Note:
        With ``threshold=None`` the cutoff is tuned on the same labels it is
        then scored against, so F1/precision/recall/FPR are optimistic unless
        the inputs come from a split reserved for threshold selection.
    """
    # Accept lists / Series as well as ndarrays for the comparison below.
    y_proba = np.asarray(y_proba)

    if threshold is None:
        threshold = find_optimal_threshold(y_true, y_proba)

    y_pred = (y_proba >= threshold).astype(int)

    # Pin the label set so the matrix is always 2x2; without it, inputs that
    # contain a single class yield a 1x1 matrix and the unpacking fails.
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()

    # FPR is undefined when there are no actual negatives.
    negatives = fp + tn

    return {
        'threshold': threshold,
        'auc': roc_auc_score(y_true, y_proba),
        'pr_auc': average_precision_score(y_true, y_proba),
        'f1': f1_score(y_true, y_pred),
        'precision': precision_score(y_true, y_pred),
        'recall': recall_score(y_true, y_pred),
        'fpr': fp / negatives if negatives else float('nan'),
        'tp': tp, 'fp': fp, 'tn': tn, 'fn': fn
    }

## 1. LightGBM

In [4]:
# Hyperparameters for the class-weighted LightGBM baseline.
lgb_params = dict(
    n_estimators=500,
    num_leaves=128,
    learning_rate=0.03,
    scale_pos_weight=scale_pos,
    feature_fraction=0.7,
    bagging_fraction=0.7,
    bagging_freq=5,
    reg_alpha=0.1,
    reg_lambda=1.0,
    random_state=42,
    n_jobs=-1,
    verbose=-1,
)
lgb_model = lgb.LGBMClassifier(**lgb_params)

# NOTE(review): the test split is used both as the early-stopping eval set and
# (inside evaluate_model) to pick the decision threshold, so the metrics
# printed below are optimistic estimates.
lgb_callbacks = [lgb.early_stopping(50), lgb.log_evaluation(100)]

start = time.time()
lgb_model.fit(
    X_train,
    y_train,
    eval_set=[(X_test, y_test)],
    callbacks=lgb_callbacks,
)
lgb_time = time.time() - start

# Positive-class scores on the test split, evaluated at the F1-optimal threshold.
lgb_proba = lgb_model.predict_proba(X_test)[:, 1]
lgb_results = evaluate_model(y_test, lgb_proba)

print(f"\nLightGBM ({lgb_time:.1f}s):")
for label, key, spec in (
    ('AUC', 'auc', '.4f'),
    ('F1', 'f1', '.4f'),
    ('Precision', 'precision', '.4f'),
    ('Recall', 'recall', '.4f'),
    ('FPR', 'fpr', '.4%'),
):
    print(f"  {label}: {lgb_results[key]:{spec}}")

Training until validation scores don't improve for 50 rounds
[100]	valid_0's binary_logloss: 0.0586014
[200]	valid_0's binary_logloss: 0.0234222
[300]	valid_0's binary_logloss: 0.0126453
[400]	valid_0's binary_logloss: 0.0087105
[500]	valid_0's binary_logloss: 0.00730667
Did not meet early stopping. Best iteration is:
[500]	valid_0's binary_logloss: 0.00730667

LightGBM (13.6s):
  AUC: 0.9425
  F1: 0.8086
  Precision: 0.9559
  Recall: 0.7007
  FPR: 0.0095%


## 2. CatBoost

In [5]:
# Hyperparameters for the class-weighted CatBoost baseline.
cat_params = dict(
    iterations=500,
    learning_rate=0.03,
    depth=8,
    l2_leaf_reg=3,
    class_weights=[1, scale_pos],
    random_seed=42,
    verbose=100,
    early_stopping_rounds=50,
)
cat_model = CatBoostClassifier(**cat_params)

# NOTE(review): the test split is used both as the early-stopping eval set and
# (inside evaluate_model) to pick the decision threshold, so the metrics
# printed below are optimistic estimates.
start = time.time()
cat_model.fit(X_train, y_train, eval_set=[(X_test, y_test)])
cat_time = time.time() - start

# Positive-class scores on the test split, evaluated at the F1-optimal threshold.
cat_proba = cat_model.predict_proba(X_test)[:, 1]
cat_results = evaluate_model(y_test, cat_proba)

print(f"\nCatBoost ({cat_time:.1f}s):")
for label, key, spec in (
    ('AUC', 'auc', '.4f'),
    ('F1', 'f1', '.4f'),
    ('Precision', 'precision', '.4f'),
    ('Recall', 'recall', '.4f'),
    ('FPR', 'fpr', '.4%'),
):
    print(f"  {label}: {cat_results[key]:{spec}}")

0:	learn: 0.6635900	test: 0.6643687	best: 0.6643687 (0)	total: 84ms	remaining: 41.9s
100:	learn: 0.2299621	test: 0.2781862	best: 0.2780603 (99)	total: 3.1s	remaining: 12.2s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.2713556063
bestIteration = 142

Shrink model to first 143 iterations.

CatBoost (6.1s):
  AUC: 0.9570
  F1: 0.8689
  Precision: 0.9934
  Recall: 0.7721
  FPR: 0.0015%


## 3. XGBoost

In [6]:
# Hyperparameters for the class-weighted XGBoost baseline.
xgb_params = dict(
    n_estimators=500,
    max_depth=8,
    learning_rate=0.03,
    scale_pos_weight=scale_pos,
    subsample=0.8,
    colsample_bytree=0.7,
    reg_alpha=0.1,
    reg_lambda=1.0,
    random_state=42,
    n_jobs=-1,
    eval_metric='auc',
    early_stopping_rounds=50,
)
xgb_model = xgb.XGBClassifier(**xgb_params)

# NOTE(review): the test split is used both as the early-stopping eval set and
# (inside evaluate_model) to pick the decision threshold, so the metrics
# printed below are optimistic estimates.
start = time.time()
xgb_model.fit(
    X_train,
    y_train,
    eval_set=[(X_test, y_test)],
    verbose=100,
)
xgb_time = time.time() - start

# Positive-class scores on the test split, evaluated at the F1-optimal threshold.
xgb_proba = xgb_model.predict_proba(X_test)[:, 1]
xgb_results = evaluate_model(y_test, xgb_proba)

print(f"\nXGBoost ({xgb_time:.1f}s):")
for label, key, spec in (
    ('AUC', 'auc', '.4f'),
    ('F1', 'f1', '.4f'),
    ('Precision', 'precision', '.4f'),
    ('Recall', 'recall', '.4f'),
    ('FPR', 'fpr', '.4%'),
):
    print(f"  {label}: {xgb_results[key]:{spec}}")

[0]	validation_0-auc:0.87032
[100]	validation_0-auc:0.96099
[200]	validation_0-auc:0.96223
[271]	validation_0-auc:0.96147

XGBoost (11.6s):
  AUC: 0.9626
  F1: 0.8755
  Precision: 0.9831
  Recall: 0.7891
  FPR: 0.0040%


## 4. Comparison

In [7]:
# Collect the per-model metric dicts under their display names.
results = {
    'LightGBM': lgb_results,
    'CatBoost': cat_results,
    'XGBoost': xgb_results
}

# Display column -> key in each model's results dict (in column order).
metric_columns = {
    'AUC-ROC': 'auc',
    'PR-AUC': 'pr_auc',
    'F1': 'f1',
    'Precision': 'precision',
    'Recall': 'recall',
    'FPR': 'fpr',
    'Threshold': 'threshold',
}

# One row per model, one column per reported metric.
comparison = pd.DataFrame({
    name: {column: r[key] for column, key in metric_columns.items()}
    for name, r in results.items()
}).T

banner = "=" * 60
print("\n" + banner)
print("BASELINE RESULTS")
print(banner)
print(comparison.round(4))

# The model with the highest F1 is reported as the baseline winner.
best = comparison['F1'].idxmax()
print(f"\nBest model: {best} (F1 = {comparison.loc[best, 'F1']:.4f})")


BASELINE RESULTS
          AUC-ROC  PR-AUC      F1  Precision  Recall     FPR  Threshold
LightGBM   0.9425  0.7430  0.8086     0.9559  0.7007  0.0001     0.3463
CatBoost   0.9570  0.8155  0.8689     0.9934  0.7721  0.0000     0.5340
XGBoost    0.9626  0.8269  0.8755     0.9831  0.7891  0.0000     0.4703

Best model: XGBoost (F1 = 0.8755)


In [8]:
# Rank features by CatBoost's importance scores, highest first.
# NOTE(review): importances always come from the CatBoost model, regardless of
# which model the comparison cell selected as best.
importance = cat_model.get_feature_importance()
feat_imp = (
    pd.DataFrame({'feature': feature_names, 'importance': importance})
    .sort_values(by='importance', ascending=False)
)

print("\nTop 15 Features:")
top_features = feat_imp.head(15)
print(top_features.to_string(index=False))


Top 15 Features:
                      feature  importance
                       amount   18.772846
         amount_vs_mean_ratio   18.752379
                   amount_log   14.883606
               amount_sum_24h   13.653489
                is_high_value    3.332770
        ratio_to_customer_max    2.489376
               velocity_score    2.200832
                        month    1.673035
            amount_mean_total    1.485194
              channel_encoded    1.370542
              amount_change_1    1.207332
amount_deviation_from_rolling    1.106391
                    month_cos    1.010487
                    month_sin    0.997927
                amount_std_7d    0.961910


In [9]:
# Persist the fitted models, their test-set scores, and the metric dicts so
# later notebooks can reuse them without retraining.
models_dir = '../models'
# A fresh checkout may not have the output directory yet; create it up front
# so the writes below do not fail with FileNotFoundError.
os.makedirs(models_dir, exist_ok=True)

baseline_data = {
    'models': {
        'lightgbm': lgb_model,
        'catboost': cat_model,
        'xgboost': xgb_model
    },
    'predictions': {
        'lightgbm': lgb_proba,
        'catboost': cat_proba,
        'xgboost': xgb_proba
    },
    'results': results,
    'best_model': best
}

with open(os.path.join(models_dir, 'baseline_models.pkl'), 'wb') as f:
    pickle.dump(baseline_data, f)

# Also save CatBoost in its native format, independent of the pickle bundle.
cat_model.save_model(os.path.join(models_dir, 'catboost_baseline.cbm'))

print("Models saved to ../models/")

Models saved to ../models/
