# Training Models

In [None]:
import pandas as pd
import numpy as np
import joblib
import os
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, roc_auc_score, roc_curve, precision_recall_curve
import wandb
from wandb.sklearn import plot_classifier, plot_roc, plot_precision_recall


In [None]:
# Start the Weights & Biases run that the rest of this cell logs to
wandb.init(
    project="fraud_detection_pipeline",
    name="baseline_vs_tree_models",
)

# Read the transactions table from disk
DATA_PATH = "../data/train_eval_transactions.csv"
df_rest = pd.read_csv(DATA_PATH)

# Label column and the input columns fed to every model
TARGET = "isFraud"
FEATURES = [
    "log_amount",
    "is_high_value",
    "is_night",
    "cvv_mismatch",
    "country_mismatch",
    "card_not_present",
    "high_risk_category",
]

X = df_rest[FEATURES]
y = df_rest[TARGET]

# Hold out 30% of the rows for evaluation; stratifying on the label keeps the
# class proportions the same in both splits, and the fixed seed makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

# Models
# Ratio of negative to positive training labels, passed to XGBoost as the
# weight applied to the positive class.
n_negative = len(y_train[y_train == 0])
n_positive = len(y_train[y_train == 1])

# Candidate classifiers keyed by the name used for metric keys and file names
models = {
    "LogisticRegression": LogisticRegression(
        max_iter=1000,
        class_weight="balanced",
    ),
    "RandomForest": RandomForestClassifier(
        n_estimators=100,
        class_weight="balanced",
        random_state=42,
    ),
    "XGBoost": XGBClassifier(
        scale_pos_weight=(n_negative / n_positive),
        use_label_encoder=False,
        eval_metric="logloss",
        random_state=42,
    ),
}

# Training & Evaluation
# For each candidate model: fit on the training split, score on the held-out
# split, log metrics to W&B, persist the fitted model, and log a ROC plot.
MODEL_DIR = "../models/saved_models"
PLOT_DIR = "../models/plots"

try:
    # The output directories do not depend on the model, so create them once.
    os.makedirs(MODEL_DIR, exist_ok=True)
    os.makedirs(PLOT_DIR, exist_ok=True)

    for name, model in models.items():
        model.fit(X_train, y_train)
        preds = model.predict(X_test)
        # Second column of predict_proba is used as the positive-class score
        proba = model.predict_proba(X_test)[:, 1]

        # Metrics
        auc = roc_auc_score(y_test, proba)
        report = classification_report(y_test, preds, output_dict=True)
        # NOTE(review): the "1" key assumes the positive class is labelled
        # with the integer 1 in the target column - confirm against the data.
        positive = report["1"]

        wandb.log({
            f"{name}/ROC_AUC": auc,
            f"{name}/Precision": positive["precision"],
            f"{name}/Recall": positive["recall"],
            f"{name}/F1": positive["f1-score"],
        })

        # Save model
        save_path = f"{MODEL_DIR}/{name}.pkl"
        joblib.dump(model, save_path)
        # wandb.save refuses paths that climb above its base path, which a
        # relative "../..." path does when no base_path is given. Hand it an
        # absolute path with the containing directory as the base instead.
        abs_save_path = os.path.abspath(save_path)
        wandb.save(abs_save_path, base_path=os.path.dirname(abs_save_path))

        # Plot ROC curve
        fpr, tpr, _ = roc_curve(y_test, proba)
        fig, ax = plt.subplots()
        ax.plot(fpr, tpr, label=f"{name} (AUC = {auc:.2f})")
        # Diagonal reference line from (0, 0) to (1, 1)
        ax.plot([0, 1], [0, 1], linestyle="--", color="gray")
        ax.set_xlabel("False Positive Rate")
        ax.set_ylabel("True Positive Rate")
        ax.set_title(f"ROC Curve - {name}")
        ax.legend()
        roc_plot_path = f"{PLOT_DIR}/roc_{name}.png"
        fig.savefig(roc_plot_path)
        # The image is logged from the saved file, so the figure can be
        # released before the upload call.
        plt.close(fig)
        wandb.log({f"{name}/ROC Curve": wandb.Image(roc_plot_path)})
except BaseException:
    # Close the run on any failure (including a manual interrupt) so the
    # kernel is not left with a dangling active run, and mark it as failed.
    wandb.finish(exit_code=1)
    raise
else:
    wandb.finish()