# Q2 Credit Fraud: Traditional vs Deep Models

This notebook compares classical machine learning models against neural networks on the credit card fraud dataset, using the same preprocessing profiles for both model families. Trained neural weights are persisted under `assets/snapshots`.

In [1]:
from pathlib import Path
import os, sys
# Put the parent of the current working directory on the import path.
# Presumably this is the project root that holds the `data_processings` and
# `models` packages imported in the next cell — confirm against the repo layout.
sys.path.append(str(Path(os.getcwd()).parent))

In [2]:
from __future__ import annotations

import json
import os
from pathlib import Path
from typing import Dict, Mapping, Sequence

import numpy as np
import pandas as pd
import torch
from sklearn.metrics import accuracy_score, f1_score
from tqdm.auto import tqdm

from data_processings import (
    apply_base_preprocessing,
    apply_feature_sets,
    apply_post_split_transforms,
    append_target,
    get_experiment_config,
    get_preprocessing_config,
    load_config,
    load_credit_card_data,
    select_feature_columns,
)
from models import build_model
from models.neural import build_dataloader, build_neural_model, predict_proba, train_neural_model

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if DEVICE.type == "cuda":
    # Allow cuDNN to benchmark candidate kernels and reuse the fastest one.
    torch.backends.cudnn.benchmark = True

# Snapshots are written to <parent of cwd>/assets/snapshots; the directory is
# created as a side effect of running this cell.
ROOT = Path(os.getcwd()).parent
SNAPSHOT_DIR = ROOT / "assets" / "snapshots"
SNAPSHOT_DIR.mkdir(parents=True, exist_ok=True)

# Configuration for this experiment, its dataset and the deep-model settings.
CONFIG = load_config()
EXPERIMENT_KEY = "q2_credit_fraud"
EXPERIMENT_CFG = get_experiment_config(EXPERIMENT_KEY)
DATASET_KEY = EXPERIMENT_CFG["dataset"]
PREPROCESSING_CFG = get_preprocessing_config(DATASET_KEY)
DEEP_MODELS_CFG = CONFIG.get("deep_models", {})

# Experiment knobs, frozen into tuples. A missing or empty
# "feature_sets_fixed" falls back to ["baseline"]; missing model lists become
# empty; missing "metrics" falls back to accuracy only.
FEATURE_SETS_FIXED = tuple(EXPERIMENT_CFG.get("feature_sets_fixed") or ["baseline"])
CLASSICAL_MODELS = tuple(EXPERIMENT_CFG.get("models", []))
NEURAL_MODELS = tuple(EXPERIMENT_CFG.get("neural_models", []))
SPLIT_CFG = EXPERIMENT_CFG.get("split", {})
METRICS = tuple(EXPERIMENT_CFG.get("metrics", ["accuracy"]))

# Seed the NumPy and PyTorch random number generators.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Cache of prepare_profile_data() results keyed by profile name. Profiles that
# were rejected for having too few rows are stored as None.
PROFILE_DATA_CACHE: Dict[str, Dict[str, object]] = {}


def load_dataset() -> pd.DataFrame:
    """Load the credit card data using the experiment's ``dataset_options``.

    ``parse_dates`` defaults to ``False`` and ``limit_rows`` to ``None`` when
    the corresponding option is absent from the experiment configuration.
    """
    dataset_options = EXPERIMENT_CFG.get("dataset_options", {})
    parse_dates = dataset_options.get("parse_dates", False)
    limit_rows = dataset_options.get("limit_rows")
    return load_credit_card_data(parse_dates=parse_dates, limit_rows=limit_rows)


def sanitize_features_target(features: pd.DataFrame, target: pd.Series) -> tuple[pd.DataFrame, pd.Series]:
    """Drop rows containing NaN or infinite values and normalise dtypes.

    Features and target are joined column-wise so that a row is removed from
    both whenever any of its values is NaN or +/-inf.

    Args:
        features: Feature matrix.
        target: Labels; an unnamed (or falsy-named) series is treated as
            ``"target"``.

    Returns:
        ``(features, target)`` restricted to the surviving rows, with features
        cast to ``float32`` and the target cast to ``int``.
    """
    label_column = target.name or "target"
    merged = pd.concat([features, target.rename(label_column)], axis=1)

    # Turn infinities into NaN first so a single dropna removes both kinds.
    finite_rows = merged.replace([np.inf, -np.inf], np.nan)
    finite_rows = finite_rows.dropna(axis=0)

    labels = finite_rows[label_column].astype(int)
    inputs = finite_rows.drop(columns=[label_column]).astype(np.float32)
    return inputs, labels


def split_time_series_frame(
    df: pd.DataFrame,
    split_cfg: Mapping[str, object],
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Split ``df`` by row order into a leading and a trailing part.

    Args:
        df: Frame to split; rows are taken in their existing order.
        split_cfg: Split settings. ``"method"`` must be ``"time"`` (the
            default) and ``"test_size"`` is the trailing fraction (default 0.2).

    Returns:
        ``(train, test)`` copies of the leading and trailing rows.

    Raises:
        ValueError: If ``split_cfg["method"]`` is anything other than ``"time"``.
    """
    strategy = split_cfg.get("method", "time")
    if strategy != "time":
        raise ValueError(f"Unsupported split method: {strategy}")

    n_rows = len(df)
    holdout_fraction = float(split_cfg.get("test_size", 0.2))
    cut = int(n_rows * (1 - holdout_fraction))

    # Clamp the cut: the upper bound is applied first, then the lower bound.
    if cut > n_rows - 1:
        cut = n_rows - 1
    if cut < 1:
        cut = 1

    leading = df.iloc[:cut].copy()
    trailing = df.iloc[cut:].copy()
    return leading, trailing


def prepare_labelled_frame(
    base_df: pd.DataFrame,
    feature_sets: Sequence[str],
    profile_config: Mapping[str, object],
) -> pd.DataFrame:
    """Add the requested feature sets and the target, then drop unusable rows.

    Args:
        base_df: Frame produced by the base preprocessing step.
        feature_sets: Names of the feature sets to apply.
        profile_config: Profile configuration forwarded as ``config_override``
            to both ``apply_feature_sets`` and ``append_target``.

    Returns:
        The labelled frame without any row that contains NaN or +/-inf.
    """
    with_features = apply_feature_sets(
        base_df, DATASET_KEY, feature_sets, config_override=profile_config
    )
    with_target = append_target(
        with_features, DATASET_KEY, config_override=profile_config
    )
    # Infinities become NaN so that one dropna call removes both.
    without_inf = with_target.replace([np.inf, -np.inf], np.nan)
    return without_inf.dropna(axis=0)


def prepare_profile_data(raw_df: pd.DataFrame, profile_name: str) -> Dict[str, object] | None:
    """Build, and cache, the train/test data for one preprocessing profile.

    The result is stored in ``PROFILE_DATA_CACHE`` under ``profile_name``, so
    repeated calls for the same profile return the cached value, including a
    cached ``None``.

    Args:
        raw_df: Raw dataset as loaded from disk.
        profile_name: Name of the preprocessing profile to apply.

    Returns:
        A dict with keys ``profile_config``, ``train_df``, ``test_df``,
        ``X_train``, ``y_train``, ``X_test`` and ``y_test``. ``None`` is
        returned when fewer than 50 labelled rows remain, or when, after
        sanitising, there are fewer than 50 training or 25 test rows.
    """
    try:
        return PROFILE_DATA_CACHE[profile_name]
    except KeyError:
        pass

    def _remember(result):
        # Cache the outcome (also a rejection) before handing it back.
        PROFILE_DATA_CACHE[profile_name] = result
        return result

    base_df, profile_config = apply_base_preprocessing(
        raw_df, DATASET_KEY, profile_name=profile_name
    )
    labelled = prepare_labelled_frame(base_df, FEATURE_SETS_FIXED, profile_config)
    if len(labelled) < 50:
        return _remember(None)

    # Split first, then apply the post-split transforms to both parts.
    train_df, test_df = split_time_series_frame(labelled, SPLIT_CFG)
    train_df, test_df = apply_post_split_transforms(train_df, test_df, profile_config)

    train_xy = select_feature_columns(train_df, DATASET_KEY, config_override=profile_config)
    test_xy = select_feature_columns(test_df, DATASET_KEY, config_override=profile_config)

    X_train, y_train = sanitize_features_target(*train_xy)
    X_test, y_test = sanitize_features_target(*test_xy)

    if not (len(X_train) >= 50 and len(X_test) >= 25):
        return _remember(None)

    # Give the test matrix the training column layout; columns it lacks are 0.
    X_test = X_test.reindex(columns=X_train.columns, fill_value=0.0)

    return _remember(
        {
            "profile_config": profile_config,
            "train_df": train_df,
            "test_df": test_df,
            "X_train": X_train,
            "y_train": y_train,
            "X_test": X_test,
            "y_test": y_test,
        }
    )


def evaluate_classical_models(data_entry: Dict[str, object], profile_name: str) -> list[dict[str, object]]:
    """Fit every configured classical model and score it on the test split.

    Args:
        data_entry: Profile data holding ``X_train``, ``y_train``, ``X_test``
            and ``y_test``.
        profile_name: Profile name recorded with every result row.

    Returns:
        One record per key in ``CLASSICAL_MODELS`` with sample/feature counts
        and whichever of ``accuracy`` / ``f1`` are listed in ``METRICS``. The
        list is empty when no classical models are configured.
    """
    if not CLASSICAL_MODELS:
        return []

    X_train: pd.DataFrame = data_entry["X_train"]
    y_train: pd.Series = data_entry["y_train"]
    X_test: pd.DataFrame = data_entry["X_test"]
    y_test: pd.Series = data_entry["y_test"]

    # The estimators are fed float64 matrices and plain label arrays.
    train_matrix = X_train.to_numpy(dtype=np.float64)
    train_labels = y_train.to_numpy()
    test_matrix = X_test.to_numpy(dtype=np.float64)
    test_labels = y_test.to_numpy()

    def _fit_and_score(model_key: str) -> dict[str, object]:
        estimator = build_model(model_key)
        estimator.fit(train_matrix, train_labels)
        predicted = estimator.predict(test_matrix)

        row: dict[str, object] = {
            "preproc_profile": profile_name,
            "model": model_key,
            "model_type": "traditional",
            "train_samples": len(train_matrix),
            "test_samples": len(test_matrix),
            "num_features": X_train.shape[1],
        }
        if "accuracy" in METRICS:
            row["accuracy"] = accuracy_score(test_labels, predicted)
        if "f1" in METRICS:
            row["f1"] = f1_score(test_labels, predicted, zero_division=0)
        return row

    return [_fit_and_score(model_key) for model_key in CLASSICAL_MODELS]


def stratified_class_weights(labels: pd.Series) -> torch.Tensor:
    """Return inverse-frequency class weights as a ``float32`` tensor.

    The weight of a class is ``n_samples / (n_classes * class_count)``, with
    classes ordered as ``np.unique`` returns them (sorted).
    """
    _, per_class = np.unique(labels, return_counts=True)
    n_samples = per_class.sum()
    n_classes = per_class.size
    return torch.tensor(n_samples / (n_classes * per_class), dtype=torch.float32)


def train_neural_profiles(raw_df: pd.DataFrame, overwrite: bool = False) -> list[dict[str, object]]:
    """Train each configured neural model on each preprocessing profile.

    For every profile that yields usable data, the training rows are split by
    position into a leading 80% used for fitting and a trailing 20% used for
    validation. Each key in ``NEURAL_MODELS`` is then trained, with its
    snapshot path passed to ``train_neural_model`` and the returned history
    written as JSON next to it in ``SNAPSHOT_DIR``.

    Args:
        raw_df: Raw dataset as loaded from disk.
        overwrite: When ``False``, a (profile, model) pair whose snapshot file
            already exists is skipped.

    Returns:
        One log record (``profile``, ``model``, ``epochs``, ``snapshot``) per
        model that was actually trained in this call.
    """
    training_log: list[dict[str, object]] = []
    ablation_key = EXPERIMENT_CFG.get("ablation_sets_key")
    profiles = PREPROCESSING_CFG.get(ablation_key) or []
    holdout_fraction = 0.2
    use_pinned_memory = DEVICE.type == "cuda"

    for profile_name in profiles:
        prepared = prepare_profile_data(raw_df, profile_name)
        if not prepared:
            continue

        X_train: pd.DataFrame = prepared["X_train"]
        y_train: pd.Series = prepared["y_train"]
        n_features = X_train.shape[1]
        n_classes = int(y_train.nunique())

        all_features = torch.from_numpy(X_train.to_numpy(dtype=np.float32))
        all_labels = torch.from_numpy(y_train.to_numpy(dtype=np.int64))

        # Positional cut: at least one row to fit on, at most all but one.
        n_rows = len(all_features)
        cut = max(1, int(n_rows * (1 - holdout_fraction)))
        cut = min(cut, n_rows - 1)

        fit_features, holdout_features = all_features[:cut], all_features[cut:]
        fit_labels, holdout_labels = all_labels[:cut], all_labels[cut:]

        # With nothing left to validate on, validate on the fitting rows.
        if len(holdout_features) == 0:
            holdout_features = fit_features.clone()
            holdout_labels = fit_labels.clone()

        # Weights are derived from the full training labels of the profile.
        class_weights = stratified_class_weights(y_train)

        for model_key in NEURAL_MODELS:
            snapshot_path = SNAPSHOT_DIR / f"{profile_name}_{model_key}.pth"
            history_path = SNAPSHOT_DIR / f"{profile_name}_{model_key}_history.json"
            if not overwrite and snapshot_path.exists():
                continue

            model, training_cfg = build_neural_model(
                model_key, n_features, DEEP_MODELS_CFG, num_classes=n_classes
            )
            batch_size = training_cfg.batch_size
            train_loader = build_dataloader(
                fit_features, fit_labels, batch_size, shuffle=True, pin_memory=use_pinned_memory
            )
            val_loader = build_dataloader(
                holdout_features, holdout_labels, batch_size, shuffle=False, pin_memory=use_pinned_memory
            )

            snapshot_metadata = {
                "model_key": model_key,
                "profile": profile_name,
                "input_dim": n_features,
                "num_classes": n_classes,
            }
            history = train_neural_model(
                model,
                train_loader,
                val_loader,
                epochs=training_cfg.epochs,
                learning_rate=training_cfg.learning_rate,
                class_weights=class_weights,
                device=DEVICE,
                progress_label=f"Train {profile_name}:{model_key}",
                save_path=snapshot_path,
                metadata=snapshot_metadata,
            )

            with open(history_path, "w", encoding="utf-8") as sink:
                json.dump(history, sink, indent=2)

            training_log.append(
                {
                    "profile": profile_name,
                    "model": model_key,
                    "epochs": training_cfg.epochs,
                    "snapshot": snapshot_path.name,
                }
            )

            # Release cached GPU memory before the next model is built.
            if DEVICE.type == "cuda":
                torch.cuda.empty_cache()

    return training_log


def evaluate_neural_models(data_entry: Dict[str, object], profile_name: str) -> list[dict[str, object]]:
    """Score every saved neural snapshot of a profile on its test split.

    For each key in ``NEURAL_MODELS`` the snapshot
    ``<profile_name>_<model_key>.pth`` is loaded from ``SNAPSHOT_DIR``; models
    without a snapshot are skipped. The network is rebuilt from the
    ``input_dim`` / ``num_classes`` stored in the snapshot metadata, falling
    back to the values derived from ``data_entry`` when the metadata lacks them.

    Args:
        data_entry: Profile data holding ``X_train``, ``y_train``, ``X_test``
            and ``y_test``.
        profile_name: Profile name, used both to locate snapshots and to label
            the result rows.

    Returns:
        One record per scored model with sample/feature counts and whichever
        of ``accuracy`` / ``f1`` are listed in ``METRICS``. The list is empty
        when no neural models are configured.
    """
    if not NEURAL_MODELS:
        return []

    X_test: pd.DataFrame = data_entry["X_test"]
    y_test: pd.Series = data_entry["y_test"]
    input_dim = X_test.shape[1]
    num_classes = int(data_entry["y_train"].nunique())

    X_test_tensor = torch.from_numpy(X_test.to_numpy(dtype=np.float32))
    y_test_tensor = torch.from_numpy(y_test.to_numpy(dtype=np.int64))
    # The ground truth is identical for every model, so convert it only once.
    y_true = y_test_tensor.numpy()
    pin_memory = DEVICE.type == "cuda"
    test_loader = build_dataloader(X_test_tensor, y_test_tensor, batch_size=1024, shuffle=False, pin_memory=pin_memory)

    records: list[dict[str, object]] = []
    for model_key in NEURAL_MODELS:
        snapshot_path = SNAPSHOT_DIR / f"{profile_name}_{model_key}.pth"
        if not snapshot_path.exists():
            continue

        # NOTE(review): torch.load unpickles the file; only load snapshots
        # written by this project.
        payload = torch.load(snapshot_path, map_location=DEVICE)
        metadata = payload.get("metadata", {})
        input_dim_meta = int(metadata.get("input_dim", input_dim))
        num_classes_meta = int(metadata.get("num_classes", num_classes))

        model, _ = build_neural_model(model_key, input_dim_meta, DEEP_MODELS_CFG, num_classes=num_classes_meta)
        model.load_state_dict(payload["state_dict"])
        model = model.to(DEVICE)
        # A freshly built module starts in training mode; switch to inference
        # behaviour explicitly (harmless if predict_proba already does so).
        model.eval()

        probs = predict_proba(model, test_loader)
        # Detach and move to host memory before converting: Tensor.numpy()
        # raises for CUDA tensors and for tensors that require grad.
        preds = torch.argmax(probs, dim=1).detach().cpu().numpy()

        result = {
            "preproc_profile": profile_name,
            "model": model_key,
            "model_type": "neural",
            "train_samples": len(data_entry["X_train"]),
            "test_samples": len(X_test_tensor),
            "num_features": input_dim_meta,
        }
        if "accuracy" in METRICS:
            result["accuracy"] = accuracy_score(y_true, preds)
        if "f1" in METRICS:
            result["f1"] = f1_score(y_true, preds, zero_division=0)
        records.append(result)

    return records


def run_evaluation(raw_df: pd.DataFrame) -> pd.DataFrame:
    """Evaluate classical and neural models on every ablation profile.

    Profiles for which ``prepare_profile_data`` returns nothing are skipped.
    Within a profile the classical results precede the neural ones.

    Args:
        raw_df: Raw dataset as loaded from disk.

    Returns:
        A frame with one row per evaluated (profile, model) pair.
    """
    ablation_key = EXPERIMENT_CFG.get("ablation_sets_key")
    rows: list[dict[str, object]] = []

    for profile_name in PREPROCESSING_CFG.get(ablation_key) or []:
        prepared = prepare_profile_data(raw_df, profile_name)
        if prepared:
            rows += evaluate_classical_models(prepared, profile_name)
            rows += evaluate_neural_models(prepared, profile_name)

    return pd.DataFrame(rows)


In [3]:
# Load the raw dataset and empty the per-profile cache so that every profile
# is rebuilt from this frame.
raw_df = load_dataset()
PROFILE_DATA_CACHE.clear()
print(f'Dataset shape: {raw_df.shape}')

Dataset shape: (200000, 31)


In [4]:
# Train only the (profile, model) pairs that have no snapshot on disk yet.
training_logs = train_neural_profiles(raw_df, overwrite=False)
training_logs_df = pd.DataFrame(training_logs)
# Show the training log, or a short message when nothing had to be trained.
training_logs_df if not training_logs_df.empty else 'No neural training executed (snapshots already present).'

Train P0_baseline:mlp:   0%|          | 0/20 [00:00<?, ?it/s]

Train P0_baseline:lstm:   0%|          | 0/25 [00:00<?, ?it/s]



Train P0_baseline:transformer:   0%|          | 0/25 [00:00<?, ?it/s]

Train P1_robust_with_winsorize:mlp:   0%|          | 0/20 [00:00<?, ?it/s]

Train P1_robust_with_winsorize:lstm:   0%|          | 0/25 [00:00<?, ?it/s]



Train P1_robust_with_winsorize:transformer:   0%|          | 0/25 [00:00<?, ?it/s]

Train P2_minmax_global:mlp:   0%|          | 0/20 [00:00<?, ?it/s]

Train P2_minmax_global:lstm:   0%|          | 0/25 [00:00<?, ?it/s]



Train P2_minmax_global:transformer:   0%|          | 0/25 [00:00<?, ?it/s]

Train P3_log_amount_no_scaling:mlp:   0%|          | 0/20 [00:00<?, ?it/s]

Train P3_log_amount_no_scaling:lstm:   0%|          | 0/25 [00:00<?, ?it/s]



Train P3_log_amount_no_scaling:transformer:   0%|          | 0/25 [00:00<?, ?it/s]

Unnamed: 0,profile,model,epochs,snapshot
0,P0_baseline,mlp,20,P0_baseline_mlp.pth
1,P0_baseline,lstm,25,P0_baseline_lstm.pth
2,P0_baseline,transformer,25,P0_baseline_transformer.pth
3,P1_robust_with_winsorize,mlp,20,P1_robust_with_winsorize_mlp.pth
4,P1_robust_with_winsorize,lstm,25,P1_robust_with_winsorize_lstm.pth
5,P1_robust_with_winsorize,transformer,25,P1_robust_with_winsorize_transformer.pth
6,P2_minmax_global,mlp,20,P2_minmax_global_mlp.pth
7,P2_minmax_global,lstm,25,P2_minmax_global_lstm.pth
8,P2_minmax_global,transformer,25,P2_minmax_global_transformer.pth
9,P3_log_amount_no_scaling,mlp,20,P3_log_amount_no_scaling_mlp.pth


In [5]:
# Evaluate all models on every profile.
ablation_results = run_evaluation(raw_df)
# Sort only by the columns that exist; an empty result frame has none of them.
sort_columns = [col for col in ['preproc_profile', 'model_type', 'model'] if col in ablation_results.columns]
if sort_columns:
    ablation_results = ablation_results.sort_values(sort_columns)
# Display with a fresh 0..n-1 index (the re-indexed frame is shown, not assigned back).
ablation_results.reset_index(drop=True)



Unnamed: 0,preproc_profile,model,model_type,train_samples,test_samples,num_features,accuracy,f1
0,P0_baseline,lstm,neural,160000,40000,30,0.99745,0.30137
1,P0_baseline,mlp,neural,160000,40000,30,0.978,0.051724
2,P0_baseline,transformer,neural,160000,40000,30,0.978425,0.046409
3,P0_baseline,decision_tree,traditional,160000,40000,30,0.99785,0.295082
4,P0_baseline,gradient_boosting,traditional,160000,40000,30,0.99935,0.48
5,P0_baseline,logistic_regression,traditional,160000,40000,30,0.9993,0.5
6,P0_baseline,naive_bayes,traditional,160000,40000,30,0.97645,0.044625
7,P0_baseline,random_forest,traditional,160000,40000,30,0.99965,0.740741
8,P0_baseline,svm,traditional,160000,40000,30,0.9995,0.545455
9,P1_robust_with_winsorize,lstm,neural,160000,40000,30,0.864925,0.009533


## Notes
- Neural model weights and per-epoch loss histories are stored under `assets/snapshots` and re-used on subsequent runs.
- Set `overwrite=True` when calling `train_neural_profiles` to retrain snapshots.
- Ensure PyTorch with CUDA is installed to take advantage of GPU acceleration.