# Q2 Credit Card Fraud: Structured Pipeline

This notebook compares classical machine-learning models and neural networks on the credit card fraud dataset using the new transform pipeline described in `assets/configs/q2/*.json`. Each profile has its own configuration file specifying the sequence of transforms, so the code below stays identical regardless of the preprocessing recipe.

In [1]:
from pathlib import Path
import os, sys

# Parent of the current working directory; it is added to sys.path so the
# project packages imported in the next cell can be resolved.
MAIN_PATH = Path(os.getcwd()).parent
# Guard the append: notebook cells get re-run, and an unconditional append
# would add the same entry to sys.path again on every execution.
if str(MAIN_PATH) not in sys.path:
    sys.path.append(str(MAIN_PATH))


In [2]:
from __future__ import annotations

import json
from pathlib import Path
from typing import Mapping, Sequence

import numpy as np
import pandas as pd
import torch
from sklearn.metrics import accuracy_score, f1_score
from tqdm.auto import tqdm

from data_processings.datasets import CreditCardDataset
from data_processings.pipeline_builder import build_pipeline_from_config, load_pipeline_config
from data_processings.transforms import DFXPipeline
from models import build_model
from models.neural import build_dataloader, build_neural_model, predict_proba, train_neural_model
from data_processings import load_config


In [3]:
# Where the per-profile pipeline configs live and where neural checkpoints go.
CONFIG_DIR = MAIN_PATH.joinpath('assets', 'configs', 'q2')
SNAPSHOT_DIR = MAIN_PATH.joinpath('assets', 'snapshots')
SNAPSHOT_DIR.mkdir(parents=True, exist_ok=True)

# Profile name -> path of its JSON config; every file is named after its profile.
PROFILE_CONFIGS = {
    profile: CONFIG_DIR / f'{profile}.json'
    for profile in (
        'P0_baseline',
        'P1_robust_with_winsorize',
        'P2_minmax_global',
        'P3_log_amount_no_scaling',
    )
}

# Project-wide settings; the 'deep_models' section (empty if absent) drives
# the neural model construction further down.
GLOBAL_CONFIG = load_config()
DEEP_MODELS_CFG = GLOBAL_CONFIG.get('deep_models', {})

# Use the GPU when torch can see one, otherwise fall back to the CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
if DEVICE.type == 'cuda':
    # cuDNN benchmark mode is only switched on when running on CUDA.
    torch.backends.cudnn.benchmark = True


In [4]:
def split_time_series_frame(df: pd.DataFrame, split_cfg: Mapping[str, object]) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Cut *df* by row position into a leading train part and a trailing test part.

    Args:
        df: Frame to split. Rows are taken in their existing order; no sorting
            is performed here.
        split_cfg: Split options. ``method`` (default ``'time'``) must be
            ``'time'``; ``test_size`` (default ``0.2``) is the fraction of rows
            assigned to the trailing part.

    Returns:
        ``(train, test)`` as independent copies of the two slices.

    Raises:
        ValueError: If ``method`` is anything other than ``'time'``.
    """
    if split_cfg.get('method', 'time') != 'time':
        raise ValueError('Only time-based splits are supported in this notebook')

    n_rows = len(df)
    holdout_fraction = float(split_cfg.get('test_size', 0.2))
    boundary = int(n_rows * (1 - holdout_fraction))

    # Clamp the boundary to [1, n_rows - 1]; when the two limits conflict
    # (fewer than two rows) the lower limit of 1 takes precedence.
    if boundary > n_rows - 1:
        boundary = n_rows - 1
    if boundary < 1:
        boundary = 1

    head = df.iloc[:boundary].copy()
    tail = df.iloc[boundary:].copy()
    return head, tail


def prepare_train_test(
    raw_df: pd.DataFrame,
    pipeline: DFXPipeline,
    split_cfg: Mapping[str, object],
    target_column: str,
) -> tuple[pd.DataFrame, pd.Series, pd.DataFrame, pd.Series]:
    """Split the raw frame, preprocess both parts, and separate features from the target.

    The pipeline is fitted on the train part only and then applied to the test
    part. If preprocessing dropped the target column, it is copied back from
    the raw split, provided the row counts still match.

    Args:
        raw_df: Unprocessed dataset.
        pipeline: Preprocessing pipeline exposing ``fit_transform`` and ``transform``.
        split_cfg: Options forwarded to ``split_time_series_frame``.
        target_column: Name of the label column.

    Returns:
        ``(X_train, y_train, X_test, y_test)`` with the targets cast to ``int``.

    Raises:
        KeyError: If the target column is absent after preprocessing and cannot
            be restored from the raw split.
    """
    train_df, test_df = split_time_series_frame(raw_df, split_cfg)
    train_df = train_df.reset_index(drop=True)
    test_df = test_df.reset_index(drop=True)

    processed_train = pipeline.fit_transform(train_df)
    processed_test = pipeline.transform(test_df)

    # Train is checked before test; `note` only distinguishes the error text.
    for raw_part, processed_part, note in (
        (train_df, processed_train, ""),
        (test_df, processed_test, " (test set)"),
    ):
        if target_column in processed_part.columns:
            continue
        # Positional copy-back is only valid while no rows were added or dropped.
        restorable = target_column in raw_part.columns and len(raw_part) == len(processed_part)
        if not restorable:
            available = ", ".join(processed_part.columns.tolist())
            raise KeyError(
                f"Target column '{target_column}' missing after preprocessing{note}. Available columns: {available}"
            )
        processed_part[target_column] = raw_part[target_column].to_numpy()

    # pop() removes the label from the feature frames in place.
    y_train = processed_train.pop(target_column).astype(int)
    y_test = processed_test.pop(target_column).astype(int)
    return processed_train, y_train, processed_test, y_test


def evaluate_classical_models(
    model_keys: Sequence[str],
    metrics: Sequence[str],
    X_train: pd.DataFrame,
    y_train: pd.Series,
    X_test: pd.DataFrame,
    y_test: pd.Series,
    profile_name: str,
) -> list[dict[str, object]]:
    """Fit each configured classical model on the train split and score it on the test split.

    Args:
        model_keys: Keys passed one by one to ``build_model``.
        metrics: Metric names to report; ``'accuracy'`` and ``'f1'`` are recognised.
        X_train: Training features.
        y_train: Training labels.
        X_test: Test features.
        y_test: Test labels.
        profile_name: Preprocessing profile label used in messages and records.

    Returns:
        One record per model with bookkeeping fields plus the requested
        metrics; an empty list when ``model_keys`` is empty.
    """
    if not model_keys:
        print(f"[{profile_name}] No classical models configured; skipping classical evaluation.")
        return []

    print(f"[{profile_name}] Evaluating classical models: {', '.join(model_keys)}")
    train_matrix = X_train.to_numpy(dtype=np.float64)
    train_targets = y_train.to_numpy()
    test_matrix = X_test.to_numpy(dtype=np.float64)
    test_targets = y_test.to_numpy()

    # Which metrics to compute does not change from model to model.
    want_accuracy = 'accuracy' in metrics
    want_f1 = 'f1' in metrics
    n_features = X_train.shape[1]

    results: list[dict[str, object]] = []
    for model_key in model_keys:
        # No newline here so that " done." lands on the same output line.
        print(f"  -> fitting {model_key}...", end='', flush=True)
        estimator = build_model(model_key)
        estimator.fit(train_matrix, train_targets)
        predictions = estimator.predict(test_matrix)
        print(" done.")

        entry: dict[str, object] = dict(
            preproc_profile=profile_name,
            model=model_key,
            model_type='traditional',
            train_samples=len(train_matrix),
            test_samples=len(test_matrix),
            num_features=n_features,
        )
        if want_accuracy:
            entry['accuracy'] = accuracy_score(test_targets, predictions)
        if want_f1:
            entry['f1'] = f1_score(test_targets, predictions, zero_division=0)
        results.append(entry)

    return results


def train_neural_models(
    profile_name: str,
    model_keys: Sequence[str],
    X_train: pd.DataFrame,
    y_train: pd.Series,
    metrics: Sequence[str],
) -> list[dict[str, object]]:
    """Train each configured neural model for one profile, skipping models that already have a snapshot.

    The trailing 20% of the training rows are held out for validation. Class
    weights are computed from the remaining rows as
    ``total / (n_classes * count)`` and passed to the trainer. For every model
    actually trained, a ``.pth`` snapshot and a ``_history.json`` file are
    written to ``SNAPSHOT_DIR``.

    Args:
        profile_name: Preprocessing profile label; used in messages and file names.
        model_keys: Neural model keys; keys absent from ``DEEP_MODELS_CFG`` are skipped.
        X_train: Training features (converted to float32).
        y_train: Training labels (converted to int64).
        metrics: Metric names, stored in the snapshot metadata only.

    Returns:
        One log entry per model that was trained or skipped because a snapshot
        exists. Empty when there are no model keys, fewer than two samples, or
        only one class in the training partition.
    """
    logs: list[dict[str, object]] = []
    if not model_keys:
        print(f"[{profile_name}] No neural models configured; skipping neural training.")
        return logs

    X_tensor = torch.from_numpy(X_train.to_numpy(dtype=np.float32))
    y_tensor = torch.from_numpy(y_train.to_numpy(dtype=np.int64))

    n_samples = len(X_tensor)
    if n_samples < 2:
        print(f"[{profile_name}] Not enough samples to train neural models; skipping.")
        return logs

    val_fraction = 0.2
    # Clamp the boundary to [1, n_samples - 1]. With at least two samples this
    # guarantees that both partitions are non-empty, so no empty-validation
    # fallback is needed.
    split_idx = max(1, int(n_samples * (1 - val_fraction)))
    split_idx = min(split_idx, n_samples - 1)
    train_features = X_tensor[:split_idx]
    train_labels = y_tensor[:split_idx]
    val_features = X_tensor[split_idx:]
    val_labels = y_tensor[split_idx:]

    class_values, counts = np.unique(train_labels.numpy(), return_counts=True)
    if len(class_values) < 2:
        print(f"[{profile_name}] Neural training skipped: only one class present after balancing.")
        return logs
    # Inverse-frequency weights: rarer classes get proportionally larger weight.
    total = counts.sum()
    weights = total / (len(class_values) * counts)
    class_weights = torch.tensor(weights, dtype=torch.float32)

    input_dim = X_train.shape[1]
    num_classes = len(class_values)
    pin_memory = DEVICE.type == 'cuda'
    print(f"[{profile_name}] Training neural models: {', '.join(model_keys)}")

    for model_key in model_keys:
        if model_key not in DEEP_MODELS_CFG:
            print(f"  -> {model_key} not found in deep model config; skipping.")
            continue
        snapshot_path = SNAPSHOT_DIR / f"{profile_name}_{model_key}.pth"
        history_path = SNAPSHOT_DIR / f"{profile_name}_{model_key}_history.json"
        # An existing snapshot is treated as "already trained"; it is not
        # checked against the current configuration.
        if snapshot_path.exists():
            print(f"  -> {model_key}: snapshot exists, skipping training.")
            logs.append({
                'profile': profile_name,
                'model': model_key,
                'status': 'skipped (snapshot found)',
            })
            continue

        print(f"  -> training {model_key} ({train_features.shape[0]} samples)...")
        model, training_cfg = build_neural_model(model_key, input_dim, DEEP_MODELS_CFG, num_classes=num_classes)
        train_loader = build_dataloader(train_features, train_labels, training_cfg.batch_size, shuffle=True, pin_memory=pin_memory)
        val_loader = build_dataloader(val_features, val_labels, training_cfg.batch_size, shuffle=False, pin_memory=pin_memory)

        history = train_neural_model(
            model,
            train_loader,
            val_loader,
            epochs=training_cfg.epochs,
            learning_rate=training_cfg.learning_rate,
            class_weights=class_weights,
            device=DEVICE,
            progress_label=f"Train {profile_name}:{model_key}",
            save_path=snapshot_path,
            # Stored with the snapshot; evaluation reads 'input_dim' and
            # 'num_classes' back to rebuild the model.
            metadata={
                'model_key': model_key,
                'profile': profile_name,
                'input_dim': input_dim,
                'num_classes': num_classes,
                'metrics': list(metrics),
            },
            grad_clip=training_cfg.grad_clip,
        )

        with history_path.open('w', encoding='utf-8') as handle:
            json.dump(history, handle, indent=2)

        logs.append({
            'profile': profile_name,
            'model': model_key,
            'status': 'trained',
            'epochs': training_cfg.epochs,
            'snapshot': snapshot_path.name,
        })

        # Release cached GPU memory before the next model is built.
        if DEVICE.type == 'cuda':
            torch.cuda.empty_cache()

    return logs


def evaluate_neural_models(
    profile_name: str,
    model_keys: Sequence[str],
    X_test: pd.DataFrame,
    y_test: pd.Series,
    metrics: Sequence[str],
) -> list[dict[str, object]]:
    """Score every neural model that has a saved snapshot for this profile.

    Each snapshot is loaded from ``SNAPSHOT_DIR``. The model is rebuilt from
    the snapshot's ``metadata`` (falling back to the shape of the test data),
    its ``state_dict`` is restored, and the arg-max of the predicted
    probabilities is compared against the test labels.

    Args:
        profile_name: Preprocessing profile label; used in messages and file names.
        model_keys: Neural model keys to evaluate.
        X_test: Test features (converted to float32).
        y_test: Test labels.
        metrics: Metric names to report; ``'accuracy'`` and ``'f1'`` are recognised.

    Returns:
        One record per evaluated model. Models without a config entry or
        without a snapshot are skipped with a message.
    """
    records: list[dict[str, object]] = []
    if not model_keys:
        print(f"[{profile_name}] No neural models configured; skipping neural evaluation.")
        return records

    y_true = y_test.to_numpy()
    test_tensor = torch.from_numpy(X_test.to_numpy(dtype=np.float32))
    test_labels = torch.from_numpy(y_true.astype(np.int64))

    pin_memory = DEVICE.type == 'cuda'
    test_loader = build_dataloader(test_tensor, test_labels, batch_size=1024, shuffle=False, pin_memory=pin_memory)

    print(f"[{profile_name}] Evaluating neural models: {', '.join(model_keys)}")
    for model_key in model_keys:
        # Report the two skip reasons separately, using the same wording as
        # train_neural_models for an unknown model key.
        if model_key not in DEEP_MODELS_CFG:
            print(f"  -> {model_key} not found in deep model config; skipping.")
            continue
        snapshot_path = SNAPSHOT_DIR / f"{profile_name}_{model_key}.pth"
        if not snapshot_path.exists():
            print(f"  -> {model_key}: snapshot missing; skipping evaluation.")
            continue

        # NOTE(review): torch.load relies on pickle; only load snapshots that
        # were produced by this project.
        payload = torch.load(snapshot_path, map_location=DEVICE)
        metadata = payload.get('metadata', {})
        model, _ = build_neural_model(
            model_key,
            input_dim=int(metadata.get('input_dim', X_test.shape[1])),
            config=DEEP_MODELS_CFG,
            num_classes=int(metadata.get('num_classes', len(np.unique(y_true)))),
        )
        model.load_state_dict(payload['state_dict'])
        model = model.to(DEVICE)
        # A freshly built module starts in training mode; switch to eval so
        # layers such as dropout behave deterministically during inference.
        # Harmless if predict_proba already does this.
        model.eval()
        probs = predict_proba(model, test_loader)
        preds = torch.argmax(probs, dim=1).cpu().numpy()

        record: dict[str, object] = {
            'preproc_profile': profile_name,
            'model': model_key,
            'model_type': 'neural',
            # The training-set size is not known at evaluation time.
            'train_samples': None,
            'test_samples': len(X_test),
            'num_features': X_test.shape[1],
        }
        if 'accuracy' in metrics:
            record['accuracy'] = accuracy_score(y_true, preds)
        if 'f1' in metrics:
            record['f1'] = f1_score(y_true, preds, zero_division=0)
        records.append(record)

    return records



In [5]:
# Load the raw dataset once; the dataset options are read from the baseline
# profile's config and the same frame is reused for every profile below.
dataset_loader = CreditCardDataset()
base_config = load_pipeline_config(PROFILE_CONFIGS['P0_baseline'])
dataset_options = base_config.get('dataset_options', {})
raw_df = dataset_loader.load(dataset_options)
print(f'Dataset shape: {raw_df.shape}')

Dataset shape: (10000, 31)


In [6]:

# Accumulators shared by all preprocessing profiles.
all_records, training_logs = [], []

for profile_name, config_path in tqdm(PROFILE_CONFIGS.items(), desc='Profiles'):
    cfg = load_pipeline_config(config_path)
    pipeline, metadata = build_pipeline_from_config(cfg)

    # The profile config wins over the pipeline metadata; 'Class' is the last resort.
    target_column = cfg.get('target_column', metadata.get('target_column', 'Class'))
    split_cfg = cfg.get('split', {'method': 'time', 'test_size': 0.2})
    metric_names = cfg.get('metrics', ['accuracy'])
    neural_keys = cfg.get('neural_models', [])

    X_train, y_train, X_test, y_test = prepare_train_test(raw_df, pipeline, split_cfg, target_column)

    # Classical models are fitted and scored in a single step.
    all_records.extend(
        evaluate_classical_models(cfg.get('models', []), metric_names, X_train, y_train, X_test, y_test, profile_name)
    )

    # Neural models are trained first (or skipped when a snapshot exists) and
    # then scored from their snapshots.
    training_logs.extend(train_neural_models(profile_name, neural_keys, X_train, y_train, metric_names))
    all_records.extend(evaluate_neural_models(profile_name, neural_keys, X_test, y_test, metric_names))

results_df = pd.DataFrame(all_records)
results_df


Profiles:   0%|          | 0/4 [00:00<?, ?it/s]

[P0_baseline] Evaluating classical models: logistic_regression, naive_bayes, decision_tree, svm, random_forest, gradient_boosting
  -> fitting logistic_regression... done.
  -> fitting naive_bayes... done.
  -> fitting decision_tree... done.
  -> fitting svm... done.
  -> fitting random_forest... done.
  -> fitting gradient_boosting... done.
[P0_baseline] Training neural models: mlp, residual_mlp, transformer
  -> training mlp (6400 samples)...


Train P0_baseline:mlp:   0%|          | 0/20 [00:00<?, ?it/s]

  -> training residual_mlp (6400 samples)...


Train P0_baseline:residual_mlp:   0%|          | 0/30 [00:00<?, ?it/s]

  -> training transformer (6400 samples)...


Train P0_baseline:transformer:   0%|          | 0/25 [00:00<?, ?it/s]

[P0_baseline] Evaluating neural models: mlp, residual_mlp, transformer
[P1_robust_with_winsorize] Evaluating classical models: logistic_regression, naive_bayes, decision_tree, svm, random_forest, gradient_boosting
  -> fitting logistic_regression... done.
  -> fitting naive_bayes... done.
  -> fitting decision_tree... done.
  -> fitting svm... done.
  -> fitting random_forest... done.
  -> fitting gradient_boosting... done.
[P1_robust_with_winsorize] Training neural models: mlp, residual_mlp, transformer
  -> training mlp (6400 samples)...


Train P1_robust_with_winsorize:mlp:   0%|          | 0/20 [00:00<?, ?it/s]

  -> training residual_mlp (6400 samples)...


Train P1_robust_with_winsorize:residual_mlp:   0%|          | 0/30 [00:00<?, ?it/s]

  -> training transformer (6400 samples)...


Train P1_robust_with_winsorize:transformer:   0%|          | 0/25 [00:00<?, ?it/s]

[P1_robust_with_winsorize] Evaluating neural models: mlp, residual_mlp, transformer
[P2_minmax_global] Evaluating classical models: logistic_regression, naive_bayes, decision_tree, svm, random_forest, gradient_boosting
  -> fitting logistic_regression... done.
  -> fitting naive_bayes... done.
  -> fitting decision_tree... done.
  -> fitting svm... done.
  -> fitting random_forest... done.
  -> fitting gradient_boosting... done.
[P2_minmax_global] Training neural models: mlp, residual_mlp, transformer
  -> training mlp (6400 samples)...


Train P2_minmax_global:mlp:   0%|          | 0/20 [00:00<?, ?it/s]

  -> training residual_mlp (6400 samples)...


Train P2_minmax_global:residual_mlp:   0%|          | 0/30 [00:00<?, ?it/s]

  -> training transformer (6400 samples)...


Train P2_minmax_global:transformer:   0%|          | 0/25 [00:00<?, ?it/s]

[P2_minmax_global] Evaluating neural models: mlp, residual_mlp, transformer
[P3_log_amount_no_scaling] Evaluating classical models: logistic_regression, naive_bayes, decision_tree, svm, random_forest, gradient_boosting
  -> fitting logistic_regression... done.
  -> fitting naive_bayes... done.
  -> fitting decision_tree... done.
  -> fitting svm... done.
  -> fitting random_forest... done.
  -> fitting gradient_boosting... done.
[P3_log_amount_no_scaling] Training neural models: mlp, residual_mlp, transformer
  -> training mlp (6400 samples)...


Train P3_log_amount_no_scaling:mlp:   0%|          | 0/20 [00:00<?, ?it/s]

  -> training residual_mlp (6400 samples)...


Train P3_log_amount_no_scaling:residual_mlp:   0%|          | 0/30 [00:00<?, ?it/s]

  -> training transformer (6400 samples)...


Train P3_log_amount_no_scaling:transformer:   0%|          | 0/25 [00:00<?, ?it/s]

[P3_log_amount_no_scaling] Evaluating neural models: mlp, residual_mlp, transformer


Unnamed: 0,preproc_profile,model,model_type,train_samples,test_samples,num_features,accuracy,f1
0,P0_baseline,logistic_regression,traditional,8000.0,2000,30,0.9965,0.787879
1,P0_baseline,naive_bayes,traditional,8000.0,2000,30,0.9915,0.604651
2,P0_baseline,decision_tree,traditional,8000.0,2000,30,0.9965,0.774194
3,P0_baseline,svm,traditional,8000.0,2000,30,0.9955,0.470588
4,P0_baseline,random_forest,traditional,8000.0,2000,30,0.9985,0.888889
5,P0_baseline,gradient_boosting,traditional,8000.0,2000,30,0.9965,0.774194
6,P0_baseline,mlp,neural,,2000,30,0.996,0.764706
7,P0_baseline,residual_mlp,neural,,2000,30,0.996,0.75
8,P0_baseline,transformer,neural,,2000,30,0.941,0.169014
9,P1_robust_with_winsorize,logistic_regression,traditional,8000.0,2000,30,0.998,0.866667


In [7]:
# Show the training log, or a placeholder message when no entries were recorded.
training_logs_df = pd.DataFrame(training_logs)
'No neural training required.' if training_logs_df.empty else training_logs_df

Unnamed: 0,profile,model,status,epochs,snapshot
0,P0_baseline,mlp,trained,20,P0_baseline_mlp.pth
1,P0_baseline,residual_mlp,trained,30,P0_baseline_residual_mlp.pth
2,P0_baseline,transformer,trained,25,P0_baseline_transformer.pth
3,P1_robust_with_winsorize,mlp,trained,20,P1_robust_with_winsorize_mlp.pth
4,P1_robust_with_winsorize,residual_mlp,trained,30,P1_robust_with_winsorize_residual_mlp.pth
5,P1_robust_with_winsorize,transformer,trained,25,P1_robust_with_winsorize_transformer.pth
6,P2_minmax_global,mlp,trained,20,P2_minmax_global_mlp.pth
7,P2_minmax_global,residual_mlp,trained,30,P2_minmax_global_residual_mlp.pth
8,P2_minmax_global,transformer,trained,25,P2_minmax_global_transformer.pth
9,P3_log_amount_no_scaling,mlp,trained,20,P3_log_amount_no_scaling_mlp.pth


## Notes
- Each profile uses its own configuration under `assets/configs/q2/`, making the preprocessing pipeline explicit and reusable.
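- Neural checkpoints are stored in `assets/snapshots/` and reused on subsequent runs: a model whose `.pth` snapshot already exists is not retrained, so delete the snapshot to force retraining (for example, after changing a profile's configuration).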