# Fraud Detection — Hybrid TF-IDF + RoBERTa (Colab)

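This notebook loads `text_dataset.csv` together with `synthetic_text_dataset.csv` (uploaded in Colab, read from `processed_data/` otherwise), merges a capped sample of the synthetic fraud rows into the original data, performs a stratified train/test split, extracts **TF-IDF features** and **RoBERTa contextual embeddings**, fuses them in a hybrid model, and reports evaluation metrics.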

**Architecture:** Text → TF-IDF (up to 2000-d → 256-d projection) + RoBERTa [CLS] (768-d) → Concatenate (1024-d) → Classifier → Fraud / Not Fraud

In [None]:
# Install dependencies (Colab-compatible — use Colab's pre-installed pandas/numpy)
!pip -q install transformers scikit-learn scipy joblib

In [None]:
import os
import io
import warnings
import joblib
import numpy as np
import pandas as pd
from pathlib import Path
import scipy.sparse

import torch
import torch.nn as nn
from torch.utils.data import Dataset

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    roc_auc_score,
    f1_score,
    accuracy_score,
    average_precision_score,
)

from transformers import (
    RobertaTokenizer,
    RobertaModel,
    Trainer,
    TrainingArguments,
    EarlyStoppingCallback,
)

# Suppress every warning for the rest of the session to keep cell output compact.
warnings.filterwarnings("ignore")

# Colab detection: the import only succeeds when the google.colab package is
# installed. `files` (used later for upload/download) is bound only in that case.
try:
    from google.colab import files
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True

In [None]:
# Config
# Central hyper-parameters for the notebook. The trailing notes keep the
# original tuning rationale next to each value.
OUTPUT_DIR = Path('./roberta_fraud_model')  # Trainer checkpoints and exported artifacts
MODEL_NAME = 'roberta-base'
MAX_LEN = 256            # tokenizer max_length (texts are truncated / padded to this)
BATCH_SIZE = 8           # per-device train batch size (eval uses 2x this)
GRAD_ACCUM = 4           # gradient accumulation steps
EPOCHS = 5               # restored — 3 was too few, model barely learned
LR = 2e-5                # restored — 1e-5 was too slow for 5 epochs
DROPOUT = 0.3            # restored — 0.4 was too aggressive with layer freezing
LABEL_SMOOTHING = 0.05   # reduced from 0.1 — less target softening
SEED = 42
SYNTH_CAP = 150          # max synthetic fraud rows to include (was 500)

# TF-IDF config
TFIDF_MAX_FEATURES = 2000    # reduced from 5000 — fewer features = less overfitting
TFIDF_NGRAM_RANGE = (1, 2)   # unigrams + bigrams

# Prefer the GPU when one is visible to PyTorch.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')
if device.type == 'cuda':
    print(f'GPU: {torch.cuda.get_device_name(0)}')

# The next cell requires BOTH CSV files (it raises if the synthetic file is
# missing) and only prompts for an upload when running in Colab.
print(
    '\nNext cell needs text_dataset.csv and synthetic_text_dataset.csv '
    '(upload prompt in Colab, read from processed_data/ otherwise)'
)

In [None]:
# Upload and load text_dataset.csv + synthetic_text_dataset.csv
# In Colab both files come from a single upload prompt; otherwise they are read
# from processed_data/. Both files are required in either mode.
if IN_COLAB:
    print('Upload text_dataset.csv and synthetic_text_dataset.csv ...')
    uploaded = files.upload()
    if 'text_dataset.csv' not in uploaded:
        raise ValueError(f'Expected text_dataset.csv. Uploaded: {list(uploaded.keys())}')
    if 'synthetic_text_dataset.csv' not in uploaded:
        raise ValueError('Also upload synthetic_text_dataset.csv')
    df = pd.read_csv(io.BytesIO(uploaded['text_dataset.csv']))
    synth_df = pd.read_csv(io.BytesIO(uploaded['synthetic_text_dataset.csv']))
else:
    df = pd.read_csv('processed_data/text_dataset.csv')
    synth_df = pd.read_csv('processed_data/synthetic_text_dataset.csv')

print(f'Original dataset: {df.shape}  |  Synthetic (fraud): {synth_df.shape}')
print(f'Original fraud dist:\n{df["fraud_label"].value_counts()}')

# Cap synthetic data to prevent overfitting
# The full 500 synthetic rows (all fraud) push fraud to 20.7%, which was
# too aggressive. Capping at SYNTH_CAP gives a more balanced boost.
# Remember the pre-cap size BEFORE sampling so the log line reports the real
# original count rather than the already-capped length.
n_synth_available = len(synth_df)
if n_synth_available > SYNTH_CAP:
    synth_df = synth_df.sample(n=SYNTH_CAP, random_state=SEED)
    print(f'\nCapped synthetic data to {SYNTH_CAP} rows (from {n_synth_available})')

# Combine original + capped synthetic
df = pd.concat([df, synth_df], ignore_index=True)
n_total = len(df)
n_fraud = df['fraud_label'].sum()
print(f'\nCombined dataset: {df.shape}')
print(f'Combined fraud dist:\n{df["fraud_label"].value_counts()}')
print(f'Fraud ratio: {100*n_fraud/n_total:.1f}%\n')

# Missing text fields become empty strings so the concatenation below never
# produces NaN for a whole row.
text_cols = ['title_cleaned', 'description_cleaned', 'review1_cleaned', 'review2_cleaned']
df[text_cols] = df[text_cols].fillna('')

# Single model input: the four text fields joined with field-name prefixes.
df['text'] = (
    'Title: ' + df['title_cleaned']
    + ' Description: ' + df['description_cleaned']
    + ' Review1: ' + df['review1_cleaned']
    + ' Review2: ' + df['review2_cleaned']
)

In [None]:
# Train / Test split (stratified)
# 80/20 split that preserves the fraud ratio in both partitions.
# NOTE(review): `df` already includes the capped synthetic rows (they are
# concatenated before this cell), so some synthetic rows end up in the test
# split — confirm this is intended for the reported test metrics.
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df['fraud_label'],
    random_state=SEED,
)
train_fraud_ratio = train_df['fraud_label'].mean()
test_fraud_ratio = test_df['fraud_label'].mean()
print(f'Train: {len(train_df)} | Test: {len(test_df)}')
print(f'Train fraud ratio: {train_fraud_ratio:.4f}')
print(f'Test  fraud ratio: {test_fraud_ratio:.4f}\n')

# Fit TF-IDF on training text only (prevent data leakage); the test split is
# transformed with the vocabulary/IDF weights learned from the training split.
print('Fitting TF-IDF vectorizer on training set ...')
tfidf = TfidfVectorizer(
    ngram_range=TFIDF_NGRAM_RANGE,
    max_features=TFIDF_MAX_FEATURES,
    strip_accents='unicode',
    sublinear_tf=True,
)

# Densify to float32 arrays so the matrices can be turned into torch tensors.
tfidf_train = tfidf.fit_transform(train_df['text'])
tfidf_train = tfidf_train.toarray().astype(np.float32)
tfidf_test = tfidf.transform(test_df['text'])
tfidf_test = tfidf_test.toarray().astype(np.float32)

# Actual feature width (number of columns in the fitted TF-IDF matrix).
TFIDF_DIM = tfidf_train.shape[1]
print(f'TF-IDF vocabulary size: {len(tfidf.vocabulary_)}')
print(f'TF-IDF feature dimension: {TFIDF_DIM}')

In [None]:
# Tokenizer & dataset (with TF-IDF features)
# Load the pretrained tokenizer for MODEL_NAME. FraudTextDataset (below) reads
# this module-level `tokenizer` when it encodes its texts, and the save cell
# exports it alongside the model weights.
tokenizer = RobertaTokenizer.from_pretrained(MODEL_NAME)

class FraudTextDataset(Dataset):
    """Torch dataset pairing tokenized text with its TF-IDF vector and label.

    All texts are tokenized once in the constructor using the module-level
    ``tokenizer`` and ``MAX_LEN`` (truncated and padded to ``MAX_LEN``), so
    item access is a plain tensor lookup.

    Args:
        texts: Sequence of raw strings to tokenize.
        labels: Integer class labels, one per text.
        tfidf_matrix: Array-like of TF-IDF rows, one per text.
    """

    def __init__(self, texts, labels, tfidf_matrix):
        encoded = tokenizer(
            texts,
            max_length=MAX_LEN,
            padding='max_length',
            truncation=True,
            return_tensors='pt',
        )
        self.encodings = encoded
        self.tfidf = torch.tensor(tfidf_matrix, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.long)

    def __len__(self):
        # One label per example, so the label count is the dataset size.
        return self.labels.size(0)

    def __getitem__(self, idx):
        # Token tensors first, then the extra TF-IDF input and the target.
        sample = {
            key: self.encodings[key][idx]
            for key in ('input_ids', 'attention_mask')
        }
        sample['tfidf_features'] = self.tfidf[idx]
        sample['labels'] = self.labels[idx]
        return sample

# Build the train/test datasets. Each constructor call tokenizes its whole
# split up front, so these two statements can take a while.
print('Tokenizing training set ...')
train_dataset = FraudTextDataset(
    texts=train_df['text'].tolist(),
    labels=train_df['fraud_label'].tolist(),
    tfidf_matrix=tfidf_train,
)
print('Tokenizing test set ...')
test_dataset = FraudTextDataset(
    texts=test_df['text'].tolist(),
    labels=test_df['fraud_label'].tolist(),
    tfidf_matrix=tfidf_test,
)

In [None]:
# Hybrid model: RoBERTa [CLS] + TF-IDF → classifier

class RoBERTaTfidfFraudModel(nn.Module):
    """Hybrid classifier over a RoBERTa embedding and projected TF-IDF features.

    The hidden state at sequence position 0 of RoBERTa's last layer is
    concatenated with a 256-d projection of the TF-IDF vector, and the fused
    vector is passed through a two-layer classification head.

    Regularisation used here:
      - every RoBERTa parameter is frozen except the last 4 encoder blocks
        and the pooler (if the backbone has one)
      - dropout (``dropout``, default ``DROPOUT``) in the TF-IDF projection
        and in the classifier head
      - label smoothing (``LABEL_SMOOTHING``) in the cross-entropy loss
    No class weighting is applied in the loss.

    NOTE(review): the pooler is unfrozen, but ``forward`` reads
    ``last_hidden_state`` rather than ``pooler_output``, so the pooler does not
    feed the logits; its parameters are still counted as trainable.

    Args:
        model_name: Name or path passed to ``RobertaModel.from_pretrained``.
        tfidf_dim: Width of the incoming TF-IDF feature vector.
        num_labels: Number of output classes.
        dropout: Dropout probability for the projection and classifier.
    """

    def __init__(self, model_name, tfidf_dim, num_labels=2, dropout=DROPOUT):
        super().__init__()
        self.roberta = RobertaModel.from_pretrained(model_name)
        roberta_dim = self.roberta.config.hidden_size  # 768 for roberta-base per the original note

        # Freeze the whole backbone first, then re-enable gradients only
        # where fine-tuning should happen.
        self.roberta.requires_grad_(False)
        for block in self.roberta.encoder.layer[-4:]:
            block.requires_grad_(True)
        pooler = getattr(self.roberta, 'pooler', None)
        if pooler is not None:
            pooler.requires_grad_(True)

        tfidf_proj_dim = 256
        self.tfidf_proj = nn.Sequential(
            nn.Linear(tfidf_dim, tfidf_proj_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
        )

        # Head input width: backbone hidden size + projected TF-IDF width.
        fused_dim = roberta_dim + tfidf_proj_dim
        self.classifier = nn.Sequential(
            nn.Linear(fused_dim, 256),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(256, num_labels),
        )

    def forward(self, input_ids, attention_mask, tfidf_features, labels=None):
        """Run the hybrid model.

        Returns:
            dict with ``'logits'`` and ``'loss'``; ``'loss'`` is ``None`` when
            ``labels`` is not given.
        """
        encoder_out = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
        # First sequence position of the final layer serves as the sentence embedding.
        cls_emb = encoder_out.last_hidden_state[:, 0]

        tfidf_emb = self.tfidf_proj(tfidf_features)

        fused = torch.cat((cls_emb, tfidf_emb), dim=-1)
        logits = self.classifier(fused)

        if labels is None:
            loss = None
        else:
            # Light label smoothing to temper overconfident predictions.
            loss = nn.functional.cross_entropy(
                logits, labels, label_smoothing=LABEL_SMOOTHING
            )

        return {'loss': loss, 'logits': logits}


# Print class distribution of the training split
train_labels = train_df['fraud_label']
n_neg = (train_labels == 0).sum()
n_pos = (train_labels == 1).sum()
print(f'Class distribution: not-fraud={n_neg}, fraud={n_pos} ({100*n_pos/(n_neg+n_pos):.1f}%)')

# Instantiate hybrid model and move it to the selected device
model = RoBERTaTfidfFraudModel(MODEL_NAME, tfidf_dim=TFIDF_DIM).to(device)

# Parameter accounting: (element count, requires_grad) for every parameter tensor.
param_sizes = [(p.numel(), p.requires_grad) for p in model.parameters()]
total     = sum(size for size, _ in param_sizes)
trainable = sum(size for size, needs_grad in param_sizes if needs_grad)
frozen    = total - trainable
print(f'Total params:     {total:,}')
print(f'Trainable params: {trainable:,} ({100*trainable/total:.1f}%)')
print(f'Frozen params:    {frozen:,} ({100*frozen/total:.1f}%)')
print('Architecture: RoBERTa [CLS](768) + TF-IDF proj(256) → fused(1024) → classifier → 2')
print(f'Regularization: dropout={DROPOUT}, label_smoothing={LABEL_SMOOTHING}, freeze=all except last 4 layers')

# Training args
# Mixed precision is requested only when a CUDA device is available.
use_fp16 = torch.cuda.is_available()

training_args = TrainingArguments(
    # Bookkeeping
    output_dir=str(OUTPUT_DIR),
    seed=SEED,
    report_to='none',
    logging_steps=50,
    # Optimisation
    num_train_epochs=EPOCHS,
    learning_rate=LR,
    weight_decay=0.01,
    warmup_ratio=0.1,
    fp16=use_fp16,
    # Batching: eval runs at twice the train batch size
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=2 * BATCH_SIZE,
    gradient_accumulation_steps=GRAD_ACCUM,
    # Evaluate and checkpoint once per epoch; reload the checkpoint with the
    # highest 'f1' (as reported by compute_metrics) when training finishes.
    eval_strategy='epoch',
    save_strategy='epoch',
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model='f1',
    greater_is_better=True,
)

def compute_metrics(eval_pred):
    """Compute evaluation metrics from a (logits, labels) pair.

    Class index 1 is treated as the positive (fraud) class: its softmax
    probability feeds the ranking metrics, and the argmax over classes feeds
    the thresholded metrics.

    Returns:
        dict with keys 'accuracy', 'f1', 'roc_auc', 'avg_precision'.
    """
    logits, labels = eval_pred
    fraud_probs = torch.softmax(torch.tensor(logits), dim=-1)[:, 1].numpy()
    hard_preds = np.argmax(logits, axis=-1)

    # Metrics that score hard class predictions.
    label_metrics = {'accuracy': accuracy_score, 'f1': f1_score}
    # Metrics that score the positive-class probability.
    score_metrics = {'roc_auc': roc_auc_score, 'avg_precision': average_precision_score}

    results = {name: fn(labels, hard_preds) for name, fn in label_metrics.items()}
    results.update({name: fn(labels, fraud_probs) for name, fn in score_metrics.items()})
    return results

# Stop training when the monitored metric has not improved for 2 evaluations.
early_stopping = EarlyStoppingCallback(early_stopping_patience=2)

# NOTE(review): the test split doubles as the evaluation set here, so early
# stopping and best-checkpoint selection are driven by test-set F1. The final
# test metrics are therefore optimistic — consider a separate validation split.
trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    callbacks=[early_stopping],
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

In [None]:
# Train
# Print a three-line banner, then run the full Trainer loop.
banner = '=' * 60
print('\n'.join([banner, '  Starting RoBERTa fine-tuning', banner]))
trainer.train()

In [None]:
# Evaluate on test set
# Run inference, then derive hard predictions (argmax) and the fraud-class
# probability (softmax column 1) from the raw logits.
preds_output = trainer.predict(test_dataset)
logits = preds_output.predictions
y_true = test_df['fraud_label'].values
y_pred = np.argmax(logits, axis=-1)
probs = torch.softmax(torch.tensor(logits), dim=-1)[:, 1].numpy()

# Thresholded metrics use y_pred; ranking metrics use the probabilities.
acc, f1 = accuracy_score(y_true, y_pred), f1_score(y_true, y_pred)
roc, ap = roc_auc_score(y_true, probs), average_precision_score(y_true, probs)

rule = '=' * 60
summary_lines = (
    rule,
    '  Final Evaluation — RoBERTa + TF-IDF Hybrid',
    rule,
    f'Accuracy          : {acc:.4f}',
    f'F1 Score (fraud)  : {f1:.4f}',
    f'ROC-AUC           : {roc:.4f}',
    f'Avg Precision (PR): {ap:.4f}\n',
    'Classification Report:',
)
for line in summary_lines:
    print(line)
print(classification_report(y_true, y_pred, target_names=['Not Fraud', 'Fraud']))
print('Confusion Matrix:')
print(confusion_matrix(y_true, y_pred))

In [None]:
# Export test-set predictions for ensemble
# One row per test example: identifier, true label, fraud probability and the
# hard prediction, written to a CSV in the working directory.
predictions_csv = 'text_test_predictions.csv'
export_columns = {
    'product_id': test_df['product_id'].values,
    'fraud_label': y_true,
    'text_fraud_proba': probs,
    'text_pred': y_pred,
}
text_preds_df = pd.DataFrame(export_columns)
text_preds_df.to_csv(predictions_csv, index=False)
print(f'Saved {predictions_csv}  ({len(text_preds_df)} rows)')
print(text_preds_df.head())

# Download for local ensemble (the download helper exists only in Colab)
if IN_COLAB:
    files.download(predictions_csv)

In [None]:
# Save best model, tokenizer, and TF-IDF vectorizer
# Everything needed for inference goes into a single folder under OUTPUT_DIR.
save_path = OUTPUT_DIR / 'best_model'
save_path.mkdir(parents=True, exist_ok=True)

weights_file = save_path / 'hybrid_roberta_tfidf.pth'
vectorizer_file = save_path / 'tfidf_vectorizer.joblib'

# Full hybrid model weights (backbone + TF-IDF projection + classifier head)
torch.save(model.state_dict(), weights_file)

# Tokenizer files (needed for RoBERTa inference)
tokenizer.save_pretrained(str(save_path))

# Fitted TF-IDF vectorizer (needed to transform new text at inference)
joblib.dump(tfidf, vectorizer_file)

print(f'Saved to: {save_path.resolve()}')
for artifact_line in (
    '  - hybrid_roberta_tfidf.pth  (model weights)',
    '  - tokenizer files           (RoBERTa tokenizer)',
    '  - tfidf_vectorizer.joblib   (TF-IDF vectorizer)',
):
    print(artifact_line)

# Optional: zip the folder and download it when running in Colab
if IN_COLAB:
    import shutil
    shutil.make_archive('roberta_tfidf_model', 'zip', save_path)
    files.download('roberta_tfidf_model.zip')