# RoBERTa Detection Model After Generated Data
**Authors:** Matías Arévalo, Pilar Guerrero, Moritz Goebbels, Tomás Lock, Allan Stalker  
**Date:** January – May 2025  

## Purpose
Create a RoBERTa Model to detect scam/spam messages. Here we use the `train.csv` and `val.csv` files we created from the generated and original data.

To run this notebook, both files should be placed in the `generated_data/` folder. Otherwise, update the file paths below so that the notebook can find them.

## Import Libraries

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score
import numpy as np
import torch
from transformers import Trainer, TrainingArguments
from transformers import RobertaConfig, RobertaForSequenceClassification
from transformers import RobertaTokenizer
from transformers import EarlyStoppingCallback
from torch.nn import functional as F
from transformers import RobertaConfig, RobertaForSequenceClassification
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, confusion_matrix, classification_report
)
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    average_precision_score,
    confusion_matrix,
    classification_report
)
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import torch
from torch.utils.data import DataLoader

## Import Data & Preprocessing

### Loading Data

In [None]:
# Load the training split and preview its first rows.
train = pd.read_csv('generated_data/train.csv')
train.head()

In [None]:
# Load the validation split and preview its first rows.
val = pd.read_csv('generated_data/val.csv')
val.head()

In [None]:
# Encode the string labels as integers (ham -> 0, spam -> 1) in both splits.
label_to_id = {'ham': 0, 'spam': 1}
for split in (train, val):
    split['label'] = split['label'].map(label_to_id)

### X and y Values

In [None]:
# Keep only the rows that have message text, then pick the labels that sit on
# the same index so features and targets stay aligned.
X_train = train.loc[train['clean_message'].notna(), 'clean_message']
y_train = train.loc[X_train.index, 'label']

X_val = val.loc[val['clean_message'].notna(), 'clean_message']
y_val = val.loc[X_val.index, 'label']

## Detection Model

### Tokenization

In [None]:
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

# A single truncation limit for every split, so that validation messages are
# cut at the same length the model saw during training (previously 256 for
# train but 128 for validation).
MAX_LENGTH = 256


def encode_messages(messages):
    """Tokenize a pandas Series of messages into padded PyTorch tensors.

    Args:
        messages: Series holding the message text. Values are cast to ``str``
            first so that non-string entries (e.g. NaN) do not make the
            tokenizer raise; row order and count are preserved, which keeps
            the encodings aligned with the label column.

    Returns:
        The tokenizer output, padded to the longest sequence in ``messages``
        and truncated to ``MAX_LENGTH`` tokens.
    """
    return tokenizer(
        messages.astype(str).tolist(),
        padding=True,
        truncation=True,
        max_length=MAX_LENGTH,
        return_tensors='pt',
    )


train_encodings = encode_messages(train['clean_message'])
val_encodings = encode_messages(val['clean_message'])

### Loss Function
This custom loss function is designed to address class imbalance in spam detection by assigning a higher weight to the minority class (spam). In standard classification tasks, models tend to focus on the majority class (ham in this case), which can lead to poor recall for the underrepresented class. By subclassing RobertaForSequenceClassification and overriding the compute_loss method, the model is explicitly instructed to penalize misclassifications of spam messages more heavily using CrossEntropyLoss with class weights (e.g., [1.0, 2.0]). This weighting scheme tells the model that predicting spam incorrectly is twice as costly as predicting ham incorrectly, encouraging it to pay more attention to spam patterns and improving its recall and F1-score for that class during training.

In [None]:
class CustomRobertaForSequenceClassification(RobertaForSequenceClassification):
    """RoBERTa sequence classifier trained with a class-weighted cross-entropy.

    ``compute_loss`` is a hook of ``transformers.Trainer``, not of the model,
    so defining it only on the model meant the ``Trainer`` never called it and
    the class weights were never applied. The weighting is therefore done in
    ``forward``: whenever ``labels`` are supplied, the returned loss is the
    weighted cross-entropy, which is the loss the ``Trainer`` optimises.
    """

    # Loss weight per label id (0 = ham, 1 = spam): spam errors cost twice as much.
    class_weights = (1.0, 2.0)

    def _weighted_loss(self, logits, labels):
        """Return the class-weighted cross-entropy between ``logits`` and ``labels``."""
        weight = torch.tensor(
            self.class_weights, dtype=torch.float32, device=logits.device
        )
        # Compute the loss in float32 so half-precision logits cannot clash
        # with the float32 weight tensor.
        return torch.nn.functional.cross_entropy(
            logits.float().view(-1, logits.size(-1)),
            labels.view(-1),
            weight=weight,
        )

    def forward(self, *args, labels=None, **kwargs):
        """Run the base model and, if ``labels`` is given, attach the weighted loss.

        ``labels`` must be passed by keyword (this is how ``Trainer`` and
        ``model(**batch)`` call the model). It is withheld from the parent
        ``forward`` so the parent does not compute its unweighted loss.

        Returns:
            The parent's output when ``labels`` is ``None``; otherwise the same
            output with ``loss`` set to the weighted cross-entropy (prepended
            to the tuple when the model returns a tuple instead of a
            ``ModelOutput``).
        """
        outputs = super().forward(*args, **kwargs)
        if labels is None:
            return outputs

        if isinstance(outputs, tuple):
            # return_dict=False: the logits come first in the tuple.
            return (self._weighted_loss(outputs[0], labels),) + outputs

        # Local import keeps this cell self-contained.
        from transformers.modeling_outputs import SequenceClassifierOutput

        return SequenceClassifierOutput(
            loss=self._weighted_loss(outputs.logits, labels),
            logits=outputs.logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

    def compute_loss(self, model, inputs, return_outputs=False):
        """Compute the weighted loss of ``model`` on ``inputs``.

        Kept for backward compatibility with code that calls it directly;
        ``Trainer`` does not call this method on the model.

        Args:
            model: Callable model that accepts ``**inputs`` and whose output
                exposes ``"logits"``.
            inputs: Mapping of model inputs that includes ``"labels"``.
            return_outputs: When true, also return the model outputs.

        Returns:
            ``loss`` or ``(loss, outputs)`` depending on ``return_outputs``.
        """
        labels = inputs.get("labels")
        outputs = model(**inputs)
        loss = self._weighted_loss(outputs.get("logits"), labels)
        return (loss, outputs) if return_outputs else loss

### Model Architecture

In [None]:
# Pretrained roberta-base configuration with a 2-label head and dropout raised to 0.2.
config = RobertaConfig.from_pretrained("roberta-base", num_labels=2, hidden_dropout_prob=0.2, attention_probs_dropout_prob=0.2)
# Load the pretrained weights together with this config. Building the model
# from the config alone would give randomly initialised weights.
model = CustomRobertaForSequenceClassification.from_pretrained("roberta-base", config=config)

In [None]:
def compute_metrics(eval_pred):
    """Compute accuracy, spam-class precision/recall/F1 and ROC AUC.

    Args:
        eval_pred: Pair ``(logits, labels)`` where ``logits`` is a 2-D array of
            per-class scores and ``labels`` holds the integer class ids.

    Returns:
        Dict with keys ``accuracy``, ``precision_spam``, ``recall_spam``,
        ``f1_spam`` and ``auc``. ``auc`` is NaN when scikit-learn cannot
        compute it (for example when ``labels`` contains a single class).
    """
    logits, labels = eval_pred
    # float64 avoids overflow in the softmax below when logits are half precision.
    logits = np.asarray(logits, dtype=np.float64)
    predictions = np.argmax(logits, axis=-1)

    # Ask for every class explicitly so index 1 (spam) exists even when a
    # class is absent from both labels and predictions.
    class_ids = np.arange(logits.shape[1])
    precision_all, recall_all, f1_all, _ = precision_recall_fscore_support(
        labels, predictions, labels=class_ids, average=None, zero_division=0
    )

    precision_spam = precision_all[1]
    recall_spam = recall_all[1]
    f1_spam = f1_all[1]

    acc = accuracy_score(labels, predictions)

    # Numerically stable softmax: subtracting the row maximum leaves the
    # result unchanged but keeps exp() from overflowing.
    exp_shifted = np.exp(logits - logits.max(axis=1, keepdims=True))
    probs = exp_shifted / exp_shifted.sum(axis=1, keepdims=True)

    try:
        if logits.shape[1] == 2:
            auc = roc_auc_score(labels, probs[:, 1])
        else:
            auc = roc_auc_score(labels, probs, multi_class='ovr')
    except ValueError:
        # roc_auc_score rejects inputs it cannot score, e.g. a single class in labels.
        auc = float('nan')

    return {
        'accuracy': acc,
        'precision_spam': precision_spam,
        'recall_spam': recall_spam,
        'f1_spam': f1_spam,
        'auc': auc
    }

### Training Model

In [None]:
class SMSDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer encodings with their labels.

    Args:
        encodings: Mapping from field name (e.g. ``input_ids``) to a sequence
            indexable by example; each entry may be a nested list or a tensor.
        labels: Sequence with one label per example.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    @staticmethod
    def _to_tensor(value):
        """Return ``value`` as a tensor that is independent of the stored data."""
        if isinstance(value, torch.Tensor):
            # torch.tensor(tensor) emits a copy-construct UserWarning;
            # clone().detach() is the recommended equivalent.
            return value.clone().detach()
        return torch.tensor(value)

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict of tensors, with its label under ``labels``."""
        item = {
            field: self._to_tensor(values[idx])
            for field, values in self.encodings.items()
        }
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        """Return the number of examples (one per label)."""
        return len(self.labels)

# Wrap each split's encodings and integer labels in a torch Dataset.
train_dataset = SMSDataset(
    encodings=train_encodings,
    labels=train['label'].tolist(),
)
val_dataset = SMSDataset(
    encodings=val_encodings,
    labels=val['label'].tolist(),
)

In [None]:
# NOTE(review): `epochs` was used below without being defined anywhere in this
# notebook, which raised a NameError. 10 is an assumed upper bound on the
# number of epochs — TODO confirm the value used for the original run.
epochs = 10

training_args = TrainingArguments(
    output_dir="./results_roberta",
    # Evaluate and checkpoint once per epoch; both must match so the best
    # checkpoint can be reloaded at the end.
    eval_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=3,
    load_best_model_at_end=True,
    metric_for_best_model="f1_spam",      # select checkpoints by spam-class F1
    greater_is_better=True,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=32,
    gradient_accumulation_steps=4,        # 8 x 4 = 32 examples per optimizer step per device
    num_train_epochs=epochs,
    weight_decay=0.1,
    warmup_steps=100,
    lr_scheduler_type="cosine",
    learning_rate=2e-5,                   # deliberately low learning rate
    max_grad_norm=1.0,                    # gradient clipping
    logging_dir="./logs_roberta",
    logging_steps=100,
    report_to="tensorboard",
    # Mixed precision needs a CUDA device; fall back to full precision on CPU
    # instead of failing at construction time.
    fp16=torch.cuda.is_available(),
)

In [None]:
# Start from the pretrained roberta-base weights with a 2-label classification head.
roberta_classifier = CustomRobertaForSequenceClassification.from_pretrained(
    "roberta-base",
    num_labels=2,
)

# Early stopping with a patience of 2 evaluations.
stop_when_stalled = EarlyStoppingCallback(early_stopping_patience=2)

trainer = Trainer(
    model=roberta_classifier,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    callbacks=[stop_when_stalled],
)

# The loss used by default is cross-entropy.
# FocalLoss or label smoothing could be plugged in later.

In [None]:
# Fine-tune the model; with the arguments above, evaluation and checkpointing
# run once per epoch.
trainer.train()

In [None]:
# List the contents of the Trainer output directory ("./results_roberta").
import os
os.listdir("results_roberta")

In [None]:
%ls ./logs_roberta

In [None]:
%load_ext tensorboard

In [None]:
%tensorboard --logdir ./logs_roberta

## Test Model (Inference and Metrics)

In [None]:
def _collect_spam_probabilities(model, dataset, batch_size, device):
    """Run ``model`` over ``dataset`` and return (spam probabilities, labels) as arrays."""
    dataloader = DataLoader(dataset, batch_size=batch_size)

    all_probs = []
    all_labels = []

    with torch.no_grad():
        for batch in dataloader:
            batch = {k: v.to(device) for k, v in batch.items()}
            logits = model(**batch).logits
            probs = torch.softmax(logits, dim=1)[:, 1]  # probability of class 1 ("spam")
            all_probs.extend(probs.cpu().numpy())
            all_labels.extend(batch['labels'].cpu().numpy())

    return np.array(all_probs), np.array(all_labels)


def evaluate_model(model, val_dataset, batch_size=32, threshold=0.5):
    """
    Evaluates a transformer model on a torch Dataset (like SMSDataset) and prints formatted metrics.

    Args:
        model: Sequence-classification model whose output exposes ``.logits``
            with one column per class; column 1 is treated as "spam".
        val_dataset: Dataset yielding dicts of tensors that include ``labels``.
        batch_size: Number of examples per inference batch.
        threshold: A message is predicted as spam when its spam probability
            is greater than or equal to this value.

    Returns:
        Dict with ``accuracy``, ``precision``, ``recall``, ``f1``, ``roc_auc``,
        ``pr_auc`` and ``confusion_matrix`` (2x2, rows = actual, columns =
        predicted, order ham/spam). ``roc_auc`` is NaN when it cannot be
        computed, e.g. when only one class is present in the labels.

    Side effects:
        Moves ``model`` to the available device, switches it to eval mode,
        prints the metrics and shows a confusion-matrix heatmap.
    """

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    all_probs, all_labels = _collect_spam_probabilities(
        model, val_dataset, batch_size, device
    )
    all_preds = (all_probs >= threshold).astype(int)

    # Metrics. zero_division=0 gives a defined score (and no warning) when a
    # class is never predicted or never present.
    acc = accuracy_score(all_labels, all_preds)
    prec = precision_score(all_labels, all_preds, zero_division=0)
    rec = recall_score(all_labels, all_preds, zero_division=0)
    f1 = f1_score(all_labels, all_preds, zero_division=0)
    try:
        auc = roc_auc_score(all_labels, all_probs)
    except ValueError:
        # roc_auc_score raises when the labels contain a single class.
        auc = float('nan')
    pr_auc = average_precision_score(all_labels, all_probs)

    print(f"Accuracy: {acc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall: {rec:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print()

    # Classification Report. Passing labels explicitly keeps the two
    # target_names valid even if one class is missing from the data.
    print("Classification Report:\n")
    print(classification_report(
        all_labels, all_preds,
        labels=[0, 1], target_names=["ham", "spam"], zero_division=0,
    ))

    # Confusion Matrix, always 2x2 so it lines up with the tick labels.
    cm = confusion_matrix(all_labels, all_preds, labels=[0, 1])
    plt.figure(figsize=(4, 3))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=["ham", "spam"], yticklabels=["ham", "spam"])
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.title("Confusion Matrix")
    plt.tight_layout()
    plt.show()

    # Return all metrics in case you want to log or save them
    return {
        "accuracy": acc,
        "precision": prec,
        "recall": rec,
        "f1": f1,
        "roc_auc": auc,
        "pr_auc": pr_auc,
        "confusion_matrix": cm
    }

In [None]:
from transformers import RobertaForSequenceClassification

# Checkpoint directory to evaluate, inside the Trainer output directory.
# NOTE(review): the step number (7660) is hard-coded — presumably taken from a
# previous training run; confirm this folder exists before running the cells below.
checkpoint_path = "./results_roberta/checkpoint-7660"

In [None]:
# Reload the fine-tuned weights from the chosen checkpoint and score them on
# the validation set with the default 0.5 decision threshold.
checkpoint_model = CustomRobertaForSequenceClassification.from_pretrained(checkpoint_path)
evaluate_model(model=checkpoint_model, val_dataset=val_dataset, batch_size=32, threshold=0.5)

In [None]:
import shutil
from pathlib import Path

# Zip the evaluated checkpoint into the current working directory as
# "<checkpoint folder name>.zip". Deriving both paths from `checkpoint_path`
# replaces the hard-coded Colab "/content/..." paths, so the cell also works
# outside Colab (on Colab, where the working directory is /content, the
# archive lands in the same place as before).
shutil.make_archive(Path(checkpoint_path).name, 'zip', checkpoint_path)