# Model Quality Benchmark
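This notebook benchmarks a RoBERTa sequence-classification checkpoint on email phishing detection: it loads a labelled dataset, generates a prediction for each statement, and reports accuracy, precision, recall, and F1 together with a confusion matrix.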

In [None]:
import pandas as pd
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

## 1. Load Dataset

In [None]:
# Read the benchmark data from disk and report its dimensions.
df = pd.read_csv('dataset/dataset.csv')
print(f"Dataset shape: {df.shape}")
# Trailing expression: the notebook renders the first rows as the cell output.
df.head()

## 2. Load Model and Tokenizer

In [None]:
# Checkpoint directory on disk. The literal is split across two lines purely
# for readability; the pieces concatenate into one path string.
model_path = (
    './models/roberta-large-mnli-email-phishing-20250923T162611Z-1-001/'
    'roberta-large-mnli-email-phishing/checkpoint-276'
)

# Both the tokenizer and the classification model are loaded from that
# same checkpoint directory.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)

## 3. Generate Predictions

In [None]:
def predict(text):
    """Return the predicted class index for a single piece of text.

    The text is tokenized with truncation enabled, passed through the
    module-level ``model``, and the index of the largest logit is returned.

    Args:
        text: The input string to classify.

    Returns:
        The integer index of the highest-scoring class.
    """
    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
    # Inference only: disable autograd so no computation graph is built
    # (and kept alive) for every row of the dataset.
    with torch.no_grad():
        logits = model(**inputs).logits
    # Reduce over the class axis explicitly instead of the flattened tensor.
    return logits.argmax(dim=-1).item()


# Classify every statement in the dataset, one row at a time.
df['model_prediction'] = df['statement'].apply(predict)

## 4. Create Example Table

In [None]:
# Show a few random rows next to the model's output as a quick sanity check.
# The sample size is capped at the number of rows because DataFrame.sample
# raises ValueError when asked for more rows than exist (without replacement).
sample_size = min(5, len(df))
sample = df.sample(sample_size)[['id', 'statement', 'real answer', 'model_prediction']]
print("Sample Predictions:")
print(sample.to_markdown(index=False))

## 5. Calculate Metrics

In [None]:
# Reference labels and model outputs, both taken from the DataFrame.
y_true, y_pred = df['real answer'], df['model_prediction']

# Accuracy takes no averaging argument; the other three scores are computed
# with average='weighted'.
# NOTE(review): support-weighted recall is mathematically equal to accuracy,
# so the 'Recall' entry duplicates 'Accuracy' — confirm that weighted
# averaging is the intended choice here.
metrics = {
    'Accuracy': accuracy_score(y_true, y_pred),
    **{
        label: scorer(y_true, y_pred, average='weighted')
        for label, scorer in (
            ('Precision', precision_score),
            ('Recall', recall_score),
            ('F1 Score', f1_score),
        )
    },
}

# Print each score rounded to four decimal places.
print("\nModel Performance Metrics:")
for metric_name, score in metrics.items():
    print(f"{metric_name}: {score:.4f}")

## 6. Visualize Results

In [None]:
# --- Confusion matrix heatmap ---
cm = confusion_matrix(y_true, y_pred)
# NOTE(review): two tick labels assume a two-class problem — confirm that
# the labels and predictions only ever take two distinct values.
class_names = ['Negative', 'Positive']

plt.figure(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,  # write the count inside each cell
    fmt='d',     # format the counts as integers
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.title('Confusion Matrix')
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.show()

# --- Bar chart of the summary metrics ---
bar_colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']

plt.figure(figsize=(10, 5))
plt.bar(list(metrics.keys()), list(metrics.values()), color=bar_colors)
plt.title('Model Performance Metrics')
plt.ylabel('Score')
plt.ylim(0, 1)  # fix the y-axis to the 0-1 range
plt.xticks(rotation=45)
plt.show()