# Phishing Email Detection with DistilBERT

Binary text classification: **phishing (1)** vs **legitimate (0)** emails.

- Model: [`distilbert-base-uncased`](https://huggingface.co/distilbert-base-uncased)
- Framework: HuggingFace Transformers + PyTorch
- Runtime: Google Colab (GPU recommended)

---

## 1 â€” Install Dependencies

In [1]:
!pip install -q transformers datasets accelerate scikit-learn matplotlib seaborn


[notice] A new release of pip is available: 25.3 -> 26.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


## 2 â€” Imports

In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch

from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score,
    precision_recall_fscore_support,
    confusion_matrix,
    classification_report,
)

from transformers import (
    DistilBertTokenizerFast,
    DistilBertForSequenceClassification,
    TrainingArguments,
    Trainer,
)
from datasets import Dataset

# Check device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
if device.type == "cuda":
    print(f"GPU: {torch.cuda.get_device_name(0)}")

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


Using device: cpu


## 3 â€” Load Dataset

Upload your CSV file to Colab (or mount Google Drive).  
The CSV must have at least two columns:
- **`text`** â€” the email content (string)
- **`label`** â€” binary target (`1` = phishing, `0` = legitimate)

In [3]:
# â”€â”€ Option A: Upload CSV directly in Colab â”€â”€
# from google.colab import files
# uploaded = files.upload()  # opens a file picker
# CSV_PATH = list(uploaded.keys())[0]

# â”€â”€ Option B: Set path manually â”€â”€
CSV_PATH = "dataset.csv"  # <-- change this to your file path

df = pd.read_csv(CSV_PATH)
print(f"Dataset shape: {df.shape}")
df.head()

FileNotFoundError: [Errno 2] No such file or directory: 'dataset.csv'

## 4 â€” Explore & Clean

In [None]:
# â”€â”€ Rename columns if needed â”€â”€
# Uncomment and adjust if your columns are named differently:
# df = df.rename(columns={"email_body": "text", "is_phishing": "label"})

# Ensure required columns exist
assert "text" in df.columns, "CSV must have a 'text' column"
assert "label" in df.columns, "CSV must have a 'label' column"

# Drop rows with missing text
df = df.dropna(subset=["text"])
df["text"] = df["text"].astype(str)
df["label"] = df["label"].astype(int)

print(f"Cleaned dataset shape: {df.shape}")
print(f"\nLabel distribution:")
print(df["label"].value_counts())
print(f"\nPhishing ratio: {df['label'].mean():.2%}")

In [None]:
# Label distribution bar chart
fig, ax = plt.subplots(figsize=(5, 3))
df["label"].value_counts().plot(kind="bar", color=["#2ecc71", "#e74c3c"], ax=ax)
ax.set_xticklabels(["Legitimate (0)", "Phishing (1)"], rotation=0)
ax.set_ylabel("Count")
ax.set_title("Label Distribution")
plt.tight_layout()
plt.show()

In [None]:
# Text length distribution
df["text_len"] = df["text"].str.len()

fig, ax = plt.subplots(figsize=(7, 3))
df.groupby("label")["text_len"].plot(kind="hist", bins=50, alpha=0.6, ax=ax,
                                      legend=True)
ax.set_xlabel("Character length")
ax.set_title("Text Length Distribution by Label")
ax.legend(["Legitimate", "Phishing"])
plt.tight_layout()
plt.show()

print(df.groupby("label")["text_len"].describe().round(0))
df = df.drop(columns=["text_len"])  # cleanup

## 5 â€” Train / Validation Split

In [None]:
TRAIN_RATIO = 0.8  # 80% train, 20% validation
RANDOM_SEED = 42

train_df, val_df = train_test_split(
    df[["text", "label"]],
    test_size=1 - TRAIN_RATIO,
    stratify=df["label"],
    random_state=RANDOM_SEED,
)

train_df = train_df.reset_index(drop=True)
val_df = val_df.reset_index(drop=True)

print(f"Train: {len(train_df)} samples")
print(f"Val:   {len(val_df)} samples")

## 6 â€” Tokenization

In [None]:
MODEL_NAME = "distilbert-base-uncased"
MAX_LENGTH = 512  # DistilBERT max context length

tokenizer = DistilBertTokenizerFast.from_pretrained(MODEL_NAME)


def tokenize_function(examples):
    """Tokenize a batch of texts with padding and truncation."""
    return tokenizer(
        examples["text"],
        padding="max_length",
        truncation=True,
        max_length=MAX_LENGTH,
    )


# Convert pandas DataFrames to HuggingFace Datasets
train_dataset = Dataset.from_pandas(train_df)
val_dataset = Dataset.from_pandas(val_df)

# Tokenize
train_dataset = train_dataset.map(tokenize_function, batched=True)
val_dataset = val_dataset.map(tokenize_function, batched=True)

# Set format for PyTorch
train_dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])
val_dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])

print(f"Tokenized train: {train_dataset}")
print(f"Tokenized val:   {val_dataset}")

## 7 â€” Load Pre-trained DistilBERT for Classification

In [None]:
NUM_LABELS = 2  # binary classification

model = DistilBertForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=NUM_LABELS,
)

model.to(device)
print(f"Model loaded on {device}")
print(f"Parameters: {sum(p.numel() for p in model.parameters()):,}")

## 8 â€” Training Configuration

In [None]:
OUTPUT_DIR = "./distilbert-phishing"

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    learning_rate=2e-5,
    weight_decay=0.01,
    warmup_ratio=0.1,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_steps=50,
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    greater_is_better=True,
    fp16=torch.cuda.is_available(),  # mixed precision on GPU
    report_to="none",  # disable W&B / MLflow logging
    seed=RANDOM_SEED,
)

print("Training arguments configured.")

## 9 â€” Metrics Callback

In [None]:
def compute_metrics(eval_pred):
    """Compute accuracy, precision, recall, and F1 for the Trainer."""
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)

    acc = accuracy_score(labels, preds)
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average="binary"
    )

    return {
        "accuracy": acc,
        "precision": precision,
        "recall": recall,
        "f1": f1,
    }

## 10 â€” Train

In [None]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
)

print("Starting training...\n")
train_result = trainer.train()

print(f"\nTraining complete!")
print(f"Total steps: {train_result.global_step}")
print(f"Training loss: {train_result.training_loss:.4f}")

## 11 â€” Evaluate on Validation Set

In [None]:
eval_results = trainer.evaluate()

print("=" * 40)
print("Validation Results")
print("=" * 40)
for key, value in eval_results.items():
    if key.startswith("eval_"):
        name = key.replace("eval_", "").capitalize()
        print(f"  {name:>12s}: {value:.4f}")
print("=" * 40)

## 12 â€” Confusion Matrix & Classification Report

In [None]:
# Get predictions on the validation set
preds_output = trainer.predict(val_dataset)
y_pred = np.argmax(preds_output.predictions, axis=-1)
y_true = val_df["label"].values

# Classification report
target_names = ["Legitimate (0)", "Phishing (1)"]
print(classification_report(y_true, y_pred, target_names=target_names))

# Confusion matrix heatmap
cm = confusion_matrix(y_true, y_pred)
fig, ax = plt.subplots(figsize=(5, 4))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=target_names, yticklabels=target_names, ax=ax)
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
ax.set_title("Confusion Matrix")
plt.tight_layout()
plt.show()

## 13 â€” Save Model & Tokenizer

In [None]:
SAVE_DIR = "./distilbert-phishing-final"

trainer.save_model(SAVE_DIR)
tokenizer.save_pretrained(SAVE_DIR)

print(f"Model and tokenizer saved to: {SAVE_DIR}")
print(f"Contents: {os.listdir(SAVE_DIR)}")

## 14 â€” Inference: Predict on New Emails

Use the trained model to classify any email text.

In [None]:
def predict_email(text: str, model=model, tokenizer=tokenizer) -> dict:
    """
    Classify a single email as phishing or legitimate.

    Returns:
        dict with 'label' (0 or 1), 'label_name', and 'confidence'.
    """
    model.eval()
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=MAX_LENGTH,
    ).to(device)

    with torch.no_grad():
        outputs = model(**inputs)
        probs = torch.softmax(outputs.logits, dim=-1)
        pred_label = torch.argmax(probs, dim=-1).item()
        confidence = probs[0][pred_label].item()

    label_name = "Phishing" if pred_label == 1 else "Legitimate"
    return {
        "label": pred_label,
        "label_name": label_name,
        "confidence": round(confidence, 4),
    }

In [None]:
# â”€â”€ Test with sample emails â”€â”€

test_emails = [
    "Dear user, your account has been compromised. Click here immediately to verify your identity: http://totallylegit-bank.com/login",
    "Hi team, please find attached the Q3 financial report. Let me know if you have any questions. Best regards, Sarah",
    "URGENT: You have won a $1,000,000 lottery! Claim your prize now by sending your bank details to claim@prize-winner.net",
    "Meeting reminder: Project sync tomorrow at 10 AM in Conference Room B. Agenda attached.",
]

print("=" * 60)
print("Single Email Predictions")
print("=" * 60)
for i, email in enumerate(test_emails, 1):
    result = predict_email(email)
    status = "ðŸš¨" if result["label"] == 1 else "âœ…"
    print(f"\n{status} Email {i}:")
    print(f"   Text:       {email[:80]}...")
    print(f"   Prediction: {result['label_name']}")
    print(f"   Confidence: {result['confidence']:.2%}")
print("\n" + "=" * 60)

## 15 â€” (Optional) Download Model from Colab

In [None]:
# Zip and download the saved model
# Uncomment the lines below when running in Colab

# import shutil
# shutil.make_archive("distilbert-phishing-final", "zip", SAVE_DIR)
# from google.colab import files
# files.download("distilbert-phishing-final.zip")
# print("Download started!")