In [1]:
!pip install -q transformers datasets accelerate peft bitsandbytes trl pandas scikit-learn

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.7/60.7 MB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m540.5/540.5 kB[0m [31m22.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from datasets import Dataset

print("Loading datasets...")
enron_df = pd.read_csv('/content/enron_preprocessed_3k.csv')
combined_df = pd.read_csv('/content/combined_preprocessed_2k.csv')

print(f"Enron: {len(enron_df)} emails")
print(f"Combined: {len(combined_df)} emails")

train_df, test_enron_df = train_test_split(
    enron_df,
    test_size=0.2,
    random_state=42,
    stratify=enron_df['label']
)

print(f"\nTraining: {len(train_df)} emails")
print(f"Test (Enron): {len(test_enron_df)} emails")
print(f"Test (Combined): {len(combined_df)} emails")

Loading datasets...
Enron: 3000 emails
Combined: 2000 emails

Training: 2400 emails
Test (Enron): 600 emails
Test (Combined): 2000 emails


In [3]:
def format_prompt(email_text, label=None):
    """Format email for instruction tuning"""
    prompt = f"""Classify this email as 'phishing' or 'legitimate'.

Email:
{email_text}

Classification:"""

    if label is not None:
        classification = "phishing" if label == 1 else "legitimate"
        return prompt + f" {classification}"
    return prompt

train_dataset = Dataset.from_pandas(train_df)

print(f"Training dataset: {len(train_dataset)} examples")
print(f"\nExample (raw):\n{train_dataset[0]['text'][:300]}...")

Training dataset: 2400 examples

Example (raw):
its illegal to use hacked mlcrosoft office its illegal to use hacked operating systems micros 0 ft , adobe , macromedia and many more ! check out these amazing offers : microsoft office xp pro 2002 . . . . . . . . . 60 corel draw graphics suite 11 . . . . . . . . . 60 windows xp professional 2002 . ...


In [4]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

model_name = "mistralai/Mistral-7B-Instruct-v0.2"

print(f"Loading model: {model_name}")

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

print("Model loaded successfully!")

Loading model: mistralai/Mistral-7B-Instruct-v0.2


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]



model.safetensors.index.json: 0.00B [00:00, ?B/s]

Downloading (incomplete total...): 0.00B [00:00, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

Loading weights:   0%|          | 0/291 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

Model loaded successfully!


In [5]:
model = prepare_model_for_kbit_training(model)

lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)

trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
total_params = sum(p.numel() for p in model.parameters())
print(f"Trainable parameters: {trainable_params:,} ({100 * trainable_params / total_params:.2f}%)")

Trainable parameters: 41,943,040 (1.11%)


In [6]:
from transformers import TrainingArguments
from trl import SFTTrainer

training_args = TrainingArguments(
    output_dir="./phishing-finetuned",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    fp16=False, # Disable FP16
    bf16=True,  # Enable BF16 for T4 GPUs
    logging_steps=10,
    save_strategy="epoch",
    optim="paged_adamw_8bit",
    warmup_steps=10,
    max_steps=200,
    report_to="none"
)

trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    args=training_args,
    formatting_func=lambda example: format_prompt(example['text'], example['label'])
)

print("Trainer configured successfully!")

Applying formatting function to train dataset:   0%|          | 0/2400 [00:00<?, ? examples/s]

Adding EOS to train dataset:   0%|          | 0/2400 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/2400 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/2400 [00:00<?, ? examples/s]

Trainer configured successfully!


In [None]:
import time

print("Starting training...")
print("="*60)

start_time = time.time()

trainer.train()

end_time = time.time()
training_time = (end_time - start_time) / 60

print("="*60)
print(f"Training completed in {training_time:.2f} minutes!")

Starting training...


  return fn(*args, **kwargs)


Step,Training Loss
10,2.548517
20,2.237639


In [None]:
model.save_pretrained("./phishing-finetuned-final")
tokenizer.save_pretrained("./phishing-finetuned-final")

print("Model saved to ./phishing-finetuned-final")

In [None]:
def classify_email(email_text, model, tokenizer):
    prompt = format_prompt(email_text)

    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=10,
            temperature=0.1,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id
        )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    response = response[len(prompt):].strip().lower()

    if "phishing" in response:
        return 1
    elif "legitimate" in response:
        return 0
    else:
        return None  # Failed to classify

def evaluate_dataset(df, model, tokenizer, dataset_name):
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
    import time

    print(f"\nEvaluating on {dataset_name}...")

    predictions = []
    true_labels = []
    failed = 0

    start_time = time.time()

    for idx, row in df.iterrows():
        pred = classify_email(row['text'], model, tokenizer)

        if pred is not None:
            predictions.append(pred)
            true_labels.append(row['label'])
        else:
            failed += 1

        if (idx + 1) % 50 == 0:
            print(f"Processed {idx + 1}/{len(df)} emails...")

    end_time = time.time()
    total_time = end_time - start_time

    accuracy = accuracy_score(true_labels, predictions)
    precision = precision_score(true_labels, predictions, zero_division=0)
    recall = recall_score(true_labels, predictions, zero_division=0)
    f1 = f1_score(true_labels, predictions, zero_division=0)
    speed = len(df) / total_time
    success_rate = (len(predictions) / len(df)) * 100

    print(f"\n{dataset_name} Results:")
    print("="*60)
    print(f"Accuracy:      {accuracy*100:.2f}%")
    print(f"Precision:     {precision*100:.2f}%")
    print(f"Recall:        {recall*100:.2f}%")
    print(f"F1 Score:      {f1*100:.2f}%")
    print(f"Speed:         {speed:.3f} emails/second")
    print(f"Success Rate:  {success_rate:.2f}% ({len(predictions)}/{len(df)})")
    print(f"Failed:        {failed}")
    print("="*60)

    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'speed': speed,
        'success_rate': success_rate,
        'failed': failed
    }

In [None]:
test_enron_sample = test_enron_df.sample(n=min(100, len(test_enron_df)), random_state=42)

enron_results = evaluate_dataset(test_enron_sample, model, tokenizer, "Enron Test Set")

In [None]:
combined_sample = combined_df.sample(n=min(100, len(combined_df)), random_state=42)

combined_results = evaluate_dataset(combined_sample, model, tokenizer, "Combined Dataset")

In [None]:
import json
from google.colab import files

results = {
    'enron': enron_results,
    'combined': combined_results,
    'training_time_minutes': training_time
}

with open('finetuned_results.json', 'w') as f:
    json.dump(results, f, indent=2)

print("Results saved to finetuned_results.json")

files.download('finetuned_results.json')

In [None]:
model.save_pretrained("./phishing-finetuned-final")
tokenizer.save_pretrained("./phishing-finetuned-final")

print("Model saved to ./phishing-finetuned-final")