In [2]:
# Install required packages (Kaggle doesn't have the latest transformers by default)
!pip install -q transformers>=4.40.0
!pip install -q torch>=2.1.0

# Set environment variable to avoid torchvision issues
import os
os.environ['TRANSFORMERS_NO_TORCHVISION'] = '1'

# Kaggle GPU setup
import torch
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")

torch._dynamo.config.suppress_errors = True


You should consider upgrading via the 'C:\Users\barsa\Documents\Projects\Social-Engineering-Attack-Prevention-in-Coorporate-\GNNProject\venv\Scripts\python.exe -m pip install --upgrade pip' command.
You should consider upgrading via the 'C:\Users\barsa\Documents\Projects\Social-Engineering-Attack-Prevention-in-Coorporate-\GNNProject\venv\Scripts\python.exe -m pip install --upgrade pip' command.


CUDA available: True
GPU: NVIDIA GeForce RTX 5060 Laptop GPU


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from transformers import AutoModel, AutoTokenizer
import numpy as np
import gc
from tqdm import tqdm


# ====================================================
# 1. Co-Attention Layer
# ====================================================
class CoAttentionLayer(nn.Module):
    def __init__(self, hidden_size=768, k=384):
        super().__init__()
        self.W_l = nn.Linear(hidden_size, hidden_size, bias=False)
        self.W_s = nn.Linear(hidden_size, k, bias=False)
        self.W_c = nn.Linear(hidden_size, k, bias=False)
        self.w_hs = nn.Linear(k, 1, bias=False)
        self.w_hc = nn.Linear(k, 1, bias=False)

    def forward(self, C, S):
        C_t = self.W_l(C)
        F = torch.tanh(torch.bmm(C_t, S.transpose(1, 2)))

        W_s_S = self.W_s(S)
        W_c_C = self.W_c(C)

        W_c_C_F = torch.bmm(W_c_C.transpose(1, 2), F)
        H_s = torch.tanh(W_s_S.transpose(1, 2) + W_c_C_F)

        W_s_S_F_T = torch.bmm(W_s_S.transpose(1, 2), F.transpose(1, 2))
        H_c = torch.tanh(W_c_C.transpose(1, 2) + W_s_S_F_T)

        a_s = torch.softmax(self.w_hs(H_s.transpose(1, 2)).squeeze(-1), dim=1)
        a_c = torch.softmax(self.w_hc(H_c.transpose(1, 2)).squeeze(-1), dim=1)

        s_hat = torch.bmm(a_s.unsqueeze(1), S).squeeze(1)
        c_hat = torch.bmm(a_c.unsqueeze(1), C).squeeze(1)

        return torch.cat([s_hat, c_hat], dim=1)


# ====================================================
# 2. ModernBERT + Co-Attention Model
# ====================================================
class ModernBERTCoAttentionModel(nn.Module):
    def __init__(self, model_name="answerdotai/ModernBERT-base", k=384, dropout=0.2):
        super().__init__()
        self.bert = AutoModel.from_pretrained(model_name)
        self.bert.gradient_checkpointing_enable()  # VRAM saver
        self.hidden_size = self.bert.config.hidden_size
        self.co_attention = CoAttentionLayer(self.hidden_size, k)

        self.classifier = nn.Sequential(
            nn.Linear(2 * self.hidden_size, 512),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.LayerNorm(512),
            nn.Linear(512, 1),
            nn.Sigmoid(),
        )

    def forward(self, ids1, mask1, ids2, mask2):
        with torch.cuda.amp.autocast():
            C = self.bert(ids1, attention_mask=mask1).last_hidden_state
            S = self.bert(ids2, attention_mask=mask2).last_hidden_state
            z = self.co_attention(C, S)
            return self.classifier(z)

    def freeze_bert(self):
        for p in self.bert.parameters():
            p.requires_grad = False

    def unfreeze_bert(self):
        for p in self.bert.parameters():
            p.requires_grad = True


  from .autonotebook import tqdm as notebook_tqdm


In [5]:
class CoAttentionLayer(nn.Module):
    def __init__(self, hidden_size=768, k=384):
        super().__init__()
        self.W_l = nn.Linear(hidden_size, hidden_size, bias=False)
        self.W_s = nn.Linear(hidden_size, k, bias=False)
        self.W_c = nn.Linear(hidden_size, k, bias=False)
        self.w_hs = nn.Linear(k, 1, bias=False)
        self.w_hc = nn.Linear(k, 1, bias=False)

    def forward(self, C, S):
        C_t = self.W_l(C)
        F = torch.tanh(torch.bmm(C_t, S.transpose(1, 2)))
        W_s_S = self.W_s(S)
        W_c_C = self.W_c(C)
        W_c_C_F = torch.bmm(W_c_C.transpose(1, 2), F)
        H_s = torch.tanh(W_s_S.transpose(1, 2) + W_c_C_F)
        W_s_S_F_T = torch.bmm(W_s_S.transpose(1, 2), F.transpose(1, 2))
        H_c = torch.tanh(W_c_C.transpose(1, 2) + W_s_S_F_T)
        a_s = torch.softmax(self.w_hs(H_s.transpose(1, 2)).squeeze(-1), dim=1)
        a_c = torch.softmax(self.w_hc(H_c.transpose(1, 2)).squeeze(-1), dim=1)
        s_hat = torch.bmm(a_s.unsqueeze(1), S).squeeze(1)
        c_hat = torch.bmm(a_c.unsqueeze(1), C).squeeze(1)
        return torch.cat([s_hat, c_hat], dim=1)


# ====================================================
# 2. ModernBERT + Co-Attention Model (no sigmoid)
# ====================================================
class ModernBERTCoAttentionModel(nn.Module):
    def __init__(self, model_name="answerdotai/ModernBERT-base", k=384, dropout=0.2):
        super().__init__()
        self.bert = AutoModel.from_pretrained(model_name)
        self.bert.gradient_checkpointing_enable()
        self.hidden_size = self.bert.config.hidden_size
        self.co_attention = CoAttentionLayer(self.hidden_size, k)
        self.classifier = nn.Sequential(
            nn.Linear(2 * self.hidden_size, 512),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.LayerNorm(512),
            nn.Linear(512, 1),
        )

    def forward(self, ids1, mask1, ids2, mask2):
        with torch.cuda.amp.autocast():
            C = self.bert(ids1, attention_mask=mask1).last_hidden_state
            S = self.bert(ids2, attention_mask=mask2).last_hidden_state
            z = self.co_attention(C, S)
            return self.classifier(z)

    def freeze_bert(self):
        for p in self.bert.parameters():
            p.requires_grad = False

    def unfreeze_bert(self):
        for p in self.bert.parameters():
            p.requires_grad = True



class SocialEngineeringDataset(Dataset):
    def __init__(self, subs, bodies, labels, tokenizer, max_sub_len=256, max_body_len=512):
        self.subs = subs
        self.bodies = bodies
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_sub_len = max_sub_len
        self.max_body_len = max_body_len

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        t1 = self.tokenizer(
            self.subs[idx],
            padding="max_length",
            truncation=True,
            max_length=self.max_sub_len,
            return_tensors="pt",
        )
        t2 = self.tokenizer(
            self.bodies[idx],
            padding="max_length",
            truncation=True,
            max_length=self.max_body_len,
            return_tensors="pt",
        )
        return {
            "input_ids_1": t1["input_ids"].squeeze(0),
            "attention_mask_1": t1["attention_mask"].squeeze(0),
            "input_ids_2": t2["input_ids"].squeeze(0),
            "attention_mask_2": t2["attention_mask"].squeeze(0),
            "label": torch.tensor(self.labels[idx], dtype=torch.float32),
        }



def create_big_social_engineering_data():

    
    legitimate_subjects = [
        "Weekly Team Meeting - Thursday 2PM",
        "Q4 Budget Review Documents",
        "IT Security Training Completion Certificate",
        "Project Alpha Status Update",
        "Employee Benefits Open Enrollment Reminder",
        "Monthly Sales Report - October 2024",
        "Office Holiday Party Planning Committee",
        "Performance Review Schedule - November",
        "New Employee Orientation Materials",
        "System Maintenance Window This Weekend",
        "Conference Room Booking Confirmation",
        "Quarterly All-Hands Meeting Invitation",
    ]
    legitimate_bodies = [
        "Hi team, this is a reminder about our weekly meeting scheduled for Thursday at 2:00 PM in Conference Room B. We'll be discussing the current project milestones and next week's deliverables. Please bring your status reports. Thanks, Sarah from HR.",
        "Please find attached the Q4 budget review documents that were discussed in yesterday's management meeting. The deadline for departmental feedback is November 15th. Contact me if you have questions about the allocation details. Best regards, Finance Department.",
        "Congratulations on completing the mandatory IT security training. Your certificate is attached to this email. Please save it for your records as it will be needed for your annual performance review. IT Security Team.",
        "Project Alpha is currently 75% complete and on schedule for the December 1st deadline. All major milestones have been achieved except for the final testing phase. The development team expects to begin testing next Monday. Project Manager.",
        "Open enrollment for employee benefits begins November 1st and ends November 30th. Please review your current coverage and make any necessary changes during this period. HR will host information sessions every Tuesday this month. Human Resources.",
        "The October sales report shows a 12% increase compared to September. Regional performance varied with the Northeast leading at 18% growth. Detailed breakdowns are available on the company portal. Sales Operations Team.",
        "We're forming a committee to plan the annual holiday party scheduled for December 15th. If you're interested in helping with planning, please reply by Friday. We need volunteers for decorations, catering coordination, and entertainment. Social Committee.",
        "Performance reviews are scheduled for the week of November 20-24. You'll receive a calendar invitation with your specific time slot. Please complete the self-evaluation form before your scheduled meeting. Management Team.",
        "Welcome materials for new employees starting next week are now available on the company intranet. Managers should review the onboarding checklist to ensure all items are completed during the first week. HR Department.",
        "Scheduled system maintenance will occur this Saturday from 2:00 AM to 6:00 AM EST. Email and file servers will be unavailable during this time. Please plan accordingly and save your work before Friday evening. IT Operations.",
        "Your conference room booking for Meeting Room C on November 10th from 1:00-3:00 PM has been confirmed. The room includes video conferencing equipment and accommodates 12 people. Facilities Management.",
        "You're invited to the quarterly all-hands meeting on Friday, November 17th at 10:00 AM in the main auditorium. The CEO will present Q3 results and discuss strategic initiatives for 2024. Executive Assistant.",
    ]

    
    malicious_subjects = [
        "URGENT: Verify Your Account Within 24 Hours",
        "Your Password Will Expire Today - Action Required",
        "Security Alert: Suspicious Login Detected",
        "Congratulations! You've Won $50,000",
        "Important: Update Your Banking Information",
        "Final Notice: Unpaid Invoice #INV-2024-5847",
        "Re: Confidential Document Review Needed",
        "IT Help Desk: Please Confirm Your Credentials",
        "Payroll Issue: Missing Direct Deposit Information",
        "CEO Request: Wire Transfer Authorization Needed",
        "System Upgrade: Temporary Password Reset Required",
        "HR Investigation: Employee Complaint Filed Against You",
    ]
    malicious_bodies = [
        "Your company email account has been flagged for suspicious activity. To prevent account suspension, you must verify your identity immediately. Click here to confirm your login credentials and maintain access to company systems. This verification must be completed within 24 hours or your account will be permanently disabled. IT Security Team (Note: This is not from your real IT department).",
        "This is an automated reminder that your network password expires today at 5:00 PM. To continue accessing company resources, you must update your password immediately. Click the link below to access the password reset portal and enter your current credentials. Failure to update will result in account lockout. System Administrator (Spoofed sender).",
        "We detected an unusual login attempt to your account from an unrecognized device in Russia at 3:42 AM EST. If this was not you, please click here immediately to secure your account and change your password. Your account has been temporarily locked for security purposes. Security Operations Center (Fake department).",
        "Congratulations! Your email address was randomly selected in our international lottery drawing. You have won $50,000 USD. To claim your prize, please provide your full name, address, phone number, and bank account details. Processing fee of $500 required. Contact our claims department immediately. International Lottery Commission (Scam organization).",
        "Due to new federal banking regulations, we need to update your direct deposit information immediately. Please click here to verify your bank account details and routing number. This update must be completed by end of business today to ensure your next paycheck is processed correctly. Payroll Services (Impersonation attempt).",
        "This is your final notice for unpaid invoice #INV-2024-5847 in the amount of $2,847.50. Payment is now 60 days overdue. To avoid legal action and additional fees, please remit payment immediately via wire transfer. Contact our collections department at the number below. Accounts Receivable Department (Fake invoice scam).",
        "I'm forwarding a confidential document that requires your immediate review and approval. Due to the sensitive nature, I cannot send it through normal channels. Please click this secure link and enter your login credentials to access the document. This matter is time-sensitive and confidential. Executive Assistant (CEO impersonation).",
        "We're conducting routine security updates and need to verify all user accounts. Please reply with your username, password, and security question answers to confirm your account is legitimate. Accounts that don't respond within 48 hours will be deactivated for security reasons. Help Desk Support (Credential harvesting).",
        "Our payroll system shows missing direct deposit information for your account. Your next paycheck cannot be processed without this update. Please click here to submit your banking details immediately. Contact payroll if you have questions about this urgent matter. Payroll Department (Banking information theft).",
        "I need you to process an urgent wire transfer of $25,000 to our new vendor for the Johnson project. Due to the time-sensitive nature, please initiate this transfer today and send confirmation. I'm in meetings all day but this cannot wait. Thanks for your quick action on this. CEO (Business Email Compromise).",
        "As part of our system upgrade, all users must reset their passwords using the new security portal. Click here and enter your current password to generate a new one. This upgrade improves security and must be completed by all employees before Monday. System Administrator (Password harvesting).",
        "An anonymous complaint has been filed against you regarding workplace conduct. HR requires your immediate response to these allegations. Click here to view the complaint details and submit your response. This matter is confidential and time-sensitive. Human Resources Department (Social engineering for document access).",
    ]

    email_subjects = legitimate_subjects + malicious_subjects
    email_bodies = legitimate_bodies + malicious_bodies
    labels = [0] * len(legitimate_subjects) + [1] * len(malicious_subjects)

    return email_subjects, email_bodies, labels



def train_epoch(model, loader, opt, crit, device, scaler):
    model.train()
    total_loss, correct, total = 0, 0, 0
    for b in tqdm(loader, desc="Train", leave=False):
        ids1 = b["input_ids_1"].to(device)
        mask1 = b["attention_mask_1"].to(device)
        ids2 = b["input_ids_2"].to(device)
        mask2 = b["attention_mask_2"].to(device)
        y = b["label"].to(device)

        opt.zero_grad(set_to_none=True)
        with torch.cuda.amp.autocast():
            out = model(ids1, mask1, ids2, mask2).view(-1)   
            loss = crit(out, y)
        scaler.scale(loss).backward()
        scaler.step(opt)
        scaler.update()

        preds = (torch.sigmoid(out) > 0.5).float()
        total_loss += loss.item()
        correct += (preds == y).sum().item()
        total += y.size(0)
        torch.cuda.empty_cache()
    return total_loss / len(loader), correct / total


@torch.no_grad()
def evaluate(model, loader, crit, device):
    model.eval()
    total_loss, correct, total = 0, 0, 0
    for b in tqdm(loader, desc="Eval", leave=False):
        ids1 = b["input_ids_1"].to(device)
        mask1 = b["attention_mask_1"].to(device)
        ids2 = b["input_ids_2"].to(device)
        mask2 = b["attention_mask_2"].to(device)
        y = b["label"].to(device)
        with torch.cuda.amp.autocast():
            out = model(ids1, mask1, ids2, mask2).view(-1) 
            loss = crit(out, y)
        preds = (torch.sigmoid(out) > 0.5).float()
        total_loss += loss.item()
        correct += (preds == y).sum().item()
        total += y.size(0)
    return total_loss / len(loader), correct / total




def test_single_email(model, tokenizer, device, subject, body):
    model.eval()
    t1 = tokenizer(subject, return_tensors="pt", padding="max_length", truncation=True, max_length=256)
    t2 = tokenizer(body, return_tensors="pt", padding="max_length", truncation=True, max_length=512)
    with torch.no_grad(), torch.cuda.amp.autocast():
        out = model(
            t1["input_ids"].to(device),
            t1["attention_mask"].to(device),
            t2["input_ids"].to(device),
            t2["attention_mask"].to(device),
        ).view(-1)
        prob = torch.sigmoid(out).item()
    label = "⚠️ Social Engineering" if prob > 0.5 else " Safe Email"
    print(f"\nSubject: {subject}\nBody: {body}\nPrediction: {label}  (confidence={prob:.3f})")
    return prob, label


def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
    subs, bodies, labels = create_big_social_engineering_data()
    split = int(0.75 * len(labels))
    train_data = SocialEngineeringDataset(subs[:split], bodies[:split], labels[:split], tokenizer)
    test_data = SocialEngineeringDataset(subs[split:], bodies[split:], labels[split:], tokenizer)
    train_loader = DataLoader(train_data, batch_size=2, shuffle=True)
    test_loader = DataLoader(test_data, batch_size=2)

    model = ModernBERTCoAttentionModel().to(device)
    print(f"Total params: {sum(p.numel() for p in model.parameters())/1e6:.2f}M")

    
    model.freeze_bert()
    opt = optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=2e-3)
    crit = nn.BCEWithLogitsLoss()
    scaler = torch.cuda.amp.GradScaler()

    print("\nStage 1: Training frozen BERT for 3 epochs...")
    for e in range(10):
        tr_loss, tr_acc = train_epoch(model, train_loader, opt, crit, device, scaler)
        val_loss, val_acc = evaluate(model, test_loader, crit, device)
        print(f"Epoch {e+1}: Train {tr_loss:.3f}/{tr_acc:.3f}, Val {val_loss:.3f}/{val_acc:.3f}")

    
    model.unfreeze_bert()
    opt = optim.AdamW(model.parameters(), lr=2e-5)
    print("\nStage 2: Fine-tuning full BERT for 2 epochs...")
    for e in range(10):
        tr_loss, tr_acc = train_epoch(model, train_loader, opt, crit, device, scaler)
        val_loss, val_acc = evaluate(model, test_loader, crit, device)
        print(f"Epoch {e+1}: Train {tr_loss:.3f}/{tr_acc:.3f}, Val {val_loss:.3f}/{val_acc:.3f}")

    torch.save(model.state_dict(), "modernbert_coattention_local.pt")
    print("\n Training complete. Model saved as modernbert_coattention_local.pt")

    
    subject = "ALL daily charts and matrices as hot links 5/1"
    body = """The information contained herein is based on sources that we believe to be
reliable, but we do not represent that it is accurate or complete.  Nothing
contained herein should be considered as an offer to sell or a solicitation
of an offer to buy any financial instruments discussed herein.  Any
opinions expressed herein are solely those of the author.  As such, they
may differ in material respects from those of, or expressed or published by
on behalf of Carr Futures or its officers, directors, employees or
affiliates.  , 2001 Carr Futures


The charts are now available on the web by clicking on the hot link(s)
contained in this email. If for any reason you are unable to receive the
charts via the web, please contact me via email and I will email the charts
to you as attachments.


Crude     http://www.carrfut.com/research/Energy1/crude48.pdf
Natural Gas     http://www.carrfut.com/research/Energy1/ngas48.pdf
Distillate     http://www.carrfut.com/research/Energy1/hoil48.pdf
Unleaded     http://www.carrfut.com/research/Energy1/unlded48.pdf

Nat Gas Strip Matrix
http://www.carrfut.com/research/Energy1/StripmatrixNG48.pdf
Nat Gas Spread Matrix
http://www.carrfut.com/research/Energy1/SpreadmatrixNG48.pdf

Crude and Products Spread Matrix
http://www.carrfut.com/research/Energy1/SpreadmatrixCL48.pdf"""
    test_single_email(model, tokenizer, device, subject, body)


if __name__ == "__main__":
    torch.backends.cudnn.benchmark = True
    torch.cuda.empty_cache()
    gc.collect()
    main()


Using device: cuda


  scaler = torch.cuda.amp.GradScaler()


Total params: 150.98M

Stage 1: Training frozen BERT for 3 epochs...


  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
                                                   

Epoch 1: Train 0.765/0.556, Val 0.328/1.000


                                                    

Epoch 2: Train 0.398/0.778, Val 1.083/0.500


                                                    

Epoch 3: Train 0.094/1.000, Val 1.642/0.500


                                                    

Epoch 4: Train 0.140/0.944, Val 0.064/1.000


                                                    

Epoch 5: Train 0.139/0.944, Val 2.418/0.333


                                                    

Epoch 6: Train 0.147/0.944, Val 0.270/0.833


                                                    

Epoch 7: Train 0.232/0.833, Val 4.316/0.333


                                                    

Epoch 8: Train 0.170/0.944, Val 1.658/0.500


                                                    

Epoch 9: Train 0.009/1.000, Val 0.283/0.833


                                                    

Epoch 10: Train 0.020/1.000, Val 0.526/0.833

Stage 2: Fine-tuning full BERT for 2 epochs...


                                                    

Epoch 1: Train 0.004/1.000, Val 0.812/0.833


                                                    

Epoch 2: Train 0.006/1.000, Val 4.987/0.333


                                                    

Epoch 3: Train 0.043/0.944, Val 4.924/0.333


                                                    

Epoch 4: Train 0.000/1.000, Val 0.002/1.000


                                                    

Epoch 5: Train 0.032/1.000, Val 0.009/1.000


                                                    

Epoch 6: Train 0.000/1.000, Val 1.985/0.500


                                                    

Epoch 7: Train 0.000/1.000, Val 3.503/0.500


                                                    

Epoch 8: Train 0.000/1.000, Val 3.923/0.500


                                                    

Epoch 9: Train 0.000/1.000, Val 4.076/0.500


                                                    

Epoch 10: Train 0.000/1.000, Val 4.119/0.500

 Training complete. Model saved as modernbert_coattention_local.pt

Subject: ALL daily charts and matrices as hot links 5/1
Body: The information contained herein is based on sources that we believe to be
reliable, but we do not represent that it is accurate or complete.  Nothing
contained herein should be considered as an offer to sell or a solicitation
of an offer to buy any financial instruments discussed herein.  Any
opinions expressed herein are solely those of the author.  As such, they
may differ in material respects from those of, or expressed or published by
on behalf of Carr Futures or its officers, directors, employees or
affiliates.  , 2001 Carr Futures


The charts are now available on the web by clicking on the hot link(s)
contained in this email. If for any reason you are unable to receive the
charts via the web, please contact me via email and I will email the charts
to you as attachments.


Crude     http://www.carrfut.com/

  with torch.no_grad(), torch.cuda.amp.autocast():
