# **BERT**

In [None]:
import random
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import get_linear_schedule_with_warmup
from torch.optim import AdamW
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize
import torch.nn as nn

# -----------------------
# Reproducibility helpers
# -----------------------
def set_seed(seed: int = 42):
    """Seed every random number generator this script relies on.

    Seeds, in order: Python's ``random`` module, NumPy's global generator,
    PyTorch's default generator and all CUDA generators.

    Args:
        seed: Value handed to each generator. Defaults to 42.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_fn in seeders:
        seed_fn(seed)

set_seed(42)

# -----------------------
# Config
# -----------------------
# NOTE: absolute, machine-specific location of the dataset; adjust when running elsewhere.
CSV_PATH = r"C:\Users\sk304939\OneDrive - California State University, Northridge\Documents\Thesis\My_Project\Twitter Bot - Final code\Previous EDA code\bot_detection_data.csv"  # Path to the uploaded file
TEXT_COL = "Tweet"        # column containing the text fed to the tokenizer
LABEL_COL = "Bot Label"   # column containing the class label
MODEL_NAME = "bert-base-uncased"
MAX_LEN = 128             # samples are padded/truncated to this many tokens
TRAIN_BATCH_SIZE = 16
VAL_BATCH_SIZE = 32       # also used for the test loader
EPOCHS = 3
LEARNING_RATE = 2e-5
WARMUP_RATIO = 0.1   # 10% warmup
WEIGHT_DECAY = 0.01

# -----------------------
# Device setup
# -----------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# -----------------------
# Load & Prep Data
# -----------------------
df = pd.read_csv(CSV_PATH)

df = df.head(5000)  # Use the first 5,000 samples

# Keep rows with actual text: drop missing values, then the literal
# placeholder "nil" (case-insensitive, surrounding whitespace ignored).
# .copy() makes the result an independent frame so the label assignment
# below does not write into a slice of the original DataFrame.
df = df[df[TEXT_COL].notna()]
df = df[df[TEXT_COL].str.strip().str.lower() != "nil"].copy()

# Map labels to contiguous ids 0..num_labels-1. The classifier head, the
# confusion matrix and the ROC code all index classes by range(num_labels),
# so numeric labels (e.g. {1, 2} or {-1, 1}) are remapped as well instead of
# being passed through unchanged. Labels that are already 0..n-1 keep their values.
labels_are_numeric = np.issubdtype(df[LABEL_COL].dtype, np.number)
raw_labels = df[LABEL_COL] if labels_are_numeric else df[LABEL_COL].astype(str)
le = LabelEncoder()
df[LABEL_COL] = le.fit_transform(raw_labels)
# le.classes_ is sorted, so class_names[i] is the readable name of class id i.
class_names = [str(c) for c in le.classes_]

num_labels = len(class_names)
print(f"Detected {num_labels} classes:", class_names)

# Train/Val/Test split (80/10/10), stratified so each split keeps the class balance.
all_targets = df[LABEL_COL].tolist()
X_train, X_temp, y_train, y_temp = train_test_split(
    df[TEXT_COL].tolist(), all_targets,
    test_size=0.2, random_state=42, stratify=all_targets
)
# Halve the 20% hold-out into validation and test.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)

print(f"Train: {len(X_train)} | Val: {len(X_val)} | Test: {len(X_test)}")

# -----------------------
# Dataset / Dataloader
# -----------------------
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)

class TextDS(Dataset):
    """Map-style dataset that tokenizes one text per lookup.

    Args:
        texts: Indexable collection of raw texts.
        labels: Indexable collection of class ids, aligned with ``texts``.
        tokenizer: Callable invoked with the text plus ``padding``,
            ``truncation``, ``max_length`` and ``return_tensors`` keywords.
        max_len: Length passed to the tokenizer as ``max_length``.
    """

    def __init__(self, texts, labels, tokenizer, max_len=128):
        self.texts, self.labels = texts, labels
        self.tok, self.max_len = tokenizer, max_len

    def __len__(self):
        # One sample per text.
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the encoded sample at ``idx``: tokenizer outputs plus ``labels``."""
        text = str(self.texts[idx])  # coerce in case the entry is not a str
        encoded = self.tok(
            text,
            padding="max_length",
            truncation=True,
            max_length=self.max_len,
            return_tensors="pt",
        )
        # squeeze(0) drops dim 0 when it has size 1 (presumably the batch axis
        # of a single-text encoding) so samples can be stacked into batches later.
        sample = {}
        for key, tensor in encoded.items():
            sample[key] = tensor.squeeze(0)
        sample["labels"] = torch.tensor(self.labels[idx], dtype=torch.long)
        return sample

# One dataset per split, all sharing the tokenizer and sequence length.
train_ds, val_ds, test_ds = (
    TextDS(split_texts, split_labels, tokenizer, MAX_LEN)
    for split_texts, split_labels in (
        (X_train, y_train),
        (X_val, y_val),
        (X_test, y_test),
    )
)

# Only the training loader shuffles; validation and test share the same
# deterministic settings.
train_loader = DataLoader(train_ds, batch_size=TRAIN_BATCH_SIZE, shuffle=True, drop_last=False)
eval_loader_kwargs = dict(batch_size=VAL_BATCH_SIZE, shuffle=False, drop_last=False)
val_loader = DataLoader(val_ds, **eval_loader_kwargs)
test_loader = DataLoader(test_ds, **eval_loader_kwargs)

# -----------------------
# Model, Optimizer, Scheduler
# -----------------------
model = BertForSequenceClassification.from_pretrained(
    MODEL_NAME, num_labels=num_labels
).to(device)
print(model)  # print model architecture

# Optimizer and Scheduler
optimizer = AdamW(
    model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY,
)

# The scheduler is stepped once per batch, so its horizon is counted in
# batches across all epochs; the first WARMUP_RATIO share of them is warmup.
steps_per_epoch = len(train_loader)
total_steps = steps_per_epoch * EPOCHS
warmup_steps = int(WARMUP_RATIO * total_steps)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=warmup_steps,
    num_training_steps=total_steps,
)

# -----------------------
# Train / Eval Loop
# -----------------------
def run_epoch(dataloader, train: bool = True):
    """Run one full pass over ``dataloader`` and return sample-weighted metrics.

    Relies on the module-level ``model``, ``device``, ``optimizer`` and
    ``scheduler``.

    Args:
        dataloader: Iterable of batches; each batch is indexed with the keys
            ``"input_ids"``, ``"attention_mask"`` and ``"labels"``.
        train: When True, the model is put in training mode and the optimizer
            and scheduler are stepped after every batch. When False, the model
            is put in eval mode and gradients are disabled.

    Returns:
        Tuple ``(avg_loss, avg_acc)`` averaged over every sample seen.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no samples.
    """
    if train:
        model.train()
    else:
        model.eval()

    loss_sum, n_correct, n_seen = 0.0, 0, 0

    for batch in dataloader:
        ids, mask, targets = (
            batch[key].to(device)
            for key in ("input_ids", "attention_mask", "labels")
        )
        batch_size = targets.size(0)

        with torch.set_grad_enabled(train):
            out = model(input_ids=ids, attention_mask=mask, labels=targets)
            batch_loss, batch_logits = out.loss, out.logits

            if train:
                optimizer.zero_grad()
                batch_loss.backward()
                # Cap the global gradient norm at 1.0 before the update.
                nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimizer.step()
                scheduler.step()

        # Weight the batch loss by batch size (presumably it is a per-batch
        # mean) so a shorter final batch does not skew the epoch average.
        loss_sum += batch_loss.item() * batch_size
        hits = torch.argmax(batch_logits, dim=-1) == targets
        n_correct += hits.sum().item()
        n_seen += batch_size

    return loss_sum / n_seen, n_correct / n_seen

train_losses, val_losses = [], []
train_accs, val_accs = [], []

# Alternate one training pass and one validation pass per epoch, keeping the
# per-epoch metrics for the loss plot further down.
for epoch in range(1, EPOCHS + 1):
    tr_loss, tr_acc = run_epoch(train_loader, train=True)
    va_loss, va_acc = run_epoch(val_loader,   train=False)

    train_losses.append(tr_loss); val_losses.append(va_loss)
    train_accs.append(tr_acc);    val_accs.append(va_acc)

    print(f"Epoch {epoch}/{EPOCHS} | "
          f"Train Loss: {tr_loss:.4f} Acc: {tr_acc:.4f} | "
          f"Val Loss: {va_loss:.4f} Acc: {va_acc:.4f}")

# -----------------------
# Test evaluation
# -----------------------
model.eval()
all_logits = []
all_labels = []

with torch.no_grad():
    for batch in test_loader:
        input_ids      = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)

        # No labels are passed to the model: only the logits are needed here.
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits

        all_logits.append(logits.cpu().numpy())
        # Labels are only compared on the host, so they are not moved to the device.
        all_labels.append(batch["labels"].cpu().numpy())

all_logits = np.concatenate(all_logits, axis=0)
all_labels = np.concatenate(all_labels, axis=0)

y_pred = np.argmax(all_logits, axis=1)
test_acc = accuracy_score(all_labels, y_pred)
print(f"\nTEST Accuracy: {test_acc:.4f}")

# Classification report
# If labels were encoded, class_names holds strings; otherwise make them
if len(class_names) != num_labels:
    # fallback (shouldn't happen)
    class_names = [str(i) for i in range(num_labels)]
# Pass the class ids explicitly so the report always has one row per entry in
# class_names, even when a class is missing from both the test labels and the
# predictions (without labels=, target_names would then mismatch and raise).
class_ids = list(range(num_labels))
print("\nClassification Report:\n")
print(classification_report(all_labels, y_pred, labels=class_ids,
                            target_names=class_names, digits=4))

# -----------------------
# Plots
# -----------------------

# 1) Training vs Validation Loss
epochs_axis = range(1, EPOCHS+1)
plt.figure()
plt.plot(epochs_axis, train_losses, label="Train Loss")
plt.plot(epochs_axis, val_losses,   label="Val Loss")
plt.xlabel("Epoch"); plt.ylabel("Loss"); plt.title("Training & Validation Loss")
plt.legend(); plt.tight_layout()
plt.show()

# 2) Confusion Matrix (rows = true class, columns = predicted class)
cm = confusion_matrix(all_labels, y_pred, labels=class_ids)
plt.figure()
im = plt.imshow(cm, interpolation='nearest')
plt.title("Confusion Matrix")
plt.colorbar(im)
tick_marks = np.arange(num_labels)
plt.xticks(tick_marks, class_names, rotation=45, ha='right')
plt.yticks(tick_marks, class_names)
plt.xlabel("Predicted"); plt.ylabel("True")
# Annotate counts; cells above half the maximum count get white text for contrast.
thresh = cm.max() / 2.0
for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        plt.text(j, i, format(cm[i, j], 'd'),
                 ha="center", va="center",
                 color="white" if cm[i, j] > thresh else "black")
plt.tight_layout()
plt.show()

# 3) ROC Curve (binary or multiclass One-vs-Rest)
# Softmax turns the logits into per-class scores used as ROC thresholds.
probs = torch.softmax(torch.tensor(all_logits), dim=1).numpy()

plt.figure()
if num_labels == 2:
    # Binary case: score of class id 1 against the true labels.
    fpr, tpr, _ = roc_curve(all_labels, probs[:, 1])
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, label=f"AUC = {roc_auc:.3f}")
else:
    # One-vs-Rest: one curve per class, followed by a micro-average curve.
    y_bin = label_binarize(all_labels, classes=class_ids)
    fpr = dict(); tpr = dict(); roc_auc = dict()
    for i in range(num_labels):
        fpr[i], tpr[i], _ = roc_curve(y_bin[:, i], probs[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])
        plt.plot(fpr[i], tpr[i], label=f"Class {class_names[i]} (AUC={roc_auc[i]:.3f})")
    # Micro-average: pool every (sample, class) decision into one binary problem.
    fpr["micro"], tpr["micro"], _ = roc_curve(y_bin.ravel(), probs.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
    plt.plot(fpr["micro"], tpr["micro"], linestyle="--", label=f"Micro Avg (AUC={roc_auc['micro']:.3f})")

# Diagonal reference line.
plt.plot([0,1],[0,1], linestyle="--")
plt.xlabel("False Positive Rate"); plt.ylabel("True Positive Rate")
plt.title("ROC Curve")
plt.legend(loc="lower right")
plt.tight_layout()
plt.show()


# **RoBERTa**

In [None]:
import random
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from transformers import get_linear_schedule_with_warmup
from torch.optim import AdamW
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize
import torch.nn as nn

# -----------------------
# Reproducibility helpers
# -----------------------
def set_seed(seed: int = 42):
    """Seed every random number generator this script relies on.

    Seeds, in order: Python's ``random`` module, NumPy's global generator,
    PyTorch's default generator and all CUDA generators.

    Args:
        seed: Value handed to each generator. Defaults to 42.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_fn in seeders:
        seed_fn(seed)

set_seed(42)

# -----------------------
# Config
# -----------------------
# NOTE: absolute, machine-specific location of the dataset; adjust when running elsewhere.
CSV_PATH = r"C:\Users\sk304939\OneDrive - California State University, Northridge\Documents\Thesis\My_Project\Twitter Bot - Final code\Previous EDA code\bot_detection_data.csv"  # Path to your uploaded file
TEXT_COL = "Tweet"        # column containing the text fed to the tokenizer
LABEL_COL = "Bot Label"   # column containing the class label
MODEL_NAME = "roberta-base"  # RoBERTa model
MAX_LEN = 128             # samples are padded/truncated to this many tokens
TRAIN_BATCH_SIZE = 16
VAL_BATCH_SIZE = 32       # also used for the test loader
EPOCHS = 3
LEARNING_RATE = 2e-5
WARMUP_RATIO = 0.1   # 10% warmup
WEIGHT_DECAY = 0.01

# -----------------------
# Device setup
# -----------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# -----------------------
# Load & Prep Data
# -----------------------
df = pd.read_csv(CSV_PATH)

df = df.head(5000)  # Use the first 5,000 samples

# Keep rows with actual text: drop missing values, then the literal
# placeholder "nil" (case-insensitive, surrounding whitespace ignored).
# .copy() makes the result an independent frame so the label assignment
# below does not write into a slice of the original DataFrame.
df = df[df[TEXT_COL].notna()]
df = df[df[TEXT_COL].str.strip().str.lower() != "nil"].copy()

# Map labels to contiguous ids 0..num_labels-1. The classifier head, the
# confusion matrix and the ROC code all index classes by range(num_labels),
# so numeric labels (e.g. {1, 2} or {-1, 1}) are remapped as well instead of
# being passed through unchanged. Labels that are already 0..n-1 keep their values.
labels_are_numeric = np.issubdtype(df[LABEL_COL].dtype, np.number)
raw_labels = df[LABEL_COL] if labels_are_numeric else df[LABEL_COL].astype(str)
le = LabelEncoder()
df[LABEL_COL] = le.fit_transform(raw_labels)
# le.classes_ is sorted, so class_names[i] is the readable name of class id i.
class_names = [str(c) for c in le.classes_]

num_labels = len(class_names)
print(f"Detected {num_labels} classes:", class_names)

# Train/Val/Test split (80/10/10), stratified so each split keeps the class balance.
all_targets = df[LABEL_COL].tolist()
X_train, X_temp, y_train, y_temp = train_test_split(
    df[TEXT_COL].tolist(), all_targets,
    test_size=0.2, random_state=42, stratify=all_targets
)
# Halve the 20% hold-out into validation and test.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)

print(f"Train: {len(X_train)} | Val: {len(X_val)} | Test: {len(X_test)}")

# -----------------------
# Dataset / Dataloader
# -----------------------
tokenizer = RobertaTokenizer.from_pretrained(MODEL_NAME)

class TextDS(Dataset):
    """Map-style dataset that tokenizes one text per lookup.

    Args:
        texts: Indexable collection of raw texts.
        labels: Indexable collection of class ids, aligned with ``texts``.
        tokenizer: Callable invoked with the text plus ``padding``,
            ``truncation``, ``max_length`` and ``return_tensors`` keywords.
        max_len: Length passed to the tokenizer as ``max_length``.
    """

    def __init__(self, texts, labels, tokenizer, max_len=128):
        self.texts, self.labels = texts, labels
        self.tok, self.max_len = tokenizer, max_len

    def __len__(self):
        # One sample per text.
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the encoded sample at ``idx``: tokenizer outputs plus ``labels``."""
        text = str(self.texts[idx])  # coerce in case the entry is not a str
        encoded = self.tok(
            text,
            padding="max_length",
            truncation=True,
            max_length=self.max_len,
            return_tensors="pt",
        )
        # squeeze(0) drops dim 0 when it has size 1 (presumably the batch axis
        # of a single-text encoding) so samples can be stacked into batches later.
        sample = {}
        for key, tensor in encoded.items():
            sample[key] = tensor.squeeze(0)
        sample["labels"] = torch.tensor(self.labels[idx], dtype=torch.long)
        return sample

# One dataset per split, all sharing the tokenizer and sequence length.
train_ds, val_ds, test_ds = (
    TextDS(split_texts, split_labels, tokenizer, MAX_LEN)
    for split_texts, split_labels in (
        (X_train, y_train),
        (X_val, y_val),
        (X_test, y_test),
    )
)

# Only the training loader shuffles; validation and test share the same
# deterministic settings.
train_loader = DataLoader(train_ds, batch_size=TRAIN_BATCH_SIZE, shuffle=True, drop_last=False)
eval_loader_kwargs = dict(batch_size=VAL_BATCH_SIZE, shuffle=False, drop_last=False)
val_loader = DataLoader(val_ds, **eval_loader_kwargs)
test_loader = DataLoader(test_ds, **eval_loader_kwargs)

# -----------------------
# Model, Optimizer, Scheduler
# -----------------------
model = RobertaForSequenceClassification.from_pretrained(
    MODEL_NAME, num_labels=num_labels
).to(device)
print(model)  # print model architecture

# Optimizer and Scheduler
optimizer = AdamW(
    model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY,
)

# The scheduler is stepped once per batch, so its horizon is counted in
# batches across all epochs; the first WARMUP_RATIO share of them is warmup.
steps_per_epoch = len(train_loader)
total_steps = steps_per_epoch * EPOCHS
warmup_steps = int(WARMUP_RATIO * total_steps)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=warmup_steps,
    num_training_steps=total_steps,
)

# -----------------------
# Train / Eval Loop
# -----------------------
def run_epoch(dataloader, train: bool = True):
    """Run one full pass over ``dataloader`` and return sample-weighted metrics.

    Relies on the module-level ``model``, ``device``, ``optimizer`` and
    ``scheduler``.

    Args:
        dataloader: Iterable of batches; each batch is indexed with the keys
            ``"input_ids"``, ``"attention_mask"`` and ``"labels"``.
        train: When True, the model is put in training mode and the optimizer
            and scheduler are stepped after every batch. When False, the model
            is put in eval mode and gradients are disabled.

    Returns:
        Tuple ``(avg_loss, avg_acc)`` averaged over every sample seen.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no samples.
    """
    if train:
        model.train()
    else:
        model.eval()

    loss_sum, n_correct, n_seen = 0.0, 0, 0

    for batch in dataloader:
        ids, mask, targets = (
            batch[key].to(device)
            for key in ("input_ids", "attention_mask", "labels")
        )
        batch_size = targets.size(0)

        with torch.set_grad_enabled(train):
            out = model(input_ids=ids, attention_mask=mask, labels=targets)
            batch_loss, batch_logits = out.loss, out.logits

            if train:
                optimizer.zero_grad()
                batch_loss.backward()
                # Cap the global gradient norm at 1.0 before the update.
                nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimizer.step()
                scheduler.step()

        # Weight the batch loss by batch size (presumably it is a per-batch
        # mean) so a shorter final batch does not skew the epoch average.
        loss_sum += batch_loss.item() * batch_size
        hits = torch.argmax(batch_logits, dim=-1) == targets
        n_correct += hits.sum().item()
        n_seen += batch_size

    return loss_sum / n_seen, n_correct / n_seen

train_losses, val_losses = [], []
train_accs, val_accs = [], []

# Alternate one training pass and one validation pass per epoch, keeping the
# per-epoch metrics for the loss plot further down.
for epoch in range(1, EPOCHS + 1):
    tr_loss, tr_acc = run_epoch(train_loader, train=True)
    va_loss, va_acc = run_epoch(val_loader,   train=False)

    train_losses.append(tr_loss); val_losses.append(va_loss)
    train_accs.append(tr_acc);    val_accs.append(va_acc)

    print(f"Epoch {epoch}/{EPOCHS} | "
          f"Train Loss: {tr_loss:.4f} Acc: {tr_acc:.4f} | "
          f"Val Loss: {va_loss:.4f} Acc: {va_acc:.4f}")

# -----------------------
# Test evaluation
# -----------------------
model.eval()
all_logits = []
all_labels = []

with torch.no_grad():
    for batch in test_loader:
        input_ids      = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)

        # No labels are passed to the model: only the logits are needed here.
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits

        all_logits.append(logits.cpu().numpy())
        # Labels are only compared on the host, so they are not moved to the device.
        all_labels.append(batch["labels"].cpu().numpy())

all_logits = np.concatenate(all_logits, axis=0)
all_labels = np.concatenate(all_labels, axis=0)

y_pred = np.argmax(all_logits, axis=1)
test_acc = accuracy_score(all_labels, y_pred)
print(f"\nTEST Accuracy: {test_acc:.4f}")

# Classification report
# If labels were encoded, class_names holds strings; otherwise make them
if len(class_names) != num_labels:
    # fallback (shouldn't happen)
    class_names = [str(i) for i in range(num_labels)]
# Pass the class ids explicitly so the report always has one row per entry in
# class_names, even when a class is missing from both the test labels and the
# predictions (without labels=, target_names would then mismatch and raise).
class_ids = list(range(num_labels))
print("\nClassification Report:\n")
print(classification_report(all_labels, y_pred, labels=class_ids,
                            target_names=class_names, digits=4))

# -----------------------
# Plots
# -----------------------

# 1) Training vs Validation Loss
epochs_axis = range(1, EPOCHS+1)
plt.figure()
plt.plot(epochs_axis, train_losses, label="Train Loss")
plt.plot(epochs_axis, val_losses,   label="Val Loss")
plt.xlabel("Epoch"); plt.ylabel("Loss"); plt.title("Training & Validation Loss")
plt.legend(); plt.tight_layout()
plt.show()

# 2) Confusion Matrix (rows = true class, columns = predicted class)
cm = confusion_matrix(all_labels, y_pred, labels=class_ids)
plt.figure()
im = plt.imshow(cm, interpolation='nearest')
plt.title("Confusion Matrix")
plt.colorbar(im)
tick_marks = np.arange(num_labels)
plt.xticks(tick_marks, class_names, rotation=45, ha='right')
plt.yticks(tick_marks, class_names)
plt.xlabel("Predicted"); plt.ylabel("True")
# Annotate counts; cells above half the maximum count get white text for contrast.
thresh = cm.max() / 2.0
for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        plt.text(j, i, format(cm[i, j], 'd'),
                 ha="center", va="center",
                 color="white" if cm[i, j] > thresh else "black")
plt.tight_layout()
plt.show()

# 3) ROC Curve (binary or multiclass One-vs-Rest)
# Softmax turns the logits into per-class scores used as ROC thresholds.
probs = torch.softmax(torch.tensor(all_logits), dim=1).numpy()

plt.figure()
if num_labels == 2:
    # Binary case: score of class id 1 against the true labels.
    fpr, tpr, _ = roc_curve(all_labels, probs[:, 1])
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, label=f"AUC = {roc_auc:.3f}")
else:
    # One-vs-Rest: one curve per class, followed by a micro-average curve.
    y_bin = label_binarize(all_labels, classes=class_ids)
    fpr = dict(); tpr = dict(); roc_auc = dict()
    for i in range(num_labels):
        fpr[i], tpr[i], _ = roc_curve(y_bin[:, i], probs[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])
        plt.plot(fpr[i], tpr[i], label=f"Class {class_names[i]} (AUC={roc_auc[i]:.3f})")
    # Micro-average: pool every (sample, class) decision into one binary problem.
    fpr["micro"], tpr["micro"], _ = roc_curve(y_bin.ravel(), probs.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
    plt.plot(fpr["micro"], tpr["micro"], linestyle="--", label=f"Micro Avg (AUC={roc_auc['micro']:.3f})")

# Diagonal reference line.
plt.plot([0,1],[0,1], linestyle="--")
plt.xlabel("False Positive Rate"); plt.ylabel("True Positive Rate")
plt.title("ROC Curve")
plt.legend(loc="lower right")
plt.tight_layout()
plt.show()


# **DistilBERT**

In [None]:
import random
import numpy as np
import pandas as pd 
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
from transformers import get_linear_schedule_with_warmup
from torch.optim import AdamW
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize
import torch.nn as nn

# -----------------------
# Reproducibility helpers
# -----------------------
def set_seed(seed: int = 42):
    """Seed every random number generator this script relies on.

    Seeds, in order: Python's ``random`` module, NumPy's global generator,
    PyTorch's default generator and all CUDA generators.

    Args:
        seed: Value handed to each generator. Defaults to 42.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_fn in seeders:
        seed_fn(seed)

set_seed(42)

# -----------------------
# Config
# -----------------------
# NOTE: absolute, machine-specific location of the dataset; adjust when running elsewhere.
CSV_PATH = r"C:\Users\sk304939\OneDrive - California State University, Northridge\Documents\Thesis\My_Project\Twitter Bot - Final code\Previous EDA code\bot_detection_data.csv"  # Path to the uploaded file
TEXT_COL = "Tweet"        # column containing the text fed to the tokenizer
LABEL_COL = "Bot Label"   # column containing the class label
MODEL_NAME = "distilbert-base-uncased"  # Using DistilBERT model
MAX_LEN = 128             # samples are padded/truncated to this many tokens
TRAIN_BATCH_SIZE = 16
VAL_BATCH_SIZE = 32       # also used for the test loader
EPOCHS = 3
LEARNING_RATE = 2e-5
WARMUP_RATIO = 0.1   # 10% warmup
WEIGHT_DECAY = 0.01

# -----------------------
# Device setup
# -----------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# -----------------------
# Load & Prep Data
# -----------------------
df = pd.read_csv(CSV_PATH)

df = df.head(5000)  # Use the first 5,000 samples

# Keep rows with actual text: drop missing values, then the literal
# placeholder "nil" (case-insensitive, surrounding whitespace ignored).
# .copy() makes the result an independent frame so the label assignment
# below does not write into a slice of the original DataFrame.
df = df[df[TEXT_COL].notna()]
df = df[df[TEXT_COL].str.strip().str.lower() != "nil"].copy()

# Map labels to contiguous ids 0..num_labels-1. The classifier head, the
# confusion matrix and the ROC code all index classes by range(num_labels),
# so numeric labels (e.g. {1, 2} or {-1, 1}) are remapped as well instead of
# being passed through unchanged. Labels that are already 0..n-1 keep their values.
labels_are_numeric = np.issubdtype(df[LABEL_COL].dtype, np.number)
raw_labels = df[LABEL_COL] if labels_are_numeric else df[LABEL_COL].astype(str)
le = LabelEncoder()
df[LABEL_COL] = le.fit_transform(raw_labels)
# le.classes_ is sorted, so class_names[i] is the readable name of class id i.
class_names = [str(c) for c in le.classes_]

num_labels = len(class_names)
print(f"Detected {num_labels} classes:", class_names)

# Train/Val/Test split (80/10/10), stratified so each split keeps the class balance.
all_targets = df[LABEL_COL].tolist()
X_train, X_temp, y_train, y_temp = train_test_split(
    df[TEXT_COL].tolist(), all_targets,
    test_size=0.2, random_state=42, stratify=all_targets
)
# Halve the 20% hold-out into validation and test.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)

print(f"Train: {len(X_train)} | Val: {len(X_val)} | Test: {len(X_test)}")

# -----------------------
# Dataset / Dataloader
# -----------------------
tokenizer = DistilBertTokenizer.from_pretrained(MODEL_NAME)

class TextDS(Dataset):
    """Map-style dataset that tokenizes one text per lookup.

    Args:
        texts: Indexable collection of raw texts.
        labels: Indexable collection of class ids, aligned with ``texts``.
        tokenizer: Callable invoked with the text plus ``padding``,
            ``truncation``, ``max_length`` and ``return_tensors`` keywords.
        max_len: Length passed to the tokenizer as ``max_length``.
    """

    def __init__(self, texts, labels, tokenizer, max_len=128):
        self.texts, self.labels = texts, labels
        self.tok, self.max_len = tokenizer, max_len

    def __len__(self):
        # One sample per text.
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the encoded sample at ``idx``: tokenizer outputs plus ``labels``."""
        text = str(self.texts[idx])  # coerce in case the entry is not a str
        encoded = self.tok(
            text,
            padding="max_length",
            truncation=True,
            max_length=self.max_len,
            return_tensors="pt",
        )
        # squeeze(0) drops dim 0 when it has size 1 (presumably the batch axis
        # of a single-text encoding) so samples can be stacked into batches later.
        sample = {}
        for key, tensor in encoded.items():
            sample[key] = tensor.squeeze(0)
        sample["labels"] = torch.tensor(self.labels[idx], dtype=torch.long)
        return sample

# One dataset per split, all sharing the tokenizer and sequence length.
train_ds, val_ds, test_ds = (
    TextDS(split_texts, split_labels, tokenizer, MAX_LEN)
    for split_texts, split_labels in (
        (X_train, y_train),
        (X_val, y_val),
        (X_test, y_test),
    )
)

# Only the training loader shuffles; validation and test share the same
# deterministic settings.
train_loader = DataLoader(train_ds, batch_size=TRAIN_BATCH_SIZE, shuffle=True, drop_last=False)
eval_loader_kwargs = dict(batch_size=VAL_BATCH_SIZE, shuffle=False, drop_last=False)
val_loader = DataLoader(val_ds, **eval_loader_kwargs)
test_loader = DataLoader(test_ds, **eval_loader_kwargs)

# -----------------------
# Model, Optimizer, Scheduler
# -----------------------
model = DistilBertForSequenceClassification.from_pretrained(
    MODEL_NAME, num_labels=num_labels
).to(device)
print(model)  # print model architecture

# Optimizer and Scheduler
optimizer = AdamW(
    model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY,
)

# The scheduler is stepped once per batch, so its horizon is counted in
# batches across all epochs; the first WARMUP_RATIO share of them is warmup.
steps_per_epoch = len(train_loader)
total_steps = steps_per_epoch * EPOCHS
warmup_steps = int(WARMUP_RATIO * total_steps)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=warmup_steps,
    num_training_steps=total_steps,
)

# -----------------------
# Train / Eval Loop
# -----------------------
def run_epoch(dataloader, train: bool = True):
    """Run one full pass over ``dataloader`` and return sample-weighted metrics.

    Relies on the module-level ``model``, ``device``, ``optimizer`` and
    ``scheduler``.

    Args:
        dataloader: Iterable of batches; each batch is indexed with the keys
            ``"input_ids"``, ``"attention_mask"`` and ``"labels"``.
        train: When True, the model is put in training mode and the optimizer
            and scheduler are stepped after every batch. When False, the model
            is put in eval mode and gradients are disabled.

    Returns:
        Tuple ``(avg_loss, avg_acc)`` averaged over every sample seen.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no samples.
    """
    if train:
        model.train()
    else:
        model.eval()

    loss_sum, n_correct, n_seen = 0.0, 0, 0

    for batch in dataloader:
        ids, mask, targets = (
            batch[key].to(device)
            for key in ("input_ids", "attention_mask", "labels")
        )
        batch_size = targets.size(0)

        with torch.set_grad_enabled(train):
            out = model(input_ids=ids, attention_mask=mask, labels=targets)
            batch_loss, batch_logits = out.loss, out.logits

            if train:
                optimizer.zero_grad()
                batch_loss.backward()
                # Cap the global gradient norm at 1.0 before the update.
                nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimizer.step()
                scheduler.step()

        # Weight the batch loss by batch size (presumably it is a per-batch
        # mean) so a shorter final batch does not skew the epoch average.
        loss_sum += batch_loss.item() * batch_size
        hits = torch.argmax(batch_logits, dim=-1) == targets
        n_correct += hits.sum().item()
        n_seen += batch_size

    return loss_sum / n_seen, n_correct / n_seen

train_losses, val_losses = [], []
train_accs, val_accs = [], []

# Alternate one training pass and one validation pass per epoch, keeping the
# per-epoch metrics for the loss plot further down.
for epoch in range(1, EPOCHS + 1):
    tr_loss, tr_acc = run_epoch(train_loader, train=True)
    va_loss, va_acc = run_epoch(val_loader,   train=False)

    train_losses.append(tr_loss); val_losses.append(va_loss)
    train_accs.append(tr_acc);    val_accs.append(va_acc)

    print(f"Epoch {epoch}/{EPOCHS} | "
          f"Train Loss: {tr_loss:.4f} Acc: {tr_acc:.4f} | "
          f"Val Loss: {va_loss:.4f} Acc: {va_acc:.4f}")

# -----------------------
# Test evaluation
# -----------------------
model.eval()
all_logits = []
all_labels = []

with torch.no_grad():
    for batch in test_loader:
        input_ids      = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)

        # No labels are passed to the model: only the logits are needed here.
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits

        all_logits.append(logits.cpu().numpy())
        # Labels are only compared on the host, so they are not moved to the device.
        all_labels.append(batch["labels"].cpu().numpy())

all_logits = np.concatenate(all_logits, axis=0)
all_labels = np.concatenate(all_labels, axis=0)

y_pred = np.argmax(all_logits, axis=1)
test_acc = accuracy_score(all_labels, y_pred)
print(f"\nTEST Accuracy: {test_acc:.4f}")

# Classification report
# If labels were encoded, class_names holds strings; otherwise make them
if len(class_names) != num_labels:
    # fallback (shouldn't happen)
    class_names = [str(i) for i in range(num_labels)]
# Pass the class ids explicitly so the report always has one row per entry in
# class_names, even when a class is missing from both the test labels and the
# predictions (without labels=, target_names would then mismatch and raise).
class_ids = list(range(num_labels))
print("\nClassification Report:\n")
print(classification_report(all_labels, y_pred, labels=class_ids,
                            target_names=class_names, digits=4))

# -----------------------
# Plots
# -----------------------

# 1) Training vs Validation Loss
epochs_axis = range(1, EPOCHS+1)
plt.figure()
plt.plot(epochs_axis, train_losses, label="Train Loss")
plt.plot(epochs_axis, val_losses,   label="Val Loss")
plt.xlabel("Epoch"); plt.ylabel("Loss"); plt.title("Training & Validation Loss")
plt.legend(); plt.tight_layout()
plt.show()

# 2) Confusion Matrix (rows = true class, columns = predicted class)
cm = confusion_matrix(all_labels, y_pred, labels=class_ids)
plt.figure()
im = plt.imshow(cm, interpolation='nearest')
plt.title("Confusion Matrix")
plt.colorbar(im)
tick_marks = np.arange(num_labels)
plt.xticks(tick_marks, class_names, rotation=45, ha='right')
plt.yticks(tick_marks, class_names)
plt.xlabel("Predicted"); plt.ylabel("True")
# Annotate counts; cells above half the maximum count get white text for contrast.
thresh = cm.max() / 2.0
for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        plt.text(j, i, format(cm[i, j], 'd'),
                 ha="center", va="center",
                 color="white" if cm[i, j] > thresh else "black")
plt.tight_layout()
plt.show()

# 3) ROC Curve (binary or multiclass One-vs-Rest)
# Softmax turns the logits into per-class scores used as ROC thresholds.
probs = torch.softmax(torch.tensor(all_logits), dim=1).numpy()

plt.figure()
if num_labels == 2:
    # Binary case: score of class id 1 against the true labels.
    fpr, tpr, _ = roc_curve(all_labels, probs[:, 1])
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, label=f"AUC = {roc_auc:.3f}")
else:
    # One-vs-Rest: one curve per class, followed by a micro-average curve.
    y_bin = label_binarize(all_labels, classes=class_ids)
    fpr = dict(); tpr = dict(); roc_auc = dict()
    for i in range(num_labels):
        fpr[i], tpr[i], _ = roc_curve(y_bin[:, i], probs[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])
        plt.plot(fpr[i], tpr[i], label=f"Class {class_names[i]} (AUC={roc_auc[i]:.3f})")
    # Micro-average: pool every (sample, class) decision into one binary problem.
    fpr["micro"], tpr["micro"], _ = roc_curve(y_bin.ravel(), probs.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
    plt.plot(fpr["micro"], tpr["micro"], linestyle="--", label=f"Micro Avg (AUC={roc_auc['micro']:.3f})")

# Diagonal reference line.
plt.plot([0,1],[0,1], linestyle="--")
plt.xlabel("False Positive Rate"); plt.ylabel("True Positive Rate")
plt.title("ROC Curve")
plt.legend(loc="lower right")
plt.tight_layout()
plt.show()


# **XLNet**

In [None]:
import random
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
from transformers import XLNetTokenizer, XLNetForSequenceClassification
from transformers import  get_linear_schedule_with_warmup
from torch.optim import AdamW
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize
import torch.nn as nn

# -----------------------
# Reproducibility helpers
# -----------------------
def set_seed(seed: int = 42):
    """Seed every random number generator this script relies on.

    Seeds, in order: Python's ``random`` module, NumPy's global generator,
    PyTorch's default generator and all CUDA generators.

    Args:
        seed: Value handed to each generator. Defaults to 42.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_fn in seeders:
        seed_fn(seed)

set_seed(42)

# -----------------------
# Config
# -----------------------
# NOTE: absolute, machine-specific location of the dataset; adjust when running elsewhere.
CSV_PATH = r"C:\Users\sk304939\OneDrive - California State University, Northridge\Documents\Thesis\My_Project\Twitter Bot - Final code\Previous EDA code\bot_detection_data.csv"  # Path to the uploaded file
TEXT_COL = "Tweet"        # column containing the text fed to the tokenizer
LABEL_COL = "Bot Label"   # column containing the class label
MODEL_NAME = "xlnet-base-cased"  # XLNet model
MAX_LEN = 128             # samples are padded/truncated to this many tokens
TRAIN_BATCH_SIZE = 16
VAL_BATCH_SIZE = 32       # also used for the test loader
EPOCHS = 3
LEARNING_RATE = 2e-5
WARMUP_RATIO = 0.1   # 10% warmup
WEIGHT_DECAY = 0.01

# -----------------------
# Device setup
# -----------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# -----------------------
# Load & Prep Data
# -----------------------
df = pd.read_csv(CSV_PATH)

df = df.head(5000)  # Use the first 5,000 samples

# Keep rows with actual text: drop missing values, then the literal
# placeholder "nil" (case-insensitive, surrounding whitespace ignored).
# .copy() makes the result an independent frame so the label assignment
# below does not write into a slice of the original DataFrame.
df = df[df[TEXT_COL].notna()]
df = df[df[TEXT_COL].str.strip().str.lower() != "nil"].copy()

# Map labels to contiguous ids 0..num_labels-1. The classifier head, the
# confusion matrix and the ROC code all index classes by range(num_labels),
# so numeric labels (e.g. {1, 2} or {-1, 1}) are remapped as well instead of
# being passed through unchanged. Labels that are already 0..n-1 keep their values.
labels_are_numeric = np.issubdtype(df[LABEL_COL].dtype, np.number)
raw_labels = df[LABEL_COL] if labels_are_numeric else df[LABEL_COL].astype(str)
le = LabelEncoder()
df[LABEL_COL] = le.fit_transform(raw_labels)
# le.classes_ is sorted, so class_names[i] is the readable name of class id i.
class_names = [str(c) for c in le.classes_]

num_labels = len(class_names)
print(f"Detected {num_labels} classes:", class_names)

# Train/Val/Test split (80/10/10), stratified so each split keeps the class balance.
all_targets = df[LABEL_COL].tolist()
X_train, X_temp, y_train, y_temp = train_test_split(
    df[TEXT_COL].tolist(), all_targets,
    test_size=0.2, random_state=42, stratify=all_targets
)
# Halve the 20% hold-out into validation and test.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)

print(f"Train: {len(X_train)} | Val: {len(X_val)} | Test: {len(X_test)}")

# -----------------------
# Dataset / Dataloader
# -----------------------
tokenizer = XLNetTokenizer.from_pretrained(MODEL_NAME)

class TextDS(Dataset):
    """Map-style dataset that tokenizes a single text on each item access.

    Args:
        texts: Indexable collection of inputs; each entry is passed through
            ``str()`` before tokenization.
        labels: Indexable collection of labels, read with the same index
            as ``texts``.
        tokenizer: Callable invoked as ``tokenizer(text, padding="max_length",
            truncation=True, max_length=max_len, return_tensors="pt")`` and
            returning a mapping of tensors.
        max_len: Value forwarded to the tokenizer as ``max_length``.
    """

    def __init__(self, texts, labels, tokenizer, max_len=128):
        self.texts, self.labels = texts, labels
        self.tok, self.max_len = tokenizer, max_len

    def __len__(self):
        """Return the number of texts."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the tokenizer's fields for sample ``idx`` plus a ``"labels"`` tensor."""
        raw_text = str(self.texts[idx])
        encoded = self.tok(
            raw_text,
            padding="max_length",
            truncation=True,
            max_length=self.max_len,
            return_tensors="pt",
        )

        # squeeze(0) removes the leading size-1 dimension from each returned tensor
        sample = {}
        for field, tensor in encoded.items():
            sample[field] = tensor.squeeze(0)

        sample["labels"] = torch.tensor(self.labels[idx], dtype=torch.long)
        return sample

# One dataset per split, all sharing the same tokenizer and sequence length
train_ds, val_ds, test_ds = (
    TextDS(split_texts, split_labels, tokenizer, MAX_LEN)
    for split_texts, split_labels in (
        (X_train, y_train),
        (X_val, y_val),
        (X_test, y_test),
    )
)

# Only the training loader shuffles; validation and test keep a fixed order
train_loader = DataLoader(
    train_ds,
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=True,
    drop_last=False,
)
eval_loader_kwargs = dict(batch_size=VAL_BATCH_SIZE, shuffle=False, drop_last=False)
val_loader = DataLoader(val_ds, **eval_loader_kwargs)
test_loader = DataLoader(test_ds, **eval_loader_kwargs)

# -----------------------
# Model, Optimizer, Scheduler
# -----------------------
model = XLNetForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=num_labels,
).to(device)
print(model)  # print model architecture

# Optimizer and Scheduler
optimizer = AdamW(
    model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY,
)

# The scheduler is stepped once per batch, so its horizon is batches-per-epoch times epochs
steps_per_epoch = len(train_loader)
total_steps = steps_per_epoch * EPOCHS
warmup_steps = int(WARMUP_RATIO * total_steps)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=warmup_steps,
    num_training_steps=total_steps,
)

# -----------------------
# Train / Eval Loop
# -----------------------
def run_epoch(dataloader, train: bool = True):
    """Run one full pass over ``dataloader`` and report loss and accuracy.

    Works on the module-level ``model`` and ``device``. When ``train`` is true
    it also steps the module-level ``optimizer`` and ``scheduler`` once per batch.

    Args:
        dataloader: Iterable of batches, each a mapping holding ``"input_ids"``,
            ``"attention_mask"`` and ``"labels"`` tensors.
        train: True to update the weights; False to evaluate with gradient
            tracking disabled.

    Returns:
        Tuple ``(avg_loss, avg_acc)``: the per-batch loss averaged with each
        batch weighted by its size, and the fraction of samples whose argmax
        prediction equals the label.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no samples.
    """
    model.train(train)  # train(False) puts the model in evaluation mode
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    for batch in dataloader:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        with torch.set_grad_enabled(train):
            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=labels,
            )
            loss, logits = outputs.loss, outputs.logits

            if train:
                optimizer.zero_grad()
                loss.backward()
                # Cap the global gradient norm before the parameter update
                nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimizer.step()
                scheduler.step()

        batch_size = labels.size(0)
        loss_sum += loss.item() * batch_size
        n_correct += (torch.argmax(logits, dim=-1) == labels).sum().item()
        n_seen += batch_size

    return loss_sum / n_seen, n_correct / n_seen

# Per-epoch history of loss and accuracy for both splits
train_losses, train_accs = [], []
val_losses, val_accs = [], []

for epoch in range(1, EPOCHS + 1):
    # One optimisation pass over the training data, then a gradient-free validation pass
    tr_loss, tr_acc = run_epoch(train_loader, train=True)
    va_loss, va_acc = run_epoch(val_loader, train=False)

    for history, value in (
        (train_losses, tr_loss),
        (val_losses, va_loss),
        (train_accs, tr_acc),
        (val_accs, va_acc),
    ):
        history.append(value)

    summary = " | ".join([
        f"Epoch {epoch}/{EPOCHS}",
        f"Train Loss: {tr_loss:.4f} Acc: {tr_acc:.4f}",
        f"Val Loss: {va_loss:.4f} Acc: {va_acc:.4f}",
    ])
    print(summary)

# -----------------------
# Test evaluation
# -----------------------
# Collect raw logits and true labels for the whole test split
model.eval()
all_logits = []
all_labels = []

with torch.no_grad():
    for batch in test_loader:
        input_ids      = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        # Labels are used only by the CPU-side metrics below (the model is
        # called without them), so they are not transferred to the device.
        labels         = batch["labels"]

        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits

        all_logits.append(logits.cpu().numpy())
        all_labels.append(labels.cpu().numpy())

all_logits = np.concatenate(all_logits, axis=0)
all_labels = np.concatenate(all_labels, axis=0)

y_pred = np.argmax(all_logits, axis=1)
test_acc = accuracy_score(all_labels, y_pred)
print(f"\nTEST Accuracy: {test_acc:.4f}")

# Classification report
# If labels were encoded, class_names holds strings; otherwise make them
if len(class_names) != num_labels:
    # fallback (shouldn't happen)
    class_names = [str(i) for i in range(num_labels)]
print("\nClassification Report:\n")
# Pass the label ids explicitly, as the confusion matrix does: without them the
# report infers the classes from the data and rejects target_names whenever a
# class is absent from both the test labels and the predictions.
print(classification_report(
    all_labels, y_pred,
    labels=list(range(num_labels)),
    target_names=class_names,
    digits=4,
))

# -----------------------
# Plots
# -----------------------

# 1) Training vs Validation Loss
# Both curves share the same 1-based epoch axis
epoch_numbers = range(1, EPOCHS + 1)

plt.figure()
for loss_series, legend_name in (
    (train_losses, "Train Loss"),
    (val_losses, "Val Loss"),
):
    plt.plot(epoch_numbers, loss_series, label=legend_name)
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Training & Validation Loss")
plt.legend()
plt.tight_layout()
plt.show()

# 2) Confusion Matrix
# Rows are true labels and columns are predictions, both in label-id order
cm = confusion_matrix(all_labels, y_pred, labels=list(range(num_labels)))
plt.figure()
# The annotations below use white text on high counts and black text on low
# counts. That only reads well on a light-to-dark colormap; matplotlib's default
# map draws high counts in a bright colour, so the map is set explicitly.
im = plt.imshow(cm, interpolation='nearest', cmap="Blues")
plt.title("Confusion Matrix")
plt.colorbar(im)
tick_marks = np.arange(num_labels)
plt.xticks(tick_marks, class_names, rotation=45, ha='right')
plt.yticks(tick_marks, class_names)
plt.xlabel("Predicted"); plt.ylabel("True")
# Annotate counts; cells above half of the largest count get white text
thresh = cm.max() / 2.0
for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        plt.text(j, i, format(cm[i, j], 'd'),
                 ha="center", va="center",
                 color="white" if cm[i, j] > thresh else "black")
plt.tight_layout()
plt.show()

# 3) ROC Curve (binary or multiclass One-vs-Rest)
# Softmax over the class axis turns the collected test logits into probabilities
logit_tensor = torch.tensor(all_logits)
probs = torch.softmax(logit_tensor, dim=1).numpy()

plt.figure()
if num_labels != 2:
    # One-vs-Rest: one curve per class, then a micro-average over all (sample, class) pairs
    class_ids = list(range(num_labels))
    y_bin = label_binarize(all_labels, classes=class_ids)
    fpr, tpr, roc_auc = {}, {}, {}
    for i in class_ids:
        fpr[i], tpr[i], _ = roc_curve(y_bin[:, i], probs[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])
        curve_label = f"Class {class_names[i]} (AUC={roc_auc[i]:.3f})"
        plt.plot(fpr[i], tpr[i], label=curve_label)
    # Micro-average
    fpr["micro"], tpr["micro"], _ = roc_curve(y_bin.ravel(), probs.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
    micro_label = f"Micro Avg (AUC={roc_auc['micro']:.3f})"
    plt.plot(fpr["micro"], tpr["micro"], linestyle="--", label=micro_label)
else:
    # Two classes: score each sample by its probability for label id 1
    fpr, tpr, _ = roc_curve(all_labels, probs[:, 1])
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, label=f"AUC = {roc_auc:.3f}")

# Diagonal reference line from (0, 0) to (1, 1)
plt.plot([0, 1], [0, 1], linestyle="--")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve")
plt.legend(loc="lower right")
plt.tight_layout()
plt.show()


# **T5-Small**

In [None]:
import random
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
from transformers import T5Tokenizer, T5ForSequenceClassification
from transformers import  get_linear_schedule_with_warmup
from torch.optim import AdamW
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize
import torch.nn as nn

# -----------------------
# Reproducibility helpers
# -----------------------
def set_seed(seed: int = 42):
    """Seed every RNG this script uses and request deterministic cuDNN behaviour.

    Args:
        seed: Value applied to Python's ``random``, NumPy's global RNG, and
            PyTorch (CPU generator and all CUDA devices).
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Seeding alone does not fix cuDNN's kernel selection: with autotuning on,
    # or non-deterministic kernels allowed, GPU runs can differ between
    # executions even with identical seeds.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)

# -----------------------
# Config
# -----------------------
CSV_PATH = r"C:\Users\sk304939\OneDrive - California State University, Northridge\Documents\Thesis\My_Project\Twitter Bot - Final code\Previous EDA code\bot_detection_data.csv"  # Path to your uploaded file
TEXT_COL = "Tweet"
LABEL_COL = "Bot Label"
MODEL_NAME = "t5-small"  # T5-small model
MAX_LEN = 128
TRAIN_BATCH_SIZE = 16
VAL_BATCH_SIZE = 32
EPOCHS = 3
LEARNING_RATE = 2e-5
WARMUP_RATIO = 0.1   # 10% warmup
WEIGHT_DECAY = 0.01

# -----------------------
# Device setup
# -----------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# -----------------------
# Load & Prep Data
# -----------------------
df = pd.read_csv(CSV_PATH)

df = df.head(5000)  # Use the first 5,000 samples

# Keep rows with actual text: drop missing values and the literal "nil" placeholder
df = df[df[TEXT_COL].notna()]
# .copy() gives the filtered frame its own data, so the label column assigned
# below is written to this frame rather than to a slice of the loaded one.
df = df[df[TEXT_COL].str.strip().str.lower() != "nil"].copy()

# Encode labels as contiguous ids 0..num_labels-1.
# Non-numeric labels are compared as strings. Numeric labels are remapped too,
# because the model head and the metrics further down (range(num_labels)) assume
# ids that start at 0 with no gaps; raw values such as {1, 2} would be out of range.
if not np.issubdtype(df[LABEL_COL].dtype, np.number):
    df[LABEL_COL] = df[LABEL_COL].astype(str)
le = LabelEncoder()
df[LABEL_COL] = le.fit_transform(df[LABEL_COL])
# le.classes_ is ordered by encoded id, so class_names[i] names label id i
class_names = [str(c) for c in le.classes_]

num_labels = len(class_names)
print(f"Detected {num_labels} classes:", class_names)

# Train/Val/Test split (80/10/10), stratified on the label at both stages
X_train, X_temp, y_train, y_temp = train_test_split(
    df[TEXT_COL].tolist(), df[LABEL_COL].tolist(),
    test_size=0.2, random_state=42, stratify=df[LABEL_COL].tolist()
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)

print(f"Train: {len(X_train)} | Val: {len(X_val)} | Test: {len(X_test)}")

# -----------------------
# Dataset / Dataloader
# -----------------------
tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)

class TextDS(Dataset):
    """Map-style dataset that prefixes each text with ``"classify: "`` and tokenizes it on access.

    Args:
        texts: Indexable collection of inputs; each entry is converted with
            ``str()`` before the prefix is added.
        labels: Indexable collection of labels, read with the same index
            as ``texts``.
        tokenizer: Callable invoked as ``tokenizer(text, padding="max_length",
            truncation=True, max_length=max_len, return_tensors="pt")`` and
            returning a mapping of tensors.
        max_len: Value forwarded to the tokenizer as ``max_length``.
    """

    def __init__(self, texts, labels, tokenizer, max_len=128):
        self.texts, self.labels = texts, labels
        self.tok, self.max_len = tokenizer, max_len

    def __len__(self):
        """Return the number of texts."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the tokenizer's fields for sample ``idx`` plus a ``"labels"`` tensor."""
        # Text-to-text style input: the task name is prepended to the tweet
        prompt = f"classify: {self.texts[idx]!s}"
        encoded = self.tok(
            prompt,
            padding="max_length",
            truncation=True,
            max_length=self.max_len,
            return_tensors="pt",
        )

        # squeeze(0) removes the leading size-1 dimension from each returned tensor
        sample = {}
        for field, tensor in encoded.items():
            sample[field] = tensor.squeeze(0)

        # The label is stored as an integer (long) tensor, not as target text
        sample["labels"] = torch.tensor(self.labels[idx], dtype=torch.long)
        return sample

# One dataset per split, all sharing the same tokenizer and sequence length
train_ds, val_ds, test_ds = (
    TextDS(split_texts, split_labels, tokenizer, MAX_LEN)
    for split_texts, split_labels in (
        (X_train, y_train),
        (X_val, y_val),
        (X_test, y_test),
    )
)

# Only the training loader shuffles; validation and test keep a fixed order
train_loader = DataLoader(
    train_ds,
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=True,
    drop_last=False,
)
eval_loader_kwargs = dict(batch_size=VAL_BATCH_SIZE, shuffle=False, drop_last=False)
val_loader = DataLoader(val_ds, **eval_loader_kwargs)
test_loader = DataLoader(test_ds, **eval_loader_kwargs)

# -----------------------
# Model, Optimizer, Scheduler
# -----------------------
model = T5ForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=num_labels,
).to(device)
print(model)  # print model architecture

# Optimizer and Scheduler
optimizer = AdamW(
    model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY,
)

# The scheduler is stepped once per batch, so its horizon is batches-per-epoch times epochs
steps_per_epoch = len(train_loader)
total_steps = steps_per_epoch * EPOCHS
warmup_steps = int(WARMUP_RATIO * total_steps)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=warmup_steps,
    num_training_steps=total_steps,
)

# -----------------------
# Train / Eval Loop
# -----------------------
def run_epoch(dataloader, train: bool = True):
    """Run one full pass over ``dataloader`` and report loss and accuracy.

    Works on the module-level ``model`` and ``device``. When ``train`` is true
    it also steps the module-level ``optimizer`` and ``scheduler`` once per batch.

    Args:
        dataloader: Iterable of batches, each a mapping holding ``"input_ids"``,
            ``"attention_mask"`` and ``"labels"`` tensors.
        train: True to update the weights; False to evaluate with gradient
            tracking disabled.

    Returns:
        Tuple ``(avg_loss, avg_acc)``: the per-batch loss averaged with each
        batch weighted by its size, and the fraction of samples whose argmax
        prediction equals the label.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no samples.
    """
    model.train(train)  # train(False) puts the model in evaluation mode
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    for batch in dataloader:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        with torch.set_grad_enabled(train):
            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=labels,
            )
            loss, logits = outputs.loss, outputs.logits

            if train:
                optimizer.zero_grad()
                loss.backward()
                # Cap the global gradient norm before the parameter update
                nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimizer.step()
                scheduler.step()

        batch_size = labels.size(0)
        loss_sum += loss.item() * batch_size
        n_correct += (torch.argmax(logits, dim=-1) == labels).sum().item()
        n_seen += batch_size

    return loss_sum / n_seen, n_correct / n_seen

# Per-epoch history of loss and accuracy for both splits
train_losses, train_accs = [], []
val_losses, val_accs = [], []

for epoch in range(1, EPOCHS + 1):
    # One optimisation pass over the training data, then a gradient-free validation pass
    tr_loss, tr_acc = run_epoch(train_loader, train=True)
    va_loss, va_acc = run_epoch(val_loader, train=False)

    for history, value in (
        (train_losses, tr_loss),
        (val_losses, va_loss),
        (train_accs, tr_acc),
        (val_accs, va_acc),
    ):
        history.append(value)

    summary = " | ".join([
        f"Epoch {epoch}/{EPOCHS}",
        f"Train Loss: {tr_loss:.4f} Acc: {tr_acc:.4f}",
        f"Val Loss: {va_loss:.4f} Acc: {va_acc:.4f}",
    ])
    print(summary)

# -----------------------
# Test evaluation
# -----------------------
# Collect raw logits and true labels for the whole test split
model.eval()
all_logits = []
all_labels = []

with torch.no_grad():
    for batch in test_loader:
        input_ids      = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        # Labels are used only by the CPU-side metrics below (the model is
        # called without them), so they are not transferred to the device.
        labels         = batch["labels"]

        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits

        all_logits.append(logits.cpu().numpy())
        all_labels.append(labels.cpu().numpy())

all_logits = np.concatenate(all_logits, axis=0)
all_labels = np.concatenate(all_labels, axis=0)

y_pred = np.argmax(all_logits, axis=1)
test_acc = accuracy_score(all_labels, y_pred)
print(f"\nTEST Accuracy: {test_acc:.4f}")

# Classification report
# If labels were encoded, class_names holds strings; otherwise make them
if len(class_names) != num_labels:
    # fallback (shouldn't happen)
    class_names = [str(i) for i in range(num_labels)]
print("\nClassification Report:\n")
# Pass the label ids explicitly, as the confusion matrix does: without them the
# report infers the classes from the data and rejects target_names whenever a
# class is absent from both the test labels and the predictions.
print(classification_report(
    all_labels, y_pred,
    labels=list(range(num_labels)),
    target_names=class_names,
    digits=4,
))

# -----------------------
# Plots
# -----------------------

# 1) Training vs Validation Loss
# Both curves share the same 1-based epoch axis
epoch_numbers = range(1, EPOCHS + 1)

plt.figure()
for loss_series, legend_name in (
    (train_losses, "Train Loss"),
    (val_losses, "Val Loss"),
):
    plt.plot(epoch_numbers, loss_series, label=legend_name)
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Training & Validation Loss")
plt.legend()
plt.tight_layout()
plt.show()

# 2) Confusion Matrix
# Rows are true labels and columns are predictions, both in label-id order
cm = confusion_matrix(all_labels, y_pred, labels=list(range(num_labels)))
plt.figure()
# The annotations below use white text on high counts and black text on low
# counts. That only reads well on a light-to-dark colormap; matplotlib's default
# map draws high counts in a bright colour, so the map is set explicitly.
im = plt.imshow(cm, interpolation='nearest', cmap="Blues")
plt.title("Confusion Matrix")
plt.colorbar(im)
tick_marks = np.arange(num_labels)
plt.xticks(tick_marks, class_names, rotation=45, ha='right')
plt.yticks(tick_marks, class_names)
plt.xlabel("Predicted"); plt.ylabel("True")
# Annotate counts; cells above half of the largest count get white text
thresh = cm.max() / 2.0
for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        plt.text(j, i, format(cm[i, j], 'd'),
                 ha="center", va="center",
                 color="white" if cm[i, j] > thresh else "black")
plt.tight_layout()
plt.show()

# 3) ROC Curve (binary or multiclass One-vs-Rest)
# Softmax over the class axis turns the collected test logits into probabilities
logit_tensor = torch.tensor(all_logits)
probs = torch.softmax(logit_tensor, dim=1).numpy()

plt.figure()
if num_labels != 2:
    # One-vs-Rest: one curve per class, then a micro-average over all (sample, class) pairs
    class_ids = list(range(num_labels))
    y_bin = label_binarize(all_labels, classes=class_ids)
    fpr, tpr, roc_auc = {}, {}, {}
    for i in class_ids:
        fpr[i], tpr[i], _ = roc_curve(y_bin[:, i], probs[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])
        curve_label = f"Class {class_names[i]} (AUC={roc_auc[i]:.3f})"
        plt.plot(fpr[i], tpr[i], label=curve_label)
    # Micro-average
    fpr["micro"], tpr["micro"], _ = roc_curve(y_bin.ravel(), probs.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
    micro_label = f"Micro Avg (AUC={roc_auc['micro']:.3f})"
    plt.plot(fpr["micro"], tpr["micro"], linestyle="--", label=micro_label)
else:
    # Two classes: score each sample by its probability for label id 1
    fpr, tpr, _ = roc_curve(all_labels, probs[:, 1])
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, label=f"AUC = {roc_auc:.3f}")

# Diagonal reference line from (0, 0) to (1, 1)
plt.plot([0, 1], [0, 1], linestyle="--")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve")
plt.legend(loc="lower right")
plt.tight_layout()
plt.show()
