In [None]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertModel
from torch.optim import AdamW
from tqdm import tqdm

# Read the training data and derive the binary `toxic` column:
# 1 when the `target` score is at least 0.5, otherwise 0.
df = pd.read_csv("train.csv")
df['toxic'] = df['target'].ge(0.5).astype(int)

# Subtype columns used as multi-label targets. They are binarized in place
# with the same 0.5 cut-off.
subtype_labels = [
    'severe_toxicity',
    'obscene',
    'identity_attack',
    'insult',
    'threat',
    'sexual_explicit',
]
df[subtype_labels] = df[subtype_labels].ge(0.5).astype(int)

# Module-level tokenizer; ToxicityDataset.__getitem__ reads it as a global.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
# Dataset class
class ToxicityDataset(Dataset):
    """Map-style dataset yielding one tokenized comment with its labels.

    Reads the module-level ``tokenizer`` and ``subtype_labels``.

    Args:
        df: Frame with a ``comment_text`` column, a ``toxic`` column and one
            column per entry of ``subtype_labels``.
        max_length: Token length every comment is padded/truncated to.
            Defaults to 128.
    """

    def __init__(self, df, max_length=128):
        # pandas represents a missing comment as float NaN, which is not valid
        # tokenizer input; substitute an empty string so indexing never fails
        # on a single bad row.
        self.texts = df['comment_text'].fillna("").astype(str).tolist()
        self.targets = df['toxic'].tolist()
        self.labels = df[subtype_labels].values
        self.max_length = max_length

    def __len__(self):
        """Return the number of comments."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize comment ``idx`` and return its tensors.

        Returns:
            dict with ``input_ids``, ``attention_mask``, ``toxicity`` (float
            scalar tensor) and ``labels`` (float tensor, one entry per
            subtype).
        """
        enc = tokenizer(
            self.texts[idx],
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors='pt',
        )
        return {
            # squeeze(0) removes only the leading axis; a bare squeeze() would
            # also drop the sequence axis when max_length == 1.
            'input_ids': enc['input_ids'].squeeze(0),
            'attention_mask': enc['attention_mask'].squeeze(0),
            'toxicity': torch.tensor(self.targets[idx], dtype=torch.float),
            'labels': torch.tensor(self.labels[idx], dtype=torch.float)
        }

# Model
class ToxicityClassifier(nn.Module):
    """BERT encoder with two sigmoid heads on the pooled output.

    ``tox_head`` produces one toxicity probability per sample and
    ``subtype_head`` produces one probability per entry of the module-level
    ``subtype_labels``.
    """

    def __init__(self):
        super().__init__()
        self.bert = BertModel.from_pretrained("bert-base-uncased")
        # Take the head width from the encoder config instead of a literal 768
        # so the heads stay correct if the checkpoint name is ever changed.
        hidden_size = self.bert.config.hidden_size
        self.tox_head = nn.Linear(hidden_size, 1)              # Binary toxicity score
        self.subtype_head = nn.Linear(hidden_size, len(subtype_labels))  # Multi-label subtypes

    def forward(self, input_ids, attention_mask):
        """Return ``(toxicity_score, subtype_pred)`` as probabilities.

        ``toxicity_score`` has its second axis squeezed away; ``subtype_pred``
        keeps one column per subtype. Both are already passed through sigmoid,
        so they pair with ``nn.BCELoss`` rather than a logits-based loss.
        """
        pooled = self.bert(input_ids=input_ids, attention_mask=attention_mask).pooler_output
        toxicity_score = torch.sigmoid(self.tox_head(pooled)).squeeze(1)
        subtype_pred = torch.sigmoid(self.subtype_head(pooled))
        return toxicity_score, subtype_pred

# Setup
# Seed PyTorch's global RNG before the model and the shuffling loader are
# created, so head initialization and batch order repeat across fresh runs.
# Some GPU kernels can still be nondeterministic.
SEED = 42
torch.manual_seed(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ToxicityClassifier().to(device)
optimizer = AdamW(model.parameters(), lr=2e-5)
# BCELoss takes probabilities; ToxicityClassifier.forward already applies sigmoid.
bce_loss = nn.BCELoss()

train_loader = DataLoader(ToxicityDataset(df), batch_size=32, shuffle=True)

# Training loop: two passes over train_loader, saving the weights after each.
for epoch in range(2):
    model.train()
    running_loss = 0
    n_correct = 0
    n_seen = 0

    progress = tqdm(train_loader, desc=f"Epoch {epoch+1}")
    for step, batch in enumerate(progress):
        # Move the tensors of this batch onto the training device.
        input_ids, attention_mask, toxicity, labels = (
            batch[key].to(device)
            for key in ('input_ids', 'attention_mask', 'toxicity', 'labels')
        )

        optimizer.zero_grad()
        tox_pred, subtype_pred = model(input_ids, attention_mask)

        # Joint objective: toxicity BCE plus subtype BCE, equally weighted.
        loss = bce_loss(tox_pred, toxicity) + bce_loss(subtype_pred, labels)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        running_loss += batch_loss

        # A sample is correct when its toxicity score thresholded at 0.5
        # equals the binary target.
        hits = (tox_pred >= 0.5).float() == toxicity
        n_correct += hits.sum().item()
        n_seen += toxicity.size(0)

        progress.set_postfix({
            "i loss train": f"{batch_loss:.3f}",
            "acc": f"{100*n_correct/n_seen:.2f}%",
            "step": step
        })

    print(f"✅ Epoch {epoch+1} | Train Accuracy: {100*n_correct/n_seen:.2f}% | Avg Loss: {running_loss/len(train_loader):.4f}")
    # The same path is reused every epoch, so only the latest weights remain.
    torch.save(model.state_dict(), "toxicity.pth")

Epoch 1: 100%|██████████| 15119/15119 [48:11<00:00,  5.23it/s, i loss train=0.454, acc=84.52%, step=15118]


✅ Epoch 1 | Train Accuracy: 84.52% | Avg Loss: 0.4290


Epoch 2: 100%|██████████| 15119/15119 [48:11<00:00,  5.23it/s, i loss train=0.789, acc=86.72%, step=15118]


✅ Epoch 2 | Train Accuracy: 86.72% | Avg Loss: 0.3653


In [6]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer
import torch.nn as nn
from sklearn.metrics import classification_report, roc_auc_score
from tqdm import tqdm
import numpy as np

# === Config ===
VAL_PATH = "val.csv"
BATCH_SIZE = 32
# Use the GPU when one is visible to PyTorch, otherwise the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# Cut-off used to turn scores into 0/1 labels.
THRESHOLD = 0.5

# === Subtype + Identity Columns ===
subtype_labels = [
    'severe_toxicity',
    'obscene',
    'identity_attack',
    'insult',
    'threat',
    'sexual_explicit',
]
identity_labels = [
    'female',
    'male',
    'muslim',
    'black',
    'white',
    'christian',
    'jewish',
    'hindu',
    'buddhist',
    'atheist',
    'transgender',
    'latino',
]

# === Load val data ===
df_val = pd.read_csv(VAL_PATH)
print(f"Loaded {len(df_val)} validation samples")

# === Tokenizer ===
# Read as a module-level global by ValDataset.__getitem__.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# === Dataset ===
class ValDataset(Dataset):
    """Map-style dataset yielding tokenized comments (inputs only, no labels).

    Reads the module-level ``tokenizer``.

    Args:
        df: Frame with a ``comment_text`` column.
        max_length: Token length every comment is padded/truncated to.
            Defaults to 128.
    """

    def __init__(self, df, max_length=128):
        # pandas represents a missing comment as float NaN, which is not valid
        # tokenizer input; substitute an empty string so every row still
        # yields a prediction.
        self.texts = df['comment_text'].fillna("").astype(str).tolist()
        self.max_length = max_length

    def __len__(self):
        """Return the number of comments."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize comment ``idx`` and return ``input_ids`` / ``attention_mask``."""
        enc = tokenizer(
            self.texts[idx],
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors='pt',
        )
        return {
            # squeeze(0) removes only the leading axis; a bare squeeze() would
            # also drop the sequence axis when max_length == 1.
            'input_ids': enc['input_ids'].squeeze(0),
            'attention_mask': enc['attention_mask'].squeeze(0)
        }

# Keep the default sequential order (shuffle=False): predictions are later
# written back onto df_val by position, so batches must follow row order.
val_loader = DataLoader(
    ValDataset(df_val),
    batch_size=BATCH_SIZE,
    shuffle=False,
)

# === Model ===
class ToxicityClassifier(nn.Module):
    """BERT encoder with two sigmoid heads on the pooled output.

    ``tox_head`` produces one toxicity probability per sample and
    ``subtype_head`` produces one probability per entry of the module-level
    ``subtype_labels``. Attribute names must stay as they are so the saved
    state dict loads into this class.
    """

    def __init__(self):
        super().__init__()
        # Local import: this cell's import block only brings in BertTokenizer.
        from transformers import BertModel
        self.bert = BertModel.from_pretrained("bert-base-uncased")
        # Take the head width from the encoder config instead of a literal 768
        # so the heads stay correct if the checkpoint name is ever changed.
        hidden_size = self.bert.config.hidden_size
        self.tox_head = nn.Linear(hidden_size, 1)
        self.subtype_head = nn.Linear(hidden_size, len(subtype_labels))

    def forward(self, input_ids, attention_mask):
        """Return ``(tox_score, subtype_preds)`` as probabilities.

        ``tox_score`` has its second axis squeezed away; ``subtype_preds``
        keeps one column per subtype.
        """
        pooled = self.bert(input_ids=input_ids, attention_mask=attention_mask).pooler_output
        tox_score = torch.sigmoid(self.tox_head(pooled)).squeeze(1)
        subtype_preds = torch.sigmoid(self.subtype_head(pooled))
        return tox_score, subtype_preds

# Build the architecture on DEVICE, restore the weights stored in
# "toxicity.pth", then switch to inference mode.
model = ToxicityClassifier()
model.to(DEVICE)
saved_weights = torch.load("toxicity.pth", map_location=DEVICE)
model.load_state_dict(saved_weights)
del saved_weights  # not needed once loaded into the model
model.eval()

# === Inference ===
# Per-sample outputs accumulated in loader order: one toxicity probability and
# one row of subtype probabilities per comment.
tox_preds, subtype_preds = [], []

with torch.no_grad():
    for batch in tqdm(val_loader, desc="Running Evaluation"):
        tox_out, subtype_out = model(
            batch['input_ids'].to(DEVICE),
            batch['attention_mask'].to(DEVICE),
        )
        batch_tox = tox_out.cpu().numpy()
        batch_subtypes = subtype_out.cpu().numpy()
        tox_preds.extend(batch_tox)
        subtype_preds.extend(batch_subtypes)

# Positional assignment: relies on val_loader iterating df_val in row order.
df_val["toxicity_pred"] = tox_preds
# Binarize prediction and ground truth with the same THRESHOLD.
df_val["toxicity_label_pred"] = (df_val["toxicity_pred"] >= THRESHOLD).astype("int64")
df_val["toxicity_label_true"] = (df_val["target"] >= THRESHOLD).astype("int64")

# === Evaluate main classifier ===
# Thresholded labels feed the report; ROC AUC uses the raw probabilities.
print("\n=== Toxicity Binary Classification Report ===")
toxicity_true = df_val["toxicity_label_true"]
toxicity_report = classification_report(
    toxicity_true,
    df_val["toxicity_label_pred"],
    target_names=["Appropriate", "Inappropriate"],
)
print(toxicity_report)
toxicity_auc = roc_auc_score(toxicity_true, df_val["toxicity_pred"])
print("Toxicity ROC AUC:", toxicity_auc)

# === Evaluate subtypes ===
print("\n=== Toxic Subtype Classification Report ===")
# Stack the per-sample rows once so each subtype is a column slice instead of
# a Python-level pass over every row for every label.
subtype_matrix = np.asarray(subtype_preds)
for i, label in enumerate(subtype_labels):
    df_val[f"{label}_pred"] = subtype_matrix[:, i]
    df_val[f"{label}_label_pred"] = (df_val[f"{label}_pred"] >= THRESHOLD).astype("int64")
    df_val[f"{label}_label_true"] = (df_val[label] >= THRESHOLD).astype("int64")

    print(f"\n-- {label.upper()} --")
    # Rare subtypes may have no predicted positives, which makes precision
    # undefined. zero_division=0 reports the same 0.00 as the default but
    # without the UndefinedMetricWarning lines in the cell output.
    print(classification_report(
        df_val[f"{label}_label_true"],
        df_val[f"{label}_label_pred"],
        zero_division=0,
    ))

# === Optional: Identity Sliced Subtype Evaluation ===
# For each identity column, keep the rows scoring at least 0.5 and report
# per-subtype accuracy; slices with fewer than 20 rows are skipped.
print("\n=== Identity-Sliced Subtype Evaluation (e.g., female, muslim, etc.) ===")
for identity in identity_labels:
    in_slice = df_val[identity] >= 0.5
    subset = df_val[in_slice]
    n_rows = len(subset)
    if n_rows >= 20:
        print(f"\n--- {identity.upper()} ({n_rows} samples) ---")
        for label in subtype_labels:
            matches = subset[f"{label}_label_true"] == subset[f"{label}_label_pred"]
            print(f"{label}: Accuracy = {matches.mean():.2f}")



Loaded 56326 validation samples


Running Evaluation: 100%|██████████| 1761/1761 [02:28<00:00, 11.83it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



=== Toxicity Binary Classification Report ===
               precision    recall  f1-score   support

  Appropriate       0.88      0.92      0.90     40718
Inappropriate       0.77      0.66      0.71     15608

     accuracy                           0.85     56326
    macro avg       0.82      0.79      0.80     56326
 weighted avg       0.85      0.85      0.85     56326

Toxicity ROC AUC: 0.9000073221174536

=== Toxic Subtype Classification Report ===

-- SEVERE_TOXICITY --
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56325
           1       0.00      0.00      0.00         1

    accuracy                           1.00     56326
   macro avg       0.50      0.50      0.50     56326
weighted avg       1.00      1.00      1.00     56326


-- OBSCENE --
              precision    recall  f1-score   support

           0       0.99      0.99      0.99     55377
           1       0.64      0.59      0.62       949

    accur