In [1]:
# 1. Uninstall existing versions to clear conflicts
!pip uninstall -y protobuf tensorboard

# 2. Install a stable, compatible version of protobuf
!pip install -q protobuf==3.20.3

# 3. Reinstall tensorboard
!pip install -q tensorboard

Found existing installation: protobuf 6.33.0
Uninstalling protobuf-6.33.0:
  Successfully uninstalled protobuf-6.33.0
Found existing installation: tensorboard 2.18.0
Uninstalling tensorboard-2.18.0:
  Successfully uninstalled tensorboard-2.18.0
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m162.1/162.1 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
bigframes 2.12.0 requires google-cloud-bigquery-storage<3.0.0,>=2.30.0, which is not installed.
tensorflow 2.18.0 requires tensorboard<2.19,>=2.18, which is not installed.
opentelemetry-proto 1.37.0 requires protobuf<7.0,>=5.0, but you have protobuf 3.20.3 which is incompatible.
onnx 1.18.0 requires protobuf>=4.25.1, but you have protobuf 3.20.3 which is incompatible.
a2a-sdk 0.3.10 requires protobuf>=5.29.5, but you have proto

In [2]:
import pandas as pd
import numpy as np
import nltk

import re
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter  # <--- NEW IMPORT
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from transformers import BertModel, BertTokenizer, BertForSequenceClassification, get_linear_schedule_with_warmup
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, accuracy_score, f1_score, recall_score
from nltk.corpus import wordnet

import matplotlib.pyplot as plt
try:
    nltk.download('stopwords')
    from nltk.corpus import stopwords
    stop_words = set(stopwords.words('english'))
except:
    from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
    stop_words = set(ENGLISH_STOP_WORDS)

2025-12-27 06:04:46.195774: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1766815486.387837      47 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1766815486.441731      47 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[nltk_data] Downloading package stopwords to /usr/share/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Hyperparameters
MAX_LEN = 256      # Max length of tweets (BERT max is 512, but tweets are short)
BATCH_SIZE = 16    # 16 or 32 is standard for BERT
EPOCHS = 2         # BERT fine-tunes quickly (2-4 epochs is usually enough)
RETRAIN_EPOCHS = 4
LEARNING_RATE = 1e-5
RETRAIN_LEARNING_RATE = 5e-6
USE_BALANCED = False
lambdaa = 1.2
NUM_AUGMENTATIONS_PER_TEXT = 4
MIN_REPLACEMENT = 4
# --- 3. Data Loading & Minimal Cleaning ---
writer = SummaryWriter(f'runs/BERT_{EPOCHS}_epochs_{LEARNING_RATE}_lr_BALANCED' if USE_BALANCED else f'BERT_NEGATIVE_MINING_{EPOCHS}_epochs_{RETRAIN_EPOCHS}_retrain_epochs_{LEARNING_RATE}_lr')

def clean_text_bert(text):
    # Minimal cleaning for BERT. It needs context, so we keep stopwords.
    text = str(text).lower()
    text = re.sub(r'rt\s', '', text)               # Remove RT
    text = re.sub(r'@\w+', '', text)               # Remove mentions
    text = re.sub(r'https?://\S+|www\.\S+', '', text) # Remove URLs
    text = re.sub(r'&#[0-9]+;', '', text)          # Remove HTML
    # We KEEP punctuation because BERT uses it for context/structure
    return text.strip()

# Load Data
df = pd.read_csv('/kaggle/input/sentiment-analysis-twitter-hate-speech/train.csv')
df_test = pd.read_csv('/kaggle/input/sentiment-analysis-twitter-hate-speech/test.csv')
df['clean_text'] = df['tweet'].apply(clean_text_bert)
df_test['clean_text'] = df_test['tweet'].apply(clean_text_bert)
# Split Data
X_train, X_val, y_train, y_val = train_test_split(
    df['clean_text'], df['class'], test_size=0.2, random_state=42
)

df_balanced_data = pd.read_csv('/kaggle/input/sentiment-analysis-twitter-hate-speech/balanced_data.csv')
X_train_balanced, X_val_balanced, y_train_balanced, y_val_balanced = train_test_split(
    df_balanced_data['clean_text'], df_balanced_data['class'], test_size=0.2, random_state=42
)

Using device: cuda


In [4]:
# --- 1. Initialize Tokenizer ---z
# 
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# --- 2. Custom Dataset Class ---
class TwoHeadDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len):
        # Reset index to avoid errors if dataframe was shuffled/split
        self.texts = texts.reset_index(drop=True)
        self.labels = labels.reset_index(drop=True)
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, item):
        text = str(self.texts[item])
        label = self.labels[item]

        # Encoding: This handles Tokenization, Padding, and Truncation
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,    # Add [CLS] and [SEP]
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',       # Pad to max_len
            truncation=True,            # Truncate if too long
            return_attention_mask=True,
            return_tensors='pt',        # Return PyTorch tensors
        )

        return {
            'text': text,
            # Flatten because DataLoader adds the batch dimension later
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            # We pass the raw label (0, 1, or 2). 
            # The train_fn logic will handle splitting this into binary targets.
            'labels': torch.tensor(label, dtype=torch.long)
        }

# --- 3. Create DataLoaders ---

# Create Dataset objects
if not USE_BALANCED:
    train_dataset = TwoHeadDataset(
        texts=X_train, 
        labels=y_train, 
        tokenizer=tokenizer, 
        max_len=MAX_LEN
    )
    
    val_dataset = TwoHeadDataset(
        texts=X_val, 
        labels=y_val, 
        tokenizer=tokenizer, 
        max_len=MAX_LEN
    )
else:
    train_dataset = TwoHeadDataset(
        texts=X_train_balanced, 
        labels=y_train_balanced, 
        tokenizer=tokenizer, 
        max_len=MAX_LEN
    )
    
    val_dataset = TwoHeadDataset(
        texts=X_val_balanced, 
        labels=y_val_balanced, 
        tokenizer=tokenizer, 
        max_len=MAX_LEN
    )
test_dataset = TwoHeadDataset(
    texts=df_test['clean_text'],
    labels=df_test['class'],
    tokenizer=tokenizer,
    max_len=MAX_LEN
)
# Create DataLoaders
# shuffle=True for training to break correlations
train_loader = DataLoader(
    train_dataset, 
    batch_size=BATCH_SIZE, 
    shuffle=True,
    num_workers=2 # Optional: speeds up data loading
)

# shuffle=False for validation so results are reproducible
val_loader = DataLoader(
    val_dataset, 
    batch_size=BATCH_SIZE, 
    shuffle=False,
    num_workers=2 
)

test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=2
)

print(f"Data Loaded: {len(train_dataset)} training samples, {len(val_dataset)} validation samples.")

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Data Loaded: 15860 training samples, 3966 validation samples.


In [5]:
class BertTwoHeadHier(nn.Module):
    """
    Hierarchical 3-class via two binary heads:
      A: toxic?   (0=Neither, 1=Toxic)
      B: hate?    (0=Offensive, 1=Hate)  computed/trained only on toxic samples
    """
    def __init__(self, bert_name="bert-base-uncased", dropout=0.1):
        super().__init__()
        self.bert = BertModel.from_pretrained(bert_name)
        hidden = self.bert.config.hidden_size
        self.drop = nn.Dropout(dropout)
        self.head_toxic = nn.Linear(hidden, 1)  # logitA
        self.head_hate  = nn.Linear(hidden, 1)  # logitB

    def forward(self, input_ids, attention_mask=None, token_type_ids=None):
        out = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            return_dict=True
        )
        cls = out.last_hidden_state[:, 0, :]  # [B, H] CLS token
        cls = self.drop(cls)
        logitA = self.head_toxic(cls).squeeze(-1)  # [B]
        logitB = self.head_hate(cls).squeeze(-1)   # [B]
        return logitA, logitB

class BertCNN(nn.Module):
    def __init__(self, n_classes, dropout=0.3):
        super(BertCNN, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        
        # CNN Hyperparameters
        embedding_dim = self.bert.config.hidden_size # 768 for bert-base
        n_filters = 100
        filter_sizes = [2, 3, 4] # Look at 2-grams, 3-grams, 4-grams
        
        # Convolutional Layers
        # We create a ModuleList of Conv1d layers for different window sizes
        self.convs = nn.ModuleList([
            nn.Conv1d(in_channels=embedding_dim, out_channels=n_filters, kernel_size=fs)
            for fs in filter_sizes
        ])
        
        # Fully Connected Layer
        self.fc = nn.Linear(len(filter_sizes) * n_filters, n_classes)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input_ids, attention_mask):
        # BERT Output
        # We need the 'last_hidden_state' which has shape [batch_size, seq_len, hidden_dim]
        output = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        last_hidden_state = output.last_hidden_state
        
        # Permute for CNN: [batch, hidden_dim, seq_len]
        # Conv1d expects channels (hidden_dim) as the second dimension
        embedded = last_hidden_state.permute(0, 2, 1)
        
        # Apply CNN & Max Pooling
        # For each filter size: Conv1d -> ReLU -> MaxPool1d
        conved = [F.relu(conv(embedded)) for conv in self.convs]
        pooled = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]
        
        # Concatenate pooled features
        # Shape: [batch, n_filters * len(filter_sizes)]
        cat = self.dropout(torch.cat(pooled, dim=1))
        
        # Final Classification
        return self.fc(cat)

In [6]:
class FocalLoss(nn.Module):
    def __init__(self, alpha=None, gamma=2, reduction='mean'):
        """
        Args:
            alpha (float, list, or torch.Tensor): 
                - If float: Applies the same weight to all classes.
                - If list/Tensor: Weights for each class (e.g., [1.0, 0.5, 0.1]).
                  Must match the number of classes.
            gamma (float): Focusing parameter (default 2).
            reduction (str): 'mean', 'sum', or 'none'.
        """
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.reduction = reduction
        
        # Handle alpha
        if isinstance(alpha, (list, tuple, np.ndarray)):
            self.alpha = torch.tensor(alpha).float()
        else:
            self.alpha = alpha

    def forward(self, inputs, targets):
        # Move alpha to the correct device (GPU/CPU) automatically
        if self.alpha is not None and isinstance(self.alpha, torch.Tensor):
            self.alpha = self.alpha.to(inputs.device)

        # 1. Calculate Standard Cross Entropy (raw log_softmax)
        # We assume inputs are raw logits (not probabilities)
        ce_loss = F.cross_entropy(inputs, targets, reduction='none')
        
        # 2. Calculate probabilities (pt)
        pt = torch.exp(-ce_loss)
        
        # 3. Calculate Alpha Factor
        if self.alpha is not None:
            if isinstance(self.alpha, torch.Tensor):
                # Select the specific weight for the target class of each sample
                alpha_t = self.alpha[targets]
            else:
                # Scalar alpha
                alpha_t = self.alpha
        else:
            alpha_t = 1.0
            
        # 4. Focal Loss Formula
        # Loss = -alpha * (1 - pt)^gamma * log(pt)
        # Note: ce_loss is already -log(pt)
        focal_loss = alpha_t * (1 - pt) ** self.gamma * ce_loss

        # 5. Reduction
        if self.reduction == 'mean':
            return focal_loss.mean()
        elif self.reduction == 'sum':
            return focal_loss.sum()
        else:
            return focal_loss

In [7]:
def train_fn(data_loader, model, optimizer, device, scheduler=None, start_steps=0, lambdaa = 1.2, retrain = False):
    model.train()
    total_loss = 0
    # We use BCEWithLogitsLoss because our heads output raw logits (no sigmoid applied yet)
    criterion = nn.BCEWithLogitsLoss()
    
    for idx, batch in enumerate(tqdm(data_loader, desc="Training")):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        
        # Labels are 0 (Hate), 1 (Offensive), 2 (Neither)
        labels = batch['labels'].to(device)

        optimizer.zero_grad()

        # 1. Forward Pass
        logitA, logitB = model(input_ids, attention_mask)

        # 2. Create Binary Targets on the Fly
        
        # Target A: 1 if Toxic (Class 0 or 1), 0 if Neither (Class 2)
        target_A = (labels <= 1).float()
        
        # Target B: 1 if Hate (Class 0), 0 if Offensive (Class 1)
        target_B = (labels == 0).float()

        # 3. Calculate Loss A (Toxic Detection)
        # This is calculated for EVERY sample in the batch
        loss_A = criterion(logitA, target_A)

        # 4. Calculate Loss B (Hate Detection)
        # This is calculated ONLY for samples that are actually Toxic (Label 0 or 1)
        
        # Create a mask: True where label is 0 or 1
        toxic_mask = (labels <= 1)
        
        if toxic_mask.sum() > 0:
            # Select only the logits and targets corresponding to toxic samples
            loss_B = criterion(logitB[toxic_mask], target_B[toxic_mask])
        else:
            # If batch has no toxic samples, Loss B is 0
            loss_B = torch.tensor(0.0, device=device)

        # 5. Total Loss
        # You can weigh these terms if needed (e.g., loss = loss_A + 2.0 * loss_B)
        loss = loss_A + lambdaa * loss_B
        
        loss.backward()
        
        # Clip gradients to prevent explosion
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        
        optimizer.step()
        if scheduler:
            scheduler.step()
            
        total_loss += loss.item()
        current_step = start_steps + idx
        if retrain: 
            writer.add_scalar('Loss/Train_New', loss.item(), current_step)
        else:
            writer.add_scalar('Loss/Train', loss.item(), current_step)
    return total_loss / len(data_loader)

# --- 3. Custom Evaluation Function (Hierarchical Inference) ---
def evaluate_fn(data_loader, model, device, threshold = 0.5):
    model.eval()
    
    final_targets = []
    final_predictions = []
    
    with torch.no_grad():
        for batch in tqdm(data_loader, desc="Evaluating"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            
            # Get Logits
            logitA, logitB = model(input_ids, attention_mask)
            
            # Convert to Probabilities
            probA = torch.sigmoid(logitA) # Prob of being Toxic
            probB = torch.sigmoid(logitB) # Prob of being Hate (conditional)
            
            # Hierarchical Decision Logic
            batch_preds = []
            for pA, pB in zip(probA, probB):
                # Step 1: Is it Toxic?
                if pA < 0.5:
                    # Not Toxic -> Predict Class 2 (Neither)
                    batch_preds.append(2)
                else:
                    # Is Toxic -> Step 2: Is it Hate?
                    if pB > 0.5:
                        # Hate -> Predict Class 0
                        batch_preds.append(0)
                    else:
                        # Not Hate (but Toxic) -> Predict Class 1 (Offensive)
                        batch_preds.append(1)
            
            final_targets.extend(labels.cpu().numpy())
            final_predictions.extend(batch_preds)
            
    # Metrics
    f1 = f1_score(final_targets, final_predictions, average='macro')
    print(f"\nValidation F1: {f1:.4f}")
    
    target_names = ['Hate Speech (0)', 'Offensive (1)', 'Neither (2)']
    print(classification_report(final_targets, final_predictions, target_names=target_names))
    
    return f1

def train_fn_bert_cnn(data_loader, criterion, model, optimizer, device, scheduler=None, epoch_index = 0):
    model.train()
    total_loss = 0
    losses = []
    for idx, batch in enumerate(tqdm(data_loader, desc="Training")):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        
        # Labels are 0 (Hate), 1 (Offensive), 2 (Neither)
        labels = batch['labels'].to(device)

        optimizer.zero_grad()

        # 1. Forward Pass
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        _, preds = torch.max(outputs, dim=1)
        
        # Focal Loss
        loss = criterion(outputs, labels)

        losses.append(loss.item())        
        loss.backward()
        
        # Clip gradients to prevent explosion
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        
        optimizer.step()
        if scheduler:
            scheduler.step()
            
        total_loss += loss.item()
        current_step = epoch_index * len(data_loader) + idx
        writer.add_scalar('Loss/Train_BERT_CNN', loss.item(), current_step)
    return total_loss / len(data_loader)

def evaluate_fn_bert_cnn(data_loader, criterion, model, device, epoch_index = 0, is_testing = False):
    model.eval()
    
    final_targets = []
    final_predictions = []
    val_losses = []
    
    with torch.no_grad():
        for batch in tqdm(data_loader, desc="Evaluating"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            
            # 1. Forward Pass
            # The BertCNN model returns raw logits of shape [Batch, 3]
            logits = model(input_ids, attention_mask)
            
            # 2. Calculate Loss (Validation Loss)
            # We use the same Focal Loss criterion passed from the main loop
            loss = criterion(logits, labels)
            val_losses.append(loss.item())
            
            # 3. Get Predictions
            # Since this is a standard Multi-class problem (0, 1, 2) for the CNN,
            # we just take the argmax. 
            # (Note: The hierarchical logic was for the Two-Head model. 
            # For this BertCNN 3-class model, we use standard argmax).
            _, preds = torch.max(logits, dim=1)
            
            final_targets.extend(labels.cpu().numpy())
            final_predictions.extend(preds.cpu().numpy())
            
    # 4. Calculate Metrics
    avg_val_loss = np.mean(val_losses)
    val_f1 = f1_score(final_targets, final_predictions, average='macro')
    if not is_testing: 
        writer.add_scalar('Loss/Validation_BERT_CNN', avg_val_loss, epoch_index)
        writer.add_scalar('F1/Validation_BERT_CNN', val_f1, epoch_index)
        print(f"\nValidation Loss: {avg_val_loss:.4f} | Validation F1: {val_f1:.4f}")
    
    target_names = ['Hate Speech (0)', 'Offensive (1)', 'Neither (2)']
    print(classification_report(final_targets, final_predictions, target_names=target_names))
    
    return avg_val_loss, val_f1

In [8]:
model = BertTwoHeadHier("bert-base-uncased").to(device)
    
# Initialize Optimizer
optimizer = AdamW(model.parameters(), lr=LEARNING_RATE)

# Assume train_loader and val_loader are already created from previous steps
total_steps = len(train_loader) * EPOCHS
scheduler = get_linear_schedule_with_warmup(optimizer, 0, total_steps)
best_val_loss = float('inf')
save_path = f"BERT_{EPOCHS}_epochs_{LEARNING_RATE}_lr_BALANCED.pth" if USE_BALANCED else f"BERT_{EPOCHS}_epochs_{LEARNING_RATE}_lr.pth"
current_steps = 0
# Training Loop
for epoch in range(EPOCHS):
    print(f"Epoch {epoch + 1}/{EPOCHS}")
    train_loss = train_fn(train_loader, model, optimizer, device, scheduler, current_steps, lambdaa)
    current_steps += len(train_loader)
    model.eval()
    val_loss = 0
    val_criterion = nn.BCEWithLogitsLoss()
    
    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            
            logitA, logitB = model(input_ids, attention_mask)
            
            target_A = (labels <= 1).float()
            target_B = (labels == 0).float()
            
            loss_A = val_criterion(logitA, target_A)
            toxic_mask = (labels <= 1)
            if toxic_mask.sum() > 0:
                loss_B = val_criterion(logitB[toxic_mask], target_B[toxic_mask])
            else:
                loss_B = torch.tensor(0.0, device=device)
            
            val_loss += (loss_A + lambdaa * loss_B).item()
            
    avg_val_loss = val_loss / len(val_loader)
    
    # Get Accuracy from your existing evaluate function
    val_f1 = evaluate_fn(val_loader, model, device) # Assuming this returns accuracy
    
    print(f"Train Loss: {train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | Val F1: {val_f1:.4f}")
    
    # --- 3. Logging to TensorBoard ---
    writer.add_scalar('Loss/Validation', avg_val_loss, epoch)
    writer.add_scalar('F1/Validation', val_f1, epoch)
    
    # --- 4. Save Model if Val Loss Improved ---
    if avg_val_loss < best_val_loss:
        print(f"Validation loss decreased ({best_val_loss:.4f} --> {avg_val_loss:.4f}). Saving model...")
        torch.save(model.state_dict(), save_path)
        print(f"Saved at {save_path}")
        best_val_loss = avg_val_loss

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Epoch 1/2


Training: 100%|██████████| 992/992 [07:07<00:00,  2.32it/s]
Evaluating: 100%|██████████| 248/248 [00:27<00:00,  8.89it/s]



Validation F1: 0.7675
                 precision    recall  f1-score   support

Hate Speech (0)       0.54      0.42      0.47       220
  Offensive (1)       0.94      0.96      0.95      3052
    Neither (2)       0.88      0.88      0.88       694

       accuracy                           0.91      3966
      macro avg       0.79      0.75      0.77      3966
   weighted avg       0.91      0.91      0.91      3966

Train Loss: 0.3897 | Val Loss: 0.3068 | Val F1: 0.7675
Validation loss decreased (inf --> 0.3068). Saving model...
Saved at BERT_2_epochs_1e-05_lr.pth
Epoch 2/2


Training: 100%|██████████| 992/992 [07:07<00:00,  2.32it/s]
Evaluating: 100%|██████████| 248/248 [00:27<00:00,  8.87it/s]


Validation F1: 0.7235
                 precision    recall  f1-score   support

Hate Speech (0)       0.60      0.23      0.33       220
  Offensive (1)       0.93      0.97      0.95      3052
    Neither (2)       0.90      0.89      0.89       694

       accuracy                           0.92      3966
      macro avg       0.81      0.70      0.72      3966
   weighted avg       0.90      0.92      0.91      3966

Train Loss: 0.2870 | Val Loss: 0.3150 | Val F1: 0.7235





In [9]:
def hierarchical_predict(probA, probB, threshold=0.4, toxic_cut=0.5):
    probA = np.asarray(probA)
    probB = np.asarray(probB)
    pred = np.full_like(probA, 2, dtype=int)   # default Neither
    toxic = probA >= toxic_cut
    pred[toxic] = np.where(probB[toxic] > threshold, 0, 1)  # 0=hate, 1=offensive
    return pred


@torch.no_grad()
def mine_hard_hate_as_offensive_from_loader_new(
    model,
    loader,
    device,
    threshold=0.4,
    toxic_cut=0.5,
    top_k=None,          # None = keep all
):
    """
    Mine HARD POSITIVES:
      - TRUE label = hate (0)
      - PREDICTED as offensive (1)

    These are false negatives for hate.
    """
    model.eval().to(device)

    rows = []

    for batch in loader:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)
        texts = batch["text"]

        logitA, logitB = model(input_ids=input_ids, attention_mask=attention_mask)

        pA = torch.sigmoid(logitA).cpu().numpy()
        pB = torch.sigmoid(logitB).cpu().numpy()
        y_true = labels.cpu().numpy()

        preds = hierarchical_predict(pA, pB, threshold=threshold, toxic_cut=toxic_cut)

        for t, y, a, b, p in zip(texts, y_true, pA, pB, preds):
            # ✅ THIS is the key condition
            if y == 0 and p == 1:
                rows.append({
                    "clean_text": t,
                    "class": 0,
                    "probA_toxic": float(a),
                    "probB_hate": float(b),
                    "pred": int(p),
                    "hard_score": float(1.0 - b)  # lower pB = harder hate
                })

    df_hard = pd.DataFrame(rows)

    if len(df_hard) == 0:
        print("⚠️ No hate→offensive errors found (this is actually good).")
        return df_hard, {}

    # Sort by hardest first (lowest hate confidence)
    df_hard = df_hard.sort_values("hard_score", ascending=False)

    if top_k is not None:
        df_hard = df_hard.head(top_k)

    stats = {
        "hard_hate_count": len(df_hard),
        "threshold": threshold,
        "toxic_cut": toxic_cut
    }

    return df_hard.reset_index(drop=True), stats

In [10]:
hard_hate, stats = mine_hard_hate_as_offensive_from_loader_new(
    model,
    train_loader,
    device,
    threshold=0.4
)


print(stats)
display(hard_hate.head())

{'hard_hate_count': 469, 'threshold': 0.4, 'toxic_cut': 0.5}


Unnamed: 0,clean_text,class,probA_toxic,probB_hate,pred,hard_score
0,: true i need to sleep to make them gainz br...,0,0.996118,0.002242,1,0.997758
1,: : some of y'all hoes so worried about y'all ...,0,0.99751,0.002871,1,0.997129
2,: sometimes i wanna upper cut this bitch but i...,0,0.998408,0.003196,1,0.996804
3,""": stacey dash won baddest bitch evaaaa",0,0.998677,0.003372,1,0.996628
4,: tea bag a bitch. pahahaha,0,0.998706,0.003508,1,0.996492


In [11]:
hard_hate.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 469 entries, 0 to 468
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   clean_text   469 non-null    object 
 1   class        469 non-null    int64  
 2   probA_toxic  469 non-null    float64
 3   probB_hate   469 non-null    float64
 4   pred         469 non-null    int64  
 5   hard_score   469 non-null    float64
dtypes: float64(3), int64(2), object(1)
memory usage: 22.1+ KB


In [12]:
def get_synonyms(word):
    """Get a list of synonyms for a given word."""
    synonyms = set()
    for syn in wordnet.synsets(word):
        for lemma in syn.lemmas():
            synonym = lemma.name().replace("_", " ").lower()
            if synonym != word:
                synonyms.add(synonym)
    return list(synonyms)

def augment_data(
    text: str,
    num_augmentations: int = 1,
    min_replacements: int = 2,
    max_attempts: int = 100,
    seed: int | None = None,
):
    """
    Augment text by replacing at least `min_replacements` distinct words with synonyms.
    Ensures augmented outputs differ from the original (or returns fewer if impossible).
    """
    rng = np.random.default_rng(seed)
    
    words = text.split()
    if len(words) == 0:
        return []

    # Precompute which indices are replaceable and their synonym lists
    replaceable = []
    syn_cache = {}
    for i, w in enumerate(words):
        # basic cleanup to help wordnet (optional)
        w_clean = re.sub(r"[^A-Za-z']+", "", w).lower()
        if not w_clean:
            continue
        syns = get_synonyms(w_clean)
        if syns:
            replaceable.append(i)
            syn_cache[i] = syns

    # If we don't have enough replaceable words, can't guarantee min_replacements
    if len(replaceable) < min_replacements:
        # Return [] or fall back to fewer replacements (your choice)
        return []

    augmented_texts = []
    original = " ".join(words)

    for _ in range(num_augmentations):
        success = False

        for _attempt in range(max_attempts):
            new_words = words.copy()

            # choose distinct indices to replace
            idxs = rng.choice(replaceable, size=min_replacements, replace=False)

            changed = 0
            for idx in idxs:
                syns = syn_cache[idx]
                # pick synonym; if punctuation present, we keep original punctuation around it
                chosen = rng.choice(syns)
                new_words[idx] = chosen
                changed += 1

            augmented = " ".join(new_words)

            # ensure different from original AND we actually replaced enough
            if augmented != original and changed >= min_replacements:
                augmented_texts.append(augmented)
                success = True
                break

        if not success:
            # couldn't create a valid augmentation after max_attempts
            # you can append original, skip, or return fewer; I skip.
            pass

    return augmented_texts

In [13]:
hard_negatives_df = hard_hate.loc[:, ["clean_text", "class"]]
augmented_rows = []

for idx in range(len(hard_negatives_df)):
    text = hard_negatives_df.loc[idx, "clean_text"]
    label = hard_negatives_df.loc[idx, "class"]

    augments = augment_data(
        text,
        num_augmentations=NUM_AUGMENTATIONS_PER_TEXT,
        min_replacements=MIN_REPLACEMENT,
        seed=idx
    )

    if augments:  # only augment iff non-empty
        for aug_text in augments:
            augmented_rows.append({
                "clean_text": aug_text,
                "class": label
            })

        # print(f"[✓] Augmented: {text}")
        # print(augments)
    else:
        pass
        # print(f"[×] Skipped (no valid aug): {text}")

augmented_hard_negatives_df = pd.DataFrame(augmented_rows)
print(f"Augmented {len(augmented_hard_negatives_df)} new texts of class 0")
augmented_hard_negatives_df.info()
X_train_new = pd.concat([X_train, augmented_hard_negatives_df['clean_text']], axis = 0)
y_train_new = pd.concat([y_train, augmented_hard_negatives_df['class']], axis = 0)
print(f"New shape X train: {X_train_new.shape}")
print(f"New shape y train: {y_train_new.shape}")

Augmented 1608 new texts of class 0
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1608 entries, 0 to 1607
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   clean_text  1608 non-null   object
 1   class       1608 non-null   int64 
dtypes: int64(1), object(1)
memory usage: 25.3+ KB
New shape X train: (17468,)
New shape y train: (17468,)


In [14]:
TARGET_OFFENSIVE = 5000   # or 7500
SEED = 42
total_df = pd.concat([X_train_new, y_train_new], axis = 1, keys=['clean_text', 'class'])
df_offensive = total_df[total_df["class"] == 1]
df_offensive_down = (
    df_offensive
    .sample(n=TARGET_OFFENSIVE, random_state=SEED)
    .reset_index(drop=True)
)
df_hate = total_df[total_df["class"] == 0]
df_neither = total_df[total_df["class"] == 2]

df_train_balanced = pd.concat(
    [
        df_hate,
        df_offensive_down,
        df_neither
    ],
    ignore_index=True
)
df_train_balanced.head()
df_train_balanced['class'].value_counts()

X_train_new, y_train_new = df_train_balanced['clean_text'], df_train_balanced['class']

In [15]:
new_train_dataset = TwoHeadDataset(
    texts=X_train_new, 
    labels=y_train_new, 
    tokenizer=tokenizer, 
    max_len=MAX_LEN
)

new_train_loader = DataLoader(
    new_train_dataset, 
    batch_size=BATCH_SIZE, 
    shuffle=True,
    num_workers=2 # Optional: speeds up data loading
)

In [19]:
new_model = BertCNN(n_classes=3)
new_model = new_model.to(device)
classes = np.unique(y_train)
weights = compute_class_weight(class_weight='balanced', classes=classes, y=y_train_new)
print(f"Class weights: {weights}")
# Initialize Optimizer
new_optimizer = AdamW(new_model.parameters(), lr=RETRAIN_LEARNING_RATE)
criterion = FocalLoss(alpha=weights, gamma=1)
total_steps = len(new_train_loader) * RETRAIN_EPOCHS
new_scheduler = get_linear_schedule_with_warmup(optimizer, 0, total_steps)
best_val_loss = float('inf')
save_path = f"BERT_CNN_{RETRAIN_EPOCHS}_epochs_{RETRAIN_LEARNING_RATE}_lr_BALANCED.pth" if USE_BALANCED else f"BERT_CNN_{RETRAIN_EPOCHS}_epochs_{RETRAIN_LEARNING_RATE}_lr.pth"

# Training Loop
for epoch in range(RETRAIN_EPOCHS):
    print(f"Epoch {epoch + 1}/{RETRAIN_EPOCHS}")
    train_loss = train_fn_bert_cnn(new_train_loader, criterion, new_model, new_optimizer, device, new_scheduler, epoch)
    val_loss, val_f1 = evaluate_fn_bert_cnn(val_loader, criterion, new_model, device, epoch)
    print(f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val F1: {val_f1:.4f}")
    model.eval()
    
    
    # --- 4. Save Model if Val Loss Improved ---
    if val_loss < best_val_loss:
        print(f"Validation loss decreased ({best_val_loss:.4f} --> {val_loss:.4f}). Saving model...")
        torch.save(new_model.state_dict(), save_path)
        print(f"Saved at {save_path}")
        best_val_loss = val_loss

Class weights: [1.33859926 0.67786667 1.28578655]
Epoch 1/4


Training: 100%|██████████| 636/636 [04:38<00:00,  2.28it/s]
Evaluating: 100%|██████████| 248/248 [00:28<00:00,  8.71it/s]



Validation Loss: 0.2143 | Validation F1: 0.6891
                 precision    recall  f1-score   support

Hate Speech (0)       0.22      0.63      0.33       220
  Offensive (1)       0.98      0.81      0.88      3052
    Neither (2)       0.79      0.93      0.86       694

       accuracy                           0.82      3966
      macro avg       0.66      0.79      0.69      3966
   weighted avg       0.90      0.82      0.85      3966

Train Loss: 0.4126 | Val Loss: 0.2143 | Val F1: 0.6891
Validation loss decreased (inf --> 0.2143). Saving model...
Saved at BERT_CNN_4_epochs_5e-06_lr.pth
Epoch 2/4


Training: 100%|██████████| 636/636 [04:39<00:00,  2.28it/s]
Evaluating: 100%|██████████| 248/248 [00:28<00:00,  8.72it/s]



Validation Loss: 0.1739 | Validation F1: 0.7497
                 precision    recall  f1-score   support

Hate Speech (0)       0.34      0.62      0.44       220
  Offensive (1)       0.97      0.89      0.93      3052
    Neither (2)       0.84      0.94      0.89       694

       accuracy                           0.88      3966
      macro avg       0.72      0.82      0.75      3966
   weighted avg       0.91      0.88      0.89      3966

Train Loss: 0.2109 | Val Loss: 0.1739 | Val F1: 0.7497
Validation loss decreased (0.2143 --> 0.1739). Saving model...
Saved at BERT_CNN_4_epochs_5e-06_lr.pth
Epoch 3/4


Training: 100%|██████████| 636/636 [04:39<00:00,  2.28it/s]
Evaluating: 100%|██████████| 248/248 [00:28<00:00,  8.73it/s]



Validation Loss: 0.1717 | Validation F1: 0.7568
                 precision    recall  f1-score   support

Hate Speech (0)       0.35      0.65      0.45       220
  Offensive (1)       0.97      0.90      0.93      3052
    Neither (2)       0.87      0.91      0.89       694

       accuracy                           0.88      3966
      macro avg       0.73      0.82      0.76      3966
   weighted avg       0.91      0.88      0.90      3966

Train Loss: 0.1590 | Val Loss: 0.1717 | Val F1: 0.7568
Validation loss decreased (0.1739 --> 0.1717). Saving model...
Saved at BERT_CNN_4_epochs_5e-06_lr.pth
Epoch 4/4


Training: 100%|██████████| 636/636 [04:39<00:00,  2.28it/s]
Evaluating: 100%|██████████| 248/248 [00:28<00:00,  8.72it/s]


Validation Loss: 0.1762 | Validation F1: 0.7616
                 precision    recall  f1-score   support

Hate Speech (0)       0.40      0.52      0.45       220
  Offensive (1)       0.95      0.93      0.94      3052
    Neither (2)       0.88      0.90      0.89       694

       accuracy                           0.90      3966
      macro avg       0.75      0.78      0.76      3966
   weighted avg       0.91      0.90      0.91      3966

Train Loss: 0.1303 | Val Loss: 0.1762 | Val F1: 0.7616





In [21]:
saved_model = BertCNN(n_classes=3).to(device)
saved_model.load_state_dict(torch.load(save_path, map_location=device))
test_loss, test_f1 = evaluate_fn_bert_cnn(test_loader, criterion, saved_model, device, epoch, is_testing = True)

Evaluating: 100%|██████████| 310/310 [00:35<00:00,  8.83it/s]

                 precision    recall  f1-score   support

Hate Speech (0)       0.36      0.65      0.46       286
  Offensive (1)       0.97      0.90      0.93      3838
    Neither (2)       0.87      0.91      0.89       833

       accuracy                           0.89      4957
      macro avg       0.73      0.82      0.76      4957
   weighted avg       0.91      0.89      0.90      4957




