In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report
from transformers import BertTokenizer, BertForSequenceClassification
from torch.optim import AdamW
from sklearn.utils.class_weight import compute_class_weight
import pandas as pd
import numpy as np
from collections import Counter
import os
import re
import random
import warnings

# -------------------------
# Suppress Warnings
# -------------------------
warnings.filterwarnings("ignore")

# -------------------------
# Reproducibility
# -------------------------
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)

# -------------------------
# Config
# -------------------------
MAX_LEN = 128
BATCH_SIZE = 32
NUM_CLASSES = 2
EMBEDDING_DIM = 100
HIDDEN_DIM = 128
NUM_EPOCHS = 5
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# -------------------------
# Preprocessing Function
# -------------------------
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
    text = re.sub(r'\s+', ' ', text).strip()  # Remove extra whitespace
    return text

# -------------------------
# Load & Preprocess Dataset
# -------------------------
train_df = pd.read_csv("train_hate.csv")
val_df = pd.read_csv("val_hate.csv")

train_texts = [preprocess_text(t) for t in train_df["Sentence"].tolist()]
train_labels = list(map(int, train_df["Tag"].tolist()))
val_texts = [preprocess_text(t) for t in val_df["Sentence"].tolist()]
val_labels = list(map(int, val_df["Tag"].tolist()))

# -------------------------
# Build Custom Tokenizer & Vocabulary
# -------------------------
def basic_tokenizer(text):
    return text.split()

def build_vocab(texts, min_freq=1):
    counter = Counter()
    for text in texts:
        tokens = basic_tokenizer(text)
        counter.update(tokens)
    vocab = {"<pad>": 0, "<unk>": 1}
    for word, freq in counter.items():
        if freq >= min_freq:
            vocab[word] = len(vocab)
    return vocab

def text_to_ids(text, vocab):
    tokens = basic_tokenizer(text)
    return [vocab.get(token, vocab["<unk>"]) for token in tokens]

vocab = build_vocab(train_texts)
tokenizer_fn = lambda x: text_to_ids(x, vocab)

# -------------------------
# Dataset Class
# -------------------------
class HateDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, use_bert=False):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.use_bert = use_bert

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        sentence = self.texts[idx]
        label = self.labels[idx]
        if self.use_bert:
            tokens = self.tokenizer(sentence, padding='max_length', truncation=True, max_length=MAX_LEN, return_tensors="pt")
            return {
                'input_ids': tokens['input_ids'].squeeze(0),
                'attention_mask': tokens['attention_mask'].squeeze(0),
                'label': torch.tensor(label)
            }
        else:
            tokens = self.tokenizer(sentence)
            if len(tokens) < MAX_LEN:
                tokens += [0] * (MAX_LEN - len(tokens))
            else:
                tokens = tokens[:MAX_LEN]
            return torch.tensor(tokens), torch.tensor(label)

# -------------------------
# Model Architectures
# -------------------------
class RNNModel(nn.Module):
    def __init__(self, vocab_size):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, EMBEDDING_DIM)
        self.rnn = nn.RNN(EMBEDDING_DIM, HIDDEN_DIM, batch_first=True)
        self.fc = nn.Linear(HIDDEN_DIM, NUM_CLASSES)

    def forward(self, x):
        x = self.embed(x)
        _, h_n = self.rnn(x)
        return self.fc(h_n.squeeze(0))

class LSTMModel(nn.Module):
    def __init__(self, vocab_size):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, EMBEDDING_DIM)
        self.lstm = nn.LSTM(EMBEDDING_DIM, HIDDEN_DIM, batch_first=True)
        self.fc = nn.Linear(HIDDEN_DIM, NUM_CLASSES)

    def forward(self, x):
        x = self.embed(x)
        _, (h_n, _) = self.lstm(x)
        return self.fc(h_n.squeeze(0))

class TransformerClassifier(nn.Module):
    def __init__(self, vocab_size):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, EMBEDDING_DIM)
        encoder_layer = nn.TransformerEncoderLayer(d_model=EMBEDDING_DIM, nhead=4)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=2)
        self.fc = nn.Linear(EMBEDDING_DIM, NUM_CLASSES)

    def forward(self, x):
        x = self.embedding(x).permute(1, 0, 2)
        out = self.transformer(x)
        out = out[0]
        return self.fc(out)

# -------------------------
# Evaluation Function
# -------------------------
def evaluate(model, loader, is_bert=False):
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for batch in loader:
            if is_bert:
                input_ids = batch['input_ids'].to(DEVICE)
                attention_mask = batch['attention_mask'].to(DEVICE)
                labels = batch['label'].to(DEVICE)
                outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                logits = outputs.logits
            else:
                inputs, labels = batch
                inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
                logits = model(inputs)
            preds = torch.argmax(logits, dim=1).cpu().numpy()
            all_preds.extend(preds)
            all_labels.extend(labels.cpu().numpy())

    report = classification_report(all_labels, all_preds, digits=4, zero_division=0, output_dict=True)
    macro_f1 = report["macro avg"]["f1-score"]
    print(classification_report(all_labels, all_preds, digits=4, zero_division=0))
    print(f"Macro F1-score: {macro_f1:.4f}")



# -------------------------
# Build Datasets & Loaders
# -------------------------
train_dataset = HateDataset(train_texts, train_labels, tokenizer_fn)
val_dataset = HateDataset(val_texts, val_labels, tokenizer_fn)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

# -------------------------
# Training Function
# -------------------------
def train_model(model, train_loader, val_loader, model_path, lr=1e-3):
    model = model.to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    class_weights = compute_class_weight(class_weight="balanced", classes=np.unique(train_labels), y=train_labels)
    class_weights_tensor = torch.tensor(class_weights, dtype=torch.float).to(DEVICE)
    criterion = nn.CrossEntropyLoss(weight=class_weights_tensor)

    for epoch in range(NUM_EPOCHS):
        model.train()
        total_loss = 0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        print(f"Epoch {epoch+1}/{NUM_EPOCHS}, Loss: {total_loss/len(train_loader):.4f}")
        evaluate(model, val_loader)

    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    torch.save(model.state_dict(), model_path)

# -------------------------
# Train & Evaluate: RNN, LSTM, Transformer
# -------------------------
rnn_model = RNNModel(len(vocab))
train_model(rnn_model, train_loader, val_loader, "models/rnn_model.pth")
print("\n--- RNN ---")
evaluate(rnn_model, val_loader)

lstm_model = LSTMModel(len(vocab))
train_model(lstm_model, train_loader, val_loader, "models/lstm_model.pth")
print("\n--- LSTM ---")
evaluate(lstm_model, val_loader)

transformer_model = TransformerClassifier(len(vocab))
train_model(transformer_model, train_loader, val_loader, "models/transformer_model.pth")
print("\n--- Transformer ---")
evaluate(transformer_model, val_loader)

# -------------------------
# Train & Evaluate BERT
# -------------------------
bert_tokenizer = BertTokenizer.from_pretrained("google-bert/bert-base-uncased")
bert_model = BertForSequenceClassification.from_pretrained("google-bert/bert-base-uncased", num_labels=NUM_CLASSES).to(DEVICE)

bert_train_dataset = HateDataset(train_texts, train_labels, tokenizer=bert_tokenizer, use_bert=True)
bert_val_dataset = HateDataset(val_texts, val_labels, tokenizer=bert_tokenizer, use_bert=True)
bert_train_loader = DataLoader(bert_train_dataset, batch_size=BATCH_SIZE, shuffle=True)
bert_val_loader = DataLoader(bert_val_dataset, batch_size=BATCH_SIZE)

optimizer = AdamW(bert_model.parameters(), lr=2e-5)

for epoch in range(NUM_EPOCHS):
    bert_model.train()
    total_loss = 0
    for batch in bert_train_loader:
        input_ids = batch['input_ids'].to(DEVICE)
        attention_mask = batch['attention_mask'].to(DEVICE)
        labels = batch['label'].to(DEVICE)
        optimizer.zero_grad()
        outputs = bert_model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    print(f"BERT Epoch {epoch+1}/{NUM_EPOCHS}, Loss: {total_loss/len(bert_train_loader):.4f}")
    evaluate(bert_model, bert_val_loader, is_bert=True)

torch.save(bert_model.state_dict(), "models/bert_model.pth")
print("\n--- BERT ---")
evaluate(bert_model, bert_val_loader, is_bert=True)

  from .autonotebook import tqdm as notebook_tqdm


Epoch 1/5, Loss: 0.7032
              precision    recall  f1-score   support

           0     0.6761    1.0000    0.8068       309
           1     0.0000    0.0000    0.0000       148

    accuracy                         0.6761       457
   macro avg     0.3381    0.5000    0.4034       457
weighted avg     0.4572    0.6761    0.5455       457

Macro F1-score: 0.4034
Epoch 2/5, Loss: 0.6987
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       309
           1     0.3239    1.0000    0.4893       148

    accuracy                         0.3239       457
   macro avg     0.1619    0.5000    0.2446       457
weighted avg     0.1049    0.3239    0.1584       457

Macro F1-score: 0.2446
Epoch 3/5, Loss: 0.6971
              precision    recall  f1-score   support

           0     0.6761    1.0000    0.8068       309
           1     0.0000    0.0000    0.0000       148

    accuracy                         0.6761       457
   macro a

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


KeyboardInterrupt: 

In [4]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertModel
from sklearn.metrics import classification_report
import pandas as pd
import numpy as np
from tqdm import tqdm
import os

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
os.makedirs("saved_models", exist_ok=True)

# ---------------------------
# Dataset
# ---------------------------
class HateDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_len=128):
        self.texts = dataframe['Sentence'].tolist()
        self.labels = dataframe['Tag'].tolist()
        self.tokenizer = tokenizer
        self.max_len = max_len
        
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        
        encoding = self.tokenizer.encode_plus(
            text,
            truncation=True,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            return_attention_mask=True,
            return_tensors='pt'
        )
        
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'label': torch.tensor(label, dtype=torch.long)
        }

# Load data
train_df = pd.read_csv("train_hate.csv")
val_df = pd.read_csv("val_hate.csv")
tokenizer = BertTokenizer.from_pretrained('google-bert/bert-base-uncased')

train_dataset = HateDataset(train_df, tokenizer)
val_dataset = HateDataset(val_df, tokenizer)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)

# ---------------------------
# Models
# ---------------------------
class RNNClassifier(nn.Module):
    def __init__(self, hidden_dim=128):
        super(RNNClassifier, self).__init__()
        self.embedding = nn.Embedding(30522, 100)
        self.rnn = nn.RNN(100, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 2)

    def forward(self, input_ids, attention_mask):
        x = self.embedding(input_ids)
        _, h_n = self.rnn(x)
        return self.fc(h_n.squeeze(0))

class LSTMClassifier(nn.Module):
    def __init__(self, hidden_dim=128):
        super(LSTMClassifier, self).__init__()
        self.embedding = nn.Embedding(30522, 100)
        self.lstm = nn.LSTM(100, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 2)

    def forward(self, input_ids, attention_mask):
        x = self.embedding(input_ids)
        _, (h_n, _) = self.lstm(x)
        return self.fc(h_n.squeeze(0))

class TransformerClassifier(nn.Module):
    def __init__(self, hidden_dim=128, nhead=4):
        super(TransformerClassifier, self).__init__()
        self.embedding = nn.Embedding(30522, 100)
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=100, nhead=nhead)
        self.transformer = nn.TransformerEncoder(self.encoder_layer, num_layers=2)
        self.fc = nn.Linear(100, 2)

    def forward(self, input_ids, attention_mask):
        x = self.embedding(input_ids)
        x = self.transformer(x)
        return self.fc(x.mean(dim=1))

class BERTClassifier(nn.Module):
    def __init__(self):
        super(BERTClassifier, self).__init__()
        self.bert = BertModel.from_pretrained('google-bert/bert-base-uncased')
        self.fc = nn.Linear(self.bert.config.hidden_size, 2)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        return self.fc(outputs.pooler_output)

# ---------------------------
# Train + Eval Functions
# ---------------------------
def train_model(model, optimizer, loss_fn, epochs=3, save_path="model.pth"):
    model.to(device)
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)

            outputs = model(input_ids, attention_mask)
            loss = loss_fn(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f"Epoch {epoch+1} Loss: {total_loss/len(train_loader):.4f}")

    # Save model
    torch.save(model.state_dict(), save_path)
    print(f"Model saved to {save_path}")

def evaluate(model, dataloader):
    model.eval()
    preds, true_labels = [], []
    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)

            outputs = model(input_ids, attention_mask)
            pred_labels = torch.argmax(outputs, dim=1)

            preds.extend(pred_labels.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())
    print(classification_report(true_labels, preds, target_names=["Non-Hate", "Hate"]))

# ---------------------------
# Run Training and Save All Models
# ---------------------------

# 1. RNN
rnn = RNNClassifier()
optimizer = torch.optim.Adam(rnn.parameters(), lr=1e-3)
train_model(rnn, optimizer, nn.CrossEntropyLoss(), save_path="saved_models/rnn.pth")
evaluate(rnn, val_loader)

# 2. LSTM
lstm = LSTMClassifier()
optimizer = torch.optim.Adam(lstm.parameters(), lr=1e-3)
train_model(lstm, optimizer, nn.CrossEntropyLoss(), save_path="saved_models/lstm.pth")
evaluate(lstm, val_loader)

# 3. Transformer
trans = TransformerClassifier()
optimizer = torch.optim.Adam(trans.parameters(), lr=1e-3)
train_model(trans, optimizer, nn.CrossEntropyLoss(), save_path="saved_models/transformer.pth")
evaluate(trans, val_loader)

# 4. BERT
bert = BERTClassifier()
optimizer = torch.optim.AdamW(bert.parameters(), lr=2e-5)
train_model(bert, optimizer, nn.CrossEntropyLoss(), save_path="saved_models/bert.pth")
evaluate(bert, val_loader)


Epoch 1: 100%|██████████| 115/115 [00:32<00:00,  3.58it/s]


Epoch 1 Loss: 0.6662


Epoch 2: 100%|██████████| 115/115 [00:19<00:00,  6.05it/s]


Epoch 2 Loss: 0.6626


Epoch 3: 100%|██████████| 115/115 [00:12<00:00,  9.58it/s]


Epoch 3 Loss: 0.6613
Model saved to saved_models/rnn.pth


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

    Non-Hate       0.68      1.00      0.81       309
        Hate       0.00      0.00      0.00       148

    accuracy                           0.68       457
   macro avg       0.34      0.50      0.40       457
weighted avg       0.46      0.68      0.55       457



Epoch 1: 100%|██████████| 115/115 [00:12<00:00,  8.90it/s]


Epoch 1 Loss: 0.6643


Epoch 2: 100%|██████████| 115/115 [00:13<00:00,  8.83it/s]


Epoch 2 Loss: 0.6591


Epoch 3: 100%|██████████| 115/115 [00:12<00:00,  9.13it/s]


Epoch 3 Loss: 0.6590
Model saved to saved_models/lstm.pth


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

    Non-Hate       0.68      1.00      0.81       309
        Hate       0.00      0.00      0.00       148

    accuracy                           0.68       457
   macro avg       0.34      0.50      0.40       457
weighted avg       0.46      0.68      0.55       457



Epoch 1: 100%|██████████| 115/115 [00:30<00:00,  3.77it/s]


Epoch 1 Loss: 0.6863


Epoch 2: 100%|██████████| 115/115 [00:31<00:00,  3.66it/s]


Epoch 2 Loss: 0.6537


Epoch 3: 100%|██████████| 115/115 [00:31<00:00,  3.69it/s]


Epoch 3 Loss: 0.6396
Model saved to saved_models/transformer.pth
              precision    recall  f1-score   support

    Non-Hate       0.69      0.94      0.80       309
        Hate       0.50      0.13      0.20       148

    accuracy                           0.68       457
   macro avg       0.60      0.53      0.50       457
weighted avg       0.63      0.68      0.60       457



Epoch 1:  39%|███▉      | 45/115 [09:11<14:17, 12.24s/it]


KeyboardInterrupt: 

In [6]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertModel
from sklearn.metrics import classification_report
import pandas as pd
from tqdm import tqdm
import os
import urllib.request

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
os.makedirs("saved_models", exist_ok=True)

# ---------------------------
# 1. Download Weights from GitHub
# ---------------------------
def download_model(name, url):
    path = f"saved_models/{name}.pth"
    if not os.path.exists(path):
        print(f"Downloading {name}.pth...")
        urllib.request.urlretrieve(url, path)
    return path

# Replace these with your actual raw GitHub URLs
paths = {
    'RNN': download_model("rnn", "https://raw.githubusercontent.com/shrey-004/NLP_Assignment_3/main/rnn.pth"),
    'LSTM': download_model("lstm", "https://raw.githubusercontent.com/shrey-004/NLP_Assignment_3/main/lstm.pth"),
    'Transformer': download_model("transformer", "https://raw.githubusercontent.com/shrey-004/NLP_Assignment_3/main/transformer.pth"),
    # 'BERT': download_model("bert", "https://raw.githubusercontent.com/shrey-004/NLP_Assignment_3/main/bert.pth")
}

# ---------------------------
# 2. Dataset
# ---------------------------
class HateDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_len=128):
        self.texts = dataframe['Sentence'].tolist()
        self.labels = dataframe['Tag'].tolist()
        self.tokenizer = tokenizer
        self.max_len = max_len
        
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        
        encoding = self.tokenizer.encode_plus(
            text,
            truncation=True,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            return_attention_mask=True,
            return_tensors='pt'
        )
        
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'label': torch.tensor(label, dtype=torch.long)
        }

# Load validation data
df_val = pd.read_csv("val_hate.csv")
tokenizer = BertTokenizer.from_pretrained('google-bert/bert-base-uncased')
val_dataset = HateDataset(df_val, tokenizer)
val_loader = DataLoader(val_dataset, batch_size=32)

# ---------------------------
# 3. Model Definitions
# ---------------------------
class RNNClassifier(nn.Module):
    def __init__(self, hidden_dim=128):
        super(RNNClassifier, self).__init__()
        self.embedding = nn.Embedding(30522, 100)
        self.rnn = nn.RNN(100, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 2)

    def forward(self, input_ids, attention_mask):
        x = self.embedding(input_ids)
        _, h_n = self.rnn(x)
        return self.fc(h_n.squeeze(0))

class LSTMClassifier(nn.Module):
    def __init__(self, hidden_dim=128):
        super(LSTMClassifier, self).__init__()
        self.embedding = nn.Embedding(30522, 100)
        self.lstm = nn.LSTM(100, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 2)

    def forward(self, input_ids, attention_mask):
        x = self.embedding(input_ids)
        _, (h_n, _) = self.lstm(x)
        return self.fc(h_n.squeeze(0))

class TransformerClassifier(nn.Module):
    def __init__(self, hidden_dim=128, nhead=4):
        super(TransformerClassifier, self).__init__()
        self.embedding = nn.Embedding(30522, 100)
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=100, nhead=nhead, batch_first=True)
        self.transformer = nn.TransformerEncoder(self.encoder_layer, num_layers=2)
        self.fc = nn.Linear(100, 2)

    def forward(self, input_ids, attention_mask):
        x = self.embedding(input_ids)
        x = self.transformer(x)
        x = x.mean(dim=1)
        return self.fc(x)

class BERTClassifier(nn.Module):
    def __init__(self):
        super(BERTClassifier, self).__init__()
        self.bert = BertModel.from_pretrained('google-bert/bert-base-uncased')
        self.fc = nn.Linear(self.bert.config.hidden_size, 2)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        return self.fc(outputs.pooler_output)

# ---------------------------
# 4. Load Model + Weights
# ---------------------------
def load_model(model_class, weight_path):
    model = model_class().to(device)
    model.load_state_dict(torch.load(weight_path, map_location=device))
    model.eval()
    return model

models = {
    'RNN': load_model(RNNClassifier, paths['RNN']),
    'LSTM': load_model(LSTMClassifier, paths['LSTM']),
    'Transformer': load_model(TransformerClassifier, paths['Transformer']),
    # 'BERT': load_model(BERTClassifier, paths['BERT'])
}

# ---------------------------
# 5. Evaluation
# ---------------------------
def evaluate(model, dataloader):
    preds, true_labels = [], []
    with torch.no_grad():
        for batch in tqdm(dataloader):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)

            outputs = model(input_ids, attention_mask)
            pred_labels = torch.argmax(outputs, dim=1)

            preds.extend(pred_labels.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())

    print(classification_report(true_labels, preds, target_names=["Non-Hate", "Hate"]))

# Run evaluation for each model
for name, model in models.items():
    print(f"\n--- Evaluating: {name} ---")
    evaluate(model, val_loader)

# ---------------------------
# 6. Training (Commented Out)
# ---------------------------
'''
# Example: Training loop (disabled for final notebook)
model = LSTMClassifier().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()

for epoch in range(3):
    model.train()
    for batch in train_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)

        outputs = model(input_ids, attention_mask)
        loss = loss_fn(outputs, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
'''



--- Evaluating: RNN ---


100%|██████████| 15/15 [00:01<00:00, 12.35it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

    Non-Hate       0.68      1.00      0.81       309
        Hate       0.00      0.00      0.00       148

    accuracy                           0.68       457
   macro avg       0.34      0.50      0.40       457
weighted avg       0.46      0.68      0.55       457


--- Evaluating: LSTM ---


100%|██████████| 15/15 [00:00<00:00, 19.58it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

    Non-Hate       0.68      1.00      0.81       309
        Hate       0.00      0.00      0.00       148

    accuracy                           0.68       457
   macro avg       0.34      0.50      0.40       457
weighted avg       0.46      0.68      0.55       457


--- Evaluating: Transformer ---


100%|██████████| 15/15 [00:01<00:00, 13.18it/s]

              precision    recall  f1-score   support

    Non-Hate       0.68      0.97      0.80       309
        Hate       0.47      0.06      0.11       148

    accuracy                           0.67       457
   macro avg       0.58      0.51      0.45       457
weighted avg       0.61      0.67      0.58       457






"\n# Example: Training loop (disabled for final notebook)\nmodel = LSTMClassifier().to(device)\noptimizer = torch.optim.Adam(model.parameters(), lr=1e-3)\nloss_fn = nn.CrossEntropyLoss()\n\nfor epoch in range(3):\n    model.train()\n    for batch in train_loader:\n        input_ids = batch['input_ids'].to(device)\n        attention_mask = batch['attention_mask'].to(device)\n        labels = batch['label'].to(device)\n\n        outputs = model(input_ids, attention_mask)\n        loss = loss_fn(outputs, labels)\n\n        optimizer.zero_grad()\n        loss.backward()\n        optimizer.step()\n"

In [11]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertModel
from sklearn.metrics import classification_report
import pandas as pd
from tqdm import tqdm
import os
import urllib.request

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
os.makedirs("saved_models", exist_ok=True)

# ---------------------------
# 1. Download Weights
# ---------------------------
def download_model(name, url):
    path = f"saved_models/{name}.pth"
    if not os.path.exists(path):
        print(f"Downloading {name}.pth from GitHub...")
        urllib.request.urlretrieve(url, path)
    return path

# For BERT: download from Google Drive using gdown
def download_bert_from_drive(file_id):
    import gdown
    path = "saved_models/bert.pth"
    if not os.path.exists(path):
        print("Downloading bert.pth from Google Drive...")
        gdown.download(f"https://drive.google.com/uc?id={file_id}", path, quiet=False)
    return path

# GitHub weights
paths = {
    'RNN': download_model("rnn", "https://raw.githubusercontent.com/shrey-004/NLP_Assignment_3/main/rnn.pth"),
    'LSTM': download_model("lstm", "https://raw.githubusercontent.com/shrey-004/NLP_Assignment_3/main/lstm.pth"),
    'Transformer': download_model("transformer", "https://raw.githubusercontent.com/shrey-004/NLP_Assignment_3/main/transformer.pth"),
    'BERT': download_bert_from_drive("1nkghIyk-kjrNhpSZo-kTq64nVRcOwy5A")  # 👈 replace with actual file ID
}

# ---------------------------
# 2. Dataset
# ---------------------------
class HateDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_len=128):
        self.texts = dataframe['Sentence'].tolist()
        self.labels = dataframe['Tag'].tolist()
        self.tokenizer = tokenizer
        self.max_len = max_len
        
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        
        encoding = self.tokenizer.encode_plus(
            text,
            truncation=True,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            return_attention_mask=True,
            return_tensors='pt'
        )
        
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'label': torch.tensor(label, dtype=torch.long)
        }

# Load validation data
df_val = pd.read_csv("val_hate.csv")
tokenizer = BertTokenizer.from_pretrained('google-bert/bert-base-uncased')
val_dataset = HateDataset(df_val, tokenizer)
val_loader = DataLoader(val_dataset, batch_size=32)

# ---------------------------
# 3. Model Definitions
# ---------------------------
class RNNClassifier(nn.Module):
    def __init__(self, hidden_dim=128):
        super(RNNClassifier, self).__init__()
        self.embedding = nn.Embedding(30522, 100)
        self.rnn = nn.RNN(100, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 2)

    def forward(self, input_ids, attention_mask):
        x = self.embedding(input_ids)
        _, h_n = self.rnn(x)
        return self.fc(h_n.squeeze(0))

class LSTMClassifier(nn.Module):
    def __init__(self, hidden_dim=128):
        super(LSTMClassifier, self).__init__()
        self.embedding = nn.Embedding(30522, 100)
        self.lstm = nn.LSTM(100, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 2)

    def forward(self, input_ids, attention_mask):
        x = self.embedding(input_ids)
        _, (h_n, _) = self.lstm(x)
        return self.fc(h_n.squeeze(0))

class TransformerClassifier(nn.Module):
    def __init__(self, hidden_dim=128, nhead=4):
        super(TransformerClassifier, self).__init__()
        self.embedding = nn.Embedding(30522, 100)
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=100, nhead=nhead, batch_first=True)
        self.transformer = nn.TransformerEncoder(self.encoder_layer, num_layers=2)
        self.fc = nn.Linear(100, 2)

    def forward(self, input_ids, attention_mask):
        x = self.embedding(input_ids)
        x = self.transformer(x)
        x = x.mean(dim=1)
        return self.fc(x)

class BERTClassifier(nn.Module):
    def __init__(self):
        super(BERTClassifier, self).__init__()
        self.bert = BertModel.from_pretrained('google-bert/bert-base-uncased')
        self.fc = nn.Linear(self.bert.config.hidden_size, 2)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        return self.fc(outputs.pooler_output)

# ---------------------------
# 4. Load Models + Weights
# ---------------------------
def load_model(model_class, weight_path):
    model = model_class().to(device)
    model.load_state_dict(torch.load(weight_path, map_location=device))
    model.eval()
    return model

models = {
    'RNN': load_model(RNNClassifier, paths['RNN']),
    'LSTM': load_model(LSTMClassifier, paths['LSTM']),
    'Transformer': load_model(TransformerClassifier, paths['Transformer']),
    'BERT': load_model(BERTClassifier, paths['BERT'])
}

# ---------------------------
# 5. Evaluation
# ---------------------------
def evaluate(model, dataloader):
    preds, true_labels = [], []
    with torch.no_grad():
        for batch in tqdm(dataloader):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)

            outputs = model(input_ids, attention_mask)
            pred_labels = torch.argmax(outputs, dim=1)

            preds.extend(pred_labels.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())

    print(classification_report(true_labels, preds, target_names=["Non-Hate", "Hate"]))

# Run evaluation for each model
for name, model in models.items():
    print(f"\n--- Evaluating: {name} ---")
    evaluate(model, val_loader)

# ---------------------------
# 6. Training (Commented Out)
# ---------------------------
'''
# Example: Training loop (disabled for final notebook)
model = LSTMClassifier().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()

for epoch in range(3):
    model.train()
    for batch in train_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)

        outputs = model(input_ids, attention_mask)
        loss = loss_fn(outputs, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
'''


Downloading bert.pth from Google Drive...


Downloading...
From (original): https://drive.google.com/uc?id=1nkghIyk-kjrNhpSZo-kTq64nVRcOwy5A
From (redirected): https://drive.google.com/uc?id=1nkghIyk-kjrNhpSZo-kTq64nVRcOwy5A&confirm=t&uuid=3a8bca03-954a-4bab-930d-793e5eac42e3
To: c:\Users\shrey\Documents\nlp_assgn_3\saved_models\bert.pth
100%|██████████| 438M/438M [00:35<00:00, 12.3MB/s] 



--- Evaluating: RNN ---


100%|██████████| 15/15 [00:00<00:00, 19.31it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

    Non-Hate       0.68      1.00      0.81       309
        Hate       0.00      0.00      0.00       148

    accuracy                           0.68       457
   macro avg       0.34      0.50      0.40       457
weighted avg       0.46      0.68      0.55       457


--- Evaluating: LSTM ---


100%|██████████| 15/15 [00:00<00:00, 25.19it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

    Non-Hate       0.68      1.00      0.81       309
        Hate       0.00      0.00      0.00       148

    accuracy                           0.68       457
   macro avg       0.34      0.50      0.40       457
weighted avg       0.46      0.68      0.55       457


--- Evaluating: Transformer ---


100%|██████████| 15/15 [00:01<00:00, 13.26it/s]


              precision    recall  f1-score   support

    Non-Hate       0.68      0.97      0.80       309
        Hate       0.47      0.06      0.11       148

    accuracy                           0.67       457
   macro avg       0.58      0.51      0.45       457
weighted avg       0.61      0.67      0.58       457


--- Evaluating: BERT ---


100%|██████████| 15/15 [00:44<00:00,  2.94s/it]

              precision    recall  f1-score   support

    Non-Hate       0.78      0.94      0.85       309
        Hate       0.77      0.43      0.55       148

    accuracy                           0.77       457
   macro avg       0.77      0.69      0.70       457
weighted avg       0.77      0.77      0.75       457






"\n# Example: Training loop (disabled for final notebook)\nmodel = LSTMClassifier().to(device)\noptimizer = torch.optim.Adam(model.parameters(), lr=1e-3)\nloss_fn = nn.CrossEntropyLoss()\n\nfor epoch in range(3):\n    model.train()\n    for batch in train_loader:\n        input_ids = batch['input_ids'].to(device)\n        attention_mask = batch['attention_mask'].to(device)\n        labels = batch['label'].to(device)\n\n        outputs = model(input_ids, attention_mask)\n        loss = loss_fn(outputs, labels)\n\n        optimizer.zero_grad()\n        loss.backward()\n        optimizer.step()\n"