## 🔁📊 Text Classification with LSTM and GRU (AG News)
This notebook trains GRU- and LSTM-based classifiers on the first 5,000 AG News training examples and reports each model's accuracy on the first 1,000 test examples.

In [None]:
from itertools import islice

import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence
from torch.utils.data import DataLoader
from torchtext.data.utils import get_tokenizer
from torchtext.datasets import AG_NEWS
from torchtext.vocab import build_vocab_from_iterator

### 1. Data Preparation

In [None]:
# Shared tokenizer: used both to build the vocabulary (yield_tokens) and to
# encode individual texts (encode), so both see the same token stream.
tokenizer = get_tokenizer("basic_english")

def yield_tokens(data_iter):
    """Lazily yield the token list of every sample in ``data_iter``.

    Args:
        data_iter: Iterable of two-element samples whose second element is
            the raw text; the first element is ignored.

    Yields:
        The result of applying the module-level ``tokenizer`` to each text.
    """
    yield from (tokenizer(raw_text) for _unused, raw_text in data_iter)

# Build the vocabulary from the tokenized training split, registering
# '<unk>' and '<pad>' as special tokens alongside the corpus tokens.
train_iter = AG_NEWS(split='train')
vocab = build_vocab_from_iterator(yield_tokens(train_iter), specials=['<unk>', '<pad>'])
# Look-ups of tokens that are not in the vocabulary fall back to the '<unk>' index.
vocab.set_default_index(vocab['<unk>'])

def encode(text):
    """Convert raw ``text`` into a 1-D ``torch.long`` tensor of vocabulary indices.

    The text is split with the module-level ``tokenizer`` and each token is
    looked up in the module-level ``vocab``.
    """
    tokens = tokenizer(text)
    token_ids = [vocab[tok] for tok in tokens]
    return torch.tensor(token_ids, dtype=torch.long)

def collate_batch(batch):
    """Collate ``(label, text)`` samples into padded batch tensors.

    Args:
        batch: Iterable of ``(label, text)`` pairs. Each label is shifted
            down by one before being stored (presumably because the dataset
            labels start at 1 -- confirm against the dataset).

    Returns:
        A tuple ``(padded, lengths, labels)`` where

        * ``padded`` is a ``(batch, max_len)`` tensor of token indices,
          right-padded with the ``'<pad>'`` index,
        * ``lengths`` holds the unpadded length of every sequence,
        * ``labels`` is a ``torch.long`` tensor of the shifted labels.
    """
    labels, sequences, lengths = [], [], []
    for label, text in batch:
        encoded = encode(text)
        if encoded.numel() == 0:
            # A text with no tokens would yield a length-0 sequence, which
            # pack_padded_sequence rejects; represent it as a single '<unk>'.
            encoded = torch.tensor([vocab['<unk>']], dtype=torch.long)
        # Keep plain ints so the label tensor is built in one step rather
        # than from a list of 0-d tensors.
        labels.append(label - 1)
        sequences.append(encoded)
        lengths.append(len(encoded))
    padded = pad_sequence(sequences, batch_first=True, padding_value=vocab['<pad>'])
    return padded, torch.tensor(lengths), torch.tensor(labels, dtype=torch.long)

# Only the leading samples of each split are used, so take them lazily with
# islice instead of materialising the whole split and slicing it afterwards.
train_iter = AG_NEWS(split='train')
test_iter = AG_NEWS(split='test')
train_loader = DataLoader(list(islice(train_iter, 5000)), batch_size=32, shuffle=True, collate_fn=collate_batch)
test_loader = DataLoader(list(islice(test_iter, 1000)), batch_size=64, shuffle=False, collate_fn=collate_batch)

### 2. Define GRU and LSTM Models

In [None]:
class RNNClassifier(nn.Module):
    """Embedding -> single-layer GRU/LSTM -> linear classifier.

    Args:
        vocab_size: Number of rows in the embedding table.
        embed_dim: Size of each token embedding.
        hidden_dim: Size of the recurrent hidden state.
        num_classes: Number of output logits.
        use_lstm: Use ``nn.LSTM`` when true, ``nn.GRU`` otherwise.
        padding_idx: Index of the padding token in the embedding table.
            When ``None`` (the default) it is read from the module-level
            ``vocab`` as ``vocab['<pad>']``, which keeps existing callers
            working while letting the class be used without that global.
    """

    def __init__(self, vocab_size, embed_dim, hidden_dim, num_classes, use_lstm=False, padding_idx=None):
        super().__init__()
        if padding_idx is None:
            padding_idx = vocab['<pad>']
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=padding_idx)
        rnn_cls = nn.LSTM if use_lstm else nn.GRU
        self.rnn = rnn_cls(embed_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, num_classes)

    def forward(self, x, lengths):
        """Return class logits for a padded batch.

        Args:
            x: ``(batch, seq_len)`` tensor of token indices.
            lengths: Unpadded length of every sequence in ``x``.

        Returns:
            ``(batch, num_classes)`` tensor of logits.
        """
        embedded = self.embedding(x)
        # Packing makes the RNN stop at each sequence's true length, so the
        # final hidden state is not influenced by padding positions.
        # pack_padded_sequence needs the lengths on the CPU.
        packed = pack_padded_sequence(embedded, lengths.cpu(), batch_first=True, enforce_sorted=False)
        _, hidden = self.rnn(packed)
        if isinstance(hidden, tuple):  # LSTM returns (hidden_state, cell_state)
            hidden = hidden[0]
        # hidden is (num_layers, batch, hidden_dim); take the last layer.
        return self.fc(hidden[-1])

### 3. Train Function

In [None]:
def train_model(model, loader, optimizer, criterion, device):
    """Run one training pass over ``loader`` and return the summed batch loss.

    Args:
        model: Module called as ``model(text, lengths)``.
        loader: Iterable of ``(text, lengths, labels)`` tensor batches.
        optimizer: Optimizer stepping ``model``'s parameters.
        criterion: Loss callable taking ``(predictions, labels)``.
        device: Device every batch tensor is moved to.

    Returns:
        The sum (not the mean) of the per-batch loss values.
    """
    model.train()
    running_loss = 0
    for tokens, seq_lens, targets in loader:
        tokens = tokens.to(device)
        seq_lens = seq_lens.to(device)
        targets = targets.to(device)

        # Clear stale gradients before computing this batch's gradients.
        optimizer.zero_grad()
        batch_loss = criterion(model(tokens, seq_lens), targets)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()
    return running_loss

### 4. Evaluation Function

In [None]:
def evaluate_model(model, loader, device):
    """Compute the accuracy of ``model`` over every batch in ``loader``.

    Args:
        model: Module called as ``model(text, lengths)`` and returning
            per-class scores.
        loader: Iterable of ``(text, lengths, labels)`` tensor batches.
        device: Device the inputs are moved to before the forward pass.

    Returns:
        The value of ``accuracy_score`` over all true and predicted labels.
    """
    model.eval()
    predictions = []
    targets = []
    with torch.no_grad():
        for tokens, seq_lens, labels in loader:
            scores = model(tokens.to(device), seq_lens.to(device))
            # The predicted class is the index of the highest score per row.
            batch_pred = scores.argmax(dim=1).cpu().numpy()
            predictions.extend(batch_pred)
            # Labels are never moved to the device, so they convert directly.
            targets.extend(labels.numpy())
    return accuracy_score(targets, predictions)

### 5. Train and Evaluate GRU and LSTM

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = nn.CrossEntropyLoss()


def _run_experiment(name, model, optimizer, num_epochs=3):
    """Train ``model`` for ``num_epochs`` epochs, reporting after each one.

    After every epoch the summed training loss returned by ``train_model``
    and the accuracy on ``test_loader`` are printed, prefixed with ``name``.
    """
    for epoch in range(num_epochs):
        loss = train_model(model, train_loader, optimizer, criterion, device)
        acc = evaluate_model(model, test_loader, device)
        print(f"{name} - Epoch {epoch+1}, Loss: {loss:.4f}, Accuracy: {acc:.4f}")


# GRU Model
gru_model = RNNClassifier(len(vocab), 64, 128, 4, use_lstm=False).to(device)
gru_optimizer = optim.Adam(gru_model.parameters(), lr=0.005)
_run_experiment("GRU", gru_model, gru_optimizer)

# LSTM Model (built only after the GRU run, as in the original flow)
lstm_model = RNNClassifier(len(vocab), 64, 128, 4, use_lstm=True).to(device)
lstm_optimizer = optim.Adam(lstm_model.parameters(), lr=0.005)
_run_experiment("LSTM", lstm_model, lstm_optimizer)