# **Model Transformers**

In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import numpy as np
# Load GloVe embeddings
def load_glove_embeddings(filepath):
    """Load word vectors from a GloVe-style text file.

    Every usable line holds a token followed by its whitespace-separated
    vector components. Lines that are blank, or that carry a token without
    any components, are skipped instead of raising ``IndexError`` or storing
    an empty vector.

    Args:
        filepath: Path to the embeddings file; it is read as UTF-8 text.

    Returns:
        A dict mapping each token (str) to a 1-D ``float32`` numpy array.
    """
    embeddings = {}
    with open(filepath, 'r', encoding='utf-8') as f:
        for line in f:
            values = line.split()
            if len(values) < 2:
                # Nothing to parse: empty line or a token with no vector.
                continue
            embeddings[values[0]] = np.asarray(values[1:], dtype='float32')
    return embeddings

# Dataset class
class AGNewsDataset(Dataset):
    """Map raw texts to fixed-size matrices of pre-trained word vectors.

    Each item is a ``(max_len, embedding_dim)`` float32 tensor plus an
    integer label tensor. Texts are split on whitespace, truncated to
    ``max_len`` tokens and zero-padded when shorter. Tokens without a vector
    stay as zero rows.

    Args:
        texts: Indexable sequence of strings.
        labels: Indexable sequence of integer class ids, aligned with texts.
        word_vectors: Mapping from token to vector of length embedding_dim;
            it must support ``in`` and ``[]``.
        max_len: Number of token positions (rows) per item.
        embedding_dim: Length of each word vector (columns) per item.
    """

    def __init__(self, texts, labels, word_vectors, max_len, embedding_dim):
        self.texts = texts
        self.labels = labels
        self.word_vectors = word_vectors
        self.max_len = max_len
        self.embedding_dim = embedding_dim

    def __len__(self):
        """Return the number of texts in the dataset."""
        return len(self.texts)

    def _lookup(self, token):
        """Return the vector for token, or None when it is unknown.

        The exact spelling is tried first. The lower-cased spelling is a
        fallback so that capitalised words still hit an uncased vocabulary
        instead of silently turning into zero rows.
        """
        for candidate in (token, token.lower()):
            if candidate in self.word_vectors:
                return self.word_vectors[candidate]
        return None

    def __getitem__(self, idx):
        """Return ``(embedding_matrix, label)`` tensors for item idx."""
        text = self.texts[idx]
        label = self.labels[idx]

        # Allocate float32 directly so no float64 -> float32 copy is needed
        # when the matrix is handed to torch.
        embedding = np.zeros((self.max_len, self.embedding_dim), dtype=np.float32)
        for i, token in enumerate(text.split()[:self.max_len]):
            vector = self._lookup(token)
            if vector is not None:
                embedding[i] = vector

        return torch.from_numpy(embedding), torch.tensor(label, dtype=torch.long)

# Transformer model
class TransformerClassifier(nn.Module):
    """Transformer-encoder text classifier over pre-embedded token sequences.

    A learned positional encoding is added to the input, the result goes
    through a stack of Transformer encoder layers, the sequence axis is
    mean-pooled, and a linear layer produces the class logits.

    Args:
        embedding_dim: Size of each token vector (the encoder's d_model).
        num_heads: Number of attention heads per encoder layer.
        num_classes: Number of output logits.
        num_layers: Number of stacked encoder layers.
        max_len: Longest sequence length the positional encoding covers.
    """

    def __init__(self, embedding_dim, num_heads, num_classes, num_layers, max_len):
        super().__init__()
        self.embedding_dim = embedding_dim
        self.num_heads = num_heads
        self.num_classes = num_classes
        self.max_len = max_len

        # Learned positional encoding, initialised to zeros.
        self.positional_encoding = nn.Parameter(torch.zeros(1, max_len, embedding_dim))
        # batch_first=True makes the encoder read input as (batch, seq, dim),
        # matching forward(). With the default layout the batch axis is taken
        # as the sequence axis, so attention would mix different samples.
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=embedding_dim,
            nhead=num_heads,
            batch_first=True,
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(embedding_dim, num_classes)

    def forward(self, x):
        """Compute class logits.

        Args:
            x: Float tensor of shape (batch, seq_len, embedding_dim) with
                seq_len <= max_len.

        Returns:
            Tensor of shape (batch, num_classes) holding unnormalised logits.
        """
        # Out-of-place add: the caller's tensor must not be modified.
        x = x + self.positional_encoding[:, :x.size(1), :]
        x = self.transformer(x)
        x = x.mean(dim=1)  # Global average pooling over the sequence axis
        return self.fc(x)

# Main script
def main():
    """Train the Transformer classifier and report validation metrics.

    Reads a header-less CSV whose column 0 is the class label and whose
    columns 1 and 2 are joined into the input text. Loads GloVe vectors,
    trains for a fixed number of epochs while printing the mean training
    loss per epoch, then prints validation loss and accuracy on a held-out
    20% split.
    """
    # Parameters
    data_path = "/content/train.csv"
    glove_path = "/content/glove.6B.100d.txt"
    max_len = 50
    batch_size = 16
    embedding_dim = 100
    num_heads = 4
    num_classes = 4
    num_layers = 1
    num_epochs = 3
    learning_rate = 5e-4

    # Resolve the device once instead of re-querying CUDA for every batch.
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # Load data
    df = pd.read_csv(data_path, header=None)
    texts = df[1].astype(str) + " " + df[2].astype(str)
    labels = LabelEncoder().fit_transform(df[0])

    # Train-test split
    X_train, X_val, y_train, y_val = train_test_split(texts, labels, test_size=0.2, random_state=42)

    # Load word embeddings
    print("Loading GloVe word vectors...")
    word_vectors = load_glove_embeddings(glove_path)

    # Create datasets and loaders
    train_dataset = AGNewsDataset(X_train.tolist(), y_train, word_vectors, max_len, embedding_dim)
    val_dataset = AGNewsDataset(X_val.tolist(), y_val, word_vectors, max_len, embedding_dim)

    # Pinned host memory only helps host-to-GPU copies, so request it only
    # when a GPU is actually used.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=use_cuda)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, pin_memory=use_cuda)

    # Initialize model
    model = TransformerClassifier(embedding_dim, num_heads, num_classes, num_layers, max_len)
    model = model.to(device)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Training loop
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        # Batch names are distinct from the full `labels` array above.
        for batch_embeddings, batch_labels in train_loader:
            batch_embeddings = batch_embeddings.to(device)
            batch_labels = batch_labels.to(device)

            optimizer.zero_grad()
            outputs = model(batch_embeddings)
            loss = criterion(outputs, batch_labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss/len(train_loader):.4f}")

    # Validation loop
    model.eval()
    val_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_embeddings, batch_labels in val_loader:
            batch_embeddings = batch_embeddings.to(device)
            batch_labels = batch_labels.to(device)

            outputs = model(batch_embeddings)
            loss = criterion(outputs, batch_labels)
            val_loss += loss.item()

            # The predicted class is the index of the largest logit.
            _, predicted = torch.max(outputs, 1)
            total += batch_labels.size(0)
            correct += (predicted == batch_labels).sum().item()

    print(f"Validation Loss: {val_loss/len(val_loader):.4f}, Accuracy: {100 * correct/total:.2f}%")

# Run the training/evaluation pipeline only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()

Loading GloVe word vectors...




Epoch 1/3, Train Loss: 0.5524
Epoch 2/3, Train Loss: 0.4371
Epoch 3/3, Train Loss: 0.4074
Validation Loss: 0.4584, Accuracy: 83.80%
