In [9]:
# !wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1QfAUt0u4wLZVy2Ta1G90jOLNaqzAw2eW' -O agnews_test.csv
# !wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1UsOBTnfch-Su4kqmkzXcIizwJt6NWtXZ' -O agnews_train.csv
# clear_output()

## Imports

In [10]:
import torch
from torch import nn

from torchvision import datasets
from torchvision.transforms import ToTensor

import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.feature_extraction.text import TfidfVectorizer

## Classes and Functions

In [11]:
class NeuralNetwork(nn.Module):
    '''Fully connected classifier: two hidden ReLU layers followed by a linear output layer.

    Args:
        in_features: Length of each input feature vector. Defaults to 1500,
            the same default used for ``max_features`` in ``TextDataset``.
        hidden_size: Width of both hidden layers. Defaults to 512.
        num_classes: Number of logits produced per sample. Defaults to 3.
    '''
    def __init__(self, in_features=1500, hidden_size=512, num_classes=3):
        super().__init__()
        # The attribute name is kept so state-dict keys ("linear_relu_stack.N.*") stay the same.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, num_classes),
        )

    def forward(self, x):
        '''Return the raw, un-normalised class scores (logits) for the batch ``x``.'''
        return self.linear_relu_stack(x)

def preprocess_agnews(csv_file):
    '''Load an AG News CSV and reduce it to ``text`` / ``label`` columns.

    The file is read without a header row: column 0 is used as the class id and
    columns 1 and 2 as title and body. Rows whose class id is 1 are dropped and
    the remaining ids are shifted down by 2 (so ids 2, 3, 4 become 0, 1, 2).

    Args:
        csv_file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        DataFrame with a ``text`` column (title and body joined by one space)
        and an integer ``label`` column. Retained rows keep their original index.
    '''
    raw = pd.read_csv(csv_file, header=None)

    # Take an explicit copy of the filtered rows: adding columns to a slice of
    # `raw` is ambiguous to pandas and triggers SettingWithCopyWarning.
    kept = raw.loc[raw[0] != 1].copy()
    kept['label'] = kept[0] - 2

    # read_csv turns empty (or "NA"-like) fields into NaN floats, which str.join
    # cannot handle; treat a missing title/body as an empty string instead.
    title = kept[1].fillna('').astype(str)
    body = kept[2].fillna('').astype(str)
    kept['text'] = title + ' ' + body

    return kept[['text', 'label']]

class TextDataset(Dataset):
    '''Dataset of dense TF-IDF feature vectors paired with integer class labels.

    Args:
        dataframe: Frame with a ``text`` column (documents) and a ``label``
            column (integer class ids).
        max_features: Vocabulary size cap passed to ``TfidfVectorizer`` when a
            new vectoriser is created. Ignored when ``vectoriser`` is given.
        vectoriser: An already-fitted vectoriser to reuse (e.g. the one fitted
            on the training split). When ``None`` a new one is fitted on
            ``dataframe``.

    Attributes set on the instance: ``texts`` (list of documents),
    ``vectoriser`` (the fitted vectoriser), ``features`` (float32 tensor, one
    row per document) and ``labels`` (long tensor).
    '''
    def __init__(self, dataframe, max_features=1500, vectoriser=None):
        self.texts = dataframe['text'].tolist()
        self.labels = dataframe['label'].tolist()

        # TF-IDF: fit on training data, reuse the fitted vocabulary for testing
        if vectoriser is None:
            # Training: create and fit
            self.vectoriser = TfidfVectorizer(max_features=max_features, stop_words='english')
            sparse_features = self.vectoriser.fit_transform(self.texts)
        else:
            # Testing: use provided vectoriser
            self.vectoriser = vectoriser
            sparse_features = self.vectoriser.transform(self.texts)

        # Down-cast while the matrix is still sparse so the dense array is
        # allocated directly as float32 (instead of a float64 dense array that
        # is then copied); as_tensor wraps that array without a further copy.
        dense_features = sparse_features.astype('float32').toarray()
        self.features = torch.as_tensor(dense_features, dtype=torch.float32)
        self.labels = torch.tensor(self.labels, dtype=torch.long)

    def __len__(self):
        '''Number of samples in the dataset.'''
        return len(self.labels)

    def __getitem__(self, idx):
        '''Return the pre-computed ``(feature_vector, label)`` pair at ``idx``.'''
        return self.features[idx], self.labels[idx]

def train_loop(dataloader, model, loss_fn, optimizer):
    '''Train ``model`` for one pass over ``dataloader``, printing the loss every 100 batches.

    Args:
        dataloader: Iterable of ``(X, y)`` batches exposing a ``.dataset`` with ``len()``.
        model: ``nn.Module`` to optimise; it is switched to training mode.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: Optimiser already bound to ``model``'s parameters.
    '''
    size = len(dataloader.dataset)

    # Inputs must live on the same device as the weights, otherwise the forward
    # pass fails with a device-mismatch error once the model is on cuda/mps.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    # Set the model to training mode - important for batch normalization and dropout layers
    # Unnecessary in this situation but added for best practices
    model.train()

    seen = 0  # samples processed so far; stays exact even when the last batch is short
    for batch, (X, y) in enumerate(dataloader):
        if device is not None:
            X, y = X.to(device), y.to(device)

        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        seen += len(X)
        if batch % 100 == 0:
            print(f"loss: {loss.item():>7f}  [{seen:>5d}/{size:>5d}]")


def test_loop(dataloader, model, loss_fn):
    # Set the model to evaluation mode - important for batch normalization and dropout layers
    # Unnecessary in this situation but added for best practices
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    # Evaluating the model with torch.no_grad() ensures that no gradients are computed during test mode
    # also serves to reduce unnecessary gradient computations and memory usage for tensors with requires_grad=True
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [13]:
# Pick the compute backend: CUDA GPU first, then Apple MPS, otherwise CPU.
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

# Hyperparameters (defined before first use so the loaders actually pick them up)
learning_rate = 0.01
batch_size = 64
epochs = 10

# Fix the RNG so weight initialisation and shuffle order repeat across runs
seed = 0
torch.manual_seed(seed)

train_df = preprocess_agnews('../W2_feedfoward_neural_networks/agnews_train.csv')
test_df = preprocess_agnews('../W2_feedfoward_neural_networks/agnews_test.csv')

train_dataset = TextDataset(train_df, max_features=1500)
test_dataset = TextDataset(test_df, vectoriser=train_dataset.vectoriser)  # Passing the fitted vectoriser


def collate_to_device(samples):
    '''Stack a list of (features, label) samples into a batch placed on `device`.'''
    features, labels = torch.utils.data.default_collate(samples)
    return features.to(device), labels.to(device)


# Batches are placed on the selected device at collation time so they always
# match the device the model's weights live on.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_to_device)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_to_device)

# Build the model once and keep it on the selected device. (Previously a second,
# CPU-only instance silently replaced the one that had been moved to `device`.)
model = NeuralNetwork().to(device)
print(model)

# Initialize the loss function and optimiser
loss_fn = nn.CrossEntropyLoss()

# optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)

for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_loader, model, loss_fn, optimizer)
    test_loop(test_loader, model, loss_fn)
print("Done!")

Using mps device
NeuralNetwork(
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=1500, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=3, bias=True)
  )
)
Epoch 1
-------------------------------
loss: 1.099790  [   64/90000]
loss: 0.433480  [ 6464/90000]
loss: 0.489551  [12864/90000]
loss: 0.240757  [19264/90000]
loss: 0.218598  [25664/90000]
loss: 0.225188  [32064/90000]
loss: 0.328116  [38464/90000]
loss: 0.247278  [44864/90000]
loss: 0.233035  [51264/90000]
loss: 0.272694  [57664/90000]
loss: 0.101648  [64064/90000]
loss: 0.242865  [70464/90000]
loss: 0.238016  [76864/90000]
loss: 0.279343  [83264/90000]
loss: 0.305352  [89664/90000]
Test Error: 
 Accuracy: 91.1%, Avg loss: 0.250083 

Epoch 2
-------------------------------
loss: 0.150153  [   64/90000]
loss: 0.212714  [ 6464/90000]
loss: 0.139310  [12864/90000]
loss: 0.167569  [19264/90000]
loss: 0.