![](https://i.gyazo.com/414881441edbabcf2ab32d3bdfc1711e.png)

In [2]:
# IMPORTS

import json
import random
import time

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn import init
from tqdm import tqdm

# Placeholder token: words missing from the vocabulary are counted under this
# token's index when documents are vectorized.
unk = "<UNK>"

In [3]:
# Consult the PyTorch documentation for information on the functions used below:
# https://pytorch.org/docs/stable/torch.html

class FFNN(nn.Module):
    """
    Feed-forward classifier with a single hidden layer.

    The network computes LogSoftmax(W2 * ReLU(W1 * x)) and is trained with the
    negative log likelihood loss, which together amount to cross-entropy.

    Arguments:
    input_dim = size of each input vector
    h = number of hidden units
    output_dim = number of output classes (defaults to 5)
    """

    def __init__(self, input_dim, h, output_dim=5):
        super().__init__()

        self.h = h
        self.output_dim = output_dim

        # Layers are created in the order W1, W2 so that a fixed torch seed
        # yields the same initial weights as before.
        self.W1 = nn.Linear(input_dim, h)
        self.W2 = nn.Linear(h, self.output_dim)

        # The rectified linear unit; one valid choice of activation function
        self.activation = nn.ReLU()
        # Log-probabilities over the classes. The dimension is given explicitly:
        # leaving it out makes PyTorch guess it and emit a deprecation warning.
        # dim=-1 (the class axis) works for a single vector and for a batch.
        self.softmax = nn.LogSoftmax(dim=-1)
        # The cross-entropy/negative log likelihood loss; expects log-probabilities
        self.loss = nn.NLLLoss()

    def compute_Loss(self, predicted_vector, gold_label):
        """
        Returns:
        loss = NLL loss of the log-probabilities in predicted_vector
               (one row per example) against the class indices in gold_label
        """
        return self.loss(predicted_vector, gold_label)

    def forward(self, input_vector):
        """
        Returns:
        predicted_vector = log-probabilities over the output classes, computed
                           along the last dimension of input_vector
        """
        hidden_vector = self.activation(self.W1(input_vector))
        output_vector = self.W2(hidden_vector)
        predicted_vector = self.softmax(output_vector)
        return predicted_vector

In [4]:
def make_vocab(data):
    """
    Collect every distinct word that occurs in the documents of `data`.

    Returns:
    vocab = A set of strings corresponding to the vocabulary, including the <UNK> token
    """
    vocab = {unk}
    for document, _ in data:
        vocab.update(document)
    return vocab

def make_indices(vocab):
    """
    Build the lookup tables between words and vector indices.

    Words are numbered in sorted order and <UNK> always receives the last
    index. The vocabulary passed in is only read, never modified, and it does
    not need to contain <UNK> already.

    Arguments:
    vocab = A collection of strings corresponding to the vocabulary

    Returns:
    word2index = A dictionary mapping word/token to its index (a number in 0, ..., V - 1)
    index2word = A dictionary inverting the mapping of word2index
    """
    # Sorting makes the numbering deterministic; <UNK> is filtered out here
    # (instead of being removed from the caller's set) and appended last.
    vocab_list = sorted(word for word in vocab if word != unk)
    vocab_list.append(unk)

    index2word = dict(enumerate(vocab_list))
    word2index = {word: index for index, word in index2word.items()}
    return word2index, index2word

In [5]:
def load_data(data):
    """
    Read a JSON file of reviews and tokenize each one on whitespace.

    Arguments:
    data = path to a JSON file holding a list of objects with a "text" string
           and a numeric "stars" rating

    Returns:
    A list of pairs (document, y) where document is the list of whitespace
    separated tokens of "text" and y is int(stars - 1)
    """
    # Decode explicitly as UTF-8 (the standard JSON encoding) so that the
    # result does not depend on the platform's default locale encoding.
    with open(data, encoding="utf-8") as data_f:
        data_json = json.load(data_f)

    return [(elt["text"].split(), int(elt["stars"] - 1)) for elt in data_json]
    
def convert_to_vector_representation(data, word2index):
    """
    Turn every document into a vector of per-word occurrence counts.

    Returns:
    vectorized_data = A list of pairs (vector representation of input, y),
                      each vector having one entry per key of word2index
    """
    vocab_size = len(word2index)
    vectorized_data = []
    for tokens, label in data:
        counts = torch.zeros(vocab_size)
        for token in tokens:
            # Tokens absent from word2index are counted under the <UNK> index.
            fallback = word2index[unk]
            counts[word2index.get(token, fallback)] += 1
        vectorized_data.append((counts, label))
    return vectorized_data

### MAIN

In [6]:
from dataclasses import dataclass

@dataclass
class Args:
    """
    Settings for one run of the notebook.

    The field order below is also the positional order of the generated
    constructor, so it should not be rearranged.

    hidden_dim (int): hidden dimension
    epochs (int): number of epochs to train
    train_data (str): path to training data
    val_data (str): path to validation data
    test_data (str): path to test data; optional, defaults to the
        placeholder string "to fill"
    """
    hidden_dim: int
    epochs: int
    train_data: str
    val_data: str
    test_data: str = "to fill"
    
# Settings used by the rest of the notebook. The data paths are relative, so
# the JSON files are looked up in the directory the notebook is run from.
args = Args(
    hidden_dim=64,
    epochs=10,
    train_data="training.json",
    val_data="validation.json",
    test_data="test.json",
)

In [7]:
# Seed Python's and PyTorch's random number generators with the same value so
# that data shuffling and weight initialization are repeatable.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x7f7f8cd80a50>

In [8]:
# Use the first CUDA device when one is available, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
device

device(type='cuda', index=0)

In [9]:
# Load both splits; each is a list of pairs (document, y) with y in {0,1,2,3,4}
train_data, valid_data = (
    load_data(path) for path in (args.train_data, args.val_data)
)

In [10]:
# The vocabulary is built from the training split only; words that appear
# solely in the validation split are counted as <UNK> when vectorized.
vocab = make_vocab(train_data)
word2index, index2word = make_indices(vocab)

In [11]:
# vectorizing data
# NOTE: this rebinds train_data / valid_data from tokenized documents to
# (count vector, y) pairs, so this cell must not be re-run on its own --
# re-run the data loading cell first.
train_data = convert_to_vector_representation(train_data, word2index)
valid_data = convert_to_vector_representation(valid_data, word2index)

In [18]:
# Report how many examples each split contains.
for label, dataset in (
    ("Training Data Size:", train_data),
    ("Validation Data Size:", valid_data),
):
    print(label, len(dataset))

Training Data Size: 8000
Validation Data Size: 800


In [12]:
# Create model and optimizer
# The input size is taken from word2index because that is what fixes the
# length of every input vector; this stays correct even if `vocab` and the
# index mapping ever differ in size.
model = FFNN(input_dim=len(word2index), h=args.hidden_dim)
model.to(device)  # move model to device

# Plain SGD with momentum; the learning rate and momentum are fixed choices.
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

In [15]:
# Training and Validation
minibatch_size = 16


def _run_epoch(model, data, device, batch_size, optimizer=None):
    """
    Make one pass over `data` in minibatches and measure accuracy.

    Every minibatch is stacked into a single (batch, features) tensor and sent
    through the model in one forward pass. The final minibatch may be smaller
    than batch_size, so no example is skipped. Switching the model between
    train/eval mode and disabling gradients is left to the caller.

    Arguments:
    model = the network; must provide compute_Loss(predicted, gold)
    data = a list of pairs (input vector, gold label)
    device = device the minibatch tensors are moved to
    batch_size = maximum number of examples per minibatch
    optimizer = if given, the parameters are updated after every minibatch
                using the mean loss over that minibatch; if None, only
                predictions are computed

    Returns:
    accuracy = fraction of examples classified correctly (nan if data is empty)
    """
    correct = total = 0
    for start in tqdm(range(0, len(data), batch_size)):
        batch = data[start : start + batch_size]
        inputs = torch.stack([vector for vector, _ in batch]).to(device)
        gold_labels = torch.tensor([label for _, label in batch], device=device)

        if optimizer is not None:
            optimizer.zero_grad()

        predicted = model(inputs)

        if optimizer is not None:
            # The loss is averaged over the minibatch, which matches summing
            # the per-example losses and dividing by the minibatch size.
            loss = model.compute_Loss(predicted, gold_labels)
            loss.backward()
            optimizer.step()

        correct += int((predicted.argmax(dim=1) == gold_labels).sum())
        total += len(batch)

    return correct / total if total else float("nan")


print("========== Training for {} epochs ==========".format(args.epochs))

for epoch in range(args.epochs):
    # Training
    model.train()
    random.shuffle(train_data)  # Good practice to shuffle order of training data

    print("Training started for epoch {}".format(epoch + 1))
    train_accuracy = _run_epoch(
        model, train_data, device, minibatch_size, optimizer=optimizer
    )
    print("Training completed for epoch {}".format(epoch + 1))
    print("Training accuracy for epoch {}: {}".format(epoch + 1, train_accuracy))

    # Validation: no parameter updates, so gradients are not tracked
    model.eval()
    with torch.no_grad():
        print("Validation started for epoch {}".format(epoch + 1))
        valid_accuracy = _run_epoch(model, valid_data, device, minibatch_size)
        print("Validation completed for epoch {}".format(epoch + 1))
        print("Validation accuracy for epoch {}: {}".format(epoch + 1, valid_accuracy))

Training started for epoch 1


  return self._call_impl(*args, **kwargs)
100%|██████████| 500/500 [00:07<00:00, 65.39it/s]


Training completed for epoch 1
Training accuracy for epoch 1: 0.792625
Validation started for epoch 1


100%|██████████| 50/50 [00:00<00:00, 125.52it/s]


Validation completed for epoch 1
Validation accuracy for epoch 1: 0.6125
Training started for epoch 2


100%|██████████| 500/500 [00:07<00:00, 67.10it/s]


Training completed for epoch 2
Training accuracy for epoch 2: 0.79525
Validation started for epoch 2


100%|██████████| 50/50 [00:00<00:00, 126.89it/s]


Validation completed for epoch 2
Validation accuracy for epoch 2: 0.5925
Training started for epoch 3


100%|██████████| 500/500 [00:07<00:00, 66.07it/s]


Training completed for epoch 3
Training accuracy for epoch 3: 0.857125
Validation started for epoch 3


100%|██████████| 50/50 [00:00<00:00, 129.16it/s]


Validation completed for epoch 3
Validation accuracy for epoch 3: 0.585
Training started for epoch 4


100%|██████████| 500/500 [00:07<00:00, 67.69it/s]


Training completed for epoch 4
Training accuracy for epoch 4: 0.869875
Validation started for epoch 4


100%|██████████| 50/50 [00:00<00:00, 129.25it/s]


Validation completed for epoch 4
Validation accuracy for epoch 4: 0.56125
Training started for epoch 5


100%|██████████| 500/500 [00:07<00:00, 67.39it/s]


Training completed for epoch 5
Training accuracy for epoch 5: 0.898875
Validation started for epoch 5


100%|██████████| 50/50 [00:00<00:00, 130.32it/s]


Validation completed for epoch 5
Validation accuracy for epoch 5: 0.59375
Training started for epoch 6


100%|██████████| 500/500 [00:07<00:00, 68.20it/s]


Training completed for epoch 6
Training accuracy for epoch 6: 0.910375
Validation started for epoch 6


100%|██████████| 50/50 [00:00<00:00, 129.52it/s]


Validation completed for epoch 6
Validation accuracy for epoch 6: 0.59875
Training started for epoch 7


100%|██████████| 500/500 [00:07<00:00, 66.54it/s]


Training completed for epoch 7
Training accuracy for epoch 7: 0.89275
Validation started for epoch 7


100%|██████████| 50/50 [00:00<00:00, 129.58it/s]


Validation completed for epoch 7
Validation accuracy for epoch 7: 0.58875
Training started for epoch 8


100%|██████████| 500/500 [00:07<00:00, 66.71it/s]


Training completed for epoch 8
Training accuracy for epoch 8: 0.89225
Validation started for epoch 8


100%|██████████| 50/50 [00:00<00:00, 128.08it/s]


Validation completed for epoch 8
Validation accuracy for epoch 8: 0.6075
Training started for epoch 9


100%|██████████| 500/500 [00:07<00:00, 66.79it/s]


Training completed for epoch 9
Training accuracy for epoch 9: 0.912375
Validation started for epoch 9


100%|██████████| 50/50 [00:00<00:00, 126.20it/s]


Validation completed for epoch 9
Validation accuracy for epoch 9: 0.58625
Training started for epoch 10


100%|██████████| 500/500 [00:07<00:00, 66.46it/s]


Training completed for epoch 10
Training accuracy for epoch 10: 0.93
Validation started for epoch 10


100%|██████████| 50/50 [00:00<00:00, 127.48it/s]

Validation completed for epoch 10
Validation accuracy for epoch 10: 0.57875





In [14]:
# Testing