# Question 2
(a) Specify all the 5 classes you used after converting from the original label set to the new setting.

(b) Describe what aggregation methods you have tried and which is finally adopted (and why). Explain the detailed function of the aggregation method you used. If you have tested different aggregation methods, list their accuracy results to support your claim.

(c) Describe what neural network you used to produce the final vector representation of each word and what are the mathematical functions used for the forward computation (i.e., from the pretrained word vectors to the final label of each word). Give the detailed setting of the network including which parameters are being updated, what are their sizes, and what is the length of the final vector representation of each word to be fed to the softmax classifier.

(d) Report how many epochs you used for training, as well as the running time.

(e) Report the accuracy on the test set, as well as the accuracy on the development set for each
epoch during training.

# Import libraries

In [1]:
import random
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

from tqdm import trange
from torch.autograd import Variable
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from nltk.tokenize import word_tokenize
import nltk
nltk.download('punkt')
import gensim.downloader

import warnings
warnings.filterwarnings('ignore')

[nltk_data] Downloading package punkt to C:\Users\Jeremy U
[nltk_data]     Keat\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
word2vec_goog1e_news: gensim.models.keyedvectors.KeyedVectors = gensim.downloader.load('word2vec-google-news-300')
word2vec_goog1e_news.add_vector("<pad>", np.zeros(300))
pad_index = word2vec_goog1e_news.key_to_index["<pad>"]
embedding_weights = torch.FloatTensor(word2vec_goog1e_news.vectors)
vocab = word2vec_goog1e_news.key_to_index

In [3]:
device = torch.device(torch.cuda.current_device() if torch.cuda.is_available() else "cpu")
print(f"Using: {device}")

Using: cuda:0


The code below tokenizes the modified csv file containing TREC dataset and proceeds to convert the tokens (words) into word2vec indexes. In addition, we format the labels to the correct input dimensions for the model. 

# Import Dataset

In [4]:
def tokenize_sentences(list_of_text):
    tokenized = []
    for sentence in list_of_text:
        tokenized.append(word_tokenize(sentence.lower()))
    return tokenized

def format_label(label):
    return torch.unsqueeze(torch.tensor(label.to_list()), axis=1).tolist()

def indexify(data):
    sentences = []
    for sentence in data:
        s = [vocab[token] if token in vocab
            else vocab['UNK']
            for token in sentence]
        sentences.append(s)
    return sentences

In [5]:
# modified csv files are derived from running Q2_preprocessing.ipynb
training_data = pd.read_csv(filepath_or_buffer="TREC_dataset/modified_training_data.csv", sep=",") 
test_data = pd.read_csv(filepath_or_buffer="TREC_dataset/modified_test_data.csv", sep=",")

X = training_data["text"]
y = training_data["label-coarse"]
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=500) # get 500 samples for validation set

X_test = test_data["text"]
y_test = test_data["label-coarse"]

X_train_lst = X_train.to_list()
X_val_lst = X_val.to_list()
X_test_lst = X_test.to_list()

X_train_tokenized = tokenize_sentences(X_train_lst)
X_val_tokenized = tokenize_sentences(X_val_lst)
X_test_tokenized = tokenize_sentences(X_test_lst)

no_of_labels = 5

In [6]:
X_train_tokenized_indexified = indexify(X_train_tokenized)
X_val_tokenized_indexified = indexify(X_val_tokenized)
X_test_tokenized_indexified = indexify(X_test_tokenized)

y_train_formatted = format_label(y_train)
y_val_formatted = format_label(y_val)
y_test_formatted = format_label(y_test)

In [7]:
def data_iterator(sentences, labels, total_size: int, batch_size: int, shuffle: bool=False):
    # make a list that decides the order in which we go over the data- this avoids explicit shuffling of data
    order = list(range(total_size))
    if shuffle:
        random.seed(230)
        random.shuffle(order)

    # one pass over data
    for i in range((total_size+1)//batch_size):
        # fetch sentences and tags
        batch_sentences = [sentences[idx] for idx in order[i*batch_size:(i+1)*batch_size]]
        batch_tags = [labels[idx] for idx in order[i*batch_size:(i+1)*batch_size]]

        # compute length of longest sentence in batch
        batch_max_len = max([len(s) for s in batch_sentences])

        # prepare a numpy array with the data, initialising the data with pad_ind and all labels with -1
        # initialising labels to -1 differentiates tokens with tags from PADding tokens
        batch_data = vocab['<pad>']*np.ones((len(batch_sentences), batch_max_len))
        batch_labels = np.array(batch_tags).squeeze()

        # copy the data to the numpy array
        for j in range(len(batch_sentences)):
            cur_len = len(batch_sentences[j])
            batch_data[j][:cur_len] = batch_sentences[j]

        # since all data are indices, we convert them to torch LongTensors
        batch_data, batch_labels = torch.LongTensor(batch_data), torch.LongTensor(batch_labels)
        # convert them to Variables to record operations in the computational graph
        batch_data, batch_labels = Variable(batch_data), Variable(batch_labels)

        yield batch_data, batch_labels, batch_sentences

The neural network used below is an LSTM-based network that predicts the sentence classification tags for each sentence. It consists of six main components:

1. An embedding layer that maps each token to its embedding vector.
2. An LSTM layer that processes the embedded tokens and produces LSTM outputs for each token.
3. An aggregation function that summarizes each token output vector into one vector.
4. Dropout layer for regularization purposes.
5. Batch normalization layers for regularization purposes.
6. Two fully connected layer (fc). The forward computation involves the following steps:

Embedding                 : Mapping tokens to their embedding vectors using the embedding layer.  
LSTM                      : Applying the LSTM on the embedded tokens, resulting in LSTM outputs for each token.  
Reshaping                 : Making the output contiguous in memory and reshaping it for further processing.  
Average pooling           : Computes average of each token output into one single word embedding.  
Fully Connected Layers    : Applying the fully connected layers to obtain word embeddings before the softmax.  
Log Softmax               : Applying log softmax to the output for numerical stability.

In [8]:
class Net(nn.Module):
    def __init__(self, embedding_weights, embedding_dim, lstm_hidden_dim, number_of_tags):
        super(Net, self).__init__()
        # the embedding takes as input the vocab_size and the embedding_dim and pad_index
        self.embedding = nn.Embedding.from_pretrained(embedding_weights, freeze=True, padding_idx=pad_index)

        # the LSTM takes as input the size of its input (embedding_dim), its hidden size
        self.lstm = nn.LSTM(embedding_dim, lstm_hidden_dim, batch_first=True)
        for name, param in self.lstm.named_parameters():
            if 'weight' in name:
                nn.init.xavier_uniform_(param.data)
            elif 'bias' in name:
                nn.init.zeros_(param.data)
        
        self.batch_norm1 = nn.BatchNorm1d(lstm_hidden_dim)
        self.dropout = nn.Dropout(0.005) 

        # the fully connected layer transforms the output to give the final output layer
        self.fc1 = nn.Linear(lstm_hidden_dim, 150)
        self.batch_norm2 = nn.BatchNorm1d(150)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(150, number_of_tags)
        self.batch_norm3 = nn.BatchNorm1d(number_of_tags)

    def forward(self, s, lengths):

        # apply the embedding layer that maps each token to its embedding
        s = self.embedding(s)

        # pack the sequences before feeding them to the LSTM
        packed_input = pack_padded_sequence(s, lengths, batch_first=True, enforce_sorted=False)
        packed_output, _ = self.lstm(packed_input)

        # unpack the sequences after passing through the LSTM
        padded_output, _ = pad_packed_sequence(packed_output, batch_first=True)
        batch_size, seq_len, embedding_dim = padded_output.size()
        s = self.batch_norm1(padded_output.view(-1, embedding_dim))
        s = self.dropout(s)
        
        # Reshape back to the original shape
        s = s.view(batch_size, seq_len, embedding_dim)
        
        s = torch.mean(s, dim=1)  # mean pooling
        s = self.fc1(s)
        s = self.batch_norm2(s)
        s = self.relu(s)
        # apply the fully connected layer and obtain the output (before softmax) for each token
        s = self.fc2(s)
        out = self.batch_norm3(s)
        # apply log softmax on each token's output
        return F.log_softmax(out, dim=1)

In [9]:
def accuracy(outputs, labels):
    outputs = np.argmax(outputs.cpu().detach().numpy(), axis=1)
    labels = labels.squeeze()
    # compare outputs with labels
    return np.sum([1 if first == second else 0 for first, second in zip(labels, outputs)]) / float(len(labels))

def loss_fn(outputs, labels):
    loss = F.cross_entropy(outputs, labels.squeeze())
    return loss

class EarlyStopper:
    def __init__(self, patience=3, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.min_validation_loss = np.inf

    def early_stop(self, validation_loss):
        if validation_loss < self.min_validation_loss:
            self.min_validation_loss = validation_loss
            self.counter = 0
        elif validation_loss > (self.min_validation_loss + self.min_delta):
            self.counter += 1
            if self.counter >= self.patience:
                return True
        return False

In [10]:
class RunningAverage:
    """A simple class that maintains the running average of a quantity

    Example:
    ```
    loss_avg = RunningAverage()
    loss_avg.update(2)
    loss_avg.update(4)
    loss_avg() = 3
    ```
    """

    def __init__(self):
        self.steps = 0
        self.total = 0

    def update(self, val):
        self.total += val
        self.steps += 1

    def __call__(self):
        return self.total / float(self.steps)

Training and Eval code:

In [11]:
def train(model, optimizer, loss_fn, data_iterator, num_steps):
    """Train the model on `num_steps` batches
    """

    # set model to training mode
    model.train()

    # summary for current training loop and a running average object for loss
    train_loss_avg = RunningAverage()

    # Use tqdm for progress bar
    t = trange(num_steps)
    for i in t:
        # fetch the next training batch
        train_batch, labels_batch, _ = next(data_iterator)
        train_batch = train_batch.to(device)
        labels_batch = labels_batch.to(device)
        
        # compute model output and loss
        seq_lengths = torch.LongTensor(list(map(len, train_batch)))
        output_batch = model(train_batch, seq_lengths)
        loss = loss_fn(output_batch, labels_batch)

        # clear previous gradients, compute gradients of all variables wrt loss
        optimizer.zero_grad()
        loss.backward()
        # performs updates using calculated gradients
        optimizer.step()

        # update the average loss
        train_loss_avg.update(loss.item())
        t.set_postfix(loss='{:05.3f}'.format(train_loss_avg()))
    return train_loss_avg()

def evaluate(model, loss_fn, data_iterator, num_steps):
    """Evaluate the model on `num_steps` batches.
    """

    # set model to evaluation mode
    model.eval()

    validation_loss_avg = RunningAverage()
    validation_accuracy_avg = RunningAverage()

    # compute metrics over the dataset
    for _ in range(num_steps):
        # fetch the next evaluation batch
        data_batch, labels_batch, _ = next(data_iterator)
        data_batch = data_batch.to(device)
        labels_batch = labels_batch.to(device)

        # compute model output
        seq_lengths = torch.LongTensor(list(map(len, data_batch)))
        output_batch = model(data_batch, seq_lengths)
        loss = loss_fn(output_batch, labels_batch)
        validation_loss_avg.update(loss.item())
        accuracy_val = accuracy(output_batch, labels_batch)
        validation_accuracy_avg.update(accuracy_val)

    print(f"{validation_loss_avg()=}")
    print(f"{validation_accuracy_avg()=}")
    
    return validation_loss_avg(), validation_accuracy_avg()

def train_and_evaluate(
        model,
        train_sentences,
        train_labels,
        val_sentences,
        val_labels,
        num_epochs: int,
        batch_size: int,
        optimizer,
        loss_fn
):
    early_stopper = EarlyStopper(patience=5, min_delta=0.1)

    accuracies_across_epochs = []
    
    for epoch in range(num_epochs):
        # Run one epoch
        print("Epoch {}/{}".format(epoch + 1, num_epochs))

        # compute number of batches in one epoch (one full pass over the training set)
        num_steps = (len(train_sentences) + 1) // batch_size
        train_data_iterator = data_iterator(train_sentences, train_labels, len(train_sentences), batch_size, shuffle=True)
        train(model, optimizer, loss_fn, train_data_iterator, num_steps)

        # Evaluate for one epoch on validation set
        num_steps = (len(val_sentences) + 1) // batch_size
        val_data_iterator = data_iterator(val_sentences, val_labels, len(val_sentences), batch_size, shuffle=False)
        loss, accuracy = evaluate(model, loss_fn, val_data_iterator, num_steps)
        accuracies_across_epochs.append(accuracy)

        if early_stopper.early_stop(loss):             
            break
    
    return accuracies_across_epochs


Start the training process

In [12]:
from time import time

model = Net(embedding_weights, 300, 300, no_of_labels).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.005)

if (os.path.isfile("model_weights_average_pooling.pth")):
    model.load_state_dict(torch.load('model_weights_average_pooling.pth'))
else:
    start_time = time()
    accuracies_across_epochs = train_and_evaluate(model, X_train_tokenized_indexified, y_train_formatted, X_val_tokenized_indexified, y_val_formatted, 100, 32, optimizer, loss_fn)
    execution_time = time() - start_time
    torch.save(model.state_dict(), 'model_weights_average_pooling.pth')
    
print(f"{execution_time=}")

Epoch 1/100


100%|██████████| 154/154 [00:02<00:00, 71.72it/s, loss=0.783] 


validation_loss_avg()=0.6101372718811036
validation_accuracy_avg()=0.7979166666666667
Epoch 2/100


100%|██████████| 154/154 [00:01<00:00, 122.27it/s, loss=0.417]


validation_loss_avg()=0.42489729126294457
validation_accuracy_avg()=0.8604166666666667
Epoch 3/100


100%|██████████| 154/154 [00:01<00:00, 116.89it/s, loss=0.277]


validation_loss_avg()=0.3751857548952103
validation_accuracy_avg()=0.8729166666666667
Epoch 4/100


100%|██████████| 154/154 [00:01<00:00, 117.30it/s, loss=0.188]


validation_loss_avg()=0.44155601461728416
validation_accuracy_avg()=0.8729166666666667
Epoch 5/100


100%|██████████| 154/154 [00:01<00:00, 124.21it/s, loss=0.126]


validation_loss_avg()=0.42595240473747253
validation_accuracy_avg()=0.8645833333333334
Epoch 6/100


100%|██████████| 154/154 [00:01<00:00, 117.27it/s, loss=0.076]


validation_loss_avg()=0.39121079444885254
validation_accuracy_avg()=0.86875
Epoch 7/100


100%|██████████| 154/154 [00:01<00:00, 122.06it/s, loss=0.061]


validation_loss_avg()=0.40027142961819967
validation_accuracy_avg()=0.8791666666666667
Epoch 8/100


100%|██████████| 154/154 [00:01<00:00, 112.58it/s, loss=0.041]


validation_loss_avg()=0.35934528708457947
validation_accuracy_avg()=0.8916666666666667
Epoch 9/100


100%|██████████| 154/154 [00:01<00:00, 114.42it/s, loss=0.029]


validation_loss_avg()=0.35651420752207436
validation_accuracy_avg()=0.8895833333333333
Epoch 10/100


100%|██████████| 154/154 [00:01<00:00, 113.51it/s, loss=0.016]


validation_loss_avg()=0.352587553858757
validation_accuracy_avg()=0.8958333333333334
Epoch 11/100


100%|██████████| 154/154 [00:01<00:00, 116.21it/s, loss=0.011]


validation_loss_avg()=0.3626897434393565
validation_accuracy_avg()=0.9020833333333333
Epoch 12/100


100%|██████████| 154/154 [00:01<00:00, 108.98it/s, loss=0.006]


validation_loss_avg()=0.3521660258372625
validation_accuracy_avg()=0.9020833333333333
Epoch 13/100


100%|██████████| 154/154 [00:01<00:00, 112.73it/s, loss=0.005]


validation_loss_avg()=0.35539696415265404
validation_accuracy_avg()=0.9020833333333333
Epoch 14/100


100%|██████████| 154/154 [00:01<00:00, 111.81it/s, loss=0.004]


validation_loss_avg()=0.35636792580286664
validation_accuracy_avg()=0.9020833333333333
Epoch 15/100


100%|██████████| 154/154 [00:01<00:00, 111.26it/s, loss=0.003]


validation_loss_avg()=0.3601593762636185
validation_accuracy_avg()=0.9041666666666667
Epoch 16/100


100%|██████████| 154/154 [00:01<00:00, 114.68it/s, loss=0.003]


validation_loss_avg()=0.3633328328529994
validation_accuracy_avg()=0.8979166666666667
Epoch 17/100


100%|██████████| 154/154 [00:01<00:00, 112.92it/s, loss=0.002]


validation_loss_avg()=0.36756266752878824
validation_accuracy_avg()=0.9041666666666667
Epoch 18/100


100%|██████████| 154/154 [00:01<00:00, 115.30it/s, loss=0.002]


validation_loss_avg()=0.371139340599378
validation_accuracy_avg()=0.9
Epoch 19/100


100%|██████████| 154/154 [00:01<00:00, 114.24it/s, loss=0.002]


validation_loss_avg()=0.37557741403579714
validation_accuracy_avg()=0.9020833333333333
Epoch 20/100


100%|██████████| 154/154 [00:01<00:00, 113.88it/s, loss=0.002]


validation_loss_avg()=0.38028222918510435
validation_accuracy_avg()=0.9020833333333333
Epoch 21/100


100%|██████████| 154/154 [00:01<00:00, 117.63it/s, loss=0.001]


validation_loss_avg()=0.3838713437318802
validation_accuracy_avg()=0.9041666666666667
Epoch 22/100


100%|██████████| 154/154 [00:01<00:00, 117.51it/s, loss=0.001]


validation_loss_avg()=0.3887585868438085
validation_accuracy_avg()=0.9020833333333333
Epoch 23/100


100%|██████████| 154/154 [00:01<00:00, 111.44it/s, loss=0.001]


validation_loss_avg()=0.39255629281202953
validation_accuracy_avg()=0.9020833333333333
Epoch 24/100


100%|██████████| 154/154 [00:01<00:00, 118.69it/s, loss=0.001]


validation_loss_avg()=0.3970261186361313
validation_accuracy_avg()=0.9020833333333333
Epoch 25/100


100%|██████████| 154/154 [00:01<00:00, 101.28it/s, loss=0.001]


validation_loss_avg()=0.40310676395893097
validation_accuracy_avg()=0.9020833333333333
Epoch 26/100


100%|██████████| 154/154 [00:01<00:00, 87.47it/s, loss=0.001]


validation_loss_avg()=0.4063160638014475
validation_accuracy_avg()=0.9020833333333333
Epoch 27/100


100%|██████████| 154/154 [00:01<00:00, 101.60it/s, loss=0.001]


validation_loss_avg()=0.4107486108938853
validation_accuracy_avg()=0.9020833333333333
Epoch 28/100


100%|██████████| 154/154 [00:01<00:00, 110.77it/s, loss=0.001]


validation_loss_avg()=0.41550938189029696
validation_accuracy_avg()=0.9020833333333333
Epoch 29/100


100%|██████████| 154/154 [00:01<00:00, 115.97it/s, loss=0.001]


validation_loss_avg()=0.42004578610261284
validation_accuracy_avg()=0.9020833333333333
Epoch 30/100


100%|██████████| 154/154 [00:01<00:00, 110.63it/s, loss=0.001]


validation_loss_avg()=0.4242940535147985
validation_accuracy_avg()=0.9020833333333333
Epoch 31/100


100%|██████████| 154/154 [00:01<00:00, 114.59it/s, loss=0.001]


validation_loss_avg()=0.4292596489191055
validation_accuracy_avg()=0.9
Epoch 32/100


100%|██████████| 154/154 [00:01<00:00, 108.74it/s, loss=0.001]


validation_loss_avg()=0.4331181784470876
validation_accuracy_avg()=0.9020833333333333
Epoch 33/100


100%|██████████| 154/154 [00:01<00:00, 111.19it/s, loss=0.000]


validation_loss_avg()=0.4365712672472
validation_accuracy_avg()=0.9
Epoch 34/100


100%|██████████| 154/154 [00:01<00:00, 107.82it/s, loss=0.000]


validation_loss_avg()=0.4416680465141932
validation_accuracy_avg()=0.9
Epoch 35/100


100%|██████████| 154/154 [00:01<00:00, 117.71it/s, loss=0.000]


validation_loss_avg()=0.44481855432192485
validation_accuracy_avg()=0.8979166666666667
Epoch 36/100


100%|██████████| 154/154 [00:01<00:00, 114.22it/s, loss=0.000]


validation_loss_avg()=0.4502387374639511
validation_accuracy_avg()=0.8958333333333334
Epoch 37/100


100%|██████████| 154/154 [00:01<00:00, 119.11it/s, loss=0.000]


validation_loss_avg()=0.4559207300345103
validation_accuracy_avg()=0.8958333333333334
Epoch 38/100


100%|██████████| 154/154 [00:01<00:00, 116.07it/s, loss=0.000]


validation_loss_avg()=0.4598547379175822
validation_accuracy_avg()=0.8958333333333334
Epoch 39/100


100%|██████████| 154/154 [00:01<00:00, 116.61it/s, loss=0.000]


validation_loss_avg()=0.463978902498881
validation_accuracy_avg()=0.8958333333333334
Epoch 40/100


100%|██████████| 154/154 [00:01<00:00, 109.80it/s, loss=0.000]


validation_loss_avg()=0.4690431276957194
validation_accuracy_avg()=0.8958333333333334
Epoch 41/100


100%|██████████| 154/154 [00:01<00:00, 106.50it/s, loss=0.000]


validation_loss_avg()=0.47394442061583203
validation_accuracy_avg()=0.8958333333333334
execution_time=60.83588123321533


# Print out model parameters

In [16]:
for name, param in model.named_parameters():
    if param.requires_grad:
        print(name, param.data.shape)

lstm.weight_ih_l0 torch.Size([1200, 300])
lstm.weight_hh_l0 torch.Size([1200, 300])
lstm.bias_ih_l0 torch.Size([1200])
lstm.bias_hh_l0 torch.Size([1200])
batch_norm1.weight torch.Size([300])
batch_norm1.bias torch.Size([300])
fc1.weight torch.Size([150, 300])
fc1.bias torch.Size([150])
batch_norm2.weight torch.Size([150])
batch_norm2.bias torch.Size([150])
fc2.weight torch.Size([5, 150])
fc2.bias torch.Size([5])
batch_norm3.weight torch.Size([5])
batch_norm3.bias torch.Size([5])


## Final Test Accuracy

In [13]:
# Simple check with test dataset
model.eval()
test_data_iterator = data_iterator(X_test_tokenized_indexified, y_test_formatted, len(X_test_tokenized_indexified), len(X_test_tokenized_indexified), shuffle=False)
test_batch, labels_batch, test_sentences = next(test_data_iterator)

seq_lengths = torch.LongTensor(list(map(len, test_batch)))
output_batch = model(test_batch.to(device),seq_lengths)
final_test_accuracy = accuracy(output_batch, labels_batch.to(device))
print(f"{final_test_accuracy=}")

final_test_accuracy=0.922


In [14]:
# display accuracies on development set per epoch
for epoch, accuracy in enumerate(accuracies_across_epochs):
    print(f"Accuracy on Development Set for Epoch {epoch + 1}: {accuracy:.4f}")

Accuracy on Development Set for Epoch 1: 0.7979
Accuracy on Development Set for Epoch 2: 0.8604
Accuracy on Development Set for Epoch 3: 0.8729
Accuracy on Development Set for Epoch 4: 0.8729
Accuracy on Development Set for Epoch 5: 0.8646
Accuracy on Development Set for Epoch 6: 0.8688
Accuracy on Development Set for Epoch 7: 0.8792
Accuracy on Development Set for Epoch 8: 0.8917
Accuracy on Development Set for Epoch 9: 0.8896
Accuracy on Development Set for Epoch 10: 0.8958
Accuracy on Development Set for Epoch 11: 0.9021
Accuracy on Development Set for Epoch 12: 0.9021
Accuracy on Development Set for Epoch 13: 0.9021
Accuracy on Development Set for Epoch 14: 0.9021
Accuracy on Development Set for Epoch 15: 0.9042
Accuracy on Development Set for Epoch 16: 0.8979
Accuracy on Development Set for Epoch 17: 0.9042
Accuracy on Development Set for Epoch 18: 0.9000
Accuracy on Development Set for Epoch 19: 0.9021
Accuracy on Development Set for Epoch 20: 0.9021
Accuracy on Development Set f

In [15]:
def print_sentence_label(sentence: str) -> int:
    model.eval()
    sentence_tokenized = word_tokenize(sentence.lower())
    sentence_as_id = [
        vocab[token] if token in vocab
        else vocab['UNK']
        for token in sentence_tokenized
    ]
    seq_lengths = torch.LongTensor([len(sentence_as_id)])
    input = torch.tensor(sentence_as_id).unsqueeze(0).to(device)
    output = model(input, seq_lengths).to(device)
    label = np.argmax(output.detach().cpu().numpy())
    print(f"sentence = {sentence}, label = {label}")

# Checking results
print_sentence_label("What is a squirrel?")
print_sentence_label("Is Singapore located in Southeast Asia?")
print_sentence_label("Is Singapore in China?")
print_sentence_label("Name 11 famous martyrs .")
print_sentence_label("What ISPs exist in the Caribbean ?")
print_sentence_label("How many cars are manufactured every day?")

sentence = What is a squirrel?, label = 0
sentence = Is Singapore located in Southeast Asia?, label = 3
sentence = Is Singapore in China?, label = 1
sentence = Name 11 famous martyrs ., label = 4
sentence = What ISPs exist in the Caribbean ?, label = 4
sentence = How many cars are manufactured every day?, label = 4
