In [37]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class BiLSTMSentiment(nn.Module):

    def __init__(self, embed_model, vocab_size, label_size, embedding_dim, hidden_dim, batch_size, seq_length, num_layers, dropout=0.5, freeze_embeddings = True):
        super(BiLSTMSentiment, self).__init__()
        # set class vars
        self.embedding_dim = embedding_dim
        
        # 1. embedding layer
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        # set weights to pre-trained
        self.embeddings.weight = nn.Parameter(torch.from_numpy(embed_model.vectors)) # all vectors
        # (optional) freeze embedding weights
        if freeze_embeddings:
            self.embeddings.requires_grad = False

        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.seq_length = seq_length
        self.dropout = dropout
        self.num_layers = num_layers

        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(batch_first=True, input_size=embedding_dim, num_layers =num_layers, hidden_size=hidden_dim, bidirectional=True)
        self.hidden2label = nn.Linear(hidden_dim*2, label_size)

    def forward(self, sentence):
        # print(sentence.shape)
        x = self.embeddings(sentence)
        batch_size = sentence.shape[0]
        # print(x.shape)
        _, (lstm_hidden, _) = self.lstm(x)
        final_state = lstm_hidden.view(self.num_layers, 2, batch_size, self.hidden_dim)[-1]
        h_1, h_2 = final_state[0], final_state[1]
        # final_hidden_state = h_1 + h_2 
         # Add both states (requires changes to the input size of first linear layer + attention layer)
        final_hidden_state = torch.cat((h_1, h_2), 1)
        logits = self.hidden2label(final_hidden_state)
        # log_probs = F.log_softmax(logits)
        return logits

In [4]:
import numpy as np
def pad_features(tokenized_text, seq_length):
    ''' Return features of tokenized_reviews, where each review is padded with 0's 
        or truncated to the input seq_length.
    '''
    
    # getting the correct rows x cols shape
    features = np.zeros((len(tokenized_text), seq_length), dtype=int)

    # for each review, I grab that review and 
    for i, row in enumerate(tokenized_text):
        features[i, -len(row):] = np.array(row)[:seq_length]
    
    return features

# convert reviews to tokens
def tokenize_all_text(embed_lookup, data):
    # split each review into a list of words
    words = [item['text'].split() for item in data.values()]

    tokenized_text = []
    for text in words:
        ints = []
        for word in text:
            try:
                idx = embed_lookup.key_to_index[word]
            except: 
                idx = 0
            ints.append(idx)
        tokenized_text.append(ints)
    
    return tokenized_text

# import Word2Vec loading capabilities
from gensim.models import KeyedVectors

# Creating the model
embed_lookup = KeyedVectors.load_word2vec_format('word2vec_model/GoogleNews-vectors-negative300-SLIM.bin', 
                                                 binary=True)
# store pretrained vocab
pretrained_words = []
for word in embed_lookup.index_to_key:
    pretrained_words.append(word)


In [3]:
# LREC
import json
with open("../preprocess/lrec_split.json") as f:
    lrec_data = json.load(f)

lrec_tokenized_text = tokenize_all_text(embed_lookup, lrec_data['train'])
lrec_test_tokenized_text = tokenize_all_text(embed_lookup, lrec_data['test'])

# Test your implementation!

seq_length = 15

lrec_train_features = pad_features(lrec_tokenized_text, seq_length=seq_length)

## test statements - do not change - ##
assert len(lrec_train_features)==len(lrec_tokenized_text), "Features should have as many rows as reviews."
assert len(lrec_train_features[0])==seq_length, "Each feature row should contain seq_length values."

# print first 8 values of the first 20 batches 
print(lrec_train_features[:20,:8])

# Test your implementation!

lrec_test_features = pad_features(lrec_test_tokenized_text, seq_length=seq_length)

## test statements - do not change - ##
assert len(lrec_test_features)==len(lrec_test_tokenized_text), "Features should have as many rows as reviews."
assert len(lrec_test_features[0])==seq_length, "Each feature row should contain seq_length values."

# print first 8 values of the first 20 batches 
print(lrec_test_features[:20,:8])


lrec_train_labels = np.array([item['label'] for item in lrec_data['train'].values()])
lrec_test_labels = np.array([item['label'] for item in lrec_data['test'].values()])

print(lrec_test_labels[:20])

split_frac = 0.8

## split data into training, validation, and test data (features and labels, x and y)

split_idx = int(len(lrec_train_features)*split_frac)
lrec_train_x, lrec_valid_x = lrec_train_features[:split_idx], lrec_train_features[split_idx:]
lrec_train_y, lrec_valid_y = lrec_train_labels[:split_idx], lrec_train_labels[split_idx:]

## print out the shapes of your resultant feature data
print("\t\t\tFeature Shapes:")
print("Train set: \t\t{}".format(lrec_train_x.shape), 
      "\nValidation set: \t{}".format(lrec_valid_x.shape),
      "\nTest set: \t\t{}".format(lrec_test_features.shape))

from torch.utils.data import TensorDataset, DataLoader

# create Tensor datasets
lrec_train_data = TensorDataset(torch.from_numpy(lrec_train_x), torch.from_numpy(lrec_train_y))
lrec_valid_data = TensorDataset(torch.from_numpy(lrec_valid_x), torch.from_numpy(lrec_valid_y))
lrec_test_data = TensorDataset(torch.from_numpy(lrec_test_features), torch.from_numpy(lrec_test_labels))

# dataloaders
batch_size = 4

# shuffling and batching data
lrec_train_loader = DataLoader(lrec_train_data, shuffle=True, batch_size=batch_size)
lrec_valid_loader = DataLoader(lrec_valid_data, shuffle=True, batch_size=batch_size)
lrec_test_loader = DataLoader(lrec_test_data, shuffle=True, batch_size=batch_size)

[[     0      0      0      0      0      0  31722     78]
 [     0      0      0      0      0    132      3      9]
 [     0      0      0      0      0      0      0      0]
 [128719    814      0    134      4      9   2032   8748]
 [     0      0      0      0      0      0      0     78]
 [    83     34   6510      9   3042      0      9 137375]
 [   132    330     90     45     34   9593    545   8229]
 [    50   1748      9    708      4   1172  17670  77415]
 [     0      0      0      0      0      0      0   2873]
 [     0      0      0   1715   2962      0    245    116]
 [     0      0      0      0      0      0      0      0]
 [  4779     78     21   1090     56   4365   1183      9]
 [     0      0      0      0     78     83      9   6149]
 [   132    171      9   9519      0      9    527   5129]
 [   494     12    124   3887     12     38     45      2]
 [     0      0      0     78      3      9   1840      0]
 [  4779     78     21   1090      0      9  12761     5

In [40]:
# Instantiate the model w/ hyperparams


# lrec lstm

vocab_size = len(pretrained_words)
output_size = 3 # binary class (1 or 0)
embedding_dim = len(embed_lookup[pretrained_words[0]]) # 300-dim vectors

hidden_dim = 128

batch_size = 4
seq_length = 15

num_layers =2

lrec_lstm = BiLSTMSentiment(embed_lookup, vocab_size, output_size, embedding_dim,
                   hidden_dim, batch_size, seq_length, num_layers)

print(lrec_lstm)

# loss and optimization functions
lr=0.001

criterion = nn.CrossEntropyLoss()
lrec_optimizer = torch.optim.Adam(lrec_lstm.parameters(), lr=lr, weight_decay = 0.01)

BiLSTMSentiment(
  (embeddings): Embedding(299567, 300)
  (lstm): LSTM(300, 128, num_layers=2, batch_first=True, bidirectional=True)
  (hidden2label): Linear(in_features=256, out_features=3, bias=True)
)


In [32]:
# training loop
def train(net, train_loader, valid_loader, epochs, optimizer, patience = 5, print_every=100):

    # # move model to GPU, if available
    # if(train_on_gpu):
    #     net.cuda()

    counter = 0 # for printing
    
    # train for some number of epochs
    net.train()
    best_val_loss = 1e99
    since_last_best = 0
    for e in range(epochs):

        # batch loop
        for inputs, labels in train_loader:
            counter += 1

            # if(train_on_gpu):
            #     inputs, labels = inputs.cuda(), labels.cuda()

            # zero accumulated gradients
            net.zero_grad()

            # get the output from the model
            output = net(inputs)

            # calculate the loss and perform backprop
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()

        # Get validation loss
        val_losses = []
        net.eval()
        for inputs, labels in valid_loader:

            # if(train_on_gpu):
            #     inputs, labels = inputs.cuda(), labels.cuda()

            output = net(inputs)
            val_loss = criterion(output, labels)

            val_losses.append(val_loss.item())
        val_loss_epoch = np.mean(val_losses)
        net.train()
        print("Epoch: {}/{}...".format(e+1, epochs),
                "Step: {}...".format(counter),
                "Loss: {:.6f}...".format(loss.item()),
                "Val Loss: {:.6f}".format(val_loss_epoch))
        if val_loss_epoch < best_val_loss:
            best_val_loss = val_loss_epoch
        else:
            since_last_best += 1
        
        if since_last_best == patience:
            break

In [41]:
# training params

epochs = 5 # this is approx where I noticed the validation loss stop decreasing
print_every = 10

train(lrec_lstm, lrec_train_loader, lrec_valid_loader, epochs, lrec_optimizer, print_every=print_every)

Epoch: 1/5... Step: 48... Loss: 1.037308... Val Loss: 1.018558
Epoch: 2/5... Step: 96... Loss: 0.838582... Val Loss: 0.985703
Epoch: 3/5... Step: 144... Loss: 0.078073... Val Loss: 1.138642
Epoch: 4/5... Step: 192... Loss: 0.107979... Val Loss: 1.263044
Epoch: 5/5... Step: 240... Loss: 0.730394... Val Loss: 1.321250


In [42]:
test_losses = [] # track loss
num_correct = 0
pred_tensor = None
label_tensor = None


lrec_lstm.eval()
# iterate over test data
for inputs, labels in lrec_test_loader:

    
    # get predicted outputs
    output = lrec_lstm(inputs)
    
    # calculate loss
    test_loss = criterion(output, labels)
    test_losses.append(test_loss.item())
    
    # convert output probabilities to predicted class (0 or 1)
    pred = torch.argmax(output, dim=1)  # argmax

    # compare predictions to true label
    correct_tensor = pred.eq(labels.view_as(pred))
    correct = np.squeeze(correct_tensor.numpy())
    num_correct += np.sum(correct)

    if pred_tensor == None:
        pred_tensor = pred
    else:
        pred_tensor = torch.cat((pred_tensor, pred), dim=-1)

    if label_tensor == None:
        label_tensor = labels
    else:
        label_tensor = torch.cat((label_tensor, labels), dim=-1)

from torcheval.metrics.functional import multiclass_f1_score
from torcheval.metrics.functional import multiclass_precision
from torcheval.metrics.functional import multiclass_recall

print("micro F1: {:.3f}".format(multiclass_f1_score(pred_tensor, label_tensor, num_classes=3).item()))
print("macro F1: {:.3f}".format(multiclass_f1_score(pred_tensor, label_tensor, num_classes=3, average='macro').item()))
print("weighted F1: {:.3f}\n".format(multiclass_f1_score(pred_tensor, label_tensor, num_classes=3, average='weighted').item()))

print("micro precision: {:.3f}".format(multiclass_precision(pred_tensor, label_tensor, num_classes=3).item()))
print("macro precision: {:.3f}".format(multiclass_precision(pred_tensor, label_tensor, num_classes=3, average='macro').item()))
print("weighted precision: {:.3f}\n".format(multiclass_precision(pred_tensor, label_tensor, num_classes=3, average='weighted').item()))

print("micro recall: {:.3f}".format(multiclass_recall(pred_tensor, label_tensor, num_classes=3).item()))
print("macro recall: {:.3f}".format(multiclass_recall(pred_tensor, label_tensor, num_classes=3, average='macro').item()))
print("weighted recall: {:.3f}\n".format(multiclass_recall(pred_tensor, label_tensor, num_classes=3, average='weighted').item()))

micro F1: 0.490
macro F1: 0.480
weighted F1: 0.492

micro precision: 0.490
macro precision: 0.511
weighted precision: 0.546

micro recall: 0.490
macro recall: 0.495
weighted recall: 0.490



------

In [38]:
# ARC
import json
with open("../preprocess/arc_split.json") as f:
    arc_data = json.load(f)

arc_tokenized_text = tokenize_all_text(embed_lookup, arc_data['train'])
arc_test_tokenized_text = tokenize_all_text(embed_lookup, arc_data['test'])

# Test your implementation!

seq_length = 15

arc_train_features = pad_features(arc_tokenized_text, seq_length=seq_length)

## test statements - do not change - ##
assert len(arc_train_features)==len(arc_tokenized_text), "Features should have as many rows as reviews."
assert len(arc_train_features[0])==seq_length, "Each feature row should contain seq_length values."

# print first 8 values of the first 20 batches 
print(arc_train_features[:20,:8])

# Test your implementation!

arc_test_features = pad_features(arc_test_tokenized_text, seq_length=seq_length)

## test statements - do not change - ##
assert len(arc_test_features)==len(arc_test_tokenized_text), "Features should have as many rows as reviews."
assert len(arc_test_features[0])==seq_length, "Each feature row should contain seq_length values."

# print first 8 values of the first 20 batches 
print(arc_test_features[:20,:8])


arc_train_labels = np.array([item['label'] for item in arc_data['train'].values()])
arc_test_labels = np.array([item['label'] for item in arc_data['test'].values()])

print(arc_test_labels[:20])

split_frac = 0.8

## split data into training, validation, and test data (features and labels, x and y)

split_idx = int(len(arc_train_features)*split_frac)
arc_train_x, arc_valid_x = arc_train_features[:split_idx], arc_train_features[split_idx:]
arc_train_y, arc_valid_y = arc_train_labels[:split_idx], arc_train_labels[split_idx:]

## print out the shapes of your resultant feature data
print("\t\t\tFeature Shapes:")
print("Train set: \t\t{}".format(arc_train_x.shape), 
      "\nValidation set: \t{}".format(arc_valid_x.shape),
      "\nTest set: \t\t{}".format(arc_test_features.shape))

from torch.utils.data import TensorDataset, DataLoader

# create Tensor datasets
arc_train_data = TensorDataset(torch.from_numpy(arc_train_x), torch.from_numpy(arc_train_y))
arc_valid_data = TensorDataset(torch.from_numpy(arc_valid_x), torch.from_numpy(arc_valid_y))
arc_test_data = TensorDataset(torch.from_numpy(arc_test_features), torch.from_numpy(arc_test_labels))

# dataloaders
batch_size = 4

# shuffling and batching data
arc_train_loader = DataLoader(arc_train_data, shuffle=True, batch_size=batch_size)
arc_valid_loader = DataLoader(arc_valid_data, shuffle=True, batch_size=batch_size)
arc_test_loader = DataLoader(arc_test_data, shuffle=True, batch_size=batch_size)

# lrec lstm

vocab_size = len(pretrained_words)
output_size = 3 # binary class (1 or 0)
embedding_dim = len(embed_lookup[pretrained_words[0]]) # 300-dim vectors

hidden_dim = 128

batch_size = 4
seq_length = 15

num_layers =2

arc_lstm = BiLSTMSentiment(embed_lookup, vocab_size, output_size, embedding_dim,
                   hidden_dim, batch_size, seq_length, num_layers)

print(arc_lstm)

# loss and optimization functions
lr=0.001

criterion = nn.CrossEntropyLoss()
arc_optimizer = torch.optim.Adam(arc_lstm.parameters(), lr=lr, weight_decay = 0.01)

[[     0    223     49  11534      0      0   4338   5408]
 [  8683    809      0   6776   2143      1   3221     30]
 [     0    897    359      0   1075      0     60  24442]
 [     0      0      0     43    400   2939     15      9]
 [     0      0      0      0   5687   2564   4281   1488]
 [   295     17   1931     41    373   2623      0   5779]
 [     0  18625   1337     22    630  20720      0      9]
 [     0    897    219      9 130013   1676      4      0]
 [    78    100    436   2129     56      0   2248    812]
 [  1780      0   3933   2251     45     14    219      0]
 [   295     17   1931     41    373   2623      0   5779]
 [     0    897    359      0   1075      0     60  24442]
 [     0    613    981      0    604    102      0   5043]
 [     0      0      0      0      0      0     43   8001]
 [     0      0      0      0      0      0      0      0]
 [     0  12430   2209   5043   3375    124   5871   2899]
 [     0    541   5317      0   1402      0    527   232

In [39]:
# training params

epochs = 5 # this is approx where I noticed the validation loss stop decreasing
print_every = 10

train(arc_lstm, arc_train_loader, arc_valid_loader, epochs, arc_optimizer, print_every=print_every)

Epoch: 1/5... Step: 40... Loss: 1.269917... Val Loss: 1.046957
Epoch: 2/5... Step: 80... Loss: 0.658942... Val Loss: 1.190822
Epoch: 3/5... Step: 120... Loss: 0.119232... Val Loss: 1.264298
Epoch: 4/5... Step: 160... Loss: 2.618878... Val Loss: 1.095488
Epoch: 5/5... Step: 200... Loss: 0.096877... Val Loss: 1.131243


In [43]:
test_losses = [] # track loss
num_correct = 0
pred_tensor = None
label_tensor = None


arc_lstm.eval()
# iterate over test data
for inputs, labels in arc_test_loader:

    
    # get predicted outputs
    output = arc_lstm(inputs)
    
    # calculate loss
    test_loss = criterion(output, labels)
    test_losses.append(test_loss.item())
    
    # convert output probabilities to predicted class (0 or 1)
    pred = torch.argmax(output, dim=1)  # argmax

    # compare predictions to true label
    correct_tensor = pred.eq(labels.view_as(pred))
    correct = np.squeeze(correct_tensor.numpy())
    num_correct += np.sum(correct)

    if pred_tensor == None:
        pred_tensor = pred
    else:
        pred_tensor = torch.cat((pred_tensor, pred), dim=-1)

    if label_tensor == None:
        label_tensor = labels
    else:
        label_tensor = torch.cat((label_tensor, labels), dim=-1)

from torcheval.metrics.functional import multiclass_f1_score
from torcheval.metrics.functional import multiclass_precision
from torcheval.metrics.functional import multiclass_recall

print("micro F1: {:.3f}".format(multiclass_f1_score(pred_tensor, label_tensor, num_classes=3).item()))
print("macro F1: {:.3f}".format(multiclass_f1_score(pred_tensor, label_tensor, num_classes=3, average='macro').item()))
print("weighted F1: {:.3f}\n".format(multiclass_f1_score(pred_tensor, label_tensor, num_classes=3, average='weighted').item()))

print("micro precision: {:.3f}".format(multiclass_precision(pred_tensor, label_tensor, num_classes=3).item()))
print("macro precision: {:.3f}".format(multiclass_precision(pred_tensor, label_tensor, num_classes=3, average='macro').item()))
print("weighted precision: {:.3f}\n".format(multiclass_precision(pred_tensor, label_tensor, num_classes=3, average='weighted').item()))

print("micro recall: {:.3f}".format(multiclass_recall(pred_tensor, label_tensor, num_classes=3).item()))
print("macro recall: {:.3f}".format(multiclass_recall(pred_tensor, label_tensor, num_classes=3, average='macro').item()))
print("weighted recall: {:.3f}\n".format(multiclass_recall(pred_tensor, label_tensor, num_classes=3, average='weighted').item()))

micro F1: 0.573
macro F1: 0.564
weighted F1: 0.576

micro precision: 0.573
macro precision: 0.554
weighted precision: 0.594

micro recall: 0.573
macro recall: 0.598
weighted recall: 0.573



------

In [44]:
# nu
import json
with open("../preprocess/nu_split.json") as f:
    nu_data = json.load(f)

nu_tokenized_text = tokenize_all_text(embed_lookup, nu_data['train'])
nu_test_tokenized_text = tokenize_all_text(embed_lookup, nu_data['test'])

# Test your implementation!

seq_length = 15

nu_train_features = pad_features(nu_tokenized_text, seq_length=seq_length)

## test statements - do not change - ##
assert len(nu_train_features)==len(nu_tokenized_text), "Features should have as many rows as reviews."
assert len(nu_train_features[0])==seq_length, "Each feature row should contain seq_length values."

# print first 8 values of the first 20 batches 
print(nu_train_features[:20,:8])

# Test your implementation!

nu_test_features = pad_features(nu_test_tokenized_text, seq_length=seq_length)

## test statements - do not change - ##
assert len(nu_test_features)==len(nu_test_tokenized_text), "Features should have as many rows as reviews."
assert len(nu_test_features[0])==seq_length, "Each feature row should contain seq_length values."

# print first 8 values of the first 20 batches 
print(nu_test_features[:20,:8])


nu_train_labels = np.array([item['label'] for item in nu_data['train'].values()])
nu_test_labels = np.array([item['label'] for item in nu_data['test'].values()])

print(nu_test_labels[:20])

split_frac = 0.8

## split data into training, validation, and test data (features and labels, x and y)

split_idx = int(len(nu_train_features)*split_frac)
nu_train_x, nu_valid_x = nu_train_features[:split_idx], nu_train_features[split_idx:]
nu_train_y, nu_valid_y = nu_train_labels[:split_idx], nu_train_labels[split_idx:]

## print out the shapes of your resultant feature data
print("\t\t\tFeature Shapes:")
print("Train set: \t\t{}".format(nu_train_x.shape), 
      "\nValidation set: \t{}".format(nu_valid_x.shape),
      "\nTest set: \t\t{}".format(nu_test_features.shape))

from torch.utils.data import TensorDataset, DataLoader

# create Tensor datasets
nu_train_data = TensorDataset(torch.from_numpy(nu_train_x), torch.from_numpy(nu_train_y))
nu_valid_data = TensorDataset(torch.from_numpy(nu_valid_x), torch.from_numpy(nu_valid_y))
nu_test_data = TensorDataset(torch.from_numpy(nu_test_features), torch.from_numpy(nu_test_labels))

# dataloaders
batch_size = 4

# shuffling and batching data
nu_train_loader = DataLoader(nu_train_data, shuffle=True, batch_size=batch_size)
nu_valid_loader = DataLoader(nu_valid_data, shuffle=True, batch_size=batch_size)
nu_test_loader = DataLoader(nu_test_data, shuffle=True, batch_size=batch_size)

# lrec lstm

vocab_size = len(pretrained_words)
output_size = 3 # binary class (1 or 0)
embedding_dim = len(embed_lookup[pretrained_words[0]]) # 300-dim vectors

hidden_dim = 128

batch_size = 4
seq_length = 15

num_layers =2

nu_lstm = BiLSTMSentiment(embed_lookup, vocab_size, output_size, embedding_dim,
                   hidden_dim, batch_size, seq_length, num_layers)

print(nu_lstm)

# loss and optimization functions
lr=0.001

criterion = nn.CrossEntropyLoss()
nu_optimizer = torch.optim.Adam(nu_lstm.parameters(), lr=lr, weight_decay = 0.01)

[[    0     0   780     0 73009     1     0  9560]
 [    0     0     0  4699  4480     9  4516  1207]
 [    0     0     0     0     0     0     0  9660]
 [    0     0     0     0     0     0     0     0]
 [    0     0     0  9660     9 19809     0     0]
 [    0     0     0     0     0     0     0     0]
 [    0     0     0     0     0    78     3     9]
 [    0     0     0     0     0     0     0     0]
 [    0     0     0     0     0     0     0    56]
 [    0     0     0     0  4328 86996  2629     0]
 [    0     0     0     0     0     0  4196     9]
 [    0     0     0     0     0     0     0     0]
 [    0     0     0     0     0     0     0     0]
 [    0     0     0     0     0     0     0     0]
 [    0     0     0     0     0     0     0  7460]
 [    0     0     0     0     0     0     0     0]
 [    0     0     0     0     0     0     0     0]
 [    0     0     0     0     0 23690     9   459]
 [  132    53    38  5838     9   389     0  1031]
 [    0     0     0     0     0

In [45]:
# training params

epochs = 5 # this is approx where I noticed the validation loss stop decreasing
print_every = 10

train(nu_lstm, nu_train_loader, nu_valid_loader, epochs, nu_optimizer, print_every=print_every)

Epoch: 1/5... Step: 84... Loss: 1.418625... Val Loss: 1.091758
Epoch: 2/5... Step: 168... Loss: 0.544706... Val Loss: 1.056411
Epoch: 3/5... Step: 252... Loss: 0.140157... Val Loss: 1.025819
Epoch: 4/5... Step: 336... Loss: 1.255961... Val Loss: 1.084976
Epoch: 5/5... Step: 420... Loss: 1.570338... Val Loss: 1.040039


In [46]:
test_losses = [] # track loss
num_correct = 0
pred_tensor = None
label_tensor = None


nu_lstm.eval()
# iterate over test data
for inputs, labels in nu_test_loader:

    
    # get predicted outputs
    output = nu_lstm(inputs)
    
    # calculate loss
    test_loss = criterion(output, labels)
    test_losses.append(test_loss.item())
    
    # convert output probabilities to predicted class (0 or 1)
    pred = torch.argmax(output, dim=1)  # argmax

    # compare predictions to true label
    correct_tensor = pred.eq(labels.view_as(pred))
    correct = np.squeeze(correct_tensor.numpy())
    num_correct += np.sum(correct)

    if pred_tensor == None:
        pred_tensor = pred
    else:
        pred_tensor = torch.cat((pred_tensor, pred), dim=-1)

    if label_tensor == None:
        label_tensor = labels
    else:
        label_tensor = torch.cat((label_tensor, labels), dim=-1)

from torcheval.metrics.functional import multiclass_f1_score
from torcheval.metrics.functional import multiclass_precision
from torcheval.metrics.functional import multiclass_recall

print("micro F1: {:.3f}".format(multiclass_f1_score(pred_tensor, label_tensor, num_classes=3).item()))
print("macro F1: {:.3f}".format(multiclass_f1_score(pred_tensor, label_tensor, num_classes=3, average='macro').item()))
print("weighted F1: {:.3f}\n".format(multiclass_f1_score(pred_tensor, label_tensor, num_classes=3, average='weighted').item()))

print("micro precision: {:.3f}".format(multiclass_precision(pred_tensor, label_tensor, num_classes=3).item()))
print("macro precision: {:.3f}".format(multiclass_precision(pred_tensor, label_tensor, num_classes=3, average='macro').item()))
print("weighted precision: {:.3f}\n".format(multiclass_precision(pred_tensor, label_tensor, num_classes=3, average='weighted').item()))

print("micro recall: {:.3f}".format(multiclass_recall(pred_tensor, label_tensor, num_classes=3).item()))
print("macro recall: {:.3f}".format(multiclass_recall(pred_tensor, label_tensor, num_classes=3, average='macro').item()))
print("weighted recall: {:.3f}\n".format(multiclass_recall(pred_tensor, label_tensor, num_classes=3, average='weighted').item()))

micro F1: 0.654
macro F1: 0.607
weighted F1: 0.664

micro precision: 0.654
macro precision: 0.606
weighted precision: 0.680

micro recall: 0.654
macro recall: 0.620
weighted recall: 0.654



------

In [47]:
import json
with open("../preprocess/arg_split.json") as f:
    arg_data = json.load(f)

arg_tokenized_text = tokenize_all_text(embed_lookup, arg_data['train'])
arg_test_tokenized_text = tokenize_all_text(embed_lookup, arg_data['test'])

# Test your implementation!

seq_length = 15

arg_train_features = pad_features(arg_tokenized_text, seq_length=seq_length)

## test statements - do not change - ##
assert len(arg_train_features)==len(arg_tokenized_text), "Features should have as many rows as reviews."
assert len(arg_train_features[0])==seq_length, "Each feature row should contain seq_length values."

# print first 8 values of the first 20 batches 
print(arg_train_features[:20,:8])

# Test your implementation!

arg_test_features = pad_features(arg_test_tokenized_text, seq_length=seq_length)

## test statements - do not change - ##
assert len(arg_test_features)==len(arg_test_tokenized_text), "Features should have as many rows as reviews."
assert len(arg_test_features[0])==seq_length, "Each feature row should contain seq_length values."

# print first 8 values of the first 20 batches 
print(arg_test_features[:20,:8])


arg_train_labels = np.array([item['label'] for item in arg_data['train'].values()])
arg_test_labels = np.array([item['label'] for item in arg_data['test'].values()])

print(arg_test_labels[:20])

split_frac = 0.8

## split data into training, validation, and test data (features and labels, x and y)

split_idx = int(len(arg_train_features)*split_frac)
arg_train_x, arg_valid_x = arg_train_features[:split_idx], arg_train_features[split_idx:]
arg_train_y, arg_valid_y = arg_train_labels[:split_idx], arg_train_labels[split_idx:]

## print out the shapes of your resultant feature data
print("\t\t\tFeature Shapes:")
print("Train set: \t\t{}".format(arg_train_x.shape), 
      "\nValidation set: \t{}".format(arg_valid_x.shape),
      "\nTest set: \t\t{}".format(arg_test_features.shape))

from torch.utils.data import TensorDataset, DataLoader

# create Tensor datasets
arg_train_data = TensorDataset(torch.from_numpy(arg_train_x), torch.from_numpy(arg_train_y))
arg_valid_data = TensorDataset(torch.from_numpy(arg_valid_x), torch.from_numpy(arg_valid_y))
arg_test_data = TensorDataset(torch.from_numpy(arg_test_features), torch.from_numpy(arg_test_labels))

# dataloaders
batch_size = 4

# shuffling and batching data
arg_train_loader = DataLoader(arg_train_data, shuffle=True, batch_size=batch_size)
arg_valid_loader = DataLoader(arg_valid_data, shuffle=True, batch_size=batch_size)
arg_test_loader = DataLoader(arg_test_data, shuffle=True, batch_size=batch_size)

# nu lstm

vocab_size = len(pretrained_words)
output_size = 4 # binary class (1 or 0)
embedding_dim = len(embed_lookup[pretrained_words[0]]) # 300-dim vectors

hidden_dim = 128

batch_size = 4
seq_length = 15

num_layers =2

arg_lstm = BiLSTMSentiment(embed_lookup, vocab_size, output_size, embedding_dim,
                   hidden_dim, batch_size, seq_length, num_layers)

print(arg_lstm)

# loss and optimization functions
lr=0.001

criterion = nn.CrossEntropyLoss()
arg_optimizer = torch.optim.Adam(arg_lstm.parameters(), lr=lr, weight_decay = 0.01)

[[   986      9   2887     12    390  58549     22     37]
 [     0     25  22694      9    770      0     78  72206]
 [     0  15370  10297  15760   1504      0   1444      0]
 [     9  24325   1947      8   2041      0      9    271]
 [     9   6398      2   6837     78   1599      0    192]
 [  5097      0   4408      0   1015      0  11542   3677]
 [    40   4932      0   1218   2000     22     37      2]
 [    34    857      0      9   4588      0      0  13080]
 [  4818     35   1471   5695      0   9577  21774      9]
 [    66   3411      3      0   8561      0 231574    179]
 [    34   2093   3980      0   7712      0  60560  10492]
 [    25   1427   2000  22915 104856 231574      1  36464]
 [  1682     24   7558   7110      0    129      0    125]
 [     0      0      0     34   4094      0     60   5426]
 [     0      0      9   8190    483     16      9   1931]
 [     0    957      9   8521   3936      0  80045  37286]
 [     0   5526   2878   3106    160   3079      0   210

In [48]:
# training params

epochs = 5 # this is approx where I noticed the validation loss stop decreasing
print_every = 10

train(arg_lstm, arg_train_loader, arg_valid_loader, epochs, arg_optimizer, print_every=print_every)

Epoch: 1/5... Step: 176... Loss: 1.488478... Val Loss: 1.226024
Epoch: 2/5... Step: 352... Loss: 0.631088... Val Loss: 1.319970
Epoch: 3/5... Step: 528... Loss: 0.395240... Val Loss: 1.074261
Epoch: 4/5... Step: 704... Loss: 0.209982... Val Loss: 1.140975
Epoch: 5/5... Step: 880... Loss: 0.847276... Val Loss: 0.834745


In [50]:
test_losses = [] # track loss
num_correct = 0
pred_tensor = None
label_tensor = None


arg_lstm.eval()
# iterate over test data
for inputs, labels in arg_test_loader:

    
    # get predicted outputs
    output = arg_lstm(inputs)
    
    # calculate loss
    test_loss = criterion(output, labels)
    test_losses.append(test_loss.item())
    
    # convert output probabilities to predicted class (0 or 1)
    pred = torch.argmax(output, dim=1)  # argmax

    # compare predictions to true label
    correct_tensor = pred.eq(labels.view_as(pred))
    correct = np.squeeze(correct_tensor.numpy())
    num_correct += np.sum(correct)

    if pred_tensor == None:
        pred_tensor = pred
    else:
        pred_tensor = torch.cat((pred_tensor, pred), dim=-1)

    if label_tensor == None:
        label_tensor = labels
    else:
        label_tensor = torch.cat((label_tensor, labels), dim=-1)

from torcheval.metrics.functional import multiclass_f1_score
from torcheval.metrics.functional import multiclass_precision
from torcheval.metrics.functional import multiclass_recall

print("micro F1: {:.3f}".format(multiclass_f1_score(pred_tensor, label_tensor, num_classes=4).item()))
print("macro F1: {:.3f}".format(multiclass_f1_score(pred_tensor, label_tensor, num_classes=4, average='macro').item()))
print("weighted F1: {:.3f}\n".format(multiclass_f1_score(pred_tensor, label_tensor, num_classes=4, average='weighted').item()))

print("micro precision: {:.3f}".format(multiclass_precision(pred_tensor, label_tensor, num_classes=4).item()))
print("macro precision: {:.3f}".format(multiclass_precision(pred_tensor, label_tensor, num_classes=4, average='macro').item()))
print("weighted precision: {:.3f}\n".format(multiclass_precision(pred_tensor, label_tensor, num_classes=4, average='weighted').item()))

print("micro recall: {:.3f}".format(multiclass_recall(pred_tensor, label_tensor, num_classes=4).item()))
print("macro recall: {:.3f}".format(multiclass_recall(pred_tensor, label_tensor, num_classes=4, average='macro').item()))
print("weighted recall: {:.3f}\n".format(multiclass_recall(pred_tensor, label_tensor, num_classes=4, average='weighted').item()))

micro F1: 0.659
macro F1: 0.556
weighted F1: 0.662

micro precision: 0.659
macro precision: 0.544
weighted precision: 0.666

micro recall: 0.659
macro recall: 0.572
weighted recall: 0.659

