In [3]:
import nltk
import numpy as np
import pandas as pd
import pickle
from sklearn.metrics import f1_score
from sklearn.preprocessing import LabelEncoder
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable
# Imports - our files
import utils
import models
#import argparse

# Global definitions - data
DATA_FN = '/Users/neelampatodia/Desktop/Yogesh/NLP/Assignments/hw2/data/crowdflower_data.csv'
LABEL_NAMES = ["happiness", "worry", "neutral", "sadness"]

# Global definitions - architecture
EMBEDDING_DIM = 100  # We will use pretrained 100-dimensional GloVe
BATCH_SIZE = 128
NUM_CLASSES = 4
USE_CUDA = torch.cuda.is_available()  # CUDA will be available if you are using the GPU image for this homework

# Global definitions - saving and loading data
FRESH_START = False  # set this to false after running once with True to just load your preprocessed data from file
#                     (good for debugging)
TEMP_FILE = "temporary_data.pkl"  # if you set FRESH_START to false, the program will look here for your data, etc.




In [4]:
with open(TEMP_FILE, "rb") as f:
    print("Loading DataLoaders and embeddings from file....")
    train_generator, dev_generator, test_generator, embeddings, train_data = pickle.load(f)

Loading DataLoaders and embeddings from file....


In [75]:
class RecurrentNetwork(nn.Module):
    def __init__(self,input_dim, hidden_dim, layer_dim, output_dim,embeddings):
        super(RecurrentNetwork, self).__init__()
        
        self.embedding_layer = nn.Embedding(embeddings.shape[0],
                                            embeddings.shape[1])
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.rnn = nn.GRU(input_dim, hidden_dim, num_layers=layer_dim, batch_first=True)#, nonlinearity='relu')
        self.fc = nn.Linear(hidden_dim, output_dim)

    # x is a PaddedSequence for an RNN
    def forward(self, x):
        ########## YOUR CODE HERE ##########
        # TODO: Fill in the forward pass of your neural network.
        # TODO: (The backward pass will be performed by PyTorch magic for you!)
        # TODO: Your architecture should...
        # TODO: 1) Put the words through an Embedding layer (which was initialized with the pretrained embeddings);
        # TODO: 2) Feed the sequence of embeddings through a 2-layer RNN; and
        # TODO: 3) Feed the last output state into a dense layer to become a 4-vector of values, one for each class
        output = self.embedding_layer(x)
        h0 = Variable(torch.zeros(self.layer_dim, x.size(0), self.hidden_dim))
        c0 = Variable(torch.zeros(self.layer_dim, x.size(0), self.hidden_dim))
        out, h1 = self.rnn(output, h0)
        #out, h2 = self.rnn(out,h1)
        out = self.fc(out[:, -1, :]) 
        return out

In [93]:
model = RecurrentNetwork(100, 110, 2, 4,embeddings)
import torch.optim as optim
optimizer = optim.Adam(model.parameters(), lr=0.001)
loss_fn = nn.CrossEntropyLoss()
torch.manual_seed(0)

<torch._C.Generator at 0x7fe9d7e15df0>

In [94]:
import tqdm
train_loss = []
test_loss = []
development_loss = 100.0
continous_negative = 0
for n in tqdm.tqdm(range(30)):
    avg_loss = []
    train_loss = 0.0
    valid_loss = 0.0
    for x, y in train_generator:
        optimizer.zero_grad()
        output = model(x)
        loss = loss_fn(output, y)
        avg_loss.append(loss)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()*x.size(0)
    
    #print('train loss', train_loss)
    gold = []
    predicted = []
    # Keep track of the loss
    loss = torch.zeros(1)  # requires_grad = False by default; float32 by default
    if USE_CUDA:
        loss = loss.cuda()
    model.eval()
    with torch.no_grad():
        for X_b, y_b in dev_generator:
            y_pred = model(X_b)
            # Save gold and predicted labels for F1 score - take the argmax to convert to class labels
            gold.extend(y_b.cpu().detach().numpy())
            predicted.extend(y_pred.argmax(1).cpu().detach().numpy())
            loss += loss_fn(y_pred.double(), y_b.long()).data
    print('loss',n, loss, abs(development_loss-loss))
    if development_loss-loss<0:
        break
    development_loss = loss
    m=model
    

  3%|▎         | 1/30 [01:01<29:31, 61.10s/it]

loss 0 tensor([29.7429]) tensor([70.2571])


  7%|▋         | 2/30 [02:20<33:28, 71.75s/it]

loss 1 tensor([27.6676]) tensor([2.0753])


 10%|█         | 3/30 [03:43<34:37, 76.95s/it]

loss 2 tensor([26.5271]) tensor([1.1406])


 13%|█▎        | 4/30 [05:12<35:20, 81.57s/it]

loss 3 tensor([26.2748]) tensor([0.2523])


 13%|█▎        | 4/30 [06:38<43:11, 99.69s/it]

loss 4 tensor([26.7415]) tensor([0.4667])





In [95]:
def test_model(model, loss_fn, test_generator):
    """
    Evaluate the performance of a model on the development set, providing the loss and macro F1 score.
    :param model: a model that performs 4-way emotion classification
    :param loss_fn: a function that can calculate loss between the predicted and gold labels
    :param test_generator: a DataLoader that provides batches of the testing set
    """
    gold = []
    predicted = []

    # Keep track of the loss
    loss = torch.zeros(1)  # requires_grad = False by default; float32 by default
    if USE_CUDA:
        loss = loss.cuda()

    model.eval()

    # Iterate over batches in the test dataset
    with torch.no_grad():
        for X_b, y_b in test_generator:
            # Predict
            y_pred = model(X_b)

            # Save gold and predicted labels for F1 score - take the argmax to convert to class labels
            gold.extend(y_b.cpu().detach().numpy())
            predicted.extend(y_pred.argmax(1).cpu().detach().numpy())

            loss += loss_fn(y_pred.double(), y_b.long()).data

    # Print total loss and macro F1 score
    print("Test loss: ")
    print(loss)
    print("F-score: ")
    print(f1_score(gold, predicted, average='macro'))

In [96]:
 test_model(m, loss_fn, test_generator)

Test loss: 
tensor([27.3810])
F-score: 
0.4143990964422248


model