In [29]:
import nltk
import numpy as np
import pandas as pd
import pickle
from sklearn.metrics import f1_score
from sklearn.preprocessing import LabelEncoder
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F

# Imports - our files
import utils
import models
#import argparse

# Global definitions - data
DATA_FN = '/Users/neelampatodia/Desktop/Yogesh/NLP/Assignments/hw2/data/crowdflower_data.csv'
LABEL_NAMES = ["happiness", "worry", "neutral", "sadness"]

# Global definitions - architecture
EMBEDDING_DIM = 100  # We will use pretrained 100-dimensional GloVe
BATCH_SIZE = 128
NUM_CLASSES = 4
USE_CUDA = torch.cuda.is_available()  # CUDA will be available if you are using the GPU image for this homework

# Global definitions - saving and loading data
FRESH_START = False  # set this to false after running once with True to just load your preprocessed data from file
#                     (good for debugging)
TEMP_FILE = "temporary_data.pkl"  # if you set FRESH_START to false, the program will look here for your data, etc.



In [30]:
with open(TEMP_FILE, "rb") as f:
    print("Loading DataLoaders and embeddings from file....")
    train_generator, dev_generator, test_generator, embeddings, train_data = pickle.load(f)

Loading DataLoaders and embeddings from file....


In [12]:
train_generator, dev_generator, test_generator, embeddings, train_data
for 

TypeError: 'DataLoader' object is not an iterator

In [5]:
class DenseNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes, pre_trained_embeddings):
        super(DenseNetwork).__init__()
        self.embedding_layer = nn.Embedding(pre_trained_embeddings.shape[0],
                                            pre_trained_embeddings.shape[1])
        self.embedding_layer.weight.data.copy_(pre_trained_embeddings)
        #self.cnn_layer = nn.Conv2d(x.size(1),int(x.size(1)/2),(x.size(0),2), stride=1)
        
        self.pooling_layer = nn.LPPool2d(norm_type=1,kernel_size=(91,1), stride = 1)
        self.layer1 = nn.Linear(input_size,hidden_size)
        self.activation_func = nn.ReLU()
        self.layer2 = nn.Linear(hidden_size, num_classes)


    def forward(self, x):
        output = self.embedding_layer(x)
        pooling_layer = nn.AvgPool2d(kernel_size=(x.size(1),1), stride = 1)
        output = pooling_layer(output)
        output = output.squeeze()
        output = self.layer1(output)
        output = self.activation_func(output)
        output = self.layer2(output)
        return output.squeeze()

In [53]:
import torch.nn as nn
import torch.nn.functional as F

class CNN(nn.Module):
    
    def __init__(self, pre_trained_embeddings, num_filters, filter_sizes, output_dim,dropout):
        super().__init__()
        self.embedding_layer = nn.Embedding.pre_trained_embeddings.shape[1])
        self.embedding_layer.weight.data.copy_(pre_trained_embeddings)
        self.conv_layer = nn.ModuleList([nn.Conv2d(
                                                in_channels = 1,
                                                out_channels = num_filters,
                                                kernel_size = (fs, 100)) for fs in filter_sizes])
        self.layer1 = nn.Linear(len(filter_sizes)*num_filters, output_dim)
        self.dropout_layer = nn.Dropout(dropout)
        

    def forward(self, x):
        output = self.embedding_layer(x)
        output = output.unsqueeze(1)
        output = [F.relu(conv(output)).squeeze(3) for conv in self.conv_layer]
        output = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in output]
        output = self.dropout_layer(torch.cat(output, dim = 1))
        output = self.layer1(output)
        return output

In [54]:
a,b = None, None
for x,y in dev_generator:
    a=x
    b=y
    print(a.size(1), b.shape)
    break
z = torch.sum(a,)
z.shape
embeddings.shape

82 torch.Size([128])


torch.Size([17752, 100])

In [55]:
torch.manual_seed(0)
import torch.optim as optim
#model = DenseNetwork(100,10,4,embeddings)
model = CNN(embeddings, 100, [2,3,4], 4,0.5)
optimizer = optim.Adam(model.parameters(), lr=0.001)
loss_fn = nn.CrossEntropyLoss()
lambda1 = lambda epoch: 0.65 ** 100
#scheduler = torch.optim.lr_scheduler.MultiplicativeLR(optimizer, lr_lambda = lambda epoch: 0.95 )

In [56]:
import tqdm
train_loss = []
test_loss = []
development_loss = 100.0
for n in tqdm.tqdm(range(100)):
    avg_loss = []
    train_loss = 0.0
    valid_loss = 0.0
    for x, y in train_generator:
        optimizer.zero_grad()
        output = model(x)
        loss = loss_fn(output, y)
        avg_loss.append(loss)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()*x.size(0)
    
    gold = []
    predicted = []
    # Keep track of the loss
    loss = torch.zeros(1)  # requires_grad = False by default; float32 by default
    if USE_CUDA:
        loss = loss.cuda()
    model.eval()
    with torch.no_grad():
        for X_b, y_b in dev_generator:
            y_pred = model(X_b)
            # Save gold and predicted labels for F1 score - take the argmax to convert to class labels
            gold.extend(y_b.cpu().detach().numpy())
            predicted.extend(y_pred.argmax(1).cpu().detach().numpy())
            loss += loss_fn(y_pred.double(), y_b.long()).data
    print('loss',n, loss, abs(development_loss-loss))
    #scheduler.step()
    if development_loss<loss:
        break
    development_loss = loss
    m = model

  1%|          | 1/100 [00:47<1:18:57, 47.85s/it]

loss 0 tensor([24.6769]) tensor([75.3231])


  2%|▏         | 2/100 [01:35<1:18:02, 47.78s/it]

loss 1 tensor([23.6988]) tensor([0.9781])


  2%|▏         | 2/100 [02:23<1:57:19, 71.83s/it]

loss 2 tensor([23.7946]) tensor([0.0958])





In [163]:
m

DenseNetwork(
  (embedding_layer): Embedding(17752, 100)
  (pooling_layer): LPPool2d(norm_type=1, kernel_size=(91, 1), stride=1, ceil_mode=False)
  (layer1): Linear(in_features=100, out_features=10, bias=True)
  (activation_func): ReLU()
  (layer2): Linear(in_features=10, out_features=4, bias=True)
)

In [71]:
pool = nn.LPPool2d(1,kernel_size=(91,1),stride=1)
pooled_data = pool(e_data)
pooled_data.shape
p_squeezed_data=pooled_data.squeeze()
p_squeezed_data.shape

torch.Size([128, 100])

In [57]:
def test_model(model, loss_fn, test_generator):
    """
    Evaluate the performance of a model on the development set, providing the loss and macro F1 score.
    :param model: a model that performs 4-way emotion classification
    :param loss_fn: a function that can calculate loss between the predicted and gold labels
    :param test_generator: a DataLoader that provides batches of the testing set
    """
    gold = []
    predicted = []

    # Keep track of the loss
    loss = torch.zeros(1)  # requires_grad = False by default; float32 by default
    if USE_CUDA:
        loss = loss.cuda()

    model.eval()

    # Iterate over batches in the test dataset
    with torch.no_grad():
        for X_b, y_b in test_generator:
            # Predict
            y_pred = model(X_b)

            # Save gold and predicted labels for F1 score - take the argmax to convert to class labels
            gold.extend(y_b.cpu().detach().numpy())
            predicted.extend(y_pred.argmax(1).cpu().detach().numpy())

            loss += loss_fn(y_pred.double(), y_b.long()).data

    # Print total loss and macro F1 score
    print("Test loss: ")
    print(loss)
    print("F-score: ")
    print(f1_score(gold, predicted, average='macro'))

In [165]:
dense_network_model

DenseNetwork(
  (embedding_layer): Embedding(17752, 100)
  (pooling_layer): LPPool2d(norm_type=1, kernel_size=(91, 1), stride=1, ceil_mode=False)
  (layer1): Linear(in_features=100, out_features=20, bias=True)
  (activation_func): ReLU()
  (layer2): Linear(in_features=20, out_features=4, bias=True)
)

In [58]:
test_model(m, loss_fn, test_generator)

Test loss: 
tensor([24.4684])
F-score: 
0.4887734851408822
