In [1]:
# -*- coding: utf-8 -*-
"""
   Introduction to Deep Learning (LDA-T3114)
   Skeleton Code for Assignment 1: Sentiment Classification on a Feed-Forward Neural Network

   Hande Celikkanat & Miikka Silfverberg
"""

'\n   Introduction to Deep Learning (LDA-T3114)\n   Skeleton Code for Assignment 1: Sentiment Classification on a Feed-Forward Neural Network\n\n   Hande Celikkanat & Miikka Silfverberg\n'

In [2]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import random

#ATTENTION: If necessary, add the paths to your data_semeval.py and paths.py here:
#import sys
#sys.path.append('</path/to/below/modules>')
from data_semeval import *
from paths import data_dir

# Run on the first CUDA device when one is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print('Is cuda available', torch.cuda.is_available())

Is cuda available True


In [3]:
#--- hyperparameters ---

# Number of output classes: one per entry of LABEL_INDICES (from data_semeval).
N_CLASSES = len(LABEL_INDICES)
# Number of passes over the training split.
N_EPOCHS = 100
# Step size handed to the SGD optimizer.
LEARNING_RATE = 0.25
# Number of training tweets per minibatch.
BATCH_SIZE = 100
# The training loss is printed every REPORT_EVERY epochs.
REPORT_EVERY = 1
# Intended to toggle per-example printing at test time.
IS_VERBOSE = True

# Seed the generators behind random.shuffle and the weight initialisation so
# that a fresh "Restart & Run All" starts from the same state every time.
# NOTE: some GPU kernels are non-deterministic, so runs on CUDA may still
# differ slightly.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

In [4]:
def make_bow(tweet, indices):
    """Encode one tweet as a binary bag-of-words row vector.

    Args:
        tweet: mapping whose 'BODY' entry is an iterable of tokens.
        indices: mapping from known token to its feature position.

    Returns:
        A float tensor of shape (1, len(indices)) holding 1 at the position
        of every known token that occurs in the tweet and 0 elsewhere.
        Tokens missing from `indices` are ignored.
    """
    known_positions = [indices[token] for token in tweet['BODY'] if token in indices]
    vector = torch.zeros(len(indices))
    # Presence, not counts: a repeated token still yields a single 1.
    vector[known_positions] = 1
    return vector.reshape(1, -1)

def generate_bow_representations(data):
    """Build the vocabulary from the training split and attach BOW vectors.

    Every tweet in the training, development and test splits gets a new
    'BOW' entry produced by make_bow. The tweets are modified in place.

    Args:
        data: mapping from split name to a list of tweets, each tweet being
            a mapping with a 'BODY' iterable of tokens.

    Returns:
        (indices, vocab_size): the token -> feature-position mapping and the
        number of distinct training tokens.
    """
    # Sort the vocabulary so each token gets the same feature position on
    # every run. Iterating the raw set would depend on Python's per-process
    # string hash randomization, which makes saved weights and results
    # irreproducible across kernel restarts.
    # Assumes tokens are mutually orderable (e.g. all strings).
    vocab = sorted(set(token for tweet in data['training'] for token in tweet['BODY']))
    vocab_size = len(vocab)
    indices = {w: i for i, w in enumerate(vocab)}

    for split in ["training", "development.input", "development.gold",
                  "test.input", "test.gold"]:
        for tweet in data[split]:
            tweet['BOW'] = make_bow(tweet, indices)

    return indices, vocab_size

def label_to_idx(label):
    """Convert a string label to PyTorch format.

    Looks the label up in LABEL_INDICES and returns the resulting class
    index wrapped in a one-element LongTensor.
    """
    class_index = LABEL_INDICES[label]
    return torch.LongTensor([class_index])

In [5]:
#--- model ---

class FFNN(nn.Module):
    """Feed-forward classifier with two hidden ReLU layers.

    Layout: Linear(vocab_size, hidden_size) -> ReLU ->
    Linear(hidden_size, hidden_size) -> ReLU -> Linear(hidden_size, n_classes).
    The network returns raw logits; no softmax is applied.
    """

    # Feel free to add whichever arguments you like here.
    def __init__(self, vocab_size, n_classes, extra_arg_1=None, extra_arg_2=None,
                 hidden_size=512):
        """Create the layers.

        Args:
            vocab_size: size of the input feature vector.
            n_classes: number of output logits.
            extra_arg_1, extra_arg_2: unused placeholders kept so existing
                call sites keep working.
            hidden_size: width of both hidden layers (default 512, the value
                that used to be hard-coded).
        """
        super().__init__()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(vocab_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, n_classes)
        )

    def forward(self, x):
        """Map a batch of feature vectors (..., vocab_size) to logits (..., n_classes)."""
        return self.linear_relu_stack(x)

In [6]:
#--- data loading ---
# Read the SemEval datasets from data_dir; the result is indexed by split
# name ('training', 'test.gold', ...) in the cells below.
data = read_semeval_datasets(data_dir)
# Build the vocabulary from the training split and attach a 'BOW' vector to
# every tweet of every split (the tweets are modified in place).
indices, vocab_size = generate_bow_representations(data)
# The vocabulary size is also the input dimension of the model.
print(vocab_size)

22031


In [7]:
#--- set up ---

# Build the classifier and move its parameters to the selected device.
# Pass any extra FFNN constructor arguments here if you use them.
model = FFNN(vocab_size, N_CLASSES)
model = model.to(device)
# Cross-entropy computed on the raw logits returned by the model.
loss_fn = nn.CrossEntropyLoss()
# Plain stochastic gradient descent over all model parameters.
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)

In [8]:
import gc

# Drop unreachable Python objects first, then ask PyTorch to release the
# cached GPU memory they were holding (useful when this cell is re-run
# after an earlier training attempt in the same kernel).
gc.collect()
torch.cuda.empty_cache()

# Switch the model to training mode; the test cell switches it back with
# model.eval().
model.train()

def label_to_tensor(label):
    """Return the one-hot float encoding of `label`.

    The result is a 1-D float tensor of length N_CLASSES with 1.0 at the
    class index of `label` and 0.0 everywhere else.
    """
    # Resolve the class index once instead of once per class: each
    # label_to_idx call allocates a new tensor.
    target_idx = int(label_to_idx(label))
    return torch.as_tensor([float(i == target_idx) for i in range(N_CLASSES)])

#--- training ---
for epoch in range(N_EPOCHS):
    # Sum of per-example losses and number of examples seen this epoch;
    # their ratio is the mean training loss that gets reported.
    total_loss = 0.0
    n_seen = 0
    # Generally speaking, it's a good idea to shuffle your
    # datasets once every epoch.
    random.shuffle(data['training'])

    # Walk over the shuffled data in strides of BATCH_SIZE. The last batch
    # may be shorter, but it is still used, so no training example is
    # skipped in any epoch.
    for start in range(0, len(data['training']), BATCH_SIZE):
        minibatch = data['training'][start:start + BATCH_SIZE]
        # Each 'BOW' entry is a (1, vocab_size) row, so concatenating the
        # rows gives a (batch, vocab_size) input matrix.
        X = torch.cat([tweet['BOW'] for tweet in minibatch]).to(device)
        # CrossEntropyLoss takes class indices directly; this gives the same
        # loss as one-hot float targets without building them per example.
        y = torch.cat([label_to_idx(tweet['SENTIMENT']) for tweet in minibatch]).to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # loss is the batch mean (the loss function's default reduction), so
        # weight it by the batch size to keep the epoch average exact even
        # when the last batch is shorter.
        total_loss += loss.item() * len(minibatch)
        n_seen += len(minibatch)

    if ((epoch+1) % REPORT_EVERY) == 0:
        # max(..., 1) avoids a ZeroDivisionError on an empty training split.
        print('epoch: %d, loss: %.4f' % (epoch+1, total_loss / max(n_seen, 1)))


epoch: 1, loss: 0.9937
epoch: 2, loss: 0.9168
epoch: 3, loss: 0.8224
epoch: 4, loss: 0.7534
epoch: 5, loss: 0.6922
epoch: 6, loss: 0.6481
epoch: 7, loss: 0.5787
epoch: 8, loss: 0.5448
epoch: 9, loss: 0.4951
epoch: 10, loss: 0.4528
epoch: 11, loss: 0.3289
epoch: 12, loss: 0.4159
epoch: 13, loss: 0.3026
epoch: 14, loss: 0.2232
epoch: 15, loss: 0.0901
epoch: 16, loss: 0.0390
epoch: 17, loss: 0.0278
epoch: 18, loss: 0.0229
epoch: 19, loss: 0.0186
epoch: 20, loss: 0.0140
epoch: 21, loss: 0.0132
epoch: 22, loss: 0.0117
epoch: 23, loss: 0.0127
epoch: 24, loss: 0.0101
epoch: 25, loss: 0.0076
epoch: 26, loss: 0.0104
epoch: 27, loss: 0.0080
epoch: 28, loss: 0.0095
epoch: 29, loss: 0.0082
epoch: 30, loss: 0.0074
epoch: 31, loss: 0.0088
epoch: 32, loss: 0.0080
epoch: 33, loss: 0.0070
epoch: 34, loss: 0.0067
epoch: 35, loss: 0.0074
epoch: 36, loss: 0.0059
epoch: 37, loss: 0.0074
epoch: 38, loss: 0.0070
epoch: 39, loss: 0.0063
epoch: 40, loss: 0.0068
epoch: 41, loss: 0.0059
epoch: 42, loss: 0.0069
e

In [9]:
#--- test ---
# Evaluation mode plus no_grad: no gradients are tracked while scoring.
model.eval()
correct = 0
with torch.no_grad():
    for tweet in data['test.gold']:
        gold_class = label_to_idx(tweet['SENTIMENT'])

        # tweet['BOW'] is already a (1, vocab_size) batch of one example.
        bow = tweet['BOW'].to(device)
        predicted = model(bow)

        # The predicted class is the position of the largest logit.
        if int(predicted.argmax()) == int(gold_class):
            correct += 1

        # Per-example report, left disabled because it prints one line per test tweet.
        #if IS_VERBOSE:
        #    print('TEST DATA: %s, GOLD LABEL: %s, GOLD CLASS %d, OUTPUT: %d' %
        #         (' '.join(tweet['BODY'][:-1]), tweet['SENTIMENT'], int(gold_class), int(predicted.argmax())))

    # Report 0.00 instead of raising ZeroDivisionError on an empty test split.
    n_test = len(data['test.gold'])
    accuracy = 100.0 * correct / n_test if n_test else 0.0
    print('test accuracy: %.2f' % accuracy)

test accuracy: 66.65
