In [15]:
from collections import defaultdict
import time 
import random 
import numpy as np 

import torch 
from torch import nn 
from torch.autograd import Variable
# from model import BoW

In [16]:
# Functions to read in the corpus
# Vocabulary tables filled lazily while reading: looking up a key that has not
# been seen yet stores it with the next free integer id (the table's current
# size), so ids are dense and start at 0.
t2i = defaultdict(lambda: len(t2i))  # tag  -> integer id
w2i = defaultdict(lambda: len(w2i))  # word -> integer id
# Touch "<unk>" before any real word so the unknown-word token takes id 0.
UNK = w2i["<unk>"]

def read_dataset(filename):
    """Yield ``(word_ids, tag_id)`` pairs parsed from a labelled text file.

    Every non-blank line is expected to look like ``tag ||| word word ...``.
    The line is lower-cased and stripped, then the tag and each
    space-separated word are mapped to integer ids through the module-level
    ``t2i`` / ``w2i`` tables.

    Args:
        filename: Path of the file to read.

    Yields:
        A tuple ``(list of word ids, tag id)`` per example, in file order.

    Raises:
        ValueError: If a non-blank line does not contain the `` ||| ``
            separator.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform default,
    # so the same file is read identically on every OS.
    with open(filename, encoding="utf-8") as f:
        for line in f:
            line = line.lower().strip()
            if not line:
                # Blank lines (e.g. a trailing newline at end of file) carry
                # no example; skip them instead of failing on the unpack below.
                continue
            # Split on the first separator only, so a literal " ||| " inside
            # the sentence stays in the words part.
            tag, words = line.split(" ||| ", 1)
            yield [w2i[x] for x in words.split(" ")], t2i[tag]

# Materialise the training examples so they can be shuffled and iterated
# more than once.
# NOTE(review): `train_data` is not defined in any visible cell — presumably a
# path such as "../data/classes/train.txt"; confirm it is set before this runs.
train = [example for example in read_dataset(train_data)]
# Display the first (word ids, tag id) pair as a sanity check.
train[0]

([1,
  2,
  3,
  4,
  5,
  6,
  1,
  7,
  8,
  9,
  10,
  11,
  12,
  13,
  14,
  15,
  16,
  9,
  17,
  5,
  18,
  19,
  20,
  21,
  22,
  23,
  24,
  25,
  26,
  27,
  28,
  29,
  30,
  31,
  32,
  33],
 0)

In [6]:
# Freeze the vocabulary: copy the existing word -> id entries into a table
# whose default is UNK, so words not seen so far map to the unknown-word id
# instead of receiving a fresh id. (As with any defaultdict, looking up an
# unseen word also stores it in the table, here with the UNK id.)
# NOTE(review): this must run after the training data is read and before any
# other file is read — confirm the cell order on a fresh kernel.
w2i = defaultdict(lambda: UNK, w2i.items())

In [25]:
class BoW(torch.nn.Module):
    """Bag-of-words classifier.

    Every word id owns a learnable vector of ``ntags`` scores. The vectors of
    all words in a sentence are summed and a learnable bias is added, giving
    one unnormalised score per tag.

    Args:
        nwords: Number of rows in the embedding table (vocabulary size).
        ntags: Number of output classes; also the width of each embedding.
    """

    def __init__(self, nwords, ntags):
        super(BoW, self).__init__()

        # Bias: a zero vector with one entry per tag (same width as the
        # embeddings). It is registered as an nn.Parameter so that it is
        # returned by ``parameters()`` (and therefore updated by the
        # optimizer), stored in ``state_dict()``, and moved together with the
        # module by ``.cuda()`` / ``.to()``.
        self.bias = nn.Parameter(torch.zeros(ntags))

        # nn.Embedding takes (vocab size, embedding size) and acts as a lookup
        # table from word indices to rows of a learnable weight matrix.
        self.embedding = nn.Embedding(nwords, ntags)
        # Initialise the weights with Xavier uniform (Glorot & Bengio, 2010).
        nn.init.xavier_uniform_(self.embedding.weight)

    def forward(self, words):
        """Score one sentence.

        Args:
            words: 1-D integer tensor holding the word indices of a sentence.

        Returns:
            Tensor of shape ``(1, ntags)`` with the summed word scores plus
            the bias.
        """
        emb = self.embedding(words)
        # Summing over dim 0 collapses the per-word rows into a single
        # ntags-sized vector for the whole sentence.
        out = torch.sum(emb, dim=0) + self.bias
        # Add a leading dimension of size 1 so the result is 1 x ntags.
        return out.view(1, -1)

In [26]:
# Examples used for evaluation, read through the same vocabulary tables.
dev = [example for example in read_dataset("../data/classes/test.txt")]

# Table sizes, taken after both data files have been read.
nwords, ntags = len(w2i), len(t2i)

# Initialize the model: one ntags-sized score vector per word id.
model = BoW(nwords, ntags)
# Loss computed from the raw tag scores and the gold tag index.
criterion = nn.CrossEntropyLoss()
# Adam with its default hyper-parameters over the model's registered parameters.
optimizer = torch.optim.Adam(model.parameters())

In [27]:
# Choose the integer tensor type used for word/tag indices and, when a GPU is
# available, move the model onto it.
use_cuda = torch.cuda.is_available()
data_type = torch.cuda.LongTensor if use_cuda else torch.LongTensor

if use_cuda:
    model.cuda()

In [28]:
# One pass over the training data per iteration, followed by an accuracy
# check on `dev`.
for ITER in range(1):
    # Perform training
    random.shuffle(train)  # shuffles the list in place; new order each iteration
    train_loss = 0.0
    start = time.time()

    model.train()
    for words, tag in train:
        # One sentence per step: index tensors of the type selected for the device.
        words = torch.tensor(words).type(data_type)
        tag = torch.tensor([tag]).type(data_type)
        scores = model(words)
        loss = criterion(scores, tag)
        train_loss += loss.item()
        # Clear gradients left from the previous step before back-propagating.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print("iter %r: train loss/sent=%.4f, time=%.2fs" % (
                ITER, train_loss/len(train), time.time()-start))

    # Perform testing
    model.eval()
    test_correct = 0.0
    # No gradients are needed for evaluation; disabling autograd avoids
    # building a graph for every forward pass.
    with torch.no_grad():
        for words, tag in dev:
            words = torch.tensor(words).type(data_type)
            scores = model(words)[0].cpu().numpy()
            predict = np.argmax(scores)
            if predict == tag:
                test_correct += 1
    print("iter %r: test acc=%.4f" % (ITER, test_correct/len(dev)))

iter 0: train loss/sent=1.4724, time=14.58s
iter 0: test acc=0.4054
