In [1]:
from collections import defaultdict
import time
import random
import numpy as np

import torch
from torch import nn
from torch.autograd import Variable
# from model import DeepCBoW

In [2]:
# Functions to read in the corpus
# Vocabularies: looking up an unseen key assigns it the next free integer id.
w2i = defaultdict(lambda: len(w2i))
t2i = defaultdict(lambda: len(t2i))
# Reserve id 0 for the unknown-word token before any data is read.
UNK = w2i["<unk>"]
def read_dataset(filename):
    """Yield ``(word_ids, tag_id)`` pairs from a ``tag ||| words`` text file.

    Every line is lower-cased and stripped, then split on the first
    ``" ||| "`` into a tag and a space-separated sentence. Words and tags are
    mapped to integer ids through the module-level ``w2i`` / ``t2i`` tables,
    which grow as a side effect whenever an unseen key is looked up.

    Args:
        filename: path of the UTF-8 text file to read.

    Yields:
        A tuple ``(list_of_word_ids, tag_id)`` per non-blank line.

    Raises:
        ValueError: if a non-blank line does not contain ``" ||| "``.
    """
    with open(filename, "r", encoding="utf-8") as f:
        for line in f:
            line = line.lower().strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no example.
                continue
            # Split once only, so a " ||| " inside the text stays in the text.
            tag, words = line.split(" ||| ", 1)
            yield ([w2i[x] for x in words.split(" ")], t2i[tag])

In [3]:
# Read in the data
train = list(read_dataset("../data/classes/train.txt"))
# Freeze the vocabulary: from here on, unseen words map to UNK instead of
# receiving a fresh id. This must happen after reading train and before dev.
w2i = defaultdict(lambda: UNK, w2i)
dev = list(read_dataset("../data/classes/test.txt"))
# A defaultdict stores every key it is asked for, so after reading the dev set
# len(w2i) also counts dev-only words (all of which map to UNK). Size the
# vocabulary by the largest id actually in use, so the embedding table has no
# unused rows and re-running this cell gives the same value.
nwords = max(w2i.values()) + 1
ntags = len(t2i)

In [5]:
class DeepCBoW(torch.nn.Module):
    """Deep continuous bag-of-words classifier.

    A sentence is represented by the sum of its word embeddings, passed
    through ``nlayers`` tanh hidden layers and a final linear layer that
    produces one score per tag.

    Args:
        nwords: number of rows in the embedding table (vocabulary size).
        ntags: number of output classes.
        nlayers: number of hidden layers; ``0`` connects the summed embedding
            directly to the output layer.
        emb_size: dimensionality of the word embeddings.
        hid_size: width of every hidden layer.
    """

    def __init__(self, nwords, ntags, nlayers, emb_size, hid_size):
        super().__init__()

        # Kept as an attribute so callers can inspect the configured depth.
        self.nlayers = nlayers

        self.embedding = nn.Embedding(nwords, emb_size)
        # initialize the weights with xavier uniform (Glorot, X. & Bengio, Y. (2010))
        nn.init.xavier_uniform_(self.embedding.weight)

        # The first hidden layer consumes the embedding, the rest consume hid_size.
        self.linears = nn.ModuleList([
            nn.Linear(emb_size if i == 0 else hid_size, hid_size)
            for i in range(nlayers)])
        # Xavier-initialise the weights only; biases keep the nn.Linear default.
        for linear in self.linears:
            nn.init.xavier_uniform_(linear.weight)

        # Without hidden layers the output layer sees the raw summed embedding,
        # so its input width must be emb_size rather than hid_size.
        out_in_size = hid_size if nlayers > 0 else emb_size
        self.output_layer = nn.Linear(out_in_size, ntags)
        nn.init.xavier_uniform_(self.output_layer.weight)

    def forward(self, words):
        """Score one sentence.

        Args:
            words: integer tensor of word ids; the embeddings are summed over
                dim 0, so a 1-D tensor of ids for a single sentence is expected.

        Returns:
            Tensor of shape ``1 x ntags`` with unnormalised tag scores.
        """
        emb = self.embedding(words)
        emb_sum = torch.sum(emb, dim=0)  # size(emb_sum) = emb_size
        h = emb_sum.view(1, -1)  # size(h) = 1 x emb_size
        for linear in self.linears:
            h = torch.tanh(linear(h))  # new hidden layer
        return self.output_layer(h)

In [6]:
# initialize the model
EMB_SIZE = 64
HID_SIZE = 64
NLAYERS = 2
SEED = 1

# Seed every source of randomness used below (weight initialisation in torch,
# training-set shuffling via `random`) so a fresh run is repeatable.
random.seed(SEED)
torch.manual_seed(SEED)

model = DeepCBoW(nwords, ntags, NLAYERS, EMB_SIZE, HID_SIZE)
criterion = nn.CrossEntropyLoss()

# NOTE: `type` shadows the Python builtin of the same name; it is kept because
# the training cell reads it to build index tensors on the right device.
type = torch.LongTensor
use_cuda = torch.cuda.is_available()

if use_cuda:
    type = torch.cuda.LongTensor
    model.cuda()

# Build the optimizer only after the model sits on its final device.
optimizer = torch.optim.Adam(model.parameters())

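**What is happening in the forward pass?** The cell below walks through the first steps of `DeepCBoW.forward` by hand for a three-word sentence: embedding lookup, summing over the words, and applying a freshly initialised linear layer.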

In [27]:
# Work on whatever device the model lives on, so the walkthrough also runs
# after the model has been moved to the GPU.
demo_device = model.embedding.weight.device

# A fresh, untrained linear layer mapping emb_size -> hid_size
# (it is not one of the model's own layers).
layer = nn.Linear(EMB_SIZE, HID_SIZE).to(demo_device)

# Get the sentence embedding by summing over words
word_indices = [0, 1, 2]
words = torch.tensor(word_indices, dtype=torch.long, device=demo_device)
emb = model.embedding(words)  # size(emb) = 3 x emb_size
emb_sum = torch.sum(emb, dim=0)  # size(emb_sum) = emb_size

# Reshape the sentence embedding into a row vector and feed it to the layer
h = emb_sum.view(1, -1) # size(h) = 1 x emb_size
layer(h)

tensor([[ 0.0501,  0.0883,  0.0948,  0.0733,  0.0072,  0.0183,  0.0454, -0.0191,
          0.0785,  0.0137, -0.0029, -0.0553,  0.0901, -0.0942, -0.0329,  0.0110,
         -0.0332,  0.1417, -0.0160,  0.1058,  0.1229,  0.0100,  0.0711, -0.0067,
         -0.1250,  0.0965, -0.1606,  0.1043, -0.0297,  0.0020, -0.0315, -0.1196,
         -0.0309, -0.0550, -0.0587, -0.0042, -0.2053,  0.1045,  0.0077,  0.1215,
          0.0308, -0.1009,  0.0618, -0.0301,  0.1208,  0.0740, -0.0517, -0.0693,
          0.1031,  0.0887, -0.1267,  0.0197,  0.1525,  0.0624, -0.0769,  0.0525,
          0.0277,  0.0215, -0.1385, -0.0786, -0.0467, -0.0296,  0.0796, -0.0471]],
       grad_fn=<AddmmBackward>)

In [7]:
# One pass over the training data followed by an accuracy check on `dev`.
# Relies on `model`, `criterion`, `optimizer` and `type` from the setup cell.
for ITER in range(1):
    # Perform training
    random.shuffle(train)
    train_loss = 0.0
    start = time.time()
    model.train()
    for words, tag in train:
        # One sentence per step: ids and the gold tag become index tensors.
        word_ids = torch.tensor(words).type(type)
        target = torch.tensor([tag]).type(type)
        scores = model(word_ids)
        loss = criterion(scores, target)
        train_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print("iter %r: train loss/sent=%.4f, time=%.2fs" % (
                ITER, train_loss/len(train), time.time()-start))
    # Perform testing
    model.eval()
    test_correct = 0.0
    # No gradients are needed for evaluation; skipping the autograd graph
    # saves memory and time without changing the predictions.
    with torch.no_grad():
        for words, tag in dev:
            word_ids = torch.tensor(words).type(type)
            # model(...) is 1 x ntags; row 0 holds the scores for this sentence.
            predict = model(word_ids)[0].argmax().item()
            if predict == tag:
                test_correct += 1
    print("iter %r: test acc=%.4f" % (ITER, test_correct/len(dev)))

iter 0: train loss/sent=1.4329, time=170.45s
iter 0: test acc=0.4163
