In [35]:
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.optim as optim
from __future__ import print_function
from __future__ import division

torch.manual_seed(1)

<torch._C.Generator at 0x7f88640105d0>

In [59]:
class LSTMSequenceLabler(nn.Module):
    
    def __init__(self, vocab_size, embedding_dim, hidden_dim):
        super(LSTMSequenceLabler, self).__init__()
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.vocab_size = vocab_size
        
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(input_size=embedding_dim, hidden_size=hidden_dim, num_layers=2, dropout=0.2, bidirectional=True)
        self.linear = nn.Linear(hidden_dim * 2, 1)
        self.sigmoid = nn.Sigmoid()
#         self.loss = nn.BCELoss()
        
#         self.hidden = self.init_hidden()

#     def init_hidden(self):
#         return (torch.randn(2, 1, self.hidden_dim),
#                 torch.randn(2, 1, self.hidden_dim))
    
    def get_loss(self, batch_x, batch_y):
        word_embeddings = self.embedding(batch_x) #outputs 50, 15, <embedding_size>
        lstm_out, _ = self.lstm(word_embeddings) #output 50, 15, <2 * hidden_size>
        linear_out = self.linear(lstm_out) #output 50, 15, 1
        sigmoid_out = self.sigmoid(linear_out) #output 50, 15, 1
        prediction = sigmoid_out.view(max_len, -1)
        
        # custom loss function
        # weighted binary cross entropy loss with 1 classifications are given higher priority
        w = 3.0
        loss = - (w * batch_y * torch.log(prediction) + (1-batch_y) * torch.log(1-prediction))
        loss = torch.sum(loss)
#         return self.loss(prediction, batch_y)
        return loss
    
    def forward(self, batch_x):
        word_embeddings = self.embedding(batch_x) #outputs 50, 15, <embedding_size>
        lstm_out, _ = self.lstm(word_embeddings) #output 50, 15, <2 * hidden_size>
        linear_out = self.linear(lstm_out) #output 50, 15, 1
        sigmoid_out = self.sigmoid(linear_out) #output 50, 15, 1
        prediction = sigmoid_out.view(max_len, -1)
        
        prediction[prediction >= 0.5] = 1
        prediction[prediction < 0.5] = 0
        
        return prediction

In [62]:
import math

PADDING = "<PAD>"
# max sentence size
max_len = 50

EMBEDDING_DIM = 5
HIDDEN_DIM = 4

batch_size = 15

train_file_x = 'data/restaurants_trial_x.txt'
train_file_y = 'data/restaurants_trial_y.txt'

def make_list(path):
    f = open(path)
    return [x.split() for x in f]

train_x = make_list(train_file_x)
# print('max len', max(map(len, train_x)))
train_y = make_list(train_file_y)
# training_data = list(zip(train_x, train_y))

# add padding
for i in xrange(len(train_x)):
    for _ in xrange(max_len-len(train_x[i])):
        train_x[i].append('<PAD>')
        train_y[i].append('0')

word_to_ix = {}
for sentence in train_x:
    for word in sentence:
        if word not in word_to_ix:
            word_to_ix[word] = len(word_to_ix)
            
# make train data tensors
for i in xrange(len(train_x)):
    train_x[i] = map(lambda x: word_to_ix[x], train_x[i])
    train_y[i] = map(int, train_y[i])

train_x = torch.LongTensor(train_x)
train_y = torch.Tensor(train_y)

training_data = zip(train_x, train_y)


# embedding = nn.Embedding(len(word_to_ix), EMBEDDING_DIM)
# lstm = nn.LSTM(input_size=EMBEDDING_DIM, hidden_size=HIDDEN_DIM, num_layers=2, dropout=0.2, bidirectional=True)
# linear = nn.Linear(HIDDEN_DIM * 2, 1)
# sigmoid = nn.Sigmoid()

model = LSTMSequenceLabler(len(word_to_ix), EMBEDDING_DIM, HIDDEN_DIM)
optimizer = optim.SGD(model.parameters(), lr=0.01, weight_decay=1e-4)

for epoch in range(200):
    batch_id = 0
    start = 0
    end = 0
    
    loss_val = 0
    
    while(batch_id < int(math.ceil(len(training_data)/batch_size))):
        start = batch_id * batch_size
        end = min(start + batch_size, len(training_data))
        
        batch_x = train_x[start:end].permute(dims=(1,0)) #shape = 50 (max_len), 15 (batch_size)
        batch_y = train_y[start:end].permute(dims=(1,0))
        
        model.zero_grad()
        loss = model.get_loss(batch_x, batch_y)
        loss.backward()
        optimizer.step()
        loss_val += loss.tolist() * (end - start)
        batch_id += 1
    
    
    if epoch%20==0:
        print("Loss at epoch %d" % (epoch + 1), loss_val)
        # check training accuracy in each 20 epochs
        
        prediction = model(train_x.permute(dims=(1,0)))
        target = train_y.permute(dims=(1,0))
        
        true_pos = torch.sum(target * prediction).tolist()
        print("True pos", true_pos)
        tot_pos = torch.sum(target).tolist()
        tot_predicted = torch.sum(prediction).tolist()
        print("Tot pos predicted", tot_predicted)
        false_pos = torch.sum(prediction).tolist() - true_pos
        
#         if tot_predicted!=0 and tot_pos!=0:
#             print("Precision", true_pos/tot_predicted)
#             print("Recall", true_pos/tot_pos)

Loss at epoch 1 11689.6832848
True pos 0.0
Tot pos predicted 0.0
Loss at epoch 21 4127.71922493
True pos 0.0
Tot pos predicted 0.0
Loss at epoch 41 2659.08716011
True pos 16.0
Tot pos predicted 26.0
Loss at epoch 61 1195.99258018
True pos 39.0
Tot pos predicted 54.0
Loss at epoch 81 507.784154415
True pos 41.0
Tot pos predicted 49.0
Loss at epoch 101 270.818754077
True pos 41.0
Tot pos predicted 49.0
Loss at epoch 121 147.713740945
True pos 41.0
Tot pos predicted 44.0
Loss at epoch 141 93.9536977112
True pos 41.0
Tot pos predicted 45.0
Loss at epoch 161 169.856117032
True pos 41.0
Tot pos predicted 42.0
Loss at epoch 181 41.1793210134
True pos 41.0
Tot pos predicted 43.0
