In [35]:
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.optim as optim
from __future__ import print_function
from __future__ import division

torch.manual_seed(1)

<torch._C.Generator at 0x7f88640105d0>

In [70]:
class LSTMSequenceLabler(nn.Module):
    """Two-layer bidirectional LSTM that assigns a 0/1 label to every token.

    All inputs are time-major, because ``nn.LSTM`` is built with its default
    ``batch_first=False``: ``batch_x`` is a LongTensor of word indices of
    shape ``(seq_len, batch_size)``. The sequence length is read from the
    input itself, so the model does not depend on any notebook-level global.

    Args:
        vocab_size: number of rows in the embedding table.
        embedding_dim: size of each word embedding.
        hidden_dim: LSTM hidden size per direction.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim):
        super(LSTMSequenceLabler, self).__init__()
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.vocab_size = vocab_size

        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(input_size=embedding_dim, hidden_size=hidden_dim,
                            num_layers=2, dropout=0.2, bidirectional=True)
        # Forward and backward states are concatenated, hence 2 * hidden_dim.
        self.linear = nn.Linear(hidden_dim * 2, 1)
        self.sigmoid = nn.Sigmoid()

    def _logits(self, batch_x):
        """Return the pre-sigmoid score of every token, shape (seq_len, batch)."""
        word_embeddings = self.embedding(batch_x)  # (seq_len, batch, embedding_dim)
        lstm_out, _ = self.lstm(word_embeddings)   # (seq_len, batch, 2 * hidden_dim)
        linear_out = self.linear(lstm_out)         # (seq_len, batch, 1)
        # Drop the trailing singleton dimension using the actual sequence length.
        return linear_out.view(batch_x.size(0), -1)

    def get_loss(self, batch_x, batch_y, pos_weight=3.0):
        """Weighted binary cross entropy, summed over every token in the batch.

        Args:
            batch_x: LongTensor of word indices, shape (seq_len, batch).
            batch_y: tensor of 0/1 targets with the same shape as ``batch_x``.
            pos_weight: multiplier applied to the loss of positive (1) targets
                so that missing a positive costs more than a false alarm.

        Returns:
            A scalar tensor holding the summed loss.
        """
        logits = self._logits(batch_x)
        # log(sigmoid(z)) and log(1 - sigmoid(z)) == log(sigmoid(-z)), computed
        # with logsigmoid so a saturated sigmoid cannot produce log(0) = -inf
        # (which would turn the loss into NaN through 0 * -inf).
        log_prob_pos = nn.functional.logsigmoid(logits)
        log_prob_neg = nn.functional.logsigmoid(-logits)
        loss = -(pos_weight * batch_y * log_prob_pos + (1 - batch_y) * log_prob_neg)
        return torch.sum(loss)

    def forward(self, batch_x):
        """Predict a hard 0/1 label for every token.

        Args:
            batch_x: LongTensor of word indices, shape (seq_len, batch).

        Returns:
            LongTensor of shape (seq_len, batch); 1 where the predicted
            probability is at least 0.5, otherwise 0.
        """
        probabilities = self.sigmoid(self._logits(batch_x))
        return (probabilities >= 0.5).type(torch.LongTensor)

In [None]:
import math

# Padding token string; the padding step further down appends this same value.
PADDING = "<PAD>"
# max sentence size: every sentence (and its label row) is padded to this length
max_len = 68

# Sizes handed to LSTMSequenceLabler: word-embedding width and LSTM hidden
# size per direction.
EMBEDDING_DIM = 5
HIDDEN_DIM = 4

# Number of sentences taken from the training tensors per optimisation step.
batch_size = 15

# Whitespace-tokenised text files, one sentence per line. The *_y files are
# parsed with int() token by token, so they hold one integer label per word.
train_file_x = 'data/restaurants_train_x.txt'
train_file_y = 'data/restaurants_train_y.txt'

test_file_x = 'data/restaurants_test_x.txt'
test_file_y = 'data/restaurants_test_y.txt'

def make_list(path):
    """Read a text file into a list of token lists.

    Each line is split on whitespace; a blank line yields an empty list.

    Args:
        path: path of the file to read.

    Returns:
        A list with one list of string tokens per line of the file.
    """
    # The context manager closes the handle even if reading fails; the
    # original left the file open until garbage collection.
    with open(path) as f:
        return [line.split() for line in f]

# Load the tokenised sentences (x) and their per-token label strings (y).
train_x = make_list(train_file_x)
train_y = make_list(train_file_y)
test_x = make_list(test_file_x)
test_y = make_list(test_file_y)


def _pad_to_max_len(sentences, labels):
    """Right-pad each sentence/label pair in place to exactly ``max_len`` items.

    Sentences are padded with ``PADDING`` and label rows with ``'0'``.
    Raises ValueError when the two lists are misaligned or a sentence is
    longer than ``max_len``; either case would otherwise only surface later
    as an opaque ragged-tensor error.
    """
    if len(sentences) != len(labels):
        raise ValueError("got %d sentences but %d label rows"
                         % (len(sentences), len(labels)))
    for row, (words, tags) in enumerate(zip(sentences, labels)):
        if len(words) != len(tags):
            raise ValueError("row %d has %d words but %d labels"
                             % (row, len(words), len(tags)))
        if len(words) > max_len:
            raise ValueError("row %d has %d words, more than max_len=%d"
                             % (row, len(words), max_len))
        missing = max_len - len(words)
        words.extend([PADDING] * missing)
        tags.extend(['0'] * missing)


# add padding
_pad_to_max_len(train_x, train_y)
_pad_to_max_len(test_x, test_y)

# Vocabulary: every distinct training token (including the padding token)
# gets the next free index.
word_to_ix = {}
for sentence in train_x:
    for word in sentence:
        if word not in word_to_ix:
            word_to_ix[word] = len(word_to_ix)

# for unknown words, only in testing, randomly initialized
# setdefault keeps the existing index if '<unk>' already occurs in the
# training text; overwriting it would produce an index equal to the
# vocabulary size, which is out of range for the embedding table.
word_to_ix.setdefault('<unk>', len(word_to_ix))
unk_ix = word_to_ix['<unk>']

# Replace tokens by indices and label strings by ints. List comprehensions
# produce real lists on both Python 2 and 3 (map() is lazy on Python 3).
train_x = [[word_to_ix[word] for word in sentence] for sentence in train_x]
train_y = [[int(tag) for tag in tags] for tags in train_y]

test_x = [[word_to_ix.get(word, unk_ix) for word in sentence] for sentence in test_x]
test_y = [[int(tag) for tag in tags] for tags in test_y]

# make data tensors, each of shape (num_sentences, max_len)
train_x = torch.LongTensor(train_x)
train_y = torch.Tensor(train_y)  # float targets, as required by the loss
test_x = torch.LongTensor(test_x)
test_y = torch.LongTensor(test_y)

# Materialised so that len(training_data) also works on Python 3.
training_data = list(zip(train_x, train_y))

NUM_EPOCHS = 200
REPORT_EVERY = 20  # print loss and precision/recall every this many epochs

model = LSTMSequenceLabler(len(word_to_ix), EMBEDDING_DIM, HIDDEN_DIM)
optimizer = optim.SGD(model.parameters(), lr=0.01, weight_decay=1e-4)


def report_precision_recall(label, inputs, targets):
    """Print token-level precision and recall of ``model`` on one data split.

    Args:
        label: prefix for the printed lines, e.g. "Training Set".
        inputs: LongTensor of word indices, shape (num_sentences, max_len).
        targets: 0/1 tensor with the same shape as ``inputs``.

    Nothing is printed when there are no positive targets or no positive
    predictions, since the ratios would be undefined.
    """
    # Gradients are not needed for scoring; skipping them saves memory.
    with torch.no_grad():
        prediction = model(inputs.t())  # model expects (max_len, num_sentences)
    # The model returns a LongTensor; cast the target to match so the product
    # below does not mix float and integer tensors.
    target = targets.t().long()

    true_pos = torch.sum(target * prediction).item()
    tot_pos = torch.sum(target).item()
    tot_predicted = torch.sum(prediction).item()

    if tot_predicted != 0 and tot_pos != 0:
        print("%s Precision" % label, true_pos / tot_predicted)
        print("%s Recall" % label, true_pos / tot_pos)


num_examples = train_x.size(0)

for epoch in range(NUM_EPOCHS):
    model.train()  # enable the LSTM's inter-layer dropout for training
    loss_val = 0.0

    # Walk over the training set in consecutive mini-batches; the last batch
    # may be smaller than batch_size.
    for start in range(0, num_examples, batch_size):
        end = min(start + batch_size, num_examples)

        batch_x = train_x[start:end].t()  # shape = (max_len, batch)
        batch_y = train_y[start:end].t()

        model.zero_grad()
        loss = model.get_loss(batch_x, batch_y)
        loss.backward()
        optimizer.step()
        # get_loss already returns the SUM over the batch, so it is added
        # as-is; scaling it by the batch size again would inflate the total.
        loss_val += loss.item()

    if epoch % REPORT_EVERY == 0:
        print("==========================================")
        print("Loss at epoch %d" % epoch, loss_val)

        # Score with dropout disabled so the metrics are deterministic;
        # model.train() at the top of the loop re-enables it.
        model.eval()
        report_precision_recall("Training Set", train_x, train_y)
        print("-----------------------------------------")
        report_precision_recall("Test Set", test_x, test_y)
        print("==========================================")

Loss at epoch 1 136630.188667
True pos 0
Tot pos predicted 0
Loss at epoch 21 24856.8224063
True pos 274
Tot pos predicted 429
Precision 0.638694638695
Recall 0.693670886076
Loss at epoch 41 16806.1950073
True pos 282
Tot pos predicted 438
Precision 0.643835616438
Recall 0.713924050633
Loss at epoch 61 13068.8999619
True pos 279
Tot pos predicted 421
Precision 0.66270783848
Recall 0.706329113924
Loss at epoch 81 10967.3413696
True pos 287
Tot pos predicted 412
Precision 0.696601941748
Recall 0.726582278481
