In [1]:
import sys
import argparse
import time
import numpy as np
import logging
import math
from sklearn.externals import joblib
import torch
import torch.autograd as autograd

# Load Data

In [100]:
input_dir =  'data/preprocessed'

data = joblib.load(input_dir+'/fine.pkl')

train,dev,test = data['data']

word2id = data['dicts']['word2id']

id2word = data['dicts']['id2word']

embd = data['embd']

input_dim = len(embd[0])

max_len_train = max([len(d) for d in train[0]])

max_len_dev = max([len(d) for d in dev[0]])

max_len_test = max([len(d) for d in test[0]])

max_len = max([max_len_train,max_len_dev,max_len_test])


# Batcher

In [3]:
import numpy as np
from sklearn.externals import joblib
import random


class Batcher:
    def __init__(self,data,batch_size,max_len):
        
        self.xs = data[0]
        
        self.ys = data[1]
        
        self.max_length = max_len
        
        self.batch_size = batch_size
        
        self.batch_num = 0
        
        self.num_of_samples = len(self.xs)
        
        self.max_batch_num = int(self.num_of_samples / self.batch_size) 
        
        self.indexes = np.arange(len(data[0]))
        

    def next(self):
        
        X = np.zeros((self.batch_size,self.max_length),dtype=np.int32)
        
        Y = np.zeros((self.batch_size),dtype=np.int32)
        
        lengths = np.zeros((self.batch_size),dtype=np.int32)    
        
        for i in range(self.batch_size):
            
            index = self.indexes[self.batch_num * self.batch_size + i]
            
            lengths[i] = len(self.xs[index])

            X[i,:lengths[i]] = self.xs[index]
            
            Y[i] = self.ys[index]
            
        self.batch_num = (self.batch_num + 1) % self.max_batch_num

        res = [X,Y,lengths]

        return res
    
    def shuffle(self):
        
        np.random.shuffle(self.indexes)


# Main Loop

In [None]:
train_batcher = Batcher(train,25,max_len)

dev_batcher = Batcher(dev,109,max_len)

test_batcher = Batcher(test,67,max_len)

model = Model(5,max_len,len(embd),embd,emb_dim =input_dim,hidden_dim=120)

def hook(outputs,ys):
    
    corr = 0
    for output,y in zip(outputs,ys):
        pred = np.argmax(output)
        if pred == y :
            corr +=1 
    return corr,len(ys)


best_acc = 0

best_dev_acc = 0
for epoch in range(30):
    
    train_batcher.shuffle()
    
    step_per_epoch = train_batcher.max_batch_num

    loss = 0.0
    
    print "Epoch %d" % epoch
        
    for i in range(step_per_epoch):
        
        input_x, y, lengths = train_batcher.next()
        
        input_x = autograd.Variable(torch.from_numpy(input_x).long())
        
        y = autograd.Variable(torch.from_numpy(y)).long()

        lengths = autograd.Variable(torch.from_numpy(lengths))
        
        loss += model.train_(input_x,y,lengths)
            
    step_per_epoch = dev_batcher.max_batch_num
    corr = 0
    total = 0
    for i in range(step_per_epoch):
        
        input_x, y, lengths = dev_batcher.next()
        
        input_x = autograd.Variable(torch.from_numpy(input_x).long())
        
        y = autograd.Variable(torch.from_numpy(y)).long()

        lengths = autograd.Variable(torch.from_numpy(lengths))
                
        outputs  = model.test(input_x,lengths)

        corr_,total_ = hook(outputs,y.data.numpy())
        
        corr += corr_
        total += total_
        
    dev_acc = (float(corr)*100/total)
    print 'dev acc: %.2f' % dev_acc
    
    
    step_per_epoch = test_batcher.max_batch_num
    corr = 0
    total = 0
    for i in range(step_per_epoch):
        
        input_x, y, lengths = test_batcher.next()
        
        input_x = autograd.Variable(torch.from_numpy(input_x).long())
        
        y = autograd.Variable(torch.from_numpy(y)).long()

        lengths = autograd.Variable(torch.from_numpy(lengths))
        
        outputs  = model.test(input_x,lengths)

        corr_,total_ = hook(outputs,y.data.numpy())
        
        corr += corr_
        total += total_
        
    test_acc = (float(corr) * 100 / total)
    print 'test acc: %.2f' % test_acc
    
    if dev_acc > best_dev_acc:
        best_acc = test_acc
        
print "best test acc:%.2f" % (best_acc)

Epoch 0
dev acc: 35.78
test acc: 36.89
Epoch 1
dev acc: 35.50
test acc: 39.97
Epoch 2
dev acc: 35.05
test acc: 39.51
Epoch 3
dev acc: 39.27
test acc: 41.37
Epoch 4
dev acc: 38.17
test acc: 42.72
Epoch 5
dev acc: 37.98
test acc: 39.51
Epoch 6
dev acc: 39.72
test acc: 43.61
Epoch 7
dev acc: 39.72
test acc: 44.78
Epoch 8


# Model

In [None]:
from torch import nn
import torch.optim as optim

def _lengths_to_masks(lengths, max_length):
    
    tiled_ranges = autograd.Variable(torch.arange(0,float(max_length)).unsqueeze(0).expand([len(lengths),max_length]))
    
    lengths = lengths.float().unsqueeze(1).expand_as(tiled_ranges)
    
    mask = tiled_ranges.lt(lengths).float()

    return mask

# def gather(indexes, max_length,data):
    

def weight_variable(shape):
    
    initial = np.random.uniform(-0.01, 0.01,shape)
    
    initial = torch.from_numpy(initial)
    
    return initial.float()



class Model(nn.Module):

    def __init__(self,num_classes,max_length,num_tokens,embd,emb_dim = 300,hidden_dim=100,lr=0.05):
        
        super(Model,self).__init__()
    
        self.emb_dim = emb_dim
        
        self.hidden_dim = hidden_dim
        
        self.max_length = max_length
        
        self.num_classes = num_classes
                        
        self.num_att = num_classes
        
            
        self.embedding = nn.Embedding(num_tokens,emb_dim)
        
        self.embedding.weight = nn.Parameter(torch.from_numpy(embd),requires_grad=True)
        
        self.linear = nn.Linear(self.hidden_dim * 2, self.num_classes)
        
        self.lstm_fw = nn.LSTMCell(self.emb_dim ,self.hidden_dim)
        
        self.lstm_bw = nn.LSTMCell(self.emb_dim ,self.hidden_dim)

        self.loss_fn = nn.functional.cross_entropy
        
        self.softmax = nn.Softmax()
        
        self.sigmoid = nn.Sigmoid()
        
        self.tanh = nn.Tanh()
        
        self.dropout = nn.Dropout(0.5)
        
        self.err = 1e-24
        
        self.optimizer = optim.Adagrad(filter(lambda p: p.requires_grad, self.parameters()), lr=lr,weight_decay=1e-4)
 
    def init_hidden(self,batch_size):
        
        return (autograd.Variable(torch.zeros(batch_size, self.hidden_dim)),autograd.Variable(torch.zeros(batch_size, self.hidden_dim)))
        
    def bidirectional_forward(self,x,lengths):
        
        max_length = x.size()[1]
        
        batch_size = x.size()[0]
        
        mask =  _lengths_to_masks(lengths,max_length)
        
        x_embd =  self.dropout(self.embedding(x).transpose(0,1))
                
        hidden_fw = self.init_hidden(len(x))
        
        hidden_bw = self.init_hidden(len(x))
        
        lstm_fw_outputs = []
        
        lstm_bw_outputs = []
        indices =  autograd.Variable(torch.arange(0,float(batch_size))*max_length).long() + (lengths-1).long()
        
        for i in range(self.max_length): 
        
            hidden_fw = self.lstm_fw(x_embd[i],hidden_fw)
        
            hidden_fw = [self.dropout(fw) * mask[:,i].unsqueeze(1).expand_as(fw) for fw in hidden_fw]
            
            hidden_bw = self.lstm_bw(x_embd[-i-1],hidden_bw)
            
            hidden_bw = [self.dropout(bw) * mask[:,-i-1].unsqueeze(1).expand_as(bw) for bw in hidden_bw]
            
                        
            lstm_fw_outputs.append(hidden_fw[0])
            
            lstm_bw_outputs.append(hidden_bw[0])
        
        lstm_bw_outputs = lstm_bw_outputs[::-1]
        
        lstm_fw_output = torch.cat([fw.unsqueeze(1) for fw in lstm_fw_outputs],1).view(-1,self.hidden_dim).index_select(0,indices)
        
        lstm_bw_output =  lstm_bw_outputs[0]
        
        
        lstm_outputs = torch.cat([lstm_fw_output,lstm_bw_output],-1)
        
        return lstm_outputs
    
    def classify(self, outputs):
        
        return self.softmax(self.linear(outputs))
    
    def train_(self,x,y,lengths):
        
        self.zero_grad()
        
        self.train()
        
        lstm_outputs = self.bidirectional_forward(x,lengths)
        
        output_ = self.classify(lstm_outputs)
        
        y = y.view(-1)
        
        loss = self.loss_fn(output_,y)
        
        loss.backward()
        
        self.optimizer.step()
        
        return loss
    def test(self,x,lengths):
        
        self.eval()
        
        lstm_outputs = self.bidirectional_forward(x,lengths)
        
              

        output_ = self.classify(lstm_outputs)

        return output_.data.numpy()#,c_att,global_att