In [4]:
import sys
import argparse
import time
import numpy as np
import logging
import math
from sklearn.externals import joblib
import torch
import torch.autograd as autograd

# Load Data

In [40]:
# Directory holding the preprocessed dataset bundle.
input_dir = 'data/preprocessed'

# The bundle is read as a dict; the keys used below are 'data', 'dicts' and 'embd'.
data = joblib.load(input_dir + '/fine.pkl')

# Three splits; element 0 of each split is the collection of sequences
# (it is what Batcher pads and what the length statistics below are taken over).
train, dev, test = data['data']

# Vocabulary lookups in both directions.
word2id, id2word = data['dicts']['word2id'], data['dicts']['id2word']

# Embedding matrix; the length of its first row is the embedding size.
embd = data['embd']
input_dim = len(embd[0])

# Longest sequence per split, then the overall maximum used as the padding width.
max_len_train = max(map(len, train[0]))
max_len_dev = max(map(len, dev[0]))
max_len_test = max(map(len, test[0]))
max_len = max(max_len_train, max_len_dev, max_len_test)


# Batcher

In [21]:
import numpy as np
from sklearn.externals import joblib
import random


class Batcher:
    """Serve fixed-size, zero-padded mini-batches from an ``(xs, ys)`` pair.

    ``data[0]`` holds the sequences and ``data[1]`` the per-sequence labels.
    Samples are addressed through ``self.indexes``, so ``shuffle()`` changes
    the serving order without touching the underlying data.  Only whole
    batches are served: trailing samples that do not fill a complete batch
    are never returned.
    """

    def __init__(self,data,batch_size,max_len):
        """Store the data and derive the batching bookkeeping.

        data       -- indexable pair ``(xs, ys)``.
        batch_size -- number of samples per batch.
        max_len    -- number of columns of the padded matrix built by ``next()``.
        """
        self.xs = data[0]
        self.ys = data[1]
        self.max_length = max_len
        self.batch_size = batch_size

        # Position of the batch that the next call to next() will return.
        self.batch_num = 0

        self.num_of_samples = len(self.xs)

        # Floor division stays an exact integer on both Python 2 and Python 3
        # (int(a / b) goes through float division on Python 3).
        self.max_batch_num = self.num_of_samples // self.batch_size

        self.indexes = np.arange(self.num_of_samples)

    def next(self):
        """Return ``[X, Y, lengths]`` for the current batch and advance the cursor.

        X       -- int32 array (batch_size, max_length), sequences left-aligned
                   and zero-padded; sequences longer than max_length are clipped.
        Y       -- int32 array (batch_size,) of labels.
        lengths -- int32 array (batch_size,) with the number of valid entries
                   in each row of X.

        The cursor wraps back to the first batch after the last whole batch.

        Raises IndexError when there are fewer samples than ``batch_size``,
        i.e. when not even one whole batch can be formed.
        """
        if self.max_batch_num == 0:
            raise IndexError(
                "cannot build a batch: %d samples available, batch_size is %d"
                % (self.num_of_samples, self.batch_size))

        X = np.zeros((self.batch_size,self.max_length),dtype=np.int32)
        Y = np.zeros((self.batch_size),dtype=np.int32)
        lengths = np.zeros((self.batch_size),dtype=np.int32)

        offset = self.batch_num * self.batch_size

        for i in range(self.batch_size):
            index = self.indexes[offset + i]
            sequence = self.xs[index]

            # Clip instead of letting the slice assignment fail on a shape
            # mismatch when a sequence is longer than the padded width.
            n = min(len(sequence), self.max_length)

            lengths[i] = n
            X[i,:n] = sequence[:n]
            Y[i] = self.ys[index]

        self.batch_num = (self.batch_num + 1) % self.max_batch_num

        return [X,Y,lengths]

    def shuffle(self):
        """Randomly permute the serving order in place (uses numpy's global RNG)."""
        np.random.shuffle(self.indexes)


# Main Loop

In [53]:
# Run settings, named so they can be tuned in one place.
BATCH_SIZE = 25
NUM_CLASSES = 5
NUM_EPOCHS = 15

train_batcher = Batcher(train,BATCH_SIZE,max_len)

dev_batcher = Batcher(dev,BATCH_SIZE,max_len)

test_batcher = Batcher(test,BATCH_SIZE,max_len)

# NOTE(review): Model is defined in a cell further down in this notebook, so
# this cell only runs if that cell has been executed first.
model = Model(NUM_CLASSES,max_len,len(embd),embd,emb_dim =input_dim)

# Skeleton of the epoch loop: each phase currently only pulls its batches;
# nothing is fed to `model` and `loss` is never updated.
for epoch in range(NUM_EPOCHS):

    # Only the training order is reshuffled each epoch.
    train_batcher.shuffle()

    step_per_epoch = train_batcher.max_batch_num

    loss = 0.0

    # Parenthesised form prints the same text on Python 2 and is valid on Python 3.
    print("Epoch %d" % epoch)

    # Training phase.
    for i in range(step_per_epoch):

        input_x, y, lengths = train_batcher.next()

    # Dev phase.
    step_per_epoch = dev_batcher.max_batch_num

    for i in range(step_per_epoch):

        input_x, y, lengths = dev_batcher.next()

    # Test phase.
    step_per_epoch = test_batcher.max_batch_num

    for i in range(step_per_epoch):

        input_x, y, lengths = test_batcher.next()

Epoch 0
Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5
Epoch 6
Epoch 7
Epoch 8
Epoch 9
Epoch 10
Epoch 11
Epoch 12
Epoch 13
Epoch 14


# Model

In [66]:
from torch import nn
import torch.optim as optim

def _lengths_to_masks(lengths, max_length):
    
    tiled_ranges = autograd.Variable(torch.arange(0,float(max_length)).unsqueeze(0).expand([len(lengths),max_length]))
    
    lengths = lengths.float().unsqueeze(1).expand_as(tiled_ranges)
    
    mask = tiled_ranges.lt(lengths).float()

    return mask

def weight_variable(shape):
    """Return a float tensor of the given shape sampled uniformly from [-0.01, 0.01)."""
    samples = np.random.uniform(low=-0.01, high=0.01, size=shape)
    return torch.from_numpy(samples).float()
class Model(nn.Module):
    """Bidirectional LSTM encoder assembled from two ``nn.LSTMCell`` modules.

    Token ids are looked up in a frozen embedding table initialised from
    ``embd`` and then run through a forward and a backward LSTM cell one time
    step at a time.  Padded positions (as given by ``lengths``) have their
    hidden and cell state zeroed at every step.
    """

    def __init__(self,num_classes,max_length,num_tokens,embd,emb_dim = 300,hidden_dim=100,lr=0.001):
        """
        num_classes -- stored on the instance (also copied to ``num_att``).
        max_length  -- stored on the instance.
        num_tokens  -- number of rows of the embedding table.
        embd        -- numpy array used as the (frozen) embedding weights.
        emb_dim     -- embedding size, also the LSTM input size.
        hidden_dim  -- hidden size of each LSTM direction.
        lr          -- learning rate of the Adam optimizer.
        """
        super(Model,self).__init__()

        self.emb_dim = emb_dim
        self.hidden_dim = hidden_dim
        self.max_length = max_length
        self.num_classes = num_classes

        # The three settings below are stored but not read by any method of this class.
        self.att_dim = 50
        self.depth = 10
        self.num_att = num_classes

        self.embedding = nn.Embedding(num_tokens,emb_dim)

        # Frozen pretrained embeddings.  Cast to float so the table matches the
        # dtype of the LSTM cells even when `embd` is stored as float64.
        self.embedding.weight = nn.Parameter(torch.from_numpy(embd).float(),requires_grad=False)

        self.lstm_fw = nn.LSTMCell(self.emb_dim ,self.hidden_dim)
        self.lstm_bw = nn.LSTMCell(self.emb_dim ,self.hidden_dim)

        # Helpers kept on the instance; only `dropout` is used by the methods below.
        self.loss_fn = nn.functional.cross_entropy
        self.softmax = nn.Softmax()
        self.sigmoid = nn.Sigmoid()
        self.tanh = nn.Tanh()
        self.dropout = nn.Dropout(0.1)
        self.err = 1e-24

        # The frozen embedding table is excluded from optimisation.
        self.optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=lr)

    def init_hidden(self,batch_size):
        """Return a zero ``(h, c)`` pair, each of shape (batch_size, hidden_dim)."""
        h0 = autograd.Variable(torch.zeros(batch_size, self.hidden_dim))
        c0 = autograd.Variable(torch.zeros(batch_size, self.hidden_dim))
        return (h0,c0)

    def bidirectional_forward(self,x,lengths):
        """Encode a padded batch with the forward and backward LSTM cells.

        x       -- 2-D tensor of token ids, one row per sequence.
        lengths -- per-row count of valid tokens; positions at or beyond it
                   are treated as padding.

        Returns a tensor of shape (batch, time, 2 * hidden_dim): the forward
        and backward hidden states concatenated per time step, with dropout
        applied.  Padded time steps are all zeros.
        """
        batch_size = len(x)

        # Number of time steps is taken from the input itself, so batches whose
        # width differs from the constructor's max_length are handled too and
        # the loop always agrees with the mask built below.
        max_length = x.size()[1]

        mask =  _lengths_to_masks(lengths,max_length)

        # (batch, time, emb) -> (time, batch, emb) so x_embd[t] is one time step.
        x_embd =  self.dropout(self.embedding(x).transpose(0,1))

        hidden_fw = self.init_hidden(batch_size)
        hidden_bw = self.init_hidden(batch_size)

        lstm_fw_outputs = []
        lstm_bw_outputs = []

        for i in range(max_length):

            # Forward direction reads step i; zero both h and c on padded rows.
            hidden_fw = self.lstm_fw(x_embd[i],hidden_fw)
            hidden_fw = tuple(fw * mask[:,i].unsqueeze(1).expand_as(fw) for fw in hidden_fw)

            # Backward direction reads the mirrored step.  Its state stays zero
            # while it walks through trailing padding, so it effectively starts
            # at each row's last valid token.
            hidden_bw = self.lstm_bw(x_embd[-i-1],hidden_bw)
            hidden_bw = tuple(bw * mask[:,-i-1].unsqueeze(1).expand_as(bw) for bw in hidden_bw)

            lstm_fw_outputs.append(hidden_fw[0])
            lstm_bw_outputs.append(hidden_bw[0])

        # Backward outputs were collected last-step-first; restore time order.
        lstm_bw_outputs = lstm_bw_outputs[::-1]

        lstm_outputs = torch.stack(
            [torch.cat([fw,bw],1) for fw,bw in zip(lstm_fw_outputs,lstm_bw_outputs)],1)

        return self.dropout(lstm_outputs)

In [67]:
# Pull one training batch (numpy arrays) ...
input_x, y, lengths = train_batcher.next()

# ... and wrap each array as an autograd Variable; ids and labels become long tensors.
input_x, y, lengths = (
    autograd.Variable(torch.from_numpy(input_x).long()),
    autograd.Variable(torch.from_numpy(y)).long(),
    autograd.Variable(torch.from_numpy(lengths)),
)

In [68]:
# Fresh model sized from the loaded embeddings.
model = Model(
    num_classes=5,
    max_length=max_len,
    num_tokens=len(embd),
    embd=embd,
    emb_dim=input_dim,
)

# Encode the batch prepared in the previous cell; the cell displays the result.
model.bidirectional_forward(input_x,lengths)

Variable containing:
( 0 ,.,.) = 
  0.0420 -0.0886 -0.0133  ...  -0.1151 -0.1539  0.0280
  0.0589  0.1427 -0.1632  ...  -0.0000 -0.1333  0.0603
  0.0928  0.0844 -0.1697  ...  -0.0388 -0.0634 -0.0179
           ...             ⋱             ...          
 -0.0000  0.0000 -0.0000  ...  -0.0000 -0.0000 -0.0000
 -0.0000  0.0000 -0.0000  ...  -0.0000 -0.0000 -0.0000
  0.0000  0.0000 -0.0000  ...  -0.0000 -0.0000 -0.0000

( 1 ,.,.) = 
  0.0021  0.0399  0.0584  ...   0.1026 -0.3495  0.0499
  0.0143  0.1123  0.1419  ...   0.0789 -0.0000  0.0716
  0.1315 -0.1021  0.0743  ...   0.0670 -0.3094 -0.0663
           ...             ⋱             ...          
  0.0000  0.0000 -0.0000  ...  -0.0000 -0.0000 -0.0000
  0.0000  0.0000 -0.0000  ...  -0.0000 -0.0000 -0.0000
  0.0000  0.0000 -0.0000  ...  -0.0000 -0.0000 -0.0000

( 2 ,.,.) = 
  0.1015  0.1227 -0.0728  ...  -0.1120 -0.2880  0.0255
  0.0804  0.0752 -0.0941  ...  -0.0585 -0.1575  0.0322
  0.1611  0.0836 -0.0000  ...  -0.1543 -0.1319 -0.0710
   

array([[5451, 7073, 7074, ...,    0,    0,    0],
       [ 372,  161,  284, ...,    0,    0,    0],
       [1338, 9908,   23, ...,    0,    0,    0],
       ..., 
       [   0, 4402,    9, ...,    0,    0,    0],
       [   6,   88,  377, ...,    0,    0,    0],
       [3654, 3808,    2, ...,    0,    0,    0]], dtype=int32)