# LSTM RNN

In [11]:
import numpy as np

from torchtext import data
from torchtext import datasets
from torchtext.vocab import Vectors

import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.optim as optim
from torch.nn import Linear
from torch.nn.functional import softmax, relu, tanh
from torchtext.vocab import Vectors, GloVe, CharNGram, FastText
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

from sklearn.manifold import TSNE

from bokeh.plotting import figure, ColumnDataSource
from bokeh.models import HoverTool
from bokeh.io import output_notebook, show, push_notebook
output_notebook()

In [12]:
use_cuda = torch.cuda.is_available()

def get_variable(x):
    """ Converts tensors to cuda, if available. """
    if use_cuda:
        return x.cuda()
    return x

def get_numpy(x):
    """ Get numpy array for both cuda and not. """
    if use_cuda:
        return x.cpu().data.numpy()
    return x.data.numpy()

In [13]:
TEXT = data.Field(sequential=True, pad_first = True)
LABEL = data.Field(sequential=False)

train_set, validation_set, test_set = datasets.SST.splits(TEXT,
                                                          LABEL,
                                                          fine_grained=False,
                                                          train_subtrees=True,
                                                          filter_pred=lambda ex: ex.label != 'neutral')

In [14]:


#for val_batch in val_iter:
#    print(val_batch.text.size())

In [15]:
# build the vocabulary
url = 'https://s3-us-west-1.amazonaws.com/fasttext-vectors/wiki.simple.vec'
TEXT.build_vocab(train_set, max_size=None, vectors=Vectors('wiki.simple.vec', url=url))
LABEL.build_vocab(train_set)
# print vocab information
print('len(TEXT.vocab)', len(TEXT.vocab))
print('TEXT.vocab.vectors.size()', TEXT.vocab.vectors.size())

# make iterator for splits
train_iter, val_iter, test_iter = data.BucketIterator.splits(
    (train_set, validation_set, test_set), batch_size=3)




# print batch information
batchsst = next(iter(train_iter))


len(TEXT.vocab) 18005
TEXT.vocab.vectors.size() torch.Size([18005, 300])


In [16]:
print(TEXT)

<torchtext.data.field.Field object at 0x0000020719837A20>


In [21]:
embedding_dim = TEXT.vocab.vectors.size()[1]
num_embeddings = TEXT.vocab.vectors.size()[0]
num_classes = len(LABEL.vocab.itos)
dropout_rate = 0.2

input_dim = 100

con_dim = 200

# build the LSTM model
class LSTMNet(nn.Module):

    def __init__(self):
        super(LSTMNet, self).__init__()
        self.embeddings = nn.Embedding(num_embeddings, embedding_dim)
        # use pretrained embeddings
        self.embeddings.weight.data.copy_(TEXT.vocab.vectors)
        
        # add hidden layers
        # YOUR CODE HERE!
        
        self.lstm_input = nn.LSTM(input_size = embedding_dim,
                                  hidden_size = embedding_dim,
                                  batch_first = False,
                                  num_layers = 1)
        
        self.input = Linear(in_features = embedding_dim,
                             out_features = input_dim,
                             bias = False)
    
        self.l_1 = Linear(in_features=con_dim,
                           out_features=con_dim,
                           bias = False)
        self.l_2 = Linear(in_features=con_dim,
                           out_features=con_dim,
                           bias=False)
        self.l_3 = Linear(in_features=con_dim,
                           out_features = con_dim,
                           bias = False)
        
        
        self.drop = nn.Dropout(p = dropout_rate)
        
        # output layer
        self.l_out = Linear(in_features=con_dim,
                            out_features=num_classes,
                            bias=False)
        
    def forward(self, x, y):
        out = {}
        #x_text = x[0] #Text of input
        #y_text = y[0]
        #x_len = x[1] #Sequence length of input
        #y_len = y[1]
        packed_x = x
        packed_y = y
        print(x)
        
       # x_len, perm_idx1 = x_len.sort(0, descending=True)
       # x_text = x_text[perm_idx1.cpu().numpy()]
        
       # y_len,perm_idx2 = y_len.sort(0,descending=True)
       # print(perm_idx2.cpu().numpy())
       # y_text = y_text[perm_idx2.cpu().numpy()]
        
        # get embeddings
        x = self.embeddings(packed_x) # (bs,len,300)
        #num_len = x.size()[1]
        y = self.embeddings(packed_y)
        
        x = self.drop(x)
        y = self.drop(y)
        
       # packed_x = pack_padded_sequence(x,x_len.cpu().numpy())
       #packed_y = pack_padded_sequence(y,y_len.cpu().numpy())
        
        #print(packed_x.size())
        
        x, xht = self.lstm_input(x)
        y, yht = self.lstm_input(y)
        #xht = xht[0].cuda()
        #yht = yht[0].cuda()
        
        print(y.size())
        print(yht[0].size())
        
        xt = xht[0]
       #print(xt)
        yt = yht[0]
        #print(x.size())
        #print(xt.size())
        #tanh # (bs,100)
        
        xt = torch.tanh(self.input(xt))
        yt = torch.tanh(self.input(yt))
 
        z = torch.cat((xt,yt),2)
    
        z = torch.squeeze(z,0)
                
        z = torch.tanh(self.l_1(z))     
        z = torch.tanh(self.l_2(z))
        z = torch.tanh(self.l_3(z))
       # print(z.size())
        
        
        # Softmax
        out['out'] = self.l_out(z)
        return out

net = LSTMNet()
if use_cuda:
    net.cuda()
print(net)

LSTMNet(
  (embeddings): Embedding(18005, 300)
  (lstm_input): LSTM(300, 300)
  (input): Linear(in_features=300, out_features=100, bias=False)
  (l_1): Linear(in_features=200, out_features=200, bias=False)
  (l_2): Linear(in_features=200, out_features=200, bias=False)
  (l_3): Linear(in_features=200, out_features=200, bias=False)
  (drop): Dropout(p=0.2)
  (l_out): Linear(in_features=200, out_features=3, bias=False)
)


In [22]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adadelta(net.parameters(), lr=0.001,weight_decay=0.001)

def accuracy(ys, ts):
    # making a one-hot encoded vector of correct (1) and incorrect (0) predictions
    correct_prediction = torch.eq(torch.max(ys, 1)[1], ts)
    # averaging the one-hot encoded vector
    return torch.mean(correct_prediction.float())

In [23]:
def construct_sentences(batch):
    """    
    Parameters
    ----------
    batch: torchtext.data.batch.Batch
    
    Returns
    -------
    [str]
    """
    return [" ".join([TEXT.vocab.itos[elm] 
                      for elm in get_numpy(batch.text[:,i])])
            for i in range(batch.text.size()[1])]

def get_labels(batch):
    """
    Parameters
    ----------
    batch: torchtext.data.batch.Batch
    
    Returns
    -------
    [str]
    """
    return [LABEL.vocab.itos[get_numpy(batch.label[i])] for i in range(len(batch.label))]


In [24]:
max_iter = 1
eval_every = 1000
log_every = 200

# will be updated while iterating
#tsne_plot = show(p, notebook_handle=True)

train_loss, train_accs = [], []

net.train()
for i, batch in enumerate(train_iter):
    if i % eval_every == 0:
        net.eval()
        val_losses, val_accs, val_lengths = 0, 0, 0
        val_meta = {'label_idx': [], 'sentences': [], 'labels': []}
        for val_batch in val_iter:
           # print(val_batch.text[1].size())
            output = net(val_batch.text,val_batch.text)
            # batches sizes might vary, which is why we cannot just mean the batch's loss
            # we multiply the loss and accuracies with the batch's size,
            # to later divide by the total size
            #print(val_batch.text.size())
            #print(val_batch.label.size())
            #print(output['out'].size())
            val_losses += criterion(output['out'], val_batch.label) * val_batch.batch_size
            val_accs += accuracy(output['out'], val_batch.label) * val_batch.batch_size
            val_lengths += val_batch.batch_size
            
        
        # divide by the total accumulated batch sizes
        val_losses /= val_lengths
        val_accs /= val_lengths
        
        print("valid, it: {} loss: {:.2f} accs: {:.2f}\n".format(i, get_numpy(val_losses), get_numpy(val_accs)))
        #update_plot(val_meta, 'LSTM', tsne_plot)
        
        net.train()
    
    output = net(batch.text,batch.text)
    batch_loss = criterion(output['out'], batch.label)
    
    train_loss.append(get_numpy(batch_loss))
    train_accs.append(get_numpy(accuracy(output['out'], batch.label)))
    
    optimizer.zero_grad()
    batch_loss.backward()
    optimizer.step()
    
    if i % log_every == 0:        
        print("train, it: {} loss: {:.2f} accs: {:.2f}".format(i, 
                                                               np.mean(train_loss), 
                                                               np.mean(train_accs)))
        # reset
        train_loss, train_accs = [], []
        
    if max_iter < i:
        break

  return Variable(arr, volatile=not train)


tensor([[2034,    1,    1],
        [  67, 1189, 5204],
        [   8,    8,  419]], device='cuda:0')
torch.Size([3, 3, 300])
torch.Size([1, 3, 300])
tensor([[14726,  3178,     0],
        [  259,     5,  1515],
        [   27,  1089,     5],
        [    8,     8,  1644]], device='cuda:0')
torch.Size([4, 3, 300])
torch.Size([1, 3, 300])
tensor([[3385,    1,    1],
        [  22,   66, 4027],
        [8882,   28,    2],
        [1110, 6087,  202],
        [   8,   91,    8]], device='cuda:0')
torch.Size([5, 3, 300])
torch.Size([1, 3, 300])
tensor([[ 36, 420,  79],
        [520,  35,   9],
        [753,   3, 106],
        [ 52, 172,   0],
        [  8,   8,   8]], device='cuda:0')
torch.Size([5, 3, 300])
torch.Size([1, 3, 300])
tensor([[  183,     0, 13215],
        [   20,   257,   568],
        [   10,     6,  2342],
        [ 7980,     0,     0],
        [    8,     8,     8]], device='cuda:0')
torch.Size([5, 3, 300])
torch.Size([1, 3, 300])
tensor([[   79,   793,    36],
        [  

        [   8,    8,    8]], device='cuda:0')
torch.Size([9, 3, 300])
torch.Size([1, 3, 300])
tensor([[  26,   36,   79],
        [  16,  824,    9],
        [  10,  169,    4],
        [ 261,    5,  279],
        [   2, 2676,    5],
        [1333,  908,  159],
        [   5,  558, 1549],
        [  54,  100,  649],
        [   8,    8,    8]], device='cuda:0')
torch.Size([9, 3, 300])
torch.Size([1, 3, 300])
tensor([[  26,   63,   79],
        [  16,   66,    9],
        [  10, 1561,   45],
        [ 999,  149,  235],
        [   2, 2909,    4],
        [   0,    6, 2099],
        [   5,   21,  110],
        [3660,   16, 5356],
        [   8,    8,    8]], device='cuda:0')
torch.Size([9, 3, 300])
torch.Size([1, 3, 300])
tensor([[11596,    36,    26],
        [    9,   287,    49],
        [  145,   786,   169],
        [ 9514,     6,  6460],
        [  118,     3,   528],
        [  335,  1976,     6],
        [  326,  7294,   487],
        [  301,  2881,   272],
        [    8,     8,

        [    8,     8,     8]], device='cuda:0')
torch.Size([11, 3, 300])
torch.Size([1, 3, 300])
tensor([[   26,    63,    57],
        [ 4210,   192,    10],
        [    0,     7,    19],
        [  112,   137,     0],
        [  230,   300,   566],
        [   87,   187,    33],
        [    4,    21,  1558],
        [  180,   108,    29],
        [  103,     0,     0],
        [  431,   720, 17370],
        [    8,     8,     8]], device='cuda:0')
torch.Size([11, 3, 300])
torch.Size([1, 3, 300])
tensor([[  36,   36,    0],
        [1594,  623,   59],
        [   6,   11, 1106],
        [   0,    9,   18],
        [1376,   64,  972],
        [ 460,   51,    2],
        [1514,  210,  360],
        [  12,   18,    2],
        [2325,   17,   41],
        [5524,    0,  283],
        [   8,    8,    8]], device='cuda:0')
torch.Size([11, 3, 300])
torch.Size([1, 3, 300])
tensor([[2169,  166, 6959],
        [  98,  696,   45],
        [ 117,   54,    4],
        [   0,    2, 4644],
       

torch.Size([13, 3, 300])
torch.Size([1, 3, 300])
tensor([[ 183,   36,  183],
        [  10,  209,   16],
        [ 198,   56,  240],
        [  56, 7859,    0],
        [  33,   29,   18],
        [  17,    4, 6271],
        [  49,  659,    2],
        [ 252,  938,  213],
        [   2,   18,  782],
        [ 125, 4562,   27],
        [   5,    5, 2374],
        [   0, 9847, 3488],
        [   8,    8,    8]], device='cuda:0')
torch.Size([13, 3, 300])
torch.Size([1, 3, 300])
tensor([[   26,  1664, 11715],
        [    0,    11,     5],
        [    0,   682,   256],
        [   18,     4,    45],
        [  727,  3317,     4],
        [  187,    12,   536],
        [   21,     3,   103],
        [   66,  4269,  2110],
        [  820,     6,    18],
        [   45,    26,     3],
        [   13,  3846,   432],
        [  314,  2033,   186],
        [    8,     8,     8]], device='cuda:0')
torch.Size([13, 3, 300])
torch.Size([1, 3, 300])
tensor([[14564,  1236,    36],
        [    3,    

        [    8,     8,    57]], device='cuda:0')
torch.Size([14, 3, 300])
torch.Size([1, 3, 300])
tensor([[   0,   79, 8353],
        [   0,  411,   10],
        [2870,   78, 5502],
        [   2,  527, 6175],
        [ 136,    3,    5],
        [   9,    0, 9807],
        [  78, 4555,    2],
        [   7,    6,   14],
        [  88,  251,   77],
        [  40,    5,    0],
        [  21, 7508,  213],
        [2365, 1802,    0],
        [2283,  237, 8916],
        [   8,    8,    8]], device='cuda:0')
torch.Size([14, 3, 300])
torch.Size([1, 3, 300])
tensor([[6995,    0,  166],
        [ 256,  236, 4329],
        [ 758,  168,  710],
        [   5, 8716, 2992],
        [2495,    7,   15],
        [   2,    4,    4],
        [  45,  465, 1862],
        [   0,  466, 1083],
        [   4,    2,    6],
        [   0, 3377,  170],
        [   0,    9,  100],
        [ 133,    4,    5],
        [   3,   43, 5731],
        [   0,  127,   56],
        [   8,    8,    8]], device='cuda:0')
torch

        [   8,    8,    8]], device='cuda:0')
torch.Size([16, 3, 300])
torch.Size([1, 3, 300])
tensor([[  420,    26,     0],
        [    6,  4241,    38],
        [    3,     9,     4],
        [ 3648,  4019,    20],
        [  278,    10,    44],
        [   27,     4,  5006],
        [11314,   866, 13886],
        [  434,     6,     3],
        [   63,  9676,   358],
        [  230,    11,     6],
        [  204,  9593,     4],
        [   12,    17,   243],
        [    4,   177,     5],
        [  151,     6,    39],
        [   73,  3477,    93],
        [    8,     8,     8]], device='cuda:0')
torch.Size([16, 3, 300])
torch.Size([1, 3, 300])
tensor([[  415,   767,   303],
        [   25,   186,    10],
        [16569,     0,     4],
        [   27,    12,  3097],
        [ 1165,   487,     6],
        [ 5232,   896,   453],
        [    9,    38,   405],
        [  451,     3,   227],
        [ 2427,     0,     2],
        [   57,     6,     5],
        [ 2342, 14507,  5352],
 

        [   8,    8,    8]], device='cuda:0')
torch.Size([17, 3, 300])
torch.Size([1, 3, 300])
tensor([[14578,  4716,   668],
        [ 1081,    57, 11758],
        [13282,    38, 10334],
        [  572,   365,     2],
        [    4,    19,  5582],
        [  384,  1409,    38],
        [ 4091,   603,     0],
        [   18,     6,    19],
        [  513,   648,   528],
        [   15,  1080,   211],
        [   21,   120,    55],
        [  201,    35,     4],
        [    2,    39,  7971],
        [  680,  2329,     2],
        [   78,     0,  3880],
        [   16,   138,    20],
        [    8,     8,     8]], device='cuda:0')
torch.Size([17, 3, 300])
torch.Size([1, 3, 300])
tensor([[   63,  1952,   415],
        [  736,     2,    25],
        [  295,    51,   308],
        [  105,     2,    30],
        [  171,    10,   447],
        [  192,    21,   587],
        [ 5912,    56,    15],
        [ 2326,    40,   125],
        [   73,  2331,  5307],
        [   14,   434,    62],
 

        [    8,     8,     8]], device='cuda:0')
torch.Size([18, 3, 300])
torch.Size([1, 3, 300])
tensor([[   26,    63,  1426],
        [  127,   102,    17],
        [  700,    24,    68],
        [  130,   451,     2],
        [    7,   537,  1919],
        [ 5266,   295,    10],
        [    0, 15300,  6087],
        [ 9687,  4840,   106],
        [    2,     2,    45],
        [   22,    22,     3],
        [   39,   102,  2759],
        [  354,    24,     0],
        [    6,   865,   341],
        [    0,   205,  2877],
        [   10,    45,     6],
        [ 3509,     4,     3],
        [ 1377,  3730,  3983],
        [    8,     8,     8]], device='cuda:0')
torch.Size([18, 3, 300])
torch.Size([1, 3, 300])
tensor([[ 420,   79,   79],
        [   6,  249,    9],
        [ 173,   24,  168],
        [ 676, 5678, 1117],
        [ 368,   77,    2],
        [1159,   13,    0],
        [   2,  853,  362],
        [  77,    7,    2],
        [ 179,   23,  664],
        [1373,    4,   15

        [   8,    8,    8]], device='cuda:0')
torch.Size([19, 3, 300])
torch.Size([1, 3, 300])
tensor([[   79, 13077,  8299],
        [    9,   130,     3],
        [    4,  1617,   275],
        [ 1754,     5,   193],
        [ 2081,     0,   275],
        [    6,    13,   193],
        [ 3733,    10,     2],
        [   11,     2,     0],
        [  102,    21,    60],
        [   24,   119,    15],
        [  870,    23,  5227],
        [   60,     3,     0],
        [    7,   214,     5],
        [    4,   311,  1541],
        [  340,  1697,    47],
        [  228,    38,    29],
        [    6,   160,   178],
        [  128,   365,     0],
        [    8,     8,     8]], device='cuda:0')
torch.Size([19, 3, 300])
torch.Size([1, 3, 300])
tensor([[10382,    79,    57],
        [10405,   110,     3],
        [    9,   205,    16],
        [    0,   324,  1232],
        [   10,     7,    35],
        [ 1990,     0,     4],
        [    7,   295,   423],
        [ 3874,   615,     6],
 

        [    8,     8,     8]], device='cuda:0')
torch.Size([20, 3, 300])
torch.Size([1, 3, 300])
tensor([[   57,     0,  7681],
        [    4,     2,   199],
        [  349,   202,     7],
        [ 3341,    83, 15210],
        [    6,     0, 11275],
        [ 1401,    81,    62],
        [ 3608,   362,     2],
        [    5,    10,    22],
        [ 1691,     4,   356],
        [ 9679,  3216,    38],
        [   11,     2,   126],
        [   71,  2902,   306],
        [  102,   421,    41],
        [   78,    12,   525],
        [    7,   364,   125],
        [ 7689,  3952,     7],
        [    3,     5,   385],
        [    0,    67,    40],
        [ 1121,   156,   150],
        [    8,     8,     8]], device='cuda:0')
torch.Size([20, 3, 300])
torch.Size([1, 3, 300])
tensor([[  668,   594,    79],
        [ 1118,     3,     9],
        [   59,   206,  6177],
        [  125,     2,    29],
        [  601,     3,     4],
        [   18,    20,     0],
        [   19,  3298,   143]

        [    0,     8,     8]], device='cuda:0')
torch.Size([21, 3, 300])
torch.Size([1, 3, 300])
tensor([[  793,  1947,  3390],
        [  117,  3546,     3],
        [   67,     9,   340],
        [   44,  1139,     2],
        [   64,     5,     3],
        [   14,  3098,    20],
        [ 2128,    38,   442],
        [  153,     4,   360],
        [ 1236,  1324,     2],
        [  351,     0,   361],
        [16745,    40,     5],
        [  380,    13,  4477],
        [ 2683,     2,     7],
        [  167,    26,  5014],
        [  598,  6028,    18],
        [   45,  6036,    17],
        [  131,    10,  2536],
        [  351,   267,     0],
        [    0,    22,     5],
        [ 3158,   106,  4728],
        [    8,     8,     8]], device='cuda:0')
torch.Size([21, 3, 300])
torch.Size([1, 3, 300])
tensor([[  26,    0,    0],
        [  16,    5,    7],
        [ 364, 2197,   23],
        [1300,   14,  106],
        [ 147,  180,    2],
        [  13,   14,    5],
        [ 146,  

torch.Size([1, 3, 300])
tensor([[   57,  6399,     0],
        [    4,     9,     3],
        [ 2902,     0,  8836],
        [ 1180,  1133,     0],
        [  800,    17,    11],
        [   29,   172, 16032],
        [    4,    12,  9245],
        [  255,     3,   112],
        [    0,   167,    23],
        [  537,   310,    76],
        [   51,     2,   531],
        [   64,    22,  1097],
        [  106,    17,     3],
        [    0,  4992,   514],
        [    3,    37,    32],
        [ 4570,    12,    96],
        [  211,    59,    10],
        [    7,  1199,    12],
        [  289,   310,  1389],
        [ 2182,    33,     6],
        [   13,    46,    13],
        [    8,     8,     8]], device='cuda:0')
torch.Size([22, 3, 300])
torch.Size([1, 3, 300])
tensor([[ 1571, 11828,     0],
        [    3,     9,   165],
        [ 1136,  1691,     2],
        [ 8436, 11211,     0],
        [    0,   607,  4492],
        [    3,    60,     2],
        [ 2902, 15635,     0],
        [ 

        [   8,    8,    8]], device='cuda:0')
torch.Size([23, 3, 300])
torch.Size([1, 3, 300])
tensor([[ 1430,    26,  8422],
        [  769,  1933,     9],
        [   26,     3,   158],
        [14734,    20,    38],
        [ 4706,   375,   541],
        [    6,     2,    91],
        [ 1635,     3,   116],
        [   83,   531,     7],
        [ 1828,    13,  1594],
        [   81,   304,     3],
        [  112,     2,  2351],
        [   23,    22,   293],
        [    7,    13,     5],
        [    0,     9,    28],
        [   66,   289,   194],
        [  130,   251,    91],
        [ 4498,    43,  3575],
        [17727,    12,     7],
        [   45,     3,   253],
        [   13,   175,     3],
        [   65,   162,  5349],
        [   23,   163,   704],
        [    8,     8,     8]], device='cuda:0')
torch.Size([23, 3, 300])
torch.Size([1, 3, 300])
tensor([[  183,    26,    26],
        [  257,   158,   900],
        [    6,    10,     2],
        [12981,    24,    69],
 

        [   8,    8,    8]], device='cuda:0')
torch.Size([24, 3, 300])
torch.Size([1, 3, 300])
tensor([[   13,  7585,    26],
        [  240,    13,    49],
        [    7,   135,  5058],
        [  205,  1228,  3464],
        [    3,   173,    16],
        [   16,    58,     6],
        [   10,    88,     3],
        [   40,    97,   208],
        [    3,    11,     2],
        [  312,     0,  7310],
        [    6,    15,    94],
        [11268,  1107,    18],
        [  134,  1565,     3],
        [  109,     2,  7209],
        [  133,   263,     6],
        [  160,  2344,   139],
        [ 2542,  1127,  1442],
        [  150,   286,    14],
        [ 7799,    71,     4],
        [  379,     0,   863],
        [   25,   300,     0],
        [   42,    12,     0],
        [  365,  3712,   242],
        [   44,     8,     8]], device='cuda:0')
torch.Size([24, 3, 300])
torch.Size([1, 3, 300])
tensor([[  50, 2441,   36],
        [   0, 1093, 3776],
        [  48,    9,  854],
        [ 

        [    8,     8,     8]], device='cuda:0')
torch.Size([26, 3, 300])
torch.Size([1, 3, 300])
tensor([[  183,  3390,  8389],
        [  790,    21,     9],
        [ 1149,  9087, 11584],
        [  886,  1909,    10],
        [  840,     2,     4],
        [  570,  8398,  5831],
        [  380,    10,     5],
        [   52,    70,   201],
        [ 3872,    64,   354],
        [   15,     4,     6],
        [    3,  1031,   955],
        [ 5887,    12,   368],
        [  764,     3,   185],
        [    6,   167,    10],
        [  247,   265,  1541],
        [  434,     2, 17992],
        [  153,    15,   181],
        [ 1855,    17,     2],
        [ 9937,  1670,   175],
        [   12,     6,    29],
        [   17, 16650,   506],
        [  503,     2,     5],
        [    5,     0,   356],
        [ 3308,     5,    29],
        [12196,  1492,  5382],
        [    8,     8,     8]], device='cuda:0')
torch.Size([26, 3, 300])
torch.Size([1, 3, 300])
tensor([[    0,   668,    26]

        [    8,     8,     8]], device='cuda:0')
torch.Size([27, 3, 300])
torch.Size([1, 3, 300])
tensor([[    0, 13054,   829],
        [  247,   122,  9383],
        [ 6330,  1605,  1824],
        [    5,    44,    19],
        [ 3647,   452, 12919],
        [   37,     3,    10],
        [ 8192,  5059,    19],
        [   47,     5,   742],
        [    3,  1312,   279],
        [ 2500,     6,   479],
        [   15,    78,    56],
        [    3,  2976,    11],
        [  292,    27,   256],
        [  479, 13419,    44],
        [  100,     0,   247],
        [   11,    14,    12],
        [ 1833,    92,  2829],
        [ 3726,    14,     5],
        [    3,     3,   270],
        [  482,  7219,    13],
        [ 1048,   611,     9],
        [  222,    58,  1118],
        [    4,   617,    17],
        [ 2895,    12,   105],
        [    5,     3,   178],
        [ 4173,  2734,   945],
        [    8,     8,     8]], device='cuda:0')
torch.Size([27, 3, 300])
torch.Size([1, 3, 300]

        [    8,     8,     8]], device='cuda:0')
torch.Size([28, 3, 300])
torch.Size([1, 3, 300])
tensor([[  669,  3182,  6991],
        [   71,  3183,     7],
        [ 2467,  3282,  3684],
        [   15,     4,     9],
        [    4,  2438,  1414],
        [    0,  4414,  2300],
        [  672,    10,     5],
        [    6,     4,     0],
        [  130,   398,     9],
        [    3,     5,   233],
        [17757,  1089,     0],
        [    6,  1667,     2],
        [  916,    16,  2300],
        [ 5740,    11, 14538],
        [   27,  3726,    10],
        [    5,     3,    34],
        [    3,   190,     6],
        [  606,  8547,     3],
        [ 3025,   222,    49],
        [  182,   320,   169],
        [    7,     2, 11484],
        [  734,  2360,    27],
        [    0,     2,   594],
        [16432,   715,     3],
        [    3,     2,  3378],
        [13680,     5,     6],
        [ 3025,  5142,  2119],
        [    8,     8,     8]], device='cuda:0')
torch.Size([28, 

        [    8,     8,     8]], device='cuda:0')
torch.Size([29, 3, 300])
torch.Size([1, 3, 300])
tensor([[ 2863,    50,     0],
        [  484,  9195,   192],
        [ 2471,     9,     3],
        [   29,    48,    43],
        [   31,   381,   128],
        [  396,  3455,     7],
        [    9,    12,   138],
        [    4,    19,   191],
        [  382,  1136,    58],
        [  147,     2,    37],
        [   25,  2172,     2],
        [  102,  2916,  1179],
        [   24,     6,  2561],
        [   42,     4,     2],
        [    7,  1961, 13375],
        [ 1749,  7295,    29],
        [    0,     5,     3],
        [   31,    12,  4572],
        [    5,     3,  1657],
        [   76,  1972,     2],
        [    4,     2, 17370],
        [    0,   485,   356],
        [    7,   165,   304],
        [    3,   328,   424],
        [  857,  1151,   120],
        [  182,    18,    35],
        [  300,   118,   150],
        [    0,    62,    46],
        [    8,     8,     8]], de

        [    8,     8,     8]], device='cuda:0')
torch.Size([30, 3, 300])
torch.Size([1, 3, 300])
tensor([[   79,     0,   448],
        [  278,     4,     3],
        [    4,  7108,    16],
        [  624,  5467,   442],
        [  143,   145,    12],
        [    6,    35,  1520],
        [ 8776,     3,  3098],
        [    7,    95,    13],
        [  614,  4211,   101],
        [    3,  4206,    60],
        [ 1018,     2,    18],
        [    6,    83,    12],
        [ 1120,  2859,  1492],
        [ 9232,    81,     2],
        [    2,  4063,    14],
        [ 3536,     3, 12994],
        [ 6943,   275,   346],
        [    2,   697,     7],
        [    0,    14,   218],
        [ 7605,     0,     3],
        [    2,     9, 15629],
        [    5,    83,     6],
        [14777,     0,   880],
        [    0,    81,    12],
        [   46,    15,     3],
        [   12,    30,   857],
        [    3,     0,     6],
        [  275,  1080,  5010],
        [   20,  1530,   115],
   

        [    8,     8,     8]], device='cuda:0')
torch.Size([31, 3, 300])
torch.Size([1, 3, 300])
tensor([[  374,    79,     1],
        [ 3179,  2913,    26],
        [   10,  1262,    52],
        [    4,     2,     5],
        [ 1912,     0,     3],
        [   18,     2,   997],
        [  313,     5,  6731],
        [    2,   133,    12],
        [    5,  6168,   142],
        [   59,   244,     4],
        [ 1170,    13,    80],
        [    4,    61,    11],
        [ 1912,    24,    25],
        [   18,   224,   161],
        [ 9270,    25,   235],
        [ 4916,    73,     4],
        [ 2760,     7,   472],
        [    2,  7378,   396],
        [   58,    27,   179],
        [   65,     3,    32],
        [  598,     0,     4],
        [  870,    31,  8531],
        [   97,     5,     6],
        [    7,     3,     3],
        [    3,  4210,  4405],
        [ 1320,   886,     5],
        [    6,     0, 10784],
        [  212,    31,    11],
        [   96,     6,     0],
   

        [    8,     8,     8]], device='cuda:0')
torch.Size([33, 3, 300])
torch.Size([1, 3, 300])
tensor([[  869,   183,  1202],
        [    0,    10,    77],
        [   71,     4,    25],
        [  164,   575,   102],
        [    4,   795,    24],
        [ 4112,    56,   363],
        [   47,    11,    50],
        [    6,   572,  2434],
        [    0,  9188,     9],
        [    0,    65,    48],
        [    0,   501,    90],
        [    2,   446,    30],
        [    3,   226,   998],
        [  946,     5,     6],
        [    6,   891,  3708],
        [    3,   446, 13372],
        [  185,  5885,    32],
        [   71,    27,    49],
        [  308,     3,   757],
        [    4,     0,     0],
        [ 5350,     2,     2],
        [   56,    66,    96],
        [   15,    14,   414],
        [ 2060,   450,   522],
        [   11,    14,    89],
        [  184,    74,     4],
        [   89,  4279, 12027],
        [    3,  4724, 12797],
        [14970,   507,    95],
   

        [    8,     8,     8]], device='cuda:0')
torch.Size([35, 3, 300])
torch.Size([1, 3, 300])
tensor([[  594,    26,  1148],
        [    3,   408,  7624],
        [  206,  1568,  7622],
        [    2,   163,    10],
        [  131,    37,  3024],
        [   37,   677,    15],
        [  477,  4844, 11856],
        [   15,     2,   191],
        [  107,    22,     5],
        [   45,    49,  2372],
        [  144,     6,     5],
        [    0,     3,  5495],
        [ 5103,    20,     4],
        [   12,    10,   277],
        [    3,     3,    11],
        [ 1586,   275,     9],
        [  179,  2068,  1282],
        [   32,   247,   201],
        [   90,  8096,     2],
        [ 1757, 15175,     3],
        [   55,   131,    20],
        [ 1138,   230,    10],
        [   88,   204,    51],
        [    2,   215,   344],
        [ 2293,   187,     5],
        [ 2105,    94,    51],
        [   41,    21,  1533],
        [    3,    73,     7],
        [ 1283,    25,  1422],
   

        [    8,     8,     8]], device='cuda:0')
torch.Size([38, 3, 300])
torch.Size([1, 3, 300])
tensor([[   79,     1,     1],
        [    9,  1148,   951],
        [    4, 10433,    46],
        [   64,     5,    17],
        [   30,  6010,  1691],
        [  189,    89, 10200],
        [  649,   859,     2],
        [   32,  1837,     3],
        [   85,     7,    16],
        [ 2232,  4993,    10],
        [   38,    82,    44],
        [  712,    55,  5087],
        [   86,     3,    12],
        [   12,  3205,  1887],
        [   39,     2,     5],
        [  490,  5372,     0],
        [  144,   158,     0],
        [   97,    29,    11],
        [    2,  6926,    13],
        [    5,     0,     9],
        [ 4429,     5,    91],
        [    2,  1165,     7],
        [    0,     0,    89],
        [   12,     2,    34],
        [   18,   675,     0],
        [ 2653,     3,    18],
        [ 2963,   191,     3],
        [    2,   631,   691],
        [   10,   127,   147],
   

        [    8,     8,   419]], device='cuda:0')
torch.Size([45, 3, 300])
torch.Size([1, 3, 300])
tensor([[   79,     1],
        [    9,    85],
        [   34,   117],
        [    6,   101],
        [  173,    13],
        [13419,   207],
        [  676,    10],
        [  280,    11],
        [    3,    13],
        [  523,  1684],
        [   10,    86],
        [    0,    55],
        [    2,    17],
        [    3,   185],
        [ 1543,     2],
        [   10,   298],
        [ 4581,    86],
        [    2,     4],
        [    3,   523],
        [  313,   368],
        [   37,  1690],
        [   14,     5],
        [  754,  4632],
        [   14,   131],
        [   46,    65],
        [    0,   734],
        [    5,     2],
        [    3, 15718],
        [ 1587,   284],
        [  620,    15],
        [  521,   125],
        [   37,    62],
        [  151,     5],
        [   91, 12649],
        [    7,    86],
        [    0,    47],
        [    2,     6],
        [   22

# SNLI

In [None]:
print("Run test on SNLI...")
TEXT = datasets.snli.ParsedTextField()
LABEL = data.LabelField()
TREE = datasets.snli.ShiftReduceField()

train, val, test = datasets.SNLI.splits(TEXT, LABEL, TREE)

print("Fields:", train.fields)
print("Number of examples:\n", len(train))
print("First Example instance:\n", vars(train[0]))

url = 'https://s3-us-west-1.amazonaws.com/fasttext-vectors/wiki.simple.vec'
TEXT.build_vocab(train, max_size=None, vectors=[CharNGram(),Vectors('wiki.simple.vec', url=url)])
#TEXT.build_vocab(train,vectors=GloVe[name='840B',dim='300'])
LABEL.build_vocab(train)


train_iter, val_iter, test_iter = data.Iterator.splits((train, val, test), batch_size=3)

#batch = next(iter(train_iter))
#print("Numericalize premises:\n", batch.premise)
#print("Numericalize hypotheses:\n", batch.hypothesis)
#print("Entailment labels:\n", batch.label)

print("Test iters function")
train_iter, val_iter, test_iter = datasets.SNLI.iters(batch_size=4, trees=True)

batch = next(iter(train_iter))
print("Numericalize premises:\n", batch.premise)
print("Numericalize hypotheses:\n", batch.hypothesis)
print("Entailment labels:\n", batch.label)

#val_iter_set = next(iter(val_iter))


In [None]:
embedding_dim = TEXT.vocab.vectors.size()[1]
num_embeddings = TEXT.vocab.vectors.size()[0]
num_classes = len(LABEL.vocab.itos)
dropout_rate = 0.2

input_dim = 100

con_dim = 200

# build the LSTM model
class LSTMNet(nn.Module):

    def __init__(self):
        super(LSTMNet, self).__init__()
        self.embeddings = nn.Embedding(num_embeddings, embedding_dim)
        # use pretrained embeddings
        self.embeddings.weight.data.copy_(TEXT.vocab.vectors)
        
        # add hidden layers
        # YOUR CODE HERE!
        
        self.lstm_input = nn.LSTM(input_size = embedding_dim,
                                  hidden_size = embedding_dim,
                                  batch_first = False,
                                  num_layers = 1)
        
        self.input = Linear(in_features = embedding_dim,
                             out_features = input_dim,
                             bias = False)
    
        self.l_1 = Linear(in_features=con_dim,
                           out_features=con_dim,
                           bias = False)
        self.l_2 = Linear(in_features=con_dim,
                           out_features=con_dim,
                           bias=False)
        self.l_3 = Linear(in_features=con_dim,
                           out_features = con_dim,
                           bias = False)
        
        
        self.drop = nn.Dropout(p = dropout_rate)
        
        # output layer
        self.l_out = Linear(in_features=con_dim,
                            out_features=num_classes,
                            bias=False)
        
    def forward(self, x, y):
        out = {}
        # get embeddings
        x = self.embeddings(x) # (bs,len,300)
        #num_len = x.size()[1]
        y = self.embeddings(y)
        
        x = self.drop(x)
        y = self.drop(y)
        
        x, xht = self.lstm_input(x)
        y, yht = self.lstm_input(y)
        
        xt = xht[0]
        yt = yht[0]
        #print(x.size())
        #print(xt.size())
        #tanh # (bs,100)
        
        xt = torch.tanh(self.input(xt))
        yt = torch.tanh(self.input(yt))
 
        z = torch.cat((xt,yt),2)
    
        z = torch.squeeze(z,0)
                
        z = torch.tanh(self.l_1(z))     
        z = torch.tanh(self.l_2(z))
        z = torch.tanh(self.l_3(z))
      #  print(z.size())
        
        
        # Softmax
        out['out'] = self.l_out(z)
        return out

net = LSTMNet()
if use_cuda:
    net.cuda()
print(net)

In [6]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adadelta(net.parameters(), lr=0.001,weight_decay=0.001)

def accuracy(ys, ts):
    # making a one-hot encoded vector of correct (1) and incorrect (0) predictions
    correct_prediction = torch.eq(torch.max(ys, 1)[1], ts)
    # averaging the one-hot encoded vector
    return torch.mean(correct_prediction.float())

In [7]:
max_iter = 2000
eval_every = 500
log_every = 200

# will be updated while iterating
#tsne_plot = show(p, notebook_handle=True)

train_loss, train_accs = [], []

net.train()
for i, batch in enumerate(train_iter):
    if i % eval_every == 0:
        net.eval()
        val_losses, val_accs, val_lengths = 0, 0, 0
       # val_meta = {'label_idx': [], 'sentences': [], 'labels': []}
        for val_batch in val_iter:
            output = net(val_batch.premise,val_batch.hypothesis)
            # batches sizes might vary, which is why we cannot just mean the batch's loss
            # we multiply the loss and accuracies with the batch's size,
            # to later divide by the total size
            #print(output['out'])
            #print(val_batch.label)
            val_losses += criterion(output['out'], val_batch.label) * val_batch.batch_size
            val_accs += accuracy(output['out'], val_batch.label) * val_batch.batch_size
            val_lengths += val_batch.batch_size
           # print(val_batch.batch_size)
            
        
        # divide by the total accumulated batch sizes
        val_losses /= val_lengths
        val_accs /= val_lengths
        
        print("valid, it: {} loss: {:.2f} accs: {:.2f}\n".format(i, get_numpy(val_losses), get_numpy(val_accs)))
        #update_plot(val_meta, 'bow', tsne_plot)
        
        net.train()
    
    output = net(batch.premise,batch.hypothesis)
    batch_loss = criterion(output['out'], batch.label)
    
    train_loss.append(get_numpy(batch_loss))
    train_accs.append(get_numpy(accuracy(output['out'], batch.label)))
    
    optimizer.zero_grad()
    batch_loss.backward()
    optimizer.step()
    
    if i % log_every == 0:        
        print("train, it: {} loss: {:.2f} accs: {:.2f}".format(i, 
                                                               np.mean(train_loss), 
                                                               np.mean(train_accs)))
        # reset
        train_loss, train_accs = [], []
        
    if max_iter < i:
        break

  return Variable(arr, volatile=not train)


valid, it: 0 loss: 1.01 accs: 0.01

train, it: 0 loss: 0.82 accs: 0.00
train, it: 200 loss: 1.01 accs: 0.03
train, it: 400 loss: 1.01 accs: 0.04
valid, it: 500 loss: 1.01 accs: 0.03



KeyboardInterrupt: 