In [61]:
from torchtext import data, datasets
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F
import re
import random

In [17]:
inputs = datasets.snli.ParsedTextField(lower=True)
answers = data.Field(sequential=False)

train, dev, test = datasets.SNLI.splits(inputs, answers)

inputs.build_vocab(train, dev, test)
answers.build_vocab(train)

train_iter, dev_iter, test_iter = data.BucketIterator.splits(
            (train, dev, test), batch_size=64, device=-1)

In [46]:
len(inputs.vocab)

64280

In [19]:
batch = next(iter(train_iter))

In [31]:
example = train[0]

In [36]:
train[3].hypothesis

['They', 'are', 'smiling', 'at', 'their', 'parents']

In [32]:
example.hypothesis

['A', 'person', 'is', 'training', 'his', 'horse', 'for', 'a', 'competition.']

In [29]:
batch = next(iter(train_iter))
print(batch.premise)
print(batch.hypothesis)
print(batch.label)

Variable containing:

Columns 0 to 10 
    17     56      3   4901      3      3      3   1677     52   6155     13
   177    102      7   2545   2047    543      7    459     30    504    762
    82      8      5   4706    245     11    230     64     40  15800    174
    90     24     70   3383      5     88     55     10      4   5995    253
    10  22000      8    229    155      8      2    176      2     61    690
    42      4      2      6      2      2    648     15    267      6      9
   458     86   5462  10027   3337    118   1057    976  18280   1590  10197

Columns 11 to 21 
     3     17     49      3    145      3      3     54     17      2      2
    27     18     10   2359     18     22    511    145     14    130    592
    31     10  14042    171     39      7     18      7    176  15220    998
    39     39    163      5     15      4    135    100      2     55      2
   308     61     77     21    240      2     16      2   1389     81  17071
     2     79    4

In [137]:
# A Multi-Layer Perceptron (MLP)
class MLPClassifier(nn.Module): # inheriting from nn.Module!
    
    def __init__(self, input_size, embedding_dim, hidden_dim, num_labels):
        super(MLPClassifier, self).__init__()
        
        # Define the parameters that you will need.  
        # You need an embedding matrix, parameters for affine mappings and ReLus
        # Pay attention to dimensions!
        
        self.embed = nn.Embedding(input_size, embedding_dim, padding_idx=0)
        self.dropout = nn.Dropout(p=0.5)
            
        self.linear_1 = nn.Linear(2*embedding_dim, hidden_dim) 
        self.linear_2 = nn.Linear(hidden_dim, hidden_dim)
        self.linear_3 = nn.Linear(hidden_dim, num_labels)
        self.init_weights()
        
    def forward(self, prem, hypo):
        # Pass the input through your layers in order
        emb_prem = self.embed(prem).mean(1)
        emb_hypo = self.embed(hypo).mean(1)
        emb_concat = torch.cat([emb_prem, emb_hypo],1)
        out = self.dropout(emb_concat)
        out = F.relu(self.linear_1(out))
        out = F.relu(self.linear_2(out))
        out = self.dropout(self.linear_3(out))
        return F.log_softmax(out)

    def init_weights(self):
        initrange = 0.1
        lin_layers = [self.linear_1, self.linear_2]
        em_layer = [self.embed]
     
        for layer in lin_layers+em_layer:
            layer.weight.data.uniform_(-initrange, initrange)
            if layer in lin_layers:
                layer.bias.data.fill_(0)

In [145]:
def training_loop(model, loss, optimizer, train_iter, dev_iter):
    step = 0
    for i in range(num_train_steps):
        model.train()
        for batch in train_iter:
            premise = batch.premise.transpose(0,1)
            hypothesis = batch.hypothesis.transpose(0,1)
            labels = batch.label-1
            model.zero_grad()
            output = model(premise, hypothesis)
            lossy = loss(output, labels)
            #print(lossy)
            lossy.backward()
            optimizer.step()

            if step % 10 == 0:
                print( "Step %i; Loss %f; Dev acc %f" 
                %(step, lossy.data[0], evaluate(model, dev_iter)))

            step += 1

In [149]:
def evaluate(model, data_iter):
    model.eval()
    correct = 0
    total = 0
    for batch in data_iter:
        premise = batch.premise.transpose(0,1)
        hypothesis = batch.hypothesis.transpose(0,1)
        labels = (batch.label-1).data
        output = model(premise, hypothesis)
        _, predicted = torch.max(output.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum()
    model.train()
    return correct / float(total)

In [150]:
vocab_size = len(inputs.vocab)
input_size = vocab_size
num_labels = 3
hidden_dim = 50
embedding_dim = 100
batch_size = 32
learning_rate = 0.004
num_train_steps = 1000

In [None]:
model = MLPClassifier(input_size, embedding_dim, hidden_dim, num_labels)
    
# Loss and Optimizer
loss = nn.CrossEntropyLoss()  
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
training_loop(model, loss, optimizer, train_iter, dev_iter)



Step 0; Loss 1.075891; Dev acc 0.328693
Step 10; Loss 1.115394; Dev acc 0.329709
Step 20; Loss 1.100434; Dev acc 0.334078
Step 30; Loss 1.096971; Dev acc 0.356127
Step 40; Loss 1.083404; Dev acc 0.354908
Step 50; Loss 1.081633; Dev acc 0.405609
Step 60; Loss 1.076163; Dev acc 0.401951
Step 70; Loss 1.141551; Dev acc 0.405304
Step 80; Loss 1.102221; Dev acc 0.486893
Step 90; Loss 1.043828; Dev acc 0.495529
Step 100; Loss 1.078658; Dev acc 0.488823
Step 110; Loss 1.090243; Dev acc 0.506198
Step 120; Loss 1.094956; Dev acc 0.525198
Step 130; Loss 0.953534; Dev acc 0.521540
Step 140; Loss 1.060090; Dev acc 0.517273
Step 150; Loss 1.090013; Dev acc 0.525097
Step 160; Loss 1.037443; Dev acc 0.525503
Step 170; Loss 1.068275; Dev acc 0.539016
Step 180; Loss 1.002581; Dev acc 0.528856
Step 190; Loss 1.052827; Dev acc 0.541963
Step 200; Loss 1.168307; Dev acc 0.554765
Step 210; Loss 0.993622; Dev acc 0.554867
Step 220; Loss 0.936447; Dev acc 0.545824
Step 230; Loss 0.991443; Dev acc 0.557712
Ste