In [61]:
from torchtext import data, datasets
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F
import re
import random

In [17]:
inputs = datasets.snli.ParsedTextField(lower=True)
answers = data.Field(sequential=False)

train, dev, test = datasets.SNLI.splits(inputs, answers)

inputs.build_vocab(train, dev, test)
answers.build_vocab(train)

train_iter, dev_iter, test_iter = data.BucketIterator.splits(
            (train, dev, test), batch_size=64, device=-1)

In [46]:
len(inputs.vocab)

64280

In [19]:
batch = next(iter(train_iter))

In [31]:
example = train[0]

In [36]:
train[3].hypothesis

['They', 'are', 'smiling', 'at', 'their', 'parents']

In [32]:
example.hypothesis

['A', 'person', 'is', 'training', 'his', 'horse', 'for', 'a', 'competition.']

In [29]:
batch = next(iter(train_iter))
print(batch.premise)
print(batch.hypothesis)
print(batch.label)

Variable containing:

Columns 0 to 10 
    17     56      3   4901      3      3      3   1677     52   6155     13
   177    102      7   2545   2047    543      7    459     30    504    762
    82      8      5   4706    245     11    230     64     40  15800    174
    90     24     70   3383      5     88     55     10      4   5995    253
    10  22000      8    229    155      8      2    176      2     61    690
    42      4      2      6      2      2    648     15    267      6      9
   458     86   5462  10027   3337    118   1057    976  18280   1590  10197

Columns 11 to 21 
     3     17     49      3    145      3      3     54     17      2      2
    27     18     10   2359     18     22    511    145     14    130    592
    31     10  14042    171     39      7     18      7    176  15220    998
    39     39    163      5     15      4    135    100      2     55      2
   308     61     77     21    240      2     16      2   1389     81  17071
     2     79    4

In [137]:
# A Multi-Layer Perceptron (MLP)
class MLPClassifier(nn.Module): # inheriting from nn.Module!
    
    def __init__(self, input_size, embedding_dim, hidden_dim, num_labels):
        super(MLPClassifier, self).__init__()
        
        # Define the parameters that you will need.  
        # You need an embedding matrix, parameters for affine mappings and ReLus
        # Pay attention to dimensions!
        
        self.embed = nn.Embedding(input_size, embedding_dim, padding_idx=0)
        self.dropout = nn.Dropout(p=0.5)
            
        self.linear_1 = nn.Linear(2*embedding_dim, hidden_dim) 
        self.linear_2 = nn.Linear(hidden_dim, hidden_dim)
        self.linear_3 = nn.Linear(hidden_dim, num_labels)
        self.init_weights()
        
    def forward(self, prem, hypo):
        # Pass the input through your layers in order
        emb_prem = self.embed(prem).mean(1)
        emb_hypo = self.embed(hypo).mean(1)
        emb_concat = torch.cat([emb_prem, emb_hypo],1)
        out = self.dropout(emb_concat)
        out = F.relu(self.linear_1(out))
        out = F.relu(self.linear_2(out))
        out = self.dropout(self.linear_3(out))
        return F.log_softmax(out)

    def init_weights(self):
        initrange = 0.1
        lin_layers = [self.linear_1, self.linear_2]
        em_layer = [self.embed]
     
        for layer in lin_layers+em_layer:
            layer.weight.data.uniform_(-initrange, initrange)
            if layer in lin_layers:
                layer.bias.data.fill_(0)

In [145]:
def training_loop(model, loss, optimizer, train_iter, dev_iter):
    step = 0
    for i in range(num_train_steps):
        model.train()
        for batch in train_iter:
            premise = batch.premise.transpose(0,1)
            hypothesis = batch.hypothesis.transpose(0,1)
            labels = batch.label-1
            model.zero_grad()
            output = model(premise, hypothesis)
            lossy = loss(output, labels)
            #print(lossy)
            lossy.backward()
            optimizer.step()

            if step % 10 == 0:
                print( "Step %i; Loss %f; Dev acc %f" 
                %(step, lossy.data[0], evaluate(model, dev_iter)))

            step += 1

In [146]:
def evaluate(model, data_iter):
    model.eval()
    correct = 0
    total = 0
    for batch in data_iter:
        premise = batch.premise.transpose(0,1)
        hypothesis = batch.hypothesis.transpose(0,1)
        labels = (batch.label-1).data
        output = model(premise, hypothesis)
        _, predicted = torch.max(output.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum()
    return correct / float(total)

In [147]:
vocab_size = len(inputs.vocab)
input_size = vocab_size
num_labels = 3
hidden_dim = 50
embedding_dim = 100
batch_size = 32
learning_rate = 0.004
num_train_steps = 1000

In [None]:
model = MLPClassifier(input_size, embedding_dim, hidden_dim, num_labels)
    
# Loss and Optimizer
loss = nn.CrossEntropyLoss()  
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
training_loop(model, loss, optimizer, train_iter, dev_iter)



Step 0; Loss 1.092246; Dev acc 0.338244
Step 10; Loss 1.095578; Dev acc 0.336720
Step 20; Loss 1.100651; Dev acc 0.333062
Step 30; Loss 1.102983; Dev acc 0.333062
Step 40; Loss 1.099342; Dev acc 0.417192
Step 50; Loss 1.084354; Dev acc 0.431721
Step 60; Loss 0.989143; Dev acc 0.442390
Step 70; Loss 1.022728; Dev acc 0.489941
Step 80; Loss 1.030023; Dev acc 0.525198
Step 90; Loss 1.037210; Dev acc 0.522150
Step 100; Loss 1.014148; Dev acc 0.527129
Step 110; Loss 0.787402; Dev acc 0.539728
Step 120; Loss 0.908419; Dev acc 0.553444
Step 130; Loss 0.925783; Dev acc 0.555273
Step 140; Loss 1.031158; Dev acc 0.562182
Step 150; Loss 1.003083; Dev acc 0.554257
Step 160; Loss 0.944038; Dev acc 0.571124
Step 170; Loss 0.829129; Dev acc 0.566856
Step 180; Loss 1.008304; Dev acc 0.571937
Step 190; Loss 0.907513; Dev acc 0.574375
Step 200; Loss 0.813880; Dev acc 0.566145
Step 210; Loss 1.010208; Dev acc 0.577525
Step 220; Loss 0.995342; Dev acc 0.570514
Step 230; Loss 0.905400; Dev acc 0.578744
Ste

Step 1940; Loss 0.843748; Dev acc 0.640825
Step 1950; Loss 0.861337; Dev acc 0.642959
Step 1960; Loss 0.763894; Dev acc 0.641231
Step 1970; Loss 0.632962; Dev acc 0.640723
