In [6]:
import json
from numpy import array
from numpy import asarray
from numpy import zeros

In [7]:
embeddings_index = dict()
f = open('/Users/satrajitmaitra/Downloads/glove.6B/glove.6B.50d.txt')
for line in f:
    values = line.split()
    word = values[0]
    coefs = asarray(values[1:], dtype='float32')
    embeddings_index[word] = coefs
f.close()
print('Loaded %s word vectors.' % len(embeddings_index))

Loaded 400000 word vectors.


In [8]:
from nltk import word_tokenize
from nltk.corpus import stopwords
import string

stop = list(string.punctuation) + stopwords.words('english')

# Processing the data

In [9]:
def process_data(data):
    premises = []
    hypotheses = []
    labels = []
    for i in range(len(data)):
        premises.append(data[i]['sentence1'].encode('utf-8'))
        hypotheses.append(data[i]['sentence2'].encode('utf-8'))
        labels.append(data[i]['gold_label'])

    for i in range(len(premises)):
        premises[i] = [j for j in word_tokenize(premises[i]) if j not in stop]

    premise_encodings = []
    for i in range(len(premises)):
        encoding = [zeros(50)]*15
        for j in range(len(premises[i])):
            if embeddings_index.get(str(premises[i][j]).lower()) is not None:
                encoding[j] = embeddings_index.get(str(premises[i][j]).lower())
        premise_encodings.append(encoding)

    for i in range(len(hypotheses)):
        hypotheses[i] = [j for j in word_tokenize(hypotheses[i]) if j not in stop]

    hypotheses_encodings = []
    for i in range(len(hypotheses)):
        encoding = [zeros(50)]*15
        for j in range(len(hypotheses[i])):
            if embeddings_index.get(str(hypotheses[i][j]).lower()) is not None:
                encoding[j] = embeddings_index.get(str(hypotheses[i][j]).lower())
        hypotheses_encodings.append(encoding)

    x1 = premise_encodings
    x1 = [array(x1[i]).flatten() for i in range(len(x1))]

    x2 = hypotheses_encodings
    x2 = [array(x2[i]).flatten() for i in range(len(x2))]

    rounded_labels = []
    for i in range(len(labels)):
        if labels[i] == 'entailment':
            rounded_labels.append([1.0, 0.0, 0.0])
        elif labels[i] == 'contradiction':
            rounded_labels.append([0.0, 1.0, 0.0])
        else:
            rounded_labels.append([0.0, 0.0, 1.0])

    y = rounded_labels
    return x1, x2, y

# Load train dataset and create embeddings

In [10]:
with open('/Users/satrajitmaitra/NLP_projects/train.json', 'r') as f:
    data = json.load(f)

x1, x2, y = process_data(data)


  # This is added back by InteractiveShellApp.init_path()


# Load test dataset and create embeddings

In [11]:
with open('/Users/satrajitmaitra/NLP_projects/test.json', 'r') as f:
    data = json.load(f)

test_x1, test_x2, test_y = process_data(data)

  # This is added back by InteractiveShellApp.init_path()


# Load dev dataset and create embeddings

In [12]:
with open('/Users/satrajitmaitra/NLP_projects/dev.json', 'r') as f:
    data = json.load(f)

dev_x1, dev_x2, dev_y = process_data(data)

  # This is added back by InteractiveShellApp.init_path()


# Model definition

In [13]:
from __future__ import print_function
import torch
import torch.nn as nn

# Hyper-parameters 
f_input_size = 750
f_hidden_size = 500
f_output_size = 100

g_input_size = 200
g_hidden_size = 50
g_output_size = 3


num_epochs = 10
learning_rate = 0.0001


class NeuralNet(nn.Module):
    def __init__(self, f_input_size, f_hidden_size, f_output_size, g_input_size, g_hidden_size, g_output_size):
        super(NeuralNet, self).__init__()
        self.fc11 = nn.Linear(f_input_size, f_hidden_size)
        self.fc12 = nn.Linear(f_input_size, f_hidden_size)
        self.relu = nn.ReLU()
        self.fc21 = nn.Linear(f_hidden_size, f_output_size)
        self.fc22 = nn.Linear(f_hidden_size, f_output_size)
        self.gc1 = nn.Linear(g_input_size, g_hidden_size) 
        self.relu = nn.ReLU()
        self.gc2 = nn.Linear(g_hidden_size, g_output_size) 
        self.softmax = nn.Softmax()
    
    def forward(self, x1, x2):
        outf1 = self.fc11(x1)
        outf2 = self.fc12(x2)
        outf1 = self.relu(outf1)
        outf2 = self.relu(outf2)
        outf1 = self.fc21(outf1)
        outf2 = self.fc22(outf2)
        outf = torch.cat([outf1, outf2], dim=0)
        outg = self.gc1(outf)
        outg = self.relu(outg)
        outg = self.gc2(outg)
        outg = self.softmax(outg)
        return outg


model = NeuralNet(f_input_size, f_hidden_size, f_output_size, g_input_size, g_hidden_size, g_output_size)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training the model

In [90]:
total_step = 10000
for epoch in range(num_epochs):
    for i in range(10000):
        outputg = model(torch.tensor(x1[i]).float(), torch.tensor(x2[i]).float())

        loss = nn.functional.mse_loss(outputg, torch.tensor(y[i]))
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 500 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))



Epoch [1/10], Step [500/10000], Loss: 0.2688
Epoch [1/10], Step [1000/10000], Loss: 0.1879
Epoch [1/10], Step [1500/10000], Loss: 0.1554
Epoch [1/10], Step [2000/10000], Loss: 0.1940
Epoch [1/10], Step [2500/10000], Loss: 0.2852
Epoch [1/10], Step [3000/10000], Loss: 0.2125
Epoch [1/10], Step [3500/10000], Loss: 0.2512
Epoch [1/10], Step [4000/10000], Loss: 0.2394
Epoch [1/10], Step [4500/10000], Loss: 0.2060
Epoch [1/10], Step [5000/10000], Loss: 0.2383
Epoch [1/10], Step [5500/10000], Loss: 0.1870
Epoch [1/10], Step [6000/10000], Loss: 0.2166
Epoch [1/10], Step [6500/10000], Loss: 0.1936
Epoch [1/10], Step [7000/10000], Loss: 0.1455
Epoch [1/10], Step [7500/10000], Loss: 0.2046
Epoch [1/10], Step [8000/10000], Loss: 0.2650
Epoch [1/10], Step [8500/10000], Loss: 0.2245
Epoch [1/10], Step [9000/10000], Loss: 0.2436
Epoch [1/10], Step [9500/10000], Loss: 0.2084
Epoch [1/10], Step [10000/10000], Loss: 0.0725
Epoch [2/10], Step [500/10000], Loss: 0.4042
Epoch [2/10], Step [1000/10000], Lo

Epoch [9/10], Step [10000/10000], Loss: 0.0000
Epoch [10/10], Step [500/10000], Loss: 0.0178
Epoch [10/10], Step [1000/10000], Loss: 0.1959
Epoch [10/10], Step [1500/10000], Loss: 0.1037
Epoch [10/10], Step [2000/10000], Loss: 0.0263
Epoch [10/10], Step [2500/10000], Loss: 0.0000
Epoch [10/10], Step [3000/10000], Loss: 0.0000
Epoch [10/10], Step [3500/10000], Loss: 0.0000
Epoch [10/10], Step [4000/10000], Loss: 0.0005
Epoch [10/10], Step [4500/10000], Loss: 0.0002
Epoch [10/10], Step [5000/10000], Loss: 0.0001
Epoch [10/10], Step [5500/10000], Loss: 0.0000
Epoch [10/10], Step [6000/10000], Loss: 0.0000
Epoch [10/10], Step [6500/10000], Loss: 0.1140
Epoch [10/10], Step [7000/10000], Loss: 0.1307
Epoch [10/10], Step [7500/10000], Loss: 0.0287
Epoch [10/10], Step [8000/10000], Loss: 0.0000
Epoch [10/10], Step [8500/10000], Loss: 0.0000
Epoch [10/10], Step [9000/10000], Loss: 0.0010
Epoch [10/10], Step [9500/10000], Loss: 0.0000
Epoch [10/10], Step [10000/10000], Loss: 0.0000


# Testing on test data

In [91]:
with torch.no_grad():
    correct = 0
    total = 1000.0
    for i in range(1000):
        outputg = model(torch.tensor(x1[i]).float(), torch.tensor(x2[i]).float())
        
        predicted = torch.argmax(outputg.data)

        correct += (predicted == torch.argmax(torch.tensor(test_y[i]))).sum().item()

    print('Accuracy of the network on the 1000 test premises/hypotheses: {} %'.format(100 * correct / total))



Accuracy of the network on the 1000 test premises/hypotheses: 31.8 %


# Testing on train data

In [92]:
with torch.no_grad():
    correct = 0
    total = 10000.0
    for i in range(10000):
        outputg = model(torch.tensor(x1[i]).float(), torch.tensor(x2[i]).float())

        predicted = torch.argmax(outputg.data)

        correct += (predicted == torch.argmax(torch.tensor(y[i]))).sum().item()

    print('Accuracy of the network on the 10000 train premises/hypotheses: {} %'.format(100 * correct / total))



Accuracy of the network on the 10000 train premises/hypotheses: 88.32 %


# Testing on dev data

In [93]:
with torch.no_grad():
    correct = 0
    total = 1000.0
    for i in range(1000):
        outputg = model(torch.tensor(x1[i]).float(), torch.tensor(x2[i]).float())
        
        predicted = torch.argmax(outputg.data)

        correct += (predicted == torch.argmax(torch.tensor(dev_y[i]))).sum().item()

    print('Accuracy of the network on the 1000 dev premises/hypotheses: {} %'.format(100 * correct / total))



Accuracy of the network on the 1000 dev premises/hypotheses: 34.7 %


# epoch 10; lr = 0.0001

# f_output = 100, g_hidden = 50 (Test: 31.8, train: 88.32, dev: 34.7) MSEloss
# f_output = 100, g_hidden = 100 (Test: 31.5, train: 87.95, dev: 35.1) MSEloss
# f_output = 300, g_hidden = 100 (Test: 31.6, train: 83.65, dev: 34.8) MSEloss
# f_output = 300, g_hidden = 50 (Test: 30.7, train: 85.59, dev: 35.1) MSEloss
