In [1]:
pip install tensorflow-gpu


Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install numpy




In [3]:
pip install matplotlib

Note: you may need to restart the kernel to use updated packages.


In [1]:
%matplotlib inline

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import urllib
import sys
import os
import zipfile

In [2]:
#used the global vectors for word representation (GloVe) which consists of the vector representations for words
glove_zip_file = "glove.6B.zip"

In [3]:
glove_vectors_file = "glove.6B.50d.txt"

In [4]:
#excracting and unzipping the glove and SNLI zip files
from urllib.request import urlretrieve
if (not os.path.isfile(glove_zip_file) and
    not os.path.isfile(glove_vectors_file)):
    urlretrieve("http://nlp.stanford.edu/data/glove.6B.zip", glove_zip_file) 

In [5]:
snli_zip_file = "snli_1.0.zip"
snli_dev_file = "snli_1.0_dev.txt"
snli_full_dataset_file = "snli_1.0_train.txt"

In [6]:
if (not os.path.isfile(snli_zip_file) and
    not os.path.isfile(snli_dev_file)):
    urlretrieve ("https://nlp.stanford.edu/projects/snli/snli_1.0.zip", snli_zip_file)

In [7]:
#separating the words into a dictonary
def unzip_single_file(zip_file_name, output_file_name):
    if not os.path.isfile(output_file_name):
        with open(output_file_name, 'wb') as out_file:
            with zipfile.ZipFile(zip_file_name) as zipped:
                for info in zipped.infolist():
                    if output_file_name in info.filename:
                        with zipped.open(info) as requested_file:
                            out_file.write(requested_file.read())
                            return

unzip_single_file(glove_zip_file, glove_vectors_file)
unzip_single_file(snli_zip_file, snli_dev_file)

In [8]:

glove_wordmap = {}
with open(glove_vectors_file, "r",encoding='utf-8') as glove:
    for lines in glove:
        name, vector = tuple(lines.split(" ", 1))
        glove_wordmap[name] = np.fromstring(vector, sep=" ")

In [9]:

def sentence2sequence(sentence):
    tokens = sentence.lower().split(" ")
    rows = []
    words = []
    
    for token in tokens:
        i = len(token)
        while len(token) > 0 and i > 0:
            word = token[:i]
            if word in glove_wordmap:
                rows.append(glove_wordmap[word])
                words.append(word)
                token = token[i:]
                i = len(token)
            else:
                i = i-1
    return rows, words

In [10]:
#LSTM was used to make the model
max_hypothesis_length, max_evidence_length = 30, 30
batch_size, vector_size, hidden_size = 128, 50, 64

lstm_size = hidden_size

weight_decay = 0.0001

learning_rate = 1

input_p, output_p = 0.5, 0.5

training_iterations_count = 100000

display_step = 10

def score_setup(row):
    convert_dict = {
      'entailment': 0,
      'neutral': 1,
      'contradiction': 2
    }
    score = np.zeros((3,))
    for x in range(1,6):
        tag = row["label"+str(x)]
        if tag in convert_dict: score[convert_dict[tag]] += 1
    return score / (1.0*np.sum(score))

def fit_to_size(matrix, shape):
    res = np.zeros(shape)
    slices = [slice(0,min(dim,shape[e])) for e, dim in enumerate(matrix.shape)]
    res[slices] = matrix[slices]
    return res

In [11]:

def split_data_into_scores():
    import csv
    with open("snli_1.0_dev.txt","r") as data:
        train = csv.DictReader(data, delimiter='\t')
        evi_sentences = []
        hyp_sentences = []
        labels = []
        scores = []
        for row in train:
            hyp_sentences.append(np.vstack(
                    sentence2sequence(row["sentence1"].lower())[0]))
            evi_sentences.append(np.vstack(
                    sentence2sequence(row["sentence2"].lower())[0]))
            labels.append(row["gold_label"])
            scores.append(score_setup(row))
        
        hyp_sentences = np.stack([fit_to_size(x, (max_hypothesis_length, vector_size))
                          for x in hyp_sentences])
        evi_sentences = np.stack([fit_to_size(x, (max_evidence_length, vector_size))
                          for x in evi_sentences])
                                 
        return (hyp_sentences, evi_sentences), labels, np.array(scores)
    
data_feature_list, correct_values, correct_scores = split_data_into_scores()

l_h, l_e = max_hypothesis_length, max_evidence_length
N, D, H = batch_size, vector_size, hidden_size
l_seq = l_h + l_e

  res[slices] = matrix[slices]


In [44]:
tf.compat.v1.reset_default_graph()
lstm = tf.keras.layers.LSTMCell(lstm_size)

In [45]:
lstm_drop =  tf.compat.v1.nn.rnn_cell.DropoutWrapper(lstm, input_p, output_p)

In [46]:
tf.compat.v1.disable_eager_execution()
hyp = tf.compat.v1.placeholder(tf.float32, [N, l_h, D], 'hypothesis')
evi = tf.compat.v1.placeholder(tf.float32, [N, l_e, D], 'evidence')
y = tf.compat.v1.placeholder(tf.float32, [N, 3], 'label')
lstm_back = tf.keras.layers.LSTMCell(lstm_size)
lstm_drop_back = tf.compat.v1.nn.rnn_cell.DropoutWrapper(lstm_back, input_p, output_p)
fc_initializer = tf.random_normal_initializer(stddev=0.1) 



In [52]:
fc_weight = tf.compat.v1.get_variable('fc_weight', [2*hidden_size, 3], initializer = fc_initializer)
fc_bias = tf.compat.v1.get_variable('bias', [3])
tf.compat.v1.add_to_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(fc_weight)) 

In [53]:

x = tf.concat([hyp, evi], 1) 

x = tf.transpose(x, [1, 0, 2]) 

x = tf.reshape(x, [-1, vector_size])

x = tf.split(x, l_seq,)






In [54]:
#note: this line does not work properly sometimes i could not figure out why. It works after a few attempts
rnn_outputs, _, _= tf.compat.v1.nn.static_bidirectional_rnn(lstm, lstm_back, x,  dtype=tf.float32)

In [55]:
classification_scores = tf.matmul(rnn_outputs[-1], fc_weight) + fc_bias

In [56]:

with tf.compat.v1.variable_scope('Accuracy'):
    predicts = tf.cast(tf.argmax(classification_scores, 1), 'int32')
    y_label = tf.cast(tf.argmax(y, 1), 'int32')
    corrects = tf.equal(predicts, y_label)
    num_corrects = tf.reduce_sum(tf.cast(corrects, tf.float32))
    accuracy = tf.reduce_mean(tf.cast(corrects, tf.float32))

with tf.compat.v1.variable_scope("loss"):
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        logits = classification_scores, labels = y)
    loss = tf.reduce_mean(cross_entropy)
    total_loss = loss + weight_decay * tf.add_n(
        tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES))

optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate)

opt_op = optimizer.minimize(total_loss)

In [57]:
init = tf.compat.v1.global_variables_initializer()

# Launching the Tensorflow session
sess = tf.compat.v1.Session()
sess.run(init)

# training_iterations_count: The number of data pieces to train on in total
# batch_size: The number of data pieces per batch
training_iterations = range(0,training_iterations_count,batch_size)


for i in training_iterations:

    
    batch = np.random.randint(data_feature_list[0].shape[0], size=batch_size)
    
    hyps, evis, ys = (data_feature_list[0][batch,:],
                      data_feature_list[1][batch,:],
                      correct_scores[batch])
    

    sess.run([opt_op], feed_dict={hyp: hyps, evi: evis, y: ys})
   
    if (i/batch_size) % display_step == 0:
        # Calculating batch accuracy
        acc = sess.run(accuracy, feed_dict={hyp: hyps, evi: evis, y: ys})
        # Calculating batch loss
        tmp_loss = sess.run(loss, feed_dict={hyp: hyps, evi: evis, y: ys})
        
        print("Iter " + str(i/batch_size) + ", Minibatch Loss= " + \
              "{:.6f}".format(tmp_loss) + ", Training Accuracy= " + \
              "{:.5f}".format(acc))

Iter 0.0, Minibatch Loss= 1.101009, Training Accuracy= 0.35156
Iter 10.0, Minibatch Loss= 1.092380, Training Accuracy= 0.37500
Iter 20.0, Minibatch Loss= 1.073564, Training Accuracy= 0.45312
Iter 30.0, Minibatch Loss= 1.093903, Training Accuracy= 0.38281
Iter 40.0, Minibatch Loss= 1.086954, Training Accuracy= 0.45312
Iter 50.0, Minibatch Loss= 1.088913, Training Accuracy= 0.43750
Iter 60.0, Minibatch Loss= 1.088047, Training Accuracy= 0.42969
Iter 70.0, Minibatch Loss= 1.092216, Training Accuracy= 0.39844
Iter 80.0, Minibatch Loss= 1.092379, Training Accuracy= 0.32812
Iter 90.0, Minibatch Loss= 1.094183, Training Accuracy= 0.41406
Iter 100.0, Minibatch Loss= 1.080356, Training Accuracy= 0.44531
Iter 110.0, Minibatch Loss= 1.096282, Training Accuracy= 0.39844
Iter 120.0, Minibatch Loss= 1.068137, Training Accuracy= 0.45312
Iter 130.0, Minibatch Loss= 1.085275, Training Accuracy= 0.33594
Iter 140.0, Minibatch Loss= 1.077732, Training Accuracy= 0.43750
Iter 150.0, Minibatch Loss= 1.109047

In [58]:
#inputting the initial statement
first = input("Enter the first statement: ")

Enter the first statement: The boy likes playing sports 


In [59]:
print(first)

The boy likes playing sports 


In [60]:
#inputting the initial statement
hypo = input("Enter the hypothesis: ")

Enter the hypothesis: The boy is a singer


In [62]:

s1 = [first]

hypotheses = [hypo]

#based on the prediction, gives entailment, contradiction or neutral
sentence1 = [fit_to_size(np.vstack(sentence2sequence(evidence)[0]),
                         (30, 50)) for evidence in s1]

sentence2 = [fit_to_size(np.vstack(sentence2sequence(hypothesis)[0]),
                         (30,50)) for hypothesis in hypotheses]

prediction = sess.run(classification_scores, feed_dict={hyp: (sentence1 * N),
                                                        evi: (sentence2 * N),
                                                        y: [[0,0,0]]*N})
print(s1)
print(hypotheses)
print(["entailment", "Neutral", "Contradiction"][np.argmax(prediction[0])])

['The boy likes playing sports ']
['The boy is a singer']
Contradiction


  res[slices] = matrix[slices]
