We’ll start by doing all necessary imports, and we’ll let our Jupyter Notebook know that it should display graphs and images in the notebook itself.

In [1]:
%matplotlib inline
#Reference : https://www.oreilly.com/learning/textual-entailment-with-tensorflow
import time
start_time = time.time()
print("--- %s seconds ---" % (time.time() - start_time))
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import urllib
import sys
import os
import zipfile
learning_rate = 0.0001


--- 4.410743713378906e-05 seconds ---


In [2]:
glove_zip_file = "glove.6B.zip"
glove_vectors_file = "glove.6B.50d.txt"

snli_zip_file = "snli_1.0.zip"
snli_dev_file = "snli_1.0_dev.txt"
snli_full_dataset_file = "snli_1.0_train.txt"

In [3]:
from six.moves.urllib.request import urlretrieve
    
#large file - 862 MB
if (not os.path.isfile(glove_zip_file) and
    not os.path.isfile(glove_vectors_file)):
    urlretrieve ("http://nlp.stanford.edu/data/glove.6B.zip", 
                 glove_zip_file)

#medium-sized file - 94.6 MB
if (not os.path.isfile(snli_zip_file) and
    not os.path.isfile(snli_dev_file)):
    urlretrieve ("https://nlp.stanford.edu/projects/snli/snli_1.0.zip", 
                 snli_zip_file)

In [4]:
def unzip_single_file(zip_file_name, output_file_name):
    """
        If the outFile is already created, don't recreate
        If the outFile does not exist, create it from the zipFile
    """
    if not os.path.isfile(output_file_name):
        with open(output_file_name, 'wb') as out_file:
            with zipfile.ZipFile(zip_file_name) as zipped:
                for info in zipped.infolist():
                    if output_file_name in info.filename:
                        with zipped.open(info) as requested_file:
                            out_file.write(requested_file.read())
                            return

unzip_single_file(glove_zip_file, glove_vectors_file)
unzip_single_file(snli_zip_file, snli_dev_file)
# unzip_single_file(snli_zip_file, snli_full_dataset_file)

---

Now that we have our GloVe vectors downloaded, we can load them into memory, deserializing the space separated format into a Python dictionary:

In [5]:
glove_wordmap = {}
with open(glove_vectors_file, "r") as glove:
    for line in glove:
        name, vector = tuple(line.split(" ", 1))
        glove_wordmap[name] = np.fromstring(vector, sep=" ")

---

Once we have our words, we need our input to contain entire sentences and process it through a neural network. Let's start with making the sequence:

In [6]:
def sentence2sequence(sentence):
    """
     
    - Turns an input sentence into an (n,d) matrix, 
        where n is the number of tokens in the sentence
        and d is the number of dimensions each word vector has.
    
      Tensorflow doesn't need to be used here, as simply
      turning the sentence into a sequence based off our 
      mapping does not need the computational power that
      Tensorflow provides. Normal Python suffices for this task.
    """
    tokens = sentence.lower().split(" ")
    rows = []
    words = []
    #Greedy search for tokens
    for token in tokens:
        i = len(token)
        while len(token) > 0 and i > 0:
            word = token[:i]
            if word in glove_wordmap:
                rows.append(glove_wordmap[word])
                words.append(word)
                token = token[i:]
                i = len(token)
            else:
                i = i-1
    return rows, words

In [7]:
#Constants setup
max_hypothesis_length, max_evidence_length = 30, 30
batch_size, vector_size, hidden_size = 128, 50, 64

lstm_size = hidden_size

weight_decay = 0.0001



input_p, output_p = 0.5, 0.5

training_iterations_count = 100000

display_step = 10

def score_setup(row):
    convert_dict = {
      'entailment': 0,
      'neutral': 1,
      'contradiction': 2
    }
    score = np.zeros((3,))
    for x in range(1,6):
        tag = row["label"+str(x)]
        if tag in convert_dict: score[convert_dict[tag]] += 1
    return score / (1.0*np.sum(score))

def fit_to_size(matrix, shape):
    res = np.zeros(shape)
    slices = [slice(0,min(dim,shape[e])) for e, dim in enumerate(matrix.shape)]
    res[slices] = matrix[slices]
    return res


In [8]:
def split_data_into_scores():
    import csv
    with open("snli_1.0_dev.txt","r") as data:
        train = csv.DictReader(data, delimiter='\t')
        evi_sentences = []
        hyp_sentences = []
        labels = []
        scores = []
        for row in train:
            hyp_sentences.append(np.vstack(
                    sentence2sequence(row["sentence1"].lower())[0]))
            evi_sentences.append(np.vstack(
                    sentence2sequence(row["sentence2"].lower())[0]))
            labels.append(row["gold_label"])
            scores.append(score_setup(row))
        
        hyp_sentences = np.stack([fit_to_size(x, (max_hypothesis_length, vector_size))
                          for x in hyp_sentences])
        evi_sentences = np.stack([fit_to_size(x, (max_evidence_length, vector_size))
                          for x in evi_sentences])
                                 
        return (hyp_sentences, evi_sentences), labels, np.array(scores)
    
data_feature_list, correct_values, correct_scores = split_data_into_scores()

l_h, l_e = max_hypothesis_length, max_evidence_length
N, D, H = batch_size, vector_size, hidden_size
l_seq = l_h + l_e


In [9]:
tf.reset_default_graph()

In [10]:
lstm = tf.contrib.rnn.GRUCell(lstm_size)

In [11]:
lstm_drop =  tf.contrib.rnn.DropoutWrapper(lstm, input_p, output_p)

In [12]:
# N: The number of elements in each of our batches, 
#   which we use to train subsets of data for efficiency's sake.
# l_h: The maximum length of a hypothesis, or the second sentence.  This is
#   used because training an RNN is extraordinarily difficult without 
#   rolling it out to a fixed length.
# l_e: The maximum length of evidence, the first sentence.  This is used
#   because training an RNN is extraordinarily difficult without 
#   rolling it out to a fixed length.
# D: The size of our used GloVe or other vectors.
hyp = tf.placeholder(tf.float32, [N, l_h, D], 'hypothesis')
evi = tf.placeholder(tf.float32, [N, l_e, D], 'evidence')
y = tf.placeholder(tf.float32, [N, 3], 'label')
print('y',y)
# hyp: Where the hypotheses will be stored during training.
# evi: Where the evidences will be stored during training.
# y: Where correct scores will be stored during training.

# lstm_size: the size of the gates in the LSTM, 
#    as in the first LSTM layer's initialization.
lstm_back = tf.contrib.rnn.GRUCell(lstm_size)
# lstm_back:  The LSTM used for looking backwards 
#   through the sentences, similar to lstm.

# input_p: the probability that inputs to the LSTM will be retained at each
#   iteration of dropout.
# output_p: the probability that outputs from the LSTM will be retained at 
#   each iteration of dropout.
lstm_drop_back = tf.contrib.rnn.DropoutWrapper(lstm_back, input_p, output_p)
# lstm_drop_back:  A dropout wrapper for lstm_back, like lstm_drop.


fc_initializer = tf.random_normal_initializer(stddev=0.1) 
# fc_initializer: initial values for the fully connected layer's weights.
# hidden_size: the size of the outputs from each lstm layer.  
#   Multiplied by 2 to account for the two LSTMs.
fc_weight = tf.get_variable('fc_weight', [2*hidden_size, 3], 
                            initializer = fc_initializer)
# fc_weight: Storage for the fully connected layer's weights.
fc_bias = tf.get_variable('bias', [3])
# fc_bias: Storage for the fully connected layer's bias.

# tf.GraphKeys.REGULARIZATION_LOSSES:  A key to a collection in the graph
#   designated for losses due to regularization.
#   In this case, this portion of loss is regularization on the weights
#   for the fully connected layer.
tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, 
                     tf.nn.l2_loss(fc_weight)) 

x = tf.concat([hyp, evi], 1) # N, (Lh+Le), d
# Permuting batch_size and n_steps
x = tf.transpose(x, [1, 0, 2]) # (Le+Lh), N, d
# Reshaping to (n_steps*batch_size, n_input)
x = tf.reshape(x, [-1, vector_size]) # (Le+Lh)*N, d
# Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
x = tf.split(x, l_seq,)

# x: the inputs to the bidirectional_rnn


# tf.contrib.rnn.static_bidirectional_rnn: Runs the input through
#   two recurrent networks, one that runs the inputs forward and one
#   that runs the inputs in reversed order, combining the outputs.
rnn_outputs, _, _ = tf.contrib.rnn.static_bidirectional_rnn(lstm, lstm_back,
                                                            x, dtype=tf.float32)
# rnn_outputs: the list of LSTM outputs, as a list. 
#   What we want is the latest output, rnn_outputs[-1]

classification_scores = tf.matmul(rnn_outputs[-1], fc_weight) + fc_bias
print('c',classification_scores)
# The scores are relative certainties for how likely the output matches
#   a certain entailment: 
#     0: Positive entailment
#     1: Neutral entailment
#     2: Negative entailment

y Tensor("label:0", shape=(128, 3), dtype=float32)
c Tensor("add:0", shape=(128, 3), dtype=float32)


In [13]:
with tf.variable_scope('Accuracy'):
    predicts = tf.cast(tf.argmax(classification_scores, 1), 'int32')
    y_label = tf.cast(tf.argmax(y, 1), 'int32')
    corrects = tf.equal(predicts, y_label)
    num_corrects = tf.reduce_sum(tf.cast(corrects, tf.float32))
    accuracy = tf.reduce_mean(tf.cast(corrects, tf.float32))

with tf.variable_scope("loss"):
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        logits = classification_scores, labels = y)
    loss = tf.reduce_mean(cross_entropy)
    total_loss = loss + weight_decay * tf.add_n(
        tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
with tf.variable_scope('logging'):
    tf.summary.scalar('Accuracy', accuracy)
    summary = tf.summary.merge_all()

saver = tf.train.Saver()

optimizer = tf.train.GradientDescentOptimizer(learning_rate)

opt_op = optimizer.minimize(total_loss)

In [14]:
# Initialize variables
init = tf.global_variables_initializer()

# Use TQDM if installed
tqdm_installed = False
try:
    from tqdm import tqdm
    tqdm_installed = True
except:
    pass

# Launch the Tensorflow session
sess = tf.Session()
sess.run(init)
training_writer = tf.summary.FileWriter('./logs/training', sess.graph)
#sess.run(y)
#sess.run(classification_scores)

# training_iterations_count: The number of data pieces to train on in total
# batch_size: The number of data pieces per batch
training_iterations = range(0,training_iterations_count,batch_size)
if tqdm_installed:
    # Add a progress bar if TQDM is installed
    training_iterations = tqdm(training_iterations)

for i in training_iterations:

    # Select indices for a random data subset
    batch = np.random.randint(data_feature_list[0].shape[0], size=batch_size)
    
    # Use the selected subset indices to initialize the graph's 
    #   placeholder values
    hyps, evis, ys = (data_feature_list[0][batch,:],
                      data_feature_list[1][batch,:],
                      correct_scores[batch])
    
    # Run the optimization with these initialized values
    sess.run([opt_op], feed_dict={hyp: hyps, evi: evis, y: ys})
    # display_step: how often the accuracy and loss should 
    #   be tested and displayed.
    if (i/batch_size) % display_step == 0:
        # Calculate batch accuracy
        acc,training_summary = sess.run([accuracy,summary] ,feed_dict={hyp: hyps, evi: evis, y: ys})
        # Calculate batch loss
        tmp_loss = sess.run(loss, feed_dict={hyp: hyps, evi: evis, y: ys})
        training_writer.add_summary(training_summary, i/batch_size)
        # Display results
        print("Iter " + str(i/batch_size) + ", Minibatch Loss= " + \
              "{:.6f}".format(tmp_loss) + ", Training Accuracy= " + \
              "{:.5f}".format(acc))
        print('c',predicts)
        print('y',y_label)
        save_path = saver.save(sess, "logs/trained_model.ckpt")

  0%|          | 0/782 [00:00<?, ?it/s]

Iter 0.0, Minibatch Loss= 1.237018, Training Accuracy= 0.36719
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


  1%|          | 9/782 [00:03<05:12,  2.47it/s]

Iter 10.0, Minibatch Loss= 1.420697, Training Accuracy= 0.22656
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


  3%|▎         | 20/782 [00:04<02:49,  4.50it/s]

Iter 20.0, Minibatch Loss= 1.369558, Training Accuracy= 0.30469
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


  4%|▎         | 28/782 [00:05<02:21,  5.34it/s]

Iter 30.0, Minibatch Loss= 1.311908, Training Accuracy= 0.29688
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


  5%|▌         | 40/782 [00:06<01:51,  6.65it/s]

Iter 40.0, Minibatch Loss= 1.306388, Training Accuracy= 0.32031
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


  6%|▌         | 48/782 [00:06<01:42,  7.16it/s]

Iter 50.0, Minibatch Loss= 1.293746, Training Accuracy= 0.33594
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


  8%|▊         | 60/782 [00:07<01:30,  7.95it/s]

Iter 60.0, Minibatch Loss= 1.321059, Training Accuracy= 0.36719
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


  9%|▉         | 69/782 [00:08<01:25,  8.37it/s]

Iter 70.0, Minibatch Loss= 1.389376, Training Accuracy= 0.26562
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 10%|▉         | 77/782 [00:09<01:22,  8.53it/s]

Iter 80.0, Minibatch Loss= 1.303764, Training Accuracy= 0.32812
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 11%|█         | 86/782 [00:09<01:18,  8.83it/s]

Iter 90.0, Minibatch Loss= 1.305147, Training Accuracy= 0.33594
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 13%|█▎        | 98/782 [00:10<01:14,  9.19it/s]

Iter 100.0, Minibatch Loss= 1.319216, Training Accuracy= 0.35938
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 14%|█▍        | 110/782 [00:11<01:12,  9.25it/s]

Iter 110.0, Minibatch Loss= 1.266755, Training Accuracy= 0.35938
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 15%|█▌        | 119/782 [00:12<01:12,  9.19it/s]

Iter 120.0, Minibatch Loss= 1.304309, Training Accuracy= 0.32812
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 17%|█▋        | 130/782 [00:13<01:09,  9.41it/s]

Iter 130.0, Minibatch Loss= 1.369680, Training Accuracy= 0.28906
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 18%|█▊        | 138/782 [00:14<01:07,  9.48it/s]

Iter 140.0, Minibatch Loss= 1.292157, Training Accuracy= 0.35938
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 19%|█▉        | 150/782 [00:15<01:05,  9.71it/s]

Iter 150.0, Minibatch Loss= 1.340553, Training Accuracy= 0.34375
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 20%|██        | 157/782 [00:16<01:04,  9.67it/s]

Iter 160.0, Minibatch Loss= 1.275220, Training Accuracy= 0.35938
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 22%|██▏       | 169/782 [00:17<01:02,  9.87it/s]

Iter 170.0, Minibatch Loss= 1.271137, Training Accuracy= 0.33594
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 23%|██▎       | 180/782 [00:17<01:00, 10.02it/s]

Iter 180.0, Minibatch Loss= 1.394372, Training Accuracy= 0.31250
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 24%|██▍       | 187/782 [00:18<00:59,  9.99it/s]

Iter 190.0, Minibatch Loss= 1.325836, Training Accuracy= 0.31250
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 25%|██▌       | 199/782 [00:19<00:57, 10.16it/s]

Iter 200.0, Minibatch Loss= 1.339285, Training Accuracy= 0.31250
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 26%|██▋       | 207/782 [00:20<00:56, 10.18it/s]

Iter 210.0, Minibatch Loss= 1.259617, Training Accuracy= 0.37500
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 28%|██▊       | 219/782 [00:21<00:54, 10.29it/s]

Iter 220.0, Minibatch Loss= 1.362679, Training Accuracy= 0.27344
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 29%|██▉       | 230/782 [00:22<00:53, 10.38it/s]

Iter 230.0, Minibatch Loss= 1.296026, Training Accuracy= 0.35938
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 30%|███       | 237/782 [00:23<00:53, 10.28it/s]

Iter 240.0, Minibatch Loss= 1.332457, Training Accuracy= 0.34375
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 32%|███▏      | 249/782 [00:24<00:51, 10.31it/s]

Iter 250.0, Minibatch Loss= 1.256927, Training Accuracy= 0.35938
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 33%|███▎      | 260/782 [00:25<00:50, 10.37it/s]

Iter 260.0, Minibatch Loss= 1.301425, Training Accuracy= 0.35938
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 34%|███▍      | 267/782 [00:25<00:49, 10.35it/s]

Iter 270.0, Minibatch Loss= 1.328192, Training Accuracy= 0.33594
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 36%|███▌      | 278/782 [00:26<00:48, 10.40it/s]

Iter 280.0, Minibatch Loss= 1.415861, Training Accuracy= 0.24219
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 37%|███▋      | 289/782 [00:27<00:47, 10.36it/s]

Iter 290.0, Minibatch Loss= 1.251703, Training Accuracy= 0.37500
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 38%|███▊      | 300/782 [00:28<00:46, 10.40it/s]

Iter 300.0, Minibatch Loss= 1.297153, Training Accuracy= 0.30469
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 40%|███▉      | 310/782 [00:29<00:45, 10.38it/s]

Iter 310.0, Minibatch Loss= 1.196084, Training Accuracy= 0.40625
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 41%|████      | 317/782 [00:30<00:45, 10.30it/s]

Iter 320.0, Minibatch Loss= 1.411694, Training Accuracy= 0.21094
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 42%|████▏     | 326/782 [00:31<00:44, 10.35it/s]

Iter 330.0, Minibatch Loss= 1.326190, Training Accuracy= 0.32031
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 43%|████▎     | 336/782 [00:32<00:42, 10.41it/s]

Iter 340.0, Minibatch Loss= 1.331024, Training Accuracy= 0.31250
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 44%|████▍     | 346/782 [00:33<00:41, 10.47it/s]

Iter 350.0, Minibatch Loss= 1.308911, Training Accuracy= 0.27344
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 46%|████▌     | 360/782 [00:33<00:39, 10.60it/s]

Iter 360.0, Minibatch Loss= 1.265778, Training Accuracy= 0.35156
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 47%|████▋     | 369/782 [00:34<00:38, 10.63it/s]

Iter 370.0, Minibatch Loss= 1.242714, Training Accuracy= 0.38281
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 48%|████▊     | 377/782 [00:35<00:38, 10.65it/s]

Iter 380.0, Minibatch Loss= 1.321563, Training Accuracy= 0.36719
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 50%|████▉     | 389/782 [00:36<00:36, 10.72it/s]

Iter 390.0, Minibatch Loss= 1.272860, Training Accuracy= 0.35156
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 51%|█████     | 396/782 [00:37<00:36, 10.70it/s]

Iter 400.0, Minibatch Loss= 1.416404, Training Accuracy= 0.22656
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 52%|█████▏    | 406/782 [00:37<00:34, 10.75it/s]

Iter 410.0, Minibatch Loss= 1.324896, Training Accuracy= 0.25000
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 53%|█████▎    | 416/782 [00:38<00:33, 10.79it/s]

Iter 420.0, Minibatch Loss= 1.282333, Training Accuracy= 0.33594
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 55%|█████▍    | 430/782 [00:39<00:32, 10.90it/s]

Iter 430.0, Minibatch Loss= 1.274603, Training Accuracy= 0.37500
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 56%|█████▌    | 439/782 [00:40<00:31, 10.93it/s]

Iter 440.0, Minibatch Loss= 1.378520, Training Accuracy= 0.30469
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 57%|█████▋    | 447/782 [00:41<00:30, 10.90it/s]

Iter 450.0, Minibatch Loss= 1.350565, Training Accuracy= 0.29688
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 59%|█████▉    | 460/782 [00:41<00:29, 10.98it/s]

Iter 460.0, Minibatch Loss= 1.275560, Training Accuracy= 0.32812
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 60%|█████▉    | 468/782 [00:42<00:28, 10.97it/s]

Iter 470.0, Minibatch Loss= 1.302271, Training Accuracy= 0.35156
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 61%|██████    | 476/782 [00:43<00:27, 10.97it/s]

Iter 480.0, Minibatch Loss= 1.349419, Training Accuracy= 0.28125
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 62%|██████▏   | 486/782 [00:44<00:26, 11.01it/s]

Iter 490.0, Minibatch Loss= 1.326871, Training Accuracy= 0.29688
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 64%|██████▍   | 499/782 [00:44<00:25, 11.09it/s]

Iter 500.0, Minibatch Loss= 1.369215, Training Accuracy= 0.28906
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 65%|██████▍   | 507/782 [00:45<00:24, 11.07it/s]

Iter 510.0, Minibatch Loss= 1.367058, Training Accuracy= 0.33594
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 66%|██████▌   | 516/782 [00:46<00:23, 11.09it/s]

Iter 520.0, Minibatch Loss= 1.244105, Training Accuracy= 0.35938
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 67%|██████▋   | 526/782 [00:47<00:22, 11.13it/s]

Iter 530.0, Minibatch Loss= 1.237344, Training Accuracy= 0.38281
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 69%|██████▉   | 539/782 [00:48<00:21, 11.18it/s]

Iter 540.0, Minibatch Loss= 1.286118, Training Accuracy= 0.32812
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 70%|██████▉   | 547/782 [00:48<00:21, 11.19it/s]

Iter 550.0, Minibatch Loss= 1.315741, Training Accuracy= 0.32031
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 71%|███████   | 556/782 [00:49<00:20, 11.21it/s]

Iter 560.0, Minibatch Loss= 1.430840, Training Accuracy= 0.25781
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 72%|███████▏  | 566/782 [00:50<00:19, 11.26it/s]

Iter 570.0, Minibatch Loss= 1.309786, Training Accuracy= 0.30469
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 74%|███████▎  | 576/782 [00:51<00:18, 11.29it/s]

Iter 580.0, Minibatch Loss= 1.303807, Training Accuracy= 0.29688
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 75%|███████▍  | 586/782 [00:51<00:17, 11.31it/s]

Iter 590.0, Minibatch Loss= 1.239487, Training Accuracy= 0.39062
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 76%|███████▌  | 596/782 [00:52<00:16, 11.32it/s]

Iter 600.0, Minibatch Loss= 1.318200, Training Accuracy= 0.36719
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 77%|███████▋  | 606/782 [00:53<00:15, 11.35it/s]

Iter 610.0, Minibatch Loss= 1.257174, Training Accuracy= 0.35156
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 79%|███████▉  | 616/782 [00:54<00:14, 11.38it/s]

Iter 620.0, Minibatch Loss= 1.346040, Training Accuracy= 0.32031
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 81%|████████  | 630/782 [00:55<00:13, 11.42it/s]

Iter 630.0, Minibatch Loss= 1.219843, Training Accuracy= 0.35938
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 82%|████████▏ | 640/782 [00:56<00:12, 11.40it/s]

Iter 640.0, Minibatch Loss= 1.314506, Training Accuracy= 0.31250
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 83%|████████▎ | 650/782 [00:57<00:11, 11.39it/s]

Iter 650.0, Minibatch Loss= 1.245793, Training Accuracy= 0.35938
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 84%|████████▍ | 657/782 [00:57<00:11, 11.33it/s]

Iter 660.0, Minibatch Loss= 1.332906, Training Accuracy= 0.29688
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 86%|████████▌ | 670/782 [00:59<00:09, 11.35it/s]

Iter 670.0, Minibatch Loss= 1.320322, Training Accuracy= 0.32812
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 87%|████████▋ | 677/782 [00:59<00:09, 11.33it/s]

Iter 680.0, Minibatch Loss= 1.344002, Training Accuracy= 0.25000
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 88%|████████▊ | 689/782 [01:01<00:08, 11.28it/s]

Iter 690.0, Minibatch Loss= 1.340363, Training Accuracy= 0.31250
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 89%|████████▉ | 698/782 [01:02<00:07, 11.18it/s]

Iter 700.0, Minibatch Loss= 1.287017, Training Accuracy= 0.35938
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 91%|█████████ | 709/782 [01:03<00:06, 11.20it/s]

Iter 710.0, Minibatch Loss= 1.298590, Training Accuracy= 0.26562
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 92%|█████████▏| 717/782 [01:04<00:05, 11.17it/s]

Iter 720.0, Minibatch Loss= 1.245359, Training Accuracy= 0.38281
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 93%|█████████▎| 726/782 [01:05<00:05, 11.16it/s]

Iter 730.0, Minibatch Loss= 1.243121, Training Accuracy= 0.39062
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 95%|█████████▍| 739/782 [01:06<00:03, 11.19it/s]

Iter 740.0, Minibatch Loss= 1.230383, Training Accuracy= 0.35938
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 96%|█████████▌| 748/782 [01:06<00:03, 11.17it/s]

Iter 750.0, Minibatch Loss= 1.305895, Training Accuracy= 0.32031
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 97%|█████████▋| 760/782 [01:07<00:01, 11.22it/s]

Iter 760.0, Minibatch Loss= 1.244093, Training Accuracy= 0.36719
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 98%|█████████▊| 768/782 [01:08<00:01, 11.22it/s]

Iter 770.0, Minibatch Loss= 1.212554, Training Accuracy= 0.40625
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


 99%|█████████▉| 776/782 [01:09<00:00, 11.22it/s]

Iter 780.0, Minibatch Loss= 1.280082, Training Accuracy= 0.39062
c Tensor("Accuracy/Cast:0", shape=(128,), dtype=int32)
y Tensor("Accuracy/Cast_1:0", shape=(128,), dtype=int32)


100%|██████████| 782/782 [01:09<00:00, 11.20it/s]


In [15]:
evidences = ["I will work."]

hypotheses = ["I will study."]

sentence1 = [fit_to_size(np.vstack(sentence2sequence(evidence)[0]),
                         (30, 50)) for evidence in evidences]

sentence2 = [fit_to_size(np.vstack(sentence2sequence(hypothesis)[0]),
                         (30,50)) for hypothesis in hypotheses]

prediction = sess.run(classification_scores, feed_dict={hyp: (sentence1 * N),
                                                        evi: (sentence2 * N),
                                                        y: [[0,0,0]]*N})
print(["Positive", "Neutral", "Negative"][np.argmax(prediction[0])]+
      " entailment")
end_time = time.time()
print("--- %s seconds ---" % (time.time() - start_time))


Neutral entailment
--- 106.50873398780823 seconds ---
