In [1]:
#from google.colab import drive
#drive.mount('/content/gdrive')

In [1]:
import tensorflow as tf

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from sklearn.model_selection import train_test_split

import unicodedata
import re
import numpy as np
import os
import io
import time

In [2]:
movie_line = "cornell movie-dialogs corpus/movie_lines.txt"
movie_convo = "cornell movie-dialogs corpus/movie_conversations.txt"

m_lines = open(movie_line , encoding='utf-8',errors='ignore').read().split('\n')
c_lines = open(movie_convo , encoding='utf-8',errors='ignore').read().split('\n')

convo_line = []
for lines in c_lines:
    _lines = lines.split(" +++$+++ ")[-1][1:-1].replace("'","").replace(" ","")
    convo_line.append(_lines.split(","))

id_line = {}
for lines in m_lines:
    _lines = lines.split(" +++$+++ ")
    if len(_lines) == 5:
        id_line[_lines[0]] = _lines[4]

questions = []
answers = []

for line in convo_line:
    for i in range(len(line) -1):
        questions.append(id_line[line[i]])
        answers.append(id_line[line[i+1]])

In [3]:
def preprocess_sentence(text):
    text = text.lower()
    
    text = re.sub(r"i'm", "i am", text)
    text = re.sub(r"he's", "he is", text)
    text = re.sub(r"she's", "she is", text)
    text = re.sub(r"it's", "it is", text)
    text = re.sub(r"that's", "that is", text)
    text = re.sub(r"what's", "that is", text)
    text = re.sub(r"where's", "where is", text)
    text = re.sub(r"how's", "how is", text)
    text = re.sub(r"\'ll", " will", text)
    text = re.sub(r"\'ve", " have", text)
    text = re.sub(r"\'re", " are", text)
    text = re.sub(r"\'d", " would", text)
    text = re.sub(r"\'re", " are", text)
    text = re.sub(r"won't", "will not", text)
    text = re.sub(r"can't", "cannot", text)
    text = re.sub(r"n't", " not", text)
    text = re.sub(r"n'", "ng", text)
    text = re.sub(r"'bout", "about", text)
    text = re.sub(r"'til", "until", text)
    text = re.sub(r"  ","",text)
    text = re.sub(r"[-()\"#/@;:<>{}`+=~|.!?,]", "", text)
    
    text = re.sub(r"([?.!,¿])", r" \1 ", text)
    text = re.sub(r'[" "]+', " ", text)
    text = re.sub(r"[^a-zA-Z?.!,¿]+", " ", text)

    text = text.strip()
    text = '<start> ' + text + ' <end>'
    return text

In [4]:
preprocess_sentence(questions[0])

'<start> can we make this quickroxanne korrine and andrew barrett are having an incredibly horrendous public break up on the quadagain <end>'

In [5]:
clean_questions = []
clean_answers = []

for q in questions:
    clean_questions.append(preprocess_sentence(q))
for a in answers:
    clean_answers.append(preprocess_sentence(a))

In [6]:
max_length = 20
min_length = 2
short_questions_temp = []
short_answers_temp = []

i = 0
for question in clean_questions:
    if len(question.split()) >= min_length and len(question.split()) <= max_length:
        short_questions_temp.append(question)
        short_answers_temp.append(clean_answers[i])
    i += 1

# Filter out the answers that are too short/long
shorted_q = []
shorted_a = []

i = 0
for answer in short_answers_temp:
    if len(answer.split()) >= min_length and len(answer.split()) <= max_length:
        shorted_a.append(answer)
        shorted_q.append(short_questions_temp[i])
    i += 1

In [7]:
def tokenize(lang):
    lang_tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='')
    lang_tokenizer.fit_on_texts(lang)

    tensor = lang_tokenizer.texts_to_sequences(lang)

    tensor = tf.keras.preprocessing.sequence.pad_sequences(tensor,
                                                         padding='post')

    return tensor, lang_tokenizer

In [8]:
def load_dataset(inp_lang, targ_lang):
  # creating cleaned input, output pairs
    input_tensor, inp_lang_tokenizer = tokenize(inp_lang)
    target_tensor, targ_lang_tokenizer = tokenize(targ_lang)

    return input_tensor, target_tensor, inp_lang_tokenizer, targ_lang_tokenizer

In [9]:
input_tensor, target_tensor, inp_lang, targ_lang = load_dataset(shorted_q, shorted_a)

In [10]:
input_tensor.shape

(159650, 20)

In [11]:
target_tensor.shape

(159650, 20)

In [12]:
# Creating training and validation sets using an 80-20 split
input_tensor_train, input_tensor_val, target_tensor_train, target_tensor_val = train_test_split(input_tensor, target_tensor, test_size=0.2)

# Show length
print(len(input_tensor_train), len(target_tensor_train), len(input_tensor_val), len(target_tensor_val))

127720 127720 31930 31930


In [13]:
def convert(lang, tensor):
    for t in tensor:
        if t!=0:
            print("%d ----> %s" % (t, lang.index_word[t]))

In [14]:
print ("Input Language; index to word mapping")
convert(inp_lang, input_tensor_train[0])
print ()
print ("Target Language; index to word mapping")
convert(targ_lang, target_tensor_train[0])

Input Language; index to word mapping
1 ----> <start>
1902 ----> jamie
12517 ----> kurth
5078 ----> jonathan
5360 ----> edmunds
2 ----> <end>

Target Language; index to word mapping
1 ----> <start>
28 ----> my
170 ----> god
700 ----> nick
2 ----> <end>


In [15]:
BUFFER_SIZE = len(input_tensor_train)
BATCH_SIZE = 64
steps_per_epoch = len(input_tensor_train)//BATCH_SIZE
embedding_dim = 256
units = 1024
vocab_inp_size = len(inp_lang.word_index)+1
vocab_tar_size = len(targ_lang.word_index)+1

dataset = tf.data.Dataset.from_tensor_slices((input_tensor_train, target_tensor_train)).shuffle(BUFFER_SIZE)
dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)

In [16]:
example_input_batch, example_target_batch = next(iter(dataset))
example_input_batch.shape, example_target_batch.shape

(TensorShape([64, 20]), TensorShape([64, 20]))

In [17]:
class Encoder(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz):
        super(Encoder, self).__init__()
        self.batch_sz = batch_sz
        self.enc_units = enc_units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(self.enc_units,
                                   return_sequences=True,
                                   return_state=True,
                                   recurrent_initializer='glorot_uniform')

    def call(self, x, hidden):
        x = self.embedding(x)
        output, state = self.gru(x, initial_state = hidden)
        return output, state

    def initialize_hidden_state(self):
        return tf.zeros((self.batch_sz, self.enc_units))

In [18]:
encoder = Encoder(vocab_inp_size, embedding_dim, units, BATCH_SIZE)

# sample input
sample_hidden = encoder.initialize_hidden_state()
sample_output, sample_hidden = encoder(example_input_batch, sample_hidden)
print ('Encoder output shape: (batch size, sequence length, units) {}'.format(sample_output.shape))
print ('Encoder Hidden state shape: (batch size, units) {}'.format(sample_hidden.shape))

Encoder output shape: (batch size, sequence length, units) (64, 20, 1024)
Encoder Hidden state shape: (batch size, units) (64, 1024)


In [19]:
class BahdanauAttention(tf.keras.layers.Layer):
    def __init__(self, units):
        super(BahdanauAttention, self).__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, query, values):
    # query hidden state shape == (batch_size, hidden size)
    # query_with_time_axis shape == (batch_size, 1, hidden size)
    # values shape == (batch_size, max_len, hidden size)
    # we are doing this to broadcast addition along the time axis to calculate the score
        query_with_time_axis = tf.expand_dims(query, 1)

    # score shape == (batch_size, max_length, 1)
    # we get 1 at the last axis because we are applying score to self.V
    # the shape of the tensor before applying self.V is (batch_size, max_length, units)
        score = self.V(tf.nn.tanh(
            self.W1(query_with_time_axis) + self.W2(values)))

    # attention_weights shape == (batch_size, max_length, 1)
        attention_weights = tf.nn.softmax(score, axis=1)

    # context_vector shape after sum == (batch_size, hidden_size)
        context_vector = attention_weights * values
        context_vector = tf.reduce_sum(context_vector, axis=1)

        return context_vector, attention_weights

In [20]:
attention_layer = BahdanauAttention(10)
attention_result, attention_weights = attention_layer(sample_hidden, sample_output)

print("Attention result shape: (batch size, units) {}".format(attention_result.shape))
print("Attention weights shape: (batch_size, sequence_length, 1) {}".format(attention_weights.shape))

Attention result shape: (batch size, units) (64, 1024)
Attention weights shape: (batch_size, sequence_length, 1) (64, 20, 1)


In [21]:
class Decoder(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, dec_units, batch_sz):
        super(Decoder, self).__init__()
        self.batch_sz = batch_sz
        self.dec_units = dec_units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(self.dec_units,
                                   return_sequences=True,
                                   return_state=True,
                                   recurrent_initializer='glorot_uniform')
        self.fc = tf.keras.layers.Dense(vocab_size)

    # used for attention
        self.attention = BahdanauAttention(self.dec_units)

    def call(self, x, hidden, enc_output):
    # enc_output shape == (batch_size, max_length, hidden_size)
        context_vector, attention_weights = self.attention(hidden, enc_output)

    # x shape after passing through embedding == (batch_size, 1, embedding_dim)
        x = self.embedding(x)

    # x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size)
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)

    # passing the concatenated vector to the GRU
        output, state = self.gru(x)

    # output shape == (batch_size * 1, hidden_size)
        output = tf.reshape(output, (-1, output.shape[2]))

    # output shape == (batch_size, vocab)
        x = self.fc(output)

        return x, state, attention_weights

In [22]:
decoder = Decoder(vocab_tar_size, embedding_dim, units, BATCH_SIZE)

sample_decoder_output, _, _ = decoder(tf.random.uniform((BATCH_SIZE, 1)),
                                      sample_hidden, sample_output)

print ('Decoder output shape: (batch_size, vocab size) {}'.format(sample_decoder_output.shape))

Decoder output shape: (batch_size, vocab size) (64, 50959)


In [23]:
optimizer = tf.keras.optimizers.Adam()
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')

def loss_function(real, pred):
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)

    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask

    return tf.reduce_mean(loss_)

In [24]:
checkpoint_dir = './nmt_with_attention_movie/training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                 encoder=encoder,
                                 decoder=decoder)

In [26]:
@tf.function
def train_step(inp, targ, enc_hidden):
    loss = 0

    with tf.GradientTape() as tape:
        enc_output, enc_hidden = encoder(inp, enc_hidden)

        dec_hidden = enc_hidden

        dec_input = tf.expand_dims([targ_lang.word_index['<start>']] * BATCH_SIZE, 1)

    # Teacher forcing - feeding the target as the next input
        for t in range(1, targ.shape[1]):
      # passing enc_output to the decoder
            predictions, dec_hidden, _ = decoder(dec_input, dec_hidden, enc_output)

          
            loss += loss_function(targ[:, t], predictions)

      # using teacher forcing
            dec_input = tf.expand_dims(targ[:, t], 1)

  
    batch_loss = (loss / int(targ.shape[1]))

    variables = encoder.trainable_variables + decoder.trainable_variables

    gradients = tape.gradient(loss, variables)

    optimizer.apply_gradients(zip(gradients, variables))

    return batch_loss

In [28]:
EPOCHS = 61

for epoch in range(EPOCHS):
  
    start = time.time()

    enc_hidden = encoder.initialize_hidden_state()
    total_loss = 0

    for (batch, (inp, targ)) in enumerate(dataset.take(steps_per_epoch)):
        batch_loss = train_step(inp, targ, enc_hidden)
        total_loss += batch_loss

        if batch % 100 == 0:
            print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1,
                                                   batch,
                                                   batch_loss.numpy()))
  # saving (checkpoint) the model every 2 epochs
    if epoch%10 == 0:
        checkpoint.save(file_prefix = checkpoint_prefix)

    print('Epoch {} Loss {:.4f}'.format(epoch + 1,
                                      total_loss / steps_per_epoch))
    print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))

Epoch 1 Batch 0 Loss 4.4599
Epoch 1 Batch 100 Loss 2.6495
Epoch 1 Batch 200 Loss 2.3718
Epoch 1 Batch 300 Loss 2.0927
Epoch 1 Batch 400 Loss 2.1782
Epoch 1 Batch 500 Loss 2.4712
Epoch 1 Batch 600 Loss 2.2472
Epoch 1 Batch 700 Loss 2.0815
Epoch 1 Batch 800 Loss 2.1333
Epoch 1 Batch 900 Loss 2.1804
Epoch 1 Batch 1000 Loss 2.0736
Epoch 1 Batch 1100 Loss 1.6815
Epoch 1 Batch 1200 Loss 2.2273
Epoch 1 Batch 1300 Loss 2.2084
Epoch 1 Batch 1400 Loss 2.2673
Epoch 1 Batch 1500 Loss 2.1853
Epoch 1 Batch 1600 Loss 1.7654
Epoch 1 Batch 1700 Loss 2.2884
Epoch 1 Batch 1800 Loss 1.7410
Epoch 1 Batch 1900 Loss 1.9838
Epoch 1 Loss 2.2219
Time taken for 1 epoch 619.507169008255 sec

Epoch 2 Batch 0 Loss 2.0046
Epoch 2 Batch 100 Loss 1.7256
Epoch 2 Batch 200 Loss 1.8624
Epoch 2 Batch 300 Loss 1.7494
Epoch 2 Batch 400 Loss 2.0797
Epoch 2 Batch 500 Loss 1.9574
Epoch 2 Batch 600 Loss 2.1162
Epoch 2 Batch 700 Loss 1.9051
Epoch 2 Batch 800 Loss 1.8920
Epoch 2 Batch 900 Loss 2.0497
Epoch 2 Batch 1000 Loss 2.364

Epoch 13 Batch 200 Loss 1.0034
Epoch 13 Batch 300 Loss 1.1884
Epoch 13 Batch 400 Loss 1.0150
Epoch 13 Batch 500 Loss 0.9828
Epoch 13 Batch 600 Loss 1.1105
Epoch 13 Batch 700 Loss 0.9494
Epoch 13 Batch 800 Loss 1.0155
Epoch 13 Batch 900 Loss 0.9488
Epoch 13 Batch 1000 Loss 0.9140
Epoch 13 Batch 1100 Loss 0.9869
Epoch 13 Batch 1200 Loss 1.1031
Epoch 13 Batch 1300 Loss 1.0958
Epoch 13 Batch 1400 Loss 1.0469
Epoch 13 Batch 1500 Loss 1.0812
Epoch 13 Batch 1600 Loss 0.9983
Epoch 13 Batch 1700 Loss 0.9860
Epoch 13 Batch 1800 Loss 1.0148
Epoch 13 Batch 1900 Loss 1.1546
Epoch 13 Loss 1.0016
Time taken for 1 epoch 617.4965665340424 sec

Epoch 14 Batch 0 Loss 1.0265
Epoch 14 Batch 100 Loss 0.8882
Epoch 14 Batch 200 Loss 0.8830
Epoch 14 Batch 300 Loss 0.8035
Epoch 14 Batch 400 Loss 0.8302
Epoch 14 Batch 500 Loss 0.8704
Epoch 14 Batch 600 Loss 0.9871
Epoch 14 Batch 700 Loss 0.9409
Epoch 14 Batch 800 Loss 0.9995
Epoch 14 Batch 900 Loss 0.9523
Epoch 14 Batch 1000 Loss 1.0269
Epoch 14 Batch 1100 Loss 

Epoch 24 Loss 0.4538
Time taken for 1 epoch 616.5075840950012 sec

Epoch 25 Batch 0 Loss 0.3794
Epoch 25 Batch 100 Loss 0.3140
Epoch 25 Batch 200 Loss 0.2739
Epoch 25 Batch 300 Loss 0.4048
Epoch 25 Batch 400 Loss 0.4092
Epoch 25 Batch 500 Loss 0.4508
Epoch 25 Batch 600 Loss 0.4197
Epoch 25 Batch 700 Loss 0.3901
Epoch 25 Batch 800 Loss 0.4358
Epoch 25 Batch 900 Loss 0.4151
Epoch 25 Batch 1000 Loss 0.4163
Epoch 25 Batch 1100 Loss 0.3530
Epoch 25 Batch 1200 Loss 0.4784
Epoch 25 Batch 1300 Loss 0.4374
Epoch 25 Batch 1400 Loss 0.4661
Epoch 25 Batch 1500 Loss 0.4458
Epoch 25 Batch 1600 Loss 0.4544
Epoch 25 Batch 1700 Loss 0.4385
Epoch 25 Batch 1800 Loss 0.3998
Epoch 25 Batch 1900 Loss 0.5003
Epoch 25 Loss 0.4237
Time taken for 1 epoch 618.5713312625885 sec

Epoch 26 Batch 0 Loss 0.3529
Epoch 26 Batch 100 Loss 0.3424
Epoch 26 Batch 200 Loss 0.3604
Epoch 26 Batch 300 Loss 0.4514
Epoch 26 Batch 400 Loss 0.3721
Epoch 26 Batch 500 Loss 0.3113
Epoch 26 Batch 600 Loss 0.3135
Epoch 26 Batch 700 Loss

Epoch 36 Batch 1600 Loss 0.2338
Epoch 36 Batch 1700 Loss 0.2512
Epoch 36 Batch 1800 Loss 0.2471
Epoch 36 Batch 1900 Loss 0.2931
Epoch 36 Loss 0.2219
Time taken for 1 epoch 614.6444888114929 sec

Epoch 37 Batch 0 Loss 0.2020
Epoch 37 Batch 100 Loss 0.1895
Epoch 37 Batch 200 Loss 0.2883
Epoch 37 Batch 300 Loss 0.2062
Epoch 37 Batch 400 Loss 0.1588
Epoch 37 Batch 500 Loss 0.1786
Epoch 37 Batch 600 Loss 0.2655
Epoch 37 Batch 700 Loss 0.2627
Epoch 37 Batch 800 Loss 0.2267
Epoch 37 Batch 900 Loss 0.1638
Epoch 37 Batch 1000 Loss 0.1816
Epoch 37 Batch 1100 Loss 0.1733
Epoch 37 Batch 1200 Loss 0.2417
Epoch 37 Batch 1300 Loss 0.1939
Epoch 37 Batch 1400 Loss 0.2384
Epoch 37 Batch 1500 Loss 0.2319
Epoch 37 Batch 1600 Loss 0.2033
Epoch 37 Batch 1700 Loss 0.2429
Epoch 37 Batch 1800 Loss 0.2119
Epoch 37 Batch 1900 Loss 0.2025
Epoch 37 Loss 0.2114
Time taken for 1 epoch 613.204639673233 sec

Epoch 38 Batch 0 Loss 0.2022
Epoch 38 Batch 100 Loss 0.2334
Epoch 38 Batch 200 Loss 0.1489
Epoch 38 Batch 300 L

Epoch 48 Batch 1200 Loss 0.1659
Epoch 48 Batch 1300 Loss 0.1940
Epoch 48 Batch 1400 Loss 0.1296
Epoch 48 Batch 1500 Loss 0.1608
Epoch 48 Batch 1600 Loss 0.1433
Epoch 48 Batch 1700 Loss 0.1608
Epoch 48 Batch 1800 Loss 0.1213
Epoch 48 Batch 1900 Loss 0.1916
Epoch 48 Loss 0.1477
Time taken for 1 epoch 613.8163080215454 sec

Epoch 49 Batch 0 Loss 0.0850
Epoch 49 Batch 100 Loss 0.0979
Epoch 49 Batch 200 Loss 0.1201
Epoch 49 Batch 300 Loss 0.1329
Epoch 49 Batch 400 Loss 0.1123
Epoch 49 Batch 500 Loss 0.1212
Epoch 49 Batch 600 Loss 0.1556
Epoch 49 Batch 700 Loss 0.1415
Epoch 49 Batch 800 Loss 0.1388
Epoch 49 Batch 900 Loss 0.1313
Epoch 49 Batch 1000 Loss 0.1111
Epoch 49 Batch 1100 Loss 0.1986
Epoch 49 Batch 1200 Loss 0.1213
Epoch 49 Batch 1300 Loss 0.1314
Epoch 49 Batch 1400 Loss 0.1795
Epoch 49 Batch 1500 Loss 0.1676
Epoch 49 Batch 1600 Loss 0.1496
Epoch 49 Batch 1700 Loss 0.1700
Epoch 49 Batch 1800 Loss 0.2126
Epoch 49 Batch 1900 Loss 0.1609
Epoch 49 Loss 0.1447
Time taken for 1 epoch 613.2

Epoch 60 Batch 800 Loss 0.1380
Epoch 60 Batch 900 Loss 0.1230
Epoch 60 Batch 1000 Loss 0.1396
Epoch 60 Batch 1100 Loss 0.1652
Epoch 60 Batch 1200 Loss 0.1594
Epoch 60 Batch 1300 Loss 0.1086
Epoch 60 Batch 1400 Loss 0.1101
Epoch 60 Batch 1500 Loss 0.1832
Epoch 60 Batch 1600 Loss 0.1459
Epoch 60 Batch 1700 Loss 0.1481
Epoch 60 Batch 1800 Loss 0.1509
Epoch 60 Batch 1900 Loss 0.1236
Epoch 60 Loss 0.1223
Time taken for 1 epoch 612.675630569458 sec

Epoch 61 Batch 0 Loss 0.1248
Epoch 61 Batch 100 Loss 0.0719
Epoch 61 Batch 200 Loss 0.0887
Epoch 61 Batch 300 Loss 0.0692
Epoch 61 Batch 400 Loss 0.0936
Epoch 61 Batch 500 Loss 0.1302
Epoch 61 Batch 600 Loss 0.1223
Epoch 61 Batch 700 Loss 0.1170
Epoch 61 Batch 800 Loss 0.1413
Epoch 61 Batch 900 Loss 0.1204
Epoch 61 Batch 1000 Loss 0.1066
Epoch 61 Batch 1100 Loss 0.1387
Epoch 61 Batch 1200 Loss 0.1406
Epoch 61 Batch 1300 Loss 0.0926
Epoch 61 Batch 1400 Loss 0.1314
Epoch 61 Batch 1500 Loss 0.1124
Epoch 61 Batch 1600 Loss 0.1016
Epoch 61 Batch 1700 

In [25]:
def evaluate(sentence):
    attention_plot = np.zeros((max_length_targ, max_length_inp))

    sentence = preprocess_sentence(sentence)

    inputs = [inp_lang.word_index[i] for i in sentence.split(' ')]
    inputs = tf.keras.preprocessing.sequence.pad_sequences([inputs],
                                                         maxlen=max_length_inp,
                                                         padding='post')
    inputs = tf.convert_to_tensor(inputs)

    result = ''

    hidden = [tf.zeros((1, units))]
    enc_out, enc_hidden = encoder(inputs, hidden)

    dec_hidden = enc_hidden
    dec_input = tf.expand_dims([targ_lang.word_index['<start>']], 0)

    for t in range(max_length_targ):
        predictions, dec_hidden, attention_weights = decoder(dec_input,
                                                         dec_hidden,
                                                         enc_out)

    # storing the attention weights to plot later on
        attention_weights = tf.reshape(attention_weights, (-1, ))
        attention_plot[t] = attention_weights.numpy()

        predicted_id = tf.argmax(predictions[0]).numpy()

        result += targ_lang.index_word[predicted_id] + ' '

        if targ_lang.index_word[predicted_id] == '<end>':
            return result, sentence, attention_plot

    # the predicted ID is fed back into the model
        dec_input = tf.expand_dims([predicted_id], 0)

    return result, sentence, attention_plot

In [35]:
def translate(sentence):
    result, sentence, _ = evaluate(sentence)

    print('Input: %s' % (sentence))
    print('Predicted translation: {}'.format(result))
    return result[:-7]
  #attention_plot = attention_plot[:len(result.split(' ')), :len(sentence.split(' '))]
  #plot_attention(attention_plot, sentence.split(' '), result.split(' '))

In [27]:
checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
max_length_targ, max_length_inp = target_tensor.shape[1], input_tensor.shape[1]
print(max_length_targ, max_length_inp)

20 20


In [36]:
translate("What are you doing here?")

Input: <start> what are you doing here <end>
Predicted translation: i have come to protect my investment thank you <end> 


'i have come to protect my investment thank you'

In [34]:
translate("i do not get it")

Input: <start> i do not get it <end>
Predicted translation: it is poison gas this is the antidote drink it <end> 


In [35]:
translate("it just does not seem right")

Input: <start> it just does not seem right <end>
Predicted translation: do not fuck that guy that is why i do not know what the fuck are you gonna get the 


In [36]:
translate("guys can i talk to you")

Input: <start> guys can i talk to you <end>
Predicted translation: huh <end> 


In [28]:
questions = ['Hello!',
'How are you?',
'What’s your name?',
'When were you born?',
'Where are you from?',
'Are you a man or a woman?',
'Why are you here?',
'Okay bye!',
'See you later.',
'Happy birthday!',
'Have a nice day!',
'How old are you?',
'Would you like some tea?',
'What a nice day today!',
'How do you do?',
'Hey.',
'What’s up?',
'Good morning.',
'How are you doing?',
'Nice to meet you.',
'Thank you.',
'Where are you going?',
'Good luck!',
'Sounds good?',
'Talk to you later.',
'How can i help you?',
'I do not feel very well.',
'I miss you.',
'What are you going to do?',
'I do not understand.',
'Who is Bill clinton?',
'Is the sky blue or black?',
'Does a cat have a ear?',
'Can a cat fly?',
'How many legs does a cat have?',
'How many legs does a spider have?',
'What is the color of the sky?',
'What is the color of water?',
'How much is two plus two?',
'How much is ten add two?',
'What do you like to talk about?',
'What is your job?',
'Tell me something about you.',
'What do you think about coffee?\xa0',
'Do you like math?',
'Can you tell me a joke?',
'I really like the song. What do you think?',
'Do you like basketball?',
'What kind of music do you like best?',
'What’s your favorite sport?']

In [37]:
i = 0
answers = []
for q in questions:
    print(i)
    answers.append(translate(q))
    i = i + 1

0
Input: <start> hello <end>
Predicted translation: hello chris this is knox overstress <end> 
1
Input: <start> how are you <end>
Predicted translation: fine <end> 
2
Input: <start> what s your name <end>
Predicted translation: call me what have you been the wrong man who is business to meet your service sir <end> 
3
Input: <start> when were you born <end>
Predicted translation: that was another one <end> 
4
Input: <start> where are you from <end>
Predicted translation: helsinki which is the capital of finland <end> 
5
Input: <start> are you a man or a woman <end>
Predicted translation: all right what she is at the man mr case is at all this <end> 
6
Input: <start> why are you here <end>
Predicted translation: shitbela that is with the gun <end> 
7
Input: <start> okay bye <end>
Predicted translation: i will see you therebye <end> 
8
Input: <start> see you later <end>
Predicted translation: anyway you are crazy <end> 
9
Input: <start> happy birthday <end>
Predicted translation: what do 

In [43]:
import pandas as pd
d = {'questions': questions, 'answers': answers}
df = pd.DataFrame(data=d)

In [42]:
df.to_csv("cornell_results.csv", index=False, header=False)