<h3>Importing the Libraries</h3>

In [1]:
import numpy as np
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
import re
import time

<h3>Data Preprocessing</h3>

In [2]:
# Read both corpus files fully into memory, one list entry per "\n"-separated line.
# `errors = "ignore"` drops bytes that are not valid UTF-8 instead of raising.
# The `with` blocks close each file handle as soon as its content has been read.
with open("movie_lines.txt", encoding = "utf-8", errors = "ignore") as lines_file:
    lines = lines_file.read().split("\n")
with open("movie_conversations.txt", encoding = "utf-8", errors = "ignore") as conversations_file:
    conversations = conversations_file.read().split("\n")

<h4>Creating a dictionary that maps each line and its id</h4>

In [3]:
# Map the first " +++$+++ "-separated field of every record to its fifth field.
# Records that do not split into exactly five fields are skipped.
id2line = {}
for raw_line in lines:
    fields = raw_line.split(" +++$+++ ")
    if len(fields) != 5:
        continue
    id2line[fields[0]] = fields[4]

<h4>Creating a list of all of the conversations</h4>

In [4]:
# For every conversation record except the final list entry, take the last
# " +++$+++ "-separated field, drop its first and last character, remove quotes
# and spaces, and split what remains on commas into a list of line ids.
conversations_ids = []
for record in conversations[:-1]:
    id_field = record.split(" +++$+++ ")[-1]
    stripped_ids = id_field[1:-1].replace("'", "").replace(" ", "")
    conversations_ids.append(stripped_ids.split(","))

<h4>Getting Separately the Questions and Answers</h4>

In [5]:
# Every pair of consecutive line ids in a conversation yields one training pair:
# the earlier line is the question, the line right after it is the answer.
questions = []
answers = []
for line_ids in conversations_ids:
    for question_id, answer_id in zip(line_ids, line_ids[1:]):
        questions.append(id2line[question_id])
        answers.append(id2line[answer_id])

<h4>First Cleaning of the Texts</h4>

In [6]:
def clean_text(text):
    """Lower-case `text`, expand common English contractions and strip punctuation.

    Parameters
    ----------
    text : str
        Raw utterance.

    Returns
    -------
    str
        The utterance in lower case, with the contractions listed below expanded
        and the characters ``- ~ ( ) " # / @ ; : < > { } + = | . ? ,`` removed.
    """
    # Applied in order; each entry is (regex pattern, replacement).
    # The suffix rules insert a leading space so that "you're" becomes
    # "you are" rather than "youare".
    contraction_rules = (
        (r"i'm", "i am"),
        (r"he's", "he is"),
        (r"she's", "she is"),
        (r"that's", "that is"),
        (r"what's", "what is"),
        (r"where's", "where is"),
        (r"\'ll", " will"),
        (r"\'ve", " have"),
        (r"\'re", " are"),
        (r"\'d", " would"),
        (r"won't", "will not"),
        (r"can't", "cannot"),
    )
    text = text.lower()
    for pattern, replacement in contraction_rules:
        text = re.sub(pattern, replacement, text)
    # The hyphen is placed first so it is a literal character. In the middle of
    # the class ("=-|") it forms a range from "=" to "|" that matches every letter.
    text = re.sub(r"[-~()\"#/@;:<>{}+=|.?,]", "", text)
    return text

<h4>Cleaning the Questions</h4>

In [7]:
# Run every raw question through clean_text, preserving order.
clean_questions = [clean_text(question) for question in questions]

<h4>Cleaning the Answers</h4>

In [8]:
# Run every raw answer through clean_text, preserving order.
clean_answers = [clean_text(answer) for answer in answers]

<h4>Creating a dictionary that maps each word to its number of occurrences</h4>

In [9]:
# Count whitespace-separated words over the cleaned questions first, then the
# cleaned answers, accumulating both into a single dictionary.
word2count = {}
for corpus in (clean_questions, clean_answers):
    for sentence in corpus:
        for word in sentence.split():
            word2count[word] = word2count.get(word, 0) + 1

<h4>Creating two Dictionaries that map the Questions Words and the Answers Words to a unique integer</h4>

In [10]:
# Words seen fewer than `threshold` times are left out of both vocabularies.
threshold = 20

# Question vocabulary: retained words get consecutive ids starting at 0.
questionswords2int = {}
word_number = 0
for word, count in word2count.items():
    if count >= threshold:
        questionswords2int[word] = word_number
        word_number += 1

# Answer vocabulary: the counter is reset so these ids also start at 0.
# Without the reset the ids would continue from the end of the question
# vocabulary, and the special tokens added afterwards (numbered from the
# dictionary length) would reuse ids already assigned to real words.
answerswords2int = {}
word_number = 0
for word, count in word2count.items():
    if count >= threshold:
        answerswords2int[word] = word_number
        word_number += 1

<h4>Adding the Last Tokens to these two Dictionaries</h4>

In [11]:
# Append the special tokens to both vocabularies. Each token's id is the
# dictionary's length just before insertion, plus one.
tokens = ['<PAD>', '<EOS>', '<OUT>', '<SOS>']
for vocabulary in (questionswords2int, answerswords2int):
    for token in tokens:
        vocabulary[token] = len(vocabulary) + 1

<h4>Creating the inverse dictionary of the answerswords2int dictionary</h4>

In [12]:
# Inverse lookup for the answer vocabulary: integer id -> word.
answersints2word = dict(zip(answerswords2int.values(), answerswords2int.keys()))

<h4>Adding the End Of String token to the end of every answer</h4>

In [13]:
# Suffix every cleaned answer with the ' <EOS>' marker, updating the list in place.
for position, answer in enumerate(clean_answers):
    clean_answers[position] = answer + ' <EOS>'

<h4>Translating all the questions and the answers into integers and replacing all the words that were filtered out with the &lt;OUT&gt; token</h4>

In [14]:
# Encode each cleaned sentence as a list of vocabulary ids. A word that is not
# in the vocabulary is encoded with that vocabulary's '<OUT>' id.
questions_to_int = [
    [questionswords2int[word] if word in questionswords2int else questionswords2int['<OUT>']
     for word in question.split()]
    for question in clean_questions
]
answers_to_int = [
    [answerswords2int[word] if word in answerswords2int else answerswords2int['<OUT>']
     for word in answer.split()]
    for answer in clean_answers
]

<h4>Sorting Questions and Answers by length of Questions</h4>

In [15]:
# Collect question/answer pairs ordered by question length, from 1 to 25 tokens.
# Pairs whose question is empty or longer than 25 tokens are left out.
sorted_clean_questions = []
sorted_clean_answers = []
for length in range(1, 25 + 1):
    for index, question_ints in enumerate(questions_to_int):
        if len(question_ints) != length:
            continue
        sorted_clean_questions.append(question_ints)
        sorted_clean_answers.append(answers_to_int[index])

<h3>Building the Seq2seq Model</h3>

<h4>Creating placeholders for the input and the targets</h4>

In [16]:
def model_inputs():
    """Create the graph placeholders that are fed at run time.

    Returns
    -------
    tuple
        ``(inputs, targets, lr, keep_prob)``: two int32 placeholders of shape
        ``[None, None]`` named 'input' and 'target', followed by two float32
        placeholders named 'learning_rate' and 'keep_prob'.
    """
    make_placeholder = tf.compat.v1.placeholder
    inputs = make_placeholder(tf.int32, [None, None], name='input')
    targets = make_placeholder(tf.int32, [None, None], name='target')
    lr = make_placeholder(tf.float32, name='learning_rate')
    keep_prob = make_placeholder(tf.float32, name='keep_prob')
    return inputs, targets, lr, keep_prob

<h4>Preprocessing the targets</h4>

In [17]:
def preprocess_targets(targets, word2int, batch_size):
    """Prepend an '<SOS>' column to `targets` and drop its last column.

    Parameters
    ----------
    targets : tensor
        2-D tensor of token ids; sliced over its first `batch_size` rows.
    word2int : dict
        Vocabulary containing the '<SOS>' key.
    batch_size : int
        Number of rows in the generated '<SOS>' column.

    Returns
    -------
    tensor
        The '<SOS>' column concatenated along axis 1 with `targets` minus its
        last column.
    """
    sos_column = tf.fill([batch_size, 1], word2int['<SOS>'])
    # End index -1 on the second axis excludes the final column of every row.
    targets_without_last = tf.strided_slice(targets, [0, 0], [batch_size, -1], [1, 1])
    return tf.concat([sos_column, targets_without_last], 1)

<h4>Creating the Encoder RNN Layer</h4>

In [18]:
def encoder_rnn_layer(rnn_inputs, rnn_size, num_layers, keep_prob, sequence_length):
    """Build a stacked bidirectional LSTM encoder and return its final state.

    Parameters
    ----------
    rnn_inputs : tensor
        Embedded input sequence passed to the RNN as `inputs`.
    rnn_size : int
        Number of units in every LSTM cell.
    num_layers : int
        Number of stacked LSTM layers per direction.
    keep_prob : float or tensor
        Keep probability applied to each layer's input by the dropout wrapper.
    sequence_length : tensor
        Forwarded unchanged as `sequence_length` to the bidirectional RNN.

    Returns
    -------
    The `encoder_state` returned by `bidirectional_dynamic_rnn`; the per-step
    outputs are discarded.
    """
    rnn_cell = tf.compat.v1.nn.rnn_cell

    def build_stacked_cell():
        # A fresh cell object is created for every layer. Repeating one object
        # with `[cell] * num_layers` makes all layers share a single cell
        # instance instead of each having its own.
        layers = []
        for _ in range(num_layers):
            lstm = rnn_cell.BasicLSTMCell(rnn_size)
            layers.append(rnn_cell.DropoutWrapper(lstm, input_keep_prob = keep_prob))
        return rnn_cell.MultiRNNCell(layers)

    # The forward and backward directions get separate stacks for the same reason.
    _, encoder_state = tf.compat.v1.nn.bidirectional_dynamic_rnn(cell_fw = build_stacked_cell(),
                                                                    cell_bw = build_stacked_cell(),
                                                                    sequence_length = sequence_length,
                                                                    inputs = rnn_inputs,
                                                                    dtype = tf.float32)
    return encoder_state

<h4>Decoding the Training Set</h4>

In [19]:
def decode_training_set(encoder_state, decoder_cell, decoder_embedded_input, sequence_length, decoding_scope, output_function, keep_prob, batch_size):
    """Run the attention decoder over the training inputs.

    Parameters
    ----------
    encoder_state
        Encoder final state; only its first element seeds the decoder.
    decoder_cell
        RNN cell used by the decoder; its `output_size` sizes the attention.
    decoder_embedded_input : tensor
        Embedded decoder inputs.
    sequence_length
        Forwarded to `dynamic_rnn_decoder`.
    decoding_scope
        Variable scope in which the decoder is built.
    output_function : callable
        Applied to the dropped-out decoder output to produce the result.
    keep_prob : float or tensor
        Keep probability for the dropout applied to the decoder output.
    batch_size : int
        First dimension of the zero-initialised attention states.

    Returns
    -------
    The value of `output_function` applied to the decoder output after dropout.

    Notes
    -----
    NOTE(review): `tf.contrib.seq2seq.prepare_attention`,
    `attention_decoder_fn_train` and `dynamic_rnn_decoder` are `tf.contrib`
    APIs; confirm the installed TensorFlow still provides them.
    """
    attention_states = tf.zeros([batch_size, 1, decoder_cell.output_size])
    attention_keys, attention_values, attention_score_function, attention_construct_function = tf.contrib.seq2seq.prepare_attention(attention_states, attention_option = 'bahdanau', num_units = decoder_cell.output_size)
    training_decoder_function = tf.contrib.seq2seq.attention_decoder_fn_train(encoder_state[0],
                                                                              attention_keys,
                                                                              attention_values,
                                                                              attention_score_function,
                                                                              attention_construct_function,
                                                                              name = 'attn_dec_train')
    # Only the decoder output is used; the final state and context state are discarded.
    decoder_output, _, _ = tf.contrib.seq2seq.dynamic_rnn_decoder(decoder_cell,
                                                                  training_decoder_function,
                                                                  decoder_embedded_input,
                                                                  sequence_length,
                                                                  scope = decoding_scope)
    # tf.nn.dropout takes a drop rate, hence 1 - keep_prob.
    decoder_output_dropout = tf.nn.dropout(decoder_output, rate=1 - (keep_prob))
    return output_function(decoder_output_dropout)

<h4>Decoding the Test / Validation Set</h4>

In [20]:
def decode_test_set(encoder_state, decoder_cell, decoder_embeddings_matrix, sos_id, eos_id, maximum_length, num_words, sequence_length, decoding_scope, output_function, keep_prob, batch_size):
    """Run the attention decoder in inference mode.

    Parameters
    ----------
    encoder_state
        Encoder final state; only its first element seeds the decoder.
    decoder_cell
        RNN cell used by the decoder; its `output_size` sizes the attention.
    decoder_embeddings_matrix
        Embedding matrix handed to the inference decoder function.
    sos_id, eos_id : int
        Ids of the start and end tokens handed to the inference decoder function.
    maximum_length
        Maximum decoding length handed to the inference decoder function.
    num_words : int
        Vocabulary size handed to the inference decoder function.
    sequence_length
        Accepted for signature compatibility; not used in this function.
    decoding_scope
        Variable scope in which the decoder is built.
    output_function : callable
        Handed to the inference decoder function.
    keep_prob
        Accepted for signature compatibility; no dropout is applied here.
    batch_size : int
        First dimension of the zero-initialised attention states.

    Returns
    -------
    The first value returned by `dynamic_rnn_decoder`.

    Notes
    -----
    NOTE(review): `tf.contrib.seq2seq.prepare_attention`,
    `attention_decoder_fn_inference` and `dynamic_rnn_decoder` are `tf.contrib`
    APIs; confirm the installed TensorFlow still provides them.
    """
    attention_states = tf.zeros([batch_size, 1, decoder_cell.output_size])
    attention_keys, attention_values, attention_score_function, attention_construct_function = tf.contrib.seq2seq.prepare_attention(attention_states, attention_option = 'bahdanau', num_units = decoder_cell.output_size)
    test_decoder_function = tf.contrib.seq2seq.attention_decoder_fn_inference(output_function,
                                                                                  encoder_state[0],
                                                                                  attention_keys,
                                                                                  attention_values,
                                                                                  attention_score_function,
                                                                                  attention_construct_function,
                                                                                  decoder_embeddings_matrix,
                                                                                  sos_id,
                                                                                  eos_id,
                                                                                  maximum_length,
                                                                                  num_words,
                                                                                  name = 'attn_dec_inf')
    # Only the predictions are used; the final state and context state are discarded.
    test_predictions, _, _ = tf.contrib.seq2seq.dynamic_rnn_decoder(decoder_cell,
                                                                    test_decoder_function,
                                                                    scope = decoding_scope)
    return test_predictions

<h4>Creating the Decoder RNN</h4>

In [21]:
def decoder_rnn(decoder_embedded_input, decoder_embeddings_matrix, encoder_state, num_words, sequence_length, rnn_size, num_layers, word2int, keep_prob, batch_size):
    """Build the decoder and return its training and inference outputs.

    Parameters
    ----------
    decoder_embedded_input : tensor
        Embedded decoder inputs used for the training decode.
    decoder_embeddings_matrix
        Embedding matrix used for the inference decode.
    encoder_state
        Final encoder state passed to both decode functions.
    num_words : int
        Output size of the fully connected projection.
    sequence_length
        Passed to both decode functions; `sequence_length - 1` is the
        maximum length of the inference decode.
    rnn_size : int
        Number of units in every LSTM cell.
    num_layers : int
        Number of stacked LSTM layers.
    word2int : dict
        Vocabulary containing the '<SOS>' and '<EOS>' keys.
    keep_prob : float or tensor
        Keep probability for dropout.
    batch_size : int
        Batch size passed to both decode functions.

    Returns
    -------
    tuple
        ``(training_predictions, test_predictions)``.

    Notes
    -----
    NOTE(review): `tf.contrib.layers.fully_connected` is a `tf.contrib` API;
    confirm the installed TensorFlow still provides it.
    """
    with tf.compat.v1.variable_scope('decoding') as decoding_scope:
        rnn_cell = tf.compat.v1.nn.rnn_cell
        # A fresh cell object per layer; `[cell] * num_layers` would make every
        # layer share a single cell instance.
        decoder_cell = rnn_cell.MultiRNNCell([
            rnn_cell.DropoutWrapper(rnn_cell.BasicLSTMCell(rnn_size), input_keep_prob = keep_prob)
            for _ in range(num_layers)
        ])
        weights = tf.compat.v1.truncated_normal_initializer(stddev = 0.1)
        biases = tf.compat.v1.zeros_initializer()

        def output_function(x):
            # NOTE(review): the original passed the string 'relu' under a
            # misspelled keyword. `activation_fn` must be a callable, so
            # tf.nn.relu keeps that apparent intent. Confirm whether a linear
            # projection (activation_fn = None) was wanted for the logits.
            return tf.contrib.layers.fully_connected(x,
                                                     num_words,
                                                     activation_fn = tf.nn.relu,
                                                     normalizer_fn = None,
                                                     scope = decoding_scope,
                                                     weights_initializer = weights,
                                                     biases_initializer = biases)

        training_predictions = decode_training_set(encoder_state,
                                                   decoder_cell,
                                                   decoder_embedded_input,
                                                   sequence_length,
                                                   decoding_scope,
                                                   output_function,
                                                   keep_prob,
                                                   batch_size)
        # The inference decoder reuses the variables created by the training decoder.
        decoding_scope.reuse_variables()
        # Twelve positional arguments, in the order decode_test_set declares them:
        # `sequence_length` follows `num_words`.
        test_predictions = decode_test_set(encoder_state,
                                           decoder_cell,
                                           decoder_embeddings_matrix,
                                           word2int['<SOS>'],
                                           word2int['<EOS>'],
                                           sequence_length - 1,
                                           num_words,
                                           sequence_length,
                                           decoding_scope,
                                           output_function,
                                           keep_prob,
                                           batch_size)
        return training_predictions, test_predictions

<h4>Building the Seq2Seq Model</h4>

In [29]:
def seq2seq_model(inputs, targets, keep_prob, batch_size, sequence_length, answers_num_words, questions_num_words, encoder_embedding_size, decoder_embedding_size, rnn_size, num_layers, questionswords2int):
    """Assemble the encoder and decoder and return the decoder's outputs.

    Parameters
    ----------
    inputs : tensor
        Integer token ids fed to the encoder embedding.
    targets : tensor
        Integer token ids; preprocessed and embedded as decoder input.
    keep_prob : float or tensor
        Keep probability for dropout, forwarded to the encoder and decoder.
    batch_size : int
        Batch size forwarded to target preprocessing and the decoder.
    sequence_length
        Forwarded to the encoder and the decoder.
    answers_num_words : int
        The encoder embedding has ``answers_num_words + 1`` rows.
    questions_num_words : int
        The decoder embedding has ``questions_num_words + 1`` rows; also the
        decoder's output size.
    encoder_embedding_size, decoder_embedding_size : int
        Width of the encoder and decoder embeddings.
    rnn_size : int
        Number of units per LSTM cell.
    num_layers : int
        Number of stacked LSTM layers.
    questionswords2int : dict
        Vocabulary that supplies the '<SOS>' and '<EOS>' ids.

    Returns
    -------
    tuple
        ``(training_predictions, test_predictions)`` as returned by `decoder_rnn`.

    Notes
    -----
    NOTE(review): the encoder embedding is sized with `answers_num_words`,
    while the decoder uses `questions_num_words` and `questionswords2int`.
    Yet the encoder consumes `inputs` and the decoder consumes `targets`.
    Confirm this pairing is intended.
    """
    encoder_embedded_input = tf.keras.layers.Embedding(answers_num_words + 1,
                                                       encoder_embedding_size,
                                                       embeddings_initializer = tf.random_uniform_initializer(0, 1))(inputs)

    # The stacked LSTM encoder accepts `sequence_length` and a tensor-valued
    # `keep_prob`. Its state is a nested structure, which the decode functions
    # index with `encoder_state[0]`. A Keras GRU satisfies none of these:
    # its call has no `sequence_length` argument, its dropout rates must be
    # Python floats, and its state is a single tensor.
    encoder_state = encoder_rnn_layer(encoder_embedded_input, rnn_size, num_layers, keep_prob, sequence_length)

    preprocessed_targets = preprocess_targets(targets, questionswords2int, batch_size)
    decoder_embeddings_matrix = tf.Variable(tf.random.uniform([questions_num_words + 1, decoder_embedding_size], 0, 1))
    decoder_embedded_input = tf.nn.embedding_lookup(decoder_embeddings_matrix, preprocessed_targets)
    training_predictions, test_predictions = decoder_rnn(decoder_embedded_input,
                                                         decoder_embeddings_matrix,
                                                         encoder_state,
                                                         questions_num_words,
                                                         sequence_length,
                                                         rnn_size,
                                                         num_layers,
                                                         questionswords2int,
                                                         keep_prob,
                                                         batch_size)
    return training_predictions, test_predictions

<h3>Training the Seq2Seq Model</h3>

<h4>Setting the Hyperparameters</h4>

In [23]:
# Training schedule.
epochs = 100
batch_size = 64

# Network dimensions.
num_layers = 3
rnn_size = 512
encoding_embedding_size = 512
decoding_embedding_size = 512

# Optimisation and regularisation settings.
# NOTE(review): how the decay and keep probability are applied is not shown
# in this part of the notebook.
learning_rate = 0.01
learning_rate_decay = 0.9
keep_probability = 0.5

<h4>Defining a Session</h4>

In [24]:
# Clear the default graph first so that re-running this cell starts from an
# empty graph, then open the session that will execute it.
tf.compat.v1.reset_default_graph()
session = tf.compat.v1.Session()

<h4>Loading the Model Inputs</h4>

In [25]:
# Create the four placeholders (input ids, target ids, learning rate, keep
# probability) in the current default graph.
inputs, targets, lr, keep_prob = model_inputs()

<h4>Setting the Sequence Length</h4>

In [26]:
# Placeholder that evaluates to 25 unless another value is fed; 25 is also the
# longest question length kept when the pairs were sorted by length.
sequence_length = tf.compat.v1.placeholder_with_default(25, None, name = 'sequence_length')

<h4>Getting the shapes of the input tensor</h4>

In [27]:
# Dynamic shape of the `inputs` placeholder, resolved when the graph is run.
input_shape = tf.shape(inputs)

<h4>Getting the Training and the Test Predictions</h4>

In [30]:
# The encoder receives every input sequence reversed along its last axis.
reversed_inputs = tf.reverse(inputs, [-1])

# Build the full model; keyword arguments make the pairing of each value with
# its seq2seq_model parameter explicit.
training_predictions, test_predictions = seq2seq_model(
    inputs = reversed_inputs,
    targets = targets,
    keep_prob = keep_prob,
    batch_size = batch_size,
    sequence_length = sequence_length,
    answers_num_words = len(answerswords2int),
    questions_num_words = len(questionswords2int),
    encoder_embedding_size = encoding_embedding_size,
    decoder_embedding_size = decoding_embedding_size,
    rnn_size = rnn_size,
    num_layers = num_layers,
    questionswords2int = questionswords2int,
)

ValueError: tf.enable_eager_execution must be called at program startup.