# Part - 1 ( Data Preprocessing )

## Importing the libraries

In [1]:
import numpy as np
import tensorflow as tf
import re
import time

In [2]:
tf.__version__

'1.0.0'

## Importing the dataset

In [3]:
lines = open('dataset/movie_lines.txt', encoding='utf-8', errors='ignore').read().split('\n')
conversations = open('dataset/movie_conversations.txt', encoding='utf-8', errors='ignore').read().split('\n')

## Create a dictionary that maps each lines and its id

In [4]:
id2line = {}

for line in lines:
    _line = line.split(' +++$+++ ') # _line : throwaway variables
    if len(_line) == 5:
        id2line[_line[0]] = _line[4]

## Create a list of all the conversation

In [5]:
conversations_ids = []

for conversation in conversations[:-1]: # last row of conversations is empty
    _conversation = conversation.split(' +++$+++ ')[-1][1:-1].replace("'","").replace(" ", "")
    #                                               |     |             |               |______ remove spaces
    #                                               |     |             |______________________ remove '
    #                                               |     |____________________________________ remove the '[' , ']'
    #                                               |__________________________________________ get last element of split (ids)
    conversations_ids.append(_conversation.split(','))

## Get questions and answers

In [6]:
questions = []
answers = []

for conversation in conversations_ids:
    for i in range(len(conversation) -1 ):
        questions.append( id2line[conversation[i]] )
        answers.append( id2line[conversation[i+1]] )

In [7]:
def printQA(questions, answers, startIndex, endIndex):
    for i in range(startIndex, endIndex):
        print(' Q : ',questions[i],'\n', 'A : ' ,answers[i], '\n')

In [8]:
printQA(questions, answers, 0, 10)

 Q :  Can we make this quick?  Roxanne Korrine and Andrew Barrett are having an incredibly horrendous public break- up on the quad.  Again. 
 A :  Well, I thought we'd start with pronunciation, if that's okay with you. 

 Q :  Well, I thought we'd start with pronunciation, if that's okay with you. 
 A :  Not the hacking and gagging and spitting part.  Please. 

 Q :  Not the hacking and gagging and spitting part.  Please. 
 A :  Okay... then how 'bout we try out some French cuisine.  Saturday?  Night? 

 Q :  You're asking me out.  That's so cute. What's your name again? 
 A :  Forget it. 

 Q :  No, no, it's my fault -- we didn't have a proper introduction --- 
 A :  Cameron. 

 Q :  Cameron. 
 A :  The thing is, Cameron -- I'm at the mercy of a particularly hideous breed of loser.  My sister.  I can't date until she does. 

 Q :  The thing is, Cameron -- I'm at the mercy of a particularly hideous breed of loser.  My sister.  I can't date until she does. 
 A :  Seems like she could ge

## Cleaning the text

In [9]:
def cleanText(text):
    text = text.lower()
    text = re.sub(r"i'm", "i am", text)
    text = re.sub(r"he's", "he is", text)
    text = re.sub(r"she's", "she is", text)
    text = re.sub(r"that's", "that is", text)
    text = re.sub(r"what's", "what is", text)
    text = re.sub(r"\'ll", " will", text)
    text = re.sub(r"\'ve", " have", text)
    text = re.sub(r"\'d", " would", text)
    text = re.sub(r"won't", "will not", text)
    text = re.sub(r"[-()\"#/@;:<>{}+=~|.?,]", "", text)
    return text

In [10]:
clean_questions = []
for question in questions:
    clean_questions.append(cleanText(question))

In [11]:
clean_answers = []
for answer in answers:
    clean_answers.append(cleanText(answer))

In [12]:
printQA(clean_questions, clean_answers, 0, 10)

 Q :  can we make this quick  roxanne korrine and andrew barrett are having an incredibly horrendous public break up on the quad  again 
 A :  well i thought we would start with pronunciation if that is okay with you 

 Q :  well i thought we would start with pronunciation if that is okay with you 
 A :  not the hacking and gagging and spitting part  please 

 Q :  not the hacking and gagging and spitting part  please 
 A :  okay then how 'bout we try out some french cuisine  saturday  night 

 Q :  you're asking me out  that is so cute what is your name again 
 A :  forget it 

 Q :  no no it's my fault  we didn't have a proper introduction  
 A :  cameron 

 Q :  cameron 
 A :  the thing is cameron  i am at the mercy of a particularly hideous breed of loser  my sister  i can't date until she does 

 Q :  the thing is cameron  i am at the mercy of a particularly hideous breed of loser  my sister  i can't date until she does 
 A :  seems like she could get a date easy enough 

 Q :  wh

## Vectorize words

In [13]:
word2count = {}

for question in clean_questions:
    for word in question.split():
        if word not in word2count:
            word2count[word] = 1
        else:
            word2count[word] += 1

for answer in clean_answers:
    for word in answer.split():
        if word not in word2count:
            word2count[word] = 1
        else:
            word2count[word] += 1

### Give uniqu int to word and remove non frequent words

In [14]:
threshold = 20

questionsWords2ints = {}

word_num = 0
for word, count in word2count.items():
    if count >= threshold:
        questionsWords2ints[word] = word_num
        word_num += 1

answersWords2ints = {}

word_num = 0
for word, count in word2count.items():
    if count >= threshold:
        answersWords2ints[word] = word_num
        word_num += 1

### Adding last two tokens to these two dictionaries

In [15]:
tokens = ['<PAD>', '<EOS>', '<OUT>', '<SOS>'] 
# SOS : start of string
# EOS : end of string
# OUT : lest frequent words
# PAD : padding to make same length

for token in tokens:
    questionsWords2ints[token] = len(questionsWords2ints) + 1

for token in tokens:
    answersWords2ints[token] = len(answersWords2ints) + 1

### Create the inverse dictionary of answersWords2int

In [16]:
answersInts2Words = {w_i : w for w, w_i in answersWords2ints.items()}

In [17]:
i = 0
for w, w_i in answersWords2ints.items():
    print(w, w_i)
    i += 1
    if i == 5:
        break

conscious 7735
powell 7782
over! 8553
while 3332
soze 5581


In [18]:
i = 0
for w_i, w in answersInts2Words.items():
    print(w_i,w)
    i += 1
    if i == 5:
        break

0 hero
1 haircut
2 giant
3 insane!
4 will


### Add EOS token to the end of every answer

In [19]:
for i in range(len(clean_answers)):
    clean_answers[i] += ' <EOS>'

In [20]:
printQA(clean_questions, clean_answers, 0, 1)

 Q :  can we make this quick  roxanne korrine and andrew barrett are having an incredibly horrendous public break up on the quad  again 
 A :  well i thought we would start with pronunciation if that is okay with you <EOS> 



### Translate all the questions and answers into integers
### Replace all the filtered words by <OUT>

In [21]:
questions_into_int = []

for question in clean_questions:
    ints = []
    for word in question.split():
        if word not in questionsWords2ints:
            ints.append(questionsWords2ints['<OUT>'])
        else:
            ints.append(questionsWords2ints[word])
    questions_into_int.append(ints)

answers_into_int = []

for answer in clean_answers:
    ints = []
    for word in answer.split():
        if word not in questionsWords2ints:
            ints.append(answersWords2ints['<OUT>'])
        else:
            ints.append(answersWords2ints[word])
    answers_into_int.append(ints)

## Short questions and answers by length of quections

### This speedup training process and helps reduse the loss
### Reduse the amount of padding during the training 

In [22]:
sorted_clean_quections = []
sorted_clean_answers = []

for length in range(1, 25 + 1): # upto 25
    for i in enumerate(questions_into_int): # i[0] : index, i[1] : quection
        if len(i[1]) == length:
            sorted_clean_quections.append(questions_into_int[i[0]])
            sorted_clean_answers.append(answers_into_int[i[0]])

# Part - 2 ( Building Seq-2-Seq model )

## Creating placeholders for the inputs and the targets

In [23]:
def model_inputs():
    inputs = tf.placeholder(tf.int32, shape=[None, None], name='input')
    #                                          |_________ Two dimensional matrix
    targets = tf.placeholder(tf.int32, shape=[None, None], name='traget')
    lr = tf.placeholder(tf.float32, name='learning_rate')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob') # control the dropout rate
    return inputs, targets, lr, keep_prob

## Preprocessing the traget

### Neural network expect targets with batches (answers)

In [24]:
def preprocess_tragets(targets, word2int, batch_size):
    left_side = tf.fill([batch_size, 1], word2int['<SOS>']) # (dims, values, name=None)
    right_side = tf.strided_slice(targets, [0,0], [batch_size,-1], strides=[1,1]) # [batch_size,-1] : all lines except the last element
    #(input_, begin, end, strides=None, begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=0, var=None, name=None)
    preprocessed_tragets = tf.concat([left_side, right_side], 1) # (values, axis, name='concat')
    return preprocessed_tragets

## Creating the encoder RNN layer

In [25]:
def encoder_rnn(rnn_inputs, rnn_size, num_layers, keep_prob, sequence_length):
    lstm = tf.contrib.rnn.BasicLSTMCell(rnn_size)
    lstm_dropout = tf.contrib.rnn.DropoutWrapper(lstm, input_keep_prob=keep_prob)
    encoder_cell = tf.contrib.rnn.MultiRNNCell([lstm_dropout] * num_layers )
#   _ -> encoder output
    _, encoder_state = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=encoder_cell,
        cell_bw=encoder_cell,
        sequence_length=sequence_length,
        inputs=rnn_inputs,
        dtype=tf.float32
        ) # Creates a dynamic version of bidirectional recurrent neural network
    return encoder_state

## Decoding the training set

In [26]:
def decode_training_set(
    encoder_state, 
    decoder_cell, 
    decoder_embedded_input, 
    sequence_length, 
    decoding_scope,
    output_function,
    keep_prob,
    batch_size):

    attention_states = tf.zeros([batch_size, 1, decoder_cell.output_size])
    attention_keys, attention_values, attention_score_function, attention_construct_function = tf.contrib.seq2seq.prepare_attention(
        attention_states,
        attention_option='bahdanau',
        num_units=decoder_cell.output_size
    )
    training_decoder_function = tf.contrib.seq2seq.attention_decoder_fn_train(
        encoder_state[0],
        attention_keys,
        attention_values,
        attention_score_function,
        attention_construct_function,
        name='attn_dec_train'
    )
    #               _,   -> decoder final state
    #                , _ -> decoder final context state
    decoder_output, _, _ = tf.contrib.seq2seq.dynamic_rnn_decoder(
        decoder_cell, 
        training_decoder_function, 
        decoder_embedded_input, 
        sequence_length,
        scope = decoding_scope,
    )
    decoder_output_dropout = tf.nn.dropout(decoder_output, keep_prob)
    return output_function(decoder_output_dropout)

## Decoding the test/validation set

In [27]:
def decode_test_set(
    encoder_state, 
    decoder_cell, 
    decoder_embeddings_matrix,
    sos_id,
    eos_id,
    maxumum_length,
    num_words,
    sequence_length, 
    decoding_scope,
    output_function,
    keep_prob,
    batch_size):

    attention_states = tf.zeros([batch_size, 1, decoder_cell.output_size])
    attention_keys, attention_values, attention_score_function, attention_construct_function = tf.contrib.seq2seq.prepare_attention(
        attention_states,
        attention_option='bahdanau',
        num_units=decoder_cell.output_size
    )
    test_decoder_function = tf.contrib.seq2seq.attention_decoder_fn_inference(
        output_function,
        encoder_state[0],
        attention_keys,
        attention_values,
        attention_score_function,
        attention_construct_function,
        decoder_embeddings_matrix,
        sos_id,
        eos_id,
        maxumum_length,
        num_words,
        name='attn_dec_inf'
    )
    #                  ______ decoder final state
    #                 |   ___ decoder final context state
    #                 |  |
    test_predictions, _, _ = tf.contrib.seq2seq.dynamic_rnn_decoder(
        decoder_cell, 
        test_decoder_function, 
        scope = decoding_scope,
    )

    return test_predictions

## Creating the decoder RNN

In [28]:
def decoder_rnn(
    decoder_embedded_input,
    decoder_embedding_matrix,
    encoder_state,
    num_words,
    sequence_length,
    rnn_size,
    num_layers,
    word2int,
    keep_prob,
    batch_size
    ):

    with tf.variable_scope('decoding') as decoding_scope:

        lstm = tf.contrib.rnn.BasicLSTMCell(rnn_size)
        lstm_dropout = tf.contrib.rnn.DropoutWrapper(lstm, input_keep_prob=keep_prob)
        decoder_cell = tf.contrib.rnn.MultiRNNCell([lstm_dropout] * num_layers )
        weights = tf.truncated_normal_initializer(stddev=0.1) # stddev : standard deviation
        biases = tf.zeros_initializer()
        output_function = lambda x: tf.contrib.layers.fully_connected(
            x,
            num_words,
            None,
            scope=decoding_scope,
            weights_initializer=weights,
            biases_initializer=biases
            )
        training_predictions = decode_training_set(
            encoder_state, 
            decoder_cell, 
            decoder_embedded_input, 
            sequence_length, 
            decoding_scope,
            output_function,
            keep_prob,
            batch_size
            )
        decoding_scope.reuse_variables()
        test_predictions = decode_test_set(
            encoder_state, 
            decoder_cell, 
            decoder_embedding_matrix, 
            word2int['<SOS>'], 
            word2int['<EOS>'], 
            sequence_length - 1,
            num_words, 
            sequence_length,
            decoding_scope,
            output_function,
            keep_prob, 
            batch_size
            )

    return training_predictions, test_predictions

## Building the seq2seq model

In [29]:
def seq2seq_model(
    inputs, 
    targets, 
    keep_prob, 
    batch_size, 
    sequence_length, 
    answers_num_words, 
    quections_num_words, 
    encoder_embedding_size,
    decoder_embedding_size,
    rnn_size,
    num_layers,
    questionsWords2ints):
    
    encoder_embedded_input = tf.contrib.layers.embed_sequence(
        inputs,
        answers_num_words + 1,
        encoder_embedding_size,
        initializer=tf.random_uniform_initializer(0,1)
        )
    
    encoder_state = encoder_rnn(
        encoder_embedded_input,
        rnn_size,
        num_layers,
        keep_prob,
        sequence_length
    )

    preprocessed_tragets = preprocess_tragets(targets, questionsWords2ints, batch_size)

    decoder_embeddings_matrix = tf.Variable(tf.random_uniform([quections_num_words+1, decoder_embedding_size], 0, 1))

    decoder_embedded_input = tf.nn.embedding_lookup(decoder_embeddings_matrix, preprocessed_tragets)

    training_predictions, test_predictions = decoder_rnn(
        decoder_embedded_input, 
        decoder_embeddings_matrix, 
        encoder_state, 
        quections_num_words, 
        sequence_length, 
        rnn_size, 
        num_layers, 
        questionsWords2ints, 
        keep_prob, 
        batch_size
    )

    return training_predictions, test_predictions

# Part - 3 ( Training the seq2seq model )

## Settings the hyper parameters

In [30]:
epochs = 1 #100
batch_size = 124 #64
rnn_size = 512
num_layers = 3
encodeing_embedding_size = 512
decoding_embedding_size = 512
learning_rate = 0.01
learning_rate_decay = 0.9 # 90%
min_learning_rate = 0.0001
keep_probability = 0.5

## Definition a session

In [31]:
tf.reset_default_graph()
session = tf.InteractiveSession()

## Loading the model inputs

In [32]:
inputs, targets, lr, keep_prob = model_inputs()

## Setting the sequence length 

In [33]:
sequence_length = tf.placeholder_with_default(25, None, name='sequence_length')

## Getting the shape of the inputs tensor

In [34]:
input_shape = tf.shape(inputs)

## Getting the training and test predictions

In [35]:
training_predictions, test_predictions = seq2seq_model(
    tf.reverse(inputs, [-1]),
    targets,
    keep_prob,
    batch_size,
    sequence_length,
    len(answersWords2ints),
    len(questionsWords2ints),
    encodeing_embedding_size,
    decoding_embedding_size,
    rnn_size,
    num_layers,
    questionsWords2ints
)

## Setting up the loss error, the optimizer and gradient clipping

In [36]:
with tf.name_scope('optimization'):
    loss_error = tf.contrib.seq2seq.sequence_loss(
        training_predictions,
        targets,
        tf.ones([input_shape[0], sequence_length])
        )
    optimizer = tf.train.AdamOptimizer(learning_rate)
    gradients = optimizer.compute_gradients(loss_error)
    clipped_gradients = [(tf.clip_by_value(grad_tensor, -5.0, 5.0), grad_variable) for grad_tensor, grad_variable in gradients if grad_tensor is not None]
    optimizer_gradient_clipping = optimizer.apply_gradients(clipped_gradients)

## Padding the sequences with <PAD> token

In [37]:
# Q = ['who'  , 'are', 'you', '<PAD>',  '<PAD>', '<PAD>', '<PAD>', '<PAD>']
# A = ['<SOS>', 'I'  , 'am ', 'a'    ,  'bot'  , '.'    , '<EOS>', '<PAD>']

def apply_padding(batch_of_sequences, word2int):
    max_sequence_length = max([len(sequence) for sequence in batch_of_sequences])
    return [sequence + [word2int['<PAD>']] * (max_sequence_length-len(sequence)) for sequence in batch_of_sequences]

## Splitting the data into batches of questions and answers

In [38]:
def split_into_batches(questions, answers, batch_size):
    for batch_index in range(0, len(questions) // batch_size): # // : divide and get int
        start_index = batch_index * batch_size
        questions_in_batch = questions[start_index : start_index+batch_size]
        answers_in_batch = answers[start_index : start_index+batch_size]
        padded_questions_in_batch = np.array(apply_padding(questions_in_batch, questionsWords2ints))
        padded_answers_in_batch = np.array(apply_padding(answers_in_batch, answersWords2ints))
        yield padded_questions_in_batch, padded_answers_in_batch

## Splitting the questions and answers into training and validation sets

In [39]:
training_validation_split = int(len(sorted_clean_quections) * 0.15)

training_questions = sorted_clean_quections[training_validation_split:]
training_answers = sorted_clean_answers[training_validation_split:]

validation_questions = sorted_clean_quections[:training_validation_split]
validation_answers = sorted_clean_answers[:training_validation_split]

## Training

In [40]:
batch_index_check_training_loss = 100
batch_index_check_validation_loss = (len(training_questions) // batch_size // 2) - 1
total_training_loss_error = 0
list_validation_loss_error = []
early_stopping_check = 0
early_stopping_stop = 1000
checkpoint = 'chatbot_weights.ckpt'
session.run(tf.global_variables_initializer())

In [41]:
for epoch in range(1, epochs+1):
    for batch_index, (padded_questions_in_batch, padded_answers_in_batch) in enumerate(split_into_batches(training_questions, training_answers, batch_size)):
        starting_time = time.time()
        _, batch_training_loss_error = session.run(
            [optimizer_gradient_clipping, loss_error],
            {
                inputs : padded_questions_in_batch, 
                targets : padded_answers_in_batch, 
                lr : learning_rate,
                sequence_length : padded_answers_in_batch.shape[1],
                keep_prob : keep_probability
                }
            )
        total_training_loss_error += batch_training_loss_error
        ending_time = time.time()
        batch_time = ending_time - starting_time

        if batch_index % batch_index_check_training_loss == 0:
            print('Epoch: {:>3}/{}, Batch: {:>4}/{}, Training Loss Error : {:>6.3f}, Training Time On 100 Batches: {:d} seconds'.format(
                epoch,
                epochs,
                batch_size,
                len(training_questions) // batch_size,
                total_training_loss_error / batch_index_check_training_loss,
                int(batch_time * batch_index_check_training_loss)
            ))
            total_training_loss_error = 0
        
        if batch_index % batch_index_check_validation_loss == 0 and batch_index > 0:
            total_validation_loss_error = 0
            starting_time = timte.time()

            for batch_index_validation, (padded_questions_in_batch, padded_answers_in_batch) in enumerate(split_into_batches(validation_questions, validation_answers, batch_size)):
                batch_validation_loss_error = session.run(
                loss_error,
                {
                    inputs : padded_questions_in_batch, 
                    targets : padded_answers_in_batch, 
                    lr : learning_rate,
                    sequence_length : padded_answers_in_batch.shape[1],
                    keep_prob : 1
                    }
                )
                total_validation_loss_error += batch_validation_loss_error

            ending_time = time.time()
            batch_time = ending_time - starting_time

            avarage_validation_loss_error = total_validation_loss_error / (len(validation_questions)/batch_size)
            print('Validation Loss Error : {:>6.3f}, Batch Validation Time : {:d} seconds'.format(avarage_validation_loss_error, int(batch_time)))
            learning_rate *= learning_rate_decay
            if learning_rate < min_learning_rate:
                learning_rate = min_learning_rate
            
            list_validation_loss_error.append(avarage_validation_loss_error)
            if avarage_validation_loss_error <= min(list_validation_loss_error):
                print('I Speek Better Now!!')
                early_stopping_check = 0
                saver = tf.train.Saver()
                saver.save(session, checkpoint)
            else:
                print('Sorry I Do Not Speek Better, I Need To Practice More.')
                early_stopping_check += 1
                if early_stopping_check == early_stopping_stop:
                    break

    if early_stopping_check == early_stopping_stop:
        print('my apology i cannot speak better anymore this is the best i can do.')
        break

print('game over')

Epoch:   1/1, Batch:  124/1399, Training Loss Error :  0.090, Training Time On 100 Batches: 4466 seconds


# Part - 4 ( Testing the seq2seq model )

## Loading the weights and running the session

In [44]:
checkpoint = "./weights/seq2seq_model.ckpt"
session = tf.InteractiveSession()
session.run(tf.global_variables_initializer())
saver = tf.train.Saver()
saver.restore(session, checkpoint)

NotFoundError: Unsuccessful TensorSliceReader constructor: Failed to find any matching files for ./weights/seq2seq_model.ckpt
	 [[Node: save_4/RestoreV2_47 = RestoreV2[dtypes=[DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](_recv_save_4/Const_0, save_4/RestoreV2_47/tensor_names, save_4/RestoreV2_47/shape_and_slices)]]

Caused by op 'save_4/RestoreV2_47', defined at:
  File "c:\Users\kavinda\.vscode\extensions\ms-toolsai.jupyter-2021.8.1013163132\pythonFiles\vscode_datascience_helpers\kernel_prewarm_starter.py", line 31, in <module>
    runpy.run_module(module, run_name="__main__", alter_sys=False)
  File "C:\Users\kavinda\anaconda3\envs\chatbot\lib\runpy.py", line 208, in run_module
    return _run_code(code, {}, init_globals, run_name, mod_spec)
  File "C:\Users\kavinda\anaconda3\envs\chatbot\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\Users\kavinda\anaconda3\envs\chatbot\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "C:\Users\kavinda\anaconda3\envs\chatbot\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "C:\Users\kavinda\anaconda3\envs\chatbot\lib\site-packages\ipykernel\kernelapp.py", line 486, in start
    self.io_loop.start()
  File "C:\Users\kavinda\anaconda3\envs\chatbot\lib\site-packages\tornado\platform\asyncio.py", line 127, in start
    self.asyncio_loop.run_forever()
  File "C:\Users\kavinda\anaconda3\envs\chatbot\lib\asyncio\base_events.py", line 421, in run_forever
    self._run_once()
  File "C:\Users\kavinda\anaconda3\envs\chatbot\lib\asyncio\base_events.py", line 1425, in _run_once
    handle._run()
  File "C:\Users\kavinda\anaconda3\envs\chatbot\lib\asyncio\events.py", line 127, in _run
    self._callback(*self._args)
  File "C:\Users\kavinda\anaconda3\envs\chatbot\lib\site-packages\tornado\platform\asyncio.py", line 117, in _handle_events
    handler_func(fileobj, events)
  File "C:\Users\kavinda\anaconda3\envs\chatbot\lib\site-packages\tornado\stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\kavinda\anaconda3\envs\chatbot\lib\site-packages\zmq\eventloop\zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "C:\Users\kavinda\anaconda3\envs\chatbot\lib\site-packages\zmq\eventloop\zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "C:\Users\kavinda\anaconda3\envs\chatbot\lib\site-packages\zmq\eventloop\zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "C:\Users\kavinda\anaconda3\envs\chatbot\lib\site-packages\tornado\stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\kavinda\anaconda3\envs\chatbot\lib\site-packages\ipykernel\kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "C:\Users\kavinda\anaconda3\envs\chatbot\lib\site-packages\ipykernel\kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "C:\Users\kavinda\anaconda3\envs\chatbot\lib\site-packages\ipykernel\kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "C:\Users\kavinda\anaconda3\envs\chatbot\lib\site-packages\ipykernel\ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\Users\kavinda\anaconda3\envs\chatbot\lib\site-packages\ipykernel\zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\Users\kavinda\anaconda3\envs\chatbot\lib\site-packages\IPython\core\interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "C:\Users\kavinda\anaconda3\envs\chatbot\lib\site-packages\IPython\core\interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\Users\kavinda\anaconda3\envs\chatbot\lib\site-packages\IPython\core\interactiveshell.py", line 2903, in run_ast_nodes
    if self.run_code(code, result):
  File "C:\Users\kavinda\anaconda3\envs\chatbot\lib\site-packages\IPython\core\interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-44-5557d8b8a6b1>", line 4, in <module>
    saver = tf.train.Saver()
  File "C:\Users\kavinda\anaconda3\envs\chatbot\lib\site-packages\tensorflow\python\training\saver.py", line 1051, in __init__
    self.build()
  File "C:\Users\kavinda\anaconda3\envs\chatbot\lib\site-packages\tensorflow\python\training\saver.py", line 1081, in build
    restore_sequentially=self._restore_sequentially)
  File "C:\Users\kavinda\anaconda3\envs\chatbot\lib\site-packages\tensorflow\python\training\saver.py", line 675, in build
    restore_sequentially, reshape)
  File "C:\Users\kavinda\anaconda3\envs\chatbot\lib\site-packages\tensorflow\python\training\saver.py", line 402, in _AddRestoreOps
    tensors = self.restore_op(filename_tensor, saveable, preferred_shard)
  File "C:\Users\kavinda\anaconda3\envs\chatbot\lib\site-packages\tensorflow\python\training\saver.py", line 242, in restore_op
    [spec.tensor.dtype])[0])
  File "C:\Users\kavinda\anaconda3\envs\chatbot\lib\site-packages\tensorflow\python\ops\gen_io_ops.py", line 668, in restore_v2
    dtypes=dtypes, name=name)
  File "C:\Users\kavinda\anaconda3\envs\chatbot\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 763, in apply_op
    op_def=op_def)
  File "C:\Users\kavinda\anaconda3\envs\chatbot\lib\site-packages\tensorflow\python\framework\ops.py", line 2395, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "C:\Users\kavinda\anaconda3\envs\chatbot\lib\site-packages\tensorflow\python\framework\ops.py", line 1264, in __init__
    self._traceback = _extract_stack()

NotFoundError (see above for traceback): Unsuccessful TensorSliceReader constructor: Failed to find any matching files for ./weights/seq2seq_model.ckpt
	 [[Node: save_4/RestoreV2_47 = RestoreV2[dtypes=[DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](_recv_save_4/Const_0, save_4/RestoreV2_47/tensor_names, save_4/RestoreV2_47/shape_and_slices)]]


## Convert the questions from string to list of encoded integers

In [None]:
def convert_string2int(question, word2int):
    question = cleanText(question)
    return [word2int.get(word, word2int['<OUT>']) for word in question.split()]

## Setting up the chat

In [None]:
while(True):
    quection = input('You : ')
    if question == 'goodbye':
        break
    question = convert_string2int(quection, questionsWords2ints)
    question = question + [questionsWords2ints['<PAD>']] * (20 - len(quection))
    fake_batch = np.zeros((batch_size, 20))
    fake_batch[0] = quection
    predicted_answer = session.run(test_predictions, {inputs: fake_batch, keep_prob: 0.5})[0]
    answer = ''
    for i in np.argmax(predicted_answer,1):
        if answersInts2Words[i] == 'i':
            token = 'I'
        elif answersInts2Words[i] == '<EOS>':
            token = '.'
        elif answersInts2Words[i] == '<OUT>':
            token = 'out'
        else:
            token = ' ' + answersInts2Words[i]
        answer += 1
        if token == '.':
            break
    print('ChatBot : ' + answer)