# Building a Chatbot with Deep Learning

![image](https://user-images.githubusercontent.com/35156624/126909072-47c9be9e-549c-420f-ac4b-f9bbd2a4de22.png)

In [1]:
import numpy as np
import tensorflow as tf
import re
import time

## We need to import the dataset for data preprocessing

In [2]:
movie_lines = open('movie_lines.txt', encoding = 'utf-8', errors = 'ignore').read().split('\n')
conversations = open('movie_conversations.txt', encoding = 'utf-8', errors = 'ignore').read().split('\n')

In [3]:
print()
print("Raw movie lines:")
print()
movie_lines[1:5]


Raw movie lines:



['L1044 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ They do to!',
 'L985 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ I hope so.',
 'L984 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ She okay?',
 "L925 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ Let's go."]

In [4]:
print()
print("Raw Conversations:")
print()
conversations[1:5]


Raw Conversations:



["u0 +++$+++ u2 +++$+++ m0 +++$+++ ['L198', 'L199']",
 "u0 +++$+++ u2 +++$+++ m0 +++$+++ ['L200', 'L201', 'L202', 'L203']",
 "u0 +++$+++ u2 +++$+++ m0 +++$+++ ['L204', 'L205', 'L206']",
 "u0 +++$+++ u2 +++$+++ m0 +++$+++ ['L207', 'L208']"]

## Create a dictionary to map movie line and id

In [5]:
id_2_movieline = {}
for line in movie_lines:
    _line = line.split(" +++$+++ ")
    if len(_line) == 5:
        id_2_movieline[_line[0]] = _line[4]

In [6]:
print()
print("Movie Lines of data set:")
print()
print(dict(list(id_2_movieline.items())[1:10]))


Movie Lines of data set:

{'L548581': 'Her...', 'L627784': 'Keep it.', 'L341938': 'Officer Starling. Welcome back.', 'L131568': "Don't trouble yourself about it, Homer--this ain't your business.", 'L508193': 'You were unhappy?', 'L265853': 'I..', 'L100213': 'Frannie.', 'L399762': "What's that?", 'L553260': "Oh, some of 'em. But it's mostly just strokes and shit. I mean, most of 'em just sleep all the time and get kind of yellow. Usually they die id they're, you know, really yellow."}


## Create a list of all the conversations

In [7]:
conversations_ids = []
for conversation in conversations[:-1]:
    _conversation = conversation.split(" +++$+++ ")[-1][1:-1].replace("'", "").replace(" ", "")
    conversations_ids.append(_conversation.split(","))
print()
print("List of conversations:")
print()
conversations_ids[:10]


List of conversations:



[['L194', 'L195', 'L196', 'L197'],
 ['L198', 'L199'],
 ['L200', 'L201', 'L202', 'L203'],
 ['L204', 'L205', 'L206'],
 ['L207', 'L208'],
 ['L271', 'L272', 'L273', 'L274', 'L275'],
 ['L276', 'L277'],
 ['L280', 'L281'],
 ['L363', 'L364'],
 ['L365', 'L366']]

In [8]:
print("Split the questions and answers")
print()
ques = []
answ = []
for convs in conversations_ids:
    for i in range(len(convs) - 1):
        ques.append(id_2_movieline[convs[i]])
        answ.append(id_2_movieline[convs[i + 1  ]])
print("Questions:")
print()
print(ques[:10])
print()
print("Answers:")
print()
print(answ[:10])

Split the questions and answers

Questions:

['Can we make this quick?  Roxanne Korrine and Andrew Barrett are having an incredibly horrendous public break- up on the quad.  Again.', "Well, I thought we'd start with pronunciation, if that's okay with you.", 'Not the hacking and gagging and spitting part.  Please.', "You're asking me out.  That's so cute. What's your name again?", "No, no, it's my fault -- we didn't have a proper introduction ---", 'Cameron.', "The thing is, Cameron -- I'm at the mercy of a particularly hideous breed of loser.  My sister.  I can't date until she does.", 'Why?', 'Unsolved mystery.  She used to be really popular when she started high school, then it was just like she got sick of it or something.', 'Gosh, if only we could find Kat a boyfriend...']

Answers:

["Well, I thought we'd start with pronunciation, if that's okay with you.", 'Not the hacking and gagging and spitting part.  Please.', "Okay... then how 'bout we try out some French cuisine.  Saturday?

## Now we need to clean the text

In [9]:
def clean(text):
    """
    function: clean
    params: String text
    does: cleans the text removing stop words, punctuation, lower case.
    returns: String clean text 
    """
    text = text.lower()
    text = re.sub(r"i'm", "i am", text)
    text = re.sub(r"he's", "he is", text)
    text = re.sub(r"she's", "she is", text)
    text = re.sub(r"that's", "that is", text)
    text = re.sub(r"what's", "what is", text)
    text = re.sub(r"where's", "where is", text)
    text = re.sub(r"\'ll", " will", text)
    text = re.sub(r"\'ve", " have", text)
    text = re.sub(r"\'re", " are", text)
    text = re.sub(r"\'d", " would", text)
    text = re.sub(r"won't", "will not", text)
    text = re.sub(r"can't", "cannot", text)
    text = re.sub(r"[-()\"#/@;:<>{}+=~|.?,]", "", text)
    return text

In [10]:
clean_ques = []
clean_answ = []
for question in ques:
    clean_ques.append(clean(question))
for answer in answ:
    clean_answ.append(clean(answer))
print()
print("Cleaned Questions:")
print(clean_ques[:10])
print()
print("Cleaned Answers:")
print(clean_answ[:10])


Cleaned Questions:
['can we make this quick  roxanne korrine and andrew barrett are having an incredibly horrendous public break up on the quad  again', 'well i thought we would start with pronunciation if that is okay with you', 'not the hacking and gagging and spitting part  please', 'you are asking me out  that is so cute what is your name again', "no no it's my fault  we didn't have a proper introduction ", 'cameron', 'the thing is cameron  i am at the mercy of a particularly hideous breed of loser  my sister  i cannot date until she does', 'why', 'unsolved mystery  she used to be really popular when she started high school then it was just like she got sick of it or something', 'gosh if only we could find kat a boyfriend']

Cleaned Answers:
['well i thought we would start with pronunciation if that is okay with you', 'not the hacking and gagging and spitting part  please', "okay then how 'bout we try out some french cuisine  saturday  night", 'forget it', 'cameron', 'the thing is

## Remove less frequent words

Find the number of occurunces of each word and remove the lowers 5%, this is to speed up the process of training the data in the neural network and to focus on the most impactful words in the corpus.

In [11]:
count_words = {}
for ques in clean_ques:
    for word in ques.split():
        if word not in count_words:
            count_words[word] = 1
        else:
            count_words[word] += 1

for answ in clean_answ:
    for word in answ.split():
        if word not in count_words:
            count_words[word] = 1
        else:
            count_words[word] += 1
print()
print("Word count hash table:")
print()
print(dict(list(count_words.items())[1:10]))


Word count hash table:

{'nineminute': 1, 'palyou': 1, 'delicacy': 4, 'deion': 1, 'demonstrate': 20, 'jimhow': 2, 'angels': 48, 'mafucka': 3, 'rigfort!': 1}


## Tokenize and create a threshold 
Tokenize to get all words and filter out words that do not meet the threshold. The threshold is set at 20%, this hyperparamater can be attuned at different levels to improve the model. Map the words to a unique number.

In [12]:
threshold = 20
questions_mapping = {}
w_count = 0
for word, count in count_words.items():
    if count >= threshold:
        questions_mapping[word] = w_count
        w_count += 1

threshold_answ = 20
answers_mapping = {}
w_count = 0
for word, count in count_words.items():
    if count >= threshold_answ:
        answers_mapping[word] = w_count
        w_count += 1

print()
print("Questions Mapping:")
print()
print(dict(list(questions_mapping.items())[1:10]))
print()
print("Answers Mapping")
print()
print(dict(list(answers_mapping.items())[1:10]))


Questions Mapping:

{'israel': 2279, 'demonstrate': 0, 'election': 6973, 'angels': 1, 'me': 4406, 'pudding': 5549, 'be!': 1154, 'gekko': 3354, 'beat': 3355}

Answers Mapping

{'israel': 2279, 'demonstrate': 0, 'election': 6973, 'angels': 1, 'me': 4406, 'pudding': 5549, 'be!': 1154, 'gekko': 3354, 'beat': 3355}


In [13]:
## TODO: LEFT OFF HERE, WORKS ABOVE. 
tokens = ['<PAD>', '<EOS>', '<OUT>','<SOS>']

for token in tokens:
    questions_mapping[token] = len(questions_mapping) + 1

for token in tokens:
    answers_mapping[token] = len(answers_mapping) + 1

In [14]:
inverse_answers = {w_i: w for w, w_i in answers_mapping.items()}

Now we need to add the EOS token to end of every answer

In [15]:
for i in range(len(clean_answ)):
    clean_answ[i] += ' <EOS>'
print()
print("EOS token at the end of each answer, this is used for the decoding part of the seq2seq model:")
print()
clean_answ[:10]


EOS token at the end of each answer, this is used for the decoding part of the seq2seq model:



['well i thought we would start with pronunciation if that is okay with you <EOS>',
 'not the hacking and gagging and spitting part  please <EOS>',
 "okay then how 'bout we try out some french cuisine  saturday  night <EOS>",
 'forget it <EOS>',
 'cameron <EOS>',
 'the thing is cameron  i am at the mercy of a particularly hideous breed of loser  my sister  i cannot date until she does <EOS>',
 'seems like she could get a date easy enough <EOS>',
 'unsolved mystery  she used to be really popular when she started high school then it was just like she got sick of it or something <EOS>',
 'that is a shame <EOS>',
 'let me see what i can do <EOS>']

## Map the questions and answers for the RNN

We need to map the questions and answers to integers in order to train the RNN. This is required as categorical data cannot be trained this way. 

In [16]:
quest_mapping_ints = []

for ques in clean_ques:
    words_map = []
    for word in ques.split():
        if word not in questions_mapping:
            words_map.append(questions_mapping['<OUT>'])
        else:
            words_map.append(questions_mapping[word])
    quest_mapping_ints.append(words_map)
    
answ_mapping_ints = []

for answ in clean_answ:
    words_map = []
    for word in answ.split():
        if word not in answers_mapping:
            words_map.append(answers_mapping['<OUT>'])
        else:
            words_map.append(answers_mapping[word])
    answ_mapping_ints.append(words_map)


In [17]:
print()
print("Questions Map to integers:")
print()
print(quest_mapping_ints[:10])
print()
print("Answers Map to integers:")
print()
print(answ_mapping_ints[:10])


Questions Map to integers:

[[6796, 8421, 3030, 7358, 3095, 8824, 8824, 2690, 3288, 8824, 3311, 7636, 6595, 6996, 8824, 5685, 6976, 6320, 1276, 1451, 8824, 556], [1681, 3327, 7651, 8421, 7614, 360, 4915, 8824, 6093, 7421, 2977, 4350, 4915, 1873], [3706, 1451, 8824, 2690, 8824, 2690, 8824, 612, 8544], [1873, 3311, 5438, 4406, 5963, 7421, 2977, 6149, 3558, 2224, 2977, 21, 8185, 556], [6143, 6143, 8705, 4940, 6092, 8421, 6594, 5087, 7805, 2774, 8824], [3345], [1451, 523, 2977, 3345, 3327, 7246, 2580, 1451, 3076, 230, 7805, 5181, 8824, 6026, 230, 5658, 4940, 2145, 3327, 1738, 326, 5668, 8406, 5844], [3983], [8824, 920, 8406, 6904, 2782, 8409, 1716, 5102, 2059, 8406, 5355, 2827, 1750, 2839, 5086, 5140, 3376, 2972, 8406, 5297, 3304, 230, 5086, 2502, 3633], [4008, 6093, 2835, 8421, 4894, 7798, 7649, 7805, 4485]]

Answers Map to integers:

[[1681, 3327, 7651, 8421, 7614, 360, 4915, 8824, 6093, 7421, 2977, 4350, 4915, 1873, 8823], [3706, 1451, 8824, 2690, 8824, 2690, 8824, 612, 8544, 8823], [4

Now we need to sort the questions and answers by the length of the questions. This will speed up the training in optimization stage. We can set the length of answer and question as 25, as a threshold.

In [18]:
sorted_questions = []
sorted_answers = []
for length in range(1, 26):
    for indx in enumerate(quest_mapping_ints):
        if len(indx[1]) == length:
            sorted_questions.append(quest_mapping_ints[indx[0]])
            sorted_answers.append(answ_mapping_ints[indx[0]])

print()
print("Sorted Questions: ")
print()
print(sorted_questions[:10])
print()
print("Sorted Answers: ")
print()
print(sorted_answers[:10])


Sorted Questions: 

[[3345], [3983], [2631], [6755], [6665], [6143], [4985], [6143], [5218], [1920]]

Sorted Answers: 

[[1451, 523, 2977, 3345, 3327, 7246, 2580, 1451, 3076, 230, 7805, 5181, 8824, 6026, 230, 5658, 4940, 2145, 3327, 1738, 326, 5668, 8406, 5844, 8823], [8824, 920, 8406, 6904, 2782, 8409, 1716, 5102, 2059, 8406, 5355, 2827, 1750, 2839, 5086, 5140, 3376, 2972, 8406, 5297, 3304, 230, 5086, 2502, 3633, 8823], [8289, 8823], [5502, 2972, 119, 3432, 5963, 8222, 1351, 8823], [1873, 8165, 5201, 7358, 6286, 8823], [4350, 1873, 3311, 4443, 4232, 2782, 5253, 84, 2782, 4973, 8823], [166, 3343, 8823], [1873, 3867, 3057, 7396, 5267, 5086, 8823], [6366, 8823], [7614, 1873, 1541, 6336, 4406, 7805, 5626, 3345, 8823]]


## Building the Seq2Seq Model

![image](https://user-images.githubusercontent.com/35156624/129961372-28461497-60d4-4748-81a8-dba523f6a78a.png)

In [19]:
def model_inputs():
    """
    function: model_inputs
    params: none
    returns: int inputs, int target, float learning rate, float drop_out
    """
    inputs = tf.placeholder(tf.int32, [None, None], name = 'input')
    targets = tf.placeholder(tf.int32, [None, None], name = 'target')
    lr = tf.placeholder(tf.float32, name = 'learning_rate')
    keep_prob = tf.placeholder(tf.float32, name = 'keep_prob')
    return inputs, targets, lr, keep_prob

In [20]:
def process_targets(targets, words_mapping, batch_size):
    """
    function: process_targets
    params: targets tenor, hash_words hash table, batch_size int
    returns: tensors targets
    """
    left = tf.fill([batch_size, 1], words_mapping['<SOS>'])
    right = tf.strided_slice(targets, [0,0], [batch_size, -1], [1,1])
    targets = tf.concat([left, right], 1)
    return targets

In [21]:
def encoder_rnn(inputs, rnn_size, rnn_num_layers, keep_prob, sequence_length):
    """
    function: encoder_layer
    params: rnn_inputs, int size number input size, rnn_num_layers int, dropout rate int, seq_len
    int length of list in batch
    returns: encoder layer
    """
    lstm = tf.contrib.rnn.BasicLSTMCell(rnn_size)
    dropout_lstm = tf.contrib.rnn.DropoutWrapper(lstm, input_keep_prob = keep_prob)
    encod_cell = tf.contrib.rnn.MultiRNNCell([dropout_lstm] * rnn_num_layers)
    encod_output, encoder_state = tf.nn.bidirectional_dynamic_rnn(cell_fw = encod_cell,
                                                                    cell_bw = encod_cell,
                                                                    sequence_length = sequence_length,
                                                                    inputs = inputs,
                                                                    dtype = tf.float32)
    return encoder_state

In [22]:
def decode_train(encode_state, decode_cell, decode_input, sequence_length, decode_scope, output_fun, keep_prob, batch_size):
    """
    function: decoder_training
    params: encoder_state returned from encoder_layer, decoder cell in RNN, decoder_input embedding,
    seq len int, decode scope, out output function, drop_out drop out rate, batch size int)
    returns: decoder output with drop out
    """
    states = tf.zeros([batch_size, 1, decode_cell.output_size])
    keys, vals, score_func, attent_func = tf.contrib.seq2seq.prepare_attention(states,
                                                                               attention_option = "bahdanau",
                                                                               num_units = decode_cell.output_size)
    train_decoder = tf.contrib.seq2seq.attention_decoder_fn_train(encode_state[0],
                                                                              keys,
                                                                              vals,
                                                                              score_func,
                                                                              attent_func,
                                                                              name = "attn_dec_train")
    decode_out, decode_final_state, decode_context_state = tf.contrib.seq2seq.dynamic_rnn_decoder(decode_cell,
                                                                                                train_decoder,
                                                                                                decode_input,
                                                                                                sequence_length,
                                                                                                scope = decode_scope)
    decoder_out_dropout = tf.nn.dropout(decode_out, keep_prob)
    return output_fun(decoder_out_dropout)

In [23]:
def decode_test(encode_state, decode_cell, decode_matrix, sos_id, eos_id, maximum_length, num_words, decode_scope, output_fun, keep_prob, batch_size):
    """
    function: decode_validation_set
    params: encoder_state returned from encoder_layer, decoder cell in RNN, decoder_input embedding,
    seq len int, decode scope, out output function, drop_out drop out rate, batch size int
    returns: test_predictions
    """
    states = tf.zeros([batch_size, 1, decode_cell.output_size])
    keys, vals, score_func, attent_func = tf.contrib.seq2seq.prepare_attention(states, attention_option = "bahdanau", num_units = decode_cell.output_size)
    test_decoder_fun = tf.contrib.seq2seq.attention_decoder_fn_inference(output_fun,
                                                                              encode_state[0],
                                                                              keys,
                                                                              vals,
                                                                              score_func,
                                                                              attent_func,
                                                                              decode_matrix,
                                                                              sos_id,
                                                                              eos_id,
                                                                              maximum_length,
                                                                              num_words,
                                                                              name = "attn_dec_inf")
    test_preds, decoder_final_state, decoder_final_context_state = tf.contrib.seq2seq.dynamic_rnn_decoder(decode_cell,
                                                                                                                test_decoder_fun,
                                                                                                                scope = decode_scope)
    return test_preds

In [24]:
def smartbot_rnn(decoder_input, decoder_matrix, encode_state, num_words, sequence_length, rnn_size, num_layers, words_mapping, keep_prob, batch_size):
    """
    function: smartbot_rnn
    params: decoder_input, decoder_matrix, encoder_state, total_words_corpus int, seq_len int, rnn_size int,
    num_layers_rnn int, hash_words hashtable, drop_out float, batch_size int
    returns:
    """
    with tf.variable_scope("decoding") as decoding_scope:
        lstm = tf.contrib.rnn.BasicLSTMCell(rnn_size)
        lstm_dropout = tf.contrib.rnn.DropoutWrapper(lstm, input_keep_prob = keep_prob)
        decode_cell = tf.contrib.rnn.MultiRNNCell([lstm_dropout] * num_layers)
        weights = tf.truncated_normal_initializer(stddev = 0.1)
        biases = tf.zeros_initializer()
        output_fun = lambda x: tf.contrib.layers.fully_connected(x,
                                                                      num_words,
                                                                      None,
                                                                      scope = decoding_scope,
                                                                      weights_initializer = weights,
                                                                      biases_initializer = biases)
        training_preds = decode_train(encode_state,
                                                   decode_cell,
                                                   decoder_input,
                                                   sequence_length,
                                                   decoding_scope,
                                                   output_fun,
                                                   keep_prob,
                                                   batch_size)
        decoding_scope.reuse_variables()
        test_preds = decode_test(encode_state,
                                           decode_cell,
                                           decoder_matrix,
                                           words_mapping['<SOS>'],
                                           words_mapping['<EOS>'],
                                           sequence_length - 1,
                                           num_words,
                                           decoding_scope,
                                           output_fun,
                                           keep_prob,
                                           batch_size)
    return training_preds, test_preds

In [25]:
def smartbot_model(inputs, targets, keep_prob, batch_size, sequence_length, answers_num_words, questions_num_words, encoder_size, decoder_size, rnn_size, num_layers, questions_mapping):
    """
    function: smartbot_model
    params: inputs questions vector, targets answers vector, dropout rate float, batch_size int, seq_len int,
    num_words_answers int, num_words_questions int, encoder_size int, decoder_size int, rnn_size int,
    rnn_num_layers int, questions_hash hashtable
    returns: seq2seq RNN model
    """
    encoder_input = tf.contrib.layers.embed_sequence(inputs,
                                                              answers_num_words + 1,
                                                              encoder_size,
                                                              initializer = tf.random_uniform_initializer(0, 1))
    
    encoder_state = encoder_rnn(encoder_input, rnn_size, num_layers, keep_prob, sequence_length)
    targets = process_targets(targets, questions_mapping, batch_size)
    decoder_matrix = tf.Variable(tf.random_uniform([questions_num_words + 1, decoder_size], 0, 1))
    decoder_input = tf.nn.embedding_lookup(decoder_matrix, targets)
    training_preds, test_preds = smartbot_rnn(decoder_input,
                                                         decoder_matrix,
                                                         encoder_state,
                                                         questions_num_words,
                                                         sequence_length,
                                                         rnn_size,
                                                         num_layers,
                                                         questions_mapping,
                                                         keep_prob,
                                                         batch_size)
    return training_preds, test_preds

## Train the model - Set up the hyperparamaters

In [26]:
epochs = 3
batch_size = 64
rnn_size = 512
num_layers = 3
encoding_embedding_size = 512
decoding_embedding_size = 512
learning_rate = 0.01
learning_rate_decay = 0.9
min_learning_rate = 0.0001
keep_probability = 0.5

## Create the tensorflow object

In [27]:
tf.reset_default_graph()
session = tf.InteractiveSession()

inputs, targets, lr, keep_prob = model_inputs()
 
sequence_length = tf.placeholder_with_default(25, None, name = 'sequence_length')
 
input_shape = tf.shape(inputs)

## Start outputting the training and test predictions 

In [28]:
training_preds, test_preds = smartbot_model(tf.reverse(inputs, [-1]),
                                                       targets,
                                                       keep_prob,
                                                       batch_size,
                                                       sequence_length,
                                                       len(answers_mapping),
                                                       len(questions_mapping),
                                                       encoding_embedding_size,
                                                       decoding_embedding_size,
                                                       rnn_size,
                                                       num_layers,
                                                       questions_mapping)

## Set up the loss Error and Optimizes. Apply gradient clipping to the optimizer. 

In [29]:
with tf.name_scope("optimization"):
    loss = tf.contrib.seq2seq.sequence_loss(training_preds,
                                                  targets,
                                                  tf.ones([input_shape[0], sequence_length]))
    optim = tf.train.AdamOptimizer(learning_rate)
    grads = optim.compute_gradients(loss)
    clipped_grads = [(tf.clip_by_value(tensor, -5., 5.), var) for tensor, var in grads if tensor is not None]
    optimizer_gradient_clipping = optim.apply_gradients(clipped_grads)

In [30]:
def padding(batch_of_sequences, words_hash):
    """
    function: padding
    params: batch seqs, hash_words_ints hash table words to integers
    returns: sequence with <PAD> token
    does: Complete sentences with pad tokens, so all tokens have the same length
    """
    max_sequence_length = max([len(seq) for seq in batch_of_sequences])
    return [seq + [words_hash['<PAD>']] * (max_sequence_length - len(seq)) for seq in batch_of_sequences]
 

In [31]:
def split_into_batches(ques, answ, batch_size):
    """
    function: split_into_batches
    params: list ques, list ans, int batch size
    does: splits data into batches
    returns: batches of data
    """
    for batch_index in range(0, len(ques) // batch_size):
        start_index = batch_index * batch_size
        questions_in_batch = ques[start_index : start_index + batch_size]
        answers_in_batch = answ[start_index : start_index + batch_size]
        padded_questions_in_batch = np.array(padding(questions_in_batch, questions_mapping))
        padded_answers_in_batch = np.array(padding(answers_in_batch, answers_mapping))
        yield padded_questions_in_batch, padded_answers_in_batch

## Cross validation, for training and test sets

In [32]:
# Splitting the questions and answers into training and validation sets
training_validation_split = int(len(sorted_questions) * 0.15)
training_ques = sorted_questions[training_validation_split:]
training_answ = sorted_answers[training_validation_split:]
validation_ques = sorted_questions[:training_validation_split]
validation_answ = sorted_answers[:training_validation_split]

In [None]:
batch_training_loss = 100
batch_valid_loss = ((len(training_ques)) // batch_size // 2) - 1
total_training_loss_error = 0
list_validation_loss_error = []
early_stop = 0
early_stopping_stop = 1000
checkpoint = "./chatbot_weights_smartbot.ckpt" 
session.run(tf.global_variables_initializer())
for epoch in range(1, epochs + 1):
    for batch_index, (padded_questions_in_batch, padded_answers_in_batch) in enumerate(split_into_batches(training_ques, training_answ, batch_size)):
        starting_time = time.time()
        _, batch_loss_error = session.run([optimizer_gradient_clipping, loss], {inputs: padded_questions_in_batch,
                                                                                               targets: padded_answers_in_batch,
                                                                                               lr: learning_rate,
                                                                                               sequence_length: padded_answers_in_batch.shape[1],
                                                                                               keep_prob: keep_probability})
        total_training_loss_error += batch_loss_error
        ending_time = time.time()
        batch_time = ending_time - starting_time
        if batch_index % batch_training_loss == 0:
            print('Epoch: {:>3}/{}, Batch: {:>4}/{}, Loss Error: {:>6.3f}, Time on 100 Batches: {:d} seconds'.format(epoch,
                                                                                                                                       epochs,
                                                                                                                                       batch_index,
                                                                                                                                       len(training_ques) // batch_size,
                                                                                                                                       total_training_loss_error / batch_training_loss,
                                                                                                                                       int(batch_time * batch_training_loss)))
            total_training_loss_error = 0
        if batch_index % batch_valid_loss == 0 and batch_index > 0:
            total_validation_loss_error = 0
            starting_time = time.time()
            for batch_index_validation, (padded_questions_in_batch, padded_answers_in_batch) in enumerate(split_into_batches(validation_ques, validation_answ, batch_size)):
                batch_validation_loss_error = session.run(loss_error, {inputs: padded_questions_in_batch,
                                                                       targets: padded_answers_in_batch,
                                                                       lr: learning_rate,
                                                                       sequence_length: padded_answers_in_batch.shape[1],
                                                                       keep_prob: 1})
                total_validation_loss_error += batch_validation_loss_error
            ending_time = time.time()
            batch_time = ending_time - starting_time
            average_validation_loss_error = total_validation_loss_error / (len(validation_ques) / batch_size)
            print('Loss Error: {:>6.3f}, Batch Time: {:d} seconds'.format(average_validation_loss_error, int(batch_time)))
            learning_rate *= learning_rate_decay
            if learning_rate < min_learning_rate:
                learning_rate = min_learning_rate
            list_validation_loss_error.append(average_validation_loss_error)
            if average_validation_loss_error <= min(list_validation_loss_error):
                print('I am improving!')
                early_stop = 0
                saver = tf.train.Saver()
                saver.save(session, checkpoint)
            else:
                print("Sorry I need more training.")
                early_stop += 1
                if early_stop == early_stopping_stop:
                    break
    if early_stop == early_stopping_stop:
        print("Smartbot is ready. Training is complete!")
        break
print("Smartbot is tired! Goodbye!")

Epoch:   1/3, Batch:    0/2708, Loss Error:  0.089, Time on 100 Batches: 797 seconds


In [57]:
batch_training_loss = 100
batch_valid_loss = ((len(training_ques)) // batch_size // 2) - 1
total_training_loss_error = 0
list_validation_loss_error = []
early_stop = 0
early_stopping_stop = 1000
checkpoint = "./chatbot_weights.ckpt" 
session.run(tf.global_variables_initializer())

In [33]:
checkpoint = "./chatbot_weights.ckpt"
session = tf.InteractiveSession()
session.run(tf.global_variables_initializer())
saver = tf.train.Saver()
saver.restore(session, checkpoint)

In [34]:
def convert_words_to_int(question, words_hash):
    """
    fucntion: convert_words_to_int
    parms: questions string, words_hash hash table
    does: converts words to ints
    returns: list ints
    """
    question = clean(question)
    return [words_hash.get(word, words_hash['<OUT>']) for word in question.split()]

In [35]:
while(True):
    question = input("User: ")
    if question == 'Goodbye':
        break
    question = convert_words_to_int(question, questions_mapping)
    question = question + [questions_mapping['<PAD>']] * (25 - len(question))
    fake_batch = np.zeros((batch_size, 25))
    fake_batch[0] = question
    predicted_answer = session.run(test_preds, {inputs: fake_batch, keep_prob: 0.5})[0]
    answer = ''
    for i in np.argmax(predicted_answer, 1):
        if inverse_answers[i] == 'i':
            token = ' I'
        elif inverse_answers[i] == '<EOS>':
            token = '.'
        elif inverse_answers[i] == '<OUT>':
            token = 'out'
        else:
            token = ' ' + inverse_answers[i]
        answer += token
        if token == '.':
            break
    print('Smartbot: ' + answer)

User: Hello
Smartbot:  girl's sod hallout.
User: What is the weather?
Smartbot:  girl's sod invisible hallout.
User: hmmm... needs more training?
Smartbot:  girl's sodout.
User: Goodbye


# End of SmartBot!