In [1]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/cpu:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 3696521069324384001
, name: "/gpu:0"
device_type: "GPU"
memory_limit: 3866165248
locality {
  bus_id: 1
}
incarnation: 15544132128716349263
physical_device_desc: "device: 0, name: GeForce GTX 1050 Ti, pci bus id: 0000:01:00.0"
]


In [2]:
# Importing the libraries
import numpy as np
import tensorflow as tf
import re
import time
import pandas as pd
import random

In [3]:
path = '../dataset/cornell movie-dialogs corpus/'

In [4]:
def clean_data(convs):
    conv = []
    for text in convs:
        text = text.lower()
        text = re.sub(r"i'm", "i am", text)
        text = re.sub(r"he's", "he is", text)
        text = re.sub(r"she's", "she is", text)
        text = re.sub(r"that's", "that is", text)
        text = re.sub(r"what's", "what is", text)
        text = re.sub(r"where's", "where is", text)
        text = re.sub(r"there's", "there is", text)
        text = re.sub(r"how's", "how is", text)
        text = re.sub(r"let's", "let us", text)
        text = re.sub(r"it's", "it is", text)
        text = re.sub(r"\'ll", " will", text)
        text = re.sub(r"\'ve", " have", text)
        text = re.sub(r"\'re", " are", text)
        text = re.sub(r"\'d", " would", text)
        text = re.sub(r"n't", " not", text)
        text = re.sub(r"won't", "will not", text)
        text = re.sub(r"can't", "cannot", text)
        text = re.sub(r"[-()\"#/@;:<>{}`+=~|.!?,]", "", text)
        conv.append(text)
    return conv

In [5]:
def filter_length(convs):
    final_convs = []
    for text in convs:
        if 2 <= len(text.split()) <= 30:
            final_convs.append(text)
            
    return final_convs

In [6]:

def extract_data():
    id2line = {}
    with open(path + 'movie_lines.txt', 'rb') as file:
        for line in file:
            list1 = str(line).split(" +++$+++ ")
            id2line[list1[0][2::]] = list1[-1][:-3:]

    # print(id2line)


    conversations = []
    with open(path + 'movie_conversations.txt', 'r') as file:
        for line in file:
            list1 = str(line).split(" +++$+++ ")
            list_of_conv = list1[-1].replace("'", "").replace("[","").replace("]","").replace("\n","").replace(" ", "").split(",")
            conversations.append([id2line[x] for x in list_of_conv])

    #print(conversations)
    encoders = []
    decoders = []

    for convs in conversations:
        
        clean_d = clean_data(convs)
        conv = filter_length(clean_d)
        
        if len(conv) %2 != 0:
            conv = conv[:-1]
        for i in range(len(conv)):
            if i%2 == 0:
                encoders.append(conv[i])
            else:
                decoders.append(conv[i])


    TESTSET_SIZE = 30000
    train_enc = open(path + 'train.enc','w')
    train_dec = open(path + 'train.dec','w')
    test_enc  = open(path + 'test.enc', 'w')
    test_dec  = open(path + 'test.dec', 'w')

    # choose 30,000 (TESTSET_SIZE) items to put into testset
    test_ids = random.sample([i for i in range(len(encoders))],TESTSET_SIZE)

    for i in range(len(encoders)):
        if i in test_ids:
            test_enc.write(encoders[i]+'\n')
            test_dec.write(decoders[i]+ '\n' )
        else:
            train_enc.write(encoders[i]+'\n')
            train_dec.write(decoders[i]+ '\n' )
        if i%10000 == 0:
            print('\n>> written %d lines' %(i))

    # close files
    train_enc.close()
    train_dec.close()
    test_enc.close()
    test_dec.close()


In [7]:
questions = []
answers = []
with open(path + "train.enc") as f:
    for line in f.readlines():
        questions.append(line)

with open(path + "train.dec") as f:
    for line in f.readlines():
        answers.append(line)

In [8]:
word2count = {}
for question in questions:
    for word in question.split():
        if word not in word2count:
            word2count[word] = 1
        else:
            word2count[word] += 1
for answer in answers:
    for word in answer.split():
        if word not in word2count:
            word2count[word] = 1
        else:
            word2count[word] += 1

In [9]:
# Creating two dictionaries that map the questions words and the answers words to a unique integer
threshold_questions = 15
questionswords2int = {}
word_number = 0
for word, count in word2count.items():
    if count >= threshold_questions:
        questionswords2int[word] = word_number
        word_number += 1
threshold_answers = 15
answerswords2int = {}
word_number = 0
for word, count in word2count.items():
    if count >= threshold_answers:
        answerswords2int[word] = word_number
        word_number += 1

In [10]:
# Adding the last tokens to these two dictionaries
tokens = ['<PAD>', '<EOS>', '<OUT>', '<SOS>']
for token in tokens:
    questionswords2int[token] = len(questionswords2int) + 1
for token in tokens:
    answerswords2int[token] = len(answerswords2int) + 1

In [11]:
# Creating the inverse dictionary of the answerswords2int dictionary
answersints2word = {w_i: w for w, w_i in answerswords2int.items()}

In [12]:
# Adding the End Of String token to the end of every answer
for i in range(len(answers)):
    answers[i] += ' <EOS>'

In [13]:
print(questions[1])
print(answers[1])

no no it is my fault  we did not have a proper introduction 

the thing is cameron  i am at the mercy of a particularly hideous breed of loser  my sister  i ca not date until she does
 <EOS>


In [14]:
# Translating all the questions and the answers into integers
# and Replacing all the words that were filtered out by <OUT> 
questions_into_int = []
for question in questions:
    ints = []
    for word in question.split():
        if word not in questionswords2int:
            ints.append(questionswords2int['<OUT>'])
        else:
            ints.append(questionswords2int[word])
    questions_into_int.append(ints)
answers_into_int = []
for answer in answers:
    ints = []
    for word in answer.split():
        if word not in answerswords2int:
            ints.append(answerswords2int['<OUT>'])
        else:
            ints.append(answerswords2int[word])
    answers_into_int.append(ints)

In [15]:
print(questions_into_int[1])
print(answers_into_int[1])

[15, 15, 16, 17, 18, 19, 1, 20, 21, 22, 23, 24, 2682]
[13, 500, 17, 2682, 52, 121, 56, 13, 1684, 46, 23, 1204, 2682, 2682, 46, 1777, 18, 247, 52, 164, 21, 34, 256, 42, 115, 2681]


In [16]:
# Creating placeholders for the inputs and the targets
def model_inputs():
    inputs = tf.placeholder(tf.int32, [None, None], name = 'input')
    targets = tf.placeholder(tf.int32, [None, None], name = 'target')
    lr = tf.placeholder(tf.float32, name = 'learning_rate')
    keep_prob = tf.placeholder(tf.float32, name = 'keep_prob')
    return inputs, targets, lr, keep_prob

In [17]:
# Preprocessing the targets
def preprocess_targets(targets, word2int, batch_size):
    left_side = tf.fill([batch_size, 1], word2int['<SOS>'])
    right_side = tf.strided_slice(targets, [0,0], [batch_size, -1], [1,1])
    preprocessed_targets = tf.concat([left_side, right_side], 1)
    return preprocessed_targets

In [18]:
# Creating the Encoder RNN
def encoder_rnn(rnn_inputs, rnn_size, num_layers, keep_prob, sequence_length):
    lstm = tf.contrib.rnn.BasicLSTMCell(rnn_size)
    lstm_dropout = tf.contrib.rnn.DropoutWrapper(lstm, input_keep_prob = keep_prob)
    encoder_cell = tf.contrib.rnn.MultiRNNCell([lstm_dropout] * num_layers)
    encoder_output, encoder_state = tf.nn.bidirectional_dynamic_rnn(cell_fw = encoder_cell,
                                                                    cell_bw = encoder_cell,
                                                                    sequence_length = sequence_length,
                                                                    inputs = rnn_inputs,
                                                                    dtype = tf.float32)
    return encoder_state

In [19]:
# Decoding the training set
def decode_training_set(encoder_state, decoder_cell, decoder_embedded_input, sequence_length, decoding_scope, output_function, keep_prob, batch_size):
    attention_states = tf.zeros([batch_size, 1, decoder_cell.output_size])
    attention_keys, attention_values, attention_score_function, attention_construct_function = tf.contrib.seq2seq.prepare_attention(attention_states, attention_option = "bahdanau", num_units = decoder_cell.output_size)
    training_decoder_function = tf.contrib.seq2seq.attention_decoder_fn_train(encoder_state[0],
                                                                              attention_keys,
                                                                              attention_values,
                                                                              attention_score_function,
                                                                              attention_construct_function,
                                                                              name = "attn_dec_train")
    decoder_output, decoder_final_state, decoder_final_context_state = tf.contrib.seq2seq.dynamic_rnn_decoder(decoder_cell,
                                                                                                              training_decoder_function,
                                                                                                              decoder_embedded_input,
                                                                                                              sequence_length,
                                                                                                              scope = decoding_scope)
    decoder_output_dropout = tf.nn.dropout(decoder_output, keep_prob)
    return output_function(decoder_output_dropout)

# Decoding the test/validation set
def decode_test_set(encoder_state, decoder_cell, decoder_embeddings_matrix, sos_id, eos_id, maximum_length, num_words, decoding_scope, output_function, keep_prob, batch_size):
    attention_states = tf.zeros([batch_size, 1, decoder_cell.output_size])
    attention_keys, attention_values, attention_score_function, attention_construct_function = tf.contrib.seq2seq.prepare_attention(attention_states, attention_option = "bahdanau", num_units = decoder_cell.output_size)
    test_decoder_function = tf.contrib.seq2seq.attention_decoder_fn_inference(output_function,
                                                                              encoder_state[0],
                                                                              attention_keys,
                                                                              attention_values,
                                                                              attention_score_function,
                                                                              attention_construct_function,
                                                                              decoder_embeddings_matrix,
                                                                              sos_id,
                                                                              eos_id,
                                                                              maximum_length,
                                                                              num_words,
                                                                              name = "attn_dec_inf")
    test_predictions, decoder_final_state, decoder_final_context_state = tf.contrib.seq2seq.dynamic_rnn_decoder(decoder_cell,
                                                                                                                test_decoder_function,
                                                                                                                scope = decoding_scope)
    return test_predictions

# Creating the Decoder RNN
def decoder_rnn(decoder_embedded_input, decoder_embeddings_matrix, encoder_state, num_words, sequence_length, rnn_size, num_layers, word2int, keep_prob, batch_size):
    with tf.variable_scope("decoding") as decoding_scope:
        lstm = tf.contrib.rnn.BasicLSTMCell(rnn_size)
        lstm_dropout = tf.contrib.rnn.DropoutWrapper(lstm, input_keep_prob = keep_prob)
        decoder_cell = tf.contrib.rnn.MultiRNNCell([lstm_dropout] * num_layers)
        weights = tf.truncated_normal_initializer(stddev = 0.1)
        biases = tf.zeros_initializer()
        output_function = lambda x: tf.contrib.layers.fully_connected(x,
                                                                      num_words,
                                                                      None,
                                                                      scope = decoding_scope,
                                                                      weights_initializer = weights,
                                                                      biases_initializer = biases)
        training_predictions = decode_training_set(encoder_state,
                                                   decoder_cell,
                                                   decoder_embedded_input,
                                                   sequence_length,
                                                   decoding_scope,
                                                   output_function,
                                                   keep_prob,
                                                   batch_size)
        decoding_scope.reuse_variables()
        test_predictions = decode_test_set(encoder_state,
                                           decoder_cell,
                                           decoder_embeddings_matrix,
                                           word2int['<SOS>'],
                                           word2int['<EOS>'],
                                           sequence_length - 1,
                                           num_words,
                                           decoding_scope,
                                           output_function,
                                           keep_prob,
                                           batch_size)
    return training_predictions, test_predictions

#Now that we have both the encoder RNN and the decoder RNN we'll use them to build our seq2seq model.

# Building the seq2seq model
def seq2seq_model(inputs, targets, keep_prob, batch_size, sequence_length, answers_num_words, questions_num_words, encoder_embedding_size, decoder_embedding_size, rnn_size, num_layers, questionswords2int):
    encoder_embedded_input = tf.contrib.layers.embed_sequence(inputs,
                                                              answers_num_words + 1,
                                                              encoder_embedding_size,
                                                              initializer = tf.random_uniform_initializer(0, 1))
    encoder_state = encoder_rnn(encoder_embedded_input, rnn_size, num_layers, keep_prob, sequence_length)
    preprocessed_targets = preprocess_targets(targets, questionswords2int, batch_size)
    decoder_embeddings_matrix = tf.Variable(tf.random_uniform([questions_num_words + 1, decoder_embedding_size], 0, 1))
    decoder_embedded_input = tf.nn.embedding_lookup(decoder_embeddings_matrix, preprocessed_targets)
    training_predictions, test_predictions = decoder_rnn(decoder_embedded_input,
                                                         decoder_embeddings_matrix,
                                                         encoder_state,
                                                         questions_num_words,
                                                         sequence_length,
                                                         rnn_size,
                                                         num_layers,
                                                         questionswords2int,
                                                         keep_prob,
                                                         batch_size)
    return training_predictions, test_predictions


In [20]:
# Setting the Hyperparameters
epochs = 10
batch_size = 200
rnn_size = 512
num_layers = 5
encoding_embedding_size = 512
decoding_embedding_size = 512
learning_rate = 0.001
learning_rate_decay = 0.9
min_learning_rate = 0.0001
keep_probability = 0.5

In [21]:
# Defining a session
tf.reset_default_graph()
session = tf.InteractiveSession()

# Loading the model inputs
inputs, targets, lr, keep_prob = model_inputs()

# Setting the sequence length
sequence_length = tf.placeholder_with_default(25, None, name = 'sequence_length')

# Getting the shape of the inputs tensor
input_shape = tf.shape(inputs)


In [22]:
# Getting the training and test predictions

training_predictions, test_predictions = seq2seq_model(tf.reverse(inputs, [-1]),
                                                       targets,
                                                       keep_prob,
                                                       batch_size,
                                                       sequence_length,
                                                       len(answerswords2int),
                                                       len(questionswords2int),
                                                       encoding_embedding_size,
                                                       decoding_embedding_size,
                                                       rnn_size,
                                                       num_layers,
                                                       questionswords2int)

In [23]:
# Setting up the Loss Error, the Optimizer and Gradient Clipping
with tf.name_scope("optimization"):
    loss_error = tf.contrib.seq2seq.sequence_loss(training_predictions,
                                                  targets,
                                                  tf.ones([input_shape[0], sequence_length]))
    optimizer = tf.train.AdamOptimizer(learning_rate)
    gradients = optimizer.compute_gradients(loss_error)
    clipped_gradients = [(tf.clip_by_value(grad_tensor, -5., 5.), grad_variable) for grad_tensor, grad_variable in gradients if grad_tensor is not None]
    optimizer_gradient_clipping = optimizer.apply_gradients(clipped_gradients)

In [24]:
# Padding the sequences with the <PAD> token
def apply_padding(batch_of_sequences, word2int):
    max_sequence_length = max([len(sequence) for sequence in batch_of_sequences])
    return [sequence + [word2int['<PAD>']]* (max_sequence_length - len(sequence)) for sequence in batch_of_sequences]

In [25]:
# Splitting the data into batches of questions and answers
def split_into_batches(questions, answers, batch_size):
    for batch_index in range(0, len(questions) // batch_size):
        start_index = batch_index * batch_size
        questions_in_batch = questions[start_index : start_index + batch_size]
        answers_in_batch = answers[start_index : start_index + batch_size]
        padded_questions_in_batch = np.array(apply_padding(questions_in_batch, questionswords2int))
        padded_answers_in_batch = np.array(apply_padding(answers_in_batch, answerswords2int))
        yield padded_questions_in_batch, padded_answers_in_batch

In [26]:
# Splitting the questions and answers into training and validation sets
training_validation_split = int(len(questions) * 0.15)
training_questions = questions_into_int[training_validation_split:]
training_answers = answers_into_int[training_validation_split:]
validation_questions = questions_into_int[:training_validation_split]
validation_answers = answers_into_int[:training_validation_split]

In [27]:
# Training
batch_index_check_training_loss = 100
batch_index_check_validation_loss = ((len(training_questions)) // batch_size // 2) - 1
total_training_loss_error = 0
list_validation_loss_error = []
early_stopping_check = 0
early_stopping_stop = 1000
checkpoint = path + "chatbot_weights.ckpt"
session.run(tf.global_variables_initializer())

In [28]:
# Training

batch_index_check_training_loss = 100
batch_index_check_validation_loss = ((len(training_questions)) // batch_size // 2) - 1
total_training_loss_error = 0
list_validation_loss_error = []
early_stopping_check = 0
early_stopping_stop = 1000
checkpoint = path + "chatbot_weights.ckpt"
session.run(tf.global_variables_initializer())
for epoch in range(1, epochs + 1):
    for batch_index, (padded_questions_in_batch, padded_answers_in_batch) in enumerate(split_into_batches(training_questions, training_answers, batch_size)):
        starting_time = time.time()
        batch_training_loss_errors = session.run([optimizer_gradient_clipping, loss_error],{inputs: padded_questions_in_batch, targets: padded_answers_in_batch, lr: learning_rate, sequence_length: padded_answers_in_batch.shape[1], keep_prob: keep_probability})
        batch_training_loss_error = batch_training_loss_errors[1]
        print(batch_training_loss_error)
        total_training_loss_error += batch_training_loss_error
        ending_time = time.time()
        batch_time = ending_time - starting_time
        if batch_index % batch_index_check_training_loss == 0:
            print('Epoch: {:>3}/{}, Batch: {:>4}/{}, Training Loss Error: {:>6.3f}, Training Time on 100 Batches: {:d} seconds'.format(epoch,
                                                                                                                                       epochs,
                                                                                                                                       batch_index,
                                                                                                                                       len(training_questions) // batch_size,
                                                                                                                                       total_training_loss_error / batch_index_check_training_loss,
                                                                                                                                       int(batch_time * batch_index_check_training_loss)))
            total_training_loss_error = 0
        if batch_index % batch_index_check_validation_loss == 0 and batch_index > 0:
            total_validation_loss_error = 0
            starting_time = time.time()
            for batch_index_validation, (padded_questions_in_batch, padded_answers_in_batch) in enumerate(split_into_batches(validation_questions, validation_answers, batch_size)):
                batch_validation_loss_error = session.run(loss_error, {inputs: padded_questions_in_batch,
                                                                       targets: padded_answers_in_batch,
                                                                       lr: learning_rate,
                                                                       sequence_length: padded_answers_in_batch.shape[1],
                                                                       keep_prob: 1})
                total_validation_loss_error += batch_validation_loss_error
            ending_time = time.time()
            batch_time = ending_time - starting_time
            average_validation_loss_error = total_validation_loss_error / (len(validation_questions) / batch_size)
            print('Validation Loss Error: {:>6.3f}, Batch Validation Time: {:d} seconds'.format(average_validation_loss_error, int(batch_time)))
            learning_rate *= learning_rate_decay
            if learning_rate < min_learning_rate:
                learning_rate = min_learning_rate
            list_validation_loss_error.append(average_validation_loss_error)
            if average_validation_loss_error <= min(list_validation_loss_error):
                print('I speak better now!!')
                early_stopping_check = 0
                saver = tf.train.Saver()
                saver.save(session, checkpoint)
            else:
                print("Sorry I do not speak better, I need to practice more.")
                early_stopping_check += 1
                if early_stopping_check == early_stopping_stop:
                    break
    if early_stopping_check == early_stopping_stop:
        print("My apologies, I cannot speak better anymore. This is the best I can do.")
        break
print("Game Over")

7.8862514
Epoch:   1/10, Batch:    0/161, Training Loss Error:  0.079, Training Time on 100 Batches: 124 seconds
5.617066
5.2972474
3.2152195
4.3598475
3.168849
2.9504247
3.2844799
2.4666042
2.4919713
2.5107875
2.7537305
2.493053
2.5523977
2.3941803
2.6658041
2.5992239
2.3773713
2.6943977
2.6216278
2.3781357
2.5950842
2.7951267
2.2950833
2.360455
2.1624236
2.1825795
2.281942
2.0776675
2.3054476
2.261247
2.0158615
1.9809529
2.0945783
2.016881
2.683223
2.1090534
2.4279785
2.1707423
2.3007677
1.8343419
2.118326
1.837468
2.4132328
2.3157463
2.1947303
3.0831695
2.378959
2.1718001
2.1070669
2.150984
2.7158237
1.9260279
2.258096
2.0943503
2.2563376
2.279701
2.1321602
2.0782459
2.4891722
2.4079847
1.912127
2.3412812
2.4223092
2.2449982
1.889391
2.1587431
2.3004515
2.625587
2.3609798
1.9253596
2.4199238
2.2198138
2.0683856
2.184271
2.2191231
1.9968404
2.2704563
1.8054627
2.2975576
Validation Loss Error:  2.077, Batch Validation Time: 8 seconds
I speak better now!!
2.417588
2.8303742
2.0055592
2

1.7191671
1.5358166
1.4544157
1.6044387
1.5393308
2.0364034
1.6223773
1.8811123
1.6471316
1.7697561
1.4038365
1.6495581
1.4010878
1.7421412
1.7365495
1.673135
2.38072
1.800456
1.5430545
1.4933891
1.6570352
2.061416
1.4512265
1.6931758
1.577569
1.692321
1.7608318
1.6818393
1.5554351
1.9247929
1.873421
1.4703051
1.784062
1.8391343
1.7354863
1.4732386
1.682557
1.7930336
2.045377
1.8254943
1.4224519
1.9068512
1.7144814
1.5795466
1.6808002
1.7256104
1.5296929
1.7156267
1.3834436
1.7832896
Validation Loss Error:  1.638, Batch Validation Time: 8 seconds
I speak better now!!
1.8700981
2.2511718
1.4987217
1.8993322
1.8542337
1.7612207
1.5992143
1.8603944
1.6639906
1.7629876
1.5946159
1.5627254
1.7485598
1.8798001
1.6876163
1.6594183
1.7828647
1.645161
1.5321541
1.6507171
1.6825856
Epoch:   5/10, Batch:  100/161, Training Loss Error:  1.725, Training Time on 100 Batches: 87 seconds
1.6605706
1.347811
1.7229034
1.5603935
1.7222985
1.5608934
1.5385451
1.6783557
1.827502
1.9169846
1.5783813
1.98078

1.3862754
1.5845618
1.6761101
1.9263896
1.7191637
1.3304553
1.8052671
1.6316534
1.4967133
1.571353
1.6187487
1.4334177
1.6090307
1.3072065
1.677396
Validation Loss Error:  1.545, Batch Validation Time: 8 seconds
I speak better now!!
1.7413223
2.122792
1.4004749
1.784185
1.7578367
1.6601355
1.5242469
1.7472802
1.5557327
1.6624727
1.4850094
1.4724233
1.6540034
1.761316
1.5875505
1.5469112
1.6856465
1.5360141
1.4461958
1.5463222
1.5738672
Epoch:   9/10, Batch:  100/161, Training Loss Error:  1.615, Training Time on 100 Batches: 88 seconds
1.5544881
1.2702503
1.6196831
1.4645222
1.6238881
1.4760706
1.4398676
1.5781877
1.7238955
1.8187854
1.4965327
1.8610823
1.5041641
1.4106965
1.4892565
1.8370339
1.5934244
1.6322145
1.6739533
1.7005147
1.5601429
1.643641
1.8196589
1.5664117
1.3651301
1.5351056
1.6784868
1.8027171
1.747512
1.3490235
1.5776907
1.6759783
1.4141952
1.6374688
1.4831305
1.8848504
1.5316693
1.2195606
1.7517061
1.3520027
1.4496771
1.5138204
1.6350774
1.6514362
1.69641
1.5950907
1.

In [32]:
# Loading the weights and Running the session
checkpoint = path + "chatbot_weights.ckpt"
session = tf.InteractiveSession()
session.run(tf.global_variables_initializer())
saver = tf.train.Saver()
saver.restore(session, checkpoint)

In [33]:
# Converting the questions from strings to lists of encoding integers
def convert_string2int(question, word2int):
    question = clean_data(question)
    return [word2int.get(word, word2int['<OUT>']) for word in question]

In [34]:
# Setting up the chat
while(True):
    question = input("You: ")
    if question == 'Goodbye':
        break
    question = convert_string2int(question, questionswords2int)
    question = question + [questionswords2int['<PAD>']] * (25 - len(question))
    fake_batch = np.zeros((batch_size, 25))
    fake_batch[0] = question
    predicted_answer = session.run(test_predictions, {inputs: fake_batch, keep_prob: 0.5})[0]
    answer = ''
    for i in np.argmax(predicted_answer, 1):
        if answersints2word[i] == 'i':
            token = ' I'
        elif answersints2word[i] == '<EOS>':
            token = '.'
        elif answersints2word[i] == '<OUT>':
            token = 'out'
        else:
            token = ' ' + answersints2word[i]
        answer += token
        if token == '.':
            break
    print('ChatBot: ' + answer)
    

You: hello
ChatBot:  I am notout.
You: what
ChatBot:  I am not aout.
You: okay
ChatBot:  I know you.
You: how do you know me
ChatBot:  I am notout.
You: okay
ChatBot:  I am not aout.
You: why
ChatBot:  I am notout.
You: well gotta go
ChatBot:  I am not aout.
You: bye bye
ChatBot:  I am not aout.
You: Goodbye
