In [1]:
import tensorflow as tf
import numpy as np
from collections import Counter
import pickle
import time
from IPython.display import clear_output, Image, display, HTML

tf.reset_default_graph()

In [2]:
special_words = ['GO','PAD','EOS','UNK']

In [3]:
def build_dataset(filename, appendEOS = False, EOS = None):
    with open(filename) as f:
        lines = f.read().lower().split("\n")
    
    all_words = ' '.join(lines).split()
    vocabulary = list(set(all_words))
    vocabulary_size = len(vocabulary) + len(special_words)
    
    dictionary = dict()
    for w in special_words:
        dictionary[w] = len(dictionary)
    
    for w in vocabulary:
        dictionary[w] = len(dictionary)
        
    reversed_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
    
    data = []
    for line in lines:
        line_data = []
        for w in line.split():
            index = dictionary.get(w, dictionary.get("UNK"))
            line_data.append(index)
        if appendEOS:
            line_data.append(EOS)
        data.append(line_data)
        
    return data, vocabulary_size, dictionary, reversed_dictionary

In [4]:
from_data, from_vocabulary_size,from_dictionary,from_reversed_dictionary  = build_dataset("./data/from.txt")

In [5]:
GO = from_dictionary['GO']
PAD = from_dictionary['PAD']
EOS = from_dictionary['EOS']
UNK = from_dictionary['UNK']

embedded_size = 128
batch_size = 20
num_hidden_layers = 2
rnn_size = 128
learning_rate = 0.01
epochs = 130

In [6]:
to_data, to_vocabulary_size,to_dictionary,to_reversed_dictionary  = build_dataset("./data/to.txt")

In [7]:

pickle.dump((from_data,from_vocabulary_size,from_dictionary,from_reversed_dictionary),
            open('./data/from.pickle', 'wb'))
a,b,c,d = pickle.load(open('./data/from.pickle',"rb"))


In [8]:
X = tf.placeholder(tf.int32, [None, None])
Y = tf.placeholder(tf.int32, [None, None])

X_seq_len = tf.placeholder(tf.int32, [None])
Y_seq_len = tf.placeholder(tf.int32, [None])


In [9]:
with tf.variable_scope("encoder_embeddings"):
    # encoder_embeddings
    encoder_embeddings = tf.Variable(tf.random_uniform([from_vocabulary_size, embedded_size], -1, 1))
    encoder_embedded = tf.nn.embedding_lookup(encoder_embeddings, X)
    
    # Slice input
    X_slice = tf.strided_slice(X, [0, 0], [batch_size, -1], [1, 1])

Instructions for updating:
Colocations handled automatically by placer.


In [10]:
with tf.variable_scope("decoder_embeddings"):
    # Add GO to the beging of the input
    decoder_input = tf.concat([tf.fill([batch_size, 1], GO), X_slice], 1)
    
    # decoder_embeddings
    decoder_embeddings = tf.Variable(tf.random_uniform([to_vocabulary_size, embedded_size], -1, 1))
    decoder_embedded = tf.nn.embedding_lookup(decoder_embeddings, decoder_input)

In [11]:
# encoder
with tf.variable_scope("encoder"):
    rnn_cells = tf.nn.rnn_cell.MultiRNNCell([tf.nn.rnn_cell.LSTMCell(rnn_size,initializer=tf.orthogonal_initializer(),reuse=False) for _ in range(num_hidden_layers)])

    _, last_state = tf.nn.dynamic_rnn(rnn_cells, encoder_embedded, dtype = tf.float32)


Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API


In [12]:
# decoder
with tf.variable_scope("decoder"):
    rnn_cells_dec = tf.nn.rnn_cell.MultiRNNCell(
        [tf.nn.rnn_cell.LSTMCell(rnn_size,initializer=tf.orthogonal_initializer(),reuse=False) for _ in range(num_hidden_layers)])
    
    outputs, _ = tf.nn.dynamic_rnn(rnn_cells_dec, decoder_embedded, 
                                           initial_state = last_state,
                                           dtype = tf.float32)


In [13]:
with tf.variable_scope("logits"):
    # predictions
    logits = tf.layers.dense(outputs, to_vocabulary_size)
    
# Returns a mask tensor representing the first N positions of each cell.
# it's used to remove pads
masks = tf.sequence_mask(Y_seq_len, tf.reduce_max(Y_seq_len), dtype=tf.float32)

Instructions for updating:
Use keras.layers.dense instead.


In [14]:
with tf.variable_scope("cost"):
    # Loss function - weighted softmax cross entropy
    cost = tf.contrib.seq2seq.sequence_loss(logits = logits,targets = Y,weights = masks)
with tf.variable_scope("optimizer"):
    optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(cost)



For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Use tf.cast instead.


In [15]:
#Training
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

In [16]:
def pad_sentences_eos(sentences):
    max_len = 50
    return [sentence + [EOS] + [PAD] * (max_len - 1 - len(sentence)) for sentence in sentences]

In [17]:
def predict(sentence):
    X_in = []
    for word in sentence.split():
            X_in.append(from_dictionary[word])
        
    test = pad_sentences_eos([X_in])
    input_batch = np.zeros([batch_size,len(test[0])])
    input_batch[0] = test[0] 
        
    log = sess.run(tf.argmax(logits,2), 
                                      feed_dict={
                                              X:input_batch,
                                              X_seq_len:[len(sentence)],
                                              Y_seq_len:[len(sentence)]
                                              }
                                      )
 
    result = 'User: '+sentence+'\n' + 'Bot: '
    result  += ' '.join(to_reversed_dictionary[i] for i in log[0])
    if ( result.find('EOS') == -1 ):
        return 'Sorry i cannot understand you\n'
    return result[:result.index('EOS')] + '\n'

In [18]:
def check_accuracy(predicted, Y):
    acc = 0
    for i in range(predicted.shape[0]):
        internal_acc = 0
        for k in range(len(Y[i])):
            if Y[i][k] == predicted[i][k]:
                internal_acc += 1
        acc += (internal_acc / len(Y[i]))
    return acc / predicted.shape[0]

In [19]:
        
for i in range(0, epochs):
    total_loss, total_accuracy = 0, 0    
    for k in range(0, (len(from_data) // batch_size) * batch_size, batch_size):
        
                # Slice the right amount for the batch
                sources_batch = from_data[k:k + batch_size]
                targets_batch = to_data[k:k + batch_size]
        
                # Pad
                pad_sources_batch = pad_sentences_eos(sources_batch)
                pad_targets_batch = pad_sentences_eos(targets_batch)
        
                # length
                pad_sources_batch_length = [len(z) for z in pad_sources_batch]
                pad_targets_batch_length = [len(z) for z in pad_sources_batch]
        
                # arg max refers to the inputs, or arguments, at which the function outputs are as large as possible
                decoderPrediction = tf.argmax(logits, 2)
                
                predicted, loss, _ = sess.run([decoderPrediction, cost, optimizer], 
                                      feed_dict={X:pad_sources_batch,
                                                Y:pad_targets_batch,
                                                X_seq_len:pad_sources_batch_length,
                                                Y_seq_len:pad_targets_batch_length})
        
        
                total_loss += loss
                total_accuracy += check_accuracy(predicted,pad_targets_batch)
    
    total_loss /= (len(from_data) // batch_size)
    total_accuracy /= (len(from_data) // batch_size)
    print('epoch: %d, avg loss: %f, avg accuracy: %f'%(i, total_loss, total_accuracy))
print(predict('how are you'))


epoch: 0, avg loss: 4.135520, avg accuracy: 0.002000
epoch: 1, avg loss: 1.323181, avg accuracy: 0.870000
epoch: 2, avg loss: 0.829799, avg accuracy: 0.870000
epoch: 3, avg loss: 0.824497, avg accuracy: 0.870000
epoch: 4, avg loss: 0.704638, avg accuracy: 0.877000
epoch: 5, avg loss: 0.633479, avg accuracy: 0.878000
epoch: 6, avg loss: 0.672157, avg accuracy: 0.878000
epoch: 7, avg loss: 0.620989, avg accuracy: 0.873000
epoch: 8, avg loss: 0.589924, avg accuracy: 0.873000
epoch: 9, avg loss: 0.580789, avg accuracy: 0.879000
epoch: 10, avg loss: 0.573334, avg accuracy: 0.883000
epoch: 11, avg loss: 0.563481, avg accuracy: 0.884000
epoch: 12, avg loss: 0.551244, avg accuracy: 0.884000
epoch: 13, avg loss: 0.542167, avg accuracy: 0.884000
epoch: 14, avg loss: 0.533211, avg accuracy: 0.883000
epoch: 15, avg loss: 0.522701, avg accuracy: 0.883000
epoch: 16, avg loss: 0.512690, avg accuracy: 0.886000
epoch: 17, avg loss: 0.503954, avg accuracy: 0.891000
epoch: 18, avg loss: 0.493697, avg acc

In [24]:
print(predict('hi'))
print(predict('how are you'))
print(predict('what is your name'))
print(predict('where do you live'))
print(predict('where is your house'))

User: hi
Bot: good there 

User: how are you
Bot: i am thank thank you 

User: what is your name
Bot: good name is papaya and what do you want me to call you dear sir or 

User: where do you live
Bot: i florida 



KeyError: 'house'

In [21]:
def strip_consts(graph_def, max_const_size=32):
    """Strip large constant values from graph_def."""
    strip_def = tf.GraphDef()
    for n0 in graph_def.node:
        n = strip_def.node.add() 
        n.MergeFrom(n0)
        if n.op == 'Const':
            tensor = n.attr['value'].tensor
            size = len(tensor.tensor_content)
            if size > max_const_size:
                tensor.tensor_content = "<stripped %d bytes>"%size
    return strip_def

def show_graph(graph_def, max_const_size=32):
    """Visualize TensorFlow graph."""
    if hasattr(graph_def, 'as_graph_def'):
        graph_def = graph_def.as_graph_def()
    strip_def = strip_consts(graph_def, max_const_size=max_const_size)
    code = """
        <script>
          function load() {{
            document.getElementById("{id}").pbtxt = {data};
          }}
        </script>
        <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
        <div style="height:600px">
          <tf-graph-basic id="{id}"></tf-graph-basic>
        </div>
    """.format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))

    iframe = """
        <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe>
    """.format(code.replace('"', '&quot;'))
    display(HTML(iframe))

In [22]:
show_graph(tf.get_default_graph().as_graph_def())