# RNN bot

In [1]:
import numpy as np
import tensorflow as tf

In [2]:
sentences = [['hi'],
             ['hello'],
             ['hi', 'there', 'how', 'are', 'you'],
             ['i', 'am', 'fine', 'how', 'do', 'you', 'do'],
             ['i', 'am', 'doing', 'fine'],
             ['what', 'is', 'your', 'name'],
             ['hi', 'my', 'name', 'is', 'strange'],
             ['that', 'is', 'indeed', 'strange'],
             ['hello', 'darkness', 'my', 'old', 'friend'],
             ['darkness', 'is', 'strange']]

In [3]:
person1 = []
person2 = []
for i in range(len(sentences)):
    if i%2 == 0:
        person1.append(sentences[i])
    else:
        person2.append(sentences[i])

In [4]:
person1

[['hi'],
 ['hi', 'there', 'how', 'are', 'you'],
 ['i', 'am', 'doing', 'fine'],
 ['hi', 'my', 'name', 'is', 'strange'],
 ['hello', 'darkness', 'my', 'old', 'friend']]

In [5]:
person2

[['hello'],
 ['i', 'am', 'fine', 'how', 'do', 'you', 'do'],
 ['what', 'is', 'your', 'name'],
 ['that', 'is', 'indeed', 'strange'],
 ['darkness', 'is', 'strange']]

In [6]:
words = []
[[words.append(w) for w in sent] for sent in sentences];
vocab = list(set(words))
vocab_size = len(vocab)

In [7]:
vocab_size

22

In [8]:
id_to_word = dict(enumerate(vocab))
word_to_id = {v:k for k,v in id_to_word.items()}

In [9]:
id_to_word[22] = ''

In [10]:
seq_len = max([len(s) for s in sentences])

In [11]:
seq_len

7

In [12]:
for i in range(len(person1)):
    person1[i] = [word_to_id[w] for w in person1[i]] + [vocab_size] * (seq_len - len(person1[i]))
    person2[i] = [word_to_id[w] for w in person2[i]] + [vocab_size] * (seq_len - len(person2[i]))

In [13]:
person1 = np.array(person1)

In [14]:
person2 = np.array(person2)

In [15]:
vec = np.zeros((vocab_size+1, vocab_size+1), dtype=np.float64)
for i in range(vocab_size+1):
    vec[i,i] = 1.0

In [16]:
input_shape = output_shape = vocab_size+1
hidden_shape = 64
learning_rate = 0.001
batch_size = person1.shape[0]

In [17]:
word_to_id

{'am': 18,
 'are': 19,
 'darkness': 8,
 'do': 12,
 'doing': 5,
 'fine': 3,
 'friend': 20,
 'hello': 11,
 'hi': 13,
 'how': 7,
 'i': 14,
 'indeed': 6,
 'is': 2,
 'my': 9,
 'name': 0,
 'old': 21,
 'strange': 17,
 'that': 10,
 'there': 15,
 'what': 16,
 'you': 1,
 'your': 4}

In [18]:
a = np.array([[13, 7, 19, 1, 22, 22, 22]])
[id_to_word[i] for i in a[0]]

['hi', 'how', 'are', 'you', '', '', '']

In [27]:
tf.reset_default_graph()
with tf.Graph().as_default() as graph:
    
    # placeholders for input and output
    X = tf.placeholder(shape=[None,seq_len], dtype=tf.int32, name="input")
    Y = tf.placeholder(shape=[None,seq_len], dtype=tf.int32, name="target")
    
    # embedding tensor
    embed = tf.constant(vec, name="embeddings", dtype=tf.float64)
    
    # input embeddings
    X_embed = tf.nn.embedding_lookup(embed, X, name="input_embeddings")
    X_embed = tf.transpose(X_embed, perm=[1,0,2])
    
    # output embeddings
    Y_embed = tf.nn.embedding_lookup(embed, Y, name="output_embeddings")
    
    # initial hidden state
    h_in = tf.placeholder(shape=[None, hidden_shape], dtype=tf.float64)
    
    # RNN Cell
    def RNN(x_t, 
            h_prev, 
            input_shape=input_shape, 
            hidden_shape=hidden_shape, 
            output_shape=output_shape):
        with tf.variable_scope('RNN'):
            
            # RNN input weight
            W_xh = tf.get_variable(name="W_xh", shape=[input_shape, hidden_shape], 
                                   initializer=tf.random_normal_initializer(mean=0.0, 
                                                                            stddev=0.1), 
                                   dtype=tf.float64)
            
            # RNN hidden state weight
            W_hh = tf.get_variable(name="W_hh", shape=[hidden_shape, hidden_shape], 
                                   initializer=tf.random_normal_initializer(mean=0.0, 
                                                                            stddev=0.1), 
                                   dtype=tf.float64)
            
            # RNN output weight
            W_yh = tf.get_variable(name="W_yh", shape=[hidden_shape, output_shape], 
                                   initializer=tf.random_normal_initializer(mean=0.0, 
                                                                            stddev=0.1), 
                                   dtype=tf.float64)
            
            # hidden state
            h_t = tf.tanh(tf.matmul(x_t, W_xh) + tf.matmul(h_prev, W_hh))
            
            # output
            y_t = tf.nn.softmax(tf.matmul(h_t, W_yh))
            
            # reshape hidden state
            h_t = tf.reshape(h_t, shape=[-1, hidden_shape])
            
            y_t = tf.reshape(y_t, shape=[-1, output_shape])
            
            # return list of hidden state and output
            return [h_t, y_t]
    
    # helper function for encoder
    def encoder_helper(h_prev, x_t):
        with tf.variable_scope("encoder_helper"):
            
            # pass current input and previous hidden state to RNN cell
            h_t, y_ = RNN(x_t, h_prev)
            
            # return hidden state
            return h_t
    
    # encoder
    def encoder(X_in, h_in):
        with tf.variable_scope("encoder"):
            # scan encoder helper over inputs 
            out_encoder = tf.scan(encoder_helper, 
                                  X_in, 
                                  initializer=h_in)
            
            # return last hidden state i.e. thought vector
            return out_encoder[-1]
    
    # helper function for decoder
    def decoder_helper(inputs, # a list of previous hidden state and current input i.e. previous output
                       x_t): # no use extra variable just to iterate over the decoder
        with tf.variable_scope("decoder_helper"):
            
            # previous hidden state and previous output (i.e softmax probabilities)
            h_prev, y_prev, y_prev_one_hot = inputs[0], inputs[1], inputs[2] 
            
            # reshape preious hidden state
            h_prev = tf.reshape(h_prev, shape=[-1, hidden_shape])
            
            # reshape previous output
            y_prev = tf.reshape(y_prev_one_hot, [-1, input_shape])
            
            h_t, y_t = RNN(y_prev_one_hot, h_prev)
            
            # convert previous output to one hot vectors
            y_out = tf.nn.embedding_lookup(embed, tf.argmax(y_t, axis=1))
            
            outputs = [h_t, y_t, y_out]
            
            return outputs
    
    
    def decoder(h_in, x_in, x_in_one_hot):
        with tf.variable_scope('decoder'):
            
            out_decoder = tf.scan(decoder_helper, 
                                  X_embed, 
                                  initializer=[h_in, x_in, x_in_one_hot])
            
            return out_decoder[1:]
    
    # thought vector output from encoder
    thought_vector = encoder(X_embed, h_in)
    
    # weight to calculate encoder output 
    W_y = tf.get_variable(name="W_y", shape=[hidden_shape, output_shape], 
                          initializer=tf.random_normal_initializer(mean=0.0, 
                                                                   stddev=0.1), 
                          dtype=tf.float64)
    
    # encoder output
    encoder_output = tf.nn.softmax(tf.matmul(thought_vector, W_y))
    
    # convert previous output to one hot vectors
    encoder_output_one_hot = tf.nn.embedding_lookup(embed, tf.argmax(encoder_output, axis=1))
    
    decoder_output, decoder_output_one_hot = decoder(thought_vector, encoder_output, encoder_output_one_hot)
    
    seq_output = tf.transpose(decoder_output, perm=[1, 0, 2])
    
    with tf.name_scope('loss'):
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=Y_embed, 
                                                                      logits=seq_output))
    
    with tf.name_scope('train'):
        optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    
    output = tf.argmax(decoder_output, axis=2)
    
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        
        for i in range(10001):
            
            _,l = sess.run([optimizer, loss], 
                           {X:person1, 
                            Y:person2, 
                            h_in: np.zeros((batch_size, hidden_shape))})
            
            if i%1000 == 0:
                print(l)
            
            if i%5000 == 0:
                pred = sess.run(output, {X:person1, Y:person2, h_in: np.zeros((batch_size, hidden_shape))})
                for j in range(batch_size):
                    print("person1 : ", [[id_to_word[w] for w in sent] for sent in person1][j])
                    print("person2 : ", [[id_to_word[w] for w in sent] for sent in pred.T][j])
                    print("====================================================")
                    
        
        out = sess.run(output, {X:a, Y:person2, h_in: np.zeros((1, hidden_shape))})
        print("personA : ", [[id_to_word[w] for w in sent] for sent in a])
        print("bot     : ", [[id_to_word[w] for w in sent] for sent in out.T])
        writer = tf.summary.FileWriter('tmp/1')
        writer.add_graph(sess.graph)

3.13583732289
person1 :  ['hi', '', '', '', '', '', '']
person2 :  ['doing', 'hello', 'you', 'how', 'there', 'indeed', 'indeed']
person1 :  ['hi', 'there', 'how', 'are', 'you', '', '']
person2 :  ['doing', 'my', 'indeed', 'indeed', 'name', 'name', 'do']
person1 :  ['i', 'am', 'doing', 'fine', '', '', '']
person2 :  ['doing', 'my', 'indeed', 'indeed', 'name', 'name', 'do']
person1 :  ['hi', 'my', 'name', 'is', 'strange', '', '']
person2 :  ['doing', 'hello', 'you', 'how', 'there', 'indeed', 'indeed']
person1 :  ['hello', 'darkness', 'my', 'old', 'friend', '', '']
person2 :  ['doing', 'my', 'indeed', 'indeed', 'name', 'name', 'do']
2.20804327694
2.20767953851
2.20760142856
2.20757246501
2.20755908839
person1 :  ['hi', '', '', '', '', '', '']
person2 :  ['hello', '', '', '', '', '', '']
person1 :  ['hi', 'there', 'how', 'are', 'you', '', '']
person2 :  ['i', 'am', 'fine', 'how', 'do', 'you', 'do']
person1 :  ['i', 'am', 'doing', 'fine', '', '', '']
person2 :  ['what', 'is', 'your', 'name'

In [None]:
! tensorboard --logdir ./tmp/1