In [1]:
import tensorflow as tf
import numpy as np

In [2]:
# Start the cell from an empty default graph before any ops are created below.
# Presumably this guards against errors caused by ops/variables left over from
# a previous run of this cell in the same kernel -- TODO confirm.
tf.reset_default_graph() # To prevent graph error of tensorflow

def seqtostr(input, mapping=None):
    """Decode a sequence of character indices into a string.

    Args:
        input: Any iterable of character indices (list, NumPy array,
            generator, ...). The parameter name shadows the built-in
            ``input`` but is kept so existing callers are unaffected.
        mapping: Optional index -> character lookup. When omitted, the
            module-level ``ix_to_char`` dictionary is used, which is what
            the original implementation always did.

    Returns:
        The concatenation of the characters looked up for each index, in order.

    Raises:
        KeyError: If an index is missing from the lookup table.
    """
    if mapping is None:
        # Resolved at call time so the vocabulary may be defined after this function.
        mapping = ix_to_char
    # Iterate directly: the former ``input[:]`` copy was unnecessary and
    # prevented non-sliceable iterables from being decoded.
    return ''.join(mapping[ix] for ix in input)

def sample(x, n, cell, Why, by):
    """Greedily generate ``n`` character indices from trained RNN weights using NumPy.

    The recurrence ``h = tanh(x.Wxh + h.Whh + bh)`` is unrolled starting from
    a zero hidden state; at every step the highest-scoring character is
    picked and fed back, one-hot encoded, as the next input.

    Vocabulary and hidden sizes are derived from the arguments, so the
    function does not depend on module-level ``vocab_size``/``hidden_size``.

    Args:
        x: One-hot encoding of the seed character, shape ``(1, vocab_size)``
            (a 1-D vector of length ``vocab_size`` is also accepted).
        n: Number of characters to generate.
        cell: Sequence ``[W, bh]``. ``W`` has ``vocab_size + hidden_size``
            rows and ``hidden_size`` columns: the first ``vocab_size`` rows
            are the input-to-hidden weights, the rest the hidden-to-hidden
            weights. ``bh`` is the hidden bias.
        Why: Hidden-to-output weights.
        by: Output bias.

    Returns:
        A list of ``n`` Python ints, the generated character indices.

    Raises:
        ValueError: If ``W`` does not have the layout described above.
    """
    x = np.atleast_2d(x)
    n_vocab = x.shape[1]

    W = np.asarray(cell[0])
    if W.ndim != 2 or W.shape[0] - n_vocab != W.shape[1]:
        raise ValueError(
            'cell[0] must have shape (vocab_size + hidden_size, hidden_size); '
            'got %s for vocab_size=%d' % (W.shape, n_vocab))
    n_hidden = W.shape[1]
    Wxh = W[:n_vocab]
    Whh = W[n_vocab:]
    bh = cell[1]

    h = np.zeros((1, n_hidden))
    ixes = []
    for _ in range(n):
        h = np.tanh(np.dot(x, Wxh) + np.dot(h, Whh) + bh)
        y = np.dot(h, Why) + by
        # argmax works on the flattened scores, so extra leading axes of
        # length 1 on Why/by do not change the selected index.
        ix = int(np.argmax(y))
        # Feed the prediction back in as the next one-hot input.
        x = np.zeros((1, n_vocab))
        x[0, ix] = 1
        ixes.append(ix)
    return ixes

# Karpathy's preprocessing
data = 'So I first came to Chicago when I was in my early twenties, and I was still trying to figure out who I was; still searching for a purpose to my life. And it was a neighborhood not far from here where I began working with church groups in the shadows of closed steel mills.'
# Sort the character set: the iteration order of a set of strings can differ
# between interpreter runs, which would make the index <-> character mapping
# (and therefore every printed sample) irreproducible.
chars = sorted(set(data))
vocab_size = len(chars)
char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_char = {i: ch for i, ch in enumerate(chars)}

# Hyperparameters
hidden_size = 100
learning_rate = 1e-3  # 0.001
seq_length = len(data) - 1
batch_size = 1
num_steps = 150       # number of training steps
print_every = 10      # sample and report every this many steps
seed = 0              # graph-level seed for repeatable weight initialisation

# Next-character prediction: the target sequence is the input shifted by one.
inputs = [char_to_ix[ch] for ch in data[:-1]]
targets = [char_to_ix[ch] for ch in data[1:]]

# One-hot encode once, in NumPy. The encodings never change, so there is no
# need to build TensorFlow ops for them or to re-evaluate them every step.
one_hot_table = np.eye(vocab_size, dtype=np.float32)
inputs_onehot = one_hot_table[inputs]
targets_onehot = one_hot_table[targets]
X_train = inputs_onehot.reshape(batch_size, seq_length, vocab_size)
Y_train = targets_onehot.reshape(batch_size, seq_length, vocab_size)
# Seed for sampling: the first character of the text, shape (1, vocab_size).
X_test = inputs_onehot[0].reshape(1, vocab_size)

# Model graph
tf.set_random_seed(seed)

X = tf.placeholder(tf.float32, [None, seq_length, vocab_size])
# Labels are one-hot distributions, so they share the logits' float dtype.
Y = tf.placeholder(tf.float32, [None, seq_length, vocab_size])

# NOTE(review): the output projection carries a leading batch_size axis, so it
# only acts as a single shared weight while batch_size == 1 -- confirm before
# increasing batch_size.
Why = tf.Variable(tf.random_normal([batch_size, hidden_size, vocab_size]), name='weight_hy')
by = tf.Variable(tf.random_normal([batch_size, 1, vocab_size]), name='bias_y')

cell = tf.nn.rnn_cell.BasicRNNCell(num_units=hidden_size)
state = cell.zero_state(batch_size, tf.float32)
outputs, state = tf.nn.dynamic_rnn(cell=cell, inputs=X,
                                   initial_state=state, dtype=tf.float32)
logit = tf.matmul(outputs, Why) + by
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logit, labels=Y))
train = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

train_feed = {X: X_train, Y: Y_train}

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Placeholder reported at step 0, before any training step has produced a cost.
    cost_val = 100
    for step in range(num_steps):
        # Test
        if step % print_every == 0:
            # Start from the beginning: fetch the current weights in one call
            # and generate a full-length sample with the NumPy re-implementation.
            cell_vars, Why_val, by_val = sess.run([cell.variables, Why, by])
            predtxt = sample(X_test, seq_length, cell_vars, Why_val, by_val)
            print('step :', step,
                  'cost :', cost_val, '\n',
                  'pred :', data[0] + seqtostr(predtxt), '\n', '-'*50)

        # Train
        cost_val, _ = sess.run([cost, train], feed_dict=train_feed)

step : 0 cost : 100 
 pred : Shuaakoau.hh,kkaaghhh,kkagghumkklaggyuc;kkaagoumkkkaaghyhm;kaaghuh,kkaaghhh,kkagghh,;kkagghh,kkkagghhhkkkagghhhkkkagghhhkkkagghhhkkkagghhhkkkagghhhkkkagghhhkkkagghhhkkkagghhhkkkagghhhkkkagghhhkkkagghhhkkkagghhhkkkagghhhkkkagghhhkkkagghhhkkkagghhhkkkagghhhkkkagghhhkkkagghh 
 --------------------------------------------------
step : 10 cost : 3.5538042 
 pred : Shuaageadu;kAcuramkyrc;CkldArhehoheh ue se ds                                                                                                                                                                                                                                    
 --------------------------------------------------
step : 20 cost : 2.556518 
 pred : Shuaageumf;lllhuragegd ;kAdh raioge ote, aued  at op  o     oi swien  otei  oi sr oa  oi s  or se  st ot ir                                                                                                                                             