In [1]:
import tensorflow as tf
import numpy as np

In [2]:
# Start from an empty default graph so that re-running this cell does not
# collide with ops/variables left over from a previous execution.
tf.reset_default_graph()
# The graph-level seed lives on the default graph, so it has to be set after
# the reset above and before any random op is created further down.
tf.set_random_seed(0)

# For convenience
def seqtostr(input):
    """Decode a sequence of character indices into a string.

    Parameters
    ----------
    input : iterable of int
        Character indices. Each one is looked up in the module-level
        ``ix_to_char`` table. Any iterable works (list, array, generator).
        The name shadows the ``input`` builtin inside this function only;
        it is kept so existing keyword callers keep working.

    Returns
    -------
    str
        The looked-up characters concatenated in iteration order
        (an empty string for an empty sequence).

    Raises
    ------
    KeyError
        If an index is not a key of ``ix_to_char``.
    """
    # Iterate directly: slicing with ``[:]`` only made a throw-away copy and
    # rejected iterables that do not support slicing.
    return ''.join(ix_to_char[ch] for ch in input)

# Karpathy's test function
def sample(x, n, cell, Why, by):
    """Greedily generate ``n`` character indices with a NumPy re-implementation
    of the RNN forward pass.

    Parameters
    ----------
    x : array-like
        One-hot encoding of the seed character; its last axis length is taken
        as the vocabulary size (the caller passes shape ``(1, vocab_size)``).
    n : int
        Number of characters to generate.
    cell : sequence
        ``cell[0]`` is the stacked weight matrix whose first ``vocab_size``
        rows multiply the input and whose remaining rows multiply the hidden
        state; ``cell[1]`` is the hidden bias.
    Why, by : ndarray
        Output projection weights and bias applied to the hidden state.

    Returns
    -------
    list
        The ``n`` predicted character indices, in generation order.
    """
    W = np.asarray(cell[0])
    bh = cell[1]
    x = np.asarray(x)

    # Take the sizes from the arrays themselves instead of from notebook
    # globals, so the function works regardless of cell execution order.
    vocab_size = x.shape[-1]
    Wxh = W[:vocab_size]
    Whh = W[vocab_size:]
    hidden_size = Whh.shape[0]

    h = np.zeros((1, hidden_size))
    ixes = []

    for _ in range(n):
        h = np.tanh(np.dot(x, Wxh) + np.dot(h, Whh) + bh)
        y = np.dot(h, Why) + by
        # Greedy decoding: take the highest-scoring character rather than
        # sampling from the softmax distribution over y.
        ix = np.argmax(y)
        # Feed the prediction back in as the next one-hot input.
        x = np.zeros((1, vocab_size))
        x[0][ix] = 1
        ixes.append(ix)
    return ixes

# Setting lookup table
data = 'So I first came to Chicago when I was in my early twenties, and I was still trying to figure out who I was; still searching for a purpose to my life. And it was a neighborhood not far from here where I began working with church groups in the shadows of closed steel mills.'
# Sort the distinct characters so every run assigns the same index to the
# same character; the iteration order of a set of strings is not stable
# across interpreter sessions.
chars = sorted(set(data))
vocab_size = len(chars)
# Forward (character -> index) and inverse (index -> character) mappings.
char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_char = dict(enumerate(chars))

# Preparing training
# Encode the whole text once. Inputs and targets are the same index sequence
# shifted by one character, so each target is the character after its input.
_encoded = [char_to_ix[ch] for ch in data]
inputs = [_encoded[:-1]]   # every character except the last, as a batch of one
targets = [_encoded[1:]]   # every character except the first, as a batch of one
X_train = tf.one_hot(inputs, depth=vocab_size)
Y_train = tf.one_hot(targets, depth=vocab_size)

# Hyperparameters
hidden_size = 100      # number of units in the RNN cell
learning_rate = 0.001  # step size handed to the Adam optimizer
cost_val = 100         # placeholder cost printed before the first training step

# etc.
batch_size = 1
seq_length = len(data) - 1  # inputs/targets each drop one character of data

# Placeholder
# X receives the one-hot inputs and Y the one-hot targets; both have one row
# per time step and one column per vocabulary entry.
X = tf.placeholder(tf.float32, [None, seq_length, vocab_size])
Y = tf.placeholder(tf.int32, [None, seq_length, vocab_size])

# Output layer of char-rnn
# `name` belongs to tf.Variable (as it already does for `by`). It used to be
# passed to tf.random_normal, which named the initial-value op and left the
# variable itself with a default name.
Why = tf.Variable(tf.random_normal([batch_size, hidden_size, vocab_size]), name='weight_hy')
by = tf.Variable(tf.random_normal([batch_size, 1, vocab_size]), name='bias_y')

# Setting cell, loss function, optimizer
cell = tf.nn.rnn_cell.BasicRNNCell(num_units=hidden_size)
state = cell.zero_state(batch_size, tf.float32)
outputs, state = tf.nn.dynamic_rnn(cell=cell, inputs=X,
                                   initial_state=state, dtype=tf.float32)
# Project every time step's RNN output onto the vocabulary.
logit = tf.matmul(outputs, Why) + by
# Mean softmax cross-entropy between the projected outputs and the targets.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logit, labels=Y))
train = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # The one-hot training tensors never change, so evaluate them once here
    # instead of re-running them on every training step.
    x_data, y_data = sess.run([X_train, Y_train])
    # Slice the seed character from the NumPy array; indexing the tensor
    # inside the loop would add new ops to the graph on every test pass.
    X_test = x_data[0][0].reshape(1, vocab_size)  # Start from the beginning

    for step in range(150):
        # Test
        if step % 10 == 0:
            # Fetch the current weights in a single run so they all come
            # from the same training step.
            cell_vals, why_val, by_val = sess.run([cell.variables, Why, by])
            predtxt = sample(X_test, seq_length, cell_vals, why_val, by_val)
            # cost_val is the cost from the previous training step (or the
            # initial placeholder value at step 0).
            print('step :', step, 'cost :', cost_val,
                  '\npred :', data[0] + seqtostr(predtxt), '\n','-'*50)

        # Train
        cost_val, _ = sess.run([cost, train], feed_dict={X: x_data, Y: y_data})

step : 0 cost : 100 
pred : S;nm,eaieftIn.kba;pen,l;kmbppy;y;kedInopbpenmAypydy;d;denoeafeate;penaAeplnmpbppypy;y y;y;kenaIenmbn.k;g;pen,l;. nmpbphnyd;penmpyty;y y,yaienabef,eaieatef;epItp;kly;y;dp;denmpepyty;y;ydk;d;oeaheft mkybkkya.epenlnopadpyty;kebpy;ydk;d;,pa.entp;kmn;penyd;penep;plnypy;p;pedIy 
 --------------------------------------------------
step : 10 cost : 3.2600157 
pred : S;nmnepIno Iewup;o IenmAytk;y IenaAeilbkkbepcaydk;d;neSoeIlnormAoeIhoo;gcooy;neienteefI yty;k;nadenonoefe tso ilnoreilnormAopenmAeplny Ilnop;nmAeplnytk;d;n,ndk;o Ienmbiwn,ydkln;oeipwtp;y Ilnorm;nei;oenenhe iytk;d;ie tnlna.eitnwn,beiIno Ienwn;o IenmA;tln;pCnpeCwno acta;,f  
 --------------------------------------------------
step : 20 cost : 2.0086627 
pred : S;naneicnoo ithe hheea  is as  inoage Ito ihte h t lno Iio ests ah yanwahoeehhheeah;y  stal iogei wtt    was    wases  il aseaheilho ahefn i s is  s  as is is as as iino uese httll;oeaftiwno eith  h yah fiw so  lts ahteinwthe   w s s s aseas i geilnh