In [1]:
import os
import numpy as np
import tensorflow as tf
import tensorflow.contrib.rnn as rnn
import tensorflow.contrib.layers as layers
from text_loader import TextLoader 

In [2]:
# Hyperparameters shared by the cells that build and train the model.
num_layers = 3          # how many LSTM cells are stacked
hidden_size = 512       # units in each LSTM cell
batch_size = 200        # sequences requested from the loader per step
max_length = 30         # sequence length of every input/target row
learning_rate = 0.001   # step size handed to the optimizer

# Load the corpus; the vocabulary size sets the one-hot depth and the
# width of the output layer.
loader = TextLoader("data/hamlet.txt")
vocab_size = len(loader.vocab)

# Create the directory that checkpoints are written to, if it is missing.
checkpoint_dir = "checkpoints/"
if not os.path.exists(checkpoint_dir):
    os.makedirs(checkpoint_dir)

In [3]:
# Inputs and targets are integer ids; the batch dimension is left open (None).
X = tf.placeholder(tf.int32, [None, max_length])  # [N, seqlen]
y = tf.placeholder(tf.int32, [None, max_length])  # [N, seqlen]

x_one_hot = tf.one_hot(X, vocab_size)             # [N, seqlen, vocab_size]
y_one_hot = tf.one_hot(y, vocab_size)             # [N, seqlen, vocab_size]

# Stack `num_layers` LSTM cells into a single multi-layer cell.
lstm_layers = [rnn.BasicLSTMCell(hidden_size) for _ in range(num_layers)]
cells = rnn.MultiRNNCell(lstm_layers, state_is_tuple=True)

# Size the zero state from the batch that is actually fed rather than from the
# `batch_size` constant: the placeholders accept any number of rows, so a
# hard-coded state size would make the graph reject every other batch size.
# NOTE: because of this, evaluating `initial_state` requires feeding X.
dynamic_batch_size = tf.shape(X)[0]
initial_state = cells.zero_state(dynamic_batch_size, tf.float32)
rnn_outputs, _ = tf.nn.dynamic_rnn(cells, x_one_hot,
    initial_state=initial_state, dtype=tf.float32)  # [N, seqlen, hidden]

# Fold the time axis into the batch axis so one linear layer scores every
# position, and flatten the one-hot targets the same way.
outputs = tf.reshape(rnn_outputs, [-1, hidden_size])  # [N x seqlen, hidden]
logits = layers.linear(outputs, vocab_size)           # [N x seqlen, vocab_size]
y_flat = tf.reshape(y_one_hot, [-1, vocab_size])      # [N x seqlen, vocab_size]

In [4]:
# Cross entropy at every position against the one-hot targets, then averaged
# over all positions to produce the scalar training loss.
position_losses = tf.nn.softmax_cross_entropy_with_logits(
    logits=logits, labels=y_flat)              # [N x seqlen]
loss_op = tf.reduce_mean(position_losses)

opt = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss_op)

y_softmax = tf.nn.softmax(logits)              # [N x seqlen, vocab_size]
flat_pred = tf.argmax(y_softmax, axis=1)       # [N x seqlen]
# Restore the [N, seqlen] layout by fixing the known sequence length and
# letting the batch dimension be inferred. Reshaping to [batch_size, -1]
# gave the same result for training batches but failed for any other batch
# size, even though the input placeholders leave the batch dimension open.
pred = tf.reshape(flat_pred, [-1, max_length])  # [N, seqlen]

In [5]:
saver = tf.train.Saver()

# Session config with GPU allow_growth enabled.
gpu_options = tf.GPUOptions(allow_growth=True)
sess_config = tf.ConfigProto(gpu_options=gpu_options)

with tf.Session(config=sess_config) as sess:
    sess.run(tf.global_variables_initializer())

    for step in range(30000):
        # One optimisation step on a fresh batch from the loader.
        batch_X, batch_y = loader.next_batch(batch_size, max_length)
        train_feed = {X: batch_X, y: batch_y}
        loss, _ = sess.run([loss_op, opt], feed_dict=train_feed)

        steps_done = step + 1  # 1-based count used for logging and checkpoint names

        # Every 500 steps: report the loss, then show the model's predictions
        # for one randomly chosen row of the current batch as text.
        if steps_done % 500 == 0:
            print("{:08d} step, loss:{:.4f}".format(steps_done, loss))

            row = np.random.randint(0, batch_size)
            predicted_ids = sess.run(pred, feed_dict={X: batch_X})
            chars = [loader.words[idx] for idx in predicted_ids[row]]
            print("".join(chars))

        # Every 5000 steps: write a checkpoint named after the step count.
        if steps_done % 5000 == 0:
            saver.save(sess, "checkpoints/char-rnn_" + str(steps_done))

00000500 step, loss:1.8634
nd theeeee tn thes toalte 
	Ah
00001000 step, loss:1.3568
htoreng 
sieer ng  sr t lut to
00001500 step, loss:0.9023
n  t af bl aonlateral hand
	Th
00002000 step, loss:0.8368
td ttterdants]

KING CLAUDIUS	
00002500 step, loss:0.5624
er  thich tor to drevent,
	I h
00003000 step, loss:0.5573
o  	Toeak to me:

If there be 
00003500 step, loss:0.4503
AEUDE	Oid you assay him?
	To a
00004000 step, loss:0.4424
h she cueen the e, ho!

HORATI
00004500 step, loss:0.4331
		[Eetireng with HORATIO]

LAE
00005000 step, loss:0.3887
l  ty dlessing season this in 
00005500 step, loss:0.4157
ty sord,

HORD POLONIUS	Aarewe
00006000 step, loss:0.4259
 late is valenced since I saw 
00006500 step, loss:0.3739
i ap another skull]

HAMLET	Th
00007000 step, loss:0.3615
td r  will.

QUEEN GERTRUDE	O 
00007500 step, loss:0.4061
tnd tUILDENSTERN]

KAMLET	What
00008000 step, loss:0.2965
 toes,
	I will be brief: your 
00008500 step, loss:0.3789
h teave betimes?

	[Enter KING
00009000 step,