In [1]:
import numpy as np
import tensorflow as tf
import tensorflow.contrib.rnn as rnn
import tensorflow.contrib.layers as layers
from text_loader import TextLoader 

In [2]:
# --- Model / training hyperparameters ---
num_layers, hidden_size = 3, 512   # number of recurrent layers, units per layer
batch_size, max_length = 200, 30   # rows per batch, time steps per row
learning_rate = 1e-3               # step size handed to the optimizer

# --- Corpus ---
loader = TextLoader("data/hamlet.txt")
vocab_size = len(loader.vocab)     # number of entries in the loader's vocabulary

In [3]:
# Integer token ids for the inputs and for the per-step targets.
X = tf.placeholder(tf.int32, [None, max_length])  # [N, seqlen]
y = tf.placeholder(tf.int32, [None, max_length])  # [N, seqlen]

x_one_hot = tf.one_hot(X, vocab_size)  # [N, seqlen, vocab_size]
y_one_hot = tf.one_hot(y, vocab_size)  # [N, seqlen, vocab_size]

# Stack `num_layers` LSTM layers into one multi-layer cell.
lstm_layers = [rnn.BasicLSTMCell(hidden_size) for _ in range(num_layers)]
cells = rnn.MultiRNNCell(lstm_layers, state_is_tuple=True)

# Size the zero state from the batch that is actually fed rather than from the
# `batch_size` constant: the placeholders leave the batch dimension open, so
# the graph should accept any number of rows (e.g. a single sequence at
# sampling time), not only exactly `batch_size`.
initial_state = cells.zero_state(tf.shape(X)[0], tf.float32)
outputs, _ = tf.nn.dynamic_rnn(cells, x_one_hot,
                               initial_state=initial_state, dtype=tf.float32)

# Flatten batch and time so one dense layer scores every position at once.
outputs = tf.reshape(outputs, [-1, hidden_size])    # [N x seqlen, hidden]
logits = layers.fully_connected(outputs, vocab_size,
                                activation_fn=None) # [N x seqlen, vocab_size]
y_flat = tf.reshape(y_one_hot, [-1, vocab_size])    # [N x seqlen, vocab_size]

In [None]:
# Cross-entropy at every position against the one-hot targets, then averaged
# over all positions into the scalar that is minimized.
loss_op = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_flat) # [N x seqlen]
loss_op = tf.reduce_mean(loss_op)

opt = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss_op)

y_softmax = tf.nn.softmax(logits)         # [N x seqlen, vocab_size]
pred = tf.argmax(y_softmax, axis=1)       # [N x seqlen]
# Restore the [N, seqlen] layout using the fixed sequence length and let the
# batch dimension be inferred, so this op does not require exactly
# `batch_size` rows.
pred = tf.reshape(pred, [-1, max_length]) # [N, seqlen]

In [None]:
# Training schedule (all counts are in optimizer steps).
num_steps = 100000
log_every = 500      # print the loss and one sampled row of predictions
save_every = 5000    # write a numbered checkpoint

saver = tf.train.Saver()

# Make sure the checkpoint directory exists before the first save;
# MakeDirs succeeds without doing anything when it is already there.
tf.gfile.MakeDirs("checkpoints")

sess_config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
with tf.Session(config=sess_config) as sess:
    sess.run(tf.global_variables_initializer())
    # Checkpoint of the freshly initialized (untrained) variables.
    saver.save(sess, "checkpoints/char-rnn")
    for step in range(num_steps):
        batch_X, batch_y = loader.next_batch(batch_size, max_length)
        loss, _ = sess.run([loss_op, opt], feed_dict={X: batch_X, y: batch_y})

        if (step+1) % log_every == 0:
            print("{:08d} step, loss:{:.4f}".format(step+1, loss))

            # Predictions for the batch just trained on, computed after the
            # weight update; show one randomly chosen row as text.
            results = sess.run(pred, feed_dict={X: batch_X})
            sample_idx = np.random.randint(0, len(results))
            words = [loader.words[word] for word in results[sample_idx]]
            print("".join(words))

        if (step+1) % save_every == 0:
            saver.save(sess, "checkpoints/char-rnn_"+str(step+1))

00000500 step, loss:1.8795
t totr ttet tialnee  tot te t

00001000 step, loss:1.3221
heatun aonntut tlap to that It
00001500 step, loss:1.0667
	AMLET	Wi y do ntill, my the e
00002000 step, loss:0.6390
tes vophew's purpose, -to supp
00002500 step, loss:0.5246
ur af an. I'll have these play
00003000 step, loss:0.5171
ut iartted word:
	O heavy burt
00003500 step, loss:0.4784
lee a gpar i' the dirkest nigh
00004000 step, loss:0.4209
ueoou, shese are the stops.

G
00004500 step, loss:0.3246
 G CLAUDIUS	Ahanks, Rosencrant
00005000 step, loss:0.3956
tLAUDIUS	W  salls right.
	You 
00005500 step, loss:0.3701
 r ti hur watch up; and by my 
00006000 step, loss:0.3716
u be  since I am still possess
00006500 step, loss:0.3909
  tey he walk
	Than may be giv
00007000 step, loss:0.3962
taart 

KING CLAUDIUS	Thyself 
00007500 step, loss:0.3946
tet tr not to mine uncle's bed
00008000 step, loss:0.3965
 ahlc  well, well.

OPHELIA	My
00008500 step, loss:0.3322
  ntuJulius Caesar: I was kill
00009000 step,