## char-rnn using Tensorflow

A hands-on character-level RNN (char-rnn) built with TensorFlow.

In [4]:
import numpy as np
import tensorflow as tf
import pandas as pd

### Loading characters from toy data set

In [5]:
# Read the whole corpus into memory; the context manager closes the file handle
# as soon as the text has been read.
with open('data/shakespeare_tiny.txt', 'r') as corpus_file:
    data = corpus_file.read()

# Sort the vocabulary so the char <-> index mapping is the same on every run.
# Iteration order of a set of strings can change between interpreter sessions,
# which would make a saved checkpoint decode to the wrong characters after a
# kernel restart.
chars = sorted(set(data))

DATASIZE, ALPHASIZE = len(data), len(chars)
print('data has %d characters, %d unique.' % (DATASIZE, ALPHASIZE))

# Lookup tables: character -> integer id, and integer id -> character.
char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }

# The corpus as a 1-D integer array of shape [DATASIZE], one id per character.
data_transformed = np.array([char_to_ix[ch] for ch in data])

data has 1115394 characters, 65 unique.


### Hyper Parameters

In [6]:
# Minibatch geometry (passed to rnn_minibatch_sequencer in the training cell).
BATCHSIZE = 100   # sequences per minibatch
SEQLEN = 100      # characters per sequence

# Network geometry.
CELLSIZE = 512    # size handed to each GRU cell
NLAYERS = 2       # number of stacked layers (the model cell builds two cells explicitly)

### Model

A 2-layer RNN built from GRU cells.

In [7]:
# Builds the recurrent part of the graph: integer character ids are one-hot
# encoded and run through two stacked GRU cells.
# Both placeholder dimensions are None so the same graph accepts any batch size
# and sequence length.
inputs = tf.placeholder(tf.uint8, [None, None], name='inputs')           # [ BATCHSIZE, SEQLEN ] character ids
inputs_encoded = tf.one_hot(inputs, ALPHASIZE, 1.0, 0.0)                 # [ BATCHSIZE, SEQLEN, ALPHASIZE ]
# NOTE: the graph name of this placeholder is 'target' (singular), unlike the Python variable.
targets = tf.placeholder(tf.uint8, [None, None], name='target')          # [ BATCHSIZE, SEQLEN ] character ids
targets_encoded = tf.one_hot(targets, ALPHASIZE, 1.0, 0.0)               # [ BATCHSIZE, SEQLEN, ALPHASIZE ]

# Incoming hidden state, one placeholder per GRU layer; it is fed explicitly on
# every sess.run call.
hin1 = tf.placeholder(tf.float32, [None, CELLSIZE], name='hin1')         # [ BATCHSIZE, CELLSIZE ]
hin2 = tf.placeholder(tf.float32, [None, CELLSIZE], name='hin2')         # [ BATCHSIZE, CELLSIZE ]

# The stack is written out as exactly two cells; NLAYERS is not used here.
cell1 = tf.contrib.rnn.GRUCell(CELLSIZE)
cell2 = tf.contrib.rnn.GRUCell(CELLSIZE)
multi_cell = tf.contrib.rnn.MultiRNNCell([cell1, cell2])

# Unroll the stacked cells over the sequence, starting from (hin1, hin2).
ht, final_state = tf.nn.dynamic_rnn(multi_cell, inputs_encoded, dtype=tf.float32, initial_state=(hin1, hin2))
# ht: [ BATCHSIZE, SEQLEN, CELLSIZE ], the RNN output at every unrolled step
# final_state: ([ BATCHSIZE, CELLSIZE ], [ BATCHSIZE, CELLSIZE ]), the state of each layer after the last step

### Loss Function

In [8]:
# Fold the time axis into the batch axis so a single linear layer scores every
# step of every sequence.
ht_flat = tf.reshape(ht, [-1, CELLSIZE])                                 # [ BATCHSIZE * SEQLEN, CELLSIZE ]
y_logits = tf.contrib.layers.linear(ht_flat, ALPHASIZE)                  # [ BATCHSIZE * SEQLEN, ALPHASIZE ]
targets_encoded_flat = tf.reshape(targets_encoded, [-1, ALPHASIZE])      # [ BATCHSIZE * SEQLEN, ALPHASIZE ]

# Per-character cross entropy between the one-hot targets and the logits.
loss = tf.nn.softmax_cross_entropy_with_logits(labels=targets_encoded_flat, logits=y_logits) # [ BATCHSIZE * SEQLEN ]
# Restore the [batch, sequence] layout from the runtime shape of `targets`
# instead of the BATCHSIZE constant, so the graph stays valid for any batch size.
loss = tf.reshape(loss, tf.shape(targets))                               # [ BATCHSIZE, SEQLEN ]

y_prob = tf.nn.softmax(y_logits, name='y_prob')                          # [ BATCHSIZE * SEQLEN, ALPHASIZE ]
y_predict = tf.argmax(y_prob, 1)                                         # [ BATCHSIZE * SEQLEN ]
# Same reasoning as for `loss`: follow the shape of the fed `inputs`, so the
# prediction can also be fetched for a batch of one at inference time.
y_predict = tf.reshape(y_predict, tf.shape(inputs), name="y_predict")    # [ BATCHSIZE, SEQLEN ]

# The loss is handed to the optimizer unreduced (one value per character),
# exactly as before.
train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)

In [9]:
#code copied from https://github.com/martin-gorner/tensorflow-rnn-shakespeare
def rnn_minibatch_sequencer(data, batch_size, sequence_size, nb_epochs):
    """Yield (x, y, epoch) minibatches for training a stateful RNN.

    The data is cut into `batch_size` contiguous rows. Successive minibatches
    walk along those rows, so row i of one batch continues row i of the
    previous batch and the RNN state can be carried over between batches.

    Args:
        data: 1-D numpy array of token ids.
        batch_size: number of sequences per minibatch.
        sequence_size: number of tokens per sequence.
        nb_epochs: how many passes over the data to generate.

    Yields:
        x: [batch_size, sequence_size] input ids.
        y: [batch_size, sequence_size] the same ids shifted one position ahead.
        epoch: index of the current epoch, starting at 0.

    Raises:
        AssertionError: when the data cannot fill even one minibatch.
    """
    tokens_per_batch = batch_size * sequence_size
    # One element is held back because the targets are the inputs shifted by one.
    nb_batches = (data.shape[0] - 1) // tokens_per_batch
    assert nb_batches > 0, "Not enough data, even for a single batch. Try using a smaller batch_size."

    usable_len = nb_batches * tokens_per_batch
    row_len = nb_batches * sequence_size
    inputs_grid = np.reshape(data[:usable_len], [batch_size, row_len])
    targets_grid = np.reshape(data[1:usable_len + 1], [batch_size, row_len])

    # Column window covered by each minibatch.
    windows = [slice(k * sequence_size, (k + 1) * sequence_size) for k in range(nb_batches)]

    for epoch in range(nb_epochs):
        for window in windows:
            # Shift the rows up by one per epoch so the text continues from
            # epoch to epoch (the caller must not reset the RNN state).
            x = np.roll(inputs_grid[:, window], -epoch, axis=0)
            y = np.roll(targets_grid[:, window], -epoch, axis=0)
            yield x, y, epoch

### Training

In [10]:
# Open the session, then initialise every variable in the graph.
sess = tf.InteractiveSession()
inn = tf.global_variables_initializer()
sess.run(inn)

# Both GRU layers start from an all-zero hidden state.
istate = (np.zeros([BATCHSIZE, CELLSIZE]), np.zeros([BATCHSIZE, CELLSIZE]))

# `step` counts characters processed so far, not minibatches.
step = 0

for x, y_, epoch in rnn_minibatch_sequencer(data_transformed, BATCHSIZE, SEQLEN, 10):

    # One optimisation step on this minibatch, starting from the state left
    # behind by the previous minibatch.
    feed_dict = {inputs: x, targets: y_, (hin1, hin2): istate}
    _, y, ostate, c = sess.run([train_step, y_predict, final_state, loss], feed_dict=feed_dict)

    # Report loss and accuracy of the current minibatch once every 50 minibatches.
    if step % (50 * BATCHSIZE * SEQLEN) == 0:
        batchloss = c.mean()
        accuracy = (y_ == y).mean()
        print("Epoch={} cost={:.4f} accuracy={:.4f}".format(epoch, batchloss, accuracy))

    step += BATCHSIZE * SEQLEN
    # Loop the state around: this batch's final state seeds the next batch.
    istate = ostate

Epoch=0 cost=4.1774 accuracy=0.0174
Epoch=0 cost=3.3174 accuracy=0.1545
Epoch=0 cost=3.2951 accuracy=0.1512


In [17]:
# Checkpoint the live session's variables; `save` returns the path it wrote to.
saver = tf.train.Saver()
save_path = saver.save(sess, "rnn_char_seq.ckpt")
print("Model saved in file: %s" % (save_path,))

Model saved in file: rnn_char_seq.ckpt


### Inference

In [23]:
#code copied from https://github.com/martin-gorner/tensorflow-rnn-shakespeare
def sample_from_probabilities(probabilities, topn=ALPHASIZE):
    """Draw one character id from the `topn` most likely entries.

    Args:
        probabilities: array holding ALPHASIZE probabilities once singleton
            dimensions are squeezed out, e.g. shape [1, ALPHASIZE]. It is
            not modified.
        topn: number of highest-probability ids to keep; all others get
            probability zero before sampling. The default keeps the full
            distribution.

    Returns:
        The sampled character id, an integer in [0, ALPHASIZE).
    """
    # Work on a private float64 copy: np.squeeze alone returns a view, so
    # zeroing entries through it would overwrite the caller's array.
    p = np.squeeze(np.array(probabilities, dtype=np.float64))
    # argsort is ascending, so everything except the last `topn` indices is
    # the low-probability tail.
    p[np.argsort(p)[:-topn]] = 0
    # Renormalise so the kept entries sum to 1 again.
    p = p / np.sum(p)
    return np.random.choice(ALPHASIZE, 1, p=p)[0]

# Seed the generator with a fixed starting character, 'L'.
gx = np.array([[char_to_ix["L"]]])  # shape [BATCHSIZE, SEQLEN] with BATCHSIZE=1 and SEQLEN=1
gy = gx
# Characters printed since the last line break.
ncnt = 0
# Zero initial state: one [1, CELLSIZE] array per GRU layer.
gh = (np.zeros([1, CELLSIZE], dtype=np.float32), np.zeros([1, CELLSIZE], dtype=np.float32))
for i in range(1000):
    # Feed the previous character and state; get the next-character
    # distribution and the updated state.
    gyo, gh = sess.run([y_prob, final_state], feed_dict={inputs: gy, (hin1, hin2): gh})

    # If sampling is done from the topn most likely characters only, the
    # generated text is more credible and more "English". If topn is not set,
    # it defaults to the full distribution (ALPHASIZE).
    # Recommended: topn=10 for intermediate checkpoints, topn=2 or 3 for fully
    # trained checkpoints.
    gc = sample_from_probabilities(gyo, topn=2)
    gy = np.array([[gc]])  # shape [BATCHSIZE, SEQLEN] with BATCHSIZE=1 and SEQLEN=1
    gc = ix_to_char[gc]
    print(gc, end="")

    # Wrap the output after 100 characters without a newline.
    ncnt = 0 if gc == '\n' else ncnt + 1
    if ncnt == 100:
        print("")
        ncnt = 0

OUNCCEUSEUSSSUEESSZZUEEEUUUUUUZSEUZUUEEESSEEZZEEUUSZEZEUEZUEEUZZUEEEUZUSEZZEEEZEZEUSEZEUZUES:
TEO:
Whyour hare that heavist on marther are hear are.

COMINLEN:
I wall we the to mant than with hither her hath

Porent tore ard ant to mere.

PETEN:
A dine seare that the coure that thane shere hare.

CORINEO:
And to mere thee tore wore he prate to the there.

CARINCE:
Ard the that she the tore wored ard and thee the there and
And the the cande the prowe than whith the wither.

CARIOLA:
That to here the thes and thee to tour hish and and hither.

CORENEO:
And well, to the sore the withe so to to that hath

That In mant our that har her have to have the that here
To the har have the thes to hare the partherest tor here
That thathe thand hith that to the prouth soull.

PERENES:
Whan she the sore har have toust thee the wither and her
The come the cond to the paines to the panter here

Ther and tores in will her and are his that the wall
To that to to tous tore here her hit soures
That the par