In [38]:
# Adapted from : https://r2rt.com/recurrent-neural-networks-in-tensorflow-ii.html

import numpy as np
import tensorflow as tf
%matplotlib inline
import matplotlib.pyplot as plt
import time
import os
import urllib.request
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [39]:
def ptb_iterator(raw_data, batch_size, num_steps, steps_ahead=1):

    raw_data = np.array(raw_data, dtype=np.int32)
    data_len = len(raw_data)
    batch_len = data_len // batch_size
    data = np.zeros([batch_size, batch_len], dtype=np.int32)
    offset = 0
    if data_len % batch_size:
        offset = np.random.randint(0, data_len % batch_size)
    for i in range(batch_size):
        data[i] = raw_data[batch_len * i + offset:batch_len * (i + 1) + offset]
    epoch_size = (batch_len - steps_ahead) // num_steps
    if epoch_size == 0:
        raise ValueError("epoch_size == 0, decrease batch_size or num_steps")
    for i in range(epoch_size):
        x = data[:, i*num_steps:(i+1)*num_steps]
        y = data[:, i*num_steps+1:(i+1)*num_steps+steps_ahead]
        yield (x, y)
    if epoch_size * num_steps < batch_len - steps_ahead:
        yield (data[:, epoch_size*num_steps : batch_len - steps_ahead], data[:, epoch_size*num_steps + 1:])


def shuffled_ptb_iterator(raw_data, batch_size, num_steps):
    raw_data = np.array(raw_data, dtype=np.int32)
    r = len(raw_data) % num_steps
    if r:
        n = np.random.randint(0, r)
        raw_data = raw_data[n:n + len(raw_data) - r]
    raw_data = np.reshape(raw_data, [-1, num_steps])
    np.random.shuffle(raw_data)
    num_batches = int(np.ceil(len(raw_data) / batch_size))
    for i in range(num_batches):
        data = raw_data[i*batch_size:min(len(raw_data), (i+1)*batch_size),:]
        yield (data[:,:-1], data[:,1:])

In [40]:
saver = tf.train.Saver()

In [41]:
"""
Load and process data, utility functions
"""

file_url = 'https://raw.githubusercontent.com/jcjohnson/torch-rnn/master/data/tiny-shakespeare.txt'
file_name = 'tinyshakespeare.txt'
if not os.path.exists(file_name):
    urllib.request.urlretrieve(file_url, file_name)

with open(file_name,'r') as f:
    raw_data = f.read()
    print("Data length:", len(raw_data))

vocab = set(raw_data)
vocab_size = len(vocab)
idx_to_vocab = dict(enumerate(vocab))
vocab_to_idx = dict(zip(idx_to_vocab.values(), idx_to_vocab.keys()))

data = [vocab_to_idx[c] for c in raw_data]
del raw_data

def gen_epochs(n, num_steps, batch_size):
    for i in range(n):
        yield ptb_iterator(data, batch_size, num_steps)

def reset_graph():
    if 'sess' in globals() and sess:
        sess.close()
    tf.reset_default_graph()

def train_network(g, num_epochs, num_steps = 200, batch_size = 32, verbose = True, save=False):
    tf.set_random_seed(2345)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        training_losses = []
        for idx, epoch in enumerate(gen_epochs(num_epochs, num_steps, batch_size)):
            training_loss = 0
            steps = 0
            training_state = None
            for X, Y in epoch:
                steps += 1

                if X.shape[1] != g['x'].shape[1]:
                    break 
                    
                feed_dict={g['x']: X, g['y']: Y}
                if training_state is not None:
                    feed_dict[g['init_state']] = training_state
                training_loss_, training_state, _ = sess.run([g['total_loss'],
                                                      g['final_state'],
                                                      g['train_step']],
                                                             feed_dict)
                training_loss += training_loss_
            if verbose:
                print("Average training loss for Epoch", idx, ":", training_loss/steps)
            training_losses.append(training_loss/steps)

        g['saver'].save(sess, "model.ckpt")

    return training_losses

Data length: 1115394


In [42]:
def build_basic_rnn_graph_with_list(
    state_size = 100,
    num_classes = vocab_size,
    batch_size = 32,
    num_steps = 200,
    learning_rate = 1e-4):

    reset_graph()

    x = tf.placeholder(tf.int32, [batch_size, num_steps], name='input_placeholder')
    y = tf.placeholder(tf.int32, [batch_size, num_steps], name='labels_placeholder')

    x_one_hot = tf.one_hot(x, num_classes) # <n, t, f>
    rnn_inputs = [tf.squeeze(i,squeeze_dims=[1]) for i in tf.split(x_one_hot, num_steps, axis=1)]

    cell = tf.contrib.rnn.BasicRNNCell(state_size)
    init_state = cell.zero_state(batch_size, tf.float32)
    rnn_outputs, final_state = tf.contrib.rnn.static_rnn(cell, rnn_inputs, initial_state=init_state)

    with tf.variable_scope('softmax'):
        W = tf.get_variable('W', [state_size, num_classes])
        b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))
    logits = [tf.matmul(rnn_output, W) + b for rnn_output in rnn_outputs]
    predictions = [tf.nn.softmax(logit) for logit in logits]

    y_as_list = [tf.squeeze(i, squeeze_dims=[1]) for i in tf.split(y, num_steps, axis=1)] # t arrays of tensor <n, o>
#     print(y_as_list)

    loss_weights = [tf.ones([batch_size]) for i in range(num_steps)]
    losses = tf.contrib.legacy_seq2seq.sequence_loss_by_example(logits, y_as_list, loss_weights)
    total_loss = tf.reduce_mean(losses)
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)
    saver = tf.train.Saver()

    return dict(
        x = x,
        y = y,
        init_state = init_state,
        final_state = final_state,
        total_loss = total_loss,
        train_step = train_step,
        preds = predictions,
        saver = saver
    )

In [52]:
t = time.time()
g = build_basic_rnn_graph_with_list()
print("It took", time.time() - t, "seconds to build the graph.")

It took 11.272886514663696 seconds to build the graph.


In [53]:
t = time.time()
train_network(g, 10)
print("It took", time.time() - t, "seconds to train for 10 epochs.")

Average training loss for Epoch 0 : 3.6946648407
Average training loss for Epoch 1 : 3.27801978929
Average training loss for Epoch 2 : 3.21488930293
Average training loss for Epoch 3 : 3.10075888089
Average training loss for Epoch 4 : 2.97092319897
Average training loss for Epoch 5 : 2.8610919748
Average training loss for Epoch 6 : 2.76816867692
Average training loss for Epoch 7 : 2.68949493544
Average training loss for Epoch 8 : 2.62147042274
Average training loss for Epoch 9 : 2.56234738895
It took 148.73711967468262 seconds to train for 10 epochs.


In [57]:

def generate_characters(g, checkpoint, num_chars, prompt='A', pick_top_chars=None):
    """ Accepts a current character, initial state"""

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        if checkpoint is None:
            g['saver'].restore(sess, tf.train.latest_checkpoint('.'))

        state = None
        current_char = vocab_to_idx[prompt]
        chars = [current_char]

        for i in range(num_chars):
            if state is not None:
                feed_dict={g['x']: [[current_char]], g['init_state']: state}
            else:
                feed_dict={g['x']: [[current_char]]}

            preds, state = sess.run([g['preds'],g['final_state']], feed_dict)

            if pick_top_chars is not None:
                p = np.squeeze(preds)
                p[np.argsort(p)[:-pick_top_chars]] = 0
                p = p / np.sum(p)
                current_char = np.random.choice(vocab_size, 1, p=p)[0]
            else:
                current_char = np.random.choice(vocab_size, 1, p=np.squeeze(preds))[0]

            chars.append(current_char)

    chars = map(lambda x: idx_to_vocab[x], chars)
    print("".join(chars))
    return("".join(chars))

In [50]:
# Training after 1 epoch
g = build_basic_rnn_graph_with_list(batch_size=1, num_steps = 1)
generate_characters(g, None, 750, prompt='A', pick_top_chars=5)

INFO:tensorflow:Restoring parameters from ./model.ckpt
[[13]]
Au,Iuh   tt e   tt  o eeoo eoth to o eeeo    oo eo ha  t o  ee e oo h e h e  t tr eto t toeeee eaeoa ee e  ot e e ete he toeaea oe t he   o  o t   etat ee  t teeoooo he oe hoe  eeetha eo  e  hae h ot httee t tet t  oa ht   het toa  o   e  eoe e  t  to ha to  o ha  eae   h teaet heo ttee eaaae  to e e h t       he     ea t  oa te eeeooh  e      ttoa ht  te  o t  ht hoothoeaoo e  ot  ot  o    oe ea ee e  t h hte oee ho  e heetoo eaaee het hothe eaa heo e e     h e oeeo  e     o oe tt  e tte hotto e  oot  t     totoot    eeotoo too  ottooeo  te  eteaeoa    o  t  o t tet taeoe e ootet    tea  e e  oee   heoo h   e to o e  hoe et  eeee t   et e th ooahaee eee ootet o hr   he eo   h  eaeoohe t   ea  ooo eooe  h e ho e e  t t e th o     h e te eo e


''

In [59]:
# Training after 10 epoch
g = build_basic_rnn_graph_with_list(batch_size=1, num_steps = 1)
generate_characters(g, None, 1000, prompt='A', pick_top_chars=5)

INFO:tensorflow:Restoring parameters from ./model.ckpt
ARd IC:
Wive weres tho se aor weale bott int and titt ee toe tou the thor sert an al sores or thir ait the th me tite wale the thime sare ton erithes

has soeeees ta the hh thit ao din  oud tald boertere sis andt ie th teor arate bathet tous the that our and seree 
aats tisl so lare be the wot tires,
I  er ine then sen sorte teathe thas tire senee 
on tin taas  ou hat thir, toe therthes  hin donge se mhount  or alt oo thirh mit and we than toun satder,er tor the soul tond toe tont
toe hheres thes ood tor te she lond ar wolle se mat aad thete ao the lald botle tind whal, th mirhen thimh wh lonet

oer ard ar  aothe the hor the sh seeer an ald wongh mous sare,
Th tor sisl oo sot ian andi t aod boe sothen misthas ao toe dar soust tate tor teond warling oratho  hatle wanthi des aad tous te lo lerere she taat ao sist

n tit aa sise tererat  oarss wou de tord ans sithet,

he  aat orerstond tie  orderes  aot aal tore tha  ootther  ie thes 

''