In [3]:
import time
from collections import namedtuple

import numpy as np
import tensorflow as tf


In [10]:
with open('anna.txt', 'r') as f:
    text=f.read()

vocab = set(text)
vocab_to_int = {c: i for i,c in enumerate(vocab)}
int_to_vocab = dict(enumerate(vocab))
chars = np.array([vocab_to_int[c] for c in text], dtype=np.int32)

array([39, 59, 52, 50, 23, 17, 18,  2, 67, 53, 53, 53, 35, 52, 50, 50, 26,
        2, 47, 52, 80, 70,  3, 70, 17, 62,  2, 52, 18, 17,  2, 52,  3,  3,
        2, 52,  3, 70, 29, 17,  4,  2, 17, 42, 17, 18, 26,  2, 11, 81, 59,
       52, 50, 50, 26,  2, 47, 52, 80, 70,  3, 26,  2, 70, 62,  2, 11, 81,
       59, 52, 50, 50, 26,  2, 70, 81,  2, 70, 23, 62,  2, 57, 46, 81, 53,
       46, 52, 26, 65, 53, 53, 77, 42, 17, 18, 26, 23, 59, 70, 81])

In [11]:
np.max(chars)+1

83

In [18]:
def split_data(chars, batch_size, num_steps, split_frac=0.9):
    slice_size = batch_size * num_steps
    n_batches = int(len(chars) / slice_size)
    
    x = chars[: n_batches * slice_size]
    y = chars[1: n_batches*slice_size + 1]
    
    x = np.stack(np.split(x, batch_size))
    y = np.stack(np.split(y, batch_size))
    
    split_idx = int(n_batches*split_frac)
    train_x, train_y = x[:, :split_idx*num_steps], y[:, :split_idx*num_steps]
    val_x , val_y = x[:, split_idx*num_steps:], y[:, split_idx*num_steps:]
    
    return train_x, train_y, val_x, val_y

In [19]:
train_x, train_y, val_x, val_y = split_data(chars, 10, 50)

In [20]:
train_x.shape

(10, 178650)

In [21]:
def get_batch(arrs, num_steps):
    batch_size, slice_size = arrs[0].shape
    
    n_batches = int(slice_size/num_steps)
    for b in range(n_batches):
        yield [x[:, b*num_steps: (b+1)*num_steps] for x in arrs]

In [22]:
def build_rnn(num_classes, batch_size=50, num_steps=50,
              lstm_size=128, num_layers=2, learning_rate=0.001, grad_clip=5,
              sampling=False):
    if sampling == True:
        batch_size, num_steps = 1, 1
    
    tf.reset_default_graph()
    
    inputs = tf.placeholder(tf.int32, [batch_size, num_steps], name='inputs')
    targets = tf.placeholder(tf.int32, [batch_size, num_steps], name='targers')
    
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    
    x_one_hot = tf.one_hot(inputs, num_classes)
    y_one_hot = tf.one_hot(targets, num_classes)
    
    #lstm_size is the number of hidden units in the nn :D
    lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)
    drop = tf.contrib.DropoutWrapper(lstm, output_keep_prob=keep_prob)
    
    cell = tf.contrib.rnn.MultiRNNCell([drop]*num_layers)
    initial_state = cell.zero_state(batch_size, tf.float32)
    
    rnn_inputs = [tf.squeeze(i, squeeze_dims=[1]) for i in tf.split(x_one_hot, num_steps, 1)]
    
    outputs, state = tf.contrib.rnn.static_rnn(cell, rnn_inputs, initial_state=initial_state)
    final_state = state
    
    seq_output = tf.concat(outputs, axix=1)
    output = tf.reshape(seq_output, [-1, lstm_size])
    
    with tf.variable_scope('softmax'):
        softmax_w = tf.Variable(tf.truncated_normal((lstm_size, num_classes)), stddev=0.1)
        softmax_b = tf.Variable(tf.zeros(num_classes))
        
    logits = tf.matmul(output, softmax_w) + softmax_b
    preds = tf.nn.softmax(logits, name='predictions')
    
    y_reshaped = tf.reshape(y_one_hot, [-1, num_classes])
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_reshaped)
    cost = tf.reduce_mean(loss)
    
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), grad_clip)
    train_op = tf.train.AdamOptimizer(learning_rate)
    optimizer = train_op.apply_gradients(zip(grads, tvars))
    
    export_nodes = ['inputs', 'targets', 'initial_state', 'final_state',
                    'keep_prob', 'cost', 'preds', 'optimizer']
    Graph = namedtuple('Graph', export_nodes)
    local_dict = locals()
    graph = Graph(*[local_dict[each] for each in export_nodes])
    
    return graph

In [23]:
batch_size = 100
num_steps = 100
lstm_size = 512
num_layers = 2
learning_rate = 0.001
keep_prob=0.5

In [None]:
epochs = 20
save_every_n = 200
train_x, train_y, val_x, val_y = split_data(chars, batch_size, num_steps)

model = build_rnn(len(vocab),
                  batch_size=batch_size,
                  num_steps=num_steps,
                  learning_rate=learning_rate,
                  lstm_size=lstm_size,
                  num_layers=num_layers)

saver = tf.train.Saver(max_to_keep=100)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    n_batches = int(train_x.shape[1]/num_steps)
    iterations = n_batches * epochs
    for e in range(epochs):
        
        new_state = sess.run(model.initial_state)
        loss = 0
        for b, (x,y) in enumerate(get_batch([train_x, train_y], num_steps), 1):
            iteration = e*n_batches + b
            start = time.time()
            feed = {model.inputs: x, 
                    model.targets: y, 
                    model.keep_prob: keep_prob,
                    model.initial_state: new_state}
            batch_loss, new_state, _ = sess.run([model.cost, model.final_state, model.optimizer],
                                               feed_dict=feed)
            loss += batch_loss
            end = time.time()
            print('Epoch {}/{} '.format(e+1, epochs),
                  'Iteration {}/{}'.format(iteration, iterations),
                  'Training loss: {:,.4f}'.format(loss/b),
                  '{:.4f} sec/batch'.format((end-start)))
            
            if (iteration%save_every_n == 0) or (iteration == iterations):
                val_loss = []
                new_state = sess.run(model.initial_state)
                for x,y in get_batch([val_x, val_y], num_steps):
                    feed = {model.inputs: x,
                            model.targets: y,
                            model.keep_prob: 1., 
                            model.initial_state: new_state}
                    batch_loss, new_state = sess.run([model.cost, model.final_state], feed_dict=feed)
                    val_loss.append(batch_loss)
                    
                print('Validation loss:', np.mean(val_loss),
                      'Saving checkpoint!')
                saver.save(sess, "checkpoints/i{}_v{:.3f}.ckpt".format(iteration, lstm_size, np.mean(val_loss)))