## Imports

In [None]:
import numpy as np
import tensorflow as tf
%matplotlib inline
import matplotlib.pyplot as plt
import time
import math
import os
import urllib.request
import ptb_iterator as reader

# default size (in inches) of every figure drawn in this notebook
plt.rcParams['figure.figsize'] = (10, 7)

## Load and process data

In [None]:
# Set to True to download and train on Tiny Shakespeare instead of the local corpus.
USE_TINY_SHAKESPEARE = False

if USE_TINY_SHAKESPEARE:
    file_url = 'https://raw.githubusercontent.com/jcjohnson/torch-rnn/master/data/tiny-shakespeare.txt'
    file_name = 'tinyshakespeare.txt'

    # download only once; later runs reuse the local copy
    if not os.path.exists(file_name):
        urllib.request.urlretrieve(file_url, file_name)
else:
    file_name = 'vm.txt'  # Leo Tolstoy; alternative corpus: 'majakovski.txt'

# NOTE(review): no explicit encoding is given, so the platform default is used —
# confirm the corpus encoding and pass it explicitly if it differs.
with open(file_name, 'r') as f:
    raw_data = f.read()

# len() of a str counts characters, not bytes
print('Data length: {} characters'.format(len(raw_data)))

vocab = set(raw_data)
vocab_size = len(vocab)

# Enumerate the *sorted* vocabulary: set iteration order of strings changes
# between interpreter runs (hash randomization), which would silently remap
# every character index and make saved checkpoints decode to the wrong text.
idx_to_vocab = dict(enumerate(sorted(vocab)))
vocab_to_idx = {char: idx for idx, char in idx_to_vocab.items()}

# cross-validation set size: 5% of the dataset, measured in characters
cv_size = int(len(raw_data) * 0.05)

# the corpus encoded as a list of vocabulary indices
data = [vocab_to_idx[c] for c in raw_data]

# the raw text is no longer needed; free the memory
del raw_data

In [None]:
# utility functions
def gen_epochs(n, seq_len, batch_size):
    """Lazily yield one fresh batch iterator per epoch.

    Args:
        n: number of epochs to produce.
        seq_len: sequence length handed to ``reader.ptb_iterator``.
        batch_size: batch size handed to ``reader.ptb_iterator``.

    Yields:
        The object returned by ``reader.ptb_iterator(data, batch_size, seq_len)``,
        created anew for every epoch from the module-level ``data``.
    """
    yield from (reader.ptb_iterator(data, batch_size, seq_len) for _ in range(n))

In [None]:
# Display the cross-validation set size computed in the data-loading cell.
# NOTE(review): no cell visible in this notebook uses cv_size to hold out data —
# confirm whether a validation split was intended.
cv_size

# Build the graph

## Cell factory

In [None]:
def create_rnn_cells(cell_type, state_size, num_layers, pkeep):
    """Build the per-layer RNN cells, each wrapped with input dropout.

    Args:
        cell_type: 'GRU' or 'LSTM'; any other value selects a BasicRNNCell.
        state_size: number of units in every cell.
        num_layers: how many cells to create.
        pkeep: keep probability passed to each DropoutWrapper as input_keep_prob.

    Returns:
        A tuple ``(cells, state_is_tuple)``: the list of wrapped cells and a flag
        that is True only for 'LSTM', whose cells are created with state_is_tuple=True.
    """
    uses_tuple_state = cell_type == 'LSTM'

    def build_layer():
        # pick the bare cell first, then add dropout on its input
        if cell_type == 'GRU':
            core = tf.contrib.rnn.GRUCell(state_size)
        elif uses_tuple_state:
            core = tf.contrib.rnn.LSTMCell(state_size, state_is_tuple=True)
        else:
            core = tf.contrib.rnn.BasicRNNCell(state_size)
        return tf.contrib.rnn.DropoutWrapper(core, input_keep_prob=pkeep)

    layers = []
    for _ in range(num_layers):
        layers.append(build_layer())

    return (layers, uses_tuple_state)

In [None]:
# input parameters
CELLTYPE = 'Basic'  # 'GRU' or 'LSTM'; any other value makes create_rnn_cells use BasicRNNCell
SEQLEN = 4  # number of characters per training sequence
NCLASSES = vocab_size  # depth of the one-hot encoding: one class per distinct character
NLAYERS = 3  # number of stacked RNN cells
INTERNALSIZE = 100  # state size of every cell
BATCHSIZE = 64  # number of sequences per training batch

learning_rate = 1e-4  # fed to the `lrate` placeholder on every training step
dropout_pkeep = 1.0  # dropout keep probability fed during training; 1.0 keeps everything

## The graph

In [None]:
# placeholders
lrate = tf.placeholder(tf.float32, name='learningrate')  # learning rate for the optimizer
pkeep = tf.placeholder(tf.float32, name='pkeep')  # dropout keep probability
batchsize = tf.placeholder(tf.int32, name='batchsize')  # used to fold flattened tensors back into rows

# input/output
x = tf.placeholder(tf.int32, [None, None], name='input_placeholder') # [BATCHSIZE x SEQLEN]
rnn_inputs = tf.one_hot(x, NCLASSES, 1.0, 0.0) # [BATCHSIZE x SEQLEN x NUMCLASSES]

y = tf.placeholder(tf.int32, [None, None], name='labels_placeholder') # [BATCHSIZE x SEQLEN]
y_oh = tf.one_hot(y, NCLASSES, 1.0, 0.0) # [BATCHSIZE x SEQLEN x NUMCLASSES]

# RNN cells (each one already applies dropout to its own input)
cells, state_is_tuple = create_rnn_cells(cell_type=CELLTYPE,
                                         state_size=INTERNALSIZE,
                                         num_layers=NLAYERS,
                                         pkeep=pkeep)

# The state is fed and fetched below as one flat [BATCHSIZE x NLAYERS * INTERNALSIZE]
# tensor, which cannot represent a tuple state; fail early with a clear message.
if state_is_tuple:
    raise NotImplementedError(
        "CELLTYPE={!r} uses a tuple state, which the flat 'init_state' placeholder "
        "cannot feed; use 'GRU' or 'Basic'".format(CELLTYPE))

# combine them to a multicell
multicell = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=state_is_tuple)
# Dropout on the stack's *output* (the input of the softmax layer). Wrapping with
# input_keep_prob here would drop the first layer's input a second time, because
# every cell is already wrapped with input dropout, and leave the output undropped.
multicell = tf.contrib.rnn.DropoutWrapper(multicell, output_keep_prob=pkeep)

init_state = tf.placeholder(tf.float32, [None, NLAYERS * INTERNALSIZE],
                            name='init_state') # [BATCHSIZE x NLAYERS * INTERNALSIZE]

# rnn_outputs: [BATCHSIZE x SEQLEN x INTERNALSIZE]
# final_state: [BATCHSIZE x NLAYERS * INTERNALSIZE]
rnn_outputs, final_state = tf.nn.dynamic_rnn(multicell, rnn_inputs,
                                             dtype=tf.float32, initial_state=init_state)

# just to give it a name
final_state = tf.identity(final_state, name='final_state')

In [None]:
# softmax layer: flatten the RNN outputs and project each one to per-class logits
rnn_outputs = tf.reshape(rnn_outputs, [-1, INTERNALSIZE]) # [BATCHSIZE * SEQLEN x INTERNALSIZE]
logits = tf.contrib.layers.linear(rnn_outputs, NCLASSES) # [BATCHSIZE * SEQLEN x NCLASSES]

# flatten the one-hot labels the same way so they line up with the logits
y_reshaped = tf.reshape(y_oh, [-1, NCLASSES]) # [BATCHSIZE * SEQLEN x NCLASSES]

# cross-entropy per character, computed flat and then folded back to one row per sequence
loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_reshaped) # [BATCHSIZE * SEQLEN]
loss = tf.reshape(loss, [batchsize, -1]) # [BATCHSIZE, SEQLEN]
# NOTE(review): `loss` is a [BATCHSIZE, SEQLEN] tensor rather than a scalar when passed
# to minimize() — presumably its elements are summed; confirm this scaling is intended.
train_step = tf.train.AdamOptimizer(lrate).minimize(loss)

In [None]:
# TODO: rename it
# Yo: class probabilities for every character position; Y: the most probable class index
Yo = tf.nn.softmax(logits) # [BATCHSIZE * SEQLEN, NCLASSES]
Y = tf.argmax(Yo, axis=1) # [BATCHSIZE * SEQLEN]
Y = tf.reshape(Y, [batchsize, -1]) # [BATCHSIZE, SEQLEN]

In [None]:
# stats for display
seqloss = tf.reduce_mean(loss, axis=1)  # mean loss of each sequence
batchloss = tf.reduce_mean(seqloss)  # mean of the per-sequence losses
# fraction of positions where the arg-max prediction Y equals the label y
accuracy = tf.reduce_mean(tf.cast(tf.equal(y, tf.cast(Y, tf.int32)), tf.float32))

# summaries: both scalars merged into one op so a single fetch records them together
loss_summary = tf.summary.scalar('batch_loss', batchloss)
acc_summary = tf.summary.scalar('batch_accuracy', accuracy)
summaries = tf.summary.merge([loss_summary, acc_summary])

# Training

## Init the session

In [None]:
# summary writer & saver
# the start time (whole seconds) tags both the log directory and the checkpoint files
timestamp = str(math.trunc(time.time()))
summary_writer = tf.summary.FileWriter('log/' + timestamp + '-training')

# save me
save_dir = 'checkpoints/'

# exist_ok makes this a single idempotent call: safe on re-runs and free of the
# check-then-create race of a separate os.path.exists() test
os.makedirs(save_dir, exist_ok=True)

# keep only the most recent checkpoint on disk
saver = tf.train.Saver(max_to_keep=1)

In [None]:
# init session
# NOTE(review): nothing visible in this notebook closes the session — confirm that is intended.
session = tf.Session()
# give every variable of the graph its initial value before training starts
session.run(tf.global_variables_initializer())

## Helper function to generate a sample of text

In [None]:
def generate_characters(num_chars, prompt='A', pick_top_chars=None):
    """Sample text from the current model, one character at a time.

    Args:
        num_chars: number of characters to generate after the prompt.
        prompt: non-empty seed text; every character must be a key of
            ``vocab_to_idx``. All characters but the last are first run through
            the network to prime its state, so a one-character prompt behaves
            exactly as a plain single-character seed.
        pick_top_chars: if not None, sampling is restricted to this many most
            probable characters at every step.

    Returns:
        The prompt followed by ``num_chars`` sampled characters, as one string.

    Raises:
        ValueError: if ``prompt`` is empty.
        KeyError: if ``prompt`` contains a character that is not in the vocabulary.
    """
    if not prompt:
        raise ValueError('prompt must contain at least one character')

    chars = [vocab_to_idx[c] for c in prompt]
    state = np.zeros([1, INTERNALSIZE * NLAYERS])

    # prime the recurrent state with every prompt character except the last one
    for warmup_char in chars[:-1]:
        feed_dict = {x: [[warmup_char]], init_state: state, pkeep: 1.0, batchsize: 1}
        state = session.run(final_state, feed_dict)

    current_char = chars[-1]

    for _ in range(num_chars):
        # batch of one sequence of one character; dropout disabled for inference
        feed_dict = {x: [[current_char]], init_state: state, pkeep: 1.0, batchsize: 1}

        preds, state = session.run([Yo, final_state], feed_dict)

        # work on a float64 copy: the fetched array is left untouched and the
        # probabilities can be renormalised precisely for np.random.choice
        p = np.squeeze(preds).astype(np.float64)

        if pick_top_chars is not None:
            # zero out everything except the `pick_top_chars` most probable classes
            p[np.argsort(p)[:-pick_top_chars]] = 0

        p = p / np.sum(p)

        current_char = np.random.choice(vocab_size, 1, p=p)[0]

        chars.append(current_char)

    return "".join(idx_to_vocab[idx] for idx in chars)

## Training

In [None]:
# training
DISPLAY_FREQ = 200  # print a generated sample every DISPLAY_FREQ batches
ND_BATCHES = DISPLAY_FREQ * BATCHSIZE * SEQLEN  # the same interval, in characters

step = 0  # number of characters trained on so far
istate = np.zeros([BATCHSIZE, NLAYERS * INTERNALSIZE])  # zero state for the very first batch

for idx, epoch in enumerate(gen_epochs(1, SEQLEN, BATCHSIZE)):
    for xe, ye in epoch:
        # skip any batch whose sequence length differs from SEQLEN
        if xe.shape[1] != SEQLEN or ye.shape[1] != SEQLEN:
            continue

        feed_dict = {x: xe, y: ye,
                     init_state: istate,
                     lrate: learning_rate,
                     pkeep: dropout_pkeep,
                     batchsize: BATCHSIZE
                    }
        # y_ (the predicted indices) is not used in the cells visible here
        _, y_, ostate, smm = session.run([train_step, Y, final_state, summaries], feed_dict=feed_dict)

        summary_writer.add_summary(smm, step)

        # display a short text generated with the current weights and biases
        if step % ND_BATCHES == 0:
            print('e: {0}, s: {1}: {2}'.format(idx, step,
                                               generate_characters(256, prompt=' ', pick_top_chars=10)))

        # the final state of this batch is the initial state of the next one
        istate = ostate
        step += BATCHSIZE * SEQLEN

    print('epoch {} done'.format(idx))
    saver.save(session, save_dir + 'rnn_train_' + timestamp, global_step=step)
    # the writer buffers events; flush so the logged summaries of this epoch
    # reach disk even though the writer is never closed
    summary_writer.flush()

In [None]:
# Sample 256 characters from the trained model, seeded with a space and drawing
# each character from the 10 most probable candidates.
generate_characters(256, prompt=' ', pick_top_chars=10)