In [1]:
# import packages
import os
import time
from collections import namedtuple

import numpy as np
import tensorflow as tf

# also import debugger
from IPython.core.debugger import set_trace
#set_trace()

In [2]:
'''
Load anna.txt file and start preprocessing
'''
# load file
with open('anna.txt', 'r') as f:
    text=f.read()

# select unique strings(alphabet)
vocab = set(text)

# assign numbers to each alphabet
vocab_to_int = {c: i for i, c in enumerate(vocab, 0)}
int_to_vocab = dict(enumerate(vocab))
# vocab_to_int = {}
# int_to_vocab = {}
# for i, c in enumerate(vocab, 0):
#     vocab_to_int[c] = i
#     int_to_vocab[i] = c

    
encoded = np.array( [vocab_to_int[c] for c in text], dtype=np.int32 )

In [3]:
def get_batches(arr, n_seqs, n_steps):
    '''Create a generator that returns batches of size
       n_seqs x n_steps from arr.

       Each batch is a pair (x, y) where y holds, for every position of x,
       the element that follows it in the (truncated) input stream. Only the
       very last element of the stream has no successor; its target wraps
       around to the first element.

       Arguments
       ---------
       arr: 1-D array (or sequence) you want to make batches from
       n_seqs: Batch size, the number of sequences per batch
       n_steps: Number of sequence steps per batch

       Yields
       ------
       (x, y): two arrays of shape (n_seqs, n_steps). Nothing is yielded when
       arr holds fewer than n_seqs * n_steps elements.
    '''
    arr = np.asarray(arr)

    # Number of elements consumed by one batch, and how many full batches fit
    chars_per_batch = n_seqs * n_steps
    n_batches = len(arr) // chars_per_batch

    # Keep only enough characters to make full batches
    arr = arr[:n_batches * chars_per_batch]

    # Targets are the inputs shifted left by one over the whole stream, so the
    # last column of every window gets the true next element instead of the
    # window's own first element.
    shifted = np.roll(arr, -1)

    # Reshape both into n_seqs rows
    arr = arr.reshape((n_seqs, -1))
    shifted = shifted.reshape((n_seqs, -1))

    for n in range(0, arr.shape[1], n_steps):
        # Features and their one-step-ahead targets for this window
        yield arr[:, n:n+n_steps], shifted[:, n:n+n_steps]

In [4]:
# test case
batches = get_batches(encoded, 10, 50)
x, y = next(batches)
print('x.shape: ', x.shape)
print('y.shape: ', y.shape)
print('x\n', x[:10, :10])
print('\ny\n', y[:10, :10])

x.shape:  (10, 50)
y.shape:  (10, 50)
x
 [[39  0 44  2 19  9 30 20 27 79]
 [20 44 76 20 67 29 19 20 56 29]
 [60 42 67 57 79 79 48 78  9 69]
 [67 20  6 28 30 42 67 56 20  0]
 [20 42 19 20 42 69 51 20 69 42]
 [20 49 19 20 52 44 69 79 29 67]
 [ 0  9 67 20 12 29 76  9 20 32]
 [55 20 70 28 19 20 67 29 52 20]
 [19 20 42 69 67 77 19 57 20 11]
 [20 69 44 42  6 20 19 29 20  0]]

y
 [[ 0 44  2 19  9 30 20 27 79 79]
 [44 76 20 67 29 19 20 56 29 42]
 [42 67 57 79 79 48 78  9 69 51]
 [20  6 28 30 42 67 56 20  0 42]
 [42 19 20 42 69 51 20 69 42 30]
 [49 19 20 52 44 69 79 29 67 21]
 [ 9 67 20 12 29 76  9 20 32 29]
 [20 70 28 19 20 67 29 52 20 69]
 [20 42 69 67 77 19 57 20 11  0]
 [69 44 42  6 20 19 29 20  0  9]]


In [5]:
def build_inputs(batch_size, num_steps):
    ''' Create the graph placeholders for inputs, targets and dropout.

        Arguments
        ---------
        batch_size: Batch size, number of sequences per batch
        num_steps: Number of sequence steps in a batch

        Returns
        -------
        (inputs, targets, keep_prob): two int32 placeholders of shape
        [batch_size, num_steps] and one float32 placeholder without a
        declared shape for the dropout keep probability.
    '''
    # Inputs and targets share the same [batch_size, num_steps] layout
    batch_shape = [batch_size, num_steps]
    inputs = tf.placeholder(tf.int32, batch_shape, name='inputs')
    targets = tf.placeholder(tf.int32, batch_shape, name='targets')

    # Fed at run time so training and sampling can use different values
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')

    return inputs, targets, keep_prob

In [6]:
def build_lstm(lstm_size, num_layers, batch_size, keep_prob):
    ''' Build a stacked LSTM cell with dropout on each layer's output.

        Arguments
        ---------
        lstm_size: Size of the hidden layers in the LSTM cells
        num_layers: Number of LSTM layers
        batch_size: Batch size
        keep_prob: Scalar tensor (tf.placeholder) for the dropout keep probability

        Returns
        -------
        (cell, initial_state): the multi-layer cell and its all-zero float32
        initial state for batch_size sequences.
    '''
    def build_cell():
        # One basic LSTM cell wrapped with dropout on its outputs
        lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)
        return tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)

    # Stack up multiple LSTM layers. Every layer gets its own cell object:
    # repeating one object ([cell] * num_layers) makes all layers refer to the
    # same cell, which newer TensorFlow 1.x releases reject or treat as
    # weight sharing between layers with different input sizes.
    cell = tf.contrib.rnn.MultiRNNCell([build_cell() for _ in range(num_layers)])
    initial_state = cell.zero_state(batch_size, tf.float32)

    return cell, initial_state

In [7]:
def build_output(lstm_output, in_size, out_size):
    ''' Put a fully connected softmax layer on top of the RNN outputs.

        Arguments
        ---------
        lstm_output: Output of the RNN layers
        in_size: Width the output is flattened to, for example, size of the LSTM cells
        out_size: Number of units of this softmax layer

        Returns
        -------
        (out, logits): the softmax probabilities (op named 'predictions')
        and the logits they were computed from.
    '''
    # Flatten to a 2-D tensor with in_size columns so that a single matrix
    # multiplication handles every row at once
    stacked = tf.concat( lstm_output, axis=1 )
    flat_rows = tf.reshape( stacked, [-1, in_size] )

    # Layer parameters live in the 'softmax' scope; weights are created
    # before the biases
    with tf.variable_scope('softmax'):
        weights = tf.Variable( tf.truncated_normal( (in_size, out_size), stddev=0.1 ) )
        biases = tf.Variable( tf.zeros(out_size) )

    # One row of logits per flattened input row
    logits = tf.matmul(flat_rows, weights) + biases

    # Probabilities for the predicted characters
    return tf.nn.softmax(logits=logits, name='predictions'), logits

In [8]:
def build_loss(logits, targets, lstm_size, num_classes):
    ''' Mean softmax cross-entropy between the logits and the targets.

        Arguments
        ---------
        logits: Logits from final fully connected layer
        targets: Targets for supervised learning
        lstm_size: Number of LSTM hidden units (accepted but not used here)
        num_classes: Number of classes in targets

        Returns
        -------
        The cross-entropy averaged over all rows of logits.
    '''
    # One-hot encode the targets, then give them the same shape as the logits
    one_hot_targets = tf.one_hot( targets, num_classes )
    labels = tf.reshape( one_hot_targets, logits.get_shape() )

    # Cross-entropy per row, reduced to a single mean value
    per_row_loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)
    return tf.reduce_mean(per_row_loss)

In [9]:
def build_optimizer(loss, learning_rate, grad_clip):
    ''' Build the training op: Adam with gradients clipped by global norm.

        Arguments
        ---------
        loss: Network loss
        learning_rate: Learning rate for optimizer
        grad_clip: Clipping value passed to tf.clip_by_global_norm

        Returns
        -------
        The op that applies the clipped gradients to the trainable variables.
    '''
    # Gradients of the loss w.r.t. every trainable variable, clipped together
    # to control exploding gradients
    trainable = tf.trainable_variables()
    raw_grads = tf.gradients(loss, trainable)
    clipped_grads, _ = tf.clip_by_global_norm(raw_grads, grad_clip)

    adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
    return adam.apply_gradients(zip(clipped_grads, trainable))

In [10]:
class CharRNN:
    ''' Character-level RNN graph: placeholders, stacked LSTM, softmax output,
        loss and training op.

        Constructing an instance resets the default TensorFlow graph and
        builds the network into it. The pieces needed at run time are exposed
        as attributes: inputs, targets, keep_prob, initial_state, final_state,
        prediction, logits, loss and optimizer.
    '''
    def __init__(self, num_classes, batch_size=64, num_steps=50, 
                       lstm_size=128, num_layers=2, learning_rate=0.001, 
                       grad_clip=5, sampling=False):

        # For sampling the network is fed one character at a time, so the
        # requested batch geometry is replaced by a single 1 x 1 step
        if sampling == True:
            batch_size = num_steps = 1

        tf.reset_default_graph()

        # Input placeholders
        self.inputs, self.targets, self.keep_prob = build_inputs(batch_size, num_steps)

        # Stacked LSTM cell and its zero state
        stacked_cell, self.initial_state = build_lstm(lstm_size, num_layers, batch_size, self.keep_prob)

        # One-hot encode the input tokens and run every sequence step through
        # the RNN, keeping the final state so it can be fed back in
        one_hot_inputs = tf.one_hot(self.inputs, num_classes)
        rnn_outputs, self.final_state = tf.nn.dynamic_rnn(
            stacked_cell, one_hot_inputs, initial_state=self.initial_state)

        # Softmax predictions and logits
        self.prediction, self.logits = build_output(rnn_outputs, lstm_size, num_classes)

        # Loss and optimizer (with gradient clipping)
        self.loss = build_loss(self.logits, self.targets, lstm_size, num_classes)
        self.optimizer = build_optimizer(self.loss, learning_rate, grad_clip)

In [11]:
# Hyperparameters used by the training and sampling cells

# Batch geometry: sequences per batch, sequence steps per batch
batch_size, num_steps = 100, 100

# Network: size of the LSTM hidden layers, number of LSTM layers
lstm_size, num_layers = 512, 2

# Optimisation: learning rate and dropout keep probability
learning_rate = 0.001
keep_prob = 0.5

In [12]:
epochs = 20
# Save every N iterations
save_every_n = 200

# The checkpoints are written below 'checkpoints/'. Create the directory up
# front (no-op when it already exists) so that the first save on a fresh
# checkout does not fail because the target directory is missing.
os.makedirs('checkpoints', exist_ok=True)

model = CharRNN(len(vocab), batch_size=batch_size, num_steps=num_steps,
                lstm_size=lstm_size, num_layers=num_layers, 
                learning_rate=learning_rate)

saver = tf.train.Saver(max_to_keep=100)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    # Use the line below to load a checkpoint and resume training
    #saver.restore(sess, 'checkpoints/______.ckpt')
    counter = 0
    for e in range(epochs):
        # Train network; every epoch starts again from the zero state
        new_state = sess.run(model.initial_state)
        for x, y in get_batches(encoded, batch_size, num_steps):
            counter += 1
            start = time.time()
            # The final state of the previous batch is fed back in as the
            # initial state of this one
            feed = {model.inputs: x,
                    model.targets: y,
                    model.keep_prob: keep_prob,
                    model.initial_state: new_state}
            batch_loss, new_state, _ = sess.run([model.loss, 
                                                 model.final_state, 
                                                 model.optimizer], 
                                                 feed_dict=feed)
            
            end = time.time()
            print('Epoch: {}/{}... '.format(e+1, epochs),
                  'Training Step: {}... '.format(counter),
                  'Training loss: {:.4f}... '.format(batch_loss),
                  '{:.4f} sec/batch'.format((end-start)))
        
            if (counter % save_every_n == 0):
                saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))
    
    # Always keep the state reached at the end of training
    saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))

Epoch: 1/20...  Training Step: 1...  Training loss: 4.4208...  11.3082 sec/batch
Epoch: 1/20...  Training Step: 2...  Training loss: 4.3356...  0.3619 sec/batch
Epoch: 1/20...  Training Step: 3...  Training loss: 3.8645...  0.3150 sec/batch
Epoch: 1/20...  Training Step: 4...  Training loss: 4.9162...  0.3072 sec/batch
Epoch: 1/20...  Training Step: 5...  Training loss: 3.8808...  0.3077 sec/batch
Epoch: 1/20...  Training Step: 6...  Training loss: 3.8111...  0.3078 sec/batch
Epoch: 1/20...  Training Step: 7...  Training loss: 3.6907...  0.3070 sec/batch
Epoch: 1/20...  Training Step: 8...  Training loss: 3.5737...  0.3069 sec/batch
Epoch: 1/20...  Training Step: 9...  Training loss: 3.4688...  0.3086 sec/batch
Epoch: 1/20...  Training Step: 10...  Training loss: 3.4331...  0.3076 sec/batch
Epoch: 1/20...  Training Step: 11...  Training loss: 3.3377...  0.3077 sec/batch
Epoch: 1/20...  Training Step: 12...  Training loss: 3.3442...  0.3076 sec/batch
Epoch: 1/20...  Training Step: 13...

In [13]:
# Show the checkpoint state recorded for the 'checkpoints' directory
# (the latest checkpoint path plus all checkpoint paths still tracked)
tf.train.get_checkpoint_state('checkpoints')

model_checkpoint_path: "checkpoints/i3960_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i200_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i400_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i600_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i800_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i1000_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i1200_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i1400_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i1600_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i1800_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i2000_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i2200_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i2400_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i2600_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i2800_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i3000_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i3200_l512.ckpt"
all_model_checkpoint_pa

In [14]:
def pick_top_n(preds, vocab_size, top_n=5):
    ''' Randomly pick a class id among the top_n most probable ones.

        Arguments
        ---------
        preds: Predicted probabilities; squeezed to a 1-D vector of length vocab_size
        vocab_size: Number of classes to choose from
        top_n: How many of the most probable classes stay eligible

        Returns
        -------
        The chosen class id, drawn with np.random.choice using the
        renormalised probabilities of the top_n classes.
    '''
    # Work on a float64 copy: np.squeeze returns a view, so zeroing entries in
    # place would silently modify the caller's array. float64 also keeps the
    # renormalised probabilities summing to 1 more precisely.
    p = np.array(np.squeeze(preds), dtype=np.float64)

    # Zero everything except the top_n largest probabilities, then renormalise
    p[np.argsort(p)[:-top_n]] = 0
    p = p / np.sum(p)

    c = np.random.choice(vocab_size, 1, p=p)[0]
    return c

In [15]:
def sample(checkpoint, n_samples, lstm_size, vocab_size, prime="The "):
    ''' Generate text from a trained checkpoint, one character at a time.

        The network is first primed with the characters of `prime`, then each
        picked character is fed back in to predict the next one. Characters
        are mapped through the module-level vocab_to_int / int_to_vocab
        tables. The network is built with CharRNN's default number of layers.

        Arguments
        ---------
        checkpoint: Path of the checkpoint to restore
        n_samples: Number of generation steps after the first picked character
        lstm_size: Size of the LSTM hidden layers the checkpoint was trained with
        vocab_size: Number of distinct characters (classes)
        prime: Non-empty text used to warm up the network state

        Returns
        -------
        `prime` followed by n_samples + 1 generated characters.

        Raises
        ------
        ValueError: if `prime` is empty, since there would be no prediction
        to start generating from.
    '''
    if not prime:
        raise ValueError("prime must contain at least one character")

    samples = list(prime)
    model = CharRNN(vocab_size, lstm_size=lstm_size, sampling=True)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, checkpoint)
        new_state = sess.run(model.initial_state)

        # Single 1 x 1 input buffer, reused for every step
        x = np.zeros((1, 1), dtype=np.int32)

        def step(char_id, state):
            # Feed one character (dropout disabled) and return the
            # predictions together with the updated state
            x[0, 0] = char_id
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: state}
            return sess.run([model.prediction, model.final_state],
                            feed_dict=feed)

        # Warm up the state on the priming text
        for c in prime:
            preds, new_state = step(vocab_to_int[c], new_state)

        # First generated character comes from the last priming prediction
        c = pick_top_n(preds, vocab_size)
        samples.append(int_to_vocab[c])

        # Keep feeding the last picked character back in
        for _ in range(n_samples):
            preds, new_state = step(c, new_state)
            c = pick_top_n(preds, vocab_size)
            samples.append(int_to_vocab[c])

    return ''.join(samples)

In [16]:
# Show the path of the most recent checkpoint in the 'checkpoints' directory
tf.train.latest_checkpoint('checkpoints')

'checkpoints/i3960_l512.ckpt'

In [17]:
# Generate text from the most recent checkpoint, primed with "Far"
checkpoint = tf.train.latest_checkpoint('checkpoints')
samp = sample(checkpoint, n_samples=2000, lstm_size=lstm_size,
              vocab_size=len(vocab), prime="Far")
print(samp)

Farce and his book, who had a conversation on the sorrest things that was
that, but the moment he saw that she was too, because he would be the footman or the
marshal, and at the more of the sumjerings to him, well and sense that had
not conceared that anyounce that his wife's all should be over the same,
but whispress his father-hearted heart to any and or humiliated the memory, and
she was almost of anything, he had not telegram that there was all of in
the first time human, and that soothe that it was that he had been surred to him. The
prestry was, to have been set to the same time, had transag about his high
strick a smile, she felt, as a little was his brother and, as though
they had stirned, and therefwen her own weeking of her sungrished, and he was
singling. She called her hands, and at the string of the same, and he
could not be so good he was closed to the prince, and she felt the
present of the same intension of the senselees and home and so
things, and which she saw it and

In [18]:
# Generate text from the checkpoint saved at training step 200
checkpoint = 'checkpoints/i200_l512.ckpt'
samp = sample(checkpoint, n_samples=1000, lstm_size=lstm_size,
              vocab_size=len(vocab), prime="Far")
print(samp)

Fardd,
, asthar te tor ondis he his onet hhe weto te se this ote whe he sis al the
tint to thir hot tires int hire thons ontind tou thas osist and as ho thes orotin hhe wos toritithe and ansing ansares he sorad, hor ate at ant in ther hirito her antese hores itherinngore wos timer th are winthes ton wet an to alithe himeris oon on the won an hot ha and ote whed whe ase on he thet hos ote ther sotinn he hh mot hed ant he sere sotor ale wand anth womhor, astithere asd hesis and he tisth thed he thon hedsand ton har hin ha seer ins inter hins wos oot an hered wint he to on toungesthe th an ore tot on wot has ated hon thas hor tor hin tar th and an himere her os he werind he wint heran ot heded hars ha whos ot ane hintetas whond
af ha thise he soreserann of times het an the wirth ser arid tot ou at hher asis of wote woto te te that he thes ane hed ans orethe he this ood this hass ifer in ser on an sare hir has hin ond that tot hor athe anto ad ale sher sot he so ont tor whe atinghe se hont

In [19]:
# Generate text from the checkpoint saved at training step 600
checkpoint = 'checkpoints/i600_l512.ckpt'
samp = sample(checkpoint, n_samples=1000, lstm_size=lstm_size,
              vocab_size=len(vocab), prime="Far")
print(samp)

Farned. Him, som she he deald
to doring to she
partite,
and that the
crimat that has well han stide, had she sead
ander oter his
sond ablut than stean that,
and his talkned
whord an the wan tere the she worded
shis what that that her
and she sand the chonterent that
he hat to sore tores
of it has that the soud as the her head, so thas header, and wist her thank ow and while wit him, buntion to har a to stre hord wall sered whe shers and and san with shor intinget at to the houd her sont ane had andes had seen the
mont att them, wheth alred
had the hers him. Ste he he whith at in and
the
prousing."

The reant, that if her had buct on said to her has, the seid homen thene the pillating of inte of an of the ruther of the camenten the prised her
his wall at the ward had, and ald and sterent of tha mele, when
ha wing her hede some in hit ane a to doring this thould and
and sho hus sade out were a that in the samestion of his his sand the
maring
his stace in the serong the her hed thas to a 

In [20]:
# Generate text from the checkpoint saved at training step 1200
checkpoint = 'checkpoints/i1200_l512.ckpt'
samp = sample(checkpoint, n_samples=1000, lstm_size=lstm_size,
              vocab_size=len(vocab), prime="Far")
print(samp)

Farcties what to she was a lat of the courter that strood tractiat, her a childen and she was ataled and and her
had at the possan of with with the some of to she wasted to have a lowgle him,er and saw, too, while he had to him, he susted to all the propented and told the
seaming of which at the prosint him that
say hears the peacents who wad into the passiance. And he saw as he had, and so would be a compleant was and with sont and strange a so midation.

She wis that to she was not in herse a porten think was that his his was offering once his had heard and talking at the plate of a solt wholes had not seeth of the sarmina, whise he curce someter, which the samore saw at his fince at the plesser was though, a say of seemed and the saming the thish, but the seamed heard to alone, hapring, he would say, say a gond and with stright, but the peesance of his
his for he wilied to thick the stiliat from his
comprite as the tell than the propon was her happented, and the truse along a the sh