In [2]:
import time
import tensorflow as tf
import numpy as np
from collections import namedtuple

In [3]:
with open('anna.txt' , 'r') as f:
    text = f.read()
vocab = set(text)
vocab_to_int = {c : i for i , c in enumerate(vocab)}
int_to_vocab = dict(enumerate(vocab))
encoded = np.array([vocab_to_int[c] for c in text], dtype = np.int32)

In [4]:
text[:200]

"Chapter 1\n\n\nHappy families are all alike; every unhappy family is unhappy in its own\nway.\n\nEverything was in confusion in the Oblonskys' house. The wife had\ndiscovered that the husband was carrying on"

In [5]:
encoded[:200]

array([51, 70, 22, 23, 12, 78, 50, 18, 41, 21, 21, 21, 36, 22, 23, 23, 64,
       18, 75, 22, 10,  2, 82,  2, 78, 38, 18, 22, 50, 78, 18, 22, 82, 82,
       18, 22, 82,  2, 55, 78, 54, 18, 78, 49, 78, 50, 64, 18, 46, 40, 70,
       22, 23, 23, 64, 18, 75, 22, 10,  2, 82, 64, 18,  2, 38, 18, 46, 40,
       70, 22, 23, 23, 64, 18,  2, 40, 18,  2, 12, 38, 18,  7, 33, 40, 21,
       33, 22, 64, 27, 21, 21, 66, 49, 78, 50, 64, 12, 70,  2, 40, 65, 18,
       33, 22, 38, 18,  2, 40, 18, 39,  7, 40, 75, 46, 38,  2,  7, 40, 18,
        2, 40, 18, 12, 70, 78, 18, 45, 19, 82,  7, 40, 38, 55, 64, 38, 20,
       18, 70,  7, 46, 38, 78, 27, 18,  5, 70, 78, 18, 33,  2, 75, 78, 18,
       70, 22, 52, 21, 52,  2, 38, 39,  7, 49, 78, 50, 78, 52, 18, 12, 70,
       22, 12, 18, 12, 70, 78, 18, 70, 46, 38, 19, 22, 40, 52, 18, 33, 22,
       38, 18, 39, 22, 50, 50, 64,  2, 40, 65, 18,  7, 40])

In [6]:
len(vocab)

83

In [7]:
def get_batches(arr , batch_size , n_steps):
    
    chars_per_batch = batch_size * n_steps
    n_batches = len(arr)//chars_per_batch
    
    # keeping only enough characters to make full batches
    arr = arr[:n_batches * chars_per_batch]
    # reshape into batch size rows
    arr = arr.reshape((batch_size , -1))
    
    for n in range(0 , arr.shape[1] , n_steps):
        x = arr[:, n : n + n_steps]
        y_temp = arr[: , n+1 : n + n_steps + 1]
        y = np.zeros(x.shape , dtype = x.dtype)
        y[: , :y_temp.shape[1]] = y_temp
        
        yield x , y

In [8]:
batches = get_batches(encoded , 10 , 50)
x , y = next(batches)

In [9]:
print('x\n' , x[:10 , :10])
print('y\n' , y[:10 , :10])

x
 [[51 70 22 23 12 78 50 18 41 21]
 [18 22 10 18 40  7 12 18 65  7]
 [49  2 40 27 21 21  1  6 78 38]
 [40 18 52 46 50  2 40 65 18 70]
 [18  2 12 18  2 38 37 18 38  2]
 [18 30 12 18 33 22 38 21  7 40]
 [70 78 40 18 39  7 10 78 18 75]
 [54 18 19 46 12 18 40  7 33 18]
 [12 18  2 38 40 20 12 27 18  5]
 [18 38 22  2 52 18 12  7 18 70]]
y
 [[70 22 23 12 78 50 18 41 21 21]
 [22 10 18 40  7 12 18 65  7  2]
 [ 2 40 27 21 21  1  6 78 38 37]
 [18 52 46 50  2 40 65 18 70  2]
 [ 2 12 18  2 38 37 18 38  2 50]
 [30 12 18 33 22 38 21  7 40 82]
 [78 40 18 39  7 10 78 18 75  7]
 [18 19 46 12 18 40  7 33 18 38]
 [18  2 38 40 20 12 27 18  5 70]
 [38 22  2 52 18 12  7 18 70 78]]


# Inputs

In [10]:
def build_inputs(batch_size , num_steps):
    inputs = tf.placeholder(tf.int32 , [batch_size , num_steps] , name = 'inputs')
    targets = tf.placeholder(tf.int32 , [batch_size , num_steps] , name = 'targets')
    
    keep_prob = tf.placeholder(tf.float32 , name = 'keep_prob')
    
    return inputs , targets , keep_prob

# LSTM Cells

In [11]:
def build_lstm(lstm_size , num_layers , batch_size , keep_prob):
    def build_cell(lstm_size , keep_prob):
        lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)
        drop = tf.contrib.rnn.DropoutWrapper(lstm , output_keep_prob = keep_prob)
        return drop
    
    cell = tf.contrib.rnn.MultiRNNCell([build_cell(lstm_size , keep_prob) for _ in range(num_layers)])
    initial_state = cell.zero_state(batch_size , tf.float32)
    
    return cell , initial_state

# Output

In [12]:
def build_output(lstm_output, in_size, out_size):
    # Reshape output so it's a bunch of rows, one row for each step for each sequence.
    # That is, the shape should be batch_size*num_steps rows by lstm_size columns
    seq_output = tf.concat(lstm_output, axis=1)
    x = tf.reshape(seq_output, [-1, in_size])
    
    # Connect the RNN outputs to a softmax layer
    with tf.variable_scope('softmax'):
        softmax_w = tf.Variable(tf.truncated_normal((in_size, out_size), stddev=0.1))
        softmax_b = tf.Variable(tf.zeros(out_size))
    
    # Since output is a bunch of rows of RNN cell outputs, logits will be a bunch
    # of rows of logit outputs, one for each step and sequence
    logits = tf.matmul(x, softmax_w) + softmax_b
    
    # Use softmax to get the probabilities for predicted characters
    out = tf.nn.softmax(logits, name='predictions')
    
    return out, logits

# Training Loss

In [13]:
def build_loss(logits, targets, lstm_size, num_classes):
    # One-hot encode targets and reshape to match logits, one row per batch_size per step
    y_one_hot = tf.one_hot(targets, num_classes)
    y_reshaped = tf.reshape(y_one_hot, logits.get_shape())
    
    # Softmax cross entropy loss
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_reshaped)
    loss = tf.reduce_mean(loss)
    return loss

# Optimizer

In [14]:
def build_optimizer(loss, learning_rate, grad_clip):
    # Optimizer for training, using gradient clipping to control exploding gradients
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), grad_clip)
    train_op = tf.train.AdamOptimizer(learning_rate)
    optimizer = train_op.apply_gradients(zip(grads, tvars))
    
    return optimizer

# Build the network

In [15]:
class CharRNN:
    
    def __init__(self, num_classes, batch_size=64, num_steps=50, 
                       lstm_size=128, num_layers=2, learning_rate=0.001, 
                       grad_clip=5, sampling=False):
    
        # When we're using this network for sampling later, we'll be passing in
        # one character at a time, so providing an option for that
        if sampling == True:
            batch_size, num_steps = 1, 1
        else:
            batch_size, num_steps = batch_size, num_steps

        tf.reset_default_graph()
        
        # Build the input placeholder tensors
        self.inputs, self.targets, self.keep_prob = build_inputs(batch_size, num_steps)

        # Build the LSTM cell
        cell, self.initial_state = build_lstm(lstm_size, num_layers, batch_size, self.keep_prob)

        ### Run the data through the RNN layers
        # First, one-hot encode the input tokens
        x_one_hot = tf.one_hot(self.inputs, num_classes)
        
        # Run each sequence step through the RNN and collect the outputs
        outputs, state = tf.nn.dynamic_rnn(cell, x_one_hot, initial_state=self.initial_state)
        self.final_state = state
        
        # Get softmax predictions and logits
        self.prediction, self.logits = build_output(outputs, lstm_size, num_classes)
        
        # Loss and optimizer (with gradient clipping)
        self.loss = build_loss(self.logits, self.targets, lstm_size, num_classes)
        self.optimizer = build_optimizer(self.loss, learning_rate, grad_clip)

# Hyperparameters

In [16]:
batch_size = 100           # Sequences per batch
num_steps = 100            # Num of sequence steps per batch
lstm_size = 512            # Num of hidden layers in LSTM
num_layers = 2             # Num of lstm layers
learning_rate = 0.001
keep_prob = 0.5

In [17]:
epochs = 20
# Print losses every N interations
print_every_n = 50

# Save every N iterations
save_every_n = 200

model = CharRNN(len(vocab), batch_size=batch_size, num_steps=num_steps,
                lstm_size=lstm_size, num_layers=num_layers, 
                learning_rate=learning_rate)

saver = tf.train.Saver(max_to_keep=100)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    # Use the line below to load a checkpoint and resume training
    #saver.restore(sess, 'checkpoints/______.ckpt')
    counter = 0
    for e in range(epochs):
        # Train network
        new_state = sess.run(model.initial_state)
        loss = 0
        for x, y in get_batches(encoded, batch_size, num_steps):
            counter += 1
            start = time.time()
            feed = {model.inputs: x,
                    model.targets: y,
                    model.keep_prob: keep_prob,
                    model.initial_state: new_state}
            batch_loss, new_state, _ = sess.run([model.loss, 
                                                 model.final_state, 
                                                 model.optimizer], 
                                                 feed_dict=feed)
            if (counter % print_every_n == 0):
                end = time.time()
                print('Epoch: {}/{}... '.format(e+1, epochs),
                      'Training Step: {}... '.format(counter),
                      'Training loss: {:.4f}... '.format(batch_loss),
                      '{:.4f} sec/batch'.format((end-start)))
        
            if (counter % save_every_n == 0):
                saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))
    
    saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

Epoch: 1/20...  Training 

Epoch: 19/20...  Training Step: 3650...  Training loss: 1.1769...  2.3198 sec/batch
Epoch: 19/20...  Training Step: 3700...  Training loss: 1.1715...  2.4814 sec/batch
Epoch: 19/20...  Training Step: 3750...  Training loss: 1.1535...  2.1562 sec/batch
Epoch: 20/20...  Training Step: 3800...  Training loss: 1.1395...  2.0535 sec/batch
Epoch: 20/20...  Training Step: 3850...  Training loss: 1.1542...  2.1024 sec/batch
Epoch: 20/20...  Training Step: 3900...  Training loss: 1.1845...  2.1742 sec/batch
Epoch: 20/20...  Training Step: 3950...  Training loss: 1.1482...  2.5183 sec/batch


# Saved checkpoints

In [18]:
tf.train.get_checkpoint_state('checkpoints')

model_checkpoint_path: "checkpoints\\i3960_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i200_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i400_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i600_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i800_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i1000_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i1200_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i1400_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i1600_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i1800_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i2000_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i2200_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i2400_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i2600_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i2800_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i3000_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i3200_l512.ckpt"
all_mo

# Sampling

In [24]:
def pick_top_n(preds , vocab_size , top_n = 5):
    p = np.squeeze(preds)
    p[np.argsort(p)[:-top_n]] = 0
    p = p / np.sum(p)
    c = np.random.choice(vocab_size , 1 , p = p)[0]
    return c

In [25]:
def sample(checkpoint, n_samples, lstm_size, vocab_size, prime="The "):
    samples = [c for c in prime]
    model = CharRNN(len(vocab), lstm_size=lstm_size, sampling=True)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, checkpoint)
        new_state = sess.run(model.initial_state)
        for c in prime:
            x = np.zeros((1, 1))
            x[0,0] = vocab_to_int[c]
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: new_state}
            preds, new_state = sess.run([model.prediction, model.final_state], 
                                         feed_dict=feed)

        c = pick_top_n(preds, len(vocab))
        samples.append(int_to_vocab[c])

        for i in range(n_samples):
            x[0,0] = c
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: new_state}
            preds, new_state = sess.run([model.prediction, model.final_state], 
                                         feed_dict=feed)

            c = pick_top_n(preds, len(vocab))
            samples.append(int_to_vocab[c])
        
    return ''.join(samples)

In [26]:
tf.train.latest_checkpoint('checkpoints')

'checkpoints\\i3960_l512.ckpt'

In [27]:
checkpoint = tf.train.latest_checkpoint('checkpoints')
samp = sample(checkpoint, 2000, lstm_size, len(vocab), prime="Far")
print(samp)

Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from checkpoints\i3960_l512.ckpt
Farcied her.... He's said to see her, but how she was not to
speak of the stoop into the bind and show he dad not answered all that
he was a persain than the sound of hearing, but was impossible, and
had to confused him. But he had this prepent of money would be a
plenty at last that he could not have cared as he could say. She
was not attractively to think that she saw why why they had baded
her.

His face was simply as though he done was so letter, with too this was a
partor of alternate, and all of the sense of the servent with the
chief sense of all the fact.

"Ah! they're not a mentile and man, thanks to see you," said Stepan
Arkadyevitch, at her sister's eyes; "I can't go to him," she said,
looking away her hand, his book had taken about. She shouked a side
that he could not see them, which went up and say that the peasants'
