In [1]:
import os
import time
from collections import namedtuple

import numpy as np
import tensorflow as tf

In [2]:
# Load the whole training corpus as a single string.
with open('../data/anna.txt', 'r') as f:
    text = f.read()

# Distinct characters of the corpus. They are sorted because plain set
# iteration order for strings can change between interpreter sessions (hash
# randomisation); an unsorted vocabulary would give every kernel restart a
# different char <-> int mapping, which would no longer match the mapping a
# previously saved checkpoint was trained with.
vocab = sorted(set(text))

# Lookup tables in both directions: character -> integer id and back.
vocab_to_int = {c: i for i, c in enumerate(vocab)}
int_to_vocab = dict(enumerate(vocab))

# The corpus as an int32 array of character ids, in reading order.
encoded = np.array([vocab_to_int[c] for c in text], dtype=np.int32)

In [3]:
def get_batches(arr, n_seqs, n_steps):
    '''Create a generator that returns batches of size
       n_seqs x n_steps from arr.

       Every batch is a pair (x, y) in which y[i, j] is the element that
       follows x[i, j] in arr, i.e. the targets are the inputs shifted by
       one position in the original stream. This also holds for the last
       column of each window, whose target is the first element of the
       next window (or of the next row) rather than an element of the
       same window.

       Arguments
       ---------
       arr: 1-D NumPy array you want to make batches from
       n_seqs: Batch size, the number of sequences per batch
       n_steps: Number of sequence steps per batch

       Yields
       ------
       x: view of shape (n_seqs, n_steps) into arr (the features)
       y: array of shape (n_seqs, n_steps) with the same dtype as arr
          (the targets); it does not share memory with arr

       Only full batches are produced: trailing elements that do not fill
       a complete n_seqs x n_steps batch are not used as features. If arr
       is too short for even one full batch, nothing is yielded.
    '''
    # Number of elements per batch and number of full batches we can make
    characters_per_batch = n_seqs * n_steps
    n_batches = len(arr) // characters_per_batch
    if n_batches == 0:
        return

    # Keep only enough elements to make full batches
    n_chars = n_batches * characters_per_batch
    inputs = arr[:n_chars]

    # Targets are the stream shifted left by one. The very last target is
    # the first dropped element when there is one; otherwise there is no
    # real successor and we wrap around to the start of the stream.
    targets = np.empty_like(inputs)
    targets[:-1] = inputs[1:]
    targets[-1] = arr[n_chars] if len(arr) > n_chars else arr[0]

    # Reshape into n_seqs rows; each row is a contiguous slice of the stream
    inputs = inputs.reshape((n_seqs, -1))
    targets = targets.reshape((n_seqs, -1))

    # Slide a window of n_steps columns across both arrays in lock-step
    for n in range(0, inputs.shape[1], n_steps):
        yield inputs[:, n:n+n_steps], targets[:, n:n+n_steps]

In [7]:
class CharRNN:
    '''Character-level multi-layer LSTM, built as a TensorFlow 1.x graph.

    Constructing an instance resets the default graph and then creates the
    whole model in it. The pieces a caller needs are exposed as attributes:

    inputs, targets: int32 placeholders of shape [batch_size, num_steps]
    keep_prob: float32 placeholder with the dropout keep probability; it is
        wired into every LSTM layer, so feed it on each run (for example
        the training value while training and 1.0 to disable dropout)
    initial_state: zero state of the stacked LSTM cell
    final_state: LSTM state after the last step of the fed sequences
    logits: tensor of shape [batch_size * num_steps, num_classes]
    prediction: softmax of logits
    loss: mean softmax cross-entropy between logits and one-hot targets
    optimizer: op applying one Adam step with globally clipped gradients
    '''

    def __init__(self, num_classes, batch_size=64, num_steps=50,
                 lstm_size=128, num_layers=2, learning_rate=0.001,
                 grad_clip=5, sampling=False):
        '''Build the graph.

        Arguments
        ---------
        num_classes: Number of distinct tokens (size of the one-hot vectors)
        batch_size: Number of sequences per batch
        num_steps: Number of sequence steps per batch
        lstm_size: Number of units in each LSTM layer
        num_layers: Number of stacked LSTM layers
        learning_rate: Learning rate handed to the Adam optimizer
        grad_clip: Global-norm threshold the gradients are clipped to
        sampling: If true, batch_size and num_steps are both forced to 1
            so the network can be fed one character at a time
        '''
        # When we're using this network for sampling later, we'll be passing
        # in one character at a time, so override the batch geometry
        if sampling:
            batch_size, num_steps = 1, 1

        tf.reset_default_graph()

        # Build the input placeholder tensors
        self.inputs = tf.placeholder(tf.int32, [batch_size, num_steps], name='inputs')
        self.targets = tf.placeholder(tf.int32, [batch_size, num_steps], name='targets')
        self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')

        # Build the LSTM cell: num_layers LSTM layers, each with dropout
        # applied to its outputs
        def build_cell(lstm_size, keep_prob):
            lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)
            drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)
            return drop

        # Pass the placeholder (not a module-level constant) so the value
        # fed for self.keep_prob actually controls dropout in the graph.
        cell = tf.contrib.rnn.MultiRNNCell(
            [build_cell(lstm_size, self.keep_prob) for _ in range(num_layers)])
        self.initial_state = cell.zero_state(batch_size, tf.float32)

        ### Run the data through the RNN layers
        # First, one-hot encode the input tokens
        x_one_hot = tf.one_hot(self.inputs, num_classes)

        # Run each sequence step through the RNN and collect the outputs
        outputs, state = tf.nn.dynamic_rnn(cell, x_one_hot, initial_state=self.initial_state)
        self.final_state = state

        # Flatten the RNN outputs to rows of lstm_size values so a single
        # dense layer can score every step of every sequence at once
        seq_output = tf.concat(outputs, axis=1)
        x = tf.reshape(seq_output, [-1, lstm_size])

        with tf.variable_scope('softmax'):
            softmax_w = tf.Variable(tf.truncated_normal([lstm_size, num_classes], stddev=0.1))
            softmax_b = tf.Variable(tf.zeros(num_classes))

        # Get softmax predictions and logits
        self.logits = tf.matmul(x, softmax_w) + softmax_b
        self.prediction = tf.nn.softmax(self.logits, name='predictions')

        # Loss: one-hot encode the targets, reshape them to line up with the
        # logits and average the cross-entropy over all rows
        y_one_hot = tf.one_hot(self.targets, num_classes)
        y_reshaped = tf.reshape(y_one_hot, self.logits.get_shape())
        self.loss = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=y_reshaped)
        self.loss = tf.reduce_mean(self.loss)

        # Optimizer (with gradient clipping): clip the gradients of all
        # trainable variables by their global norm, then apply them with Adam
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars), grad_clip)
        train_op = tf.train.AdamOptimizer(learning_rate=learning_rate)
        self.optimizer = train_op.apply_gradients(zip(grads, tvars))

In [8]:
# Model capacity
num_layers = 2          # LSTM layers stacked on top of each other
lstm_size = 512         # Hidden units in each LSTM layer

# Shape of one training batch
batch_size = 100        # Sequences processed in parallel
num_steps = 100         # Characters (steps) per sequence

# Optimisation and regularisation
learning_rate = 1e-3    # Learning rate
keep_prob = 0.5         # Dropout keep probability

In [9]:
epochs = 20
# Save every N iterations
save_every_n = 200

# Checkpoints are written under ./checkpoints; make sure the directory is
# there up front so the first save (after save_every_n steps) cannot fail on
# a missing directory and throw away the training done so far.
os.makedirs('checkpoints', exist_ok=True)

model = CharRNN(len(vocab), batch_size=batch_size, num_steps=num_steps,
                lstm_size=lstm_size, num_layers=num_layers, 
                learning_rate=learning_rate)

saver = tf.train.Saver(max_to_keep=100)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    # Use the line below to load a checkpoint and resume training
    #saver.restore(sess, 'checkpoints/______.ckpt')
    counter = 0     # Global training step, counted across epochs
    for e in range(epochs):
        # Train network: every epoch starts from the zero LSTM state ...
        new_state = sess.run(model.initial_state)
        for x, y in get_batches(encoded, batch_size, num_steps):
            counter += 1
            start = time.time()
            # ... and each batch continues from the state the previous
            # batch ended in, fed back in through model.initial_state
            feed = {model.inputs: x,
                    model.targets: y,
                    model.keep_prob: keep_prob,
                    model.initial_state: new_state}
            batch_loss, new_state, _ = sess.run([model.loss, 
                                                 model.final_state, 
                                                 model.optimizer], 
                                                 feed_dict=feed)
            
            end = time.time()
            print('Epoch: {}/{}... '.format(e+1, epochs),
                  'Training Step: {}... '.format(counter),
                  'Training loss: {:.4f}... '.format(batch_loss),
                  '{:.4f} sec/batch'.format((end-start)))
        
            # Periodic checkpoint, named after the step and the LSTM size
            if (counter % save_every_n == 0):
                saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))
    
    # Final checkpoint once all epochs are done
    saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))

Epoch: 1/20...  Training Step: 1...  Training loss: 4.4202...  0.3931 sec/batch
Epoch: 1/20...  Training Step: 2...  Training loss: 4.3360...  0.1560 sec/batch
Epoch: 1/20...  Training Step: 3...  Training loss: 3.9438...  0.1590 sec/batch
Epoch: 1/20...  Training Step: 4...  Training loss: 6.0447...  0.1570 sec/batch
Epoch: 1/20...  Training Step: 5...  Training loss: 4.1006...  0.1580 sec/batch
Epoch: 1/20...  Training Step: 6...  Training loss: 3.8226...  0.1570 sec/batch
Epoch: 1/20...  Training Step: 7...  Training loss: 3.7023...  0.1580 sec/batch
Epoch: 1/20...  Training Step: 8...  Training loss: 3.6441...  0.1570 sec/batch
Epoch: 1/20...  Training Step: 9...  Training loss: 3.5253...  0.1580 sec/batch
Epoch: 1/20...  Training Step: 10...  Training loss: 3.4419...  0.1570 sec/batch
Epoch: 1/20...  Training Step: 11...  Training loss: 3.3647...  0.1571 sec/batch
Epoch: 1/20...  Training Step: 12...  Training loss: 3.3401...  0.1570 sec/batch
Epoch: 1/20...  Training Step: 13... 

Epoch: 1/20...  Training Step: 103...  Training loss: 3.0437...  0.1580 sec/batch
Epoch: 1/20...  Training Step: 104...  Training loss: 3.0365...  0.1580 sec/batch
Epoch: 1/20...  Training Step: 105...  Training loss: 3.0436...  0.1570 sec/batch
Epoch: 1/20...  Training Step: 106...  Training loss: 3.0257...  0.1571 sec/batch
Epoch: 1/20...  Training Step: 107...  Training loss: 3.0227...  0.1590 sec/batch
Epoch: 1/20...  Training Step: 108...  Training loss: 3.0145...  0.1590 sec/batch
Epoch: 1/20...  Training Step: 109...  Training loss: 3.0337...  0.1580 sec/batch
Epoch: 1/20...  Training Step: 110...  Training loss: 2.9886...  0.1580 sec/batch
Epoch: 1/20...  Training Step: 111...  Training loss: 2.9921...  0.1570 sec/batch
Epoch: 1/20...  Training Step: 112...  Training loss: 3.0109...  0.1590 sec/batch
Epoch: 1/20...  Training Step: 113...  Training loss: 2.9770...  0.1570 sec/batch
Epoch: 1/20...  Training Step: 114...  Training loss: 2.9613...  0.1570 sec/batch
Epoch: 1/20...  

Epoch: 2/20...  Training Step: 203...  Training loss: 2.4288...  0.1590 sec/batch
Epoch: 2/20...  Training Step: 204...  Training loss: 2.4348...  0.1580 sec/batch
Epoch: 2/20...  Training Step: 205...  Training loss: 2.4324...  0.1580 sec/batch
Epoch: 2/20...  Training Step: 206...  Training loss: 2.4424...  0.1590 sec/batch
Epoch: 2/20...  Training Step: 207...  Training loss: 2.4492...  0.1580 sec/batch
Epoch: 2/20...  Training Step: 208...  Training loss: 2.4218...  0.1570 sec/batch
Epoch: 2/20...  Training Step: 209...  Training loss: 2.4132...  0.1560 sec/batch
Epoch: 2/20...  Training Step: 210...  Training loss: 2.4250...  0.1570 sec/batch
Epoch: 2/20...  Training Step: 211...  Training loss: 2.4085...  0.1580 sec/batch
Epoch: 2/20...  Training Step: 212...  Training loss: 2.4509...  0.1580 sec/batch
Epoch: 2/20...  Training Step: 213...  Training loss: 2.4136...  0.1610 sec/batch
Epoch: 2/20...  Training Step: 214...  Training loss: 2.4085...  0.1570 sec/batch
Epoch: 2/20...  

Epoch: 2/20...  Training Step: 303...  Training loss: 2.2001...  0.1560 sec/batch
Epoch: 2/20...  Training Step: 304...  Training loss: 2.2068...  0.1560 sec/batch
Epoch: 2/20...  Training Step: 305...  Training loss: 2.1941...  0.1590 sec/batch
Epoch: 2/20...  Training Step: 306...  Training loss: 2.2198...  0.1580 sec/batch
Epoch: 2/20...  Training Step: 307...  Training loss: 2.2143...  0.1580 sec/batch
Epoch: 2/20...  Training Step: 308...  Training loss: 2.1925...  0.1571 sec/batch
Epoch: 2/20...  Training Step: 309...  Training loss: 2.1932...  0.1580 sec/batch
Epoch: 2/20...  Training Step: 310...  Training loss: 2.2071...  0.1570 sec/batch
Epoch: 2/20...  Training Step: 311...  Training loss: 2.1832...  0.1580 sec/batch
Epoch: 2/20...  Training Step: 312...  Training loss: 2.1867...  0.1570 sec/batch
Epoch: 2/20...  Training Step: 313...  Training loss: 2.1761...  0.1580 sec/batch
Epoch: 2/20...  Training Step: 314...  Training loss: 2.1479...  0.1580 sec/batch
Epoch: 2/20...  

Epoch: 3/20...  Training Step: 403...  Training loss: 2.0587...  0.1590 sec/batch
Epoch: 3/20...  Training Step: 404...  Training loss: 2.0587...  0.1700 sec/batch
Epoch: 3/20...  Training Step: 405...  Training loss: 2.0940...  0.1580 sec/batch
Epoch: 3/20...  Training Step: 406...  Training loss: 2.0606...  0.1571 sec/batch
Epoch: 3/20...  Training Step: 407...  Training loss: 2.0387...  0.1580 sec/batch
Epoch: 3/20...  Training Step: 408...  Training loss: 2.0297...  0.1570 sec/batch
Epoch: 3/20...  Training Step: 409...  Training loss: 2.0574...  0.1570 sec/batch
Epoch: 3/20...  Training Step: 410...  Training loss: 2.0918...  0.1570 sec/batch
Epoch: 3/20...  Training Step: 411...  Training loss: 2.0554...  0.1570 sec/batch
Epoch: 3/20...  Training Step: 412...  Training loss: 2.0264...  0.1571 sec/batch
Epoch: 3/20...  Training Step: 413...  Training loss: 2.0401...  0.1590 sec/batch
Epoch: 3/20...  Training Step: 414...  Training loss: 2.0838...  0.1580 sec/batch
Epoch: 3/20...  

Epoch: 3/20...  Training Step: 503...  Training loss: 1.9369...  0.1580 sec/batch
Epoch: 3/20...  Training Step: 504...  Training loss: 1.9572...  0.1581 sec/batch
Epoch: 3/20...  Training Step: 505...  Training loss: 1.9502...  0.1580 sec/batch
Epoch: 3/20...  Training Step: 506...  Training loss: 1.9453...  0.1570 sec/batch
Epoch: 3/20...  Training Step: 507...  Training loss: 1.9266...  0.1570 sec/batch
Epoch: 3/20...  Training Step: 508...  Training loss: 1.9321...  0.1570 sec/batch
Epoch: 3/20...  Training Step: 509...  Training loss: 1.9275...  0.1610 sec/batch
Epoch: 3/20...  Training Step: 510...  Training loss: 1.9185...  0.1570 sec/batch
Epoch: 3/20...  Training Step: 511...  Training loss: 1.9207...  0.1590 sec/batch
Epoch: 3/20...  Training Step: 512...  Training loss: 1.8914...  0.1570 sec/batch
Epoch: 3/20...  Training Step: 513...  Training loss: 1.9225...  0.1580 sec/batch
Epoch: 3/20...  Training Step: 514...  Training loss: 1.9153...  0.1570 sec/batch
Epoch: 3/20...  

Epoch: 4/20...  Training Step: 603...  Training loss: 1.8813...  0.1581 sec/batch
Epoch: 4/20...  Training Step: 604...  Training loss: 1.8501...  0.1570 sec/batch
Epoch: 4/20...  Training Step: 605...  Training loss: 1.8323...  0.1570 sec/batch
Epoch: 4/20...  Training Step: 606...  Training loss: 1.8321...  0.1572 sec/batch
Epoch: 4/20...  Training Step: 607...  Training loss: 1.8611...  0.1580 sec/batch
Epoch: 4/20...  Training Step: 608...  Training loss: 1.8978...  0.1570 sec/batch
Epoch: 4/20...  Training Step: 609...  Training loss: 1.8452...  0.1580 sec/batch
Epoch: 4/20...  Training Step: 610...  Training loss: 1.8191...  0.1571 sec/batch
Epoch: 4/20...  Training Step: 611...  Training loss: 1.8417...  0.1590 sec/batch
Epoch: 4/20...  Training Step: 612...  Training loss: 1.8773...  0.1570 sec/batch
Epoch: 4/20...  Training Step: 613...  Training loss: 1.8457...  0.1570 sec/batch
Epoch: 4/20...  Training Step: 614...  Training loss: 1.8496...  0.1580 sec/batch
Epoch: 4/20...  

Epoch: 4/20...  Training Step: 703...  Training loss: 1.7832...  0.1560 sec/batch
Epoch: 4/20...  Training Step: 704...  Training loss: 1.7850...  0.1570 sec/batch
Epoch: 4/20...  Training Step: 705...  Training loss: 1.7795...  0.1580 sec/batch
Epoch: 4/20...  Training Step: 706...  Training loss: 1.7661...  0.1570 sec/batch
Epoch: 4/20...  Training Step: 707...  Training loss: 1.7649...  0.1580 sec/batch
Epoch: 4/20...  Training Step: 708...  Training loss: 1.7646...  0.1570 sec/batch
Epoch: 4/20...  Training Step: 709...  Training loss: 1.7527...  0.1580 sec/batch
Epoch: 4/20...  Training Step: 710...  Training loss: 1.7469...  0.1560 sec/batch
Epoch: 4/20...  Training Step: 711...  Training loss: 1.7888...  0.1580 sec/batch
Epoch: 4/20...  Training Step: 712...  Training loss: 1.7649...  0.1580 sec/batch
Epoch: 4/20...  Training Step: 713...  Training loss: 1.7754...  0.1572 sec/batch
Epoch: 4/20...  Training Step: 714...  Training loss: 1.7663...  0.1560 sec/batch
Epoch: 4/20...  

Epoch: 5/20...  Training Step: 803...  Training loss: 1.7017...  0.1581 sec/batch
Epoch: 5/20...  Training Step: 804...  Training loss: 1.7112...  0.1570 sec/batch
Epoch: 5/20...  Training Step: 805...  Training loss: 1.7230...  0.1560 sec/batch
Epoch: 5/20...  Training Step: 806...  Training loss: 1.7646...  0.1560 sec/batch
Epoch: 5/20...  Training Step: 807...  Training loss: 1.7130...  0.1570 sec/batch
Epoch: 5/20...  Training Step: 808...  Training loss: 1.6973...  0.1570 sec/batch
Epoch: 5/20...  Training Step: 809...  Training loss: 1.7149...  0.1570 sec/batch
Epoch: 5/20...  Training Step: 810...  Training loss: 1.7443...  0.1560 sec/batch
Epoch: 5/20...  Training Step: 811...  Training loss: 1.7220...  0.1580 sec/batch
Epoch: 5/20...  Training Step: 812...  Training loss: 1.7333...  0.1570 sec/batch
Epoch: 5/20...  Training Step: 813...  Training loss: 1.6933...  0.1580 sec/batch
Epoch: 5/20...  Training Step: 814...  Training loss: 1.7352...  0.1590 sec/batch
Epoch: 5/20...  

Epoch: 5/20...  Training Step: 903...  Training loss: 1.6555...  0.1570 sec/batch
Epoch: 5/20...  Training Step: 904...  Training loss: 1.6498...  0.1560 sec/batch
Epoch: 5/20...  Training Step: 905...  Training loss: 1.6620...  0.1580 sec/batch
Epoch: 5/20...  Training Step: 906...  Training loss: 1.6563...  0.1570 sec/batch
Epoch: 5/20...  Training Step: 907...  Training loss: 1.6371...  0.1580 sec/batch
Epoch: 5/20...  Training Step: 908...  Training loss: 1.6170...  0.1560 sec/batch
Epoch: 5/20...  Training Step: 909...  Training loss: 1.6512...  0.1570 sec/batch
Epoch: 5/20...  Training Step: 910...  Training loss: 1.6513...  0.1570 sec/batch
Epoch: 5/20...  Training Step: 911...  Training loss: 1.6474...  0.1580 sec/batch
Epoch: 5/20...  Training Step: 912...  Training loss: 1.6555...  0.1580 sec/batch
Epoch: 5/20...  Training Step: 913...  Training loss: 1.6556...  0.1581 sec/batch
Epoch: 5/20...  Training Step: 914...  Training loss: 1.6210...  0.1570 sec/batch
Epoch: 5/20...  

Epoch: 6/20...  Training Step: 1003...  Training loss: 1.6330...  0.1611 sec/batch
Epoch: 6/20...  Training Step: 1004...  Training loss: 1.6702...  0.1570 sec/batch
Epoch: 6/20...  Training Step: 1005...  Training loss: 1.6157...  0.1570 sec/batch
Epoch: 6/20...  Training Step: 1006...  Training loss: 1.6043...  0.1570 sec/batch
Epoch: 6/20...  Training Step: 1007...  Training loss: 1.6227...  0.1580 sec/batch
Epoch: 6/20...  Training Step: 1008...  Training loss: 1.6511...  0.1570 sec/batch
Epoch: 6/20...  Training Step: 1009...  Training loss: 1.6247...  0.1580 sec/batch
Epoch: 6/20...  Training Step: 1010...  Training loss: 1.6445...  0.1580 sec/batch
Epoch: 6/20...  Training Step: 1011...  Training loss: 1.6179...  0.1570 sec/batch
Epoch: 6/20...  Training Step: 1012...  Training loss: 1.6515...  0.1581 sec/batch
Epoch: 6/20...  Training Step: 1013...  Training loss: 1.6147...  0.1580 sec/batch
Epoch: 6/20...  Training Step: 1014...  Training loss: 1.6413...  0.1580 sec/batch
Epoc

Epoch: 6/20...  Training Step: 1103...  Training loss: 1.5690...  0.1570 sec/batch
Epoch: 6/20...  Training Step: 1104...  Training loss: 1.5698...  0.1560 sec/batch
Epoch: 6/20...  Training Step: 1105...  Training loss: 1.5540...  0.1580 sec/batch
Epoch: 6/20...  Training Step: 1106...  Training loss: 1.5365...  0.1570 sec/batch
Epoch: 6/20...  Training Step: 1107...  Training loss: 1.5726...  0.1570 sec/batch
Epoch: 6/20...  Training Step: 1108...  Training loss: 1.5829...  0.1570 sec/batch
Epoch: 6/20...  Training Step: 1109...  Training loss: 1.5712...  0.1580 sec/batch
Epoch: 6/20...  Training Step: 1110...  Training loss: 1.5715...  0.1570 sec/batch
Epoch: 6/20...  Training Step: 1111...  Training loss: 1.5824...  0.1580 sec/batch
Epoch: 6/20...  Training Step: 1112...  Training loss: 1.5500...  0.1570 sec/batch
Epoch: 6/20...  Training Step: 1113...  Training loss: 1.5405...  0.1571 sec/batch
Epoch: 6/20...  Training Step: 1114...  Training loss: 1.5913...  0.1581 sec/batch
Epoc

Epoch: 7/20...  Training Step: 1203...  Training loss: 1.5319...  0.1580 sec/batch
Epoch: 7/20...  Training Step: 1204...  Training loss: 1.5213...  0.1590 sec/batch
Epoch: 7/20...  Training Step: 1205...  Training loss: 1.5530...  0.1580 sec/batch
Epoch: 7/20...  Training Step: 1206...  Training loss: 1.5668...  0.1580 sec/batch
Epoch: 7/20...  Training Step: 1207...  Training loss: 1.5451...  0.1570 sec/batch
Epoch: 7/20...  Training Step: 1208...  Training loss: 1.5774...  0.1570 sec/batch
Epoch: 7/20...  Training Step: 1209...  Training loss: 1.5411...  0.1571 sec/batch
Epoch: 7/20...  Training Step: 1210...  Training loss: 1.5627...  0.1580 sec/batch
Epoch: 7/20...  Training Step: 1211...  Training loss: 1.5493...  0.1580 sec/batch
Epoch: 7/20...  Training Step: 1212...  Training loss: 1.5583...  0.1560 sec/batch
Epoch: 7/20...  Training Step: 1213...  Training loss: 1.5530...  0.1570 sec/batch
Epoch: 7/20...  Training Step: 1214...  Training loss: 1.5026...  0.1580 sec/batch
Epoc

Epoch: 7/20...  Training Step: 1303...  Training loss: 1.4945...  0.1581 sec/batch
Epoch: 7/20...  Training Step: 1304...  Training loss: 1.4771...  0.1560 sec/batch
Epoch: 7/20...  Training Step: 1305...  Training loss: 1.5213...  0.1561 sec/batch
Epoch: 7/20...  Training Step: 1306...  Training loss: 1.5137...  0.1560 sec/batch
Epoch: 7/20...  Training Step: 1307...  Training loss: 1.5091...  0.1570 sec/batch
Epoch: 7/20...  Training Step: 1308...  Training loss: 1.4975...  0.1570 sec/batch
Epoch: 7/20...  Training Step: 1309...  Training loss: 1.5133...  0.1570 sec/batch
Epoch: 7/20...  Training Step: 1310...  Training loss: 1.4814...  0.1570 sec/batch
Epoch: 7/20...  Training Step: 1311...  Training loss: 1.4646...  0.1580 sec/batch
Epoch: 7/20...  Training Step: 1312...  Training loss: 1.5108...  0.1580 sec/batch
Epoch: 7/20...  Training Step: 1313...  Training loss: 1.5040...  0.1580 sec/batch
Epoch: 7/20...  Training Step: 1314...  Training loss: 1.4668...  0.1570 sec/batch
Epoc

Epoch: 8/20...  Training Step: 1403...  Training loss: 1.4888...  0.1590 sec/batch
Epoch: 8/20...  Training Step: 1404...  Training loss: 1.5047...  0.1560 sec/batch
Epoch: 8/20...  Training Step: 1405...  Training loss: 1.4765...  0.1570 sec/batch
Epoch: 8/20...  Training Step: 1406...  Training loss: 1.5081...  0.1570 sec/batch
Epoch: 8/20...  Training Step: 1407...  Training loss: 1.4824...  0.1580 sec/batch
Epoch: 8/20...  Training Step: 1408...  Training loss: 1.4961...  0.1590 sec/batch
Epoch: 8/20...  Training Step: 1409...  Training loss: 1.4746...  0.1570 sec/batch
Epoch: 8/20...  Training Step: 1410...  Training loss: 1.4919...  0.1580 sec/batch
Epoch: 8/20...  Training Step: 1411...  Training loss: 1.4801...  0.1600 sec/batch
Epoch: 8/20...  Training Step: 1412...  Training loss: 1.4394...  0.1570 sec/batch
Epoch: 8/20...  Training Step: 1413...  Training loss: 1.4484...  0.1580 sec/batch
Epoch: 8/20...  Training Step: 1414...  Training loss: 1.4875...  0.1570 sec/batch
Epoc

Epoch: 8/20...  Training Step: 1503...  Training loss: 1.4524...  0.1600 sec/batch
Epoch: 8/20...  Training Step: 1504...  Training loss: 1.4692...  0.1570 sec/batch
Epoch: 8/20...  Training Step: 1505...  Training loss: 1.4572...  0.1581 sec/batch
Epoch: 8/20...  Training Step: 1506...  Training loss: 1.4447...  0.1590 sec/batch
Epoch: 8/20...  Training Step: 1507...  Training loss: 1.4506...  0.1580 sec/batch
Epoch: 8/20...  Training Step: 1508...  Training loss: 1.4261...  0.1950 sec/batch
Epoch: 8/20...  Training Step: 1509...  Training loss: 1.4156...  0.1670 sec/batch
Epoch: 8/20...  Training Step: 1510...  Training loss: 1.4586...  0.1620 sec/batch
Epoch: 8/20...  Training Step: 1511...  Training loss: 1.4454...  0.1640 sec/batch
Epoch: 8/20...  Training Step: 1512...  Training loss: 1.4030...  0.1740 sec/batch
Epoch: 8/20...  Training Step: 1513...  Training loss: 1.4703...  0.1720 sec/batch
Epoch: 8/20...  Training Step: 1514...  Training loss: 1.4657...  0.1730 sec/batch
Epoc

Epoch: 9/20...  Training Step: 1603...  Training loss: 1.4426...  0.1600 sec/batch
Epoch: 9/20...  Training Step: 1604...  Training loss: 1.4633...  0.1560 sec/batch
Epoch: 9/20...  Training Step: 1605...  Training loss: 1.4287...  0.1600 sec/batch
Epoch: 9/20...  Training Step: 1606...  Training loss: 1.4496...  0.1590 sec/batch
Epoch: 9/20...  Training Step: 1607...  Training loss: 1.4274...  0.1570 sec/batch
Epoch: 9/20...  Training Step: 1608...  Training loss: 1.4331...  0.1590 sec/batch
Epoch: 9/20...  Training Step: 1609...  Training loss: 1.4360...  0.1570 sec/batch
Epoch: 9/20...  Training Step: 1610...  Training loss: 1.3923...  0.1580 sec/batch
Epoch: 9/20...  Training Step: 1611...  Training loss: 1.3981...  0.1580 sec/batch
Epoch: 9/20...  Training Step: 1612...  Training loss: 1.4411...  0.1580 sec/batch
Epoch: 9/20...  Training Step: 1613...  Training loss: 1.4400...  0.1570 sec/batch
Epoch: 9/20...  Training Step: 1614...  Training loss: 1.4513...  0.1590 sec/batch
Epoc

Epoch: 9/20...  Training Step: 1703...  Training loss: 1.4080...  0.1610 sec/batch
Epoch: 9/20...  Training Step: 1704...  Training loss: 1.3971...  0.1580 sec/batch
Epoch: 9/20...  Training Step: 1705...  Training loss: 1.4156...  0.1580 sec/batch
Epoch: 9/20...  Training Step: 1706...  Training loss: 1.3759...  0.1570 sec/batch
Epoch: 9/20...  Training Step: 1707...  Training loss: 1.3554...  0.1580 sec/batch
Epoch: 9/20...  Training Step: 1708...  Training loss: 1.4143...  0.1580 sec/batch
Epoch: 9/20...  Training Step: 1709...  Training loss: 1.4001...  0.1580 sec/batch
Epoch: 9/20...  Training Step: 1710...  Training loss: 1.3624...  0.1571 sec/batch
Epoch: 9/20...  Training Step: 1711...  Training loss: 1.4164...  0.1570 sec/batch
Epoch: 9/20...  Training Step: 1712...  Training loss: 1.4152...  0.1560 sec/batch
Epoch: 9/20...  Training Step: 1713...  Training loss: 1.3915...  0.1570 sec/batch
Epoch: 9/20...  Training Step: 1714...  Training loss: 1.3744...  0.1580 sec/batch
Epoc

Epoch: 10/20...  Training Step: 1803...  Training loss: 1.3905...  0.1580 sec/batch
Epoch: 10/20...  Training Step: 1804...  Training loss: 1.4028...  0.1570 sec/batch
Epoch: 10/20...  Training Step: 1805...  Training loss: 1.3907...  0.1610 sec/batch
Epoch: 10/20...  Training Step: 1806...  Training loss: 1.4089...  0.1580 sec/batch
Epoch: 10/20...  Training Step: 1807...  Training loss: 1.4129...  0.1610 sec/batch
Epoch: 10/20...  Training Step: 1808...  Training loss: 1.3452...  0.1580 sec/batch
Epoch: 10/20...  Training Step: 1809...  Training loss: 1.3631...  0.1580 sec/batch
Epoch: 10/20...  Training Step: 1810...  Training loss: 1.4092...  0.1570 sec/batch
Epoch: 10/20...  Training Step: 1811...  Training loss: 1.3994...  0.1590 sec/batch
Epoch: 10/20...  Training Step: 1812...  Training loss: 1.4048...  0.1630 sec/batch
Epoch: 10/20...  Training Step: 1813...  Training loss: 1.3824...  0.1590 sec/batch
Epoch: 10/20...  Training Step: 1814...  Training loss: 1.3758...  0.1560 se

Epoch: 10/20...  Training Step: 1901...  Training loss: 1.3723...  0.1580 sec/batch
Epoch: 10/20...  Training Step: 1902...  Training loss: 1.3790...  0.1570 sec/batch
Epoch: 10/20...  Training Step: 1903...  Training loss: 1.3714...  0.1581 sec/batch
Epoch: 10/20...  Training Step: 1904...  Training loss: 1.3390...  0.1560 sec/batch
Epoch: 10/20...  Training Step: 1905...  Training loss: 1.3167...  0.1580 sec/batch
Epoch: 10/20...  Training Step: 1906...  Training loss: 1.3787...  0.1571 sec/batch
Epoch: 10/20...  Training Step: 1907...  Training loss: 1.3704...  0.1581 sec/batch
Epoch: 10/20...  Training Step: 1908...  Training loss: 1.3238...  0.1570 sec/batch
Epoch: 10/20...  Training Step: 1909...  Training loss: 1.3831...  0.1600 sec/batch
Epoch: 10/20...  Training Step: 1910...  Training loss: 1.3823...  0.1570 sec/batch
Epoch: 10/20...  Training Step: 1911...  Training loss: 1.3619...  0.1570 sec/batch
Epoch: 10/20...  Training Step: 1912...  Training loss: 1.3286...  0.1570 se

Epoch: 11/20...  Training Step: 1999...  Training loss: 1.3727...  0.1582 sec/batch
Epoch: 11/20...  Training Step: 2000...  Training loss: 1.3937...  0.1580 sec/batch
Epoch: 11/20...  Training Step: 2001...  Training loss: 1.3684...  0.1620 sec/batch
Epoch: 11/20...  Training Step: 2002...  Training loss: 1.3712...  0.1580 sec/batch
Epoch: 11/20...  Training Step: 2003...  Training loss: 1.3549...  0.1581 sec/batch
Epoch: 11/20...  Training Step: 2004...  Training loss: 1.3779...  0.1570 sec/batch
Epoch: 11/20...  Training Step: 2005...  Training loss: 1.3701...  0.1570 sec/batch
Epoch: 11/20...  Training Step: 2006...  Training loss: 1.3230...  0.1570 sec/batch
Epoch: 11/20...  Training Step: 2007...  Training loss: 1.3302...  0.1570 sec/batch
Epoch: 11/20...  Training Step: 2008...  Training loss: 1.3859...  0.1570 sec/batch
Epoch: 11/20...  Training Step: 2009...  Training loss: 1.3734...  0.1580 sec/batch
Epoch: 11/20...  Training Step: 2010...  Training loss: 1.3738...  0.1580 se

Epoch: 11/20...  Training Step: 2097...  Training loss: 1.3467...  0.1580 sec/batch
Epoch: 11/20...  Training Step: 2098...  Training loss: 1.3493...  0.1571 sec/batch
Epoch: 11/20...  Training Step: 2099...  Training loss: 1.3439...  0.1580 sec/batch
Epoch: 11/20...  Training Step: 2100...  Training loss: 1.3413...  0.1580 sec/batch
Epoch: 11/20...  Training Step: 2101...  Training loss: 1.3431...  0.1570 sec/batch
Epoch: 11/20...  Training Step: 2102...  Training loss: 1.3253...  0.1570 sec/batch
Epoch: 11/20...  Training Step: 2103...  Training loss: 1.3024...  0.1650 sec/batch
Epoch: 11/20...  Training Step: 2104...  Training loss: 1.3607...  0.1591 sec/batch
Epoch: 11/20...  Training Step: 2105...  Training loss: 1.3343...  0.1681 sec/batch
Epoch: 11/20...  Training Step: 2106...  Training loss: 1.3003...  0.1560 sec/batch
Epoch: 11/20...  Training Step: 2107...  Training loss: 1.3498...  0.1600 sec/batch
Epoch: 11/20...  Training Step: 2108...  Training loss: 1.3518...  0.1580 se

Epoch: 12/20...  Training Step: 2195...  Training loss: 1.3588...  0.1590 sec/batch
Epoch: 12/20...  Training Step: 2196...  Training loss: 1.3586...  0.1570 sec/batch
Epoch: 12/20...  Training Step: 2197...  Training loss: 1.3351...  0.1570 sec/batch
Epoch: 12/20...  Training Step: 2198...  Training loss: 1.3682...  0.1570 sec/batch
Epoch: 12/20...  Training Step: 2199...  Training loss: 1.3398...  0.1570 sec/batch
Epoch: 12/20...  Training Step: 2200...  Training loss: 1.3559...  0.1570 sec/batch
Epoch: 12/20...  Training Step: 2201...  Training loss: 1.3319...  0.1571 sec/batch
Epoch: 12/20...  Training Step: 2202...  Training loss: 1.3503...  0.1570 sec/batch
Epoch: 12/20...  Training Step: 2203...  Training loss: 1.3346...  0.1580 sec/batch
Epoch: 12/20...  Training Step: 2204...  Training loss: 1.2904...  0.1580 sec/batch
Epoch: 12/20...  Training Step: 2205...  Training loss: 1.3073...  0.1600 sec/batch
Epoch: 12/20...  Training Step: 2206...  Training loss: 1.3566...  0.1570 se

Epoch: 12/20...  Training Step: 2293...  Training loss: 1.2980...  0.1570 sec/batch
Epoch: 12/20...  Training Step: 2294...  Training loss: 1.2879...  0.1570 sec/batch
Epoch: 12/20...  Training Step: 2295...  Training loss: 1.3266...  0.1590 sec/batch
Epoch: 12/20...  Training Step: 2296...  Training loss: 1.3289...  0.1600 sec/batch
Epoch: 12/20...  Training Step: 2297...  Training loss: 1.3329...  0.1580 sec/batch
Epoch: 12/20...  Training Step: 2298...  Training loss: 1.3274...  0.1570 sec/batch
Epoch: 12/20...  Training Step: 2299...  Training loss: 1.3351...  0.1570 sec/batch
Epoch: 12/20...  Training Step: 2300...  Training loss: 1.2872...  0.1570 sec/batch
Epoch: 12/20...  Training Step: 2301...  Training loss: 1.2769...  0.1598 sec/batch
Epoch: 12/20...  Training Step: 2302...  Training loss: 1.3349...  0.1576 sec/batch
Epoch: 12/20...  Training Step: 2303...  Training loss: 1.3203...  0.1580 sec/batch
Epoch: 12/20...  Training Step: 2304...  Training loss: 1.2764...  0.1580 se

Epoch: 13/20...  Training Step: 2391...  Training loss: 1.2953...  0.1623 sec/batch
Epoch: 13/20...  Training Step: 2392...  Training loss: 1.2906...  0.1601 sec/batch
Epoch: 13/20...  Training Step: 2393...  Training loss: 1.3346...  0.1572 sec/batch
Epoch: 13/20...  Training Step: 2394...  Training loss: 1.3394...  0.1581 sec/batch
Epoch: 13/20...  Training Step: 2395...  Training loss: 1.3108...  0.1590 sec/batch
Epoch: 13/20...  Training Step: 2396...  Training loss: 1.3446...  0.1580 sec/batch
Epoch: 13/20...  Training Step: 2397...  Training loss: 1.3042...  0.1560 sec/batch
Epoch: 13/20...  Training Step: 2398...  Training loss: 1.3345...  0.1571 sec/batch
Epoch: 13/20...  Training Step: 2399...  Training loss: 1.3053...  0.1600 sec/batch
Epoch: 13/20...  Training Step: 2400...  Training loss: 1.3312...  0.1590 sec/batch
Epoch: 13/20...  Training Step: 2401...  Training loss: 1.3176...  0.1640 sec/batch
Epoch: 13/20...  Training Step: 2402...  Training loss: 1.2732...  0.1570 se

Epoch: 13/20...  Training Step: 2489...  Training loss: 1.3088...  0.1572 sec/batch
Epoch: 13/20...  Training Step: 2490...  Training loss: 1.2964...  0.1570 sec/batch
Epoch: 13/20...  Training Step: 2491...  Training loss: 1.2870...  0.1580 sec/batch
Epoch: 13/20...  Training Step: 2492...  Training loss: 1.2689...  0.1580 sec/batch
Epoch: 13/20...  Training Step: 2493...  Training loss: 1.3139...  0.1580 sec/batch
Epoch: 13/20...  Training Step: 2494...  Training loss: 1.3033...  0.1570 sec/batch
Epoch: 13/20...  Training Step: 2495...  Training loss: 1.3028...  0.1590 sec/batch
Epoch: 13/20...  Training Step: 2496...  Training loss: 1.3003...  0.1560 sec/batch
Epoch: 13/20...  Training Step: 2497...  Training loss: 1.3003...  0.1570 sec/batch
Epoch: 13/20...  Training Step: 2498...  Training loss: 1.2749...  0.1581 sec/batch
Epoch: 13/20...  Training Step: 2499...  Training loss: 1.2562...  0.1581 sec/batch
Epoch: 13/20...  Training Step: 2500...  Training loss: 1.3031...  0.1640 se

Epoch: 14/20...  Training Step: 2587...  Training loss: 1.3124...  0.1580 sec/batch
Epoch: 14/20...  Training Step: 2588...  Training loss: 1.3064...  0.1580 sec/batch
Epoch: 14/20...  Training Step: 2589...  Training loss: 1.2853...  0.1590 sec/batch
Epoch: 14/20...  Training Step: 2590...  Training loss: 1.2670...  0.1580 sec/batch
Epoch: 14/20...  Training Step: 2591...  Training loss: 1.3138...  0.1640 sec/batch
Epoch: 14/20...  Training Step: 2592...  Training loss: 1.3243...  0.1620 sec/batch
Epoch: 14/20...  Training Step: 2593...  Training loss: 1.2972...  0.1580 sec/batch
Epoch: 14/20...  Training Step: 2594...  Training loss: 1.3132...  0.1580 sec/batch
Epoch: 14/20...  Training Step: 2595...  Training loss: 1.2892...  0.1640 sec/batch
Epoch: 14/20...  Training Step: 2596...  Training loss: 1.3195...  0.1570 sec/batch
Epoch: 14/20...  Training Step: 2597...  Training loss: 1.2937...  0.1580 sec/batch
Epoch: 14/20...  Training Step: 2598...  Training loss: 1.3202...  0.1630 se

Epoch: 14/20...  Training Step: 2685...  Training loss: 1.2840...  0.1570 sec/batch
Epoch: 14/20...  Training Step: 2686...  Training loss: 1.2876...  0.1570 sec/batch
Epoch: 14/20...  Training Step: 2687...  Training loss: 1.2846...  0.1570 sec/batch
Epoch: 14/20...  Training Step: 2688...  Training loss: 1.2812...  0.1561 sec/batch
Epoch: 14/20...  Training Step: 2689...  Training loss: 1.2698...  0.1570 sec/batch
Epoch: 14/20...  Training Step: 2690...  Training loss: 1.2559...  0.1590 sec/batch
Epoch: 14/20...  Training Step: 2691...  Training loss: 1.2920...  0.1580 sec/batch
Epoch: 14/20...  Training Step: 2692...  Training loss: 1.2910...  0.1571 sec/batch
Epoch: 14/20...  Training Step: 2693...  Training loss: 1.2861...  0.1570 sec/batch
Epoch: 14/20...  Training Step: 2694...  Training loss: 1.2838...  0.1571 sec/batch
Epoch: 14/20...  Training Step: 2695...  Training loss: 1.2968...  0.1570 sec/batch
Epoch: 14/20...  Training Step: 2696...  Training loss: 1.2629...  0.1580 se

Epoch: 15/20...  Training Step: 2783...  Training loss: 1.2785...  0.1580 sec/batch
Epoch: 15/20...  Training Step: 2784...  Training loss: 1.2855...  0.1580 sec/batch
Epoch: 15/20...  Training Step: 2785...  Training loss: 1.2788...  0.1580 sec/batch
Epoch: 15/20...  Training Step: 2786...  Training loss: 1.3017...  0.1570 sec/batch
Epoch: 15/20...  Training Step: 2787...  Training loss: 1.2700...  0.1570 sec/batch
Epoch: 15/20...  Training Step: 2788...  Training loss: 1.2600...  0.1570 sec/batch
Epoch: 15/20...  Training Step: 2789...  Training loss: 1.3010...  0.1580 sec/batch
Epoch: 15/20...  Training Step: 2790...  Training loss: 1.2984...  0.1570 sec/batch
Epoch: 15/20...  Training Step: 2791...  Training loss: 1.2865...  0.1580 sec/batch
Epoch: 15/20...  Training Step: 2792...  Training loss: 1.3058...  0.1570 sec/batch
Epoch: 15/20...  Training Step: 2793...  Training loss: 1.2895...  0.1580 sec/batch
Epoch: 15/20...  Training Step: 2794...  Training loss: 1.2990...  0.1570 se

Epoch: 15/20...  Training Step: 2881...  Training loss: 1.2585...  0.1580 sec/batch
Epoch: 15/20...  Training Step: 2882...  Training loss: 1.2898...  0.1560 sec/batch
Epoch: 15/20...  Training Step: 2883...  Training loss: 1.2651...  0.1730 sec/batch
Epoch: 15/20...  Training Step: 2884...  Training loss: 1.2732...  0.1570 sec/batch
Epoch: 15/20...  Training Step: 2885...  Training loss: 1.2749...  0.1581 sec/batch
Epoch: 15/20...  Training Step: 2886...  Training loss: 1.2688...  0.1580 sec/batch
Epoch: 15/20...  Training Step: 2887...  Training loss: 1.2566...  0.1570 sec/batch
Epoch: 15/20...  Training Step: 2888...  Training loss: 1.2283...  0.1570 sec/batch
Epoch: 15/20...  Training Step: 2889...  Training loss: 1.2796...  0.1580 sec/batch
Epoch: 15/20...  Training Step: 2890...  Training loss: 1.2780...  0.1590 sec/batch
Epoch: 15/20...  Training Step: 2891...  Training loss: 1.2765...  0.1580 sec/batch
Epoch: 15/20...  Training Step: 2892...  Training loss: 1.2745...  0.1570 se

Epoch: 16/20...  Training Step: 2979...  Training loss: 1.2705...  0.1580 sec/batch
Epoch: 16/20...  Training Step: 2980...  Training loss: 1.2675...  0.1570 sec/batch
Epoch: 16/20...  Training Step: 2981...  Training loss: 1.2581...  0.1570 sec/batch
Epoch: 16/20...  Training Step: 2982...  Training loss: 1.2633...  0.1560 sec/batch
Epoch: 16/20...  Training Step: 2983...  Training loss: 1.2677...  0.1580 sec/batch
Epoch: 16/20...  Training Step: 2984...  Training loss: 1.2822...  0.1570 sec/batch
Epoch: 16/20...  Training Step: 2985...  Training loss: 1.2533...  0.1570 sec/batch
Epoch: 16/20...  Training Step: 2986...  Training loss: 1.2489...  0.1570 sec/batch
Epoch: 16/20...  Training Step: 2987...  Training loss: 1.2816...  0.1600 sec/batch
Epoch: 16/20...  Training Step: 2988...  Training loss: 1.2836...  0.1580 sec/batch
Epoch: 16/20...  Training Step: 2989...  Training loss: 1.2666...  0.1582 sec/batch
Epoch: 16/20...  Training Step: 2990...  Training loss: 1.2868...  0.1571 se

Epoch: 16/20...  Training Step: 3077...  Training loss: 1.2590...  0.1570 sec/batch
Epoch: 16/20...  Training Step: 3078...  Training loss: 1.2657...  0.1570 sec/batch
Epoch: 16/20...  Training Step: 3079...  Training loss: 1.2451...  0.1580 sec/batch
Epoch: 16/20...  Training Step: 3080...  Training loss: 1.2681...  0.1570 sec/batch
Epoch: 16/20...  Training Step: 3081...  Training loss: 1.2482...  0.1570 sec/batch
Epoch: 16/20...  Training Step: 3082...  Training loss: 1.2662...  0.1570 sec/batch
Epoch: 16/20...  Training Step: 3083...  Training loss: 1.2590...  0.1600 sec/batch
Epoch: 16/20...  Training Step: 3084...  Training loss: 1.2448...  0.1571 sec/batch
Epoch: 16/20...  Training Step: 3085...  Training loss: 1.2413...  0.1580 sec/batch
Epoch: 16/20...  Training Step: 3086...  Training loss: 1.2198...  0.1570 sec/batch
Epoch: 16/20...  Training Step: 3087...  Training loss: 1.2643...  0.1571 sec/batch
Epoch: 16/20...  Training Step: 3088...  Training loss: 1.2600...  0.1580 se

Epoch: 17/20...  Training Step: 3175...  Training loss: 1.2560...  0.1570 sec/batch
Epoch: 17/20...  Training Step: 3176...  Training loss: 1.2470...  0.1570 sec/batch
Epoch: 17/20...  Training Step: 3177...  Training loss: 1.2557...  0.1580 sec/batch
Epoch: 17/20...  Training Step: 3178...  Training loss: 1.2443...  0.1580 sec/batch
Epoch: 17/20...  Training Step: 3179...  Training loss: 1.2435...  0.1570 sec/batch
Epoch: 17/20...  Training Step: 3180...  Training loss: 1.2546...  0.1570 sec/batch
Epoch: 17/20...  Training Step: 3181...  Training loss: 1.2576...  0.1580 sec/batch
Epoch: 17/20...  Training Step: 3182...  Training loss: 1.2653...  0.1590 sec/batch
Epoch: 17/20...  Training Step: 3183...  Training loss: 1.2447...  0.1580 sec/batch
Epoch: 17/20...  Training Step: 3184...  Training loss: 1.2296...  0.1580 sec/batch
Epoch: 17/20...  Training Step: 3185...  Training loss: 1.2695...  0.1580 sec/batch
Epoch: 17/20...  Training Step: 3186...  Training loss: 1.2822...  0.1580 se

Epoch: 17/20...  Training Step: 3273...  Training loss: 1.2498...  0.1580 sec/batch
Epoch: 17/20...  Training Step: 3274...  Training loss: 1.2396...  0.1600 sec/batch
Epoch: 17/20...  Training Step: 3275...  Training loss: 1.2456...  0.1581 sec/batch
Epoch: 17/20...  Training Step: 3276...  Training loss: 1.2625...  0.1580 sec/batch
Epoch: 17/20...  Training Step: 3277...  Training loss: 1.2314...  0.1620 sec/batch
Epoch: 17/20...  Training Step: 3278...  Training loss: 1.2590...  0.1570 sec/batch
Epoch: 17/20...  Training Step: 3279...  Training loss: 1.2351...  0.1570 sec/batch
Epoch: 17/20...  Training Step: 3280...  Training loss: 1.2532...  0.1560 sec/batch
Epoch: 17/20...  Training Step: 3281...  Training loss: 1.2466...  0.1570 sec/batch
Epoch: 17/20...  Training Step: 3282...  Training loss: 1.2410...  0.1560 sec/batch
Epoch: 17/20...  Training Step: 3283...  Training loss: 1.2241...  0.1580 sec/batch
Epoch: 17/20...  Training Step: 3284...  Training loss: 1.2097...  0.1590 se

Epoch: 18/20...  Training Step: 3371...  Training loss: 1.2304...  0.1580 sec/batch
Epoch: 18/20...  Training Step: 3372...  Training loss: 1.2062...  0.1590 sec/batch
Epoch: 18/20...  Training Step: 3373...  Training loss: 1.2460...  0.1580 sec/batch
Epoch: 18/20...  Training Step: 3374...  Training loss: 1.2360...  0.1570 sec/batch
Epoch: 18/20...  Training Step: 3375...  Training loss: 1.2525...  0.1570 sec/batch
Epoch: 18/20...  Training Step: 3376...  Training loss: 1.2411...  0.1570 sec/batch
Epoch: 18/20...  Training Step: 3377...  Training loss: 1.2349...  0.1580 sec/batch
Epoch: 18/20...  Training Step: 3378...  Training loss: 1.2423...  0.1600 sec/batch
Epoch: 18/20...  Training Step: 3379...  Training loss: 1.2519...  0.1571 sec/batch
Epoch: 18/20...  Training Step: 3380...  Training loss: 1.2545...  0.1570 sec/batch
Epoch: 18/20...  Training Step: 3381...  Training loss: 1.2242...  0.1570 sec/batch
Epoch: 18/20...  Training Step: 3382...  Training loss: 1.2142...  0.1570 se

Epoch: 18/20...  Training Step: 3469...  Training loss: 1.2457...  0.1580 sec/batch
Epoch: 18/20...  Training Step: 3470...  Training loss: 1.2273...  0.1570 sec/batch
Epoch: 18/20...  Training Step: 3471...  Training loss: 1.2171...  0.1600 sec/batch
Epoch: 18/20...  Training Step: 3472...  Training loss: 1.2299...  0.1570 sec/batch
Epoch: 18/20...  Training Step: 3473...  Training loss: 1.2426...  0.1570 sec/batch
Epoch: 18/20...  Training Step: 3474...  Training loss: 1.2479...  0.1560 sec/batch
Epoch: 18/20...  Training Step: 3475...  Training loss: 1.2240...  0.1570 sec/batch
Epoch: 18/20...  Training Step: 3476...  Training loss: 1.2491...  0.1580 sec/batch
Epoch: 18/20...  Training Step: 3477...  Training loss: 1.2249...  0.1580 sec/batch
Epoch: 18/20...  Training Step: 3478...  Training loss: 1.2422...  0.1560 sec/batch
Epoch: 18/20...  Training Step: 3479...  Training loss: 1.2417...  0.1570 sec/batch
Epoch: 18/20...  Training Step: 3480...  Training loss: 1.2313...  0.1580 se

Epoch: 19/20...  Training Step: 3567...  Training loss: 1.2422...  0.1580 sec/batch
Epoch: 19/20...  Training Step: 3568...  Training loss: 1.2590...  0.1570 sec/batch
Epoch: 19/20...  Training Step: 3569...  Training loss: 1.2189...  0.1570 sec/batch
Epoch: 19/20...  Training Step: 3570...  Training loss: 1.1920...  0.1600 sec/batch
Epoch: 19/20...  Training Step: 3571...  Training loss: 1.2402...  0.1590 sec/batch
Epoch: 19/20...  Training Step: 3572...  Training loss: 1.2388...  0.1560 sec/batch
Epoch: 19/20...  Training Step: 3573...  Training loss: 1.2477...  0.1570 sec/batch
Epoch: 19/20...  Training Step: 3574...  Training loss: 1.2242...  0.1590 sec/batch
Epoch: 19/20...  Training Step: 3575...  Training loss: 1.2249...  0.1570 sec/batch
Epoch: 19/20...  Training Step: 3576...  Training loss: 1.2312...  0.1570 sec/batch
Epoch: 19/20...  Training Step: 3577...  Training loss: 1.2316...  0.1570 sec/batch
Epoch: 19/20...  Training Step: 3578...  Training loss: 1.2426...  0.1560 se

Epoch: 19/20...  Training Step: 3665...  Training loss: 1.2352...  0.1570 sec/batch
Epoch: 19/20...  Training Step: 3666...  Training loss: 1.2148...  0.1581 sec/batch
Epoch: 19/20...  Training Step: 3667...  Training loss: 1.2305...  0.1582 sec/batch
Epoch: 19/20...  Training Step: 3668...  Training loss: 1.2258...  0.1580 sec/batch
Epoch: 19/20...  Training Step: 3669...  Training loss: 1.2233...  0.1580 sec/batch
Epoch: 19/20...  Training Step: 3670...  Training loss: 1.2247...  0.1591 sec/batch
Epoch: 19/20...  Training Step: 3671...  Training loss: 1.2380...  0.1570 sec/batch
Epoch: 19/20...  Training Step: 3672...  Training loss: 1.2329...  0.1570 sec/batch
Epoch: 19/20...  Training Step: 3673...  Training loss: 1.2142...  0.1570 sec/batch
Epoch: 19/20...  Training Step: 3674...  Training loss: 1.2399...  0.1570 sec/batch
Epoch: 19/20...  Training Step: 3675...  Training loss: 1.2114...  0.1570 sec/batch
Epoch: 19/20...  Training Step: 3676...  Training loss: 1.2334...  0.1580 se

Epoch: 20/20...  Training Step: 3763...  Training loss: 1.3273...  0.1570 sec/batch
Epoch: 20/20...  Training Step: 3764...  Training loss: 1.2350...  0.1580 sec/batch
Epoch: 20/20...  Training Step: 3765...  Training loss: 1.2261...  0.1600 sec/batch
Epoch: 20/20...  Training Step: 3766...  Training loss: 1.2511...  0.1580 sec/batch
Epoch: 20/20...  Training Step: 3767...  Training loss: 1.2025...  0.1590 sec/batch
Epoch: 20/20...  Training Step: 3768...  Training loss: 1.1880...  0.1570 sec/batch
Epoch: 20/20...  Training Step: 3769...  Training loss: 1.2173...  0.1570 sec/batch
Epoch: 20/20...  Training Step: 3770...  Training loss: 1.2241...  0.1560 sec/batch
Epoch: 20/20...  Training Step: 3771...  Training loss: 1.2358...  0.1570 sec/batch
Epoch: 20/20...  Training Step: 3772...  Training loss: 1.2139...  0.1580 sec/batch
Epoch: 20/20...  Training Step: 3773...  Training loss: 1.2155...  0.1580 sec/batch
Epoch: 20/20...  Training Step: 3774...  Training loss: 1.2207...  0.1580 se

Epoch: 20/20...  Training Step: 3861...  Training loss: 1.1962...  0.1580 sec/batch
Epoch: 20/20...  Training Step: 3862...  Training loss: 1.1987...  0.1570 sec/batch
Epoch: 20/20...  Training Step: 3863...  Training loss: 1.2125...  0.1590 sec/batch
Epoch: 20/20...  Training Step: 3864...  Training loss: 1.2006...  0.1570 sec/batch
Epoch: 20/20...  Training Step: 3865...  Training loss: 1.2268...  0.1600 sec/batch
Epoch: 20/20...  Training Step: 3866...  Training loss: 1.2089...  0.1581 sec/batch
Epoch: 20/20...  Training Step: 3867...  Training loss: 1.2106...  0.1580 sec/batch
Epoch: 20/20...  Training Step: 3868...  Training loss: 1.2020...  0.1600 sec/batch
Epoch: 20/20...  Training Step: 3869...  Training loss: 1.2131...  0.1580 sec/batch
Epoch: 20/20...  Training Step: 3870...  Training loss: 1.2143...  0.1570 sec/batch
Epoch: 20/20...  Training Step: 3871...  Training loss: 1.2052...  0.1570 sec/batch
Epoch: 20/20...  Training Step: 3872...  Training loss: 1.2229...  0.1570 se

Epoch: 20/20...  Training Step: 3959...  Training loss: 1.2094...  0.1560 sec/batch
Epoch: 20/20...  Training Step: 3960...  Training loss: 1.2048...  0.1570 sec/batch


## testing

In [10]:
def pick_top_n(preds, vocab_size, top_n=5):
    '''Sample a class index from the top_n most probable predictions.

       Arguments
       ---------
       preds: Array of prediction scores; after squeezing it is expected to
              be 1-D with vocab_size entries (np.random.choice requires the
              probability vector to match the population size)
       vocab_size: Number of classes to choose from
       top_n: How many of the highest-scoring classes stay eligible

       Returns
       -------
       The sampled class index (a NumPy integer scalar).
    '''
    # np.squeeze returns a view of its input, so work on a copy; otherwise
    # zeroing the low-probability entries would silently overwrite the
    # caller's prediction array.
    p = np.squeeze(preds).copy()
    # argsort is ascending: everything except the last top_n indices is dropped.
    p[np.argsort(p)[:-top_n]] = 0
    # Renormalise the surviving entries so they form a valid distribution.
    p = p / np.sum(p)
    c = np.random.choice(vocab_size, 1, p=p)[0]
    return c

In [11]:
def sample(checkpoint, n_samples, lstm_size, vocab_size, prime="The "):
    '''Generate text from a trained CharRNN checkpoint.

       The network is rebuilt in sampling mode, the saved weights are
       restored, the state is warmed up on `prime`, and characters are then
       drawn one at a time with pick_top_n, each fed back as the next input.

       Arguments
       ---------
       checkpoint: Checkpoint path passed to tf.train.Saver.restore
       n_samples: Number of sampling-loop iterations; one extra character is
                  drawn right after priming, so n_samples + 1 characters are
                  appended to the prime text
       lstm_size: LSTM size used to build the network (presumably has to
                  match the size the checkpoint was trained with)
       vocab_size: Number of character classes
       prime: Non-empty seed text; its characters must be keys of vocab_to_int

       Returns
       -------
       The prime text followed by the generated characters, as one string.

       Raises
       ------
       ValueError: If prime is empty.
    '''
    # Fail fast: without at least one priming character there is no
    # prediction from which to draw the first generated character.
    if not prime:
        raise ValueError("prime must contain at least one character")

    samples = list(prime)
    model = CharRNN(vocab_size, lstm_size=lstm_size, sampling=True)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, checkpoint)
        new_state = sess.run(model.initial_state)

        # Single reusable 1x1 input buffer; int32 matches the inputs placeholder.
        x = np.zeros((1, 1), dtype=np.int32)

        def step(char_id, state):
            # Run the network on one character, carrying the LSTM state over.
            x[0, 0] = char_id
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: state}
            return sess.run([model.prediction, model.final_state],
                            feed_dict=feed)

        # Warm up the state on the prime text; only the prediction after the
        # last prime character is used.
        for c in prime:
            preds, new_state = step(vocab_to_int[c], new_state)

        c = pick_top_n(preds, vocab_size)
        samples.append(int_to_vocab[c])

        # Feed each sampled character back in to produce the next one.
        for _ in range(n_samples):
            preds, new_state = step(c, new_state)
            c = pick_top_n(preds, vocab_size)
            samples.append(int_to_vocab[c])

    return ''.join(samples)

In [12]:
# Look up the most recent checkpoint once, then generate text primed with "Far"
# (2000 sampling iterations) and print it.
checkpoint = tf.train.latest_checkpoint('checkpoints')
samp = sample(checkpoint, 2000, lstm_size, len(vocab), prime="Far")
print(samp)

INFO:tensorflow:Restoring parameters from checkpoints\i3960_l512.ckpt
Farther.

"Anna will the work in the wagater and he pursuishly. It sought it, if
they've not seen it out."

"Oh, then, inded that," he siin silent.

"Why, these death! Well, that, it's only in the carrying it in the stail,"
 the position, so shouting herself, and was simpining tomernow and agond
torance.

"I should not be often anyone. A mind of a tron, I chanced howe it all in
aristoca!" said Anna, stating his bitter with which. "I teing this woman
is anxieticed in tomorrow is how they'll greater the sights, and I can
not live to me, I could not to go out, I'll consincurate them what it
here about them that I am," he said to Anna with in smill, said that she
would nut he said when he coule obviously at him, and askidiously apparant
from the ball to see he how all this teress it was not lef in her
same, and he did not, becaue Anna, when a good might said nothing, andwhere
was an old man, and that in the most secondre