In [1]:
from __future__ import print_function
import os

import numpy as np
import zipfile
import tarfile
from six.moves.urllib.request import urlretrieve
import shutil 
import random

import string
import tensorflow as tf

# Dirs - must be absolute paths!
LOG_DIR = '/tmp/tf/ptb_char_lstm2/hidden32_batch1_seq5/'
# Local dir where PTB files will be stored.
PTB_DIR = '/home/tkornuta/data/ptb/'

# Filenames.
TRAIN = "ptb.train.txt"
VALID = "ptb.valid.txt"
TEST = "ptb.test.txt"

# Size of the hidden state.
HIDDEN_SIZE = 32

# Batch size.
BATCH_SIZE = 1

# A single recurrent layer of number of units = sequences of length
# e.g. 200 bytes
SEQ_LENGTH = 5


### Check/maybe download PTB.

In [2]:
def maybe_download_ptb(path, 
                       filename='simple-examples.tgz', 
                       url='http://www.fit.vutbr.cz/~imikolov/rnnlm/', 
                       expected_bytes =34869662):
  # Eventually create the PTB dir.
  if not tf.gfile.Exists(path):
    tf.gfile.MakeDirs(path)
  """Download a file if not present, and make sure it's the right size."""
  _filename = path+filename
  if not os.path.exists(_filename):
    print('Downloading %s...' % filename)
    _filename, _ = urlretrieve(url+filename, _filename)
  statinfo = os.stat(_filename)
  if statinfo.st_size == expected_bytes:
    print('Found and verified', (_filename), '(', statinfo.st_size, ')')
  else:
    print(statinfo.st_size)
    raise Exception(
      'Failed to verify ' + _filename + '. Can you get to it with a browser?')
  return filename

filename = maybe_download_ptb(PTB_DIR)

Found and verified /home/tkornuta/data/ptb/simple-examples.tgz ( 34869662 )


### Extract dataset-related files from the PTB archive.

In [3]:
def extract_ptb(path, filename='simple-examples.tgz', files=["ptb.train.txt", "ptb.valid.txt", "ptb.test.txt", 
                                       "ptb.char.train.txt", "ptb.char.valid.txt", "ptb.char.test.txt"]):
    """Extracts files from PTB archive."""
    # Extract
    tar = tarfile.open(path+filename)
    tar.extractall(path)
    tar.close()
    # Copy files
    for file in files:
        shutil.copyfile(PTB_DIR+"simple-examples/data/"+file, PTB_DIR+file)
    # Delete directory
    shutil.rmtree(PTB_DIR+"simple-examples/")        

extract_ptb(PTB_DIR)

### Load train, valid and test texts.

In [4]:
def read_data(filename, path):
    with open(path+filename, 'r') as myfile:
        data=myfile.read()# .replace('\n', '')
        return data

train_text = read_data(TRAIN, PTB_DIR)
train_size=len(train_text)
print(train_size, train_text[:100])

valid_text = read_data(VALID, PTB_DIR)
valid_size=len(valid_text)
print(valid_size, valid_text[:64])

test_text = read_data(TEST, PTB_DIR)
test_size=len(test_text)
print(test_size, test_text[:64])

5101618  aer banknote berlitz calloway centrust cluett fromstein gitano guterman hydro-quebec ipo kia memote
399782  consumers may want to move their telephones a little closer to 
449945  no it was n't black monday 
 but while the new york stock excha


### Utility functions to map characters to vocabulary IDs and back.

In [5]:
vocabulary_size = 59 # [A-Z] + [a-z] + ' ' +few 'in between; + punctuation
first_letter = ord(string.ascii_uppercase[0]) # ascii_uppercase before lowercase! 
print("vocabulary size = ", vocabulary_size)
print(first_letter)

def char2id(char):
  """ Converts char to id (int) with one-hot encoding handling of unexpected characters"""
  if char in string.ascii_letters:# or char in string.punctuation or char in string.digits:
    return ord(char) - first_letter + 1
  elif char == ' ':
    return 0
  else:
    # print('Unexpected character: %s' % char)
    return 0
  
def id2char(dictid):
  """ Converts single id (int) to character"""
  if dictid > 0:
    return chr(dictid + first_letter - 1)
  else:
    return ' '

def characters(probabilities):
  """Turn a 1-hot encoding or a probability distribution over the possible
  characters back into its (most likely) character representation."""
  return [id2char(c) for c in np.argmax(probabilities, 1)]

def batches2string(batches):
  """Convert a sequence of batches back into their (most likely) string
  representation."""
  s = [''] * batches[0].shape[0]
  for b in batches:
    s = [''.join(x) for x in zip(s, characters(b))]
  return s

#print(len(string.punctuation))
#for i in string.ascii_letters:
#    print (i, char2id(i))


print(char2id('a'), char2id('A'), char2id('z'), char2id('Z'), char2id(' '), char2id('ï'))
print(id2char(char2id('a')), id2char(char2id('A')))
#print(id2char(65), id2char(33), id2char(90), id2char(58), id2char(0))
#bankno
sample = np.zeros(shape=(1, vocabulary_size), dtype=np.float)
sample[0, char2id(' ')] = 1.0
print(sample)

vocabulary size =  59
65
33 1 58 26 0 0
a A
[[ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.]]


### Helper class for batch generation

In [6]:
class BatchGenerator(object):
  def __init__(self, text, batch_size, seq_length, vocab_size):
    """
    Initializes the batch generator object. Stores the variables and first "letter batch".
    text is text to be processed
    batch_size is size of batch (number of samples)
    seq_length represents the length of sequence
    vocab_size is number of words in vocabulary (assumes one-hot encoding)
    """
    # Store input parameters.
    self._text = text
    self._text_size = len(text)
    self._batch_size = batch_size
    self._seq_length = seq_length
    self._vocab_size = vocab_size
    # Divide text into segments depending on number of batches, each segment determines a cursor position for a batch.
    segment = self._text_size // batch_size
    # Set initial cursor position.
    self._cursor = [ offset * segment for offset in range(batch_size)]
    # Store first "letter batch".
    self._last_letter_batch = self._next_letter_batch()
  
  def _next_letter_batch(self):
    """
    Returns a batch containing of encoded single letters depending on the current batch 
    cursor positions in the data.
    Returned "letter batch" is of size batch_size x vocab_size
    """
    letter_batch = np.zeros(shape=(self._batch_size, self._vocab_size), dtype=np.float)
    # Iterate through "samples"
    for b in range(self._batch_size):
      # Set 1 in position pointed out by one-hot char encoding.
      letter_batch[b, char2id(self._text[self._cursor[b]])] = 1.0
      self._cursor[b] = (self._cursor[b] + 1) % self._text_size
    return letter_batch
  
  def next(self):
    """Generate the next array of batches from the data. The array consists of
    the last batch of the previous array, followed by num_unrollings new ones.
    """
    # First add last letter from previous batch (the "additional one").
    batches = [self._last_letter_batch]
    for step in range(self._seq_length):
      batches.append(self._next_letter_batch())
    # Store last "letter batch" for next batch.
    self._last_letter_batch = batches[-1]
    return batches


In [7]:
# Trick - override first 10 chars
#list1 = list(train_text)
#for i in range(2):
#    list1[i] = 'z'
#train_text = ''.join(list1)
#print("Train set =", train_text[0:100])

# Create objects for training, validation and testing batch generation.
train_batches = BatchGenerator(train_text, BATCH_SIZE, SEQ_LENGTH, vocabulary_size)

# Get first training batch.
batch = train_batches.next()
print(len(batch))
print(batch[0].shape)
#print("Batch = ", batch)
#print(batches2string(batch))
#print("batch len = num of enrollings",len(batch))
#for i in range(num_unrollings):
#    print("i = ", i, "letter=", batches2string(batch)[0][i][0], "bits = ", batch[i][0])


# For validation  - process the whole text as one big batch.
VALID_BATCH_SIZE = int(np.floor(valid_size/SEQ_LENGTH))
valid_batches = BatchGenerator(valid_text, VALID_BATCH_SIZE, SEQ_LENGTH, vocabulary_size)
valid_batch = valid_batches.next()
#print (VALID_BATCH_SIZE)
#print(len(valid_batch))
#print(valid_batch[0].shape)

# For texting  - process the whole text as one big batch.
TEST_BATCH_SIZE = int(np.floor(test_size/SEQ_LENGTH))
test_batches = BatchGenerator(test_text, TEST_BATCH_SIZE, SEQ_LENGTH, vocabulary_size)
# Get single batch! 
test_batch = test_batches.next()


6
(1, 59)


### Helper function defining the LSTM cell

In [8]:
  # Definition of the cell computation.
  def lstm_cell(input_, prev_output_, prev_cell_state_, name_):
    """Create a LSTM cell"""
    with tf.name_scope(name_):
        # Equations according to C. Olah blog.
        
        # Concatenate h_prev ("prev output") with x.
        h_prev_x = tf.concat([prev_output_, input_], 1)
        
        # Calculate forget and input gates activations.
        forget_gate = tf.sigmoid(tf.matmul(h_prev_x, Wf) + bf, name="forget_gate")
        input_gate = tf.sigmoid(tf.matmul(h_prev_x, Wi) + bi, name="Input_gate")

        # Update of the cell state C~.
        cell_update = tf.tanh(tf.matmul(h_prev_x, Wc) + bc, name="Cell_update")
        # New cell state C.
        cell_state = tf.add(forget_gate * prev_cell_state_, input_gate * cell_update, name = "Cell_state")
        
        # Calculate output gate.
        output_gate = tf.sigmoid(tf.matmul(h_prev_x, Wo) + bo, name="Output_gate")
        # Calculate h - "output".
        output = output_gate * tf.tanh(cell_state)

        return output, cell_state


###  Definition of tensor graph

In [9]:
# Reset graph - just in case.
tf.reset_default_graph()

# 0. Shared variables ops.
with tf.name_scope("Shared_Variables"):
  # Define parameters:
  # Input gate: input, previous output, and bias.
  Wf = tf.Variable(tf.truncated_normal([vocabulary_size+HIDDEN_SIZE, HIDDEN_SIZE], -0.1, 0.1), name="Wf")
  bf = tf.Variable(tf.zeros([1, HIDDEN_SIZE]), name="bf")

  # Forget gate: input, previous output, and bias.
  Wi = tf.Variable(tf.truncated_normal([vocabulary_size+HIDDEN_SIZE, HIDDEN_SIZE], -0.1, 0.1), name="Wi")
  bi = tf.Variable(tf.zeros([1, HIDDEN_SIZE]), name="bi")

  # Memory cell: input, state and bias.                             
  Wc = tf.Variable(tf.truncated_normal([vocabulary_size+HIDDEN_SIZE, HIDDEN_SIZE], -0.1, 0.1), name="Wc")
  bc = tf.Variable(tf.zeros([1, HIDDEN_SIZE]), name="bc")

  # Output gate: input, previous output, and bias.
  Wo = tf.Variable(tf.truncated_normal([vocabulary_size+HIDDEN_SIZE, HIDDEN_SIZE], -0.1, 0.1), name="Wo")
  bo = tf.Variable(tf.zeros([1, HIDDEN_SIZE]), name="bo")

  # Classifier weights and biases.
  w = tf.Variable(tf.truncated_normal([HIDDEN_SIZE, vocabulary_size], -0.1, 0.1), name="w")
  b = tf.Variable(tf.zeros([vocabulary_size]), name="b")

  # Placeholders for previous (the oldest) state and output.
  prev_output = tf.placeholder(tf.float32, shape=None, name="prev_output")
  prev_state = tf.placeholder(tf.float32, shape=None, name="prev_state")

# 0. Placeholders for inputs.
with tf.name_scope("Input_data"):
  # Define input data buffers.
  input_buffer = list()
  for _ in range(SEQ_LENGTH + 1):
    # Collect placeholders for inputs/labels.
    input_buffer.append(tf.placeholder(tf.float32, shape=None, name="Input_data"))
  print ("input_buffer shape =", input_buffer[0].shape)
  # Collection of training inputs.
  train_inputs = input_buffer[:SEQ_LENGTH]
  # Labels are pointing to the same placeholders!
  # Labels are inputs shifted by one time step.
  train_labels = input_buffer[1:]  
  print ("Seq length  =", len(train_inputs))
  print ("Batch shape =", train_inputs[0].shape)
  # Concatenate targets into 2D tensor.
  targets = tf.concat(train_labels, 0)

 # 2. Training LSTM ops.
with tf.name_scope("LSTM"):
  # Unrolled LSTM loop.
  # Build outpus of size SEQ_LENGTH.
  outputs = list()
  output = prev_output
  state = prev_state
  for i in train_inputs:
    output, state = lstm_cell(i, output, state, "cell")
    outputs.append(output)
  print (len(outputs))
  print (outputs[0].shape)
  print (tf.concat(outputs, 0).shape)

# Fully connected layer on top => classification.
# In fact we will create lots of FC layers (one for each output layer), with shared weights.
logits = tf.nn.xw_plus_b(tf.concat(outputs, 0), w, b, name = "Final_FC")

# 2. Loss ops.
with tf.name_scope("Loss"):
    # Loss function(s) - one for every output generated by every lstm cell.
    loss = tf.reduce_mean(
      tf.nn.softmax_cross_entropy_with_logits(
        labels=targets, logits=logits))
    # Add loss summary.
    loss_summary = tf.summary.scalar("loss", loss)

# 3. Training ops.  
with tf.name_scope("Optimization"):
    # Learning rate decay.
    global_step = tf.Variable(0)
    learning_rate = tf.train.exponential_decay(0.1, global_step, 5000, 0.9, staircase=True)
    # Optimizer.
    optimizer = tf.train.AdamOptimizer(learning_rate)
    gradients, v = zip(*optimizer.compute_gradients(loss))
    # Gradient clipping.
    gradients, _ = tf.clip_by_global_norm(gradients, 1.25)
    optimizer = optimizer.apply_gradients(zip(gradients, v), global_step=global_step)

# 4. Predictions ops.  
with tf.name_scope("Evaluation") as scope:
  # Predictions.
  train_prediction = tf.nn.softmax(logits)


input_buffer shape = <unknown>
Seq length  = 5
Batch shape = <unknown>
5
<unknown>
<unknown>


### Subgraph responsible for generation of sample texts, char by char.

In [10]:
with tf.name_scope("Sample_generation") as scope:
  # Create graphs for sampling and validation evaluation: batch 1, "no unrolling".
  sample_input = tf.placeholder(tf.float32, shape=[1, vocabulary_size], name="Input_data")
  saved_sample_output = tf.Variable(tf.zeros([1, HIDDEN_SIZE]), name="Output_data")
  saved_sample_state = tf.Variable(tf.zeros([1, HIDDEN_SIZE]), name="Hidden_state")

  # Node responsible for resetting the state and output.
  reset_sample_state = tf.group(
      saved_sample_output.assign(tf.zeros([1, HIDDEN_SIZE])),
      saved_sample_state.assign(tf.zeros([1, HIDDEN_SIZE])))
  # Single LSTM cell.
  sample_output, sample_state = lstm_cell(sample_input, saved_sample_output, saved_sample_state, "cell")
  # Output depends on the hidden state.
  with tf.control_dependencies([saved_sample_output.assign(sample_output), saved_sample_state.assign(sample_state)]):
    sample_prediction = tf.nn.softmax(tf.nn.xw_plus_b(sample_output, w, b, name="logits"), name="outputs")

# Merge all summaries.
merged_summaries = tf.summary.merge_all()

# 4. Init global variable.
init = tf.global_variables_initializer() 

### Helper functions for language generation (letter sampling etc). 

In [11]:

def sample_distribution(distribution):
  """Sample one element from a distribution assumed to be an array of normalized
  probabilities.
  """
  r = random.uniform(0, 1)
  s = 0
  for i in range(len(distribution)):
    s += distribution[i]
    if s >= r:
      return i
  return len(distribution) - 1

def sample(prediction):
  """Turn a (column) prediction into 1-hot encoded samples."""
  p = np.zeros(shape=[1, vocabulary_size], dtype=np.float)
  p[0, sample_distribution(prediction[0])] = 1.0
  return p

def random_distribution():
  """Generate a random column of probabilities."""
  b = np.random.uniform(0.0, 1.0, size=[1, vocabulary_size])
  return b/np.sum(b, 1)[:,None]

In [12]:
def create_feed_dict(dataset):
  """Creates a dictionaries for different sets: maps data onto Tensor placeholders."""
  feed_dict = dict()
  if dataset=="train":
    # Get next batch and create a feed dict.
    next_batch = train_batches.next()
    for i in range(SEQ_LENGTH + 1):
        feed_dict[input_buffer[i]] = next_batch[i]
    # Reset previous state and output
    feed_dict[prev_output] = np.zeros([BATCH_SIZE, HIDDEN_SIZE])
    feed_dict[prev_state] = np.zeros([BATCH_SIZE, HIDDEN_SIZE])
        
  elif dataset=="valid":
    for i in range(SEQ_LENGTH + 1):
        feed_dict[input_buffer[i]] = valid_batch[i]
    # Reset previous state and output
    feed_dict[prev_output] = np.zeros([VALID_BATCH_SIZE, HIDDEN_SIZE])
    feed_dict[prev_state] = np.zeros([VALID_BATCH_SIZE, HIDDEN_SIZE])
    
  else: # test
    for i in range(SEQ_LENGTH + 1):
        feed_dict[input_buffer[i]] = test_batch[i]
    # Reset previous state and output
    feed_dict[prev_output] = np.zeros([TEST_BATCH_SIZE, HIDDEN_SIZE])
    feed_dict[prev_state] = np.zeros([TEST_BATCH_SIZE, HIDDEN_SIZE])
    
  return feed_dict # {prev_output: train_output_zeros, prev_state: train_state_zeros }

### Session execution

In [13]:
# Eventually clear the log dir.
if tf.gfile.Exists(LOG_DIR):
  tf.gfile.DeleteRecursively(LOG_DIR)
# Create (new) log dir.
tf.gfile.MakeDirs(LOG_DIR)

In [14]:
# How often the test loss on validation batch will be computed. 
summary_frequency = 100

# Create session.
sess = tf.InteractiveSession()
# Create summary writers, point them to LOG_DIR.
train_writer = tf.summary.FileWriter(LOG_DIR + '/train', sess.graph)
valid_writer = tf.summary.FileWriter(LOG_DIR + '/valid')
test_writer = tf.summary.FileWriter(LOG_DIR + '/test')

# Initialize global variables.
tf.global_variables_initializer().run()
print('Initialized')

num_steps =  train_size // (BATCH_SIZE*SEQ_LENGTH) #70001
print("Number of iterations per epoch =", num_steps)
for step in range(num_steps):
    # Run training graph.
    batch = train_batches.next()
    summary, _, t_loss, lr = sess.run([merged_summaries, optimizer, loss, learning_rate], 
                                      feed_dict=create_feed_dict("train"))
    # Add summary.
    train_writer.add_summary(summary, step*SEQ_LENGTH)
    train_writer.flush()

    # Every (100) steps collect statistics.
    if step % summary_frequency == 0:
      # Print loss from last batch.
      print('Training set BPC at step %d: %0.5f learning rate: %f' % (step, t_loss, lr))
    
      if step % (summary_frequency * 10) == 0:
        # Generate sample text...
        print('=' * 80)
        # consisting of 5 lines...
        for _ in range(5):
          feed = sample(random_distribution())
          sentence = characters(feed)[0]
          # Reset LSTM hidden state.
          reset_sample_state.run()
          # with 79 characters in each.
          for _ in range(79):
            prediction = sample_prediction.eval({sample_input: feed})
            feed = sample(prediction)
            sentence += characters(feed)[0]
          print(sentence)
        print('=' * 80)
        
        # Validation set BPC.
        v_summary, v_loss = sess.run([merged_summaries, loss], feed_dict=create_feed_dict("valid"))
        print("Validation set BPC: %.5f" % v_loss)
        valid_writer.add_summary(v_summary, step*SEQ_LENGTH)
        valid_writer.flush()
    # End of statistics collection

# Test set BPC.
print("Calculating BPC on test dataset")
t_summary, t_loss = sess.run([merged_summaries, loss], feed_dict=create_feed_dict("test"))
print("Final test set BPC: %.5f" % t_loss)
test_writer.add_summary(t_summary, step*SEQ_LENGTH)
test_writer.flush()
    
# Close writers and session.
train_writer.close()
valid_writer.close()
test_writer.close()
sess.close()

Initialized
Number of iterations per epoch = 1020323
Training set BPC at step 0: 4.07531 learning rate: 0.100000
rnrIj_SXfWLgBLb Hoernhjq]fWJNKLeWtLt[Giiku eFb[pPs_dYo[Nh^ACON qrDg\YbW^eSddzMZy
LsV`NxZMwX njNuTXkf\ustYbW ubkdgU\`]P_JspwjKZky_LZwJv]qkUwgeahTBtDleMYUszGzwfEtM
xWexcubfhRRgNMocYOFguyF^kf]ZIY_QIt[wIrvgsvyYvi\oByhfLHDrXqczQRkDZTmpIsqcOl^DE lw
rJcOYWYMD\`fXJ]ZxLjhXVeFNw^QC ^qdQZt^D eIFL]sAyqwmRmRiL]qsUnZaVGsrZ`[W`sZjPrcEYb
OwDLUcBwayGRvt rv^ww]HT^eXIi`c[bUCqp_AQYki PMLhYgMMl[mtqkCjbfTxPEukwE]xUl_\kRJXj
Validation set BPC: 3.95437
Training set BPC at step 100: 3.00891 learning rate: 0.100000
Training set BPC at step 200: 1.40547 learning rate: 0.100000
Training set BPC at step 300: 2.63007 learning rate: 0.100000
Training set BPC at step 400: 1.19735 learning rate: 0.100000
Training set BPC at step 500: 2.72800 learning rate: 0.100000
Training set BPC at step 600: 3.20879 learning rate: 0.100000
Training set BPC at step 700: 3.41517 learning rate: 0.100000
Training set BPC at 

Training set BPC at step 6400: 1.85463 learning rate: 0.090000
Training set BPC at step 6500: 2.27314 learning rate: 0.090000
Training set BPC at step 6600: 1.77919 learning rate: 0.090000
Training set BPC at step 6700: 1.64630 learning rate: 0.090000
Training set BPC at step 6800: 1.58586 learning rate: 0.090000
Training set BPC at step 6900: 2.20632 learning rate: 0.090000
Training set BPC at step 7000: 0.83926 learning rate: 0.090000
\ns vit cibit dos s  of ceds conrga mrlant ss to jon    canun ewtaid   iid sprou
o meied puld pis  nod oftpog licay dancthaid ad as thin as aipand to wites unk u
en drad redbat sss nega  sphad  ss unk   fold patissens pad nots lehes  sbit r p
Ut ithins orstilishiredar caida me  sicrilavil ars les dous pad il    tewr to wi
us gemonty sc sdar bo   samigedecomprisasegandors ichempropo dewale  novicaed ce
Validation set BPC: 2.44545
Training set BPC at step 7100: 2.38784 learning rate: 0.090000
Training set BPC at step 7200: 0.89716 learning rate: 0.090000


Validation set BPC: 2.43139
Training set BPC at step 13100: 1.85917 learning rate: 0.081000
Training set BPC at step 13200: 2.59496 learning rate: 0.081000
Training set BPC at step 13300: 3.22861 learning rate: 0.081000
Training set BPC at step 13400: 1.42528 learning rate: 0.081000
Training set BPC at step 13500: 2.31518 learning rate: 0.081000
Training set BPC at step 13600: 3.26787 learning rate: 0.081000
Training set BPC at step 13700: 1.58345 learning rate: 0.081000
Training set BPC at step 13800: 3.40195 learning rate: 0.081000
Training set BPC at step 13900: 3.41611 learning rate: 0.081000
Training set BPC at step 14000: 1.62419 learning rate: 0.081000
eapslecve clit unk  sllad gocpiumornop prrell    mis onfremte t  croit  unk  pag
ees unk  liarerN liy gus ick  iderpimtli bibemt pipeanste unk  pun wamailillat a
wion  ou illu  propve paliation    pralsat unk  lrridntion r    mipdanom sacilu 
Xs  unk     gille pith lllrr N the we    pinuect demslud th vi aw  eund e con lo
Ss N col

Validation set BPC: 2.77542
Training set BPC at step 20100: 1.69633 learning rate: 0.065610
Training set BPC at step 20200: 2.49305 learning rate: 0.065610
Training set BPC at step 20300: 2.05078 learning rate: 0.065610
Training set BPC at step 20400: 1.53275 learning rate: 0.065610
Training set BPC at step 20500: 2.01116 learning rate: 0.065610
Training set BPC at step 20600: 0.93829 learning rate: 0.065610
Training set BPC at step 20700: 2.52010 learning rate: 0.065610
Training set BPC at step 20800: 2.20073 learning rate: 0.065610
Training set BPC at step 20900: 2.44559 learning rate: 0.065610
Training set BPC at step 21000: 1.99272 learning rate: 0.065610
mmidinglly x ais wig llotof fome inteerk   malattalt mllank  N it faisists  thes
ld N meta treandd neo l loigeat te  sloman bite potiders relingllear unk  toen  
Plt hide of beditidelll leerenction    thiain the las eoride of pannu lome nehow
Utese we halk bel unk   fit ling he homearorave N intronctecraneim susagen unk  
Dcive wi

Validation set BPC: 2.35714
Training set BPC at step 27100: 3.15039 learning rate: 0.059049
Training set BPC at step 27200: 2.55496 learning rate: 0.059049
Training set BPC at step 27300: 2.73079 learning rate: 0.059049
Training set BPC at step 27400: 2.18729 learning rate: 0.059049
Training set BPC at step 27500: 1.13442 learning rate: 0.059049
Training set BPC at step 27600: 3.55070 learning rate: 0.059049
Training set BPC at step 27700: 1.51893 learning rate: 0.059049
Training set BPC at step 27800: 2.35165 learning rate: 0.059049
Training set BPC at step 27900: 3.53659 learning rate: 0.059049
Training set BPC at step 28000: 2.12879 learning rate: 0.059049
x withe  of aigive e pricce finmecto N s N me the br sospo s bll cef thate  pres
_owideag ve whe  b the withe ustwitions luthe whiongalve whe poy cre an the the 
moncy gulunepchreeom cron  reorst sexrat song the fro it ak upl  be rengiong pro
xors edo  to  cons pevopoplopesepprepint aluron unk  comprold  iputhe dilemition
n acicth

Vcrerbess to the   ank  ine  unk  of tharreatharmatiscinthitureir adeank  of  in
ves thal cant of unk  unk  thelse sets alwonks unk  gant sairrintenes N int  act
Validation set BPC: 2.41054
Training set BPC at step 34100: 2.51986 learning rate: 0.053144
Training set BPC at step 34200: 2.78764 learning rate: 0.053144
Training set BPC at step 34300: 1.42568 learning rate: 0.053144
Training set BPC at step 34400: 4.25970 learning rate: 0.053144
Training set BPC at step 34500: 3.01070 learning rate: 0.053144
Training set BPC at step 34600: 2.91431 learning rate: 0.053144
Training set BPC at step 34700: 5.13022 learning rate: 0.053144
Training set BPC at step 34800: 2.88850 learning rate: 0.053144
Training set BPC at step 34900: 2.29863 learning rate: 0.053144
Training set BPC at step 35000: 4.42886 learning rate: 0.047830
the yangs  fireeadoie  bre the sess tods pew pleyem the ne ont shaibeings   re  
l che ne  pinss moniverde N ord N of s ines  she  ai se a N cupfrill wan pe thei
llivax d

Training set BPC at step 40900: 1.55895 learning rate: 0.043047
Training set BPC at step 41000: 2.09625 learning rate: 0.043047
Vs ousubof bof fat mat unk  of bous tuamen to seld bou rat the a N to unk   cont
ctrainand the k enkint  onal  mr cher bep under abay onpent ppall unk   unk  neu
N e   se yek fubor of ases inond hiant  to staanuepin unk  but that  nefay con y
bue a ies u tto to bins haid the e ine  we  curn N the  unk  nurannorests shitic
Byats ghate  unk  sont sars nice of yastce frat we  niks infon chind has  shand 
Validation set BPC: 2.31514
Training set BPC at step 41100: 1.65806 learning rate: 0.043047
Training set BPC at step 41200: 1.14436 learning rate: 0.043047
Training set BPC at step 41300: 2.35803 learning rate: 0.043047
Training set BPC at step 41400: 1.65505 learning rate: 0.043047
Training set BPC at step 41500: 1.87580 learning rate: 0.043047
Training set BPC at step 41600: 2.16569 learning rate: 0.043047
Training set BPC at step 41700: 2.11441 learning rate: 

Training set BPC at step 47200: 2.26875 learning rate: 0.038742
Training set BPC at step 47300: 2.02245 learning rate: 0.038742
Training set BPC at step 47400: 2.93996 learning rate: 0.038742
Training set BPC at step 47500: 1.78686 learning rate: 0.038742
Training set BPC at step 47600: 2.80067 learning rate: 0.038742
Training set BPC at step 47700: 2.96588 learning rate: 0.038742
Training set BPC at step 47800: 2.07647 learning rate: 0.038742
Training set BPC at step 47900: 1.97760 learning rate: 0.038742
Training set BPC at step 48000: 2.09443 learning rate: 0.038742
\  pripindounk  an  new  afler aniorase inturs pof insous ofrde  thiraives cus m
Gw ned  ly waincun it lis courearted have nided  fex poinautes unk  iney manen m
Xve ats to stote onden unk  sun beanions s ficompad beq whithompintherstan sisit
y s  unk  novitiveal wily cied durarnts are unk  ether doit donon ating ably ast
n to wonanemelly  hast friter ontasuno wide yo buntes   leadest  unk  ry anin as
Validation set BPC:

Validation set BPC: 2.29946
Training set BPC at step 54100: 2.07753 learning rate: 0.034868
Training set BPC at step 54200: 1.74504 learning rate: 0.034868
Training set BPC at step 54300: 0.99683 learning rate: 0.034868
Training set BPC at step 54400: 1.84389 learning rate: 0.034868
Training set BPC at step 54500: 2.13437 learning rate: 0.034868
Training set BPC at step 54600: 3.87641 learning rate: 0.034868
Training set BPC at step 54700: 2.42956 learning rate: 0.034868
Training set BPC at step 54800: 3.37762 learning rate: 0.034868
Training set BPC at step 54900: 1.36598 learning rate: 0.034868
Training set BPC at step 55000: 2.02231 learning rate: 0.031381
Qde niont praurn wy beand how lolmpaive murdeishamey thap et orsing yis ctas cay
Grl ie e theas itenes  ir bainomut ha agound  in re oflion waneto gre botinivet 
ysissunt neatroo coming or do vinte tad  casl shorast to tharly of suteasad gein
orition clo trat foree moncintrakein ay  thelloverstro ar sounave af prae eny ho
Uriond m

glelicte think  huerdion of of noro home juras me tage mer threntias und son ine
Validation set BPC: 2.23010
Training set BPC at step 61100: 2.67049 learning rate: 0.028243
Training set BPC at step 61200: 1.95490 learning rate: 0.028243
Training set BPC at step 61300: 2.12654 learning rate: 0.028243
Training set BPC at step 61400: 1.92960 learning rate: 0.028243
Training set BPC at step 61500: 1.72992 learning rate: 0.028243
Training set BPC at step 61600: 2.51829 learning rate: 0.028243
Training set BPC at step 61700: 1.05634 learning rate: 0.028243
Training set BPC at step 61800: 1.92181 learning rate: 0.028243
Training set BPC at step 61900: 2.29465 learning rate: 0.028243
Training set BPC at step 62000: 2.36770 learning rate: 0.028243
w beis of the cor with   the  ssur ermistoont with xpedated share shoted bewaley
peco weh N unk  anets dor mior offe soslestll isaineventenaure unk  of nores the
Gbed a mon thelid it scation s the deltarens leng sconted   the boid naced subis
Sr the n

Validation set BPC: 2.23153
Training set BPC at step 68100: 3.78191 learning rate: 0.025419
Training set BPC at step 68200: 2.04945 learning rate: 0.025419
Training set BPC at step 68300: 1.39590 learning rate: 0.025419
Training set BPC at step 68400: 1.53753 learning rate: 0.025419
Training set BPC at step 68500: 1.31462 learning rate: 0.025419
Training set BPC at step 68600: 1.33739 learning rate: 0.025419
Training set BPC at step 68700: 1.92651 learning rate: 0.025419
Training set BPC at step 68800: 2.10156 learning rate: 0.025419
Training set BPC at step 68900: 2.08384 learning rate: 0.025419
Training set BPC at step 69000: 1.32979 learning rate: 0.025419
e arvicte ecurgided unk  at  tuld weng of unk     inkelly  aireles indliellill t
Jqpre brliar to as ope laty unk  sinate pove boved disadopetl lade lopes it gre 
of the by  forenging N  unk    unk   novore at inked  auted she of legre rales w
Zl nat  cit toreratodet omal un taves unk    unk   pat pro ele  cakellly slo mee
Ph fien 

Validation set BPC: 2.23492
Training set BPC at step 75100: 0.72816 learning rate: 0.020589
Training set BPC at step 75200: 1.69559 learning rate: 0.020589
Training set BPC at step 75300: 1.38585 learning rate: 0.020589
Training set BPC at step 75400: 2.52348 learning rate: 0.020589
Training set BPC at step 75500: 0.90472 learning rate: 0.020589
Training set BPC at step 75600: 2.46833 learning rate: 0.020589
Training set BPC at step 75700: 2.35656 learning rate: 0.020589
Training set BPC at step 75800: 2.29747 learning rate: 0.020589
Training set BPC at step 75900: 1.83538 learning rate: 0.020589
Training set BPC at step 76000: 2.22225 learning rate: 0.020589
fictacuaru ac ay  eaper esarreled N fotic that cort it and ep ablaisit  seever t
Ms   frote ank  ro and corsts sais wathealing stigaccas condais N unk    to boke
Ds thad inncuter ratex thar thases orvells cuinifiropsts somesttease cout andis 
Uculd    wous  ons iled of sal sontod  ait he ank  owar mel grice  these woreeos
_rs u gr

Pgulearned ces   unk  am  unk  aise equoret re interaperal pevi  N saly laf  the
Validation set BPC: 2.18373
Training set BPC at step 82100: 2.47234 learning rate: 0.018530
Training set BPC at step 82200: 2.46650 learning rate: 0.018530
Training set BPC at step 82300: 2.43675 learning rate: 0.018530
Training set BPC at step 82400: 2.21756 learning rate: 0.018530
Training set BPC at step 82500: 2.61377 learning rate: 0.018530
Training set BPC at step 82600: 1.86215 learning rate: 0.018530
Training set BPC at step 82700: 2.91372 learning rate: 0.018530
Training set BPC at step 82800: 1.84959 learning rate: 0.018530
Training set BPC at step 82900: 2.91553 learning rate: 0.018530
Training set BPC at step 83000: 1.22510 learning rate: 0.018530
wevel poical eelthe wish ition  unk  brac   age foresten bra bating sation hosuy
v sicy wan  tot s bule ackegarnt cuby narst acty N py nyst basalate pare unk  th
vieros tratopass poy unk   haritoed yef to thor us s N N woris c off unk  as se 
Sgest ra

Validation set BPC: 2.18592
Training set BPC at step 89100: 1.23355 learning rate: 0.016677
Training set BPC at step 89200: 1.01822 learning rate: 0.016677
Training set BPC at step 89300: 1.85360 learning rate: 0.016677
Training set BPC at step 89400: 2.77270 learning rate: 0.016677
Training set BPC at step 89500: 1.21351 learning rate: 0.016677
Training set BPC at step 89600: 2.02035 learning rate: 0.016677
Training set BPC at step 89700: 1.77573 learning rate: 0.016677
Training set BPC at step 89800: 2.10640 learning rate: 0.016677
Training set BPC at step 89900: 2.05078 learning rate: 0.016677
Training set BPC at step 90000: 2.24917 learning rate: 0.015009
Gepremiduage annpent sprojan tey unt ing unk  ay   lor of N s  fins the ploge un
lritire furace win har  s conaced  sed shoyceradestion ford momer  issuuchi race
\ars  and conkes adecks ands exto to dise ruting  goo s s  N unk  ws badol  N  s
ct  N  unk   unk  wirk  expinfr  offimin mibies  unk  whay in N  sempunter cual 
ion mewn

Validation set BPC: 2.16601
Training set BPC at step 96100: 2.53229 learning rate: 0.013509
Training set BPC at step 96200: 2.20086 learning rate: 0.013509
Training set BPC at step 96300: 1.77473 learning rate: 0.013509
Training set BPC at step 96400: 1.92712 learning rate: 0.013509
Training set BPC at step 96500: 2.17497 learning rate: 0.013509
Training set BPC at step 96600: 1.17421 learning rate: 0.013509
Training set BPC at step 96700: 2.47789 learning rate: 0.013509
Training set BPC at step 96800: 2.26217 learning rate: 0.013509
Training set BPC at step 96900: 2.14444 learning rate: 0.013509
Training set BPC at step 97000: 2.81435 learning rate: 0.013509
Xchal   stanceraticapreve junt comp n  the ank  ay unparai  pal eals det age ari
hes partas  spard the thould siatitctora move  unk     unk     it docks   suler 
Ay ening por quor unpave qur sabid s ethe gifion se afaplar thiticald bedierands
inen ace unk  s    unk    afedolm inges ap conintion is ghed of  sent croact hic
_k  inda

Validation set BPC: 2.16877
Training set BPC at step 103100: 1.52031 learning rate: 0.012158
Training set BPC at step 103200: 2.63995 learning rate: 0.012158
Training set BPC at step 103300: 2.35547 learning rate: 0.012158
Training set BPC at step 103400: 1.63243 learning rate: 0.012158
Training set BPC at step 103500: 2.30853 learning rate: 0.012158
Training set BPC at step 103600: 2.49437 learning rate: 0.012158
Training set BPC at step 103700: 2.22593 learning rate: 0.012158
Training set BPC at step 103800: 1.73735 learning rate: 0.012158
Training set BPC at step 103900: 1.42173 learning rate: 0.012158
Training set BPC at step 104000: 2.14432 learning rate: 0.012158
by  equatet abamin jaiguat cate from xan r   N more pations prat nerys nepiccera
ys of unk  by  absalier llel ntocar an an u if bank  ha fiprent patito the ear r
zir gy talltd sat harthe aplas  the ct at e mare fors nicater hetild hen unk   o
unk    conmerasing s ant ar nelen of whes   unk   irker  mey priarnts us oper  

Pats vagh oto N that centwill onire of bus  al say  the ley ment we ag N bared c
Validation set BPC: 2.16414
Training set BPC at step 110100: 2.12687 learning rate: 0.009848
Training set BPC at step 110200: 1.75490 learning rate: 0.009848
Training set BPC at step 110300: 1.96978 learning rate: 0.009848
Training set BPC at step 110400: 2.78047 learning rate: 0.009848
Training set BPC at step 110500: 2.37806 learning rate: 0.009848
Training set BPC at step 110600: 2.18300 learning rate: 0.009848
Training set BPC at step 110700: 2.57081 learning rate: 0.009848
Training set BPC at step 110800: 1.25165 learning rate: 0.009848
Training set BPC at step 110900: 0.98459 learning rate: 0.009848
Training set BPC at step 111000: 1.29687 learning rate: 0.009848
Ge for purtentlistever the cuf and fill     u nebt  ank  metras rey ree of  oarg
\N to mut cogouces of profor  s gey  a tal  bosdrese buchemeras whan conowath tu
Ucumot rat con bek  ded ins mol quek offereations by br ime ly thisene fol the 

`he frops a re hastn would proquores haw cer awe whisishing onitchents  amibers 
qury sant of   of sece t  sas out  unk   ing neging assmald norced a y   intaly 
Validation set BPC: 2.12620
Training set BPC at step 117100: 2.59584 learning rate: 0.008863
Training set BPC at step 117200: 1.91018 learning rate: 0.008863
Training set BPC at step 117300: 2.67394 learning rate: 0.008863
Training set BPC at step 117400: 1.93636 learning rate: 0.008863
Training set BPC at step 117500: 1.76661 learning rate: 0.008863
Training set BPC at step 117600: 2.05916 learning rate: 0.008863
Training set BPC at step 117700: 1.82934 learning rate: 0.008863
Training set BPC at step 117800: 2.58887 learning rate: 0.008863
Training set BPC at step 117900: 2.08242 learning rate: 0.008863
Training set BPC at step 118000: 2.67603 learning rate: 0.008863
Eom  unk   l corkignin u  N be stak   the too the ing  the expectiwqur   by  inv
Upemaw unk  moncer and  a an af  sulo ighe vily ae the tudimall ye of theery bo

Training set BPC at step 123800: 2.82944 learning rate: 0.007977
Training set BPC at step 123900: 2.90732 learning rate: 0.007977
Training set BPC at step 124000: 2.29526 learning rate: 0.007977
Wk  sire a  unk  xp eull on the expeted    unk aw  beacn unk   of rus berothe fu
`ks the fit and her and kas moct the nas ronf sounde  of  unk  xp qrie prog mand
cord the geverokerdetotherarkering the for it  the exand ight  ingem the of be b
hinadesar mo frout unk     stlayt amon wink  ca unk   prove  unk  xbuFTt uints s
`ash two ethe dunds  se  lasage the of ked ut of  threeseats comport ekored or t
Validation set BPC: 2.12254
Training set BPC at step 124100: 1.10947 learning rate: 0.007977
Training set BPC at step 124200: 2.00941 learning rate: 0.007977
Training set BPC at step 124300: 1.89632 learning rate: 0.007977
Training set BPC at step 124400: 2.15158 learning rate: 0.007977
Training set BPC at step 124500: 1.25741 learning rate: 0.007977
Training set BPC at step 124600: 1.81457 learni

Validation set BPC: 2.12508
Training set BPC at step 130100: 2.28851 learning rate: 0.006461
Training set BPC at step 130200: 1.28617 learning rate: 0.006461
Training set BPC at step 130300: 1.39237 learning rate: 0.006461
Training set BPC at step 130400: 2.01746 learning rate: 0.006461
Training set BPC at step 130500: 2.24194 learning rate: 0.006461
Training set BPC at step 130600: 2.38633 learning rate: 0.006461
Training set BPC at step 130700: 2.36828 learning rate: 0.006461
Training set BPC at step 130800: 1.85065 learning rate: 0.006461
Training set BPC at step 130900: 1.51342 learning rate: 0.006461
Training set BPC at step 131000: 1.33447 learning rate: 0.006461
Jvainerss compall  otive an rinced bivers  unk   coull but ye wancing gron beker
t of mewit pelled in ethey war necticies  unk   proce sso sut lost for thetr   o
ims ine diller is of mif to jotittest but pban mithincent  migen then N ap  in a
Ilop s in contoratevings  at is taced quend sorke ghoraniner onent   om to ar  

Validation set BPC: 2.11640
Training set BPC at step 137100: 1.69333 learning rate: 0.005815
Training set BPC at step 137200: 2.15701 learning rate: 0.005815
Training set BPC at step 137300: 1.64130 learning rate: 0.005815
Training set BPC at step 137400: 1.50092 learning rate: 0.005815
Training set BPC at step 137500: 2.70731 learning rate: 0.005815
Training set BPC at step 137600: 2.21585 learning rate: 0.005815
Training set BPC at step 137700: 2.07074 learning rate: 0.005815
Training set BPC at step 137800: 2.45998 learning rate: 0.005815
Training set BPC at step 137900: 2.30383 learning rate: 0.005815
Training set BPC at step 138000: 1.79786 learning rate: 0.005815
Sand to and jushlificeesting ark of wepy  the nave wan eard pad tearnes revitent
ply for fa  unk   action at to i N unk  wey  heced fillion agan act who  pro and
xt bave mis woush unk  were stig  unk   westremes the  storallion tinmer an dis 
Hpon at bild her shepet the clad in crisaid N  unk   by eact onir pants troced 

Validation set BPC: 2.11137
Training set BPC at step 144100: 1.59639 learning rate: 0.005233
Training set BPC at step 144200: 2.15452 learning rate: 0.005233
Training set BPC at step 144300: 0.60302 learning rate: 0.005233
Training set BPC at step 144400: 1.23483 learning rate: 0.005233
Training set BPC at step 144500: 1.32529 learning rate: 0.005233
Training set BPC at step 144600: 2.20409 learning rate: 0.005233
Training set BPC at step 144700: 2.06798 learning rate: 0.005233
Training set BPC at step 144800: 1.94079 learning rate: 0.005233
Training set BPC at step 144900: 2.33009 learning rate: 0.005233
Training set BPC at step 145000: 2.04279 learning rate: 0.004710
^xeotion onestur two ut to indaly tait  my at  sajut pay to we af the govise gro
thes  unk     tratt prog waitasion  succhiby yomentk   pmer  thiry gon  unk    s
xandion homin  the fedut   ch uew shad wew  unk    s ecthat unk  hean w n of in 
y to    N mit ectornt at unk   lunt cang the how eboll  N  ont  the mary    s u

KeyboardInterrupt: 