In [1]:
from __future__ import print_function
import os

import numpy as np
import zipfile
import tarfile
from six.moves.urllib.request import urlretrieve
import shutil 
import random

import string
import tensorflow as tf

# Dirs - must be absolute paths!
LOG_DIR = '/tmp/tf/ptb_char_lstm_mann_lrua/100'
# Local dir where PTB files will be stored.
PTB_DIR = '/home/tkornuta/data/ptb/'

# Filenames.
TRAIN = "ptb.train.txt"
VALID = "ptb.valid.txt"
TEST = "ptb.test.txt"

# Size of the hidden state 64
# 59 now - as we are using output of word embeddings as "hidden state of LSTM"
HIDDEN_SIZE = 59

# Size of batch - 2 for now :]
BATCH_SIZE = 2

# Length of sequence (=  number of units of controller (recurrent layer))
SEQ_LENGTH = 5

#### MANN-related parameters.
# Size of the MANN memory.
MEMORY_SIZE = 6

# Number of smallest elements.
N_SMALLEST = 1

# "Update weight decay".
GAMMA = 0.1


### Check/maybe download PTB.

In [2]:
def maybe_download_ptb(path, 
                       filename='simple-examples.tgz', 
                       url='http://www.fit.vutbr.cz/~imikolov/rnnlm/', 
                       expected_bytes =34869662):
  # Eventually create the PTB dir.
  if not tf.gfile.Exists(path):
    tf.gfile.MakeDirs(path)
  """Download a file if not present, and make sure it's the right size."""
  _filename = path+filename
  if not os.path.exists(_filename):
    print('Downloading %s...' % filename)
    _filename, _ = urlretrieve(url+filename, _filename)
  statinfo = os.stat(_filename)
  if statinfo.st_size == expected_bytes:
    print('Found and verified', (_filename), '(', statinfo.st_size, ')')
  else:
    print(statinfo.st_size)
    raise Exception(
      'Failed to verify ' + _filename + '. Can you get to it with a browser?')
  return filename

filename = maybe_download_ptb(PTB_DIR)

Found and verified /home/tkornuta/data/ptb/simple-examples.tgz ( 34869662 )


### Extract dataset-related files from the PTB archive.

In [3]:
def extract_ptb(path, filename='simple-examples.tgz', files=["ptb.train.txt", "ptb.valid.txt", "ptb.test.txt", 
                                       "ptb.char.train.txt", "ptb.char.valid.txt", "ptb.char.test.txt"]):
    """Extracts files from PTB archive."""
    # Extract
    tar = tarfile.open(path+filename)
    tar.extractall(path)
    tar.close()
    # Copy files
    for file in files:
        shutil.copyfile(PTB_DIR+"simple-examples/data/"+file, PTB_DIR+file)
    # Delete directory
    shutil.rmtree(PTB_DIR+"simple-examples/")        

extract_ptb(PTB_DIR)

### Load train, valid and test texts.

In [4]:
def read_data(filename, path):
    with open(path+filename, 'r') as myfile:
        data=myfile.read()# .replace('\n', '')
        return data

train_text = read_data(TRAIN, PTB_DIR)
train_size=len(train_text)
print(train_size, train_text[:100])

valid_text = read_data(VALID, PTB_DIR)
valid_size=len(valid_text)
print(valid_size, valid_text[:64])

test_text = read_data(TEST, PTB_DIR)
test_size=len(test_text)
print(test_size, test_text[:64])

5101618  aer banknote berlitz calloway centrust cluett fromstein gitano guterman hydro-quebec ipo kia memote
399782  consumers may want to move their telephones a little closer to 
449945  no it was n't black monday 
 but while the new york stock excha


### Utility functions to map characters to vocabulary IDs and back.

In [5]:
vocabulary_size = 59 # [A-Z] + [a-z] + ' ' +few 'in between; + punctuation
first_letter = ord(string.ascii_uppercase[0]) # ascii_uppercase before lowercase! 
print("vocabulary size = ", vocabulary_size)
print(first_letter)

def char2id(char):
  """ Converts char to id (int) with one-hot encoding handling of unexpected characters"""
  if char in string.ascii_letters:# or char in string.punctuation or char in string.digits:
    return ord(char) - first_letter + 1
  elif char == ' ':
    return 0
  else:
    # print('Unexpected character: %s' % char)
    return 0
  
def id2char(dictid):
  """ Converts single id (int) to character"""
  if dictid > 0:
    return chr(dictid + first_letter - 1)
  else:
    return ' '

def characters(probabilities):
  """Turn a 1-hot encoding or a probability distribution over the possible
  characters back into its (most likely) character representation."""
  return [id2char(c) for c in np.argmax(probabilities, 1)]

def batches2string(batches):
  """Convert a sequence of batches back into their (most likely) string
  representation."""
  s = [''] * batches[0].shape[0]
  for b in batches:
    s = [''.join(x) for x in zip(s, characters(b))]
  return s

#print(len(string.punctuation))
#for i in string.ascii_letters:
#    print (i, char2id(i))


print(char2id('a'), char2id('A'), char2id('z'), char2id('Z'), char2id(' '), char2id('ï'))
print(id2char(char2id('a')), id2char(char2id('A')))
#print(id2char(65), id2char(33), id2char(90), id2char(58), id2char(0))
#bankno
sample = np.zeros(shape=(1, vocabulary_size), dtype=np.float)
sample[0, char2id(' ')] = 1.0
print(sample)

vocabulary size =  59
65
33 1 58 26 0 0
a A
[[ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.]]


### Helper class for batch generation

In [6]:
class BatchGenerator(object):
  def __init__(self, text, batch_size, seq_length, vocab_size):
    """
    Initializes the batch generator object. Stores the variables and first "letter batch".
    text is text to be processed
    batch_size is size of batch (number of samples)
    seq_length represents the length of sequence
    vocab_size is number of words in vocabulary (assumes one-hot encoding)
    """
    # Store input parameters.
    self._text = text
    self._text_size = len(text)
    self._batch_size = batch_size
    self._seq_length = seq_length
    self._vocab_size = vocab_size
    # Divide text into segments depending on number of batches, each segment determines a cursor position for a batch.
    segment = self._text_size // batch_size
    # Set initial cursor position.
    self._cursor = [ offset * segment for offset in range(batch_size)]
    # Store first "letter batch".
    self._last_letter_batch = self._next_letter_batch()
  
  def _next_letter_batch(self):
    """
    Returns a batch containing of encoded single letters depending on the current batch 
    cursor positions in the data.
    Returned "letter batch" is of size batch_size x vocab_size
    """
    letter_batch = np.zeros(shape=(self._batch_size, self._vocab_size), dtype=np.float)
    # Iterate through "samples"
    for b in range(self._batch_size):
      # Set 1 in position pointed out by one-hot char encoding.
      letter_batch[b, char2id(self._text[self._cursor[b]])] = 1.0
      self._cursor[b] = (self._cursor[b] + 1) % self._text_size
    return letter_batch
  
  def next(self):
    """Generate the next array of batches from the data. The array consists of
    the last batch of the previous array, followed by num_unrollings new ones.
    """
    # First add last letter from previous batch (the "additional one").
    batches = [self._last_letter_batch]
    for step in range(self._seq_length):
      batches.append(self._next_letter_batch())
    # Store last "letter batch" for next batch.
    self._last_letter_batch = batches[-1]
    return batches


In [7]:
# Trick - override first 10 chars
#list1 = list(train_text)
#for i in range(2):
#    list1[i] = 'z'
#train_text = ''.join(list1)
#print("Train set =", train_text[0:100])

# Create objects for training, validation and testing batch generation.
train_batches = BatchGenerator(train_text, BATCH_SIZE, SEQ_LENGTH, vocabulary_size)

# Get first training batch.
batch = train_batches.next()
print(len(batch))
print(batch[0].shape)
#print("Batch = ", batch)
#print(batches2string(batch))
#print("batch len = num of enrollings",len(batch))
#for i in range(num_unrollings):
#    print("i = ", i, "letter=", batches2string(batch)[0][i][0], "bits = ", batch[i][0])


# For validation  - process the whole text as one big batch.
VALID_BATCH_SIZE = int(np.floor(valid_size/SEQ_LENGTH))
valid_batches = BatchGenerator(valid_text, VALID_BATCH_SIZE, SEQ_LENGTH, vocabulary_size)
valid_batch = valid_batches.next()
#print (VALID_BATCH_SIZE)
#print(len(valid_batch))
#print(valid_batch[0].shape)

# For texting  - process the whole text as one big batch.
TEST_BATCH_SIZE = int(np.floor(test_size/SEQ_LENGTH))
test_batches = BatchGenerator(test_text, TEST_BATCH_SIZE, SEQ_LENGTH, vocabulary_size)
# Get single batch! 
test_batch = test_batches.next()


6
(2, 59)


###  Definition of tensor graph

In [8]:
# Reset graph - just in case.
tf.reset_default_graph()

# Placeholders for inputs.
with tf.name_scope("Memory"):
    # Memory.
    memory = tf.Variable(tf.truncated_normal(shape=[HIDDEN_SIZE, MEMORY_SIZE]), trainable=False, name="Memory_M")
    # Latest vs LRU ratio.
    alpha = tf.Variable(tf.truncated_normal(shape=[1]), name="Alpha")


with tf.name_scope("Previous"):
    # Placeholders for previous weights.
    prev_read_weights_seq_batch = list()    
    prev_update_weights_seq_batch = list()    
    for i_seq in range(SEQ_LENGTH):
        prev_read_weights_seq_batch.append(tf.placeholder(tf.float32, shape=None, name="Prev_rw"))
        prev_update_weights_seq_batch.append(tf.placeholder(tf.float32, shape=None, name="Prev_uw"))
    
# SET INITIAL MEMORY STATE.
#memory_set = memory.assign(tf.transpose([
#    [0.0, 0, 0, 1],
#    [0, 0, 1, 0],
#    [0, 1, 0, 0],
#    [1, 0, 0, 0],
#    [1, 0, 1, 0],
#    [1, 1, 0, 0]]))
#alpha_set = alpha.assign([0.1])


# Placeholders for inputs.
with tf.name_scope("Input_data"):
    # Define input data buffers.
    data_buffers = list()
    for _ in range(SEQ_LENGTH + 1):
        # Collect placeholders for inputs/labels.
        data_buffers.append(tf.placeholder(tf.float32, shape=None, name="data_buffers"))
    print ("data_buffers shape =", data_buffers[0].shape)

    # Sequence of batches.
    input_seq_batch = data_buffers[:SEQ_LENGTH]
    print ("Seq length  =", len(input_seq_batch))
    print ("Batch shape =", input_seq_batch[0].shape)

    # Labels are pointing to the same placeholders!
    # Labels are inputs shifted by one time step.
    labels_seq_batch = data_buffers[1:]  
    # Concatenate targets into 2D tensor.
    target_batch = tf.concat(labels_seq_batch, 0)

    # Add histograms to TensorBoard.
    input_seq_batch_hist = tf.summary.histogram("input_seq_batch", input_seq_batch)
    
    
# Create SEQ_LEN x BATCH_SIZE placeholders for similarity - each MEMORY_SIZE x 1,  
with tf.name_scope("Read_head"):

    # Normalize sequence of batches.
    norm_seq_batch = list()
    for i_seq in range(SEQ_LENGTH):
        # Collect placeholders for inputs/labels.
        norm_seq_batch.append(tf.nn.l2_normalize(input_seq_batch[i_seq],1) )
    
    # Normalize memory.
    norm_memory = tf.nn.l2_normalize(memory,0)
    print("norm_memory =", norm_memory)
    
    # Calculate cosine similarity.
    similarity_seq_batch = list()    
    for i_seq in range(SEQ_LENGTH):
        similarity_seq_batch.append(tf.matmul(norm_seq_batch[i_seq], norm_memory))
    # Add histograms to TensorBoard.
    similarity_seq_batch_hist = tf.summary.histogram("cosine_similarity_seq_batch", similarity_seq_batch)
    
    # Calcualte read weights based on similarity.
    read_weights_seq_batch = list()    
    for i_seq in range(SEQ_LENGTH):
        read_weights_seq_batch.append(tf.nn.softmax(similarity_seq_batch[i_seq]))    
    # Add histograms to TensorBoard.
    read_weights_seq_batch_hist = tf.summary.histogram("read_weights_seq_batch", read_weights_seq_batch)

# TODO: add read vector.
#with tf.name_scope("Read_vector"):

    
    
# TODO: add dependencies, that write will be done after read.
with tf.name_scope("Write_head"):

    # Calcualte read weights based on similarity.
    write_weights_seq_batch = list()  
    #test_batch = list()
    for i_seq in range(SEQ_LENGTH):
        # "Truncation scheme to update the least-used positions".
        # First, find (size-n) top elements (in each "batch sample"/head separatelly).
        top = tf.nn.top_k(-prev_update_weights_seq_batch[i_seq], N_SMALLEST)
        # To get boolean True/False values, you can first get the k-th value and then use tf.greater_equal:
        kth = tf.reduce_min(top.values, axis=1, keep_dims=True)
        top2 = tf.greater_equal(-prev_update_weights_seq_batch[i_seq], kth)
        # And finally - cast it to n smallest elements.
        prev_smallest_lru_weights = tf.cast(top2, tf.float32)

        #write_weights_seq_batch.append(prev_smallest_lru_weights)
        write_weights_seq_batch.append(tf.add(tf.sigmoid(alpha) * prev_read_weights_seq_batch[i_seq],
                               (1.0 - tf.sigmoid(alpha)) * prev_smallest_lru_weights,
                               name="Write_weights_ww"))
        #test_batch.append(top)
    # Add histograms to TensorBoard.
    write_weights_seq_batch_hist = tf.summary.histogram("write_weights_seq_batch", write_weights_seq_batch)
        
with tf.name_scope("Memory_update"):
    calculated_mem_update_seq_batch = list()
    for i_seq in range(SEQ_LENGTH):
        # Perform single update for each sequence/batch.
        calculated_mem_update_seq_batch.append(tf.tensordot(tf.transpose(input_seq_batch[i_seq]), 
                                                            write_weights_seq_batch[i_seq], axes=1))
    # Sum updates.
    mem_update = tf.add_n(calculated_mem_update_seq_batch)
    # Update the memory
    memory_update_op = memory.assign(memory + mem_update)
    # Add histograms to TensorBoard.
    memory_hist = tf.summary.histogram("memory", memory)

with tf.name_scope("Update_head"):
    # This relies on prev. weights and will be used in fact in the NEXT step.
    update_weights_seq_batch = list()    
    for i_seq in range(SEQ_LENGTH):
        update_weights_seq_batch.append(tf.add(GAMMA * prev_update_weights_seq_batch[i_seq],
                                               read_weights_seq_batch[i_seq] + write_weights_seq_batch[i_seq],
                                               name="Update_weights_uw"))
    # Add histograms to TensorBoard.
    update_weights_seq_batch_hist = tf.summary.histogram("update_weights_seq_batch", update_weights_seq_batch)


# Merge all summaries.
merged_summaries = tf.summary.merge_all()

data_buffers shape = <unknown>
Seq length  = 5
Batch shape = <unknown>
norm_memory = Tensor("Read_head/l2_normalize_5:0", shape=(59, 6), dtype=float32)


In [9]:
def create_feed_dict():#batch_seq):
    """Creates a dictionaries for different sets: maps data onto Tensor placeholders."""
    feed_dict = dict()
    # Get next batch and create a feed dict.
    next_batch = train_batches.next()
    
    # Feed batch to input buffers.
    for i in range(SEQ_LENGTH + 1):
        feed_dict[data_buffers[i]] = next_batch[i]
    # Reset previous state and output
    for i in range(SEQ_LENGTH):
        feed_dict[prev_read_weights_seq_batch[i]] = prev_rw_seq_batch[i]
        feed_dict[prev_update_weights_seq_batch[i]] = prev_uw_seq_batch[i]
    #feed_dict[prev_read_weights_seq_batch] = prev_rw
    #feed_dict[prev_update_weights_seq_batch] = prev_uw
    
    
    return feed_dict # {prev_output: train_output_zeros, prev_state: train_state_zeros }

### Session execution

In [10]:
# Eventually clear the log dir.
if tf.gfile.Exists(LOG_DIR):
  tf.gfile.DeleteRecursively(LOG_DIR)
# Create (new) log dir.
tf.gfile.MakeDirs(LOG_DIR)

In [11]:

########################
# Execute graph.
sess=tf.InteractiveSession()

# Create summary writers, point them to LOG_DIR.
train_writer = tf.summary.FileWriter(LOG_DIR + '/train', sess.graph)
valid_writer = tf.summary.FileWriter(LOG_DIR + '/valid')
test_writer = tf.summary.FileWriter(LOG_DIR + '/test')

# Initialize global variables.
tf.global_variables_initializer().run()
print('Initialized')



#memory_, _, norm_memory_ = sess.run([memory_set, alpha_set, norm_memory])
print("Memory =\n",memory)
#print("norm_memory_ =\n",norm_memory_)

# Batch - of dimensions: SEQUENCE x BATCH x VECTOR SIZE
#batch_seq = np.array([[[0, 0, 1, 0],[0, 0, 1, 1]],
#             [[0, 1, 0, 0],[1, 0, 0, 0]],
#             [[0, 0, 1, 0],[0,1,0,0]],
#            [[0, 0, 1, 0],[0,1,0,0]]])
#batch_seq = np.array([[[0, 0, 1, 0]],
#             [[0, 1, 0, 0]],
#             [[-0.1, 0.2, 1, 0.1]],
#            [[-0.1, 0.2, 1, 0.1]]]) # "additional row"

#batch_seq = np.array([[[0, 0, 1, 0],[0, 1, 0, 0],[0, 0, 1, 0]],
#             [[0, 0, 1, 0],[1, 0, 0, 0],[0,1,0,0]]])
# Reset previous state and output
prev_rw_seq_batch = list()
prev_uw_seq_batch = list()
for i in range(SEQ_LENGTH):
    prev_rw_seq_batch.append(np.zeros([BATCH_SIZE, MEMORY_SIZE]))
    prev_uw_seq_batch.append(np.zeros([BATCH_SIZE, MEMORY_SIZE]))

#print("prev_uw_seq_batch=\n",prev_uw_seq_batch[0].shape)


#print("Batch=\n",batch_seq.shape)

for i in range(100):
    print("\n=================\nIteration = ",i)
    input_seq_batch_, norm_seq_batch_, similarity_seq_batch_, prev_rw_seq_batch, write_weights_seq_batch_, prev_uw_seq_batch, mem_update_, memory_, summaries  = sess.run([
        input_seq_batch, norm_seq_batch, similarity_seq_batch, read_weights_seq_batch, write_weights_seq_batch, update_weights_seq_batch, mem_update, memory_update_op, merged_summaries],
        feed_dict=create_feed_dict())#batch_seq))
    train_writer.add_summary(summaries, i)
    #for i in range(SEQ_LENGTH):
    #    print("inputs[",i, "] =\n",input_seq_batch_[i])

    #for i in range(SEQ_LENGTH):
    #    print("prev_rw_seq_batch[",i, "] = ",prev_rw_seq_batch[i])

    #for i in range(SEQ_LENGTH):
    #    print("write_weights_seq_batch_[",i, "] = ",write_weights_seq_batch_[i])

    #for i in range(SEQ_LENGTH):
    #    print("prev_uw_seq_batch[",i, "] = ",prev_uw_seq_batch[i])

    #print("mem_update =\n", mem_update_)

    #print("memory =\n ",memory_)

    
# Close writers and session.
train_writer.close()
valid_writer.close()
test_writer.close()
sess.close() 

Initialized
Memory =
 <tf.Variable 'Memory/Memory_M:0' shape=(59, 6) dtype=float32_ref>

Iteration =  0

Iteration =  1

Iteration =  2

Iteration =  3

Iteration =  4

Iteration =  5

Iteration =  6

Iteration =  7

Iteration =  8

Iteration =  9

Iteration =  10

Iteration =  11

Iteration =  12

Iteration =  13

Iteration =  14

Iteration =  15

Iteration =  16

Iteration =  17

Iteration =  18

Iteration =  19

Iteration =  20

Iteration =  21

Iteration =  22

Iteration =  23

Iteration =  24

Iteration =  25

Iteration =  26

Iteration =  27

Iteration =  28

Iteration =  29

Iteration =  30

Iteration =  31

Iteration =  32

Iteration =  33

Iteration =  34

Iteration =  35

Iteration =  36

Iteration =  37

Iteration =  38

Iteration =  39

Iteration =  40

Iteration =  41

Iteration =  42

Iteration =  43

Iteration =  44

Iteration =  45

Iteration =  46

Iteration =  47

Iteration =  48

Iteration =  49

Iteration =  50

Iteration =  51

Iteration =  52

Iteration =  53

Ite