In [2]:
from __future__ import print_function
import numpy as np
import random
import os
import string
import zipfile
import tensorflow as tf
import sys
from six.moves import range
from tensorflow.contrib.rnn import LSTMCell, GRUCell
import tensorflow.contrib.seq2seq as Seq2Seq

In [3]:
url = 'http://mattmahoney.net/dc/'

def maybe_download(filename, expected_bytes):
  """Download a file if not present, and make sure it's the right size."""
  if not os.path.exists(filename):
    filename, _ = urlretrieve(url + filename, filename)
  statinfo = os.stat(filename)
  if statinfo.st_size == expected_bytes:
    print('Found and verified %s' % filename)
  else:
    print(statinfo.st_size)
    raise Exception(
      'Failed to verify ' + filename + '. Can you get to it with a browser?')
  return filename

filename = maybe_download('text8.zip', 31344016)

Found and verified text8.zip


In [4]:
def read_data(filename):
  with zipfile.ZipFile(filename) as f:
    name = f.namelist()[0]
    data = tf.compat.as_str(f.read(name))
  return data
  
text = read_data(filename)
print('Data size %d' % len(text))

Data size 100000000


In [5]:
valid_size = 1000
valid_text = text[:valid_size]
train_text = text[valid_size:]
train_size = len(train_text)
print(train_size, train_text[:64])
print(valid_size, valid_text[:64])


99999000 ons anarchists advocate social relations based upon voluntary as
1000  anarchism originated as a term of abuse first used against earl


In [6]:
vocabulary_size = len(string.ascii_lowercase) + 2 # [a-z] + ' ' + '_' (padding)
first_letter = ord(string.ascii_lowercase[0])

def char2id(char):
  if char in string.ascii_lowercase:
    return ord(char) - first_letter + 1
  elif char == ' ':
    return 27
  else:
    print('Unexpected character: %s' % char)
    return 0
  
def id2char(dictid):
  if dictid==27:
    return ' '
  if dictid > 0:
    return chr(dictid + first_letter - 1)
  else:
    return '_'

print(char2id('a'), char2id('z'), char2id(' '), char2id('ï'))
print(id2char(1), id2char(26), id2char(0))

Unexpected character: ï
1 26 27 0
a z _


In [62]:
batch_size=32 # number of sentences (axis=1 of batch array) 
num_unrollings=3 # number of words
maxlen = 30 # 30 characters at most. pad with 0 '_'

class BatchGenerator(object):
  def __init__(self, text, batch_size, num_unrollings):
    self._text = text
    self._words = text.split()
    self._num_words = len(self._words)
    self._batch_size = batch_size
    self._num_unrollings = num_unrollings
    segment = self._num_words // batch_size # larger than num_unrollings, to prevent getting same phrases in different... 
    # sentences, however the code does not assert so
    self._cursor = [ offset * segment for offset in range(batch_size)] # each batch/sentence has its own cursor, positioned...
    # "segment" indices away
  
  def _next_batch(self):
    """Generate a single batch from the current cursor position in the data."""
    
    batches = np.zeros(shape=[maxlen,self._batch_size],dtype=np.int32)
    seqlen = list()
    labels = np.zeros(shape=[maxlen,self._batch_size],dtype=np.int32)
    for b in range(self._batch_size):
      s = list()
      l = list()
      for x in range(num_unrollings):
        w = self._words[self._cursor[b]]  
        s.extend([char2id(i) for i in w])
        l.extend([char2id(i) for i in reversed(w)])
        if x < num_unrollings-1:
          s.extend([27])
          l.extend([27])
        self._cursor[b] = (self._cursor[b] + 1) % self._num_words # move a cursor of a sentence/batch to next char...
      seqlen.extend([len(s)])
      while len(s)<maxlen:
        s.extend([0])
        l.extend([0])
      if(len(s)>maxlen):
        s=s[:maxlen]
        l=l[:maxlen]
        seqlen[-1]=maxlen
      batches[:,b]=s
      labels[:,b]=l
    return batches,seqlen,labels


def characters(probabilities):
  """Turn a 1-hot encoding or a probability distribution over the possible
  characters back into its (most likely) character representation."""
  return [id2char(c) for c in np.argmax(probabilities, 1)]


def batches2string(batches):
  #Convert a batch double-list to their string representation.
  S = list()
  for i in range(batches.shape[1]):
    s=''
    for j in range(maxlen):
      s = s + id2char(batches[j,i])
    S.append(s)
  return S
            
def labels2string(labels):
  #Convert a batch double-list to their string representation.
  S = list()
  for i in labels:
    s=''
    for j in i:
      s = s + id2char(j)
    S.append(s)
  return S

train_batches = BatchGenerator(train_text, batch_size, num_unrollings) 
valid_batches = BatchGenerator(valid_text, 1, num_unrollings)

print(batches2string(train_batches._next_batch()[0]))
#print(train_batches._next_batch()[1])
print(batches2string(valid_batches._next_batch()[2]))

['ons anarchists advocate_______', 'her novels to_________________', 'alc cer do____________________', 'eight zero and________________', 'they were seen________________', 'a combo amplifier_____________', 'a game against________________', 'for the practiced_____________', 'nitrogen dioxide sulfur_______', 'heat engine acts______________', 'way the shoemakers____________', 'zero zloty coin_______________', 'illustration to don___________', 'by his second_________________', 'used credit card______________', 'one two seven_________________', 'drupelets because each________', 'paul jones one________________', 'organization inmarsat interpol', 'people foreign support________', 'the horror writers____________', 'of ireland string_____________', 'connectivity at all___________', 'one day each__________________', 'one p v_______________________', 'by the loss___________________', 'groom at a____________________', 'charles taylor accepted_______', 'nine seven zero_______________', 'throughout m

In [83]:
num_nodes = 64
embedding_size = 128

graph = tf.Graph()
with graph.as_default():
    
  embedding_encoder = tf.Variable(tf.random_uniform([vocabulary_size, embedding_size], -0.1, 0.1,dtype=tf.float32))
  encoder_inputs = tf.placeholder(shape=[None,batch_size], dtype=tf.int32)
  encoder_lengths=tf.placeholder(dtype=tf.int32,shape=batch_size)
  encoder_emb_inp = tf.nn.embedding_lookup(embedding_encoder, encoder_inputs)
 
  encoder_cell = tf.nn.rnn_cell.BasicLSTMCell(num_nodes)
  encoder_outputs, encoder_state = tf.nn.dynamic_rnn(encoder_cell, encoder_emb_inp, dtype=tf.float32, time_major=True,\
                                                    sequence_length=encoder_lengths)
  del encoder_outputs    
  
  embedding_decoder = tf.Variable(tf.random_uniform([vocabulary_size, embedding_size], -0.1, 0.1,dtype=tf.float32))
  decoder_inputs = tf.placeholder(shape=[None,batch_size], dtype=tf.int32)
  decoder_emb_inp = tf.nn.embedding_lookup(embedding_decoder, decoder_inputs)
  decoder_cell = tf.nn.rnn_cell.BasicLSTMCell(num_nodes) 
  decoder_lengths=tf.placeholder(dtype=tf.int32,shape=batch_size)
  helper = tf.contrib.seq2seq.TrainingHelper(decoder_emb_inp,sequence_length= decoder_lengths, time_major=True)
   
  
  proj_layer=tf.layers.Dense(vocabulary_size,activation=None,use_bias=False)
  decoder = tf.contrib.seq2seq.BasicDecoder(decoder_cell, helper, encoder_state,output_layer=proj_layer)   
  decoder_outputs,_,_ = tf.contrib.seq2seq.dynamic_decode(decoder,output_time_major=True)
  
  decoder_logits = decoder_outputs.rnn_output
  decoder_prediction= decoder_outputs.sample_id
  print(decoder_logits)
  decoder_targets=tf.one_hot(decoder_inputs, depth=vocabulary_size, dtype=tf.float32)
  print(decoder_targets)
  loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
    labels=decoder_targets,logits=decoder_logits))
 
  params = tf.trainable_variables()
  gradients = tf.gradients(loss, params)
  clipped_gradients, _ = tf.clip_by_global_norm(gradients, 2)

  optimizer = tf.train.AdamOptimizer(0.0005)
  update_step = optimizer.apply_gradients(zip(clipped_gradients, params)) 
   

Tensor("decoder/TensorArrayStack/TensorArrayGatherV3:0", shape=(?, 32, 28), dtype=float32)
Tensor("one_hot:0", shape=(?, 32, 28), dtype=float32)


In [84]:
num_steps = 7001

with tf.Session(graph=graph) as session:
  tf.global_variables_initializer().run()
  print('Initialized')
  
  for step in range(num_steps):
    batches,seqlen,labels = train_batches._next_batch()
    #print(seqlen)
    #print(maxlen*batch_size-sum(seqlen))
    feed_dict = dict()
    feed_dict[encoder_inputs] = batches
    feed_dict[decoder_inputs] = labels
    feed_dict[encoder_lengths] = seqlen
    feed_dict[decoder_lengths] = seqlen
    _,l, predictions=session.run([update_step, loss, decoder_prediction],feed_dict=feed_dict)
    print(batches2string(prediction))
    

Initialized


InvalidArgumentError: logits and labels must be broadcastable: logits_size=[832,28] labels_size=[960,28]
	 [[Node: softmax_cross_entropy_with_logits = SoftmaxCrossEntropyWithLogits[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](softmax_cross_entropy_with_logits/Reshape, softmax_cross_entropy_with_logits/Reshape_1)]]

Caused by op 'softmax_cross_entropy_with_logits', defined at:
  File "c:\users\phill\anaconda3\envs\tensorflow\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "c:\users\phill\anaconda3\envs\tensorflow\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "c:\users\phill\anaconda3\envs\tensorflow\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "c:\users\phill\anaconda3\envs\tensorflow\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "c:\users\phill\anaconda3\envs\tensorflow\lib\site-packages\ipykernel\kernelapp.py", line 486, in start
    self.io_loop.start()
  File "c:\users\phill\anaconda3\envs\tensorflow\lib\site-packages\tornado\platform\asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "c:\users\phill\anaconda3\envs\tensorflow\lib\asyncio\base_events.py", line 422, in run_forever
    self._run_once()
  File "c:\users\phill\anaconda3\envs\tensorflow\lib\asyncio\base_events.py", line 1434, in _run_once
    handle._run()
  File "c:\users\phill\anaconda3\envs\tensorflow\lib\asyncio\events.py", line 145, in _run
    self._callback(*self._args)
  File "c:\users\phill\anaconda3\envs\tensorflow\lib\site-packages\tornado\platform\asyncio.py", line 122, in _handle_events
    handler_func(fileobj, events)
  File "c:\users\phill\anaconda3\envs\tensorflow\lib\site-packages\tornado\stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "c:\users\phill\anaconda3\envs\tensorflow\lib\site-packages\zmq\eventloop\zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "c:\users\phill\anaconda3\envs\tensorflow\lib\site-packages\zmq\eventloop\zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "c:\users\phill\anaconda3\envs\tensorflow\lib\site-packages\zmq\eventloop\zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "c:\users\phill\anaconda3\envs\tensorflow\lib\site-packages\tornado\stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "c:\users\phill\anaconda3\envs\tensorflow\lib\site-packages\ipykernel\kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "c:\users\phill\anaconda3\envs\tensorflow\lib\site-packages\ipykernel\kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "c:\users\phill\anaconda3\envs\tensorflow\lib\site-packages\ipykernel\kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "c:\users\phill\anaconda3\envs\tensorflow\lib\site-packages\ipykernel\ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "c:\users\phill\anaconda3\envs\tensorflow\lib\site-packages\ipykernel\zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "c:\users\phill\anaconda3\envs\tensorflow\lib\site-packages\IPython\core\interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "c:\users\phill\anaconda3\envs\tensorflow\lib\site-packages\IPython\core\interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "c:\users\phill\anaconda3\envs\tensorflow\lib\site-packages\IPython\core\interactiveshell.py", line 2901, in run_ast_nodes
    if self.run_code(code, result):
  File "c:\users\phill\anaconda3\envs\tensorflow\lib\site-packages\IPython\core\interactiveshell.py", line 2961, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-83-add5807adaf7>", line 34, in <module>
    labels=decoder_targets,logits=decoder_logits))
  File "c:\users\phill\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\nn_ops.py", line 1879, in softmax_cross_entropy_with_logits_v2
    precise_logits, labels, name=name)
  File "c:\users\phill\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\gen_nn_ops.py", line 7738, in softmax_cross_entropy_with_logits
    name=name)
  File "c:\users\phill\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "c:\users\phill\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py", line 3414, in create_op
    op_def=op_def)
  File "c:\users\phill\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py", line 1740, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InvalidArgumentError (see above for traceback): logits and labels must be broadcastable: logits_size=[832,28] labels_size=[960,28]
	 [[Node: softmax_cross_entropy_with_logits = SoftmaxCrossEntropyWithLogits[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](softmax_cross_entropy_with_logits/Reshape, softmax_cross_entropy_with_logits/Reshape_1)]]
