In [1]:
import numpy as np
import tensorflow as tf
import jamo
import os

In [2]:
ROOT = 'ᐁ' # Root of sentence symbol
MS = 'ᑌ' # morpheme separator symbol
WS = 'ᐯ' # word separator symbol
EOS = 'ᕒ' # end of sentence symbol
ESC_BEGIN = 'ᐸ' # beginning of escape sequence symbol
ESC_END = 'ᐳ' # end of escape sequence symbol
PADDING = 'ᒣ' # padding after end-of-sentence
MASK = 'ᗰ' # masking symbol
hangul = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] + \
    [ESC_BEGIN, ESC_END, ROOT, MS, WS, EOS, PADDING, MASK] + \
    [chr(i) for i in range(0x1100, 0x1113)] + \
    [chr(i) for i in range(0x1161, 0x1176)] + \
    [chr(i) for i in range(0x11A8, 0x11C3)] + \
    [' ', '(', ')', '.', ',', '?', '\""', '\'']
PADDING_IDX = hangul.index(PADDING)
MASK_IDX = hangul.index(MASK)
def encode_string(s):
    compat_jamos = [chr(i) for i in range(0x3131, 0x314f)]
    s = jamo.h2j(s)
    s = "".join(jamo.hcj2j(ch, "tail") if ch in compat_jamos else ch for ch in s)
    def escape(ch):
        return [10] + [hangul.index(c) for c in str(ord(ch))] + [11]
    result = []
    for ch in s:
        if ch in hangul:
            result.append(hangul.index(ch))
        else:
            result += escape(ch)
    return result

def decode_string(s):
    result = []
    i = 0
    while i < len(s):
        if s[i] == PADDING_IDX:
            break
        if s[i] == 10:
            ch = '0'
            i += 1
            while i < len(s) and s[i] < 10:
                ch += str(s[i])
                i += 1
            result.append(chr(int(ch)))
        else:
            result.append(hangul[s[i]])
        i += 1
    return "".join(result)

In [3]:
max_in_length = 600
max_out_length = 700
max_dep_length = 90

"""
CoNLL-U Format
ID: Word index, integer starting at 1 for each new sentence; may be a range for multiword tokens; may be a decimal number for empty nodes (decimal numbers can be lower than 1 but must be greater than 0).
FORM: Word form or punctuation symbol.
LEMMA: Lemma or stem of word form.
UPOS: Universal part-of-speech tag.
XPOS: Language-specific part-of-speech tag; underscore if not available.
FEATS: List of morphological features from the universal feature inventory or from a defined language-specific extension; underscore if not available.
HEAD: Head of the current word, which is either a value of ID or zero (0).
DEPREL: Universal dependency relation to the HEAD (root iff HEAD = 0) or a defined language-specific subtype of one.
DEPS: Enhanced dependency graph in the form of a list of head-deprel pairs.
MISC: Any other annotation.
"""
def read_conllu(filenames):
    texts = []
    morphs = []
    depends = []
    for filename in filenames:
        with open(filename) as fp:
            for line in fp.readlines():
                if line.startswith('#'):
                    if line.startswith('# text = '):
                        texts.append([])
                        morphs.append([ROOT])
                        depends.append([])
                else:
                    split = line.split('\t')
                    if len(split) != 10:
                        continue
                    idx, form, lemma, upos, xpos, feats, head, deprel, deps, misc = split
                    
                    if "SpaceAfter=No" not in misc:
                        form += ' '
                    texts[-1].append(encode_string(form))
                    
                    lemma = MS.join(lemma.split('+')) + WS
                    morphs[-1].append(lemma)
                    depends[-1].append(int(head) - 1)
    
    for i in range(len(morphs)):
        morphs[i][-1] += EOS
    morphs = [[encode_string(w) for w in m] for m in morphs]
    
    dep_lengths = []
    depend_idxs = []
    depend_aligns = []
    for text, morph, depend in zip(texts, morphs, depends):
        word_cum_lengths = np.cumsum([len(w) for w in text])
        morph_cum_lengths = np.cumsum([len(w) for w in morph])
        
        indices = morph_cum_lengths[1:] - 1
        pad_size = max_dep_length - len(indices)
        indices = np.pad(indices, (0, pad_size), 'constant')
        depend_idxs.append(indices)
        
        for i in range(len(depend)):
            depend[i] = word_cum_lengths[i] - 1
        dep_lengths.append(len(depend))
        depend_aligns.append(np.pad(depend, (0, pad_size), 'constant'))
    
    text_lengths = []
    for i in range(len(texts)):
        texts[i] = sum(texts[i], [])
        text_lengths.append(len(texts[i]))
        texts[i] += [PADDING_IDX] * (max_in_length - len(texts[i]))
        
    out_lengths = []
    for i in range(len(morphs)):
        morphs[i] = sum(morphs[i], [])
        out_lengths.append(len(morphs[i]))
        morphs[i] += [PADDING_IDX] * (max_out_length - len(morphs[i]))
    
    return {'inputs': np.array(texts, dtype=np.int32), 
            'in_lengths': np.array(text_lengths, dtype=np.int32),
            'depend_idxs': np.array(depend_idxs, dtype=np.int32),
            'depends': np.array(depend_aligns, dtype=np.int32),
            'dep_lengths': np.array(dep_lengths, dtype=np.int32), 
            'morphs': np.array(morphs, dtype=np.int32),
            'out_lengths': np.array(out_lengths, dtype=np.int32)}

In [4]:
reparse = False

print("Reading input files...")
if not reparse and os.path.exists('train.npy'):
    print("Reading from .npy...")
    train = np.load('train.npy').item()
    test = np.load('test.npy').item()
else:
    print("Parsing ConLLU database...")
    train = read_conllu([
        'UD_Korean-GSD/ko_gsd-ud-train.conllu',
        'UD_Korean-kaist/ko_kaist-ud-train.conllu'])
    test  = read_conllu([
        'UD_Korean-GSD/ko_gsd-ud-test.conllu',
        'UD_Korean-kaist/ko_kaist-ud-test.conllu'])

    # Save to file for later
    np.save('train.npy', train)
    np.save('test.npy', test)
    
print("Done.")
print("Training set size:", len(train['inputs']))
print("Test set size:", len(test['inputs']))

Reading input files...
Reading from .npy...
Done.
Training set size: 27410
Test set size: 3276


In [5]:
class ConcatOutputAndAttentionWrapper(tf.contrib.rnn.RNNCell):
    '''Concatenates RNN cell output with the attention context vector.

    This is expected to wrap a cell wrapped with an AttentionWrapper constructed with
    attention_layer_size=None and output_attention=False. Such a cell's state will include an
    "attention" field that is the context vector.
    '''
    def __init__(self, cell):
        super(ConcatOutputAndAttentionWrapper, self).__init__()
        self._cell = cell

    @property
    def state_size(self):
        return self._cell.state_size

    @property
    def output_size(self):
        return self._cell.output_size + self._cell.state_size.attention

    def call(self, inputs, state):
        output, res_state = self._cell(inputs, state)
        return tf.concat([output, res_state.attention], axis=-1), res_state

    def zero_state(self, batch_size, dtype):
        return self._cell.zero_state(batch_size, dtype)

In [6]:
with tf.variable_scope('model', reuse=tf.AUTO_REUSE):
    char_embed_table = tf.get_variable('embedding', 
                            [len(hangul), 256], # number of symbols, embedding vector size
                            dtype=tf.float32,
                            initializer=tf.truncated_normal_initializer(stddev=0.5))

In [7]:
class Encoder:
    # inputs: (batch, input_length)
    def __init__(self, inputs, lengths, is_training):
        self.inputs = inputs
        self.lengths = lengths
        
        char_embedded_inputs = tf.nn.embedding_lookup(char_embed_table, inputs)
        
        # 3 convolution layers
        x = char_embedded_inputs
        with tf.variable_scope('prenet'):
            layer_sizes = [256, 256, 256]
            drop_rate = 0.1 if is_training else 0.0
            for i, size in enumerate(layer_sizes):
                conv_layer = tf.layers.Conv1D(filters=size, # number of output channels
                                              kernel_size=5,
                                              padding="same",
                                              activation=tf.nn.relu,
                                              name="conv_{}".format(i))
                x = conv_layer.apply(x)
                tf.layers.dropout(x, 
                                  rate=drop_rate, 
                                  name="dropout_{}".format(i))
        conv_result = x
        
        num_hidden = 128
        lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(num_hidden, forget_bias=1.0)
        lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(num_hidden, forget_bias=1.0)
        outputs, rnn_states = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=lstm_fw_cell,
            cell_bw=lstm_bw_cell,
            inputs=conv_result,
            sequence_length=lengths,
            dtype=tf.float32)
        output_concat = tf.concat(list(outputs), -1)
        
        self.output = output_concat

In [8]:
def batch_scatter(indices, updates, shape):
    updates = tf.reshape(updates, [-1, shape[2]])
    indices = indices + tf.expand_dims(tf.range(0, shape[0]) * shape[1], 1)
    indices = tf.reshape(indices, [-1, 1])

    scatter = tf.scatter_nd(indices, updates, [shape[0]*shape[1], shape[2]])
    scatter = tf.reshape(scatter, shape)
    return scatter

In [18]:
class Decoder:
    # encoder_outputs: (batch, input_length, 256)
    # depend_targets: (batch, input_length)
    def __init__(self, encoder_outputs, depend_targets, depend_idxs, morph_targets, out_lengths, is_training):
        self.out_lengths = out_lengths
        
        cells = []
        num_hidden = 256
        keep_rate = 0.9 if is_training else 1.0
        for layer_index in range(2):
            lstm_cell = tf.nn.rnn_cell.LSTMCell(num_hidden, forget_bias=1.0)
            cell = tf.contrib.rnn.DropoutWrapper(lstm_cell, input_keep_prob=keep_rate)
            cells.append(cell)
        prenet = tf.contrib.rnn.MultiRNNCell(cells)
        
        attention_size = 256
        attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
            attention_size, 
            encoder_outputs,
            normalize=True)
        
        attention_cell = ConcatOutputAndAttentionWrapper(
            tf.contrib.seq2seq.AttentionWrapper(
                prenet, 
                attention_mechanism,
                output_attention=False))
        
        # lookup encoder outputs from dependency indices
        self.depend_contexts = tf.batch_gather(encoder_outputs, depend_targets)
        
        shape = [tf.shape(encoder_outputs)[0], tf.shape(morph_targets)[1], encoder_outputs.shape[2]]
        depend_contexts_sparse = batch_scatter(depend_idxs, self.depend_contexts, shape)
        
        # lookup char embeddings
        morph_embedded = tf.nn.embedding_lookup(char_embed_table, morph_targets)
        
        decoder_inputs = tf.concat([morph_embedded, depend_contexts_sparse], axis=-1) # (batch, max_out_len, 512)
        
        # mask 15% of the lengths of the decoder inputs randomly
        if is_training:
            print("training mask")
        
            batch_size = tf.shape(decoder_inputs)[0]
            max_out_len = tf.shape(decoder_inputs)[1]
            
            mask_lengths = tf.to_int32(tf.to_float(out_lengths) * 0.15)
            offset_bounds = out_lengths - mask_lengths - 1
            offsets = tf.to_int32(tf.random.uniform([batch_size]) * tf.to_float(offset_bounds))
            
            rng = tf.range(max_out_len)
            mask = tf.math.logical_and(
                tf.expand_dims(rng, 0) >= tf.expand_dims(offsets, -1), 
                tf.expand_dims(rng, 0) < tf.expand_dims(offsets + mask_lengths, -1))
            mask = tf.broadcast_to(tf.expand_dims(mask, -1), tf.shape(decoder_inputs))
            
            mask_symbol = tf.concat([char_embed_table[MASK_IDX], tf.zeros([num_hidden])], -1)
            mask_symbol = tf.expand_dims(tf.expand_dims(mask_symbol, 0), 0)
            mask_symbol = tf.broadcast_to(mask_symbol, tf.shape(decoder_inputs))
            
            decoder_inputs = tf.where(mask, mask_symbol, decoder_inputs)
            
        self.decoder_inputs = decoder_inputs
        
        output, rnn_states = tf.nn.dynamic_rnn(
            cell=attention_cell,
            inputs=decoder_inputs,
            sequence_length=out_lengths,
            dtype=tf.float32)
        
        self.char_output = tf.layers.dense(output[:, :, :num_hidden], len(hangul))
        depend_output = output[:, :, num_hidden:]
        
        self.depend_output = tf.batch_gather(depend_output, depend_idxs)

In [10]:
def cosine_distance(a, b):
    return tf.einsum('ijk,ijk->ij', a, b) / (tf.norm(a, axis=2) * tf.norm(b, axis=2))

class Model:
    def __init__(self, batch, is_training):
        with tf.variable_scope('model', reuse=tf.AUTO_REUSE):
            self.encoder = encoder = Encoder(batch['inputs'], batch['in_lengths'], is_training)
            self.decoder = decoder = Decoder(encoder.output, batch['depends'], batch['depend_idxs'],
                              batch['morphs'], batch['out_lengths'], is_training)

            # dependency analyzer loss
            depend_seq_loss = cosine_distance(decoder.depend_contexts, decoder.depend_output)
            
            depend_mask = tf.math.equal(batch['depends'], 0)
            depend_count = tf.to_float(tf.reduce_sum(batch['dep_lengths']))
            
            depend_masked_loss = tf.where(depend_mask, tf.zeros_like(depend_seq_loss), depend_seq_loss)
            self.depend_loss = tf.reduce_sum(depend_masked_loss) / depend_count

            # morpheme analyzer loss
            char_seq_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=batch['morphs'],
                logits=decoder.char_output)

            mask = tf.math.equal(batch['morphs'], PADDING_IDX)
            total_length = tf.to_float(tf.reduce_sum(batch['out_lengths']))

            char_masked_loss = tf.where(mask, tf.zeros_like(char_seq_loss), char_seq_loss)
            self.char_loss = tf.reduce_sum(char_masked_loss) / total_length

            # sum of the losses
            self.total_loss = self.depend_loss + self.char_loss

            # training-specific
            self.global_step = tf.get_variable("global_step", shape=[], trainable=False,
                                      initializer=tf.zeros_initializer, dtype=tf.int32)

            step = tf.cast(self.global_step + 1, dtype=tf.float32)

            learning_rate = 1e-4 * tf.train.exponential_decay(1., step, 3000, 0.95)

            optimizer = tf.train.AdamOptimizer(learning_rate, 0.9, 0.999)
            gradients, variables = zip(*optimizer.compute_gradients(self.total_loss))
            clipped_gradients, _ = tf.clip_by_global_norm(gradients, 1.0)

            self.optimize = optimizer.apply_gradients(zip(clipped_gradients, variables), global_step=self.global_step)
            
            self.training_summary = tf.summary.merge([
                tf.summary.scalar("total_loss", self.total_loss),
                tf.summary.scalar("char_loss", self.char_loss),
                tf.summary.scalar("depend_loss", self.depend_loss)
            ])
            
            self.validation_summary = tf.summary.merge([
                tf.summary.scalar("validation_total_loss", self.total_loss),
                tf.summary.scalar("validation_char_loss", self.char_loss),
                tf.summary.scalar("validation_depend_loss", self.depend_loss)
            ])

In [20]:
batch_size = 8

inputs_placeholder = tf.placeholder(name='inputs_placeholder', 
                                    shape=(None, max_in_length), 
                                    dtype=tf.int32)
in_lengths_placeholder = tf.placeholder(name='in_lengths_placeholder',
                                        shape=(None),
                                        dtype=tf.int32)
depend_idxs_placeholder = tf.placeholder(name='depend_idxs_placeholder',
                                         shape=(None, max_dep_length),
                                         dtype=tf.int32)
depends_placeholder = tf.placeholder(name='depends_placeholder',
                                     shape=(None, max_dep_length),
                                     dtype=tf.int32)
dep_lengths_placeholder = tf.placeholder(name='dep_lengths_placeholder',
                                         shape=(None),
                                         dtype=tf.int32)
morphs_placeholder = tf.placeholder(name='morphs_placeholder',
                                    shape=(None, max_out_length),
                                    dtype=tf.int32)
out_lengths_placeholder = tf.placeholder(name='out_lengths_placeholder',
                                         shape=(None),
                                         dtype=tf.int32)

dataset = tf.data.Dataset.from_tensor_slices({
    'inputs': inputs_placeholder,
    'in_lengths': in_lengths_placeholder, 
    'depend_idxs': depend_idxs_placeholder,
    'depends': depends_placeholder,
    'dep_lengths': dep_lengths_placeholder,
    'morphs': morphs_placeholder,
    'out_lengths': out_lengths_placeholder
})
train_dataset = dataset.shuffle(buffer_size=10000)
train_dataset = train_dataset.repeat()
train_dataset = train_dataset.batch(batch_size)

test_dataset = dataset.take(batch_size)
test_dataset = test_dataset.batch(batch_size)
    
iterator = train_dataset.make_initializable_iterator()

model = Model(iterator.get_next(), True)

training marker


In [12]:
# initiate session
saver = tf.train.Saver()
sess = tf.Session()

train_writer = tf.summary.FileWriter("logs", sess.graph)
sess.run(tf.global_variables_initializer())
feed_dict={
    inputs_placeholder: train['inputs'],
    in_lengths_placeholder: train['in_lengths'],
    depend_idxs_placeholder: train['depend_idxs'],
    depends_placeholder: train['depends'],
    dep_lengths_placeholder: train['dep_lengths'],
    morphs_placeholder: train['morphs'],
    out_lengths_placeholder: train['out_lengths']
}
sess.run(iterator.initializer, feed_dict=feed_dict)

In [13]:
# restore checkpoint
saver.restore(sess, "models/charrnn-4200")

INFO:tensorflow:Restoring parameters from models/charrnn-4200


In [22]:
with tf.variable_scope('test', reuse=tf.AUTO_REUSE):
    test_dataset = dataset.take(batch_size)
    test_dataset = test_dataset.batch(batch_size)
    train_iterator = test_dataset.make_one_shot_iterator()

    test_model = Model(train_iterator.get_next(), False)

ValueError: Cannot capture a placeholder (name:dep_lengths_placeholder_1, type:Placeholder) by value.

In [21]:
# test some samples

def cosine_distance_np(a, b):
    normalize = np.linalg.norm(a, axis=1, keepdims=True) * np.linalg.norm(b, axis=0, keepdims=True)
    return np.einsum('ki,ij->kj', a, b) / normalize

input_string, lengths, embed_table, char_output, decoder_input = sess.run(
    (test_model.encoder.inputs, test_model.decoder.out_lengths, char_embed_table, 
     test_model.decoder.char_output, test_model.decoder.decoder_inputs))

for i in range(batch_size):
    print(decode_string(input_string[i]))
    print(decode_string(np.argmax(char_output[i, :lengths[i]], axis=1)))
    print(decoder_input[i, :, 0])
    #scores = cosine_distance_np(char_output[i][:lengths[i]], embed_table)

FailedPreconditionError: Attempting to use uninitialized value test/model/dense/kernel
	 [[node test/model/dense/kernel/read (defined at <ipython-input-9-14069d675e7a>:69)  = Identity[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"](test/model/dense/kernel)]]

Caused by op 'test/model/dense/kernel/read', defined at:
  File "/usr/lib/python3.5/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib/python3.5/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.5/dist-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelapp.py", line 486, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.5/dist-packages/tornado/platform/asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "/usr/lib/python3.5/asyncio/base_events.py", line 421, in run_forever
    self._run_once()
  File "/usr/lib/python3.5/asyncio/base_events.py", line 1424, in _run_once
    handle._run()
  File "/usr/lib/python3.5/asyncio/events.py", line 126, in _run
    self._callback(*self._args)
  File "/usr/local/lib/python3.5/dist-packages/tornado/platform/asyncio.py", line 122, in _handle_events
    handler_func(fileobj, events)
  File "/usr/local/lib/python3.5/dist-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/zmq/eventloop/zmqstream.py", line 456, in _handle_events
    self._handle_recv()
  File "/usr/local/lib/python3.5/dist-packages/zmq/eventloop/zmqstream.py", line 486, in _handle_recv
    self._run_callback(callback, msg)
  File "/usr/local/lib/python3.5/dist-packages/zmq/eventloop/zmqstream.py", line 438, in _run_callback
    callback(*args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 2843, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 2869, in _run_cell
    return runner(coro)
  File "/usr/local/lib/python3.5/dist-packages/IPython/core/async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 3044, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 3209, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 3291, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-d93a16616732>", line 7, in <module>
    test_model = Model(train_iterator.get_next(), False)
  File "<ipython-input-10-6daf212d6ad1>", line 9, in __init__
    batch['morphs'], batch['out_lengths'], is_training)
  File "<ipython-input-9-14069d675e7a>", line 69, in __init__
    self.char_output = tf.layers.dense(output[:, :, :num_hidden], len(hangul))
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/layers/core.py", line 184, in dense
    return layer.apply(inputs)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/keras/engine/base_layer.py", line 817, in apply
    return self.__call__(inputs, *args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/layers/base.py", line 374, in __call__
    outputs = super(Layer, self).__call__(inputs, *args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/keras/engine/base_layer.py", line 746, in __call__
    self.build(input_shapes)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/keras/layers/core.py", line 944, in build
    trainable=True)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/layers/base.py", line 288, in add_weight
    getter=vs.get_variable)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/keras/engine/base_layer.py", line 609, in add_weight
    aggregation=aggregation)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/checkpointable/base.py", line 639, in _add_variable_with_custom_getter
    **kwargs_for_getter)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/variable_scope.py", line 1487, in get_variable
    aggregation=aggregation)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/variable_scope.py", line 1237, in get_variable
    aggregation=aggregation)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/variable_scope.py", line 540, in get_variable
    aggregation=aggregation)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/variable_scope.py", line 492, in _true_getter
    aggregation=aggregation)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/variable_scope.py", line 922, in _get_single_variable
    aggregation=aggregation)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/variables.py", line 183, in __call__
    return cls._variable_v1_call(*args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/variables.py", line 146, in _variable_v1_call
    aggregation=aggregation)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/variables.py", line 125, in <lambda>
    previous_getter = lambda **kwargs: default_variable_creator(None, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/variable_scope.py", line 2444, in default_variable_creator
    expected_shape=expected_shape, import_scope=import_scope)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/variables.py", line 187, in __call__
    return super(VariableMetaclass, cls).__call__(*args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/variables.py", line 1329, in __init__
    constraint=constraint)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/variables.py", line 1491, in _init_from_args
    self._snapshot = array_ops.identity(self._variable, name="read")
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/array_ops.py", line 81, in identity
    return gen_array_ops.identity(input, name=name)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 3454, in identity
    "Identity", input=input, name=name)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 3274, in create_op
    op_def=op_def)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 1770, in __init__
    self._traceback = tf_stack.extract_stack()

FailedPreconditionError (see above for traceback): Attempting to use uninitialized value test/model/dense/kernel
	 [[node test/model/dense/kernel/read (defined at <ipython-input-9-14069d675e7a>:69)  = Identity[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"](test/model/dense/kernel)]]


In [64]:
# run
while True:
    _, loss, step, log = sess.run((model.optimize, model.total_loss, model.global_step, model.training_summary))
    
    train_writer.add_summary(log, step)
    
    if step % 10 == 0:
        print("{}: {}".format(step, loss))
        
        save_path = saver.save(sess, "models/charrnn", step)
        print('Saved to', save_path)

440: 0.059922635555267334
Saved to models/charrnn-440
450: -0.18869411945343018
Saved to models/charrnn-450
460: -0.10303229093551636
Saved to models/charrnn-460
470: -0.1552022099494934
Saved to models/charrnn-470
480: -0.10200667381286621
Saved to models/charrnn-480
490: -0.05547904968261719
Saved to models/charrnn-490
500: -0.14164602756500244
Saved to models/charrnn-500
510: -0.24593961238861084
Saved to models/charrnn-510
520: -0.1792566180229187
Saved to models/charrnn-520
530: -0.14285480976104736
Saved to models/charrnn-530
540: -0.09570342302322388
Saved to models/charrnn-540
550: -0.04417073726654053
Saved to models/charrnn-550
560: -0.2414391040802002
Saved to models/charrnn-560
570: -0.22570860385894775
Saved to models/charrnn-570
580: -0.2700360417366028
Saved to models/charrnn-580
590: -0.25528889894485474
Saved to models/charrnn-590
600: -0.2968563437461853
Saved to models/charrnn-600
610: -0.28264570236206055
Saved to models/charrnn-610
620: -0.19650888442993164
Saved t

1940: -0.4804348945617676
Saved to models/charrnn-1940
1950: -0.4864816665649414
Saved to models/charrnn-1950
1960: -0.49740123748779297
Saved to models/charrnn-1960
1970: -0.4485020637512207
Saved to models/charrnn-1970
1980: -0.49094080924987793
Saved to models/charrnn-1980
1990: -0.5039564371109009
Saved to models/charrnn-1990
2000: -0.5033395290374756
Saved to models/charrnn-2000
2010: -0.5135948061943054
Saved to models/charrnn-2010
2020: -0.39885735511779785
Saved to models/charrnn-2020
2030: -0.4721015691757202
Saved to models/charrnn-2030
2040: -0.5031055212020874
Saved to models/charrnn-2040
2050: -0.5098269581794739
Saved to models/charrnn-2050
2060: -0.4815976619720459
Saved to models/charrnn-2060
2070: -0.48835116624832153
Saved to models/charrnn-2070
2080: -0.4704509973526001
Saved to models/charrnn-2080
2090: -0.5286684036254883
Saved to models/charrnn-2090
2100: -0.44491422176361084
Saved to models/charrnn-2100
2110: -0.48395872116088867
Saved to models/charrnn-2110
2120

3430: -0.4880707859992981
Saved to models/charrnn-3430
3440: -0.5509637594223022
Saved to models/charrnn-3440
3450: -0.5092551708221436
Saved to models/charrnn-3450
3460: -0.5341881513595581
Saved to models/charrnn-3460
3470: -0.525292158126831
Saved to models/charrnn-3470
3480: -0.5004032254219055
Saved to models/charrnn-3480
3490: -0.48614048957824707
Saved to models/charrnn-3490
3500: -0.5389701128005981
Saved to models/charrnn-3500
3510: -0.5100560188293457
Saved to models/charrnn-3510
3520: -0.4937590956687927
Saved to models/charrnn-3520
3530: -0.5226720571517944
Saved to models/charrnn-3530
3540: -0.5455232858657837
Saved to models/charrnn-3540
3550: -0.5306717157363892
Saved to models/charrnn-3550
3560: -0.5230414867401123
Saved to models/charrnn-3560
3570: -0.5041854381561279
Saved to models/charrnn-3570
3580: -0.5299737453460693
Saved to models/charrnn-3580
3590: -0.5173258781433105
Saved to models/charrnn-3590
3600: -0.5077744126319885
Saved to models/charrnn-3600
3610: -0.5

KeyboardInterrupt: 