In [10]:
# after http://r2rt.com/recurrent-neural-networks-in-tensorflow-ii.html
"""
Imports
"""
import numpy as np
import tensorflow as tf
%matplotlib inline
import matplotlib.pyplot as plt
import time
import os
import urllib.request
from tensorflow.models.rnn.ptb import reader

In [11]:
def gen_epochs(n, num_steps, batch_size):
    for i in range(n):
        yield reader.ptb_iterator(data, batch_size, num_steps)

def reset_graph():
    if 'sess' in globals() and sess:
        sess.close()
    tf.reset_default_graph()


In [12]:
def train_network(g, num_epochs, num_steps = 120, batch_size = 32, verbose = True, save=False):
    tf.set_random_seed(2345)
    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        training_losses = []
        for idx, epoch in enumerate(gen_epochs(num_epochs, num_steps, batch_size)):
            training_loss = 0
            steps = 0
            training_state = None
            for X, Y in epoch:
                steps += 1

                feed_dict={g['x']: X, g['y']: Y}
                if training_state is not None:
                    feed_dict[g['init_state']] = training_state
                training_loss_, training_state, _ = sess.run([g['total_loss'],
                                                      g['final_state'],
                                                      g['train_step']],
                                                             feed_dict)
                training_loss += training_loss_
            if verbose:
                print("Average training loss for Epoch", idx, ":", training_loss/steps)
            training_losses.append(training_loss/steps)

        if isinstance(save, str):
            g['saver'].save(sess, save)

    return training_losses

In [13]:
def build_graph(
    num_classes,
    cell_type = None,
    num_weights_for_custom_cell = 5,
    state_size = 128,
    batch_size = 32,
    num_steps = 120,
    num_layers = 3,
    build_with_dropout=False,
    learning_rate = 1e-4):

    reset_graph()

    x = tf.placeholder(tf.int32, [batch_size, num_steps], name='input_placeholder')
    y = tf.placeholder(tf.int32, [batch_size, num_steps], name='labels_placeholder')

    dropout = tf.constant(1.0)

    embeddings = tf.get_variable('embedding_matrix', [num_classes, state_size])

    rnn_inputs = tf.nn.embedding_lookup(embeddings, x)

    if cell_type == 'Custom':
        cell = CustomCell(state_size, num_weights_for_custom_cell)
    elif cell_type == 'GRU':
        cell = tf.nn.rnn_cell.GRUCell(state_size)
    elif cell_type == 'LSTM':
        cell = tf.nn.rnn_cell.LSTMCell(state_size, state_is_tuple=True)
    elif cell_type == 'LN_LSTM':
        cell = LayerNormalizedLSTMCell(state_size)
    else:
        cell = tf.nn.rnn_cell.BasicRNNCell(state_size)

    if build_with_dropout:
        cell = tf.nn.rnn_cell.DropoutWrapper(cell, input_keep_prob=dropout)

    if cell_type == 'LSTM' or cell_type == 'LN_LSTM':
        cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers, state_is_tuple=True)
    else:
        cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers)

    if build_with_dropout:
        cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=dropout)

    init_state = cell.zero_state(batch_size, tf.float32)
    rnn_outputs, final_state = tf.nn.dynamic_rnn(cell, rnn_inputs, initial_state=init_state)

    with tf.variable_scope('softmax'):
        W = tf.get_variable('W', [state_size, num_classes])
        b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))

    #reshape rnn_outputs and y
    rnn_outputs = tf.reshape(rnn_outputs, [-1, state_size])
    y_reshaped = tf.reshape(y, [-1])

    logits = tf.matmul(rnn_outputs, W) + b

    predictions = tf.nn.softmax(logits)

    total_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y_reshaped))
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)

    return dict(
        x = x,
        y = y,
        init_state = init_state,
        final_state = final_state,
        total_loss = total_loss,
        train_step = train_step,
        preds = predictions,
        saver = tf.train.Saver()
    )

In [14]:
def ln(tensor, scope = None, epsilon = 1e-5):
    """ Layer normalizes a 2D tensor along its second axis """
    assert(len(tensor.get_shape()) == 2)
    m, v = tf.nn.moments(tensor, [1], keep_dims=True)
    if not isinstance(scope, str):
        scope = ''
    with tf.variable_scope(scope + 'layer_norm'):
        scale = tf.get_variable('scale',
                                shape=[tensor.get_shape()[1]],
                                initializer=tf.constant_initializer(1))
        shift = tf.get_variable('shift',
                                shape=[tensor.get_shape()[1]],
                                initializer=tf.constant_initializer(0))
    LN_initial = (tensor - m) / tf.sqrt(v + epsilon)

    return LN_initial * scale + shift

In [15]:
class LayerNormalizedLSTMCell(tf.nn.rnn_cell.RNNCell):
    """
    Adapted from TF's BasicLSTMCell to use Layer Normalization.
    Note that state_is_tuple is always True.
    """

    def __init__(self, num_units, forget_bias=1.0, activation=tf.nn.tanh):
        self._num_units = num_units
        self._forget_bias = forget_bias
        self._activation = activation

    @property
    def state_size(self):
        return tf.nn.rnn_cell.LSTMStateTuple(self._num_units, self._num_units)

    @property
    def output_size(self):
        return self._num_units

    def __call__(self, inputs, state, scope=None):
        """Long short-term memory cell (LSTM)."""
        with tf.variable_scope(scope or type(self).__name__):
            c, h = state

            # change bias argument to False since LN will add bias via shift
            concat = tf.nn.rnn_cell._linear([inputs, h], 4 * self._num_units, False)

            i, j, f, o = tf.split(1, 4, concat)

            # add layer normalization to each gate
            i = ln(i, scope = 'i/')
            j = ln(j, scope = 'j/')
            f = ln(f, scope = 'f/')
            o = ln(o, scope = 'o/')

            new_c = (c * tf.nn.sigmoid(f + self._forget_bias) + tf.nn.sigmoid(i) *
                   self._activation(j))

            # add layer_normalization in calculation of new hidden state
            new_h = self._activation(ln(new_c, scope = 'new_h/')) * tf.nn.sigmoid(o)
            new_state = tf.nn.rnn_cell.LSTMStateTuple(new_c, new_h)

            return new_h, new_state

In [16]:
#file_name = '/home/key/Downloads/linux-4.9-rc7/kernel/all.c'
file_name = 'faust1.txt'
#file_name = 'racine.txt'

with open(file_name,'r') as f:
    raw_data = f.read()
    vocab = set(raw_data)
    
idx_to_vocab = dict(enumerate(vocab))
vocab_to_idx = dict(zip(idx_to_vocab.values(), idx_to_vocab.keys()))
vocab_size = len(vocab)

data = [vocab_to_idx[c] for c in raw_data]
print(raw_data[:100])
print(vocab)
print(idx_to_vocab)
del raw_data


  Zueignung.

  Ihr naht euch wieder, schwankende Gestalten,
  Die früh sich einst dem trüben Blick 
{"'", 'z', 'n', 'B', ' ', 'N', 'j', 'Ä', 'C', 'd', 'Ö', 'ß', 'X', 'H', 'v', 'A', 'O', 'W', 'e', 'I', 'T', 'J', 's', '!', 'F', '.', 'a', ';', 'Z', 'K', 'P', 'Ü', 't', 'G', 'i', 'f', 'q', 'M', '?', 'U', 'b', 'Q', 'l', 'ö', '"', 'c', 'ü', 'y', 'R', '(', 'h', 'S', '\n', ',', 'D', 'Y', 'w', 'k', 'g', 'ä', 'o', 'p', '-', 'V', 'm', 'E', 'r', 'L', 'u', ':', 'x', ')'}
{0: "'", 1: 'z', 2: 'n', 3: 'B', 4: ' ', 5: 'N', 6: 'j', 7: 'Ä', 8: 'C', 9: 'd', 10: 'Ö', 11: 'ß', 12: 'X', 13: 'H', 14: 'v', 15: 'A', 16: 'O', 17: 'W', 18: 'e', 19: 'I', 20: 'T', 21: 'J', 22: 's', 23: '!', 24: 'F', 25: '.', 26: 'a', 27: ';', 28: 'Z', 29: 'K', 30: 'P', 31: 'Ü', 32: 't', 33: 'G', 34: 'i', 35: 'f', 36: 'q', 37: 'M', 38: '?', 39: 'U', 40: 'b', 41: 'Q', 42: 'l', 43: 'ö', 44: '"', 45: 'c', 46: 'ü', 47: 'y', 48: 'R', 49: '(', 50: 'h', 51: 'S', 52: '\n', 53: ',', 54: 'D', 55: 'Y', 56: 'w', 57: 'k', 58: 'g', 59: 'ä', 60: '

In [23]:
def generate_characters(g, checkpoint, num_chars, prompt='A', pick_top_chars=None):
    """ Accepts a current character, initial state"""

    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        g['saver'].restore(sess, checkpoint)

        state = None
        current_char = vocab_to_idx[prompt]
        chars = [current_char]

        for i in range(num_chars):
            if state is not None:
                feed_dict={g['x']: [[current_char]], g['init_state']: state}
            else:
                feed_dict={g['x']: [[current_char]]}

            preds, state = sess.run([g['preds'],g['final_state']], feed_dict)

            if pick_top_chars is not None:
                p = np.squeeze(preds)
                p[np.argsort(p)[:-pick_top_chars]] = 0
                p = p / np.sum(p)
                current_char = np.random.choice(vocab_size, 1, p=p)[0]
            else:
                current_char = np.random.choice(vocab_size, 1, p=np.squeeze(preds))[0]

            chars.append(current_char)

    chars = map(lambda x: idx_to_vocab[x], chars)
    print("".join(chars))
    return("".join(chars))



In [24]:
#save = 'models/linux_GRU_128_120'
save = 'models/faust_GRU_128_120'
#save = 'models/racine_GRU_128_120'
#save = 'models/mickiewicz_GRU_128_120'
g1 = build_graph(num_classes=vocab_size, cell_type='GRU', num_steps=120)
t = time.time()
losses = train_network(g1, 20, num_steps=120, save=save)
print("It took", time.time() - t, "seconds to train for 20 epochs.")
print("The average loss on the final epoch was:", losses[-1])

Average training loss for Epoch 0 : 3.88722548765
Average training loss for Epoch 1 : 3.33839411362
Average training loss for Epoch 2 : 3.28392141473
Average training loss for Epoch 3 : 3.26528645497
Average training loss for Epoch 4 : 3.20558964505
Average training loss for Epoch 5 : 3.06029105186
Average training loss for Epoch 6 : 2.91993340324
Average training loss for Epoch 7 : 2.80187169243
Average training loss for Epoch 8 : 2.70883094096
Average training loss for Epoch 9 : 2.63630421489
Average training loss for Epoch 10 : 2.58031235022
Average training loss for Epoch 11 : 2.53191724478
Average training loss for Epoch 12 : 2.48458455591
Average training loss for Epoch 13 : 2.43621498463
Average training loss for Epoch 14 : 2.38924817478
Average training loss for Epoch 15 : 2.34514734792
Average training loss for Epoch 16 : 2.30421267771
Average training loss for Epoch 17 : 2.26646601453
Average training loss for Epoch 18 : 2.23204093353
Average training loss for Epoch 19 : 2.20

In [25]:
"""
g = build_graph(cell_type='LSTM', num_steps=80)
t = time.time()
losses = train_network(g, 20, num_steps=80, save="models/LSTM_20_epochs")
print("It took", time.time() - t, "seconds to train for 20 epochs.")
print("The average loss on the final epoch was:", losses[-1])
"""

'\ng = build_graph(cell_type=\'LSTM\', num_steps=80)\nt = time.time()\nlosses = train_network(g, 20, num_steps=80, save="models/LSTM_20_epochs")\nprint("It took", time.time() - t, "seconds to train for 20 epochs.")\nprint("The average loss on the final epoch was:", losses[-1])\n'

In [28]:
g3 = build_graph(num_classes=vocab_size, cell_type='GRU', num_steps=1, batch_size=1)
prompt = 'A'
generate_characters(g3, save, 750, prompt=prompt)

AH(y;'eänünganädes
  

  AE dohr as gürderdchf urd.
  U"M usbn oÜ shus das it ur Meie

  DEH dan asä lerwsam zus vuh LecbenE Vaen Lenimen,
  Mige eilt, Bihg mirt veihchtz,
  Wof doß ,nstu!  (ishen, such und,
  Dise wach sseirt,
  Doit der Söens mich ech btelmtuna,
  Dut ainen Belebe,
  Nür lr zu gite.
  Ich Miedoht gbehnlel iu nos soch ct)en

  EZTE:
  Des wont uct za eßfen,  woa ia fnich eshm zurtr Wehne,
  SnÖ Jemneswvion sngster sarneiche!
  Ziepe geKdolz teber deü em Lam!
  RGain nes Bie zö.  Feie ghelnen ar;
  Mach ikchter doahun egc eilen
  Kahinur iln hiich ihl Lirmfer,
  Dler do mer'.
  ÜYe eelguchllen
  kEk ein dü buslen! BI dön echm!  Aiumse rain ermessrgehrn.
  (Aein sübar wone beh su, bichten gmehd;
  daüh zche ird dae.
  Ich fai


''