In [1]:
import functools
import tensorflow as tf
import codecs
import numpy as np
import random

In [2]:
def lazy_property(function):
    attribute = '_' + function.__name__

    @property
    @functools.wraps(function)
    def wrapper(self):
        if not hasattr(self, attribute):
            with tf.variable_scope(function.__name__):
                setattr(self, attribute, function(self))
        return getattr(self, attribute)
    return wrapper

In [3]:
class BatchGenerator:

    def __init__(self, text, seq_len, batch_size):
        self._text = text
        self._text_size = len(text)
        self._batch_size = batch_size
        self._seq_len = seq_len
        self._vocab_index_dict = {}
        self._index_vocab_dict = {}
        self._vocab_size = 0
        self._batch_len = self._seq_len * self._batch_size * 10

        self.init_dict()
        self._batch_holders = self._create_batch_holders()


    def _create_batch_holders(self):
        batch_holders = []
        for _ in range(self._batch_size):
            start_index = random.randint(0, self._text_size - self._batch_len)
            current_index = 0
            batch_holder = (start_index, current_index)
            batch_holders.append(batch_holder)
        return batch_holders

    def init_dict(self):
        vocab = []
        for c in text:
            if c not in vocab:
                vocab.append(c)
                index = len(vocab)
                self._vocab_index_dict[c] = index
                self._index_vocab_dict[index] = c
                
        self._vocab_size = len(vocab)


    def _next_seq(self, text_index):
        seq = np.zeros(shape=(self._seq_len), dtype=np.float)
        for i in range(self._seq_len):
            if text_index >= self._text_size:
                break
            seq[i] = self.char2id(self._text[text_index])
            text_index += 1
        return seq

    def start(self):
        self._batch_holders = self._create_batch_holders()
        return self.get_batch()

    def get_batch(self):
        if (self._batch_holders[0][0] + self._batch_holders[0][1]) >= self._text_size:
            self._batch_holders = self._create_batch_holders()
        batches = []
        for i in range(len(self._batch_holders)):
            text_index = self._batch_holders[i][0] + self._batch_holders[i][1]
            self._batch_holders[i] = (self._batch_holders[i][0], self._batch_holders[i][1] + self._seq_len)
            batches.append(self._next_seq(text_index))
        return batches

    def char2id(self, c):
        return self._vocab_index_dict[c]


    def id2char(self, id):
        return self._index_vocab_dict[id]

    def seq2text(self, seq):
        l = [self.id2char(c) for c in seq]
        return ''.join(l)

In [4]:
def gen_batch(batch, vocab_size):
    target = []
    input = []
    for seq in batch:
        x = []
        y = []
        for i in range(len(seq)):
            t_x = [0] * vocab_size
            t_y = [0] * vocab_size
            c_i = int(seq[i])-1
            n_i = 0
            if i < len(seq) - 1:
                n_i = int(seq[i+1])-1
            t_x[c_i] = 1    
            t_y[n_i] = 1
            x.append(t_x)
            y.append(t_y)

        input.append(x)
        target.append(y)

    return np.array(input), np.array(target)

In [19]:
tf.reset_default_graph()
class SoftmaxPredictionRnn:

    def __init__(self, input, target, num_hidden=64, num_layers=3):
        self._num_hidden = num_hidden
        self._num_layers = num_layers
        self._max_grad_norm = .2
        self._learning_rate = .001
        self._input = input
        self._target = target
        self.prediction
        self.error
        self.optimize


    @lazy_property
    def length(self):
        used = tf.sign(tf.reduce_max(tf.abs(self._input), reduction_indices=2))
        length = tf.reduce_sum(used, reduction_indices=1)
        length = tf.cast(length, tf.int32)
        return length


    @lazy_property
    def prediction(self):
        # Recurrent network.
        cells = []
        for _ in range(self._num_layers):
            cells.append(tf.contrib.rnn.GRUCell(self._num_hidden))
        cell = tf.contrib.rnn.MultiRNNCell(cells)

        # Get dimensions
        self._max_length = int(self._input.get_shape()[1])
        self._num_classes = int(self._input.get_shape()[2])
        batch_size = tf.shape(self._input)[0]

        states = cell.zero_state(batch_size, tf.float32)
        state_type = type(states)
        self._initial_state = [
            tf.placeholder_with_default(zero_state, [None, self._num_hidden]) for zero_state in states]
        self._initial_state = state_type(self._initial_state)
        self._zero_state = self._initial_state
        
        self._output, self._final_state = tf.nn.dynamic_rnn(cell, self._input,
                                                            dtype=tf.float32, sequence_length=self.length,
                                                            initial_state=self._initial_state)

        # Softmax layer.
        weight = tf.get_variable('W', [self._num_hidden, self._num_classes])
        bias = tf.get_variable('b', [self._num_classes], initializer=tf.constant_initializer(0.1))

        # Flatten to apply same weights to all time steps.
        output = tf.reshape(self._output, [-1, self._num_hidden])
        self._raw_logits = tf.matmul(output, weight) + bias
        self._logits = tf.nn.softmax(tf.matmul(output, weight) + bias)
        prediction = tf.reshape(self._logits, [-1, self._max_length, self._num_classes])

        tf.summary.histogram("rnn_output", output)
        for w in cell.weights:
            tf.summary.histogram("rnn_weight", w)
        tf.summary.histogram("softmax_w", weight)
        tf.summary.histogram("softmax_bias", bias)
        tf.summary.histogram("prediction", prediction)

        return prediction


    @lazy_property
    def cost(self):
        # Compute cross entropy for each frame.
        cross_entropy = self._target * tf.log(self.prediction)
        cross_entropy = -tf.reduce_sum(cross_entropy, reduction_indices=2)
        mask = tf.sign(tf.reduce_max(tf.abs(self._target), reduction_indices=2))
        cross_entropy *= mask

        # Average over actual sequence lengths.
        cross_entropy = tf.reduce_sum(cross_entropy, reduction_indices=1)
        cross_entropy /= tf.cast(self.length, tf.float32)

        loss = tf.reduce_mean(cross_entropy)
        tf.summary.scalar('cross_entropy', loss)

        return loss


    @lazy_property
    def optimize(self):
        tvars = tf.trainable_variables()
        grads = tf.gradients(self.cost, tvars)
        clip_grads, _ = tf.clip_by_global_norm(grads, self._max_grad_norm)
        optimizer = tf.train.AdamOptimizer(self._learning_rate)

        #tf.summary.histogram("gradients", grads)
        #tf.summary.histogram("clip_gradients", clip_grads)

        return optimizer.apply_gradients(zip(clip_grads, tvars))

    @lazy_property
    def error(self):
        mistakes = tf.not_equal(tf.argmax(self._target, 2), tf.argmax(self.prediction, 2))
        mistakes = tf.cast(mistakes, tf.float32)
        mask = tf.sign(tf.reduce_max(tf.abs(self._target), reduction_indices=2))
        mistakes *= mask

        # Average over actual sequence lengths.
        mistakes = tf.reduce_sum(mistakes, reduction_indices=1)
        mistakes /= tf.cast(self.length, tf.float32)
        mistake = tf.reduce_mean(mistakes)

        tf.summary.scalar('error', mistake)

        return mistake


    def train_epoch(self, session, batch_generator, model, epoch, steps, x, y):
        batch = batch_generator.start()
        batch_x, batch_y = gen_batch(batch, batch_generator._vocab_size)
        state = session.run(model._zero_state, feed_dict={self._input: batch_x})
        
        for step in range(steps):
            _, state, s = sess.run([model.optimize, model._final_state, summaries],
                                   feed_dict={x: batch_x, y: batch_y, model._initial_state: state})
            writer.add_summary(s, epoch * steps + step)
            batch = batch_generator.get_batch()
            batch_x, batch_y = gen_batch(batch, batch_generator._vocab_size)

    def sample(self, session, start_text, length, temperature=1., max_prob=True):
        def get_input(symbol, seq_len, vocab_size):
            input = []
            one_hot_one = [0.] * vocab_size
            one_hot_one[int(symbol)-1] = 1.
            
            seq = []
            seq.append(one_hot_one)
            for _ in range(1, seq_len):
                seq.append([0.] * vocab_size)

            input.append(seq)
            return np.array(input, dtype=np.float32)

        # Prepare network's state to generate
        x = get_input(start_text[0], self._max_length, self._num_classes)
        state = session.run(self._zero_state, feed_dict={self._input: x})
        sample = start_text[0]
        for char in start_text[:-1]:
            x = get_input(char, self._max_length, self._num_classes)
            state = session.run(self._final_state, {self._input: x, self._initial_state: state})
            
        # Generate symbols
        x = get_input(start_text[-1], self._max_length, self._num_classes)
        seq = []
        
        for i in range(length):
            state, logits = session.run([self._final_state, self._logits],
                                        {self._input: x, self._initial_state: state})

            sample = np.argmax(logits[0]) + 1
            seq.append(sample)
            x = get_input(sample, self._max_length, self._num_classes)

        return seq

In [20]:

!rm -rf '/tmp/character-rnn'

seq_len = 30
batch_size = 25
epoches = 1000

with codecs.open('tolstoi.txt', 'r', encoding='utf-8') as f:
    text = f.read()

batch_generator = BatchGenerator(text, seq_len, batch_size)
sample_batch_generator = BatchGenerator(text, 100, 1)
steps = batch_generator._text_size // (batch_generator._seq_len * batch_size)

x = tf.placeholder(tf.float32, [None, seq_len, batch_generator._vocab_size], name='inputs')
y = tf.placeholder(tf.float32, [None, seq_len, batch_generator._vocab_size], name='targets')
model = SoftmaxPredictionRnn(x, y)

summaries = tf.summary.merge_all()
writer = tf.summary.FileWriter('/tmp/character-rnn')

sess = tf.Session()
sess.run(tf.global_variables_initializer())

writer.add_graph(sess.graph)

print "epoches: ", epoches
print "steps: ", steps

# Training cycle
for epoch in range(epoches):
    model.train_epoch(sess, batch_generator, model, epoch, steps, x, y)
    batch = batch_generator.start()
    batch_x, batch_y = gen_batch(batch, batch_generator._vocab_size)
    e = sess.run([model.error], feed_dict={x: batch_x, y: batch_y})
    print '%04d' % (epoch + 1), ': {}'.format(e)
    sample_batch = sample_batch_generator.get_batch()
    print '--> ', sample_batch_generator.seq2text(sample_batch[0])
    seq = model.sample(sess, sample_batch[0], 50)
    print '<-- ', batch_generator.seq2text(seq)

epoches:  1000
steps:  1063
0001 : [0.69999999]
-->  oubliez les  torts qu'on a pu avoir envers vous, pensez
  que c'est votre père...  peut-être à l'ago


TypeError: data type not understood