In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

In [None]:
import inspect

In [None]:
inspect.getfile(tf)

In [None]:
# model parameters.
num_epochs = 1000
total_series_lengths = 50000
truncated_backprop_length = 15
state_size = 4
num_classes = 2
echo_step = 1
batch_size = 5
num_batches = total_series_lengths // batch_size // truncated_backprop_length

In [2]:
def generate_data(total_series_lengths = 50000, echo_step = 1, batch_size=5):
    x = np.array(np.random.choice(2, total_series_lengths))
#     y = np.roll(x, echo_step)
    
#     x = x.reshape((batch_size, -1))
#     y = x.reshape((batch_size, -1))
    
    return x

In [3]:
raw_data = generate_data()
print(raw_data.shape)

(50000,)


## Convert raw data to tensors using batch producers.
<font color=red> Note: Outdated - use TF Data API to set up pipeline.<br>
Need to setup eager initialization for optimally utilizing the GPU, the method below lazily fetches each sample when it is required by the training step.</font>

In [4]:
def batch_producer(raw_data, batch_size, num_steps, echo_step=1):
    raw_data = tf.convert_to_tensor(raw_data, name='raw_data', dtype=tf.int32)
    data_length = tf.size(raw_data)
    batch_length = data_length // batch_size
    data = tf.reshape(raw_data[0:batch_size * batch_length], [batch_size, batch_length, 1])
    
    # epoch_size => number of steps in each spoch.
    epoch_size = (batch_length - 1) // num_steps
    i = tf.train.range_input_producer(epoch_size, shuffle=False).dequeue()
    x = data[:, i * num_steps:(i+1) * num_steps, :]
    x.set_shape([batch_size, num_steps, 1])
    y = data[:, i * (num_steps + echo_step): (i + 1) * (num_steps + echo_step), :]
    y.set_shape([batch_size, num_steps, 1])
    
    return x, y

## Buidling the model.

In [5]:
# input to the model.
class Input:
    def __init__(self, batch_size, num_steps, data):
        """
        batch_size = number of samples in each batch.
        num_steps = rnn time steps.
        data = input data.
        """
        self.batch_size = batch_size
        self.num_steps = num_steps
        self.epoch_size = ((len(data) // batch_size) - 1) // num_steps
        self.input_data, self.targets = batch_producer(data, batch_size, num_steps)
#         self.input_data, self.targets = None, None
        
    def getData(self):
        self.input_data, self.targets = batch_producer(data, batch_size, num_steps)
    
    def print_shapes(self):
        print('shape of input data is ', self.input_data.shape)
        print('shape of targets is ', self.targets.shape)
        
    def print_data(self):
        print('data of input is ', self.input_data)
        print('shape of targets is ', self.targets)

In [6]:
i = Input(5, 16, raw_data)
print(i.batch_size)

5


In [7]:
i.epoch_size

624

In [8]:
# for _ in range(i.epoch_size):
#     print(i.print_shapes())
#     print(i.print_data())

## Explanation for the model created below.
> shape of the init_state => [num_layers, 2, self.batch_size, self.hidden_size]

1. num_layers -> need to store initial state for every layer.
2. 2 - state for neural network consists of two vectors -> output(h_t) and state(s_t). Dimension of two vector is equal to size of the hidden layer.
3. in each step, samples equivalent to batch size are processed, for each state, so states for each sample need to be stored.
4. the dimension of each output layer is equal to the dimensionality of hidden size.

> unpacking along axis 0.

changes (num_layers, 2, batch_size, hidden_layer_size) to num_layers * (2, batch_size, hidden_layer_size).

> state_is_tuple=True

required to allow the Tensorflow lstm architecture to accept state tuple as input.

> output, self.state = ...dynamic_cell()

output from all unrolled rnn cells with shape (batch_size, num_steps, hidden_size).<br>
self.state is used as input for the next training sample.

In [12]:
# defining model functions.
class Model:
    def __init__(self, input_obj, is_training, hidden_size, num_layers, dropout=0.0, init_scale=0.05):
        self.is_training = is_training
        self.input_obj = input_obj
        self.batch_size = input_obj.batch_size
        self.num_steps = input_obj.num_steps
        self.hidden_size = hidden_size
        
        # finally the data is loaded after the input layer into inputs.
        inputs = input_obj.input_data
        self.init_state = tf.placeholder(tf.float32, [num_layers, 2, self.batch_size, self.hidden_size])
        
        # Tensorflow LSTM models require state in format shown below.
        state_per_layer_list = tf.unstack(self.init_state, axis=0)
        rnn_tuple_state = tuple([tf.nn.rnn_cell.LSTMStateTuple(state_per_layer_list[layer_index][0],
                                                          state_per_layer_list[layer_index][1])
                                 for layer_index in range(num_layers)]
        )
        
        # standard first layer LSTM cell.
        cell = tf.contrib.rnn.LSTMCell(hidden_size, forget_bias=1.0)
        
        # add dropout layers if dropout layers set is required.
        if dropout != '0.0':
            cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=dropout)
            
        # if no. of layers is more than 1, need to stack additional layers of LSTM.\
        # Additional layers of RNN are stacked using RNN Multicell.
        if num_layers > 1:
            cell_list = [cell for _ in range(num_layers)]
            cell = tf.nn.rnn_cell.MultiRNNCell(cell_list, state_is_tuple=True)
            
#         print('shape of input data is ', inputs.shape)
        # using dynamic cell unroll the LSTM network.
        print('shape of rnn_tuple_state', rnn_tuple_state)
        output, self.state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32, initial_state=rnn_tuple_state)
        print('shape of output is', output.shape)
        print(output)
        
        # flatten the rnn output to feed into a softmax layer.
        output = tf.reshape(output, [-1, hidden_size])
        
        # setup the softmax layer.
        softmax_w = tf.Variable(tf.random_uniform([hidden_size, 1], -init_scale, init_scale))
        softmax_b = tf.Variable(tf.random_uniform([1], -init_scale, init_scale))
        logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
        
        # reshape logits for using sequence to sequence loss function.
        logits = tf.reshape(logits, [self.batch_size, self.num_steps, 1])
        
        # use sequence to sequence loss.
        loss = tf.contrib.seq2seq.sequence_loss(
            logits,
            self.input_obj.targets,
            tf.ones([self.batch_sizes, self.num_steps], dtype=tf.float32),
            average_across_timesteps=False,
            average_across_batch=True)
        
        # update the cost.
        self.cost = tf.reduce_sum(loss)
        
        # get the prediction cost.
        self.softmax_out = tf.nn.softmax(tf.reshape(logits, [-1, 1]))
        self.predict = tf.cast(tf.argmax(self.softmax_out, axis=1), tf.int32)
        correct_prediction = tf.equal(self.predict, tf.reshape(self.input_obj.targets), [-1])
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        
        if not is_training:
            return
        
        self.learning_rate = tf.Variable(0.0, trainable=False)
        
        tvars = tf.trainable_variables()
        # gradients are clipped, without clipping the weights get too low.
        grads = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), 5)
        optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
        
        self.train_op = optimizer.apply_gradients(
            zip(grads, tvars),
            global_step=tf.contrib.framework.get_or_create_global_step())
        
        # create the updatable learning rate.
        # new learning rate to be feed in via feed-dict argument.
        # tf.assign is executed at the start of each epoch.
        self.new_lr = tf.placeholder(tf.float32, shape=[])
        self.lr_update = tf.assign(self.learning_rate, self.new_lr)
        
    def assign_lr(self, session, lr_value):
        session.run(self.lr_update, feed_dict={self.new_lr: lr_value})

In [13]:
def train(train_data, num_layers, num_epochs, batch_size, model_save_name, learning_rate=1.0, 
             max_lr_epoch=10, lr_decay=0.93):
        training_input = Input(batch_size=batch_size, num_steps=16, data=train_data)
        m = Model(training_input, is_training=True, hidden_size=100, num_layers=num_layers)
        init_ops = tf.global_variables_initializer()
        
        orig_lr = lr_decay
        with tf.Session() as sess:
            sess.run([init_op])
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)
            saver = tf.train.Saver()
            
            for epoch in range(num_epochs):
                new_lr_rate = orig_decay ** max(epoch + 1 - max_lr_epoch, 0.0)
                m.assign_lr(sess, learning_rate ** new_lr_decay)
                current_state = np.zeros((num_layers, 2, batch_size, m.hidden_size))
                for step in range(training_input.num_steps):
                    if step % 50 != 0:
                        cost, _, current_state = sess.run([m.cost, m.train_op, m.state], 
                                                          feed_dict={m.init_state: current_state})
                    else:
                        cost, _, current_state, acc = sess.run([m.cost, m.train_op, m.state, m.accuracy],
                                                              feed_dict={m.init_state: current_state})
                        print('Epoch {} Step {}, cost: {:.3f}, accuracy: {:.3f} '.format(epoch, step, cost, acc))
                saver.save(sess, data_path + '\\' + model_save_name, global_step=epoch)
            # last save.
            saver.save(sess, data_path + '\\' + model_save_name + '-final')
            # close the threads.
            coord.request_stop()
            coord.join(threads)

In [14]:
# train the model.
train(train_data=raw_data, num_layers=2, num_epochs=1, batch_size=5, model_save_name='test_model')

shape of rnn_tuple_state (LSTMStateTuple(c=<tf.Tensor 'strided_slice_13:0' shape=(5, 100) dtype=float32>, h=<tf.Tensor 'strided_slice_14:0' shape=(5, 100) dtype=float32>), LSTMStateTuple(c=<tf.Tensor 'strided_slice_15:0' shape=(5, 100) dtype=float32>, h=<tf.Tensor 'strided_slice_16:0' shape=(5, 100) dtype=float32>))


TypeError: Tensors in list passed to 'values' of 'ConcatV2' Op have types [int32, float32] that don't all match.