# Trial 6: structured sequence modeling

* Create simple parametric time series and try to model them.
* Add structure by constructing a graph between the series and see whether it improves the modeling.
* Usage of `tflearn` inspired by [How to do time series prediction using RNNs, TensorFlow and Cloud ML Engine](https://medium.com/google-cloud/how-to-do-time-series-prediction-using-rnns-and-tensorflow-and-cloud-ml-engine-2ad2eeb189e8).

In [None]:
%matplotlib inline

import os
import shutil

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

import tensorflow as tf
import tensorflow.contrib.learn as tflearn

plt.rcParams['figure.figsize'] = (17, 5)

In [None]:
# Directory in which the TFRecords files are written and from which they are
# read back. The path is relative, so it depends on the working directory.
DATA_DIR = os.path.join('..', 'data', 'structured_sequence_trial')

## 1 Data generation

In [None]:
SEQ_LEN = 1000  # Number of samples in each generated time series.
N_SEQ = 40  # Number of generated time series.

def create_time_series(seq_len, random_state):
    """Generate one sinusoid whose parameters are drawn at random.

    The frequency, the amplitude and the offset are drawn, in that order,
    from uniform distributions using `random_state`.

    Args:
        seq_len: number of samples to generate.
        random_state: source of randomness exposing `uniform(low, high)`,
            e.g. a `np.random.RandomState`.

    Returns:
        A 1-D array of `seq_len` samples.
    """
    frequency = random_state.uniform(0.1, 0.6)
    amplitude = random_state.uniform(0.5, 1.5)
    shift = random_state.uniform(-1, 1)
    phase = frequency * np.arange(seq_len)
    return shift + amplitude * np.sin(phase)

# Draw all series from a single seeded generator so that the dataset is
# reproducible, and store them as the rows of a DataFrame.
rs = np.random.RandomState(42)
data = np.array([create_time_series(SEQ_LEN, rs) for _ in range(N_SEQ)])
data = pd.DataFrame(data.reshape(N_SEQ, SEQ_LEN))

In [None]:
# Plot the first 100 samples of the first 5 series. The selection is
# transposed so that every series becomes one column, i.e. one line.
data.iloc[:5, :100].T.plot();
# Export the figure to a PDF file in the working directory.
plt.savefig('time_series.pdf')
# TODO: hist (NOTE(review): presumably a histogram is still to be added here).

## 2 Graph construction

k-NN graph between the time series.

## 3 Data preparation

* Store data in TFRecords files which will be read by the input pipeline.
* Preprocessing can be done here.
* Data augmentation should be done in input pipeline (to save disk space).
* We are doing full batch, i.e. we feed data on the whole graph at once.

In [None]:
N_INPUTS = 50  # Number of samples used for prediction, i.e. unrolling length.
N_OUTPUTS = 1  # Number of samples in the time series the model tries to predict.

def feature(array):
    """Wrap the values of an array in a float `tf.train.Feature`.

    The array is flattened first, so its shape is not stored in the feature.
    """
    flat = array.reshape(-1)
    float_list = tf.train.FloatList(value=list(flat))
    return tf.train.Feature(float_list=float_list)

def save_dataset(data, filename):
    """Save dataset as TFRecords.

    Every example is a sliding window over the time axis: `N_INPUTS`
    consecutive samples of all series as inputs, immediately followed by
    the next `N_OUTPUTS` samples as targets. Consecutive examples are
    shifted by one time step.

    Args:
        data: 2-D array indexed as [series, time].
        filename: name of the file to write inside `DATA_DIR`.

    Raises:
        ValueError: if `data` has too few time steps for a single example.
    """
    filename = os.path.join(DATA_DIR, filename)
    num_examples = data.shape[1] - N_INPUTS - N_OUTPUTS + 1
    if num_examples <= 0:
        # Explicit error rather than `assert`, which is stripped under -O.
        raise ValueError(
            'Need at least {} time steps to build one example, got {}'.format(
                N_INPUTS + N_OUTPUTS, data.shape[1]))

    # Make sure the target directory exists before opening the writer.
    directory = os.path.dirname(filename)
    if directory and not os.path.isdir(directory):
        os.makedirs(directory)

    tf.logging.info('Writing {} examples to {}'.format(num_examples, filename))
    with tf.python_io.TFRecordWriter(filename) as writer:
        for start in range(num_examples):
            split = start + N_INPUTS  # First time step of the targets.
            inputs = data[:, start:split]
            targets = data[:, split:split + N_OUTPUTS]
            example = tf.train.Example(features=tf.train.Features(feature={
                #'graph': feature(graph),  # Adjacency matrix or Laplacian can be stored here.
                'inputs': feature(inputs),
                'targets': feature(targets)}))
            writer.write(example.SerializeToString())

# Chronological split: the first 80% of the time steps of every series go to
# the training file, the remaining ones to the validation file.
TRAINING_LEN = int(0.8 * SEQ_LEN)
save_dataset(data.values[:, :TRAINING_LEN], 'train.tfrecords')
save_dataset(data.values[:, TRAINING_LEN:], 'validation.tfrecords')

## 4 Data loading

Two training schemes:
* Load whole data for training up to a certain point in time. That is what is done for text (the whole vocabulary graph is used).
* Use some time series (some part of the graph) as training and the others as evaluation.

TF alternative:
* [tf.contrib.slim.dataset](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim)

In [None]:
class DataLoader:
    """Input function reading examples back from TFRecords files.

    An instance is called without arguments and returns a
    `(features, targets)` pair of tensors, where `features` is a dict with
    the single key 'inputs'.

    Args:
        filenames: names of the TFRecords files, relative to `DATA_DIR`.
        num_epochs: number of epochs, forwarded to
            `tf.train.string_input_producer`.
        read_threads: number of readers created on the filename queue.
        seed: seed used by the shuffling operations.
        shuffle: whether file names and examples are shuffled. Reading in
            order requires `read_threads == 1`.
    """

    def __init__(self, filenames, num_epochs=1, read_threads=1, seed=None,
                 shuffle=True):
        self.filenames = filenames
        self.num_epochs = num_epochs
        self.read_threads = read_threads
        self.seed = seed
        self.shuffle = shuffle

    def _read_and_decode(self, filename_queue):
        """Read one serialized example and decode it.

        Returns:
            The `inputs` tensor reshaped to [N_SEQ, N_INPUTS] and the
            `targets` tensor reshaped to [N_SEQ, N_OUTPUTS].
        """
        reader = tf.TFRecordReader()
        _, serialized = reader.read(filename_queue)
        # Features are stored flattened, hence the 1-D lengths.
        features = {
            'inputs': tf.FixedLenFeature([N_SEQ * N_INPUTS], tf.float32),
            'targets': tf.FixedLenFeature([N_SEQ * N_OUTPUTS], tf.float32),
        }
        example = tf.parse_single_example(serialized, features)
        inputs = tf.reshape(example['inputs'], [N_SEQ, N_INPUTS])
        targets = tf.reshape(example['targets'], [N_SEQ, N_OUTPUTS])
        return inputs, targets

    def __call__(self):
        """Build the input pipeline and return `({'inputs': inputs}, targets)`.

        Raises:
            ValueError: if shuffling is disabled with more than one reader.
        """
        if not self.shuffle and self.read_threads != 1:
            raise ValueError('Reading without shuffling requires read_threads == 1')

        with tf.name_scope('input_pipeline'):
            with tf.device("/cpu:0"):  # Input queues are on CPU.
                filenames = [os.path.join(DATA_DIR, filename) for filename in self.filenames]
                # The seed is forwarded so that the order of the files is
                # reproducible too when a seed is given.
                filename_queue = tf.train.string_input_producer(
                    filenames, self.num_epochs, shuffle=self.shuffle, seed=self.seed)

                examples = [self._read_and_decode(filename_queue)
                            for _ in range(self.read_threads)]

                if self.shuffle:
                    # Small shuffling buffer (the original author noted
                    # 10000 as an alternative value).
                    min_after_dequeue = 10
                    capacity = min_after_dequeue + (self.read_threads + 2)  # * batch_size
                    inputs, targets = tf.train.shuffle_batch_join(
                        examples, batch_size=1, seed=self.seed, capacity=capacity,
                        min_after_dequeue=min_after_dequeue, allow_smaller_final_batch=True)
                    # We read full batch: one example holds all the series,
                    # so the leading dimension of size 1 is dropped.
                    inputs = inputs[0, ...]
                    targets = targets[0, ...]
                else:
                    inputs, targets = examples[0]

                # Can return a fixed graph or a per-sample graph in the features.
                return {'inputs': inputs}, targets

Make one pass over the dataset to make sure the input pipeline works.

In [None]:
# Build the input pipeline and keep only the tensor of input windows.
inputs = DataLoader(['train.tfrecords'])()[0]['inputs']

sess = tf.Session()
#sess.run(tf.global_variables_initializer())
# NOTE(review): presumably required because the epoch counter of the filename
# queue is a local variable -- confirm.
sess.run(tf.local_variables_initializer())

# Start the threads which fill the input queues.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess, coord)

idx = 0
# Buffer in which the series are reassembled from the overlapping windows.
training_data = np.empty((N_SEQ, TRAINING_LEN-N_OUTPUTS))
try:
    while not coord.should_stop():
        # The idx-th window that is read is written at time offset idx.
        # NOTE(review): DataLoader shuffles the examples, so the windows are
        # presumably not read in chronological order and the buffer does not
        # reproduce the original data -- confirm.
        training_data[:, idx:idx+N_INPUTS] = sess.run(inputs)
        idx += 1

except tf.errors.OutOfRangeError:
    # Expected end of the loop: the input queue is exhausted.
    print('Done: {} steps'.format(idx))
finally:
    coord.request_stop()

# Wait for the queue threads to terminate before releasing the session.
coord.join(threads)
sess.close()

# Disabled check of the reassembled data against the generated data.
#np.testing.assert_allclose(training_data, data.iloc[:, :TRAINING_LEN-N_OUTPUTS])

## 5 Sequence modeling

We can either:
* assume the same dynamic on all time series and train a shared model
* train a model for each time series (which still has access to its neighbors)
* mix of both: e.g. a shared model with a per-time-series bias or last layer

In [None]:
# Number of hidden units in each of the LSTM cells.
# Number of filters in case of GCN.
LSTM_SIZE = 3

def model(features, targets, mode, params):
    """Model function: an LSTM over the input window, then a linear layer.

    Args:
        features: dict whose 'inputs' entry holds the input window. It is
            split along axis 1 into `N_INPUTS` time steps.
        targets: values to predict. Not used in inference mode, where it is
            expected to be None.
        mode: one of `tflearn.ModeKeys`.
        params: dict of hyper-parameters, of which 'learning_rate' is read.

    Returns:
        A `tflearn.ModelFnOps`. In inference mode it only carries the
        predictions; otherwise it also carries the loss, the training
        operation and the evaluation metrics.
    """
    # Reformat input shape to become a sequence: a list of N_INPUTS tensors,
    # one per time step.
    x = tf.split(features['inputs'], N_INPUTS, axis=1)

    # Recurrent neural network followed by linear transform.
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(LSTM_SIZE, forget_bias=1.0)
    outputs, _ = tf.contrib.rnn.static_rnn(lstm_cell, x, dtype=tf.float32)

    # Only the output of the last time step is used for the prediction.
    tf.summary.histogram('hidden', outputs[-1])
    predictions = tf.contrib.layers.fully_connected(outputs[-1], N_OUTPUTS, activation_fn=None)

    # Without targets no loss can be computed: return the predictions alone
    # instead of failing while building the loss.
    if mode == tflearn.ModeKeys.INFER:
        return tflearn.ModelFnOps(
            mode=mode,
            predictions={'predictions': predictions},
        )

    # Loss function and metric for training and evaluation.
    loss = tf.losses.mean_squared_error(targets, predictions)
    eval_metric_ops = {
        'rmse': tf.metrics.root_mean_squared_error(targets, predictions)
    }

    # Training operations.
    train_op = tf.contrib.layers.optimize_loss(
        loss=loss,
        global_step=tf.train.get_global_step(),
        learning_rate=params['learning_rate'],
        #learning_rate_decay_fn=lambda lr, gs: tf.train.exponential_decay(lr, gs, 100e3, 0.96, staircase=True),
        optimizer=lambda lr: tf.train.GradientDescentOptimizer(lr),
        #optimizer=lambda lr: tf.train.MomentumOptimizer(lr, 0.9),
    )

    return tflearn.ModelFnOps(
        mode=mode,
        predictions={'predictions': predictions},
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops,
    )

## 6 Experiment

In [None]:
# Observing variables.
#tflearn.monitors.ValidationMonitor
#tf.train.SessionRunHook

In [None]:
# Tuning the hyper-parameters.
#tflearn.learn_runner.run()
#tflearn.learn_runner.tune()

In [None]:
# TF debugger.
from tensorflow.python import debug as tfdbg

# Two alternative debugging hooks. The second assignment overwrites the
# first one, so only the dumping hook is left in `hooks`.
# NOTE(review): `hooks` is not passed to anything in the visible cells.
hooks = [tfdbg.LocalCLIDebugHook()]
hooks = [tfdbg.DumpingDebugHook('tfdbg_dumps')]
# Command to analyze the dumps offline:
# python -m tensorflow.python.debug.cli.offline_analyzer --dump_dir="tfdbg_dumps/run_<epoch_timestamp_microsec>_<uuid>"

In [None]:
# Statistics like compute time or memory.
# Need to pass run_options and run_metadata to sess.run().
# Not possible with Experiment and Estimator API.
#run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
#run_metadata = tf.RunMetadata()

In [None]:
# Directory where checkpoints and summaries of the run are stored.
#MODEL_DIR = os.path.join('..', 'logdir', 'structured_sequence', 'run1')
MODEL_DIR = 'structured_sequence'
config = tflearn.RunConfig(
    save_checkpoints_secs=60,
    # save_summary_steps=100,
    model_dir=MODEL_DIR,
    # To see device placement. It unfortunately only shows up in stderr, not Tensorboard (explicit placement only).
    # session_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True),
)
# Hyper-parameters handed to the model function through `params`.
hparams = {
    'learning_rate': 0.01
}
estimator = tflearn.Estimator(model_fn=model, config=config, params=hparams)
# Alternative: drive the estimator directly instead of through an Experiment.
#estimator.fit(input_fn=DataLoader(filenames=['train.tfrecords']))
#estimator.evaluate(input_fn=DataLoader(filenames=['validation.tfrecords']))

# Training reads the training file for 10 epochs; evaluation reads the
# validation file with the default of a single epoch.
# NOTE(review): eval_steps=None presumably evaluates until the evaluation
# input is exhausted -- confirm against the Experiment documentation.
experiment = tflearn.Experiment(
    estimator,
    eval_steps=None,
    train_input_fn=DataLoader(['train.tfrecords'], num_epochs=10),
    eval_input_fn=DataLoader(['validation.tfrecords']),
)

shutil.rmtree(MODEL_DIR, ignore_errors=True)  # Start fresh each time.
experiment.train_and_evaluate()
#experiment.continuous_train_and_eval()  # Takes fewer resources.

#estimator.evaluate(input_fn=DataLoader(filenames=['test.tfrecords']))

## XXX

In [None]:
class RNN:
    """Draft of a recurrent layer (not functional as written).

    NOTE(review): the body of `__call__` looks copied from a fully connected
    layer and does not use any of its arguments.
    """

    def __init__(self, units):
        # NOTE(review): `units` is ignored and no attribute is set, although
        # `__call__` reads `self.Mout` and `self.relu`.
        pass

    def __call__(self, inputs, states, laplacian):
        """Fully connected layer with Mout features."""
        # NOTE(review): `x` is read here before being assigned (it is a local
        # variable because of the assignment below), which raises
        # UnboundLocalError. Presumably `inputs` was meant -- confirm.
        N, Min = x.get_shape()
        # NOTE(review): `_weight_variable` and `_bias_variable` are not
        # defined in this class.
        W = self._weight_variable([int(Min), self.Mout], regularization=True)
        b = self._bias_variable([self.Mout], regularization=True)
        x = tf.matmul(x, W) + b
        return tf.nn.relu(x) if self.relu else x

* Inherit from `RNNCell` to use high-level TF machinery like `tf.nn.dynamic_rnn()`.

In [None]:
class LSTM:
    """The network is not unrolled."""

    # NOTE(review): both methods are empty placeholders which return None.

    def _input_conv(self, x, w, b=None):
        # Not implemented. Presumably meant to transform the input `x` with
        # the weights `w` and the optional bias `b` -- confirm.
        pass

    def _reccurent_conv(self, x, w, b=None):
        # Not implemented. Presumably the counterpart of `_input_conv` for
        # the recurrent state -- confirm.
        pass

In [None]:
from tensorflow.python.ops import control_flow_ops
# Sketch of a symbolic loop over the time steps, as opposed to an unrolled one.
# NOTE(review): `time_steps`, `_step`, `time`, `output_ta` and `states` are not
# defined in the visible cells, so this cell cannot run as is.
control_flow_ops.while_loop(
    # Iterate as long as the time counter is below the number of steps.
    cond=lambda time, *_: time < time_steps,
    body=_step,
    # `states` is concatenated to a tuple, so it is expected to be a tuple.
    loop_vars=(time, output_ta) + states,
    parallel_iterations=32,
    swap_memory=True)