## Temporal Convolutional Networks (TCN)

* Original work: Bai, Kolter & Koltun (2018), [An Empirical Evaluation of Generic Convolutional and Recurrent Networks for Sequence Modeling](https://arxiv.org/pdf/1803.01271.pdf)


<img src="FIGS/tcn.png" width="800" height="800"> ![](images/k8s-dashboard.png)

In [6]:
# Copyright 2018 @Jacob Su Wang. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

import sys
sys.path.insert(0, "/work/04233/sw33286/AIDA-SCRIPTS")

import random
import numpy as np

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from helpers import checkpoint_model


def causal_conv1d(inputs,
                  filters,
                  kernel_size,
                  strides=1,
                  dilation_rate=1,
                  activation=None,
                  use_bias=True,
                  kernel_initializer=None,
                  bias_initializer=tf.zeros_initializer(),
                  kernel_regularizer=None,
                  bias_regularizer=None,
                  activity_regularizer=None,
                  kernel_constraint=None,  # accepted, but not forwarded below.
                  bias_constraint=None,    # accepted, but not forwarded below.
                  trainable=True,
                  name=None,
                  reuse=None,  # extra argument, forwarded to tf.layers.conv1d.
                  **kwargs):
    """Causal 1-D convolution: left-pad the time axis, then run a 'valid' conv.

    The input (rank 3, channels last) is padded only at the start of axis 1
    by ``(kernel_size - 1) * dilation_rate`` steps, so the convolution at
    step t never covers steps later than t. With ``strides=1`` the output
    keeps the input's length.

    Args:
        inputs: rank-3 tensor; axis 1 is the one that gets padded.
        filters, kernel_size, strides, dilation_rate, activation, use_bias,
        kernel_initializer, bias_initializer, kernel_regularizer,
        bias_regularizer, activity_regularizer, trainable, name, reuse:
            forwarded unchanged to ``tf.layers.conv1d``.
        kernel_constraint, bias_constraint, **kwargs: accepted for signature
            compatibility only; they are ignored by this function.

    Returns:
        The tensor produced by ``tf.layers.conv1d`` on the padded input.
    """
    # Receptive-field growth per layer: F(n) = F(n-1) + (kernel_size - 1) * dilation_rate
    left_pad = (kernel_size - 1) * dilation_rate

    # A plain nested list (rather than a tf.constant) keeps the padded
    # tensor's static shape fully specified.
    paddings = [[0, 0], [left_pad, 0], [0, 0]]
    padded = tf.pad(inputs, paddings)

    conv_args = dict(
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding='valid',
        data_format='channels_last',
        dilation_rate=dilation_rate,
        activation=activation,
        use_bias=use_bias,
        kernel_initializer=kernel_initializer,
        bias_initializer=bias_initializer,
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=bias_regularizer,
        activity_regularizer=activity_regularizer,
        trainable=trainable,
        name=name,
        reuse=reuse,
    )
    return tf.layers.conv1d(padded, **conv_args)
        
def temporal_block(inputs,
                   filters,
                   kernel_size,
                   strides,
                   dilation_rate,
                   conv_block_name,
                   rate,
                   training=True):
    """Build one residual TCN block.

    Two identical stages are stacked: causal conv (ReLU) -> layer norm ->
    dropout. The result is added to a kernel-size-1 convolution of the
    block's input and passed through a final ReLU.

    Args:
        inputs: tensor fed to the first causal convolution and to the
            residual 1x1 convolution.
        filters: number of filters for both causal convs and the residual conv.
        kernel_size: kernel size of the two causal convolutions.
        strides: stride of the two causal convolutions.
        dilation_rate: dilation of the two causal convolutions.
        conv_block_name: name prefix; the convs are named
            ``<prefix>-conv1`` and ``<prefix>-conv2``.
        rate: dropout rate for both dropout layers.
        training: passed to ``tf.layers.dropout``; dropout is active when true.

    Returns:
        ``relu(transformed + conv1x1(inputs))``.
    """
    hidden = inputs
    # Same stage twice; only the layer-name suffix differs.
    for suffix in ('-conv1', '-conv2'):
        hidden = causal_conv1d(hidden,
                               filters,
                               kernel_size,
                               strides,
                               dilation_rate,
                               activation=tf.nn.relu,
                               name=conv_block_name + suffix)
        hidden = tf.contrib.layers.layer_norm(hidden)
        hidden = tf.layers.dropout(hidden, rate=rate, training=training)

    # Residual link: the 1x1 conv projects the input to `filters` channels
    # so it can be added to the transformed branch.
    shortcut = tf.layers.conv1d(inputs, filters=filters, kernel_size=1)
    return tf.nn.relu(hidden + shortcut)
    
def temporal_cnn(inputs,
                 blocks,
                 kernel_size,
                 rate,
                 block_names,
                 training=True):
    """Stack temporal blocks with exponentially growing dilation.

    Block ``i`` (0-based) uses ``blocks[i]`` filters, dilation ``2**i``,
    stride 1 and the name prefix ``block_names[i]``.

    Args:
        inputs: tensor fed to the first block.
        blocks: sequence with the number of filters for each block.
        kernel_size: kernel size shared by every block.
        rate: dropout rate shared by every block.
        block_names: sequence of name prefixes, indexed in step with
            ``blocks``; an IndexError is raised if it is shorter.
        training: forwarded to every block's dropout layers. Defaults to
            True (the previous hard-wired behaviour); pass False or a
            boolean tensor to disable dropout at evaluation time.

    Returns:
        The output of the last block, or ``inputs`` unchanged when
        ``blocks`` is empty.
    """
    outputs = inputs
    for level, filters in enumerate(blocks):
        outputs = temporal_block(outputs,
                                 filters,
                                 kernel_size,
                                 strides=1,
                                 dilation_rate=2 ** level,
                                 conv_block_name=block_names[level],
                                 rate=rate,
                                 training=training)
    return outputs


class TCN_MNIST:
    """TCN classifier whose input is a 784-step, single-channel sequence.

    Depending on ``config['load_from_saved']`` the constructor either builds
    a fresh graph or re-imports a saved meta graph and restores the latest
    checkpoint. Either way the instance exposes ``sess``, ``saver``, ``x``,
    ``y``, ``prediction``, ``accuracy``, ``loss``, ``global_step`` and
    ``train_op``.
    """

    def __init__(self, config):
        """Read hyper-parameters from `config` and build or restore the graph.

        Keys read from `config`: 'n_filter', 'n_level', 'kernel_size',
        'dropout_rate', 'block_names', 'learning_rate', 'model_dir',
        'model_name', 'load_from_saved'.
        """
        # One entry per level, every level with the same number of filters.
        self.blocks = [config['n_filter']] * config['n_level']
        self.kernel_size = config['kernel_size']
        self.dropout_rate = config['dropout_rate']
        self.block_names = config['block_names']

        # Fixed problem dimensions: 28*28 time steps, 1 channel, 10 classes.
        self.time_steps = 28 * 28
        self.n_channel = 1
        self.n_class = 10
        self.learning_rate = config['learning_rate']

        # Checkpoint location: model_dir + model_name is used as a path prefix.
        self.model_dir = config['model_dir']
        self.model_name = config['model_name']

        if not config['load_from_saved']:
            self.__build_new_graph()
            print('\nNew model built for training!\n')
        else:
            self.__load_saved_graph()
            print('\nModel loaded for continued training!\n')

    def __build_new_graph(self):
        """Reset the default graph and construct model, loss and train op."""
        tf.reset_default_graph()
        self.sess = tf.Session()

        # x: <batch-size, input-length, num-channels>
        input_shape = [None, self.time_steps, self.n_channel]
        self.x = tf.placeholder(tf.float32, input_shape, name='x')
        # y: <batch-size, num-classes>
        self.y = tf.placeholder(tf.float32, [None, self.n_class], name='y')

        # temporal_cnn out: <batch-size, output-length, num-filters>
        tcn_out = temporal_cnn(inputs=self.x,
                               blocks=self.blocks,
                               kernel_size=self.kernel_size,
                               rate=self.dropout_rate,
                               block_names=self.block_names)
        # Keep only the last time step: <batch-size, num-filters>
        last_step = tcn_out[:, -1, :]
        # Linear read-out: <batch-size, num-classes>
        self.logits = tf.layers.dense(last_step,
                                      self.n_class,
                                      activation=None,
                                      kernel_initializer=tf.orthogonal_initializer())

        self.prediction = tf.nn.softmax(self.logits, name='prediction')
        predicted_class = tf.argmax(self.prediction, 1)
        target_class = tf.argmax(self.y, 1)
        self.correct = tf.equal(predicted_class, target_class)
        self.accuracy = tf.reduce_mean(tf.cast(self.correct, tf.float32), name='accuracy')

        per_example_loss = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits,
                                                                   labels=self.y)
        self.loss = tf.reduce_mean(per_example_loss, name='loss')

        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        gradients = optimizer.compute_gradients(self.loss)
        self.global_step = tf.Variable(0, name='global_step', trainable=False)
        self.train_op = optimizer.apply_gradients(gradients,
                                                  global_step=self.global_step,
                                                  name='train_op')

        self.sess.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver()

    def __load_saved_graph(self):
        """Import the saved meta graph and restore the latest checkpoint.

        Handles are looked up by the names assigned when the graph was
        built. `logits` and `correct` are not set on this path.
        """
        self.sess = tf.Session()
        meta_path = self.model_dir + self.model_name + '.meta'
        self.saver = tf.train.import_meta_graph(meta_path)
        latest_ckpt = tf.train.latest_checkpoint(self.model_dir)
        self.saver.restore(self.sess, latest_ckpt)
        self.graph = tf.get_default_graph()

        # Each attribute is bound to the tensor '<attribute-name>:0'.
        restored = ('x', 'y', 'prediction', 'accuracy', 'loss',
                    'global_step', 'train_op')
        for attr in restored:
            setattr(self, attr, self.graph.get_tensor_by_name(attr + ':0'))
        

def train_mnist_tcn(model_config, train_config):
    """Train a TCN_MNIST model and checkpoint it when test accuracy is high.

    Args:
        model_config: dict passed unchanged to ``TCN_MNIST``.
        train_config: dict with keys
            'batch_size'    - examples per training / test batch,
            'verbose'       - report every this many global steps
                              (0 disables reporting and checkpointing),
            'n_epoch'       - number of epochs,
            'n_batch'       - training batches per epoch,
            'acc_threshold' - minimum test-batch accuracy that triggers a
                              checkpoint.

    Returns:
        None. Progress is printed; checkpoints are written through
        ``checkpoint_model``.
    """
    print("Configuring training ...\n")
    tcn = TCN_MNIST(model_config)
    # The former `tf_config` local was removed: it was never handed to a
    # session, so its GPU options had no effect.
    batch_size = train_config['batch_size']
    verbose = train_config['verbose']
    n_epoch = train_config['n_epoch']
    n_batch = train_config['n_batch']
    acc_threshold = train_config['acc_threshold']
    s_dir = tcn.model_dir
    s_path = tcn.model_dir + tcn.model_name

    # Leading dimension is inferred so the reshape follows whatever number
    # of rows next_batch() actually returned.
    seq_shape = (-1, tcn.time_steps, tcn.n_channel)

    print("Load MNIST ...\n")
    mnist = input_data.read_data_sets("/work/04233/sw33286/CNN-STASH/TCN/tcn-mnist/data/",
                                      one_hot=True)

    print("Training ...\n")
    for e in range(n_epoch):
        print("Epoch {}".format(e+1))
        for _ in range(n_batch):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            batch_x = batch_x.reshape(seq_shape)
            train_feed = {tcn.x: batch_x, tcn.y: batch_y}
            _, step = tcn.sess.run([tcn.train_op, tcn.global_step],
                                   feed_dict=train_feed)
            # `verbose and ...` avoids a ZeroDivisionError when verbose == 0.
            if verbose and step % verbose == 0:
                # Loss / accuracy on the batch that was just trained on.
                l, a = tcn.sess.run([tcn.loss, tcn.accuracy],
                                    feed_dict=train_feed)
                # Test accuracy is estimated on a single test batch.
                # NOTE(review): the graph is built with dropout in training
                # mode, so these accuracies are measured with dropout active.
                test_batch_x, test_batch_y = mnist.test.next_batch(batch_size)
                test_batch_x = test_batch_x.reshape(seq_shape)
                test_a = tcn.sess.run(tcn.accuracy,
                                      feed_dict={tcn.x: test_batch_x, tcn.y: test_batch_y})
                print("@step {} | train loss = {:.5f} | train acc = {:.3f} | test acc = {:.3f}".format(step,
                                                                                       l,a,test_a))
                if test_a >= acc_threshold:
                    checkpoint_model(s_dir, s_path, tcn.saver, tcn.sess)
                    print("[CHECKPOINT] model saved @step {}, with accuracy {}".format(step,test_a))
        print('\n')
        

if __name__ == "__main__":

    import argparse

    def _str2bool(value):
        """Convert a command-line string to a bool.

        ``type=bool`` must not be used with argparse: bool("False") is True
        because any non-empty string is truthy, so `--load_from_saved False`
        would silently enable loading.
        """
        if isinstance(value, bool):
            return value
        lowered = value.strip().lower()
        if lowered in ('true', 't', 'yes', 'y', '1'):
            return True
        if lowered in ('false', 'f', 'no', 'n', '0', ''):
            return False
        raise argparse.ArgumentTypeError(
            "expected a boolean value (true/false), got {!r}".format(value))

    parser = argparse.ArgumentParser()
    # model config
    parser.add_argument('--n_filter', type=int, default=8)
    parser.add_argument('--n_level', type=int, default=6)
    parser.add_argument('--kernel_size', type=int, default=8)
    parser.add_argument('--dropout_rate', type=float, default=0.1)
    parser.add_argument('--block_names', type=str, default="tcn-")
    parser.add_argument('--learning_rate', type=float, default=1e-3)
    parser.add_argument('--model_dir', type=str, default="/work/04233/sw33286/CNN-STASH/TCN/tcn-mnist/model/")
    parser.add_argument('--model_name', type=str, default="mnist-00")
    parser.add_argument('--load_from_saved', type=_str2bool, default=False)
    # train config
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--verbose', type=int, default=500)
    parser.add_argument('--n_epoch', type=int, default=5)
    parser.add_argument('--n_batch', type=int, default=1000)
    parser.add_argument('--acc_threshold', type=float, default=0.95)
    args = parser.parse_args()

    # One name prefix per level: "<block_names>1", "<block_names>2", ...
    block_names = [args.block_names + str(i + 1) for i in range(args.n_level)]
    model_config = {'n_filter': args.n_filter, 'n_level': args.n_level, 'kernel_size': args.kernel_size,
                    'dropout_rate': args.dropout_rate, 'block_names': block_names,
                    'learning_rate': args.learning_rate,
                    'model_dir': args.model_dir, 'model_name': args.model_name,
                    'load_from_saved': args.load_from_saved}
    train_config = {'batch_size': args.batch_size, 'verbose': args.verbose,
                    'n_epoch': args.n_epoch, 'n_batch': args.n_batch,
                    'acc_threshold': args.acc_threshold}

    train_mnist_tcn(model_config, train_config)

In [12]:
!ls

[DEMO] TCN (MNIST).ipynb  [DEVELOPMENT] TCN (MNIST).ipynb  tcn_mnist_demo.py


In [24]:
!python3 tcn_mnist_demo.py

I tensorflow/stream_executor/dso_loader.cc:135] successfully opened CUDA library libcublas.so.8.0 locally
I tensorflow/stream_executor/dso_loader.cc:135] successfully opened CUDA library libcudnn.so.5 locally
I tensorflow/stream_executor/dso_loader.cc:135] successfully opened CUDA library libcufft.so.8.0 locally
I tensorflow/stream_executor/dso_loader.cc:135] successfully opened CUDA library libcuda.so.1 locally
I tensorflow/stream_executor/dso_loader.cc:135] successfully opened CUDA library libcurand.so.8.0 locally
Configuring training ...

I tensorflow/core/common_runtime/gpu/gpu_device.cc:885] Found device 0 with properties: 
name: Tesla K40m
major: 3 minor: 5 memoryClockRate (GHz) 0.745
pciBusID 0000:08:00.0
Total memory: 11.17GiB
Free memory: 11.10GiB
I tensorflow/core/common_runtime/gpu/gpu_device.cc:906] DMA: 0 
I tensorflow/core/common_runtime/gpu/gpu_device.cc:916] 0:   Y 
I tensorflow/core/common_runtime/gpu/gpu_device.cc:975] Creating TensorFlow device (/gpu:0) -> (device: 0