<a href="https://colab.research.google.com/github/tiensu/Coding-The-Deep-Learning-Revolution/blob/master/vanishing_gradient.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import tensorflow as tf
from tensorflow import keras
import numpy as np

In [0]:
# Root directory under which the TensorBoard event files are written.
# Raw string: the Windows path contains backslashes that would otherwise be
# parsed as (invalid) escape sequences such as "\M" and "\C".
base_path = r'E:\MACHINE_LEARNING\CODING_THE_DEEP_LEARNING_REVOLUTION\PRACTICE\TensorBoard'

In [0]:
# Load the MNIST dataset through Keras; the call returns a (train, test) pair,
# each unpacked here into its images array and its labels array.
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

In [0]:
def get_batch(x_data, y_data, batch_size):
    """Draw a random mini-batch of samples and their labels.

    Indices are drawn uniformly from ``[0, len(y_data))`` with
    ``np.random.randint``, i.e. *with replacement*, so the same sample may
    appear more than once in a batch.

    Args:
        x_data: array of samples, indexed along its first axis (any rank).
        y_data: array of labels aligned with ``x_data`` along the first axis.
        batch_size: number of samples to draw.

    Returns:
        Tuple ``(x_batch, y_batch)`` holding the selected samples and labels.
    """
    idxs = np.random.randint(0, len(y_data), batch_size)
    # Index only the leading axis so inputs of any rank are supported,
    # not just 3-D image stacks.
    return x_data[idxs], y_data[idxs]

In [0]:
class Model(object):
    """Stack of dense layers for 10-class classification of 28x28 images.

    The graph is built in the current default TensorFlow graph at construction
    time. Besides the loss/accuracy/train ops it registers summaries for the
    gradient of the loss with respect to every layer's kernel, which is what
    makes vanishing gradients visible in TensorBoard.

    Attributes set by the constructor:
        input_images: float32 placeholder of shape [None, 28, 28].
        labels: int64 placeholder of shape [None, 1] holding class ids.
        loss: mean softmax cross-entropy over the batch.
        optimizer: Adam training op minimising ``loss``.
        accuracy: fraction of correct predictions in the batch.
        merged: all registered summaries merged into one op.
        init_op: global variables initializer.
    """

    def __init__(self, activation, num_layers=6, hidden_size=10):
        """Store the hyper-parameters and build the graph.

        Args:
            activation: activation callable applied to every hidden layer.
            num_layers: total number of dense layers, *including* the output
                layer (``num_layers - 1`` hidden layers are created).
            hidden_size: number of units in each hidden layer.
        """
        self._activation = activation
        # total number of dense layers, counting the final output layer
        self._num_layers = num_layers
        self._hidden_size = hidden_size
        self._model_def()

    def _model_def(self):
        """Create placeholders, layers, loss, summaries and the training op."""
        # create placeholder for input image variables
        self.input_images = tf.placeholder(tf.float32, shape=[None, 28, 28])
        # reshape input x - for 28x28 pixels = 784
        x_rs = tf.reshape(self.input_images, shape=[-1, 784])
        # scale the input data by 1/255
        net = x_rs / 255.0
        # create placeholder for labels variables
        self.labels = tf.placeholder(tf.int64, shape=[None, 1])
        # convert the label data to one hot values; one_hot on a [None, 1]
        # tensor yields [None, 1, 10], hence the reshape to [None, 10]
        y_one_hot = tf.reshape(tf.one_hot(self.labels, 10), shape=[-1, 10])

        # create the self._num_layers - 1 hidden layers, named layer1..layer{n-1}
        for i in range(self._num_layers - 1):
            net = tf.layers.dense(net, self._hidden_size, activation=self._activation,
                                  name='layer{}'.format(i + 1))
        # create output layer. No activation is supplied here because the loss
        # below applies softmax to the logits itself.
        logits = tf.layers.dense(net, 10, name='layer{}'.format(self._num_layers))

        # loss: softmax cross entropy computed directly from the logits
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=y_one_hot))

        # add the loss, gradients and accuracy to the summary
        tf.summary.scalar('loss', self.loss)
        self._log_gradients(self._num_layers)
        self.optimizer = tf.train.AdamOptimizer().minimize(self.loss)
        self.accuracy = self._compute_accuracy(logits, y_one_hot)
        tf.summary.scalar('accuracy', self.accuracy)
        self.merged = tf.summary.merge_all()
        self.init_op = tf.global_variables_initializer()

    def _compute_accuracy(self, logits, labels):
        """Return the mean of (argmax(logits) == argmax(labels)) over the batch.

        Args:
            logits: tensor of per-class scores, classes along axis 1.
            labels: one-hot label tensor, classes along axis 1.
        """
        prediction = tf.argmax(logits, 1)
        equality = tf.equal(prediction, tf.argmax(labels, 1))
        accuracy = tf.reduce_mean(tf.cast(equality, tf.float32))
        return accuracy

    def _log_gradients(self, num_layers):
        """Register gradient and weight summaries for layer1..layer{num_layers}.

        For each layer kernel this adds a scalar with the mean absolute
        gradient of the loss, a histogram of that gradient and a histogram of
        the kernel weights themselves.

        Args:
            num_layers: number of layers to log; kernels are looked up by the
                tensor name ``layer{k}/kernel:0`` in the default graph.
        """
        gr = tf.get_default_graph()
        for i in range(num_layers):
            weight = gr.get_tensor_by_name('layer{}/kernel:0'.format(i + 1))
            grad = tf.gradients(self.loss, weight)[0]
            mean = tf.reduce_mean(tf.abs(grad))
            tf.summary.scalar('mean_{}'.format(i + 1), mean)
            tf.summary.histogram('histogram_{}'.format(i + 1), grad)
            # log the kernel values here (not the gradient again)
            tf.summary.histogram('hist_weight_{}'.format(i + 1), weight)

In [0]:
def run_training(model, sub_folder, epochs=50, batch_size=32):
    """Train ``model`` on random MNIST mini-batches and log TensorBoard summaries.

    Note that one "epoch" here is a single optimizer step on one random
    mini-batch drawn from ``x_train``/``y_train``, not a pass over the data.
    Every 10th step the merged summaries are written and progress is printed.

    Args:
        model: object exposing ``init_op``, ``loss``, ``optimizer``,
            ``accuracy``, ``merged``, ``input_images`` and ``labels``.
        sub_folder: name of the directory below ``base_path`` that receives
            the event files for this run.
        epochs: number of training steps to run.
        batch_size: number of samples per mini-batch.
    """
    import os  # local import: the notebook's import cell does not provide os

    # Join with a real path separator; leading separators are stripped so a
    # value such as '/sigmoid' cannot discard base_path in os.path.join.
    log_dir = os.path.join(base_path, sub_folder.lstrip('/\\'))

    with tf.Session() as sess:
        sess.run(model.init_op)
        train_writer = tf.summary.FileWriter(log_dir, sess.graph)
        try:
            for epoch in range(epochs):
                image_batch, label_batch = get_batch(x_train, y_train, batch_size)
                # the labels placeholder expects a column of class ids
                feed = {model.input_images: image_batch,
                        model.labels: label_batch.reshape(-1, 1)}
                l, _, acc = sess.run([model.loss, model.optimizer, model.accuracy],
                                     feed_dict=feed)
                if epoch % 10 == 0:
                    # summaries are evaluated on the same batch, after the update step
                    summary = sess.run(model.merged, feed_dict=feed)
                    train_writer.add_summary(summary, epoch)
                    print('Iteration {} of {}, loss: {:.3f}, train accuracy: {:.2f}%'.format(epoch, epochs, l, acc*100))
        finally:
            # flush pending events and release the file handle even on error
            train_writer.close()

In [0]:
# Entry point: train one model per activation function so that their
# gradient summaries can be compared side by side.
if __name__ == "__main__":
    scenarios = ['sigmoid', 'relu', 'leaky relu']
    act_funcs = [tf.sigmoid, tf.nn.relu, tf.nn.leaky_relu]
    # every scenario name needs exactly one activation function
    assert len(scenarios) == len(act_funcs)

    for scenario_name, activation in zip(scenarios, act_funcs):
        # start from an empty graph so layer names do not clash between runs
        tf.reset_default_graph()
        print('Running scenarios: {}'.format(scenario_name))
        model = Model(activation, 6, 10)
        run_training(model, scenario_name)

Running scenarios: sigmoid
Iteration 0 of 50, loss: 1.308, train accuracy: 53.12%
Iteration 10 of 50, loss: 0.635, train accuracy: 84.38%
Iteration 20 of 50, loss: 0.248, train accuracy: 93.75%
Iteration 30 of 50, loss: 0.656, train accuracy: 84.38%
Iteration 40 of 50, loss: 0.300, train accuracy: 93.75%
Running scenarios: relu
Iteration 0 of 50, loss: 0.171, train accuracy: 96.88%
Iteration 10 of 50, loss: 0.138, train accuracy: 93.75%
Iteration 20 of 50, loss: 0.199, train accuracy: 93.75%
Iteration 30 of 50, loss: 0.091, train accuracy: 100.00%
Iteration 40 of 50, loss: 0.108, train accuracy: 93.75%
Running scenarios: leaky relu
Iteration 0 of 50, loss: 0.414, train accuracy: 90.62%
Iteration 10 of 50, loss: 0.080, train accuracy: 100.00%
Iteration 20 of 50, loss: 0.185, train accuracy: 96.88%
Iteration 30 of 50, loss: 0.254, train accuracy: 87.50%
Iteration 40 of 50, loss: 0.097, train accuracy: 96.88%
