##### Copyright 2019 The TensorFlow Authors.

In [0]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Customizable Neural Networks

Let's dive into creating a neural network using the customization options that TensorFlow 2.0 offers!

In [0]:
from __future__ import absolute_import, division, print_function, unicode_literals

# Install TensorFlow
!pip install -q tensorflow-gpu==2.0.0-beta1

import tensorflow as tf
import numpy as np

import tensorflow_datasets as tfds
tfds.disable_progress_bar()

from tensorflow.nn import relu, softmax
from tensorflow.keras import Model
import matplotlib as mpl
import matplotlib.pyplot as plt

## Step 1 : Download and prepare the data
For this task, let's use the CIFAR-10 dataset created by Krizhevsky *et al*. It consists of the following 10 classes: "Airplane", "Automobile", "Bird", "Cat", "Deer", "Dog", "Frog", "Horse", "Ship", "Truck". Each image in the dataset is a 32x32 color image with 3 color channels (RGB).

In [0]:
# Load CIFAR-10 through TensorFlow Datasets. The result is indexed by split
# name ('train' / 'test') below, and as_supervised=True makes every element an
# (image, label) pair, which is the signature cifar_preprocess expects.
cifar_dataset = tfds.load(name='cifar10', as_supervised=True)

def cifar_preprocess(image, label):
  """Rescale an image by 1/255 and pass its label through untouched.

  Args:
    image: Pixel data; presumably uint8 values in [0, 255], so the result
      lands in [0, 1] (TODO confirm against the dataset spec).
    label: Class label, returned unchanged.

  Returns:
    A `(scaled_image, label)` tuple.
  """
  return image / 255, label

In [0]:
# Training pipeline: rescale pixels, shuffle with a 10000-element buffer,
# then group into batches of 128.
cifar_train = (
    cifar_dataset['train']
    .map(cifar_preprocess)
    .shuffle(10000)
    .batch(128)
)

# Test pipeline: same rescaling and batch size, but no shuffling.
cifar_test = (
    cifar_dataset['test']
    .map(cifar_preprocess)
    .batch(128)
)

In [0]:
# Human-readable names for the ten CIFAR-10 classes, keyed by integer label.
# These strings are used as plot titles when previewing images.
class_names = {
    0: 'Airplane',
    1: 'Automobile',  # was misspelled 'Automoble'
    2: 'Bird',
    3: 'Cat',
    4: 'Deer',
    5: 'Dog',
    6: 'Frog',
    7: 'Horse',
    8: 'Ship',
    9: 'Truck',
}

Let's preview a couple images:

In [0]:
def display(image, label):
  """Render `image` in a 4x4-inch figure titled `label`, with axes hidden.

  Args:
    image: Anything `imshow` accepts (e.g. an HxWx3 array of floats).
    label: Text used as the figure title.
  """
  _, ax = plt.subplots(figsize=(4, 4))
  ax.set_title(label)
  ax.imshow(image)
  ax.axis('off')
  plt.show()

In [0]:
# cifar_train is batched, so every element is a whole batch; show only the
# first image (and its class name) from each of the first two batches.
for image_batch, label_batch in cifar_train.take(2):
  first_label = label_batch[0].numpy()
  display(image_batch[0], class_names[first_label])

## Step 2 : Create the custom neural network
For this tutorial, you will be working with the subclassing API of TensorFlow. In order to show the flexibility and customisation capabilities of TensorFlow 2.0, let's break this up into multiple parts. The first step is to create your own Dense layer, with an activation, a bias, and a kernel initialisation.


### Activation function
You can create your own activation using two methods in TensorFlow: the first is by extending the Layer class, and the second is by writing a simple Python function to perform the task. Since you will see an example of extending a layer later in the tutorial, let's write a simple Python function here. Let's create a LeakyReLU activation for our network.

TODO:
  Write your own LeakyReLU

In [0]:
def my_leaky_relu(x, alpha = 0.01):
    """Leaky ReLU: returns `x` where x > 0 and `alpha * x` elsewhere.

    Args:
      x: Floating-point tensor of pre-activations.
      alpha: Slope applied to non-positive inputs. Defaults to 0.01.

    Returns:
      A tensor with the same shape as `x`.
    """
    non_positive = tf.less_equal(x, tf.zeros_like(x))
    # Use the caller-supplied slope; the previous version hard-coded 0.01
    # here and silently ignored `alpha`.
    leaky = alpha * x
    return tf.where(non_positive, leaky, x)

### Weight initialisation
You can define and use your own weight and bias initialisation, in the same way you created your activation function, by using a simple python function. Here, let's initialise the weights in the dense layer using He's Normal Initialisation.

In [0]:
def my_he_normal_init(shape):
  """Create a float32 variable initialised with He-normal noise.

  Values are drawn from a normal distribution with mean 0 and standard
  deviation sqrt(2 / shape[0]), i.e. the first dimension of `shape` is used
  as the fan-in.

  Args:
    shape: Sequence of ints giving the variable shape.

  Returns:
    A `tf.Variable` of the requested shape.
  """
  fan_in = shape[0]
  he_stddev = tf.pow(2 / fan_in, 0.5)
  initial = tf.random.normal(shape=shape, mean=0, stddev=he_stddev,
                             dtype=tf.float32)
  return tf.Variable(initial_value=initial)

### Dense layer
Let's now create the dense layer. This is a [simple example](https://www.tensorflow.org/beta/tutorials/eager/custom_layers) of how to do this. The best way to implement your own layer is by extending the tf.keras.layers.Layer class. Let's go a step further and add some more flexibility to the layer being created.

TODO :

Parameterise the layer with the following:
*   The number of output units.
*   The kernel initialisation : Here, you can use the initialisation written above, or the default kernel initialisation in TensorFlow (Glorot).
*   If a bias is needed.
*   If an activation is required.



In [0]:
class MyDenseLayer(tf.keras.layers.Layer):
  """Fully connected layer with optional bias and leaky-ReLU activation.

  Computes `input @ kernel`, then optionally adds a bias and applies
  `my_leaky_relu`.

  Args:
    num_outputs: Number of output units (size of the last output dimension).
    kernel_init: Acts as a flag. Any truthy value (e.g. 'he') initialises the
      kernel with `my_he_normal_init`; a falsy value (default) uses the
      default initialiser of `add_weight`.
    use_bias: If True, a bias vector of length `num_outputs` is added.
    use_activation: If True, `my_leaky_relu` is applied to the output.
    **kwargs: Standard `tf.keras.layers.Layer` arguments such as `name`.
  """

  def __init__(self, num_outputs, kernel_init = None, use_bias = False,
               use_activation = False, **kwargs):
    # Forward kwargs so the usual Layer options (name, dtype, ...) work.
    super(MyDenseLayer, self).__init__(**kwargs)
    self.num_outputs = num_outputs
    self.use_bias = use_bias
    self.kernel_init = kernel_init
    self.use_activation = use_activation

  def build(self, input_shape):
    """Create the kernel (and bias, if enabled) once the input size is known."""
    kernel_shape = [int(input_shape[-1]), self.num_outputs]
    if self.kernel_init:
      # Use the kernel initialisation written above.
      self.kernel = my_he_normal_init(shape=kernel_shape)
    else:
      # Use the default initialisation. `add_weight` replaces the deprecated
      # `add_variable` alias.
      self.kernel = self.add_weight(name='kernel', shape=kernel_shape)
    if self.use_bias:
      # NOTE(review): the bias is also drawn from the He-normal helper, with
      # num_outputs acting as its fan-in; zeros is the more common choice.
      # Kept as-is to preserve the tutorial's behaviour.
      self.bias = my_he_normal_init(shape = [self.num_outputs])

  def call(self, input):
    """Apply the layer to `input`, whose last dimension must match the kernel."""
    output = tf.matmul(input, self.kernel)
    if self.use_bias:
      output = output + self.bias
    if self.use_activation:
      output = my_leaky_relu(output)

    return output

### Define the model
TensorFlow allows you to use the custom functions along with the inbuilt ones.

In [0]:
class MyModel(Model):
  """Small CNN classifier mixing built-in Keras layers with MyDenseLayer.

  Pipeline: Conv2D(32, 3) -> MaxPool(2x2) -> BatchNorm -> ReLU -> Flatten ->
  MyDenseLayer(128, He init, bias, leaky ReLU) -> Dropout(0.3) ->
  MyDenseLayer(10) -> softmax.

  The model returns class probabilities, not raw logits.
  """

  def __init__(self):
    super(MyModel, self).__init__()
    # A 2D Convolution layer followed by a Pooling Layer
    self.conv1 = tf.keras.layers.Conv2D(32, 3)
    self.pool = tf.keras.layers.MaxPooling2D(pool_size = (2, 2))
    #BatchNormalisation
    self.bn = tf.keras.layers.BatchNormalization()
    self.flatten = tf.keras.layers.Flatten()
    # First use your dense layer with he normal initialisation, a bias and leaky relu activation.
    self.dense1 = MyDenseLayer(128, kernel_init='he', use_bias=True,
                               use_activation=True)
    self.do = tf.keras.layers.Dropout(0.3)
    # Again the dense layer, however, here using the default initialisation, no bias and no activation.
    self.dense2 = MyDenseLayer(10)

  def call(self, x, training = True):
    """Run the forward pass.

    Args:
      x: Batch of input images.
      training: Whether to run in training mode. It is forwarded to both
        BatchNormalization and Dropout. Pass `training=False` for
        evaluation/inference; the default stays True for backward
        compatibility with existing callers.

    Returns:
      Tensor of per-class probabilities (softmax output), 10 per example.
    """
    x = self.pool(self.conv1(x))
    x = self.bn(x, training = training)
    x = relu(x)
    x = self.flatten(x)
    x = self.dense1(x)
    # Dropout was previously called without the flag, so it did not reliably
    # follow the mode requested by the caller.
    x = self.do(x, training = training)
    x = self.dense2(x)
    x = softmax(x)
    return x

## Step 3 : Define the loss function
You can also write your own custom loss function, here are the mechanics of how to write your very own "sparse_categorical_crossentropy". 

Since the task here is performing multi-class classification, you will need to find the loss for each class label and sum the result. 

The expression of categorical_crossentropy for a single datapoint is : 
$$loss = -\sum_{j=1}^{n} y_{true, j} \log(p_{pred, j})$$

where

1.   n : Number of classes
2.   $y_{true}$ : True label of the data-point
3.   $p_{pred}$ : Prediction probability vector for the data-point.  



In [0]:
@tf.function
def myLoss(logits, labels, n_classes=10):
  """Mean sparse categorical cross-entropy over a batch.

  Args:
    logits: Despite the name, this must contain class *probabilities* of
      shape (batch, n_classes); the logarithm is taken directly, with no
      softmax applied here.
    labels: Integer class ids, one per example. Any shape that flattens to
      (batch,) is accepted.
    n_classes: Number of classes used for the one-hot encoding.

  Returns:
    Scalar tensor: the cross-entropy averaged over the batch.
  """
  # Flatten instead of squeeze: squeeze would turn a batch of one into a
  # scalar, and the axis=1 reduction below would then fail.
  labels = tf.reshape(labels, [-1])
  one_hot_labels = tf.one_hot(labels, n_classes, dtype=tf.float32)
  # Keep probabilities away from 0 so log() never yields -inf (and 0 * -inf
  # never yields NaN).
  probabilities = tf.clip_by_value(logits, 1e-7, 1.0)
  per_example = -tf.reduce_sum(one_hot_labels * tf.math.log(probabilities),
                               axis=1)
  return tf.reduce_mean(per_example)

Define the metrics to be used for evaluation, which in the case of this tutorial are accuracy and cross-entropy loss.

In [0]:
# Streaming metrics used by test_step. Both take integer labels together with
# the model's per-class probability output and accumulate across batches
# until they are reset.
test_loss_function = tf.keras.metrics.SparseCategoricalCrossentropy()
test_accuracy_metric = tf.keras.metrics.SparseCategoricalAccuracy()

Create the model and choose the optimiser

In [0]:
# Instantiate the custom network; its weights are created lazily on the
# first call.
model = MyModel()
# Adam with the Keras default hyper-parameters.
optimizer = tf.keras.optimizers.Adam()

## Training the model

You already know how to use tf.GradientTape to train the model. Check out this [tutorial](https://www.tensorflow.org/beta/tutorials/eager/custom_training_walkthrough) for a refresher.

In [0]:
@tf.function
def train_on_batch(model, images, labels):
  """Run one optimisation step on a single batch.

  Uses the module-level `optimizer` to apply the gradients.

  Args:
    model: The model to train; called as `model(images, training=True)`.
    images: Batch of input images.
    labels: Integer class labels for the batch.

  Returns:
    The scalar loss computed by `myLoss` for this batch (before the update).
  """
  with tf.GradientTape() as tape:
    # Request training mode explicitly rather than relying on the default of
    # the model's call signature, so the flag also reaches nested layers
    # such as dropout and batch normalisation.
    logits = model(images, training=True)
    loss_value = myLoss(logits, labels)
  grads = tape.gradient(loss_value, model.trainable_variables)
  optimizer.apply_gradients(zip(grads, model.trainable_variables))
  return loss_value

In [0]:
def test_step(test_data):
  """Evaluate the module-level `model` on every batch of `test_data`.

  Resets and then updates the module-level `test_loss_function` and
  `test_accuracy_metric` metrics.

  Args:
    test_data: Iterable of `(images, labels)` batches.

  Returns:
    `(loss, accuracy_percent)` aggregated over all batches. Read from the
    metrics after the loop, so an empty `test_data` no longer raises
    UnboundLocalError.
  """
  test_loss_function.reset_states()
  test_accuracy_metric.reset_states()
  for images, labels in test_data:
    # Inference mode: dropout disabled, batch norm uses its moving statistics.
    test_predictions = model(images, training=False)
    test_loss_function.update_state(labels, test_predictions)
    test_accuracy_metric.update_state(labels, test_predictions)
  return test_loss_function.result(), test_accuracy_metric.result() * 100

In [0]:
# Train for five epochs, logging the batch loss every 100 optimiser steps and
# evaluating on the test split at the end of each epoch.
for epoch_number in range(1, 6):
  print('Epoch', epoch_number)
  for images, labels in cifar_train:
    loss_value = train_on_batch(model, images, labels)
    # optimizer.iterations keeps counting across epochs, so `step` is the
    # global number of updates applied so far.
    step = optimizer.iterations.numpy()
    if step % 100 != 0:
      continue
    print('Step %d\tLoss: %.4f' % (step, loss_value))

  test_loss, test_acc = test_step(cifar_test)
  print(f'Test accuracy : {test_acc:.2f}\nTest loss : {test_loss:.2f}\n\n')

print ("Finished Training")

## Testing the model

In [0]:
# Final evaluation of the trained model on the held-out test split.
test_loss, test_acc = test_step(test_data=cifar_test)
print(f'Test accuracy : {test_acc:.2f}\nTest loss : {test_loss:.2f}\n\n')

## Next steps
Now that you know how to create such customizable neural networks, go ahead and implement complex architectures. You may try out different layers, activations or even loss functions.