<a href="https://colab.research.google.com/github/martinpius/Applied-Predictive-Modeling2/blob/master/Customized_fit_and_training_loop.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from google.colab import drive
drive.mount('/content/drive', force_remount = True)
try:
  COLAB = True
  import tensorflow as tf
  print(f"You are on CoLaB with tensorflow version: {tf.__version__}")
except Exception as e:
  print(f"{type(e)}: {e}\n...please load your drive.")
  COLAB = False
def time_fmt(t:float = 123.8173)->float:
  h = int(t / (60 * 60))
  m = int(t % (60 * 60)/ 60)
  s = int(t % 60)
  return f"{h}: {m:>02}: {s:>05.2f}"
print(f"....time testing....time testing...\n>>>time elapse: {time_fmt()}")

Mounted at /content/drive
You are on CoLaB with tensorflow version: 2.4.1
....time testing....time testing...
>>>time elapse: 0: 02: 03.00


In [3]:
#In this notebook we are going to build MLP from scratch
#We will write the customized fit function and training loop:

In [82]:
import sys, os, time
import tensorflow as tf
import numpy as np

In [83]:
#We start by defining our relu activation function:

In [84]:
class MyReLu(tf.keras.layers.Layer):
  def __init__(self,name = 'my_relu', *args, **kwargs):
    super(MyReLu, self).__init__(name = name, *args, **kwargs)
    
  def myrelu(self, inputs_tensor):
    return tf.math.maximum(0, inputs_tensor)

In [85]:
#Testing if it does what is intended to do
relu_custom = MyReLu()

In [86]:
relu_custom.myrelu(tf.random.normal(shape = (2,2), mean = -10, stddev= -4))

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[0., 0.],
       [0., 0.]], dtype=float32)>

In [87]:
#We define the dense layer from scratch using the following class: 

In [88]:
class DenseLayer(tf.keras.layers.Layer):
  def __init__(self, units, *args, **kwargs):
    super(DenseLayer, self).__init__(*args, **kwargs)
    self.units = units

  def build(self, inputs_dim):
    self.w = self.add_weight(shape = (inputs_dim[-1], self.units),
                             trainable = True, initializer = 'random_normal',
                             name = 'weights')
    self.b = self.add_weight(shape = (self.units, ), trainable = True,
                             initializer = 'zeros', name = 'bias')
    
  def call(self, inputs_tensor):
    x = tf.matmul(inputs_tensor, self.w) + self.b
    return x



In [89]:
#Define the model class: To build a simple multi-layers perceptron

In [90]:
class MLP(tf.keras.models.Model):
  def __init__(self,num_classes = 10, *args, **kwargs):
    super(MLP, self).__init__(*args, **kwargs)
    self.dense1 = DenseLayer(units = 128, name = 'dense_1')
    self.dense2 = DenseLayer(units = 64, name = 'dense_2')
    self.outputs = DenseLayer(units = 10, name = 'outputs')
    self.act = MyReLu()

  def call(self, inputs_tensor, training = False):
    x = self.dense1(inputs_tensor, training = training)
    x = self.act(x)
    x = self.dense2(x, training = training)
    x = self.act(x)
    x = self.outputs(x, training = training)
    return x

In [91]:
model = MLP()

In [92]:
#The customized fit for our simple mlp model:

In [157]:
class CustomFit(tf.keras.Model):
  def __init__(self, model):
    super(CustomFit, self).__init__()
    self.model = model

  def compile(self,optimizer,loss):
    super(CustomFit,self).compile()
    self.loss = loss
    self.optimizer = optimizer

  def train_step(self, data):
        x, y = data

        with tf.GradientTape() as tape:
            # Caclulate predictions
            y_pred = self.model(x, training=True)

            # Loss
            loss = self.loss(y, y_pred)

        # Gradients
        training_vars = self.trainable_variables
        gradients = tape.gradient(loss, training_vars)

        # Step with optimizer
        self.optimizer.apply_gradients(zip(gradients, training_vars))
        acc_metric.update_state(y, y_pred)

        return {"loss": loss, "accuracy": acc_metric.result()}

  def test_step(self, data):
    x, y = data
    y_pred = self.model(x, training=False)
    loss = self.loss(y, y_pred)
    acc_metric.update_state(y, y_pred)
    return {"loss": loss, "accuracy": acc_metric.result()}

In [150]:
#Loading and preprocess the data from keras:
def _get_data():
  (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
  x_train, x_test = x_train.astype(np.float32)/255.0, x_test.astype(np.float32)/255.0
  x_train, x_test = x_train.reshape(-1, 784), x_test.reshape(-1, 784)
  #y_train, y_test = tf.keras.utils.to_categorical(y_train, num_classes = 10), tf.keras.utils.to_categorical(y_test, num_classes = 10)
  return (x_train, y_train, x_test, y_test)

In [151]:
x_train, y_train, x_test, y_test = _get_data()

In [152]:
print(f"x_train_shape: {x_train.shape}, y_train_shape: {y_train.shape}\nx_test_shape: {x_test.shape}, y_test_shape: {y_test.shape}")

x_train_shape: (60000, 784), y_train_shape: (60000,)
x_test_shape: (10000, 784), y_test_shape: (10000,)


In [153]:
#We can now train our model using the above data as follows:

In [158]:
tic = time.time()
acc_metric = tf.keras.metrics.SparseCategoricalAccuracy(name="accuracy")

training = CustomFit(model)
training.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=3e-4),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
)

training.fit(x_train, y_train, batch_size=64, epochs=2)
training.evaluate(x_test, y_test, batch_size=64)
toc = time.time()
print(f"\nTotal time for traoning and evaluation without GPU: {time_fmt(toc - tic)}")

Epoch 1/2
Epoch 2/2

Total time for traoning and evaluation without GPU: 0: 00: 04.00
