<a href="https://colab.research.google.com/github/martinpius/Behind-Keras-Layers-and-Models/blob/main/Building%2C_training_and_saving_tensorflow_keras_model_from_scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
drive.mount('/content/drive', force_remount = True)
try:
  COLAB = True
  import tensorflow as tf
  print(f"You are on CoLab with tensorflow version: {tf.__version__}")
except Exception as e:
  print(f"{type(e)}: {e}/n...please load your drive...")
  COLAB = False
def time_fmt(t:float = 123.728)->float:
  h = int(t / (60 * 60))
  m = int(t % (60 * 60) / 60)
  s = int(t % 60)
  return f"{h}: {m:>02}: {s:>05.2f}"
print(f"...time testing....\n<<<time elapse: {time_fmt()}>>>")

Mounted at /content/drive
You are on CoLab with tensorflow version: 2.4.1
...time testing....
<<<time elapse: 0: 02: 03.00>>>


In [92]:
#We construct our model from scratch as follows:
import time, os
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub


In [None]:
#Defining the activation functions (relu and softmax)
class MyActivation(tf.keras.layers.Layer):
  def __init__(self, *args, **kwargs):
    super(MyActivation, self).__init__(*args, **kwargs)
  def my_relu(self, inputs_tensor):
    return tf.math.maximum(inputs_tensor, 0)


In [None]:
class DenseLayer(tf.keras.layers.Layer):
  '''This class construct our dense layer'''
  def __init__(self, units, *args, **kwargs):
    super(DenseLayer, self).__init__(*args, **kwargs)
    self.units = units
  def build(self, inputs_shape):
    self.w = self.add_weight(shape = (inputs_shape[-1], self.units),
                             name = 'weights', initializer = 'random_normal',
                             trainable = True)
    self.b = self.add_weight(shape = (self.units,), name = 'bias',
                             initializer = 'zeros',
                             trainable = True)
  def call(self, inputs_tensor):
    x = tf.matmul(inputs_tensor, self.w) + self.b
    return x

In [None]:
class MyMLP(tf.keras.models.Model):
  '''This class is responsible to build our 
  simple multilayer perceptron'''
  def __init__(self, num_classes = 10, *args, **kwargs):
    super(MyMLP, self).__init__(*args, **kwargs)
    self.dense1 = DenseLayer(128, name = 'dense1')
    self.dense2 = DenseLayer(64, name = 'dense2')
    self.outputs = DenseLayer(10, name = 'outputs')
    self.act = MyActivation()

  def call(self, inputs_tensor, training = False):
    x = self.dense1(inputs_tensor, training = training)
    x = self.act(x)
    x = self.dense2(x, training = training)
    x = self.act(x)
    x = self.outputs(x, training = training)
    return tf.nn.softmax(x)


In [None]:
model = MyMLP(name = 'MLP')

In [None]:
#Get the data from keras
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train, x_test = x_train.reshape(-1, 28*28).astype(np.float32)/255.0, x_test.reshape(-1,28*28).astype(np.float32)/255.0
y_train, y_test = tf.keras.utils.to_categorical(y_train, num_classes = 10), tf.keras.utils.to_categorical(y_test, num_classes = 10)
print(f"x_train shape: {x_train.shape}, x_test shape: {x_test.shape}\ny_train_shape: {y_train.shape}, y_test shape: {y_test.shape}")

x_train shape: (60000, 784), x_test shape: (10000, 784)
y_train_shape: (60000, 10), y_test shape: (10000, 10)


In [None]:
#Convert to tensorflow data:
BATCH_SIZE, EPOCHS, BUFFER = 64, 20, len(x_train)
train_data, test_data = tf.data.Dataset.from_tensor_slices((x_train, y_train)), tf.data.Dataset.from_tensor_slices((x_test, y_test))
train_data = train_data.shuffle(BUFFER).batch(BATCH_SIZE, drop_remainder = True)
test_data = test_data.batch(BATCH_SIZE, drop_remainder = True)
x_train_sample_batch, y_train_sample_batch = next(iter(train_data))
print(f"{x_train_sample_batch.shape}, {y_train_sample_batch.shape}")

(64, 784), (64, 10)


In [None]:
#The train step from scratch:

In [None]:
tic = time.time()
optimizer = tf.keras.optimizers.Adam(learning_rate = 0.001)
loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits = False)
train_metric = tf.keras.metrics.CategoricalAccuracy()
val_metric = tf.keras.metrics.CategoricalAccuracy()
for epoch in range(EPOCHS):
  print(f"...the start of epoch: {epoch + 1}...\ntraining...\nplease wait for a moment....")
  for (step, (x_train_batch, y_train_batch)) in enumerate(train_data):
    with tf.GradientTape() as tape:
      preds = model(x_train_batch, training = True)
      train_loss = loss_fn(y_train_batch, preds)
    grads = tape.gradient(train_loss, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))
    train_metric.update_state(y_train_batch, preds)
    train_acc = train_metric.result()
    train_metric.reset_states()
    if step % 200 == 0:
      print(f"epoch: {epoch + 1}, train accuracy: {float(train_acc):.4f}")
      print(f"batch number: {step}, train loss: {float(train_loss):.4f}")
  for (step, (x_val_batch, y_val_batch)) in enumerate(test_data):
    preds = model(x_val_batch, training = False)
    val_loss = loss_fn(y_val_batch, preds)
    val_metric.update_state(y_val_batch, preds)
    val_acc = val_metric.result()
    val_metric.reset_states()
    if step % 200 == 0:
      print(f"epoch: {epoch + 1}, validation accuracy: {float(val_acc):.4f}")
      print(f"batch number: {step}, validation loss: {float(val_loss):.4f}")
print(f"{model.summary()}")
toc = time.time()
print(f"\nthis model takes a total time of: {time_fmt(toc - tic)} to train and evaluate")

#saving the model:

...the start of epoch: 1...
training...
please wait for a moment....
epoch: 1, train accuracy: 0.1250
batch number: 0, train loss: 2.2870
epoch: 1, train accuracy: 0.8281
batch number: 200, train loss: 0.6822
epoch: 1, train accuracy: 0.9062
batch number: 400, train loss: 0.2579
epoch: 1, train accuracy: 0.8594
batch number: 600, train loss: 0.7246
epoch: 1, train accuracy: 0.9531
batch number: 800, train loss: 0.2144
epoch: 1, validation accuracy: 0.9531
batch number: 0, validation loss: 0.2051
...the start of epoch: 2...
training...
please wait for a moment....
epoch: 2, train accuracy: 0.9375
batch number: 0, train loss: 0.1723
epoch: 2, train accuracy: 0.9219
batch number: 200, train loss: 0.2157
epoch: 2, train accuracy: 0.9062
batch number: 400, train loss: 0.2137
epoch: 2, train accuracy: 0.8906
batch number: 600, train loss: 0.4188
epoch: 2, train accuracy: 0.9062
batch number: 800, train loss: 0.2743
epoch: 2, validation accuracy: 0.9531
batch number: 0, validation loss: 0.233

In [93]:
class CnnBlock(tf.keras.layers.Layer):
  def __init__(self, num_filters, kernels = 3, *args, **kwargs):
    super(CnnBlock, self).__init__(self, *args, **kwargs)
    self.layer1 = tf.keras.layers.Conv2D(filters = num_filters, 
                                         padding = 'same',
                                         kernel_size = kernels,
                                         activation = 'relu', 
                                         kernel_initializer = 'random_normal')
    self.bn = tf.keras.layers.BatchNormalization()
  
  def call(self, inputs_tensor, training = False):
    x = self.layer1(inputs_tensor, training = training)
    x = self.bn(x)
    return x


In [94]:
class ResBlock(tf.keras.layers.Layer):
  def __init__(self, num_filters, *args, **kwargs):
    super(ResBlock, self).__init__(*args, **kwargs)
    self.block1 = CnnBlock(num_filters = num_filters[0])
    self.block2 = CnnBlock(num_filters = num_filters[1])
    self.block3 = CnnBlock(num_filters = num_filters[2])
    self.maxpool = tf.keras.layers.MaxPooling2D()
    self.id_map = tf.keras.layers.Conv2D(filters = num_filters[1], kernel_size = 3, padding = 'same', activation = 'relu')

  def call(self, inputs_tensor, training = False):
    x = self.block1(inputs_tensor, training = training)
    x = self.block2(x, training = training)
    x = self.block3(x + self.id_map(inputs_tensor), training = training)
    x = self.maxpool(x)
    return x

In [95]:
class MyModel(tf.keras.models.Model):
  def __init__(self, classes = 10, *args, **kwargs):
    super(MyModel, self).__init__(*args, **kwargs)
    self.resblock1 = ResBlock([32,64,128], name = 'resblock1')
    self.resblock2 = ResBlock([64,128,256], name = 'resblock2')
    self.resblock3 = ResBlock([128,256,512], name = 'resblock3')
    self.glb = tf.keras.layers.GlobalAveragePooling2D()
    self.dense1 = tf.keras.layers.Dense(units = 512, activation = 'relu')
    self.drp = tf.keras.layers.Dropout(rate = 0.5)
    self.dense2 = tf.keras.layers.Dense(units = 256, activation = 'relu')
    self.out = tf.keras.layers.Dense(units = 10, activation = 'softmax')
  
  def call(self, inputs_tensor, training = False):
    x = self.resblock1(inputs_tensor, training = training)
    x = self.resblock2(x, training = training)
    x = self.resblock3(x, training = training)
    x = self.glb(x)
    x = self.dense1(x, training = training)
    x = self.dense2(x, training = training)
    x = self.out(x, training = training)
    return x

In [96]:
model2 = MyModel()

In [97]:
model2.compile(loss = tf.keras.losses.CategoricalCrossentropy(from_logits = False),
               optimizer = 'Adam', metrics = ['accuracy'])

In [98]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
x_train, x_test = x_train.astype(np.float32)/255.0, x_test.astype(np.float32)/255.0
y_train, y_test = tf.keras.utils.to_categorical(y_train, num_classes = 10), tf.keras.utils.to_categorical(y_test, num_classes = 10)
print(f"x_train shape: {x_train.shape}, x_test shape: {x_test.shape}\ny_train_shape: {y_train.shape}, y_test shape: {y_test.shape}")

x_train shape: (50000, 32, 32, 3), x_test shape: (10000, 32, 32, 3)
y_train_shape: (50000, 10), y_test shape: (10000, 10)


In [100]:
BUFFER = len(x_train)
BATCH_SIZE = 64
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_data = train_data.shuffle(BUFFER).batch(batch_size = BATCH_SIZE, drop_remainder = True)
test_data = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_data = test_data.shuffle(BUFFER).batch(batch_size = BATCH_SIZE, drop_remainder = True)

In [101]:
model2.fit(train_data, epochs = 20,verbose = 2)
model2.evaluate(test_data, verbose = 2)

Epoch 1/20
781/781 - 25s - loss: 1.1943 - accuracy: 0.5693
Epoch 2/20
781/781 - 24s - loss: 0.7169 - accuracy: 0.7499
Epoch 3/20
781/781 - 24s - loss: 0.5197 - accuracy: 0.8207
Epoch 4/20
781/781 - 24s - loss: 0.3959 - accuracy: 0.8620
Epoch 5/20
781/781 - 24s - loss: 0.3006 - accuracy: 0.8966
Epoch 6/20
781/781 - 24s - loss: 0.2268 - accuracy: 0.9221
Epoch 7/20
781/781 - 24s - loss: 0.1684 - accuracy: 0.9417
Epoch 8/20
781/781 - 25s - loss: 0.1254 - accuracy: 0.9569
Epoch 9/20
781/781 - 25s - loss: 0.1065 - accuracy: 0.9625
Epoch 10/20
781/781 - 25s - loss: 0.0920 - accuracy: 0.9685
Epoch 11/20
781/781 - 25s - loss: 0.0757 - accuracy: 0.9737
Epoch 12/20
781/781 - 25s - loss: 0.0701 - accuracy: 0.9762
Epoch 13/20
781/781 - 25s - loss: 0.0642 - accuracy: 0.9785
Epoch 14/20
781/781 - 25s - loss: 0.0576 - accuracy: 0.9804
Epoch 15/20
781/781 - 25s - loss: 0.0530 - accuracy: 0.9819
Epoch 16/20
781/781 - 25s - loss: 0.0555 - accuracy: 0.9807
Epoch 17/20
781/781 - 25s - loss: 0.0508 - accura

[0.8441156148910522, 0.8355368375778198]