In this notebook I train a ResNet-50 (50-layer Residual Network) on the MNIST dataset. Let's get started.
First, import all the necessary libraries.

In [None]:
import numpy as np
import os
import tensorflow as tf
from tensorflow.data import Dataset
import tensorflow.keras as keras
from keras import layers
from keras.losses import BinaryCrossentropy
from keras.metrics import BinaryAccuracy,Accuracy
from keras.layers import Reshape,Input,Add,Lambda, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D
from keras.optimizers import SGD,RMSprop,Adam
from keras.layers import Layer,UpSampling2D,Conv2DTranspose,AveragePooling2D, MaxPooling2D, Dropout, GlobalMaxPooling2D, GlobalAveragePooling2D
from keras.models import Model,Sequential
from keras.callbacks import Callback,ModelCheckpoint,EarlyStopping,LearningRateScheduler
from keras.preprocessing import image
from keras.utils import layer_utils,to_categorical
from keras.utils.data_utils import get_file
from keras.applications.imagenet_utils import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import pydot
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from keras.utils import plot_model
import keras.backend as K
K.set_image_data_format('channels_last')
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
from keras.datasets import cifar100, mnist
%matplotlib inline
tf.keras.backend.set_floatx('float32')
%load_ext tensorboard
# Number of training epochs passed to the training call.
EPOCHS=50
# Training batch size; the validation and test batch sizes are derived from it below.
BATCHSIZE=32
# NOTE(review): NCOLS, NROWS and NEXAM are not referenced in the cells visible here —
# presumably a plotting grid (columns/rows) and an example count; confirm or remove.
NCOLS=5
NROWS=2
NEXAM=10

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Load the MNIST dataset and add a channel axis so the images have shape (batch_size, 28, 28, 1). Resize the images to 64x64 so the network trains well. Create the validation and test sets, then set up the image data generators and create batch iterators with `flow`.


In [None]:
# Load MNIST and append a trailing channel axis: (N, 28, 28) -> (N, 28, 28, 1).
(tX,tY),(teX,teY) = mnist.load_data()
tX,teX=np.expand_dims(tX,axis=3),np.expand_dims(teX,axis=3)

# Upscale every image to 64x64.
resizer = Lambda(lambda x: tf.image.resize(x,(64,64)))
tX,teX = resizer(tX),resizer(teX)

# The validation split is the first 4000 test images.
# NOTE: teX/teY still contain these samples, so they must be excluded from any
# final test-set evaluation to keep validation and test data disjoint.
valX,valY = teX[:4000],teY[:4000]
print(valY.shape)

# Training-time augmentation. Horizontal/vertical flips are deliberately not
# used: mirrored digits are no longer valid examples of their class (e.g. a
# flipped 6 resembles a 9), which corrupts the labels.
train_data = ImageDataGenerator(rescale=1./255,
                                rotation_range=40,
                                width_shift_range=0.2,
                                height_shift_range=0.2,
                                shear_range=0.2,
                                zoom_range=0.2)
# Validation images are only rescaled, never augmented.
val_data = ImageDataGenerator(rescale=1./255)

# ImageDataGenerator.fit() is not called: it only computes statistics for
# featurewise_center / featurewise_std_normalization / zca_whitening, none of
# which are enabled here, and it would copy the whole dataset for nothing.
train_gen = train_data.flow(tX,tY,batch_size=BATCHSIZE,shuffle=True)
val_gen = val_data.flow(valX,valY,batch_size=BATCHSIZE-20,shuffle=True)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
(4000,)


Define the residual (identity) and residual convolutional blocks, first as functional helpers and then as custom Keras layers.

In [None]:
def identity(X,f,F):
  """Bottleneck residual block with an identity shortcut (functional API).

  Main path: 1x1 conv -> BN -> ReLU -> fxf 'same' conv -> BN -> ReLU ->
  1x1 conv -> BN. The block input is then added to the result and a final
  ReLU is applied.

  Args:
    X: input tensor (channels-last).
    f: kernel size of the middle convolution.
    F: sequence of three filter counts (F1, F2, F3) for the three convolutions.

  Returns:
    The output tensor of the block.
  """
  F1,F2,F3=F
  shortcut = X

  def conv_bn(tensor,filters,kernel,padding='valid'):
    # One convolution (he_uniform init, L2 weight decay) followed by batch norm.
    tensor = Conv2D(filters=filters,
                    kernel_size=kernel,
                    padding=padding,
                    kernel_initializer='he_uniform',
                    kernel_regularizer=keras.regularizers.l2(0.001))(tensor)
    return BatchNormalization(axis=3)(tensor)

  # Stage 1: 1x1 convolution.
  out = conv_bn(X,F1,(1,1))
  out = Activation('relu')(out)

  # Stage 2: fxf convolution that keeps the spatial size.
  out = conv_bn(out,F2,(f,f),padding='same')
  out = Activation('relu')(out)

  # Stage 3: 1x1 convolution, no activation before the merge.
  out = conv_bn(out,F3,(1,1))

  # Merge with the untouched input, then activate.
  out = Add()([out,shortcut])
  out = Activation('relu')(out)
  return out
def conv_block(X,f,F,s=2):
  """Bottleneck residual block with a projection (convolutional) shortcut.

  Main path: 1x1 conv (stride s) -> BN -> ReLU -> fxf 'same' conv -> BN ->
  ReLU -> 1x1 conv -> BN.
  Shortcut path: 1x1 conv (stride s) -> BN.
  Both paths are added and passed through a final ReLU.

  This function used to be defined twice in a row with identical bodies (the
  second definition silently shadowed the first); it is now defined once.

  Args:
    X: input tensor (channels-last).
    f: kernel size of the middle convolution.
    F: sequence of three filter counts (F1, F2, F3).
    s: stride of the first main-path convolution and of the shortcut convolution.

  Returns:
    The output tensor of the block.
  """
  F1,F2,F3=F
  Xs = X

  # Main path, stage 1: strided 1x1 convolution.
  X = Conv2D(filters=F1,kernel_size=(1,1),strides=(s,s),kernel_initializer='he_uniform',kernel_regularizer=keras.regularizers.l2(0.001))(X)
  X = BatchNormalization(axis=3)(X)
  X = Activation('relu')(X)

  # Main path, stage 2: fxf convolution that keeps the spatial size.
  X = Conv2D(filters=F2,kernel_size=(f,f),padding='same',kernel_initializer='he_uniform',kernel_regularizer=keras.regularizers.l2(0.001))(X)
  X = BatchNormalization(axis=3)(X)
  X = Activation('relu')(X)

  # Main path, stage 3: 1x1 convolution, no activation before the merge.
  X = Conv2D(filters=F3,kernel_size=(1,1),kernel_initializer='he_uniform',kernel_regularizer=keras.regularizers.l2(0.001))(X)
  X = BatchNormalization(axis=3)(X)

  # Shortcut path: project the input to F3 channels with the same stride so
  # both branches can be added.
  Xs = Conv2D(filters=F3,kernel_size=(1,1),strides=(s,s),kernel_initializer='he_uniform',kernel_regularizer=keras.regularizers.l2(0.001))(Xs)
  Xs = BatchNormalization(axis=3)(Xs)

  X = Add()([X,Xs])
  X = Activation('relu')(X)

  return X

In [None]:
class ResidualConv(Layer):
  """Bottleneck residual block with a projection shortcut, as a Keras layer.

  Main path: 1x1 conv (stride s) -> BN -> ReLU -> fxf 'same' conv -> BN ->
  ReLU -> 1x1 conv -> BN.
  Shortcut path: 1x1 conv (stride s) -> BN.
  Both paths are added and passed through a final ReLU.

  Args:
    f: kernel size of the middle convolution.
    F: sequence of three filter counts (F1, F2, F3).
    s: stride of the first main-path convolution and of the shortcut convolution.
    **kwargs: forwarded to the base `Layer` constructor.
  """
  def __init__(self, f,F,s,**kwargs):
    super(ResidualConv, self).__init__(**kwargs)
    self.F,self.f,self.s=F,f,s

  def build(self,inputs):
    # Keras passes the input shape here; it is not needed because the
    # sub-layers infer their own shapes. Sub-layers are created in the same
    # order as before.
    F1,F2,F3=self.F
    self.conv1=Conv2D(filters=F1,kernel_size=(1,1),strides=(self.s,self.s))
    self.bn1=BatchNormalization(axis=3)
    self.conv2=Conv2D(filters=F2,kernel_size=(self.f,self.f),padding='same')
    self.bn2=BatchNormalization(axis=3)
    self.conv3=Conv2D(filters=F3,kernel_size=(1,1))
    self.bn3=BatchNormalization(axis=3)
    # Projection shortcut.
    self.conv4=Conv2D(filters=F3,kernel_size=(1,1),strides=(self.s,self.s))
    self.bn4=BatchNormalization(axis=3)

  def call(self, x, training=None):
    """Apply the block to `x`.

    `training` is forwarded explicitly to every BatchNormalization layer so
    that they switch between batch statistics and moving averages correctly.
    """
    xs = x

    x = self.conv1(x)
    x = self.bn1(x, training=training)
    x = tf.nn.relu(x)

    x = self.conv2(x)
    x = self.bn2(x, training=training)
    x = tf.nn.relu(x)

    x = self.conv3(x)
    x = self.bn3(x, training=training)

    xs = self.conv4(xs)
    xs = self.bn4(xs, training=training)

    x = x + xs
    x = tf.nn.relu(x)
    return x

  def get_config(self):
    """Return the constructor arguments so the layer can be serialized."""
    config = super(ResidualConv, self).get_config()
    config.update({'f': self.f, 'F': list(self.F), 's': self.s})
    return config
class Residual(Layer):
  """Bottleneck residual block with an identity shortcut, as a Keras layer.

  Main path: 1x1 conv -> BN -> ReLU -> fxf 'same' conv -> BN -> ReLU ->
  1x1 conv -> BN. The block input is added to the result and a final ReLU is
  applied, so the input must already have F3 channels for the addition to work.

  Args:
    f: kernel size of the middle convolution.
    F: sequence of three filter counts (F1, F2, F3).
    **kwargs: forwarded to the base `Layer` constructor.
  """
  def __init__(self, f,F,**kwargs):
    super(Residual, self).__init__(**kwargs)
    self.F,self.f=F,f

  def build(self,inputs):
    # Keras passes the input shape here; it is not needed because the
    # sub-layers infer their own shapes. Sub-layers are created in the same
    # order as before.
    F1,F2,F3=self.F
    self.conv1=Conv2D(filters=F1,kernel_size=(1,1))
    self.bn1=BatchNormalization(axis=3)
    self.conv2=Conv2D(filters=F2,kernel_size=(self.f,self.f),padding='same')
    self.bn2=BatchNormalization(axis=3)
    self.conv3=Conv2D(filters=F3,kernel_size=(1,1))
    self.bn3=BatchNormalization(axis=3)

  def call(self, x, training=None):
    """Apply the block to `x`.

    `training` is forwarded explicitly to every BatchNormalization layer so
    that they switch between batch statistics and moving averages correctly.
    """
    xs = x

    x = self.conv1(x)
    x = self.bn1(x, training=training)
    x = tf.nn.relu(x)

    x = self.conv2(x)
    x = self.bn2(x, training=training)
    x = tf.nn.relu(x)

    x = self.conv3(x)
    x = self.bn3(x, training=training)

    # Identity shortcut: add the unmodified input.
    x = x + xs
    x = tf.nn.relu(x)
    return x

  def get_config(self):
    """Return the constructor arguments so the layer can be serialized."""
    config = super(Residual, self).get_config()
    config.update({'f': self.f, 'F': list(self.F)})
    return config

class ZP(Layer):
  """Custom-layer wrapper that applies `ZeroPadding2D(pad)` to its input.

  Args:
    pad: padding specification, passed unchanged to `ZeroPadding2D`.
    **kwargs: forwarded to the base `Layer` constructor (e.g. `input_shape`).
  """
  def __init__(self, pad,**kwargs):
    super(ZP, self).__init__(**kwargs)
    self.zp=ZeroPadding2D(pad)

  def call(self, x):
    return self.zp(x)

  def get_config(self):
    """Return the constructor arguments so the layer can be serialized."""
    config = super(ZP, self).get_config()
    # Use the padding as stored by the wrapped layer; ZeroPadding2D accepts
    # this form back as its `padding` argument.
    config.update({'pad': self.zp.padding})
    return config

Build the ResNet-50 and train it on MNIST. I used 50 epochs, a batch size of 32 for training, and a batch size of 12 for the validation set.

In [None]:

# ResNet-50 style network for 64x64x1 inputs: stem (pad, 7x7 conv, BN, ReLU,
# max-pool) followed by four stages of bottleneck blocks (3, 4, 6 and 3 blocks),
# average pooling and a 10-way softmax classifier.
model=Sequential(
      [ZP((3,3),input_shape=(64,64,1)),
       Conv2D(64,(7,7),strides=(2,2)),
       BatchNormalization(axis=3),
       Activation('relu'),
       MaxPooling2D(2,2),
       # Stage 2
       ResidualConv(3,[64,64,256],s=1),
       Residual(3,[64,64,256]),
       Residual(3,[64,64,256]),
       # Stage 3
       ResidualConv(3,[128,128,512],s=2),
       Residual(3,[128,128,512]),
       Residual(3,[128,128,512]),
       Residual(3,[128,128,512]),
       # Stage 4
       ResidualConv(3,[256,256,1024],s=2),
       Residual(3,[256,256,1024]),
       Residual(3,[256,256,1024]),
       Residual(3,[256,256,1024]),
       Residual(3,[256,256,1024]),
       Residual(3,[256,256,1024]),
       # Stage 5
       ResidualConv(3,[512,512,2048],s=2),
       Residual(3,[512,512,2048]),
       Residual(3,[512,512,2048]),
       AveragePooling2D(2,2),
       Flatten(),
       Dense(10,activation='softmax')])

# `learning_rate` replaces the deprecated `lr` argument.
model.compile(optimizer=Adam(learning_rate=5e-4),loss='sparse_categorical_crossentropy',metrics=['acc'])
model.summary()

# `fit` accepts generators directly (`fit_generator` is deprecated). The step
# counts are taken from the iterators themselves so they are always whole
# numbers, even when the sample count is not a multiple of the batch size.
history = model.fit(train_gen,
                    steps_per_epoch=len(train_gen),
                    epochs=EPOCHS,
                    verbose=1,
                    validation_data=val_gen,
                    validation_steps=len(val_gen))



model.save_weights('/content/wab.h5')



Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
zp_7 (ZP)                    (None, 70, 70, 1)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 32, 32, 64)        3200      
_________________________________________________________

Plot the loss and accuracy curves for the training and validation sets.


In [None]:
hist = history.history

# The accuracy entry is keyed by the metric name given to compile(): it is
# 'acc' when compiled with metrics=['acc'] and 'accuracy' when compiled with
# metrics=['accuracy']. Resolve whichever is present instead of hard-coding one.
acc_key = 'accuracy' if 'accuracy' in hist else 'acc'

# One figure per metric: training curve vs. validation curve.
for key, label in (('loss', 'loss'), (acc_key, 'acc')):
    plt.plot(hist[key])
    plt.plot(hist['val_' + key])
    plt.title(f"Model {label} on training and val sets")
    plt.ylabel(label)
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper left')
    plt.show()

Now evaluate the loss and accuracy on the test set.

In [None]:
# The validation split was taken from the first len(valX) test images, so skip
# them here to keep the test score independent of the data used for validation.
n_val = len(valX)
# Apply the same 1/255 rescaling that the training/validation generators use;
# feeding raw 0-255 pixel values would not match what the network was trained on.
test_images = np.asarray(teX[n_val:],dtype='float32')/255.
test_labels = teY[n_val:]
test_loss,test_acc = model.evaluate(test_images,test_labels,batch_size=(BATCHSIZE-10))
print(f"Test loss: {test_loss}\nTest acc: {test_acc*100}%")


NameError: ignored