In [1]:
import os 
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers
from tensorflow.keras.datasets import mnist

# Enable on-demand GPU memory allocation instead of letting TensorFlow grab
# all device memory up front. The loop makes this a no-op on CPU-only
# machines (the original indexed [0] and raised IndexError there) and applies
# the same setting to every visible GPU.
physical_devices = tf.config.list_physical_devices('GPU')
print('Physical Devices', physical_devices)
try:
    for gpu in physical_devices:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as err:
    # Memory growth must be configured before the GPUs are initialised;
    # re-running this cell on a live kernel ends up here.
    print(err)

Physical Devices [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


### Load MNIST Data

In [2]:
# Fetch MNIST through the Keras dataset helper and unpack it into
# (inputs, labels) pairs for the training and test splits.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [3]:
# Cast to float32, scale by 1/255, then add a trailing channel axis of size 1
# so each sample has shape (28, 28, 1).
# NOTE: the names are rebound in place, so re-running this cell on a live
# kernel divides by 255 a second time; re-run the load cell first.
x_train = (x_train.astype('float32') / 255.).reshape(-1, 28, 28, 1)
x_test = (x_test.astype('float32') / 255.).reshape(-1, 28, 28, 1)

In [4]:
x_train.shape

(60000, 28, 28, 1)

### Model Subclassing

The idea here is to subclass the `Layer` class from `keras.layers` and build a model the same way we would in PyTorch. 
We define the sub-layers just as we would in PyTorch, then create an object of the class and treat it like any other Keras layer. 
<br>
The sub-layers need to be defined in the `__init__` method, and the forward pass (passing the input through those sub-layers) should be implemented in the `call` method.

#### Simple CNN Block
We are going to declare a CNN block layer, which contains a convolution layer, followed by a batch-normalization (BN) layer and then a ReLU activation.

In [5]:
class CNNBlock(layers.Layer):
    """Convolution -> batch normalization -> ReLU, packaged as one Keras layer.

    Args:
        out_channels: Number of filters produced by the convolution.
        kernel_size: Kernel size passed to ``layers.Conv2D`` (default 3).
        **kwargs: Standard Keras layer arguments (e.g. ``name``), forwarded
            unchanged to ``layers.Layer``.
    """

    def __init__(self, out_channels, kernel_size=3, **kwargs):
        super().__init__(**kwargs)
        # Kept so that get_config() can report the constructor arguments.
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        # padding='same' keeps the spatial size unchanged through the conv.
        self.conv = layers.Conv2D(out_channels, kernel_size, padding='same')
        self.bn = layers.BatchNormalization()

    def call(self, input_tensor, training=False):
        """Apply conv, batch norm and ReLU to ``input_tensor``.

        Args:
            input_tensor: Input passed to the convolution.
            training: Forwarded to the batch-normalization layer, which
                behaves differently in training and inference.

        Returns:
            The ReLU-activated output tensor.
        """
        x = self.conv(input_tensor)
        x = self.bn(x, training=training)
        x = tf.nn.relu(x)
        return x

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({
            'out_channels': self.out_channels,
            'kernel_size': self.kernel_size,
        })
        return config

Now we are going to use the class inside a `Sequential` model: just add `CNNBlock` objects to its list of layers.

In [6]:
# Three CNNBlocks with 32, 64 and 128 filters, then flatten the feature maps
# and project to 10 outputs (raw logits: the Dense layer has no activation).
model = keras.Sequential(
    [CNNBlock(n_filters) for n_filters in (32, 64, 128)]
    + [layers.Flatten(), layers.Dense(10)]
)

In [7]:
# from_logits=True because the final Dense layer applies no softmax.
# `learning_rate` replaces the deprecated `lr` alias, which triggers a
# warning on this Keras version and is not accepted by newer ones.
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy']
)

  super(Adam, self).__init__(name, **kwargs)


In [8]:
# Train for 10 epochs with mini-batches of 64; verbose=2 prints one summary
# line per epoch. No validation data is passed, so the reported accuracy is
# training accuracy only.
model.fit(x_train, y_train, batch_size=64, epochs=10, verbose=2)

Epoch 1/10
938/938 - 12s - loss: 0.5886 - accuracy: 0.9465 - 12s/epoch - 13ms/step
Epoch 2/10
938/938 - 10s - loss: 0.0886 - accuracy: 0.9819 - 10s/epoch - 11ms/step
Epoch 3/10
938/938 - 10s - loss: 0.0344 - accuracy: 0.9896 - 10s/epoch - 10ms/step
Epoch 4/10
938/938 - 10s - loss: 0.0260 - accuracy: 0.9919 - 10s/epoch - 10ms/step
Epoch 5/10
938/938 - 10s - loss: 0.0253 - accuracy: 0.9917 - 10s/epoch - 11ms/step
Epoch 6/10
938/938 - 10s - loss: 0.0224 - accuracy: 0.9923 - 10s/epoch - 11ms/step
Epoch 7/10
938/938 - 10s - loss: 0.0171 - accuracy: 0.9944 - 10s/epoch - 10ms/step
Epoch 8/10
938/938 - 10s - loss: 0.0158 - accuracy: 0.9949 - 10s/epoch - 11ms/step
Epoch 9/10
938/938 - 10s - loss: 0.0118 - accuracy: 0.9959 - 10s/epoch - 10ms/step
Epoch 10/10
938/938 - 10s - loss: 0.0111 - accuracy: 0.9963 - 10s/epoch - 11ms/step


<keras.callbacks.History at 0x7fc3900e00d0>

In [9]:
# Persist the trained model under the 'pretrained_model/' directory
# (the path is relative to the current working directory).
model.save('pretrained_model/')



INFO:tensorflow:Assets written to: pretrained_model/assets


INFO:tensorflow:Assets written to: pretrained_model/assets


#### Let's now move forward with creating ResNet Blocks

We'll make use of the CNN Block here. 

In [16]:
class ResBlock(layers.Layer):
    """Three stacked CNNBlocks with a projected skip connection and pooling.

    Args:
        channels: Indexable of at least three ints; ``channels[i]`` is the
            number of filters of the i-th CNNBlock.
    """

    def __init__(self, channels):
        super().__init__()
        self.cnn1 = CNNBlock(channels[0])
        self.cnn2 = CNNBlock(channels[1])
        self.cnn3 = CNNBlock(channels[2])
        self.pooling = layers.MaxPooling2D()

        # 1x1 convolution that projects the block input to channels[1]
        # filters so it can be summed with the output of cnn2.
        self.identity_mapping = layers.Conv2D(channels[1], 1, padding='same')

    def call(self, input_tensor, training=False):
        """Run the block on ``input_tensor`` and return the pooled result.

        ``training`` is forwarded to every CNNBlock.
        """
        features = self.cnn1(input_tensor, training=training)  # channels[0] filters
        features = self.cnn2(features, training=training)      # channels[1] filters

        # Skip connection: add the projected input before the last CNNBlock.
        shortcut = self.identity_mapping(input_tensor)
        merged = features + shortcut
        out = self.cnn3(merged, training=training)             # channels[2] filters

        return self.pooling(out)

Let's now use the ResBlock to create a simple ResNet

In [23]:
class Resnet(keras.Model):
    """Small ResNet-style classifier built from three ResBlocks.

    Args:
        num_classes: Number of output logits produced by the final Dense
            layer (default 10).
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.block1 = ResBlock([32, 32, 64])
        self.block2 = ResBlock([128, 128, 256])
        self.block3 = ResBlock([128, 256, 512])

        self.pool = layers.GlobalAveragePooling2D()

        # Previously hard-coded to 10, which silently ignored `num_classes`.
        self.classifier = layers.Dense(num_classes)

    def call(self, input_tensor, training=False):
        """Return class logits for ``input_tensor``.

        ``training`` is forwarded to each ResBlock. No softmax is applied,
        so the output is raw logits.
        """
        x = self.block1(input_tensor, training=training)
        x = self.block2(x, training=training)
        x = self.block3(x, training=training)
        x = self.pool(x)  # (None, 512): one value per block3 output channel
        return self.classifier(x)

In [24]:
model = Resnet()

In [25]:
# from_logits=True because the Resnet classifier head returns raw logits.
# `learning_rate` replaces the deprecated `lr` alias of the Adam optimizer.
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy']
)

In [26]:
# Same training schedule as the Sequential model above: 10 epochs,
# batch size 64, one log line per epoch (verbose=2).
model.fit(x_train, y_train, batch_size=64, epochs=10, verbose=2)

Epoch 1/10
938/938 - 24s - loss: 0.0820 - accuracy: 0.9758 - 24s/epoch - 26ms/step
Epoch 2/10
938/938 - 21s - loss: 0.0344 - accuracy: 0.9893 - 21s/epoch - 23ms/step
Epoch 3/10
938/938 - 21s - loss: 0.0282 - accuracy: 0.9914 - 21s/epoch - 23ms/step
Epoch 4/10
938/938 - 22s - loss: 0.0239 - accuracy: 0.9925 - 22s/epoch - 23ms/step
Epoch 5/10
938/938 - 21s - loss: 0.0221 - accuracy: 0.9932 - 21s/epoch - 23ms/step
Epoch 6/10
938/938 - 21s - loss: 0.0189 - accuracy: 0.9940 - 21s/epoch - 23ms/step
Epoch 7/10
938/938 - 21s - loss: 0.0158 - accuracy: 0.9948 - 21s/epoch - 23ms/step
Epoch 8/10
938/938 - 21s - loss: 0.0138 - accuracy: 0.9954 - 21s/epoch - 23ms/step
Epoch 9/10
938/938 - 22s - loss: 0.0120 - accuracy: 0.9962 - 22s/epoch - 23ms/step
Epoch 10/10
938/938 - 21s - loss: 0.0143 - accuracy: 0.9953 - 21s/epoch - 22ms/step


<keras.callbacks.History at 0x7f9282e6ee50>

In [27]:
model.summary()

Model: "resnet_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 res_block_3 (ResBlock)      multiple                  28640     
                                                                 
 res_block_4 (ResBlock)      multiple                  526976    
                                                                 
 res_block_5 (ResBlock)      multiple                  1839744   
                                                                 
 global_average_pooling2d_1   multiple                 0         
 (GlobalAveragePooling2D)                                        
                                                                 
 dense_4 (Dense)             multiple                  5130      
                                                                 
Total params: 2,400,490
Trainable params: 2,397,418
Non-trainable params: 3,072
____________________________________________