In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

In [2]:
(x_train, y_train), (x_test, y_test)  = mnist.load_data()
x_train = x_train.reshape(-1, 28, 28, 1).astype('float32')/255.0
x_test  =  x_test.reshape(-1, 28, 28, 1).astype('float32')/255.0

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


Subclassing

In [3]:
# CNN -> BatchNorm -> ReLU

class CNNBlock(layers.Layer):
  def __init__(self, out_channels, kernel_size=3):
    super(CNNBlock, self).__init__()
    self.conv = layers.Conv2D(out_channels,kernel_size, padding='same')
    self.bn = layers.BatchNormalization()
  
  # Forward in pytorch
  def call(self, input_tensor, training=False):
    x = self.conv(input_tensor)
    x = self.bn(x, training=training)
    x = keras.activations.relu(x)
    return x

model = keras.Sequential(
    [
     CNNBlock(32),
     CNNBlock(64),
     CNNBlock(128),
     layers.Flatten(),
     layers.Dense(10),
    ]
)


In [6]:
print(model.summary())

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
cnn_block (CNNBlock)         (None, 28, 28, 32)        448       
_________________________________________________________________
cnn_block_1 (CNNBlock)       (None, 28, 28, 64)        18752     
_________________________________________________________________
cnn_block_2 (CNNBlock)       (None, 28, 28, 128)       74368     
_________________________________________________________________
flatten (Flatten)            (None, 100352)            0         
_________________________________________________________________
dense (Dense)                (None, 10)                1003530   
Total params: 1,097,098
Trainable params: 1,096,650
Non-trainable params: 448
_________________________________________________________________
None


In [4]:
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy']
)

model.fit(x_train, y_train, batch_size=64, epochs=3, verbose=2)
model.evaluate(x_train, y_train, batch_size=64, verbose=2)

Epoch 1/3
938/938 - 711s - loss: 0.6157 - accuracy: 0.9444
Epoch 2/3
938/938 - 703s - loss: 0.0929 - accuracy: 0.9816
Epoch 3/3
938/938 - 701s - loss: 0.0341 - accuracy: 0.9896
938/938 - 168s - loss: 0.0217 - accuracy: 0.9931


[0.02168496884405613, 0.993149995803833]

ResNet

In [14]:
class ResBlock(layers.Layer):
  def __init__(self, channels):
    super(ResBlock, self).__init__()
    self.cnn1 = CNNBlock(channels[0])
    self.cnn2 = CNNBlock(channels[1])
    self.cnn3 = CNNBlock(channels[2])
    self.pooling = layers.MaxPooling2D()
    self.identity_mapping = layers.Conv2D(channels[1], 1, padding='same')
  
  def call(self, input_tensor, training=False):
    x = self.cnn1(input_tensor, training=training)
    x = self.cnn2(x, training=training)
    x = self.cnn3(
        x + self.identity_mapping(input_tensor), training=training
    )
    x = self.pooling(x)
    return x


# Models have all functionality of layers with additional
# built-in (fit, predict, evaluatiion, model.layers, summary, serialization)
class ResNet_like(keras.Model):
  def __init__(self, num_classes=10):
    super(ResNet_like, self).__init__()
    self.block1 = ResBlock([32, 32, 64])
    self.block2 = ResBlock([128, 128, 256])
    self.block3 = ResBlock([128, 256, 512])
    self.pool = layers.GlobalAveragePooling2D()
    self.classifier = layers.Dense(num_classes)
  
  def call(self, input_tensor, training=False):
    x = self.block1(input_tensor, training=training)
    x = self.block2(x, training=training)
    x = self.block3(x, training=training)
    x = self.pool(x)
    return self.classifier(x)
  
  # without this output shape will not be shown
  def model(self):
    x = keras.Input(shape=(28,28,1))
    return keras.Model(inputs=[x], outputs=self.call(x))


model = ResNet_like(num_classes=10)
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy']
)
model.fit(x_train, y_train, batch_size=64, epochs=1, verbose=2)
model.evaluate(x_test, y_test, batch_size=64, verbose=2)
print(model.summary())

938/938 - 14s - loss: 0.0841 - accuracy: 0.9737
157/157 - 1s - loss: 0.0607 - accuracy: 0.9807
Model: "res_net_like_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
res_block_15 (ResBlock)      multiple                  28640     
_________________________________________________________________
res_block_16 (ResBlock)      multiple                  526976    
_________________________________________________________________
res_block_17 (ResBlock)      multiple                  1839744   
_________________________________________________________________
global_average_pooling2d_5 ( multiple                  0         
_________________________________________________________________
dense_6 (Dense)              multiple                  5130      
Total params: 2,400,490
Trainable params: 2,397,418
Non-trainable params: 3,072
_________________________________________________________________
None


In [15]:
print(model.model().summary())

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 28, 28, 1)]       0         
_________________________________________________________________
res_block_15 (ResBlock)      (None, 14, 14, 64)        28640     
_________________________________________________________________
res_block_16 (ResBlock)      (None, 7, 7, 256)         526976    
_________________________________________________________________
res_block_17 (ResBlock)      (None, 3, 3, 512)         1839744   
_________________________________________________________________
global_average_pooling2d_5 ( (None, 512)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 10)                5130      
Total params: 2,400,490
Trainable params: 2,397,418
Non-trainable params: 3,072
________________________________________