In [1]:
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

In [3]:
class ResidualBlock(keras.layers.Layer):
    """Two-convolution residual block with a projection shortcut.

    Main path:  Conv2D -> BatchNorm -> ReLU -> Conv2D -> BatchNorm
    Shortcut:   1x1 Conv2D (bias-free) -> BatchNorm
    Output:     ``output_fn(shortcut + main)``

    Args:
        filters: Number of output channels for every convolution in the block.
        name: Layer name; also used as the prefix of all sub-layer names.
        output_fn: Activation applied after the residual addition.
        kernel: Kernel size of the two main-path convolutions.
        stride: Spatial stride of the block. It is applied once on the
            shortcut and once on the first main-path convolution, so both
            branches reach the addition with the same spatial size.
        trainable, dtype, dynamic, **kwargs: Forwarded unchanged to
            ``keras.layers.Layer.__init__``.
    """

    def __init__(
        self,
        filters,
        name,
        output_fn=tf.nn.relu,
        kernel=3,
        stride=1,
        trainable=True,
        dtype=None,
        dynamic=False,
        **kwargs,
    ):
        super().__init__(
            trainable=trainable, name=name, dtype=dtype, dynamic=dynamic, **kwargs
        )

        # * ShortCut: 1x1 projection so channels (and stride) match the main path
        self.sc_conv2d = layers.Conv2D(
            filters,
            1,
            strides=stride,
            kernel_initializer="he_normal",
            use_bias=False,
            name=name + "_sc_conv",
        )
        self.sc_bn = layers.BatchNormalization(name=name + "_sc_bn")

        # * First ConvBlock (the only main-path layer that applies `stride`)
        self.conv2d1 = layers.Conv2D(
            filters,
            kernel,
            padding="same",
            strides=stride,
            kernel_initializer="he_normal",
            name=name + "_1_conv",
        )
        self.bn1 = layers.BatchNormalization(name=name + "_1_bn")
        self.actv1 = layers.Activation(tf.nn.relu, name=name + "_1_relu")

        # * Second Conv Block
        # Always stride 1: striding here as well would downsample the main
        # path twice while the shortcut is downsampled once, and the Add
        # below would fail on mismatched shapes whenever stride > 1.
        self.conv2d2 = layers.Conv2D(
            filters,
            kernel,
            padding="same",
            strides=1,
            kernel_initializer="he_normal",
            name=name + "_2_conv",
        )
        self.bn2 = layers.BatchNormalization(name=name + "_2_bn")

        # * shortcut (local skip connection)
        self.add = layers.Add(name=name + "_add")

        # * output
        self.actv2 = layers.Activation(output_fn, name=name + "_2_relu")

    def call(self, inputs, *args, **kwargs):
        """Apply the block to ``inputs`` and return the activated residual sum.

        Extra positional and keyword arguments are accepted but not used.
        """
        # * shortcut
        shortcut = self.sc_conv2d(inputs)
        shortcut = self.sc_bn(shortcut)

        # * First ConvBlock
        x = self.conv2d1(inputs)
        x = self.bn1(x)
        x = self.actv1(x)

        # * Second ConvBlock
        x = self.conv2d2(x)
        x = self.bn2(x)

        # * Add Residual Connection
        x = self.add([shortcut, x])
        # * Activation of ConvBlock
        output = self.actv2(x)
        return output

In [4]:

# Load the MNIST train/test splits.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Add the trailing channel axis expected by Conv2D. `-1` lets NumPy infer the
# number of samples instead of hard-coding 60000 / 10000, so the cell keeps
# working on a subset or a differently sized split.
train_images = train_images.reshape((-1, 28, 28, 1))
test_images = test_images.reshape((-1, 28, 28, 1))

# Scale pixels by 1/255. Casting to float32 first avoids the silent promotion
# to float64 that `array / 255.0` would cause, which doubles memory use.
train_images = train_images.astype("float32") / 255.0
test_images = test_images.astype("float32") / 255.0

# Three residual stages with 2x2 max-pooling between them, followed by a
# small dense classifier head with one softmax output per class.
model = keras.Sequential(
    [
        layers.Input((28, 28, 1)),

        ResidualBlock(32, "res1"),
        layers.MaxPooling2D(2),

        ResidualBlock(64, "res2"),
        layers.MaxPooling2D(2),

        ResidualBlock(64, "res3"),

        layers.Flatten(),
        layers.Dense(64, activation="relu"),
        layers.Dense(10, activation="softmax"),
    ]
)
model.summary()

# Sparse categorical cross-entropy: labels are passed to fit/evaluate as
# loaded, without one-hot encoding.
model.compile(
    optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]
)

model.fit(train_images, train_labels, epochs=5)
model.evaluate(test_images, test_labels)

2021-12-29 12:25:22.657589: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2021-12-29 12:25:22.657688: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Metal device set to: Apple M1

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 res1 (ResidualBlock)        (None, 28, 28, 32)        9984      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 14, 14, 32)       0         
 )                                                               
                                                                 
 res2 (ResidualBlock)        (None, 14, 14, 64)        58240     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 7, 7, 64)         0         
 2D)                                                             
                                                                 
 res3 (ResidualBlock)        (None, 7, 7, 64)          78720     
                                             

2021-12-29 12:25:23.077146: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2021-12-29 12:25:23.407486: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/5
Epoch 3/5

KeyboardInterrupt: 