In [1]:
import os

# Quiet TensorFlow's native (C++) logging. This is set before TensorFlow is
# imported in the next cell; level '2' is documented to hide INFO and WARNING.
os.environ.update({'TF_CPP_MIN_LOG_LEVEL': '2'})

In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

In [3]:
# Ask TensorFlow to allocate GPU memory on demand instead of reserving it all
# up front. Iterating over the detected devices (rather than indexing [0])
# makes this cell a no-op on CPU-only machines instead of raising IndexError,
# and applies the same setting to every GPU when more than one is present.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

In [4]:
# mnist.load_data() returns a (train, test) pair, each an (images, labels) pair.
train_pair, test_pair = mnist.load_data()
X_train, y_train = train_pair
X_test, y_test = test_pair

In [5]:
# Convert both image arrays to float32 and divide by 255.0.
# NOTE(review): re-running this cell without reloading the data divides again.
X_train, X_test = (
    images.astype('float32') / 255.0 for images in (X_train, X_test)
)

In [6]:
# Sanity-check the training array's shape after scaling.
X_train.shape

(60000, 28, 28)

In [7]:
class CNNBlock(layers.Layer):
    """Convolution -> batch normalization -> ReLU, packaged as one layer.

    Args:
        output_channels: Number of filters of the convolution.
        kernel_size: Kernel size handed to ``Conv2D`` (default 3).
        **kwargs: Standard ``Layer`` keyword arguments (for example ``name``,
            ``dtype``, ``trainable``), forwarded to the base class.
    """

    def __init__(self, output_channels, kernel_size=3, **kwargs):
        super(CNNBlock, self).__init__(**kwargs)
        # Remembered so get_config() can describe how to rebuild this layer.
        self.output_channels = output_channels
        self.kernel_size = kernel_size
        self.conv = layers.Conv2D(output_channels, kernel_size, padding='same')
        self.bn = layers.BatchNormalization()

    def call(self, input, training=False):
        """Apply conv, batch norm and ReLU to ``input``.

        Args:
            input: Tensor fed to the convolution.
            training: Forwarded to batch normalization, which behaves
                differently in training and inference.

        Returns:
            The ReLU-activated tensor.
        """
        x = self.conv(input)
        x = self.bn(x, training=training)
        return tf.keras.activations.relu(x)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super(CNNBlock, self).get_config()
        config.update(
            output_channels=self.output_channels,
            kernel_size=self.kernel_size,
        )
        return config

In [8]:
# Baseline CNN: three CNNBlocks (32, 64, 128 filters) with max pooling after
# the first two, then flatten and a 10-unit dense layer without activation.
model = keras.Sequential()
for layer in (
    layers.Input(shape=(28, 28, 1)),
    CNNBlock(32),
    layers.MaxPooling2D(),
    CNNBlock(64),
    layers.MaxPooling2D(),
    CNNBlock(128),
    layers.Flatten(),
    layers.Dense(10),
):
    model.add(layer)

In [9]:
# Show each layer's output shape and parameter count.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 cnn_block (CNNBlock)        (None, 28, 28, 32)        448       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 14, 14, 32)       0         
 )                                                               
                                                                 
 cnn_block_1 (CNNBlock)      (None, 14, 14, 64)        18752     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 7, 7, 64)         0         
 2D)                                                             
                                                                 
 cnn_block_2 (CNNBlock)      (None, 7, 7, 128)         74368     
                                                                 
 flatten (Flatten)           (None, 6272)              0

In [10]:
# The final Dense layer has no activation, so the loss is told to expect
# logits rather than probabilities.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

In [11]:
# Train for 5 epochs; the test split is passed as validation data, so the
# per-epoch validation metrics are computed on X_test / y_test.
model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=5,
    batch_size=32,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1be3988df10>

### Building a ResNet-like model

In [12]:
# NOTE(review): CNNBlock is also defined in an earlier cell. Because this
# definition runs later, it is the one ResBlock below picks up. Keep a single
# definition when consolidating the notebook.
class CNNBlock(layers.Layer):
    """Convolution -> batch normalization -> ReLU, packaged as one layer.

    Args:
        output_channels: Number of filters of the convolution.
        kernel_size: Kernel size handed to ``Conv2D`` (default 3).
        **kwargs: Standard ``Layer`` keyword arguments (for example ``name``,
            ``dtype``, ``trainable``), forwarded to the base class.
    """

    def __init__(self, output_channels, kernel_size=3, **kwargs):
        super(CNNBlock, self).__init__(**kwargs)
        # Remembered so get_config() can describe how to rebuild this layer.
        self.output_channels = output_channels
        self.kernel_size = kernel_size
        self.conv = layers.Conv2D(output_channels, kernel_size, padding='same')
        self.bn = layers.BatchNormalization()

    def call(self, input, training=False):
        """Apply conv, batch norm and ReLU to ``input``.

        Args:
            input: Tensor fed to the convolution.
            training: Forwarded to batch normalization, which behaves
                differently in training and inference.

        Returns:
            The ReLU-activated tensor.
        """
        x = self.conv(input)
        x = self.bn(x, training=training)
        return tf.keras.activations.relu(x)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super(CNNBlock, self).get_config()
        config.update(
            output_channels=self.output_channels,
            kernel_size=self.kernel_size,
        )
        return config

In [13]:
class ResBlock(layers.Layer):
    """Three CNNBlocks with a projected skip connection, then max pooling.

    Data flow: ``cnn1 -> cnn2``, then the block input (projected by a 1x1
    convolution) is added to the ``cnn2`` output, followed by ``cnn3`` and a
    ``MaxPooling2D``.

    Args:
        channels: Sequence whose first three entries are the filter counts of
            ``cnn1``, ``cnn2`` and ``cnn3``.
        **kwargs: Standard ``Layer`` keyword arguments (for example ``name``,
            ``dtype``, ``trainable``), forwarded to the base class.
    """

    def __init__(self, channels, **kwargs):
        super(ResBlock, self).__init__(**kwargs)
        self.channels = channels
        self.cnn1 = CNNBlock(channels[0])
        self.cnn2 = CNNBlock(channels[1])
        self.cnn3 = CNNBlock(channels[2])
        self.pooling = layers.MaxPooling2D()
        # The 1x1 convolution gives the skip path channels[1] filters, the same
        # count cnn2 produces, so the two tensors can be added.
        self.identity_mapping = layers.Conv2D(channels[1], 1, padding='same')

    def call(self, input_tensor, training=False):
        """Run the block on ``input_tensor``.

        Args:
            input_tensor: Tensor fed to both the main path and the skip path.
            training: Forwarded to every CNNBlock (used by their batch norm).

        Returns:
            The pooled output of the third CNNBlock.
        """
        x = self.cnn1(input_tensor, training=training)
        x = self.cnn2(x, training=training)
        skip = self.identity_mapping(input_tensor)
        x = self.cnn3(x + skip, training=training)
        return self.pooling(x)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super(ResBlock, self).get_config()
        # Copy into a plain list so the config holds a basic Python container.
        config.update(channels=list(self.channels))
        return config

In [17]:
class ResNet_Like(keras.Model):
    """Small ResNet-style classifier: three ResBlocks, global pooling, dense.

    Args:
        num_classes: Number of units of the final dense layer (default 10).
        **kwargs: Standard ``keras.Model`` keyword arguments (for example
            ``name``), forwarded to the base class.
    """

    def __init__(self, num_classes=10, **kwargs):
        super(ResNet_Like, self).__init__(**kwargs)
        self.block1 = ResBlock([32, 32, 64])
        self.block2 = ResBlock([128, 128, 256])
        self.block3 = ResBlock([128, 256, 512])
        self.pool = layers.GlobalAveragePooling2D()
        # No activation: the classifier emits raw scores (logits).
        self.classifier = layers.Dense(num_classes)

    def call(self, input_tensor, training=False):
        """Forward pass.

        Args:
            input_tensor: Batch of images.
            training: Forwarded to the residual blocks (used by batch norm).

        Returns:
            Tensor of ``num_classes`` raw scores per example.
        """
        x = self.block1(input_tensor, training=training)
        x = self.block2(x, training=training)
        x = self.block3(x, training=training)
        # Global average pooling has no training-dependent behaviour, so the
        # flag is not forwarded to it.
        x = self.pool(x)
        return self.classifier(x)

    def model(self, input_shape=(28, 28, 1)):
        """Wrap this network in a functional ``keras.Model``.

        This lets ``summary()`` report per-block output shapes.

        Args:
            input_shape: Shape of one input example, without the batch axis.
                Defaults to ``(28, 28, 1)``.

        Returns:
            A functional ``keras.Model`` sharing this instance's layers.
        """
        x = keras.Input(shape=input_shape)
        # training=None is deliberate. An explicit boolean passed while the
        # functional graph is built is recorded on each nested layer call, so
        # the default False would keep batch normalization in inference mode
        # even inside fit(). None lets Keras supply the flag at run time.
        return keras.Model(inputs=[x], outputs=self.call(x, training=None))

In [18]:
# Build the network (10 output classes) and take its functional wrapper, which
# replaces the earlier Sequential `model`.
model = ResNet_Like(num_classes=10).model()

In [19]:
# Show the output shape and parameter count of each residual block and the head.
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 28, 28, 1)]       0         
                                                                 
 res_block_3 (ResBlock)      (None, 14, 14, 64)        28640     
                                                                 
 res_block_4 (ResBlock)      (None, 7, 7, 256)         526976    
                                                                 
 res_block_5 (ResBlock)      (None, 3, 3, 512)         1839744   
                                                                 
 global_average_pooling2d_1   (None, 512)              0         
 (GlobalAveragePooling2D)                                        
                                                                 
 dense_2 (Dense)             (None, 10)                5130      
                                                             

In [20]:
# Adam with its default settings; the classifier outputs logits, hence
# from_logits=True on the loss.
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(),
    metrics=['accuracy'],
)

In [21]:
# Train for 10 epochs; the test split is passed as validation data, so the
# per-epoch validation metrics are computed on X_test / y_test.
model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=10,
    batch_size=32,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1bebfcff9a0>