In [1]:
# https://github.com/aladdinpersson/Machine-Learning-Collection/blob/master/ML/TensorFlow/Basics/tutorial8_keras_subclassing.py

import os

# Runtime tuning knobs. These environment variables are read by TensorFlow's
# native runtime when the package is first imported, so they must be set
# BEFORE `import tensorflow`; setting them afterwards has no effect.
# os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "1"
# os.environ["MKL_DEBUG_CPU_TYPE"] = "5"
# os.environ["KMP_BLOCKTIME"] = "1"
# os.environ["KMP_AFFINITY"] = "granularity=fine,compact,1,0"
# os.environ["OMP_NUM_THREADS"] = "<num_threads>"

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

# Let TensorFlow allocate GPU memory on demand instead of reserving all of it
# up front. Each device is configured exactly once.
gpus = tf.config.list_physical_devices("GPU")
if gpus:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
else:
    print("No GPU device found")

No GPU device found


In [2]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Append a trailing channel axis of size 1 (grayscale), cast to float32 and
# scale the pixel values by 1/255. Both splits get the same treatment.
x_train, x_test = (
    images.reshape(-1, 28, 28, 1).astype("float32") / 255.0
    for images in (x_train, x_test)
)

In [3]:
# Conv2D -> BatchNorm -> ReLU is a very common structure.
# Writing it out ~10 times by hand is a lot of code!
#
# As in PyTorch, instead of repeating the same three lines over and over,
# wrap them in a reusable layer class to keep the code clean.
class CNNBlock(layers.Layer):
    """Conv2D -> BatchNormalization -> ReLU building block.

    Args:
        out_channels: Number of filters produced by the convolution.
        kernel_size: Kernel size passed to `layers.Conv2D` (default 3).
        **kwargs: Standard `layers.Layer` arguments such as `name` or `dtype`.
    """

    def __init__(self, out_channels, kernel_size=3, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so `get_config` can report them.
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        # Sub-layers are created once here and reused on every `call`.
        self.conv = layers.Conv2D(out_channels, kernel_size, padding="same")
        self.bn = layers.BatchNormalization()

    def call(self, input_tensor, training=False):
        """Apply convolution, batch normalization and ReLU to `input_tensor`.

        Args:
            input_tensor: Input tensor for the convolution
                (presumably (batch, height, width, channels) - confirm with caller).
            training: Forwarded to the batch-norm layer, which behaves
                differently in training and inference mode. Defaults to False.

        Returns:
            The activated output tensor.
        """
        x = self.conv(input_tensor)
        x = self.bn(x, training=training)
        return tf.nn.relu(x)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update(
            {"out_channels": self.out_channels, "kernel_size": self.kernel_size}
        )
        return config

In [4]:
# Three CNN blocks with increasing width, then flatten and a 10-unit linear head.
model = keras.Sequential(
    [CNNBlock(filters) for filters in (32, 64, 128)]
    + [layers.Flatten(), layers.Dense(10)]
)

In [5]:
# A residual network (ResNet) is a deep learning model in which the
# weight layers learn residual functions with reference to the layer
# inputs.
# The skip connections help mitigate the vanishing gradient problem.
# This class is only a rough, simplified sketch of a typical ResNet block.
class ResBlock(layers.Layer):
    """Three stacked `CNNBlock`s with a skip connection, followed by max pooling.

    Args:
        channels: Sequence whose first three entries are the filter counts of
            the three `CNNBlock`s. `channels[1]` is also the filter count of
            the skip-path convolution, so both operands of the addition have
            the same number of channels.
        **kwargs: Standard `layers.Layer` arguments such as `name` or `dtype`.
    """

    def __init__(self, channels, **kwargs):
        super().__init__(**kwargs)
        self.channels = channels
        self.cnn1 = CNNBlock(channels[0], 3)
        self.cnn2 = CNNBlock(channels[1], 3)
        self.cnn3 = CNNBlock(channels[2], 3)
        self.pooling = layers.MaxPooling2D()
        # Skip path. Despite the attribute name this is a learned 3x3
        # convolution (a projection), not a pure identity: it maps the block
        # input to `channels[1]` channels so it can be added to cnn2's output.
        self.identity_mapping = layers.Conv2D(channels[1], 3, padding="same")

    def call(self, input_tensor, training=False):
        """Run the block on `input_tensor`.

        Args:
            input_tensor: Input tensor fed to both the main and the skip path.
            training: Forwarded to every `CNNBlock`. Defaults to False.

        Returns:
            The max-pooled output of the third `CNNBlock`.
        """
        x = self.cnn1(input_tensor, training=training)
        x = self.cnn2(x, training=training)
        # Merge the main path with the skip path by element-wise addition
        # before the last CNN block.
        skip = self.identity_mapping(input_tensor)
        x = self.cnn3(x + skip, training=training)
        return self.pooling(x)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({"channels": list(self.channels)})
        return config

In [6]:
# Inheriting from keras.Model gives us more flexibility than layers.Layer.
# A Model can do everything a Layer can, plus model-level features such as
# fit(), evaluate(), summary() and save().
class ResNet_Like(keras.Model):
    """Small ResNet-style classifier: three `ResBlock`s, global pooling, dense head.

    Args:
        num_classes: Number of output units of the final dense layer
            (default 10). The layer has no activation, so it outputs logits.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Each list holds the filter counts of the three CNN blocks inside
        # the corresponding ResBlock.
        self.block1 = ResBlock([32, 32, 64])
        self.block2 = ResBlock([128, 128, 256])
        self.block3 = ResBlock([128, 256, 512])
        # Averages over the spatial dimensions, leaving one value per channel
        # (plays the role Flatten would otherwise play before the classifier).
        self.pool = layers.GlobalAveragePooling2D()
        self.classifier = layers.Dense(num_classes)

    def call(self, input_tensor, training=False):
        """Forward pass.

        Args:
            input_tensor: Batch of input images.
            training: Forwarded to the residual blocks. Defaults to False.

        Returns:
            The output of the dense classifier (logits).
        """
        x = self.block1(input_tensor, training=training)
        x = self.block2(x, training=training)
        x = self.block3(x, training=training)
        # Global average pooling has no training-dependent behavior, so no
        # `training` argument is passed to it.
        x = self.pool(x)
        return self.classifier(x)

    def model(self, input_shape=(28, 28, 1)):
        """Wrap this network in a functional `keras.Model` with a fixed input.

        Building the graph from a concrete `keras.Input` makes `summary()`
        show per-layer output shapes and exposes the blocks as individual
        layers of the returned model.

        Args:
            input_shape: Shape of a single input sample, without the batch
                dimension (default (28, 28, 1)).

        Returns:
            A functional `keras.Model` that shares this instance's layers.
        """
        x = keras.Input(shape=input_shape)
        # Pass training=None explicitly. Calling `self.call(x)` would use the
        # signature default (False) and hard-code inference mode into the
        # graph, so batch norm would never run in training mode during fit().
        # With None, Keras supplies the training flag at run time instead.
        return keras.Model(inputs=[x], outputs=self.call(x, training=None))

In [8]:
# Functional view of the ResNet-like network; its layers are used as a base.
base_model = ResNet_Like().model()

# Keep the base up to layers[2] and attach a new Flatten + Dense(10) head.
# layers[0] is the input layer; layers[2] should be the second ResBlock,
# given the order in ResNet_Like.call - verify with base_model.summary().
base_input = base_model.layers[0].input
base_output = base_model.layers[2].output
output = layers.Dense(10)(layers.Flatten()(base_output))
model = keras.Model(base_input, output)

model.compile(
    optimizer=keras.optimizers.Adam(),
    # The head has no activation, so the loss receives raw logits.
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"]
)

model.fit(x_train, y_train, batch_size=64, epochs=1, verbose=1)
# `model` is a plain functional keras.Model here, so it has no `.model()`
# helper. `summary()` prints the table itself and returns None, so it is
# called directly rather than wrapped in print().
model.summary()
model.evaluate(x_test, y_test, batch_size=64, verbose=1)
model.save("ds/pretrained1")



KeyboardInterrupt: 