## **Setup**

In [1]:
# Import the necessary packages
import numpy as np

import tensorflow as tf

from sklearn.model_selection import train_test_split

In [2]:
# Central place for every tunable value used by the notebook.
# Keys are read elsewhere as config["KEY"], so names and insertion order are kept as-is.
config = dict(
    # --- Basic information ---
    AUTHOR="Kiernan",

    # --- Training params ---
    LR=0.001,           # learning rate passed to the optimizer
    BATCH_SIZE=32,      # used for both the training and validation batches
    EPOCHS=30,          # upper bound on the number of training epochs

    # --- Model params ---
    CONV_LAYERS=4,      # number of conv blocks stacked in the body
    N_FILTERS=8,        # filters per conv block
    KERNEL_SIZE=(3, 3), # kernel size of each conv block
    EMBEDDING_SIZE=16,  # width of the vector the body hands to the head
)

## **Loading Data**

In [3]:
# Seed for the train/validation split. Without a fixed random_state the shuffle
# picks different validation samples on every run, so validation metrics (and
# anything driven by them, such as early stopping) are not reproducible.
SPLIT_SEED = 42

# Download (or read from the local Keras cache) the MNIST train and test sets.
(X, y), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Rescale pixels to [-1, 1]; this assumes the raw values lie in [0, 255].
X = (X.astype(np.float32) - 127.5) / 127.5
X_test = (X_test.astype(np.float32) - 127.5) / 127.5

# Append a trailing channel axis so the images match the Conv2D input layout.
X = np.expand_dims(X, axis=-1)
X_test = np.expand_dims(X_test, axis=-1)

# Hold out a validation set with as many samples as the test set
# (an integer test_size is an absolute sample count, not a fraction).
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=X_test.shape[0],
    shuffle=True,
    random_state=SPLIT_SEED,
)
print(f"Train data shape: {X_train.shape} Val data shape: {X_val.shape} Test data shape: {X_test.shape}")

Train data shape: (50000, 28, 28, 1) Val data shape: (10000, 28, 28, 1) Test data shape: (10000, 28, 28, 1)


## **Create Model**

In [4]:
def create_body(image_shape):
    """Build the convolutional feature extractor (the "body").

    The network stacks config["CONV_LAYERS"] stages of
    Conv2D -> BatchNormalization -> ReLU, each using config["N_FILTERS"]
    filters of size config["KERNEL_SIZE"] with "same" padding. A 1x1
    convolution then projects to config["EMBEDDING_SIZE"] channels and global
    average pooling collapses the spatial axes into one vector per image.

    Args:
        image_shape: Shape of a single input image, without the batch axis.

    Returns:
        A tf.keras Model named "body" mapping an image batch to embeddings.
    """
    layers = tf.keras.layers
    image_in = layers.Input(shape=image_shape)

    # Layers are created in the same order on every pass (conv, batch norm,
    # ReLU) so Keras' auto-generated layer names stay stable.
    features = image_in
    for _ in range(config["CONV_LAYERS"]):
        features = layers.Conv2D(
            config["N_FILTERS"], config["KERNEL_SIZE"], padding="same"
        )(features)
        features = layers.BatchNormalization()(features)
        features = layers.ReLU()(features)

    # A 1x1 convolution only changes the channel count to the embedding width.
    projected = layers.Conv2D(config["EMBEDDING_SIZE"], (1, 1), padding="same")(features)
    embedding = layers.GlobalAveragePooling2D()(projected)
    return tf.keras.models.Model(inputs=image_in, outputs=embedding, name="body")


# Drop the leading sample axis of the training array to get the per-image shape.
body = create_body(image_shape=X_train.shape[1:])
body.summary()

Model: "body"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 28, 28, 8)         80        
_________________________________________________________________
batch_normalization (BatchNo (None, 28, 28, 8)         32        
_________________________________________________________________
re_lu (ReLU)                 (None, 28, 28, 8)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 28, 28, 8)         584       
_________________________________________________________________
batch_normalization_1 (Batch (None, 28, 28, 8)         32        
_________________________________________________________________
re_lu_1 (ReLU)               (None, 28, 28, 8)         0      

In [5]:
def create_head(n_classes):
    """Build the classification head that sits on top of the embedding.

    The head is a single softmax Dense layer applied to a vector of length
    config["EMBEDDING_SIZE"].

    Args:
        n_classes: Number of output classes (units of the Dense layer).

    Returns:
        A tf.keras Model named "head" mapping embeddings to class probabilities.
    """
    # The trailing comma matters: `(config["EMBEDDING_SIZE"])` is just a
    # parenthesised int, and newer Keras releases reject a non-tuple shape.
    inputs = tf.keras.layers.Input(shape=(config["EMBEDDING_SIZE"],))
    outputs = tf.keras.layers.Dense(n_classes, activation='softmax')(inputs)
    return tf.keras.models.Model(inputs=inputs, outputs=outputs, name="head")

# Ten output classes, one per MNIST digit.
head = create_head(10)
head.summary()

Model: "head"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 16)]              0         
_________________________________________________________________
dense (Dense)                (None, 10)                170       
Total params: 170
Trainable params: 170
Non-trainable params: 0
_________________________________________________________________


In [6]:
# Chain the feature extractor and the classifier into one end-to-end model.
model = tf.keras.models.Sequential(layers=[body, head], name="combinedModel")

# The head already ends in a softmax, so the loss receives probabilities
# rather than raw logits (hence from_logits=False). Labels are integer class
# ids, which is why the "sparse" loss and metric variants are used.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
optimizer = tf.keras.optimizers.Adam(learning_rate=config['LR'])
metrics = [tf.keras.metrics.SparseCategoricalAccuracy()]

model.compile(
    optimizer=optimizer,
    loss=loss,
    metrics=metrics,
)
model.summary()

Model: "combinedModel"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
body (Functional)            (None, 16)                2104      
_________________________________________________________________
head (Functional)            (None, 10)                170       
Total params: 2,274
Trainable params: 2,210
Non-trainable params: 64
_________________________________________________________________


## **Training**

In [None]:
# Stop once val_loss has failed to improve for 4 consecutive epochs and roll
# the model back to the weights of the best epoch seen.
stopper = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=4,
    restore_best_weights=True,
)

# Halve the learning rate after 2 epochs without val_loss improvement. The
# patience is shorter than the stopper's, so the rate drops before training
# is abandoned.
lr_reducer = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=2,
)

# Training and validation use the same batch size; the returned History
# object is kept in `hist`.
hist = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=config["BATCH_SIZE"],
    validation_batch_size=config["BATCH_SIZE"],
    epochs=config["EPOCHS"],
    callbacks=[stopper, lr_reducer],
)

Epoch 1/30
Epoch 2/30
Epoch 3/30