In [1]:
# python standard library imports
from typing import Self, Any
from pathlib import Path

In [2]:
# model building imports
from keras import Model, Sequential, Input
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dropout, Dense
from keras.ops import add

In [3]:
# model training imports
from keras.optimizers import SGD
from keras.losses import CategoricalCrossentropy
from keras.metrics import CategoricalAccuracy, AUC, F1Score
from keras.callbacks import ModelCheckpoint, CSVLogger, LearningRateScheduler

In [4]:
# augmentation operations
from keras.layers import RandomBrightness, RandomFlip, RandomRotation
from keras.layers import Pipeline

In [5]:
# custom modules imports
from src.utils import load_cifar10_sample

In [6]:
# CIFAR-10 dataset configuration

# the ten classes, listed by label index:
#   0 airplane | 1 car  | 2 bird  | 3 cat  | 4 deer
#   5 dog      | 6 frog | 7 horse | 8 ship | 9 truck
n_classes = 10

# shape of a single input image; the last axis holds the 3 RGB channels
input_shape = (32, 32, 3)

In [7]:
# pixel range handed to RandomBrightness
# NOTE(review): assumes the images are scaled to [0, 1] - confirm against
# load_cifar10_sample
value_range = (0.0, 1.0)

# random brightness shift, random flip and random rotation chained into a
# single augmentation pipeline
augmentation_layer = Pipeline(
    layers=[
        RandomBrightness(value_range=value_range, factor=0.1),
        RandomFlip(mode="horizontal_and_vertical"),
        RandomRotation(fill_mode="reflect", factor=0.1),
    ],
    name="augmentation_layer",
)

In [8]:
class MyTinyCNN(Model):
    """
    MyTinyCNN class, inherits from keras' Model class

    A small convolutional classifier written with the object-oriented
    (subclassing) API. Forward pass:

        augmentation -> conv 3x3 -> max-pool
                     -> (conv 3x3 + conv 2x2, added element-wise) -> max-pool
                     -> flatten -> dropout -> softmax classification head

    Relies on the notebook-level `augmentation_layer` and `n_classes`
    definitions.
    """

    def __init__(self: Self, activation: str = "relu") -> None:
        """
        Initialization

        Parameters
        ----------
        activation : str
            Activation function used by every convolutional layer.
        """

        super().__init__(name="my_tiny_oo_cnn")

        # notebook-level augmentation pipeline (shared object, not a copy)
        self.augmentation_layer = augmentation_layer

        self.conv_layer_1 = Conv2D(
            filters=3 * 8,
            kernel_size=(3, 3),
            activation=activation,
            name="conv_layer_1"
        )
        self.max_pool_layer_1 = MaxPooling2D(
            pool_size=(2, 2),
            name="max_pool_layer_1"
        )

        # exemplify non-sequential nature of computation possible with
        # the functional and object-oriented methods:
        # two parallel branches with the same number of filters and
        # padding="same", so their outputs can be added element-wise
        self.conv_layer_2l = Conv2D(
            filters=3 * 16,
            kernel_size=(3, 3),
            activation=activation,
            name="conv_layer_2l",
            padding="same"
        )
        self.conv_layer_2r = Conv2D(
            filters=3 * 16,
            kernel_size=(2, 2),
            activation=activation,
            name="conv_layer_2r",
            padding="same"
        )
        self.max_pool_layer_2 = MaxPooling2D(
            pool_size=(2, 2),
            name="max_pool_layer_2"
        )

        self.flatten_layer = Flatten(name="flatten_layer")
        self.dropout = Dropout(rate=0.3)
        self.dense_layer = Dense(
            n_classes,
            activation="softmax",
            name="classification_head"
        )

    def call(self: Self, inputs: Any, training: bool = False) -> Any:
        """
        Forward call

        Parameters
        ----------
        inputs : Any
            Batch of input images.
        training : bool
            Whether the model runs in training mode. Exposed so that keras'
            built-in loops (fit / evaluate / predict) can tell the model which
            phase it is in; it is forwarded to the layers whose behaviour
            differs between training and inference (augmentation and
            dropout). Defaults to False, i.e. inference.

        Returns
        -------
        Any
            Output of the softmax classification head, `n_classes` values
            per input sample.
        """

        # random augmentation must only be active while training
        x = self.augmentation_layer(inputs, training=training)

        x = self.conv_layer_1(x)
        x = self.max_pool_layer_1(x)

        # exemplify non-sequential nature of computation possible with
        # the functional and object-oriented methods
        x_l = self.conv_layer_2l(x)
        x_r = self.conv_layer_2r(x)
        x = add(x_l, x_r)
        x = self.max_pool_layer_2(x)

        x = self.flatten_layer(x)
        # dropout must be active while training and disabled at inference
        x = self.dropout(x, training=training)

        return self.dense_layer(x)

In [9]:
# Train our regularized MyTinyCNN:

In [10]:
# load a small CIFAR-10 sample
# NOTE(review): presumably 1024 training and 128 test images - confirm
# against src.utils.load_cifar10_sample
(
    X_train,
    y_train,
    X_test,
    y_test,
) = load_cifar10_sample(1024, 128)

In [11]:
# training hyper-parameters: number of passes over the data, samples per step
epochs, batch_size = 32, 16

In [12]:
# regularize through the optimizer's own `weight_decay` setting rather than
# through an additional penalty term in the loss
loss = CategoricalCrossentropy(name="loss")
optimizer = SGD(name="optimizer", learning_rate=0.01, weight_decay=0.01)
model = MyTinyCNN()

In [13]:
# metrics tracked during training and evaluation:
# categorical accuracy, AUC and macro-averaged F1 score
metrics = [
    CategoricalAccuracy(name="accuracy"),
    AUC(name="auc"),
    F1Score(name="f1_score", average="macro"),
]

# keep the individual metric objects reachable by name as well
categorical_accuracy, auc, f1_score = metrics

In [14]:
# configure the model for training with the chosen optimizer, loss and metrics
model.compile(
    optimizer=optimizer,
    loss=loss,
    metrics=metrics,
)

In [15]:
# What are callbacks?
# Objects handed to `fit()` through its `callbacks` argument; the two built
# here save model checkpoints and log the training metrics to a CSV file.

# both output files live directly under the root directory
root_dir_path = Path(".")
checkpoint_file_path = root_dir_path / "checkpoint.keras"
# single assignment on purpose: `root_dir_path` must keep pointing at the
# directory, not at the CSV file
metrics_file_path = root_dir_path / "metrics.csv"

# NOTE(review): `save_best_only` is left at its default, so `monitor` may have
# no effect on which checkpoints get written - confirm whether best-only
# saving was intended
checkpoint_callback = ModelCheckpoint(
    checkpoint_file_path,
    monitor="val_loss",
    verbose=0
)
metrics_callback = CSVLogger(metrics_file_path)

In [16]:
# What is a learning rate scheduler?
def exp_decay_lr_scheduler(
    epoch: int,
    current_lr: float,
    factor: float = 0.95
) -> float:
    """
    Exponential decay learning rate scheduler

    Parameters
    ----------
    epoch : int
        Index of the epoch; not used by this schedule.
    current_lr : float
        Learning rate currently in use.
    factor : float
        Multiplicative decay applied on every call.

    Returns
    -------
    float
        The current learning rate scaled by `factor`.
    """

    # the decay depends only on the previous learning rate, not on the epoch
    return current_lr * factor

In [17]:
# wrap the schedule function in a callback so that it can be handed to fit()
lr_scheduler_callback = LearningRateScheduler(
    schedule=exp_decay_lr_scheduler
)

In [18]:
# every callback passed to fit(): checkpointing, CSV logging, lr scheduling
callbacks = [checkpoint_callback, metrics_callback, lr_scheduler_callback]

In [19]:
# fit the model on the training sample;
# validation_split=0.2 asks for 20 % of it to be used as validation data
_ = model.fit(
    x=X_train,
    y=y_train,
    epochs=epochs,
    batch_size=batch_size,
    callbacks=callbacks,
    validation_split=0.2,
    verbose=2,
)

Epoch 1/32
52/52 - 1s - 18ms/step - accuracy: 0.1111 - auc: 0.5205 - f1_score: 0.0768 - loss: 2.3106 - val_accuracy: 0.1317 - val_auc: 0.5223 - val_f1_score: 0.0818 - val_loss: 2.3034 - learning_rate: 0.0095
Epoch 2/32
52/52 - 0s - 6ms/step - accuracy: 0.1392 - auc: 0.5492 - f1_score: 0.0855 - loss: 2.2909 - val_accuracy: 0.1220 - val_auc: 0.5476 - val_f1_score: 0.0791 - val_loss: 2.2923 - learning_rate: 0.0090
Epoch 3/32
52/52 - 0s - 5ms/step - accuracy: 0.1392 - auc: 0.5675 - f1_score: 0.0821 - loss: 2.2818 - val_accuracy: 0.1171 - val_auc: 0.5484 - val_f1_score: 0.0703 - val_loss: 2.2862 - learning_rate: 0.0086
Epoch 4/32
52/52 - 0s - 5ms/step - accuracy: 0.1258 - auc: 0.5868 - f1_score: 0.0789 - loss: 2.2717 - val_accuracy: 0.1220 - val_auc: 0.5714 - val_f1_score: 0.0545 - val_loss: 2.2787 - learning_rate: 0.0081
Epoch 5/32
52/52 - 0s - 5ms/step - accuracy: 0.1502 - auc: 0.6080 - f1_score: 0.0890 - loss: 2.2553 - val_accuracy: 0.0976 - val_auc: 0.5822 - val_f1_score: 0.0680 - val_l

In [20]:
# score the trained model on the held-out test sample;
# the metrics come back as a {name: value} dictionary
model.evaluate(
    x=X_test,
    y=y_test,
    batch_size=batch_size,
    verbose=0,
    return_dict=True,
)

{'accuracy': 0.203125,
 'auc': 0.7194247841835022,
 'f1_score': 0.16481611132621765,
 'loss': 2.089552879333496}

In [None]:
# What is label smoothing?

In [21]:
# Next class:
# Real data, real models, real world