## Set up

In [1]:
import pickle
from dataclasses import dataclass, asdict
from pathlib import Path
from pprint import pprint
from typing import ClassVar

import keras as tfk
import numpy as np
import tensorflow as tf
from keras import layers as tfkl

2024-11-16 19:11:41.624499: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-16 19:11:41.625341: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-16 19:11:41.667919: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-16 19:11:41.824181: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
# Record which Keras version this run is using
print(f"Keras version is {tfk.__version__}")

In [None]:
# Default to the local repository layout: the data and submission folders
# live in the parent of the directory the notebook is run from
AUGMENTED_DIR = Path().absolute().parent / "data" / "augmented"
OUTPUT_DIR = Path().absolute().parent / "submission"
# If the data is not there, then we're running in Kaggle
if not AUGMENTED_DIR.exists():
    AUGMENTED_DIR = Path("/kaggle/input/an2dl-homework-1-augmented/augmented")
    OUTPUT_DIR = Path()

# Create the output directory up front so that the save cells at the end of
# the notebook cannot fail on a missing folder after a full training run
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

TRAIN_TFDS_DIR = AUGMENTED_DIR / "train"
VAL_TFDS_DIR = AUGMENTED_DIR / "val"

print(TRAIN_TFDS_DIR)
print(VAL_TFDS_DIR)
print(OUTPUT_DIR)

In [None]:
# Value handed to `tfk.utils.set_random_seed`
SEED = 42
# `verbose` argument used for every `model.fit` / `model.predict` call
KERAS_VERBOSITY = 2
# Batch size applied to both the training and the validation dataset
BATCH_SIZE = 128
# Loss and metric identifiers passed to `model.compile`
LOSS = "categorical_crossentropy"
METRICS = ["accuracy", "precision", "recall", "f1_score"]

In [None]:
# Seed the random number generators through Keras before any model is built
tfk.utils.set_random_seed(SEED)

In [None]:
@dataclass
class Hyperparameters:
    """Tunable settings read by `build` (architecture) and `fit` (training).

    `class_weight` is declared as a `ClassVar`, so it is not a dataclass
    field: it is shared by all instances and does not appear in `asdict`.

    NOTE(review): with the defaults, `lr_patience` (20) is larger than
    `es_patience` (5), so early stopping will likely end the first training
    phase before the learning rate is ever reduced. The fine-tuning cell
    overrides both values.
    """

    # Model
    # Forwarded as `pooling` to the EfficientNetV2B3 feature extractor
    global_pooling_type: str = "avg"
    # Width of each dense layer placed on top of the feature extractor
    layer_units: tuple[int, ...] = (
        128,
        64,
    )
    # Activation of the dense layers
    activation: str = "silu"
    # Per-class loss weights passed to `model.fit`, keyed by class index
    class_weight: ClassVar[dict[int, float]] = {
        0: 1.00,
        1: 1.00,
        2: 1.00,
        3: 1.00,
        4: 1.00,
        5: 1.00,
        6: 1.00,
        7: 1.00,
    }
    ## Regularisation
    # One pair per entry of `layer_units`; each pair is unpacked positionally
    # into `tfk.regularizers.L1L2`, i.e. it is read as (l1, l2)
    regularisations: tuple[tuple[float, float], ...] = (
        (1e-3, 0.0),
        (0.0, 0.0),
    )
    # Rate of the single Dropout layer placed before the output layer
    dropout: float = 0.6
    # Standard deviation of the GaussianNoise layer, applied after the input
    # has been rescaled by 1/255
    noise_std: float = 0.075
    # Training
    # Optimiser class (not an instance): `fit` calls it with `learning_rate`
    optimiser: type[tfk.Optimizer] = tfk.optimizers.AdamW
    learning_rate: float = 1e-4
    epochs: int = 50
    unfreeze_last: int = 70  # For fine-tuning
    ## Early stopping parameters
    es_patience: int = 5
    es_min_delta: float = 1e-3
    ## Learning rate schedule
    lr_patience: int = 20
    lr_min_delta: float = 1e-3
    lr_decay_factor: float = 0.1
    min_lr: float = 1e-8

# Shared instance used (and later mutated) by the training cells below
hp = Hyperparameters()

### Load

In [None]:
# Load the saved train/validation datasets and batch them identically
train_dataset, val_dataset = (
    tf.data.Dataset.load(str(tfds_dir)).batch(BATCH_SIZE)
    for tfds_dir in (TRAIN_TFDS_DIR, VAL_TFDS_DIR)
)

# Display the dataset's element spec as the cell output
train_dataset

## Model

In [None]:
def build(hp: Hyperparameters) -> tuple[tfk.Model, tfk.Model]:
    """Build the transfer-learning classifier described by `hp`.

    The model takes 96x96x3 inputs, adds Gaussian noise, runs them through an
    ImageNet-pretrained EfficientNetV2B3 with all weights frozen, and then
    applies batch normalisation, a stack of dense layers, one dropout layer
    and an 8-way softmax output. The returned model is not compiled.

    Args:
        hp: Hyperparameters; reads `noise_std`, `global_pooling_type`,
            `layer_units`, `regularisations`, `activation` and `dropout`.

    Returns:
        A `(feature_extractor, model)` pair. `feature_extractor` is the
        backbone instance nested inside `model`, returned so that its layers
        can be unfrozen later for fine-tuning.

    Raises:
        ValueError: If `hp.layer_units` and `hp.regularisations` do not have
            the same length.
    """
    inputs = tfkl.Input((96, 96, 3))
    x = inputs

    # `hp.noise_std` applies to the input scaled by 1/255, so scale down, add
    # the noise, then scale back up before the feature extractor
    # (presumably the inputs are 0-255 pixel values; confirm against the data)
    x = tfkl.Rescaling(1.0/255.0)(x)
    x = tfkl.GaussianNoise(hp.noise_std)(x)
    x = tfkl.Rescaling(255.0)(x)

    # TODO Investigate changing `input_shape` to actual image shape
    # instead of resizing
    feature_extractor = tfk.applications.EfficientNetV2B3(
        input_shape=(96, 96, 3),
        include_top=False,
        weights="imagenet",
        pooling=hp.global_pooling_type,
    )
    # Freeze the whole backbone; only the layers added below are trained
    # until some backbone layers are explicitly unfrozen
    feature_extractor.trainable = False
    x = feature_extractor(x)

    x = tfkl.BatchNormalization()(x)

    # `strict=True` turns a length mismatch between the two tuples into an
    # error instead of silently dropping the unmatched dense layers
    for units, regularisation in zip(
        hp.layer_units, hp.regularisations, strict=True
    ):
        x = tfkl.Dense(
            units,
            activation=hp.activation,
            kernel_regularizer=tfk.regularizers.L1L2(*regularisation),
        )(x)

    # A single dropout layer, applied once after the dense stack
    x = tfkl.Dropout(hp.dropout)(x)

    # Classification problem has 8 output classes
    # so the final layer has 8 neurons
    # with a softmax activation
    outputs = tfkl.Dense(8, activation="softmax")(x)

    model = tfk.Model(inputs, outputs)

    return feature_extractor, model

In [None]:
# Keep a handle on the feature extractor: the fine-tuning cell unfreezes its
# layers later on
feature_extractor, model = build(hp)
model.summary()

In [None]:
def fit(
    hp: Hyperparameters,
    train: tf.data.Dataset,
    val: tf.data.Dataset,
    model: tfk.Model | None = None,
) -> tuple[tfk.Model, tfk.callbacks.History]:
    """Compile a model and train it with early stopping and LR reduction.

    Args:
        hp: Hyperparameters; printed in full before training starts.
        train: Training dataset, already batched by the caller.
        val: Validation dataset, already batched by the caller.
        model: Model to (re)compile and train. When omitted, a new one is
            created with `build(hp)` and its feature extractor is discarded.

    Returns:
        The trained model together with the `History` returned by
        `model.fit`.
    """
    pprint(asdict(hp))

    if model is None:
        model = build(hp)[1]

    # Compiling on every call means a changed learning rate or changed
    # `trainable` flags are picked up when an existing model is passed in
    optimiser = hp.optimiser(hp.learning_rate)
    model.compile(optimizer=optimiser, loss=LOSS, metrics=METRICS)

    # Both callbacks rely on their default monitored quantity
    stop_early = tfk.callbacks.EarlyStopping(
        min_delta=hp.es_min_delta,
        patience=hp.es_patience,
        restore_best_weights=True,
        verbose=1,
    )
    reduce_lr = tfk.callbacks.ReduceLROnPlateau(
        factor=hp.lr_decay_factor,
        patience=hp.lr_patience,
        min_delta=hp.lr_min_delta,
        min_lr=hp.min_lr,
        verbose=1,
    )

    training_batches = train.prefetch(tf.data.AUTOTUNE)
    validation_batches = val.prefetch(tf.data.AUTOTUNE)
    history = model.fit(
        training_batches,
        epochs=hp.epochs,
        validation_data=validation_batches,
        class_weight=hp.class_weight,
        callbacks=[stop_early, reduce_lr],
        verbose=KERAS_VERBOSITY,
    )
    return model, history

In [None]:
# Collects one `History` per training phase (initial training, fine-tuning)
histories = []

In [None]:
# Train the model that was built together with `feature_extractor` instead of
# letting `fit` build a fresh one: the fine-tuning cell unfreezes layers of
# `feature_extractor`, which only affects training if that extractor is the
# one nested inside `model`.
# Re-running this cell continues training the same model; re-run the `build`
# cell first to start from scratch.
model, history = fit(hp, train_dataset, val_dataset, model=model)
histories.append(history)

In [None]:
# Unfreeze the last `hp.unfreeze_last` layers of the feature extractor,
# leaving every BatchNormalization layer frozen.
# NOTE(review): this only influences training when `feature_extractor` is the
# backbone instance nested inside `model` — confirm both originate from the
# same `build` call.
for layer in feature_extractor.layers[-hp.unfreeze_last:]:
    if not isinstance(layer, tfkl.BatchNormalization):
        layer.trainable = True

# Fine-tuning settings: more epochs, a lower learning rate and different
# patience values.
# NOTE: these assignments mutate the shared `hp` in place, so any training
# cell re-run after this one will pick up the fine-tuning values.
hp.epochs = 200
hp.learning_rate = 1e-5
hp.es_patience = 30
hp.lr_patience = 10

# `fit` recompiles the model it is given before training it
model, history = fit(hp, train_dataset, val_dataset, model=model)
histories.append(history)

## Save results

In [None]:
# Persist the final (fine-tuned) model in the `.keras` format
model.save(OUTPUT_DIR / "model.keras")

In [None]:
# Persist the list of `History` objects collected during training.
# NOTE(review): this pickles the callback objects themselves, which may hold
# a reference to the model; pickling only each `history.history` dict would
# likely be smaller — confirm what downstream readers of this file expect.
with (OUTPUT_DIR / "histories.pkl").open("wb") as f:
    pickle.dump(histories, f)

In [None]:
# Store the model's outputs on both splits; `np.save` receives paths without
# an extension
train_predictions = model.predict(train_dataset, verbose=KERAS_VERBOSITY)
np.save(OUTPUT_DIR / "train_predictions", train_predictions)

val_predictions = model.predict(val_dataset, verbose=KERAS_VERBOSITY)
np.save(OUTPUT_DIR / "val_predictions", val_predictions)