## Set up

In [None]:
from enum import Enum
from dataclasses import dataclass

import numpy as np
import tensorflow as tf
import keras as tfk
import numpy as np
from sklearn.model_selection import train_test_split
from keras import layers as tfkl

from pathlib import Path

In [None]:
# Report which Keras version this notebook is running against
print(f"Keras version is {tfk.__version__}")

In [None]:
# Expected location of the dataset when working from a local checkout:
# a `data/` folder next to the current working directory
LOCAL_DATA_PATH = Path().absolute().parent / "data" / "training_set.npz"

if LOCAL_DATA_PATH.exists():
    # Local run: read from the checkout and write next to it
    DATA_PATH = LOCAL_DATA_PATH
    OUTPUT_DIR = Path().absolute().parent / "submission"
else:
    # If the data is not there, then we're running in Kaggle
    print("Running on Kaggle")
    DATA_PATH = Path("/kaggle/input/an2dl-homework-1/training_set.npz")
    OUTPUT_DIR = Path()

print(DATA_PATH)
print(OUTPUT_DIR)

In [None]:
# Shared seed: used for Keras' global seeding, the train/validation split
# and the random augmentation layers
SEED = 42
# Number of samples per batch in the tf.data pipelines
BATCH_SIZE = 32
# Metric names handed to `model.compile`
METRICS = ["accuracy", "recall", "f1_score"]

In [None]:
# Seed Keras' global random state with the shared SEED
tfk.utils.set_random_seed(SEED)

In [None]:
@dataclass
class Hyperparameters:
    """Settings for the data split, augmentation, model head and training.

    Every attribute is an annotated dataclass field with a default, so
    ``Hyperparameters()`` yields the baseline configuration and single
    values can be overridden by keyword, e.g.
    ``Hyperparameters(learning_rate=1e-3)``. Without the annotations the
    ``@dataclass`` decorator generates no fields and such overrides raise
    ``TypeError``.
    """

    # Passed as `test_size` to the train/validation split
    test_size: int = 1500
    # Augmentation
    # `fill_mode` of the random translation and rotation layers
    augmentation_fill: str = "wrap"
    # Used for both the height and the width factor of the translation layer
    translation_factor: tuple[float, float] = (-.15, .15)
    rotation_factor: float = 1.0
    # Model
    # One Dense layer is created per entry, in order
    layer_units: tuple[int, ...] = (256, 64)
    activation: str = "silu"
    # Training
    # Optimiser class; it is instantiated with `learning_rate` when compiling
    optimiser: type[tfk.optimizers.Optimizer] = tfk.optimizers.AdamW
    learning_rate: float = 1e-4
    loss: str = "categorical_crossentropy"
    epochs: int = 500
    ## Early stopping parameters
    es_patience: int = 20
    es_min_delta: float = 1e-5
    ## Learning rate schedule
    lr_patience: int = 10
    lr_decay_factor: float = 0.1
    min_lr: float = 1e-7


# Baseline configuration used by the rest of the notebook
hp = Hyperparameters()

In [None]:
class EmbeddingWidth(Enum):
    """Integer widths keyed by backbone name (presumably feature-vector sizes — confirm)."""

    # NOTE(review): not referenced by the visible code, and the model builder
    # below uses MobileNetV3Large rather than this backbone — confirm whether
    # this entry is still needed.
    EfficientNetV2B3 = 1536

### Load

In [None]:
# Pull both arrays out of the archive while it is open
with np.load(DATA_PATH) as data:
    images, raw_labels = data["images"], data["labels"]

# One-hot encode the labels
labels = tfk.utils.to_categorical(raw_labels)

print(images.shape, labels.shape, sep="\n")

#### Remove polluted images

In [None]:
# Average over axes 1-3 of `images`, leaving one scalar per entry along axis 0
image_means: np.ndarray = images.mean(axis=(1, 2, 3))

In [None]:
TOLERANCE = 1e-4
MIN_DUPLICATES = 5

# For every image mean, collect the indices of all images whose mean lies
# within TOLERANCE of it; only groups with more than MIN_DUPLICATES members
# are kept, keyed by the mean that produced them.
mean_indices = {}
for mean in image_means:
    within_tolerance = np.abs(image_means - mean) <= TOLERANCE
    indices = within_tolerance.nonzero()[0]
    if len(indices) <= MIN_DUPLICATES:
        continue
    mean_indices[mean] = indices

len(mean_indices)

In [None]:
# Flatten every group of near-identical images into one index array
# (an index may appear more than once when groups overlap)
duplicated_indices = np.array(
    [index for group in mean_indices.values() for index in group]
)
duplicated_indices.shape

In [None]:
# Everything that was not flagged as a duplicate is kept
all_indices = list(range(images.shape[0]))
original_indices = np.setdiff1d(all_indices, duplicated_indices)
original_indices.shape

In [None]:
# Select the retained samples from both arrays with the same index set
clean_images, clean_labels = images[original_indices], labels[original_indices]

print(clean_images.shape, clean_labels.shape, sep="\n")

#### Separate data for validation

In [None]:
# Stratified, seeded hold-out split of the cleaned data
split_kwargs = {
    "test_size": hp.test_size,
    "random_state": SEED,
    "stratify": clean_labels,
}
X_train, X_val, y_train, y_val = train_test_split(
    clean_images, clean_labels, **split_kwargs
)

print(X_train.shape, y_train.shape, X_val.shape, y_val.shape, sep="\n")

In [None]:
# Wrap each (features, labels) pair in a batched tf.data pipeline
train_dataset, val_dataset = (
    tf.data.Dataset.from_tensor_slices(pair).batch(BATCH_SIZE)
    for pair in ((X_train, y_train), (X_val, y_val))
)

## Model

In [None]:
def build(hp: Hyperparameters) -> tfk.Model:
    """Assemble the uncompiled classifier.

    The model chains random translation/rotation augmentation, a resize to
    224x224, a frozen ImageNet-pretrained MobileNetV3Large feature extractor
    and a dense head ending in an 8-way softmax.

    Args:
        hp: Supplies the augmentation settings (``translation_factor``,
            ``rotation_factor``, ``augmentation_fill``) and the dense head
            layout (``layer_units``, ``activation``).

    Returns:
        A ``tfk.Model`` taking 96x96x3 inputs and producing 8 class
        probabilities.
    """
    inputs = tfkl.Input((96, 96, 3))
    x = inputs

    # Augmentation
    x = tfkl.RandomTranslation(
        hp.translation_factor,  # Height
        hp.translation_factor,  # Width
        fill_mode=hp.augmentation_fill,
        seed=SEED,
    )(x)
    x = tfkl.RandomRotation(
        hp.rotation_factor,
        fill_mode=hp.augmentation_fill,
        seed=SEED,
    )(x)

    # ImageNet images are 224x224 so we need to resize to use the
    # pre-trained backbone feature extractor
    x = tfkl.Resizing(224, 224)(x)

    # TODO Move feature extractor to hyperparameters
    # TODO Investigate changing `input_shape` to actual image shape
    # instead of resizing
    feature_extractor = tfk.applications.MobileNetV3Large(
        input_shape=(224, 224, 3),
        include_top=False,
        weights="imagenet",
        pooling="max",
    )
    # Freeze the backbone so only the dense head is trained
    feature_extractor.trainable = False
    x = feature_extractor(x)

    # Read the head layout from the `hp` argument rather than from the
    # `Hyperparameters` class, so a customised instance is honoured
    for units in hp.layer_units:
        x = tfkl.Dense(
            units,
            activation=hp.activation,
        )(x)

    # Classification problem has 8 output classes
    # so the final layer has 8 neurons
    # with a softmax activation
    outputs = tfkl.Dense(8, activation="softmax")(x)

    return tfk.Model(inputs, outputs)

In [None]:
# Build the model once just to inspect its layers and parameter counts
model = build(hp)
model.summary()

In [None]:
def fit(hp: Hyperparameters, train: tf.data.Dataset, val: tf.data.Dataset) -> tuple[tfk.Model, tfk.callbacks.History]:
    """Build, compile and train a fresh model.

    Args:
        hp: Supplies the optimiser class, learning rate, loss, epoch budget
            and the early-stopping / learning-rate-reduction settings.
        train: Training dataset. It must already be batched, because no
            batch size is passed to ``Model.fit``.
        val: Validation dataset, likewise already batched.

    Returns:
        The trained model and the ``History`` object returned by
        ``Model.fit``.
    """
    model = build(hp)
    model.compile(
        optimizer=hp.optimiser(hp.learning_rate),
        loss=hp.loss,
        metrics=METRICS,
    )
    # `batch_size` is deliberately not passed: the datasets yield batches
    # themselves, and Keras documents that it must not be given for dataset
    # inputs.
    history = model.fit(
        train.prefetch(tf.data.AUTOTUNE),
        epochs=hp.epochs,
        validation_data=val.prefetch(tf.data.AUTOTUNE),
        callbacks=[
            # Stop once the monitored value stops improving and roll back
            # to the best weights seen
            tfk.callbacks.EarlyStopping(
                min_delta=hp.es_min_delta,
                patience=hp.es_patience,
                restore_best_weights=True,
                verbose=1,
            ),
            # Scale the learning rate by `lr_decay_factor` on a plateau,
            # never going below `min_lr`
            tfk.callbacks.ReduceLROnPlateau(
                factor=hp.lr_decay_factor,
                patience=hp.lr_patience,
                min_lr=hp.min_lr,
                verbose=1,
            ),
        ],
        verbose=2,
    )
    return model, history

In [None]:
# Train a freshly built model; this rebinds `model` to the trained instance
model, history = fit(hp, train_dataset, val_dataset)

In [None]:
# Write the trained model to "model.keras", relative to the current working directory
# NOTE(review): OUTPUT_DIR is defined earlier but not used here — confirm
# whether the file should be written there instead.
model.save("model.keras")