

# AN2DL Homework 1 - Final Model Notebook

In [None]:
COLAB = False

if COLAB:
  from google.colab import drive
  !pip install keras_cv -qq
  drive.mount('/gdrive')
  %cd /gdrive/My Drive/ANN_new


In [2]:
import numpy as np
import tensorflow as tf
import keras_cv as kcv
from tensorflow.keras.applications import EfficientNetB2
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input, GlobalAveragePooling2D, Dropout, BatchNormalization, Resizing, Rescaling, LeakyReLU, ELU
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.utils.class_weight import compute_class_weight

SEED = 42
# Actually apply the seed: this seeds Python's `random`, NumPy and the
# TensorFlow backend in one call, so weight initialisation, shuffling and
# the random augmentation layers start from a known state.
tf.keras.utils.set_random_seed(SEED)

# Choose a name for the model
model_string = 'finalModel'



2024-11-24 23:17:45.694652: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1732486665.778315   90305 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1732486665.803126   90305 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-24 23:17:46.075591: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load dataset
# `np.load` on an .npz archive returns a lazily-read NpzFile that keeps the
# underlying file open; the context manager closes it as soon as both arrays
# have been read into memory.
with np.load('data/training_set_clean.npz') as data:
    X = data['images']
    y = data['labels']

print(f'X shape: {X.shape}')
print(f'y shape: {y.shape}')

X shape: (11951, 96, 96, 3)
y shape: (11951,)


## Augmentations

In [4]:
# -------------------- #
# Keras augmentations
# -------------------- #

# Random layers chained into one Sequential; this object is later passed to
# `create_model`, which applies it inside the model graph.
keras_augmentation_layers = [
    tf.keras.layers.RandomFlip(mode='horizontal'),
    tf.keras.layers.RandomRotation(factor=0.7),
    tf.keras.layers.RandomBrightness(factor=0.2),
    tf.keras.layers.RandomTranslation(height_factor=0.15, width_factor=0.15),
    tf.keras.layers.RandomZoom(height_factor=0.3),
]
augmentation = tf.keras.Sequential(keras_augmentation_layers)


# -------------------- #
# KerasCV
# -------------------- #

# RandAugment settings; `value_range` tells the layer the images are in [0, 255].
rand_augment_options = dict(
    value_range=(0, 255),
    augmentations_per_image=4,
    magnitude=0.6,
    magnitude_stddev=0.2,
    rate=0.8,
)
rand_augment = kcv.layers.RandAugment(**rand_augment_options)

# Cutout settings; the erased patch is filled with the constant value 0.0.
random_cutout_options = dict(
    height_factor=0.2,
    width_factor=0.2,
    fill_mode="constant",
    fill_value=0.0,
    seed=2378,
)
random_cutout = kcv.layers.RandomCutout(**random_cutout_options)

def augment(images, labels, batch_index):
    """Apply the KerasCV augmentations to one batch of images.

    Args:
        images: Batch of images; cast to float32 before being augmented.
        labels: Labels of the batch, returned unchanged.
        batch_index: Position of the batch in the dataset. Currently unused;
            it is accepted so that different augmentations can be selected
            per batch with an `if` on this value.

    Returns:
        Tuple `(augmented_images, labels)`.
    """
    float_images = tf.cast(images, tf.float32)

    # RandAugment first, then cutout on its result.
    augmented_images = random_cutout(rand_augment(float_images))

    return augmented_images, labels




2024-11-24 23:18:17.581093: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


![chosen_augs](./img/ourgmentations.png)

## Transfer Learning

In [5]:
# Hold out 20% of the data for validation.
# - `random_state` reuses the notebook-wide SEED instead of a second hard-coded 42.
# - `stratify=y` keeps the class proportions identical in both splits, so the
#   validation metrics monitored by the callbacks are not skewed by class imbalance.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED, stratify=y
)

# The tf.data pipeline and the augmentation layers work on float32 tensors
X_train = tf.convert_to_tensor(X_train, dtype=tf.float32)
X_test = tf.convert_to_tensor(X_test, dtype=tf.float32)

# One-hot encode labels
y_train = tf.keras.utils.to_categorical(y_train, num_classes=8).astype(np.float32)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=8).astype(np.float32)

# Let tf.data choose parallelism / prefetch sizes at runtime
AUTOTUNE = tf.data.AUTOTUNE

2024-11-24 23:18:31.648557: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 1057259520 exceeds 10% of free system memory.


In [6]:

# ---------------- #
# Preparation
# ---------------- #

def prepare_dataset(images, labels, is_training=True, batch_size=32):
    """Build a batched, prefetched `tf.data` pipeline from in-memory tensors.

    Args:
        images: Image tensor/array, sliced along its first axis.
        labels: Labels aligned with `images`.
        is_training: When True the samples are shuffled and every batch goes
            through `augment`; when False neither happens.
        batch_size: Number of samples per batch.

    Returns:
        A `tf.data.Dataset` yielding `(images, labels)` batches.
    """
    pipeline = tf.data.Dataset.from_tensor_slices((images, labels))

    if is_training:
        pipeline = pipeline.shuffle(buffer_size=1024)

    # EfficientNet preprocessing, applied per sample before batching
    pipeline = pipeline.map(
        lambda image, label: (preprocess_input(image), label),
        num_parallel_calls=AUTOTUNE,
    )

    # Batch before augmentation
    pipeline = pipeline.batch(batch_size)

    if is_training:
        # enumerate() yields (batch_index, (images, labels)); the index is
        # forwarded so `augment` can vary its behaviour per batch.
        pipeline = pipeline.enumerate().map(
            lambda batch_index, batch: augment(batch[0], batch[1], batch_index),
            num_parallel_calls=AUTOTUNE,
        )

    return pipeline.prefetch(buffer_size=AUTOTUNE)


# Prepare datasets: training is shuffled and augmented, validation is not.
BATCH_SIZE = 32
train_dataset = prepare_dataset(X_train, y_train, is_training=True, batch_size=BATCH_SIZE)
val_dataset = prepare_dataset(X_test, y_test, is_training=False, batch_size=BATCH_SIZE)



# ---------------- #
# Build model
# ---------------- #

# Create the model
def create_model(input_shape=(96, 96, 3), num_classes=8, augmentation=None):
    """Build the transfer-learning classifier on top of a frozen EfficientNetB2.

    Args:
        input_shape: Shape of a single input image (height, width, channels).
        num_classes: Number of output classes of the softmax layer.
        augmentation: Optional callable (e.g. a `tf.keras.Sequential` of random
            layers) applied to the resized images inside the model. When None,
            no in-model augmentation is added.

    Returns:
        An uncompiled `Model` mapping input images to class probabilities.
    """
    input_layer = Input(shape=input_shape)

    # Resizing layer: images are upscaled to 260x260 before the backbone
    x = Resizing(260, 260)(input_layer)

    # Base model: ImageNet weights, no classification head, fully frozen
    base_model = EfficientNetB2(weights='imagenet', include_top=False)
    base_model.trainable = False

    # In-model augmentation is optional; the default (None) skips it instead
    # of failing on a call to None.
    if augmentation is not None:
        x = augmentation(x)

    # training=False runs the backbone in inference mode
    x = base_model(x, training=False)
    x = GlobalAveragePooling2D()(x)

    # Classification head: three Dropout -> BatchNorm -> Dense -> LeakyReLU blocks
    for dropout_rate, units in ((0.2, 512), (0.1, 256), (0.1, 128)):
        x = Dropout(dropout_rate)(x)
        x = BatchNormalization()(x)
        x = Dense(units, activation=None)(x)
        x = LeakyReLU(negative_slope=0.05)(x)

    output_layer = Dense(num_classes, activation='softmax')(x)

    return Model(inputs=input_layer, outputs=output_layer)


# ---------------- #
# Create & Compile
# ---------------- #

model = create_model(augmentation=augmentation)

# Adam whose learning rate starts at 1e-3 and decays exponentially
# (factor 0.95 per 1000 optimizer steps).
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    0.001, decay_steps=1000, decay_rate=0.95
)
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

# Labels are one-hot encoded, hence categorical cross-entropy.
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])


# ---------------- #
# Other settings
# ---------------- #

# Callbacks
callbacks = [
    # Stop after 15 epochs without a val_accuracy improvement and roll back
    # to the best weights seen.
    EarlyStopping(
        monitor='val_accuracy',
        patience=15,
        restore_best_weights=True
    ),
    # Keep only the best model on disk; the fine-tuning stage reloads this file.
    ModelCheckpoint(
        'models/' + model_string + '.keras',
        monitor='val_accuracy',
        save_best_only=True,
        mode='max'
    )
]

# Compute class weights
# y_train is one-hot encoded, so recover the integer class ids once.
train_label_ids = np.argmax(y_train, axis=1)
present_classes = np.unique(train_label_ids)
balanced_weights = compute_class_weight(
    class_weight='balanced',
    classes=present_classes,
    y=train_label_ids
)
# Key every weight by its actual class id. Pairing by position (enumerate)
# would silently shift the weights if a class id were absent from y_train.
class_weights = {
    int(class_id): float(weight)
    for class_id, weight in zip(present_classes, balanced_weights)
}

model.summary()


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb2_notop.h5
[1m31790344/31790344[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 0us/step


In [7]:
# ---------------- #
# Train model
# ---------------- #

# Train the classification head; early stopping decides the real epoch count.
fit_options = dict(
    validation_data=val_dataset,
    epochs=100,
    callbacks=callbacks,
    class_weight=class_weights,
)
history = model.fit(train_dataset, **fit_options)

Epoch 1/100
[1m299/299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 136ms/step - accuracy: 0.3673 - loss: 1.7950 - val_accuracy: 0.6642 - val_loss: 0.8978
Epoch 2/100
[1m299/299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 128ms/step - accuracy: 0.5165 - loss: 1.3558 - val_accuracy: 0.7900 - val_loss: 0.6592
Epoch 3/100
[1m299/299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 129ms/step - accuracy: 0.5408 - loss: 1.2956 - val_accuracy: 0.8114 - val_loss: 0.5425
Epoch 4/100
[1m299/299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 130ms/step - accuracy: 0.5403 - loss: 1.2872 - val_accuracy: 0.8210 - val_loss: 0.5359
Epoch 5/100
[1m299/299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 126ms/step - accuracy: 0.5589 - loss: 1.2259 - val_accuracy: 0.7465 - val_loss: 0.7003
Epoch 6/100
[1m299/299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 126ms/step - accuracy: 0.5758 - loss: 1.1992 - val_accuracy: 0.8009 - val_loss: 0.5939
Epoc

## Fine-tuning

In [8]:
# Reload model (best checkpoint written during the transfer-learning stage)
model = tf.keras.models.load_model('models/' + model_string + '.keras')


# ---------------- #
# Unfreeze
# ---------------- #

N = 78 # Number of layers to freeze

backbone = model.get_layer('efficientnetb2')
trainable_layer_types = (tf.keras.layers.Conv2D, tf.keras.layers.DepthwiseConv2D)

# A backbone layer is fine-tuned only if it is a (depthwise) convolution AND
# it lies after the first N layers; everything else (including every
# BatchNormalization layer) stays frozen. This single pass yields the same
# final flags as the previous "all True -> all False -> convs True ->
# first N False" sequence.
for i, layer in enumerate(backbone.layers):
    layer.trainable = i >= N and isinstance(layer, trainable_layer_types)

# Print layer indices, names, and trainability status
for i, layer in enumerate(backbone.layers):
    print(f"Layer {i}: {layer.name}, Type: {type(layer).__name__}, Trainable: {layer.trainable}")



# -------------------- #
# fine-tune settings
# -------------------- #

# Use a lower learning rate for fine-tuning: start at 1e-5 and decay
# exponentially (factor 0.95 per 1000 optimizer steps).
fine_tune_lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    0.00001, decay_steps=1000, decay_rate=0.95
)

# Lion is the optimizer used here; Adam with the same schedule was the alternative:
#   tf.keras.optimizers.Adam(learning_rate=fine_tune_lr_schedule)
fine_tune_optimizer = tf.keras.optimizers.Lion(learning_rate=fine_tune_lr_schedule)

# Re-compile so the new optimizer and the updated trainable flags take effect.
model.compile(
    optimizer=fine_tune_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Additional callbacks: shorter patience, and a separate checkpoint file
fine_tune_early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=5,
    restore_best_weights=True,
)
fine_tune_checkpoint_path = 'models/' + model_string + '_ft.keras'
fine_tune_checkpoint = ModelCheckpoint(
    fine_tune_checkpoint_path,
    monitor='val_accuracy',
    save_best_only=True,
    mode='max',
)



Layer 0: input_layer_1, Type: InputLayer, Trainable: False
Layer 1: rescaling, Type: Rescaling, Trainable: False
Layer 2: normalization, Type: Normalization, Trainable: False
Layer 3: rescaling_1, Type: Rescaling, Trainable: False
Layer 4: stem_conv_pad, Type: ZeroPadding2D, Trainable: False
Layer 5: stem_conv, Type: Conv2D, Trainable: False
Layer 6: stem_bn, Type: BatchNormalization, Trainable: False
Layer 7: stem_activation, Type: Activation, Trainable: False
Layer 8: block1a_dwconv, Type: DepthwiseConv2D, Trainable: False
Layer 9: block1a_bn, Type: BatchNormalization, Trainable: False
Layer 10: block1a_activation, Type: Activation, Trainable: False
Layer 11: block1a_se_squeeze, Type: GlobalAveragePooling2D, Trainable: False
Layer 12: block1a_se_reshape, Type: Reshape, Trainable: False
Layer 13: block1a_se_reduce, Type: Conv2D, Trainable: False
Layer 14: block1a_se_expand, Type: Conv2D, Trainable: False
Layer 15: block1a_se_excite, Type: Multiply, Trainable: False
Layer 16: block1a_p

In [9]:
# -------------------- #
# Fine-tune
# -------------------- #

# NOTE: no `batch_size` argument here. `train_dataset` is a tf.data.Dataset
# that `prepare_dataset` has already batched (32 samples per batch), and Keras
# does not accept/apply `batch_size` for dataset inputs. To really fine-tune
# with a smaller batch, build a new dataset with
# `prepare_dataset(X_train, y_train, is_training=True, batch_size=16)` and
# pass that instead.
fine_tune_history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=30,
    callbacks=[fine_tune_early_stopping, fine_tune_checkpoint],
    class_weight=class_weights
).history  # keep only the per-epoch metrics dict

Epoch 1/30
[1m299/299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 195ms/step - accuracy: 0.6917 - loss: 0.8666 - val_accuracy: 0.9448 - val_loss: 0.1567
Epoch 2/30
[1m299/299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 182ms/step - accuracy: 0.7895 - loss: 0.6421 - val_accuracy: 0.9636 - val_loss: 0.1069
Epoch 3/30
[1m299/299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 182ms/step - accuracy: 0.8092 - loss: 0.5492 - val_accuracy: 0.9757 - val_loss: 0.0730
Epoch 4/30
[1m299/299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 179ms/step - accuracy: 0.8247 - loss: 0.5000 - val_accuracy: 0.9749 - val_loss: 0.0836
Epoch 5/30
[1m299/299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 182ms/step - accuracy: 0.8411 - loss: 0.4501 - val_accuracy: 0.9812 - val_loss: 0.0607
Epoch 6/30
[1m299/299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 179ms/step - accuracy: 0.8486 - loss: 0.4430 - val_accuracy: 0.9766 - val_loss: 0.0756
Epoch 7/30