### MobileNet model transfer learning: 86 layers + 12 additional layers (fine-tuned)

In [4]:
import os
import cv2
import csv
import numpy as np
import pandas as pd
import random
import gc
import tensorflow as tf
import keras_tuner as kt
from tensorflow import keras
from tensorflow.keras.applications import MobileNet
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.regularizers import l2
from tensorflow.keras.layers import Input, Flatten, Dense, Dropout, Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, LearningRateScheduler
from tensorflow.keras.mixed_precision import set_global_policy
from sklearn.model_selection import train_test_split

In [5]:
# Run Keras layers under the mixed-precision policy for the rest of the notebook
MIXED_PRECISION_POLICY = 'mixed_float16'
set_global_policy(MIXED_PRECISION_POLICY)

### Preprocessing images

In [6]:
# Use the GPU(s) when present and let TensorFlow grow GPU memory on demand
gpus = tf.config.experimental.list_physical_devices('GPU')
try:
    # Memory growth has to be configured identically on every GPU, so it is
    # applied to all of them; when no GPU is found the loop does nothing.
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as err:
    # Memory growth can only be set at program startup; report and carry on
    print(err)

In [6]:
# Read the preprocessed train/validation arrays from disk (same order as saved:
# training images, validation images, training labels, validation labels)
X_train, X_val, y_train, y_val = (
    np.load(npy_path)
    for npy_path in ('X_train-s.npy', 'X_val-s.npy', 'y_train-s.npy', 'y_val-s.npy')
)

### Model training

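The model built below extends the pretrained MobileNet base in four ways:

- Include dropout layers: dropout after each added convolutional block and after the dense layer helps prevent overfitting.
- Add additional convolutional layers: two extra Conv2D blocks on top of the base allow the model to learn more complex features.
- Enable fine-tuning: only the first `freeze_layers` layers of the base are frozen, so the remaining layers are fine-tuned during training.
- Add regularization to the dense layer: an L2 penalty on the hidden dense layer's weights helps avoid overfitting.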

In [7]:
def build_mobilenet_model(hp):
    """Build and compile a MobileNet-based 26-class classifier.

    The function is used both as the KerasTuner hypermodel and to rebuild the
    final model from the best hyperparameters.

    Args:
        hp: A KerasTuner hyperparameter container. The following entries are
            registered/read: ``freeze_layers``, ``dense_units``,
            ``dropout_rate``, ``learning_rate`` and ``l2_reg``.

    Returns:
        A compiled Keras ``Model`` taking 224x224x3 images and producing a
        26-way softmax (categorical cross-entropy loss, accuracy metric).
    """
    # Hyperparameters (names and ranges are kept as-is so previously saved
    # tuner state remains compatible)
    freeze_layers = hp.Int('freeze_layers', min_value=0, max_value=20, step=5)
    dense_units = hp.Int('dense_units', min_value=32, max_value=128, step=16)
    dropout_rate = hp.Float('dropout_rate', min_value=0.2, max_value=0.5, step=0.1)
    learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    l2_reg = hp.Float('l2_reg', min_value=1e-5, max_value=1e-2, step=1e-5)

    # MobileNet base model (ImageNet weights, classification head removed)
    image_input = Input(shape=(224, 224, 3))
    base_model = MobileNet(weights='imagenet', include_top=False, input_tensor=image_input)
    print("Initial number of layers in the base model:", len(base_model.layers))

    # Freeze the first `freeze_layers` layers; every later layer stays
    # trainable and is fine-tuned.
    for layer in base_model.layers[:freeze_layers]:
        layer.trainable = False

    # Custom layers on top of MobileNet
    x = base_model.output

    # Additional convolutional blocks: Conv -> BatchNorm -> MaxPool -> Dropout
    x = Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(dropout_rate)(x)

    x = Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(dropout_rate)(x)

    # Flatten the output
    x = Flatten()(x)

    # Fully connected layers
    x = Dense(dense_units, activation='relu', kernel_regularizer=l2(l2_reg))(x)
    x = Dropout(dropout_rate)(x)
    # 26 classes for A-Z. The notebook enables the global 'mixed_float16'
    # policy, so this output layer is pinned to float32: computing the softmax
    # (and the loss that consumes it) in float16 is numerically unstable.
    x = Dense(26, activation='softmax', dtype='float32')(x)

    model = Model(inputs=image_input, outputs=x)
    print("Total number of layers in the model:", len(model.layers))

    # Compile the model
    model.compile(optimizer=keras.optimizers.Adam(learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])

    return model

In [9]:
# Early stopping for the search: give up on a trial once val_loss has gone
# 3 epochs without improving
early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=3,
    verbose=1,
)

# Hyperband configuration for the hyperparameter search
hyperband_settings = dict(
    objective='val_accuracy',
    max_epochs=10,
    factor=3,
    hyperband_iterations=2,  # Number of times to iterate over the full Hyperband algorithm
    directory='mobilenet-model-tuning',
    project_name='mobilenet-tuning',
)
tuner = kt.Hyperband(build_mobilenet_model, **hyperband_settings)

Reloading Tuner from mobilenet-model-tuning/mobilenet-tuning/tuner0.json


In [None]:
# Run the hyperparameter search, scoring every trial on the validation split
tuner.search(
    X_train,
    y_train,
    epochs=10,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
)

In [10]:
# Best hyperparameter set found by the tuner
best_hp = tuner.get_best_hyperparameters()[0]

# Report every hyperparameter in the search space with its best value
for param in best_hp.space:
    param_name = param.name
    print(f"{param_name}: {best_hp.get(param_name)}")

freeze_layers: 15
dense_units: 64
dropout_rate: 0.30000000000000004
learning_rate: 0.0001
l2_reg: 0.00595


In [11]:
# Keep only the trials the oracle marks as completed
trials = [trial for trial in tuner.oracle.trials.values() if trial.status == 'COMPLETED']

# One CSV row per completed trial, numbered from 1 in iteration order;
# the hyperparameter dict is written as a single cell
header_row = ["Trial Number", "Hyperparameters", "Validation Accuracy"]
trial_rows = [
    [f"Trial {number}", trial.hyperparameters.values, trial.score]
    for number, trial in enumerate(trials, start=1)
]
data_to_save = [header_row] + trial_rows

# Write header + rows to disk
with open('hyperparameter_trials.csv', 'w', newline='') as csv_file:
    csv.writer(csv_file).writerows(data_to_save)


In [12]:
# Define callbacks
def scheduler(epoch, lr):
    """Learning-rate schedule: constant at first, then exponential decay.

    Args:
        epoch: Epoch index passed in by the LearningRateScheduler callback.
        lr: Current learning rate.

    Returns:
        ``lr`` unchanged while ``epoch < 10``; afterwards ``lr * exp(-0.1)``
        as a plain Python float (the callback expects a float, not a tensor).

    NOTE(review): the training call in this notebook uses epochs=10; if epoch
    indices are zero-based the decay branch is never reached — confirm intent.
    """
    if epoch < 10:
        return lr
    # np.exp keeps the result a host-side number; float() guarantees the
    # built-in type regardless of what numeric type `lr` arrives as.
    return float(lr * np.exp(-0.1))

# Stop training when val_loss has not improved for 5 epochs
early_stopping = EarlyStopping(patience=5, monitor='val_loss')

# Write the model to disk only when val_loss reaches a new minimum
model_checkpoint = ModelCheckpoint(
    'mobilenet-model.h5',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
)

# Combine all callbacks
callbacks_list = [LearningRateScheduler(scheduler), early_stopping, model_checkpoint]

# Rebuild the network from the best hyperparameters and train it
model = build_mobilenet_model(best_hp)
history = model.fit(
    X_train,
    y_train,
    epochs=10,  # Adjust epochs based on HP << !!!!
    validation_data=(X_val, y_val),
    callbacks=callbacks_list,
    verbose=1,
)



2023-11-14 18:44:46.096834: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-11-14 18:44:46.098791: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-11-14 18:44:46.100690: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

Initial number of layers in the base model: 86
Total number of layers in the model: 98
Epoch 1/10


2023-11-14 18:45:11.931853: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:442] Loaded cuDNN version 8900


  2/487 [..............................] - ETA: 26s - loss: 5.9995 - accuracy: 0.0312    

2023-11-14 18:45:15.435156: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f4140004b60 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-11-14 18:45:15.435193: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
2023-11-14 18:45:15.435200: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
2023-11-14 18:45:15.435205: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (2): Tesla T4, Compute Capability 7.5
2023-11-14 18:45:15.435211: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (3): Tesla T4, Compute Capability 7.5
2023-11-14 18:45:15.442135: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-11-14 18:45:15.574885: I ./tensorflow/compiler/jit/device



  saving_api.save_model(


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


### Model output

In [13]:
# Per-epoch values recorded by model.fit
run_log = history.history
epoch_count = len(run_log['loss'])

# One row per epoch, numbered from 1
metrics_df = pd.DataFrame({
    'Epoch': range(1, epoch_count + 1),
    'Loss': run_log['loss'],
    'Accuracy': run_log['accuracy'],
    'Val_Loss': run_log['val_loss'],
    'Val_Accuracy': run_log['val_accuracy'],
    'Learning_Rate': run_log['lr'],
})

# Persist the metrics table as CSV (no index column)
metrics_df.to_csv('mobilenet-metrics.csv', index=False)

# Persist the full model as it stands after training
model.save('mobilenet-fullmodel-full.h5')
