Import the packages and set the directory.

In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models
from kerastuner.tuners import RandomSearch
import os
# Define data paths
train_dir = 'train'
val_dir = 'valid'
test_dir = 'test'



Using TensorFlow backend


  from kerastuner.tuners import RandomSearch


Set training, validilating and testing data. Each image has 112 x 112 resolution.  
The dataset can be found at: https://www.kaggle.com/datasets/gpiosenka/100-bird-species

In [5]:
# Define the ImageDataGenerator for training and validation
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

valid_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)  

# Define the training, validation, and test generators
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(112, 112),
    batch_size=32,
    class_mode='categorical'
)

valid_generator = valid_datagen.flow_from_directory(
    val_dir,  # Corrected to val_dir
    target_size=(112, 112),
    batch_size=32,
    class_mode='categorical'
)

test_generator = test_datagen.flow_from_directory(  # Added test generator
    test_dir,
    target_size=(112, 112),
    batch_size=32,
    class_mode='categorical'
)

Found 84635 images belonging to 525 classes.
Found 2625 images belonging to 525 classes.
Found 2625 images belonging to 525 classes.


Define CNNs by the implemention of VGG16, ResNet50 and MobileNetV2.
Reduce learning is set.
Each architecture has the following parameters:
+ the pre-trained base, a flattening layer, a dense layer with 512 units
+ batch normalization, ReLU activation, a dropout layer for regularization
+ the final classification layer with 525 units (representing the bird species) 
+ softmax activation for probability distribution. 
+ Adam optimizer
+ a learning rate of  10^{-3}
+ categorical cross-entropy loss function. 

In [17]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint, TensorBoard

# Reduce learning rate when a metric has stopped improving.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-3)

# Stop training when a monitored quantity has stopped improving.
early_stopping = EarlyStopping(monitor='val_accuracy', patience=3, restore_best_weights=True)

def build_vgg_model():
    base_model = tf.keras.applications.VGG16(weights='imagenet', input_shape=(112, 112, 3), include_top=False)
    base_model.trainable = False

    model = models.Sequential([
        base_model,
        layers.Flatten(),
        layers.Dense(512, kernel_regularizer=l2(1e-4)),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.Dropout(0.2),
        layers.Dense(525),
        layers.BatchNormalization(),
        layers.Activation('softmax')
    ])

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

def build_resnet_model():
    base_model = tf.keras.applications.ResNet50(weights='imagenet', input_shape=(112, 112, 3), include_top=False)
    base_model.trainable = False

    model = models.Sequential([
        base_model,
        layers.Flatten(),
        layers.Dense(512, kernel_regularizer=l2(1e-4)),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.Dropout(0.2),
        layers.Dense(525),
        layers.BatchNormalization(),
        layers.Activation('softmax')
    ])

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

def build_mobilenet_model():
    base_model = tf.keras.applications.MobileNetV2(weights='imagenet', input_shape=(112, 112, 3), include_top=False)
    base_model.trainable = False

    model = models.Sequential([
        base_model,
        layers.Flatten(),
        layers.Dense(512, kernel_regularizer=l2(1e-4)),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.Dropout(0.2),
        layers.Dense(525),
        layers.BatchNormalization(),
        layers.Activation('softmax')
    ])

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model


Train each model with epoch 10. 
+ early stop is set to reduc computional burden
+ Tensorboard is set to monitor loss and accuracy.

In [19]:
# Train VGG model
vgg_model = build_vgg_model()
vgg_checkpoint = ModelCheckpoint(filepath='best_model_vgg_n.h5', save_best_only=True, monitor='val_loss', mode='min', verbose=1)
vgg_tensorboard = TensorBoard(log_dir=os.path.join(os.getcwd(), 'logs/vgg_n'))
vgg_history = vgg_model.fit(train_generator, validation_data=valid_generator, epochs=10, callbacks=[early_stopping, vgg_checkpoint, vgg_tensorboard, reduce_lr])

Epoch 1/10
Epoch 1: val_loss improved from inf to 3.32571, saving model to best_model_vgg_n.h5
Epoch 2/10


  saving_api.save_model(


Epoch 2: val_loss improved from 3.32571 to 2.98703, saving model to best_model_vgg_n.h5
Epoch 3/10
Epoch 3: val_loss improved from 2.98703 to 2.79049, saving model to best_model_vgg_n.h5
Epoch 4/10
Epoch 4: val_loss improved from 2.79049 to 2.68553, saving model to best_model_vgg_n.h5
Epoch 5/10
Epoch 5: val_loss did not improve from 2.68553
Epoch 6/10
Epoch 6: val_loss improved from 2.68553 to 2.58338, saving model to best_model_vgg_n.h5
Epoch 7/10
Epoch 7: val_loss improved from 2.58338 to 2.56924, saving model to best_model_vgg_n.h5
Epoch 8/10
Epoch 8: val_loss improved from 2.56924 to 2.54874, saving model to best_model_vgg_n.h5
Epoch 9/10
Epoch 9: val_loss improved from 2.54874 to 2.50257, saving model to best_model_vgg_n.h5
Epoch 10/10
Epoch 10: val_loss improved from 2.50257 to 2.49352, saving model to best_model_vgg_n.h5


In [20]:
# Train ResNet model
resnet_model = build_resnet_model()
resnet_checkpoint = ModelCheckpoint(filepath='best_model_resnet_n.h5', save_best_only=True, monitor='val_loss', mode='min', verbose=1)
resnet_tensorboard = TensorBoard(log_dir=os.path.join(os.getcwd(), 'logs/resnet_n'))
resnet_history = resnet_model.fit(train_generator, validation_data=valid_generator, epochs=10, callbacks=[early_stopping, resnet_checkpoint, resnet_tensorboard, reduce_lr])

Epoch 1/10
Epoch 1: val_loss improved from inf to 6.44562, saving model to best_model_resnet_n.h5
Epoch 2/10
Epoch 2: val_loss improved from 6.44562 to 5.91726, saving model to best_model_resnet_n.h5
Epoch 3/10
Epoch 3: val_loss did not improve from 5.91726
Epoch 4/10
Epoch 4: val_loss improved from 5.91726 to 5.71571, saving model to best_model_resnet_n.h5
Epoch 5/10
Epoch 5: val_loss improved from 5.71571 to 5.60896, saving model to best_model_resnet_n.h5
Epoch 6/10
Epoch 6: val_loss improved from 5.60896 to 5.54454, saving model to best_model_resnet_n.h5
Epoch 7/10
Epoch 7: val_loss did not improve from 5.54454
Epoch 8/10
Epoch 8: val_loss did not improve from 5.54454
Epoch 9/10
Epoch 9: val_loss did not improve from 5.54454
Epoch 10/10
Epoch 10: val_loss did not improve from 5.54454


In [21]:
# Train MobileNet model
mobilenet_model = build_mobilenet_model()
mobilenet_checkpoint = ModelCheckpoint(filepath='best_model_mobilenet_n.h5', save_best_only=True, monitor='val_loss', mode='min', verbose=1)
mobilenet_tensorboard = TensorBoard(log_dir=os.path.join(os.getcwd(), 'logs/mobilenet_n'))
mobilenet_history = mobilenet_model.fit(train_generator, validation_data=valid_generator, epochs=10, callbacks=[early_stopping, mobilenet_checkpoint, mobilenet_tensorboard, reduce_lr])

Epoch 1/10
Epoch 1: val_loss improved from inf to 2.55653, saving model to best_model_mobilenet_n.h5
Epoch 2/10
Epoch 2: val_loss improved from 2.55653 to 2.39618, saving model to best_model_mobilenet_n.h5
Epoch 3/10
Epoch 3: val_loss improved from 2.39618 to 2.37381, saving model to best_model_mobilenet_n.h5
Epoch 4/10
Epoch 4: val_loss improved from 2.37381 to 2.31300, saving model to best_model_mobilenet_n.h5
Epoch 5/10
Epoch 5: val_loss improved from 2.31300 to 2.28259, saving model to best_model_mobilenet_n.h5
Epoch 6/10
Epoch 6: val_loss did not improve from 2.28259
Epoch 7/10
Epoch 7: val_loss improved from 2.28259 to 2.24897, saving model to best_model_mobilenet_n.h5
Epoch 8/10
Epoch 8: val_loss did not improve from 2.24897
Epoch 9/10
Epoch 9: val_loss did not improve from 2.24897
Epoch 10/10
Epoch 10: val_loss did not improve from 2.24897


Open Tensorboard to monitor loss and accuracy during the training

In [23]:
%load_ext tensorboard
%tensorboard --logdir logs

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 9300), started 16:30:38 ago. (Use '!kill 9300' to kill it.)

The trained models for each architectures are loaded and utilised to predict bird species of the test data to obtain the accuracies.

In [22]:
from tensorflow.keras.models import load_model
import os

# Define the architecture names
architectures = ['vgg', 'resnet', 'mobilenet']

# Load the best models for each architecture
models = {}
for arch in architectures:
    model_path = f'best_model_{arch}_n.h5'
    if os.path.exists(model_path):  # Check if the model file exists
        models[arch] = load_model(model_path)
    else:
        print(f"Model file for {arch} not found!")

# Evaluate each model on the test set
accuracies = {}
for arch, model in models.items():
    loss, acc = model.evaluate(test_generator, verbose=0)
    accuracies[arch] = acc

accuracies

{'vgg': 0.5580952167510986,
 'resnet': 0.0476190485060215,
 'mobilenet': 0.6666666865348816}