In [1]:
import math
import numpy as np
from keras.layers import (
    Dense,
    Activation,
    Dropout,
    Flatten,
    AveragePooling2D,
)
from keras_preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.optimizers import Adam
from keras.models import Model
from keras.callbacks import LearningRateScheduler
from tensorflow.keras.applications import InceptionV3
from keras import backend as K
from keras.utils.generic_utils import get_custom_objects
from google.colab import drive
# Mount Google Drive so the dataset and the saved models are reachable from Colab
drive.mount("/content/drive/")

# Root folder of the bird dataset. Sub-folder names such as 'train_images/' are
# appended to it by plain string concatenation, so the trailing slash is required.
# (The previous concatenation produced a doubled slash: 'bird_classification//data/'.)
orig_path = '/content/drive/My Drive/bird_classification/data/bird_dataset/'

Mounted at /content/drive/


In [2]:
# Seed every random number generator used by the notebook so that runs are
# as repeatable as possible.
import os
import random

import tensorflow as tf

# One shared seed (a different value could be used for each generator below)
seed_value = 0

# 1. Hash seed of the Python interpreter.
#    NOTE(review): PYTHONHASHSEED is read at interpreter start-up, so setting it
#    here presumably only affects child processes, not this kernel - confirm.
os.environ['PYTHONHASHSEED'] = str(seed_value)

# 2. Python's built-in pseudo-random generator
random.seed(seed_value)

# 3. NumPy's global pseudo-random generator
np.random.seed(seed_value)

# 4. TensorFlow's global pseudo-random generator
#    (TensorFlow 2.x API; the TensorFlow 1.x spelling was tf.set_random_seed)
tf.random.set_seed(seed_value)

# Original Images

## Loading Data

In [3]:
# Resolution the images are resized to, and batch sizes of the two generators
img_rows, img_cols = 334, 334
train_batchsize = 16
val_batchsize = 16

# Training pipeline: scale pixel values by 1/255 and apply random augmentation
# (rotation, horizontal/vertical shifts, brightness changes, horizontal flips)
train_datagen = ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True,
    rotation_range=30,
    width_shift_range=0.3,
    height_shift_range=0.3,
    brightness_range=[0.2, 1.2],
)

# Validation pipeline: rescaling only, no augmentation
validation_datagen = ImageDataGenerator(rescale=1./255)

# One sub-directory per class; labels are one-hot encoded ('categorical')
train_generator = train_datagen.flow_from_directory(
    orig_path + 'train_images/',
    target_size=(img_rows, img_cols),
    interpolation='bicubic',
    class_mode='categorical',
    batch_size=train_batchsize,
)

# shuffle=False keeps the validation batches in a fixed order
validation_generator = validation_datagen.flow_from_directory(
    orig_path + 'val_images/',
    target_size=(img_rows, img_cols),
    interpolation='bicubic',
    class_mode='categorical',
    batch_size=val_batchsize,
    shuffle=False,
)

Found 1082 images belonging to 20 classes.
Found 103 images belonging to 20 classes.


## Utils

In [4]:
# Swish activation: the input scaled by its own sigmoid
def swish(x):
    """Return `sigmoid(x) * x`, computed with the Keras backend."""
    gate = K.sigmoid(x)
    return gate * x


# Register the activation under the name "swish" so that layers can request it
# with activation="swish"
get_custom_objects().update({"swish": Activation(swish)})


# Learning-rate step decay: multiply the rate by 0.1 (1e-1) after every 4 epochs
def step_decay(epoch, *, initial_lrate=0.001, drop=0.1, epochs_drop=4.0):
    """Return the learning rate to use for `epoch` under a step-decay schedule.

    The rate starts at `initial_lrate` and is multiplied by `drop` once every
    `epochs_drop` epochs:
    ``initial_lrate * drop ** floor(epoch / epochs_drop)``.

    Args:
        epoch: Epoch index, as passed by `LearningRateScheduler` (starts at 0).
        initial_lrate: Learning rate used during the first `epochs_drop` epochs.
        drop: Multiplicative factor applied at every step.
        epochs_drop: Number of epochs between two consecutive drops; must be > 0.

    Returns:
        The learning rate for `epoch` as a float.

    Raises:
        ValueError: If `epochs_drop` is not strictly positive.
    """
    # The tuning knobs are keyword-only on purpose: Keras' LearningRateScheduler
    # first tries schedule(epoch, lr) and falls back to schedule(epoch) on a
    # TypeError, so a second positional parameter would silently receive the
    # optimizer's current learning rate.
    if epochs_drop <= 0:
        raise ValueError("epochs_drop must be strictly positive")
    return initial_lrate * math.pow(drop, math.floor(epoch / epochs_drop))

# Precision metric
def precision(y_true, y_pred):
    """Precision metric.

    Share of the predicted positives that are also true positives, computed on
    the tensors passed in. Products and predictions are clipped to [0, 1] and
    rounded before being summed; `K.epsilon()` guards against division by zero.
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    flagged = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return hits / (flagged + K.epsilon())


# Recall metric
def recall(y_true, y_pred):
    """Recall metric.

    Share of the actual positives that are also predicted as positive, computed
    on the tensors passed in. Products and labels are clipped to [0, 1] and
    rounded before being summed; `K.epsilon()` guards against division by zero.
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    relevant = K.sum(K.round(K.clip(y_true, 0, 1)))
    return hits / (relevant + K.epsilon())


# F1 score: harmonic mean of precision and recall
def f1(y_true, y_pred):
    """F1 metric.

    Combines the module-level `precision` and `recall` metrics as
    ``2 * p * r / (p + r + epsilon)``; `K.epsilon()` avoids a division by zero
    when both are zero.

    Args:
        y_true: Ground-truth labels tensor.
        y_pred: Predictions tensor.

    Returns:
        The F1 score as a scalar tensor.
    """
    # Reuse the shared metric functions instead of re-defining nested copies
    # that shadow them.
    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    return 2 * ((p * r) / (p + r + K.epsilon()))

## Phase 1 : Train only the new top layers (InceptionV3 frozen)

In [None]:
# ImageNet-pretrained InceptionV3 backbone without its classification head.
# The input size follows img_rows/img_cols so that it always matches the
# target_size used by the data generators.
base_model = InceptionV3(
        include_top=False,
        weights='imagenet',
        input_tensor=None,
        input_shape=(img_rows, img_cols, 3)
    )

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Freeze the whole InceptionV3 backbone so that only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

# Classification head stacked on the backbone output.
# NOTE(review): an 8x8 pooling window with stride 8 covers the whole feature map
# only if the backbone output is exactly 8x8; otherwise trailing rows/columns are
# presumably ignored - check model.summary().
x = AveragePooling2D(pool_size=(8, 8), strides=(8, 8), name="avg_pool")(base_model.output)
x = Flatten(name="flatten")(x)
x = Dense(units=512, activation="swish", kernel_initializer="he_uniform", name="dense_1")(x)
x = Dropout(rate=0.3)(x)
predictions = Dense(units=20, activation="softmax", kernel_initializer="he_uniform", name="predictions")(x)
model = Model(inputs=base_model.input, outputs=predictions)

# Compile with categorical cross-entropy and the custom metrics.
# NOTE(review): the training cell passes LearningRateScheduler(step_decay), which
# starts at 0.001, so this 0.0001 presumably never takes effect - confirm intent.
adam = Adam(0.0001)
model.compile(
    optimizer=adam,
    loss="categorical_crossentropy",
    metrics=[precision, recall, f1, 'acc'],
)

In [None]:
# Step-decay learning-rate schedule and "keep the best val_acc" checkpointing
lrate = LearningRateScheduler(step_decay)
checkpoint = ModelCheckpoint("/content/drive/My Drive/bird_classification/models/inception_v3_freezed.h5",
                             monitor="val_acc",
                             mode="max",
                             save_best_only=True,
                             verbose=1)

# Sample counts and batch size are read from the generators instead of being
# copied by hand from their "Found N images" output.
nb_train_samples = train_generator.samples
nb_validation_samples = validation_generator.samples
epochs = 12
batch_size = train_generator.batch_size

# Model.fit accepts generators directly (fit_generator is deprecated).
# validation_steps uses len(validation_generator), i.e. the number of batches
# including the last partial one; `nb_validation_samples // batch_size` skipped
# the trailing images, so val_acc was monitored on a subset of the validation set.
history = model.fit(train_generator,
                    steps_per_epoch=nb_train_samples // batch_size,
                    epochs=epochs,
                    callbacks=[lrate, checkpoint],
                    validation_data=validation_generator,
                    validation_steps=len(validation_generator))

In [None]:
# Evaluate the in-memory model (weights as left by training, not the best
# checkpoint written by ModelCheckpoint) on the validation generator.
# `score` holds the loss followed by the compiled metrics: precision, recall, f1, acc.
score = model.evaluate(validation_generator, verbose=1)



- Loss : 0.4786
- Precision : 0.8730
- Recall : 0.7857
- f1_score : 0.8233
- accuracy : 0.8155

## Phase 2 : Retrain the last layers of InceptionV3

In [None]:
# Number of layers in the backbone - handy for choosing how many trailing
# layers to unfreeze in the fine-tuning cells
len(base_model.layers)

311

In [5]:
# Fresh ImageNet-pretrained InceptionV3 backbone without its classification head.
# The input size follows img_rows/img_cols so that it always matches the
# target_size used by the data generators.
base_model = InceptionV3(
        include_top=False,
        weights='imagenet',
        input_tensor=None,
        input_shape=(img_rows, img_cols, 3)
    )

# Fine-tune only the last 22 backbone layers; everything before them stays frozen
n_trainable_layers = 22
for layer in base_model.layers[:-n_trainable_layers]:
    layer.trainable = False
for layer in base_model.layers[-n_trainable_layers:]:
    layer.trainable = True

# Add final layers
# NOTE(review): an 8x8 pooling window with stride 8 covers the whole feature map
# only if the backbone output is exactly 8x8; otherwise trailing rows/columns are
# presumably ignored - check model_retrain.summary().
x = base_model.output
x = AveragePooling2D((8, 8), strides=(8, 8), name="avg_pool")(x)
x = Flatten(name="flatten")(x)
x = Dense(
          512,
          activation="swish",
          name="dense_1",
          kernel_initializer="he_uniform")(x)
x = Dropout(0.25)(x)
predictions = Dense(
    20,
    activation="softmax",
    name="predictions",
    kernel_initializer="he_uniform")(x)
model_retrain = Model(inputs=base_model.input, outputs=predictions)

# Compile Model
# NOTE(review): the training cell passes LearningRateScheduler(step_decay), which
# starts at 0.001, so this 0.0001 presumably never takes effect - confirm intent.
adam = Adam(0.0001)
model_retrain.compile(loss="categorical_crossentropy",
                      optimizer=adam,
                      metrics=[precision, recall, f1, 'acc'])

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Step-decay learning-rate schedule and "keep the best val_acc" checkpointing
lrate = LearningRateScheduler(step_decay)
checkpoint = ModelCheckpoint("/content/drive/My Drive/bird_classification/models/inception_v3_retrained.h5",
                             monitor="val_acc",
                             mode="max",
                             save_best_only=True,
                             verbose=1)

# Sample counts and batch size are read from the generators instead of being
# copied by hand from their "Found N images" output.
nb_train_samples = train_generator.samples
nb_validation_samples = validation_generator.samples
epochs = 12
batch_size = train_generator.batch_size

# Model.fit accepts generators directly (fit_generator is deprecated).
# validation_steps uses len(validation_generator), i.e. the number of batches
# including the last partial one; `nb_validation_samples // batch_size` skipped
# the trailing images, so val_acc was monitored on a subset of the validation set.
history = model_retrain.fit(train_generator,
                            steps_per_epoch=nb_train_samples // batch_size,
                            epochs=epochs,
                            callbacks=[lrate, checkpoint],
                            validation_data=validation_generator,
                            validation_steps=len(validation_generator))

In [None]:
# Evaluate the in-memory fine-tuned model (weights as left by training, not the
# best checkpoint written by ModelCheckpoint) on the validation generator.
# `score` holds the loss followed by the compiled metrics: precision, recall, f1, acc.
score = model_retrain.evaluate(validation_generator, verbose=1)



- Loss : 0.3717
- Precision : 0.9004
- Recall : 0.8482
- f1_score : 0.8724
- accuracy : 0.8641

Good news: unfreezing the last layers improves the performance of the model. However, I think it overfits a bit, so this time we will reduce the number of layers to retrain.

### Phase 3 : Retrain 5 fewer layers

In [8]:
# Fresh ImageNet-pretrained InceptionV3 backbone without its classification head.
# The input size follows img_rows/img_cols so that it always matches the
# target_size used by the data generators.
base_model = InceptionV3(
        include_top=False,
        weights='imagenet',
        input_tensor=None,
        input_shape=(img_rows, img_cols, 3)
    )

# Fine-tune only the last 17 backbone layers; everything before them stays frozen
n_trainable_layers = 17
for layer in base_model.layers[:-n_trainable_layers]:
    layer.trainable = False
for layer in base_model.layers[-n_trainable_layers:]:
    layer.trainable = True

# Add final layers. Note that this run uses ReLU (not swish) in dense_1.
# NOTE(review): an 8x8 pooling window with stride 8 covers the whole feature map
# only if the backbone output is exactly 8x8; otherwise trailing rows/columns are
# presumably ignored - check model_retrain_v2.summary().
x = base_model.output
x = AveragePooling2D((8, 8), strides=(8, 8), name="avg_pool")(x)
x = Flatten(name="flatten")(x)
x = Dense(
          512,
          activation='relu',
          name="dense_1",
          kernel_initializer="he_uniform")(x)
x = Dropout(0.25)(x)
predictions = Dense(
    20,
    activation="softmax",
    name="predictions",
    kernel_initializer="he_uniform")(x)
model_retrain_v2 = Model(inputs=base_model.input, outputs=predictions)

# Compile Model
# NOTE(review): the training cell passes LearningRateScheduler(step_decay), which
# starts at 0.001, so this 0.0001 presumably never takes effect - confirm intent.
adam = Adam(0.0001)
model_retrain_v2.compile(loss="categorical_crossentropy",
                         optimizer=adam,
                         metrics=[precision, recall, f1, 'acc'])

In [None]:
# Step-decay learning-rate schedule and "keep the best val_acc" checkpointing
lrate = LearningRateScheduler(step_decay)
checkpoint = ModelCheckpoint("/content/drive/My Drive/bird_classification/models/inception_v3_relu_retrained_v2.h5",
                             monitor="val_acc",
                             mode="max",
                             save_best_only=True,
                             verbose=1)

# Sample counts and batch size are read from the generators instead of being
# copied by hand from their "Found N images" output.
nb_train_samples = train_generator.samples
nb_validation_samples = validation_generator.samples
epochs = 12
batch_size = train_generator.batch_size

# Model.fit accepts generators directly (fit_generator is deprecated).
# validation_steps uses len(validation_generator), i.e. the number of batches
# including the last partial one; `nb_validation_samples // batch_size` skipped
# the trailing images, so val_acc was monitored on a subset of the validation set.
history = model_retrain_v2.fit(train_generator,
                               steps_per_epoch=nb_train_samples // batch_size,
                               epochs=epochs,
                               callbacks=[lrate, checkpoint],
                               validation_data=validation_generator,
                               validation_steps=len(validation_generator))

In [None]:
# Evaluate on the training generator. This generator still applies random
# augmentation, so the score is measured on augmented training images.
score = model_retrain_v2.evaluate(train_generator, verbose=1)



In [None]:
# Evaluate the in-memory model (weights as left by training, not the best
# checkpoint written by ModelCheckpoint) on the validation generator.
# `score` holds the loss followed by the compiled metrics: precision, recall, f1, acc.
score = model_retrain_v2.evaluate(validation_generator, verbose=1)



- Loss : 0.3406
- Precision : 0.9249
- Recall : 0.8393
- f1_score : 0.8777
- accuracy : 0.8738

It's better than before.
I would also add that this model performs well on the train set too (better than the other models).

# Cropped Images

In [None]:
# Resolution the cropped images are resized to, and batch sizes of the generators
img_rows, img_cols = 224, 224
train_batchsize = 16
val_batchsize = 16

# Training pipeline: scale pixel values by 1/255 and apply random augmentation
# (rotation, horizontal/vertical shifts, brightness changes, horizontal flips)
train_datagen = ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True,
    rotation_range=30,
    width_shift_range=0.3,
    height_shift_range=0.3,
    brightness_range=[0.2, 1.2],
)

# Validation pipeline: rescaling only, no augmentation
validation_datagen = ImageDataGenerator(rescale=1./255)

# One sub-directory per class; labels are one-hot encoded ('categorical')
train_generator = train_datagen.flow_from_directory(
    orig_path + 'train_images_cropped/',
    target_size=(img_rows, img_cols),
    interpolation='bicubic',
    class_mode='categorical',
    batch_size=train_batchsize,
)

# shuffle=False keeps the validation batches in a fixed order
validation_generator = validation_datagen.flow_from_directory(
    orig_path + 'val_images_cropped/',
    target_size=(img_rows, img_cols),
    interpolation='bicubic',
    class_mode='categorical',
    batch_size=val_batchsize,
    shuffle=False,
)

Found 941 images belonging to 20 classes.
Found 92 images belonging to 20 classes.


In [None]:
# ImageNet-pretrained InceptionV3 backbone without its classification head.
# The input size follows img_rows/img_cols so that it always matches the
# target_size used by the data generators.
base_model = InceptionV3(
        include_top=False,
        weights='imagenet',
        input_tensor=None,
        input_shape=(img_rows, img_cols, 3)
    )

# Freeze the whole InceptionV3 backbone so that only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

# Add final layers
# NOTE(review): a 4x4 pooling window with stride 4 covers the whole feature map
# only if the backbone output is exactly 4x4; otherwise trailing rows/columns are
# presumably ignored - check model.summary().
x = base_model.output
x = AveragePooling2D((4, 4), strides=(4, 4), name="avg_pool")(x)
x = Flatten(name="flatten")(x)
x = Dense(
          512,
          activation="swish",
          name="dense_1",
          kernel_initializer="he_uniform")(x)
x = Dropout(0.25)(x)
predictions = Dense(
    20,
    activation="softmax",
    name="predictions",
    kernel_initializer="he_uniform")(x)
model = Model(inputs=base_model.input, outputs=predictions)

# Compile Model
# NOTE(review): LearningRateScheduler(step_decay) below starts at 0.001, so this
# 0.0001 presumably never takes effect - confirm which value is intended.
adam = Adam(0.0001)
model.compile(loss="categorical_crossentropy",
              optimizer=adam,
              metrics=[precision, recall, f1, 'acc'])

# Step-decay learning-rate schedule and "keep the best val_acc" checkpointing
lrate = LearningRateScheduler(step_decay)
checkpoint = ModelCheckpoint("/content/drive/My Drive/bird_classification/models/inception_v3_cropped_freezed.h5",
                             monitor="val_acc",
                             mode="max",
                             save_best_only=True,
                             verbose=1)

# Sample counts and batch size are read from the generators instead of being
# copied by hand from their "Found N images" output.
nb_train_samples = train_generator.samples
nb_validation_samples = validation_generator.samples
epochs = 12
batch_size = train_generator.batch_size

# Model.fit accepts generators directly (fit_generator is deprecated).
# validation_steps uses len(validation_generator), i.e. the number of batches
# including the last partial one; `nb_validation_samples // batch_size` skipped
# the trailing images, so val_acc was monitored on a subset of the validation set.
history = model.fit(train_generator,
                    steps_per_epoch=nb_train_samples // batch_size,
                    epochs=epochs,
                    callbacks=[lrate, checkpoint],
                    validation_data=validation_generator,
                    validation_steps=len(validation_generator))

In [None]:
# Evaluate on the training generator. This generator still applies random
# augmentation, so the score is measured on augmented training images.
score = model.evaluate(train_generator, verbose=1)



In [None]:
# Evaluate the in-memory model (weights as left by training, not the best
# checkpoint written by ModelCheckpoint) on the validation generator.
# `score` holds the loss followed by the compiled metrics: precision, recall, f1, acc.
score = model.evaluate(validation_generator, verbose=1)



- Loss : 0.5040
- Precision : 0.8746
- Recall : 0.7674
- f1_score : 0.8113
- accuracy : 0.8370

## Last layers retrainable

In [None]:
# Fresh ImageNet-pretrained InceptionV3 backbone without its classification head.
# The input size follows img_rows/img_cols so that it always matches the
# target_size used by the data generators.
base_model = InceptionV3(
        include_top=False,
        weights='imagenet',
        input_tensor=None,
        input_shape=(img_rows, img_cols, 3)
    )

# Fine-tune only the last 17 backbone layers; everything before them stays frozen
n_trainable_layers = 17
for layer in base_model.layers[:-n_trainable_layers]:
    layer.trainable = False
for layer in base_model.layers[-n_trainable_layers:]:
    layer.trainable = True

# Add final layers
# NOTE(review): a 4x4 pooling window with stride 4 covers the whole feature map
# only if the backbone output is exactly 4x4; otherwise trailing rows/columns are
# presumably ignored - check model_retrain.summary().
x = base_model.output
x = AveragePooling2D((4, 4), strides=(4, 4), name="avg_pool")(x)
x = Flatten(name="flatten")(x)
x = Dense(
          512,
          activation="swish",
          name="dense_1",
          kernel_initializer="he_uniform")(x)
x = Dropout(0.25)(x)
predictions = Dense(
    20,
    activation="softmax",
    name="predictions",
    kernel_initializer="he_uniform")(x)
model_retrain = Model(inputs=base_model.input, outputs=predictions)

# Compile Model
# NOTE(review): LearningRateScheduler(step_decay) below starts at 0.001, so this
# 0.0001 presumably never takes effect - confirm which value is intended.
adam = Adam(0.0001)
model_retrain.compile(loss="categorical_crossentropy",
                      optimizer=adam,
                      metrics=[precision, recall, f1, 'acc'])

# Step-decay learning-rate schedule and "keep the best val_acc" checkpointing
lrate = LearningRateScheduler(step_decay)
checkpoint = ModelCheckpoint("/content/drive/My Drive/bird_classification/models/inception_v3_cropped_retrained.h5",
                             monitor="val_acc",
                             mode="max",
                             save_best_only=True,
                             verbose=1)

# Sample counts and batch size are read from the generators instead of being
# copied by hand from their "Found N images" output.
nb_train_samples = train_generator.samples
nb_validation_samples = validation_generator.samples
epochs = 12
batch_size = train_generator.batch_size

# Model.fit accepts generators directly (fit_generator is deprecated).
# validation_steps uses len(validation_generator), i.e. the number of batches
# including the last partial one; `nb_validation_samples // batch_size` skipped
# the trailing images, so val_acc was monitored on a subset of the validation set.
history = model_retrain.fit(train_generator,
                            steps_per_epoch=nb_train_samples // batch_size,
                            epochs=epochs,
                            callbacks=[lrate, checkpoint],
                            validation_data=validation_generator,
                            validation_steps=len(validation_generator))

In [None]:
# Evaluate on the training generator. This generator still applies random
# augmentation, so the score is measured on augmented training images.
score = model_retrain.evaluate(train_generator, verbose=1)



In [None]:
# Evaluate the in-memory model (weights as left by training, not the best
# checkpoint written by ModelCheckpoint) on the validation generator.
# `score` holds the loss followed by the compiled metrics: precision, recall, f1, acc.
score = model_retrain.evaluate(validation_generator, verbose=1)



- Loss : 0.3911
- Precision : 0.9194
- Recall : 0.8472
- f1_score : 0.8801
- accuracy : 0.8804