# Step-5 Notebook for training ResNet and Combined/Fusion model with and without Image Augmentation 

In [1]:
# common library imports
import numpy as np
import os
import pandas as pd
from os import listdir, path
import pickle
from typing import Dict, List, Tuple
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, MaxPooling2D, GlobalAveragePooling2D, Dropout, Dense, Activation, add,concatenate
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.initializers import glorot_uniform, Constant
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import (ModelCheckpoint, ReduceLROnPlateau,
                                        EarlyStopping, TensorBoard, Callback)

from tensorflow.keras.models import load_model
from sklearn.utils import class_weight

kernel_init = glorot_uniform()
bias_init = Constant(value=0.1)
regularizer = regularizers.l2(0.0003)
IMAGE_SIZE = (101, 64, 3)


### Define model architecture

In [2]:
def cnn_residual_inspired(num_classes, 
                          input_shape, 
                          dropout_rate, 
                          num_modules):
    
    visible = Input(shape=input_shape)
    layer = conv2d_bn_layer(visible, 64, (7, 7))
    layer = MaxPooling2D((2, 2), strides=(2, 2))(layer)
    layer = residual_module(layer, 64)

    if num_modules >= 2:
        layer = residual_module(layer, 128)

    if num_modules >= 3:
        layer = residual_module(layer, 256)

    if num_modules >= 4:
        layer = residual_module(layer, 512)

    if num_modules >= 5:
        layer = residual_module(layer, 1024)

    if num_modules >= 6:
        layer = residual_module(layer, 2048)

    # output layers
    layer = GlobalAveragePooling2D(name='avg_pool')(layer)
    layer = Dropout(dropout_rate)(layer)
    layer = Dense(num_classes, activation="softmax")(layer)

    model = Model(inputs=visible, outputs=layer)

    return model

In [3]:
def conv2d_bn_layer(layer_in: Model, 
                    num_filters: int, 
                    filter_size: int) -> Model:
    """A standard Convolution -> Activation -> Batch Normalisation layer """
    layer_out = Conv2D(num_filters, (filter_size),
                       padding='same',
                       activation='relu',
                       kernel_initializer=kernel_init,
                       bias_initializer=bias_init,
                       kernel_regularizer=regularizer)(layer_in)

    layer_out = BatchNormalization(axis=3)(layer_out)

    return layer_out

In [4]:
def residual_module(layer_in: Model, 
                    n_filters: int) -> Model:
    """ Creates a residual module
    He, K., X. Zhang, S. Ren, and J. Sun. 2016. 
    Deep residual learning for image recognition. 
    Proceedings of the IEEE Computer Society Conference on 
    Computer Vision and Pattern Recognition. 770–778.
    """ 
    merge_input = layer_in

    # check if the number of filters needs to increase
    if layer_in.shape[-1] != n_filters:
        merge_input = Conv2D(n_filters, (1, 1), padding='same',
                             activation='relu', kernel_initializer=kernel_init,
                             bias_initializer=bias_init)(layer_in)

    # conv1
    conv1 = conv2d_bn_layer(layer_in, n_filters, (3, 3))
    # conv2
    conv2 = conv2d_bn_layer(conv1, n_filters, (3, 3))
    # add filters, assumes filters/channels last
    layer_out = add([conv2, merge_input])
    # activation function
    layer_out = Activation('relu')(layer_out)

    return layer_out



In [5]:
def get_image_augmentation_opt(batch_size: int) -> Dict:
    """ Gets passed to the image augmentation function, configures the
    ranges of various augmentations."""
    return {
        'ROT_RANGE': 37,
        'WIDTH_SHIFT_RANGE': 0.3,
        'HEIGHT_SHIFT_RANGE': 0.3,
        'SHEAR_RANGE': 10,
        'ZOOM_RANGE': 0.4,
        'HOR_FLIP': True,
        'VER_FLIP': True,
        'BATCH_SIZE': batch_size}

In [6]:
import psutil

# Define a custom callback to clear the session
class ClearMemory(Callback):
    def on_epoch_end(self, epoch, logs=None):
        tf.keras.backend.clear_session()

class MemoryUsageLogger(Callback):
    def on_epoch_end(self, epoch, logs=None):
        process = psutil.Process(os.getpid())
        print(f"Memory usage at epoch {epoch}: {process.memory_info().rss / (1024 ** 2)} MB")
        

class KerasModel():
    """ A wrapper around the Keras functioanlity to compile and train
    TensorFlow deep learning models """

    def __init__(self, model, verbose=True):
        self.model = model
        if verbose:
            model.summary()

    def compile_model(self, compile_opt: Dict = {}) -> None:
        # if no arguments passed, some defaults are provided.
        OPT = compile_opt.get('OPT', 'adam')
        loss = compile_opt.get('loss', 'categorical_crossentropy')
        metrics = compile_opt.get('metrics', ['accuracy'])

        self.model.compile(optimizer=OPT, loss=loss,  metrics=metrics)

    def evaluate_model(self,
                       X_test: np.array,
                       y_test: np.array,
                       # model: Model,
                       eval_opt: Dict = {}) -> float:
        
        BATCH_SIZE = eval_opt.get('BATCH_SIZE', 128)
        VERBOSE = eval_opt.get('VERBOSE', 1)
        score = self.model.evaluate(
            X_test, y_test, batch_size=BATCH_SIZE, verbose=VERBOSE)
        return score

    def load_model(self, model_filepath: path) -> None:
        self.model = load_model(model_filepath)

    def save_model(self, model_filepath: path) -> None:
        self.model.save(model_filepath)

    def train_model(self,
                    training_generator: Tuple[np.array, np.array],
                    validation_data: np.array,
                    model_name: str,
                    y_train: np.array,
                    save_model: bool = True,
                    training_opt: Dict = {}) -> List:
        
        # sets default values if training options were not passed
        BATCH_SIZE = training_opt.get('BATCH_SIZE', 16)
        NB_EPOCH = training_opt.get('NB_EPOCH', 10)
        VERBOSE = training_opt.get('VERBOSE', 1)

        output_path = path.join('./trained_models', '{}_model.h5'.format(model_name))
        
        y = [np.where(r == 1)[0][0] for r in y_train]
        for v in np.where(~y_train.any(axis=0))[0]:
            for _ in range(1000):
                y.append(v)
        class_weights = class_weight.compute_class_weight('balanced',
                                                          classes=np.unique(y),
                                                           y=y)
        class_weights = dict(enumerate(class_weights))

        # setup model training callbacks
        # save the best model so far when training
        
        checkpoint = ModelCheckpoint(
            output_path, monitor='val_accuracy', verbose=1,
            save_best_only=True, mode='max')

        # lower learning rate when models learning has plateaued
        lr_drop = ReduceLROnPlateau(
            monitor='loss', factor=0.5, patience=8, min_lr=0.000001)

        # stop training if signs of model convergence
        early_stopping = EarlyStopping(monitor='loss', patience=13)

        # enables tensorboard from console for diagnostic tools
        # tensor_board = TensorBoard(log_dir='Graph', histogram_freq=0,
        #                            write_graph=True, write_images=True)

        callbacks_list = [checkpoint, lr_drop, early_stopping,ClearMemory(), MemoryUsageLogger()]#, tensor_board]

        history = self.model.fit(
            training_generator,
            steps_per_epoch=y_train.shape[0] // BATCH_SIZE,
            validation_data=validation_data,
            validation_steps=32,
            epochs=NB_EPOCH,
            verbose=VERBOSE,
            callbacks=callbacks_list,
            class_weight=class_weights)

        return history

    def train_model_with_no_augmentation(self,
                                         X_train: np.array,
                                         y_train: np.array,
                                         X_val: np.array,
                                         y_val: np.array,
                                         model_name: str,
                                         training_opt: Dict = {}) -> List:
        """Same as above but with no image augmentation, not usually used"""
        BATCH_SIZE = training_opt.get('BATCH_SIZE', 46)
        NB_EPOCH = training_opt.get('NB_EPOCH', 250)
        VERBOSE = training_opt.get('VERBOSE', 1)

        output_path = path.join(
            './trained_models', '{}_model.h5'.format(model_name))

        y = [np.where(r == 1)[0][0] for r in y_train]
        for v in np.where(~y_train.any(axis=0))[0]:
            for _ in range(1000):
                y.append(v)
        classes=np.unique(y)
        class_weights = class_weight.compute_class_weight('balanced',
                                                          classes=classes,
                                                          y=y)
        class_weights = dict(zip(classes, class_weights))
        
        checkpoint = ModelCheckpoint(
            output_path, monitor='val_accuracy', verbose=1,
            save_best_only=True, mode='max')

        # lower learning rate when models learning has plateaued
        lr_drop = ReduceLROnPlateau(
            monitor='loss', factor=0.5, patience=8, min_lr=0.000001)

        # stop training if signs of model convergence
        early_stopping = EarlyStopping(monitor='loss', patience=13)

        # enables tensorboard from console for diagnostic tools
        # tensor_board = TensorBoard(log_dir='Graph', histogram_freq=0,
        #                            write_graph=True, write_images=True)

        callbacks_list = [checkpoint, lr_drop, early_stopping,ClearMemory(), MemoryUsageLogger()]#, tensor_board]

        history = self.model.fit(
            X_train,
            y_train,
            batch_size=BATCH_SIZE,
            validation_data=(X_val, y_val),
            epochs=NB_EPOCH,
            verbose=VERBOSE,
            callbacks=callbacks_list,
            class_weight=class_weights)
        
        return history

    def train_combined_model(self,
                            training_generator,
                            validation_data,
                            model_name: str,
                            y_train: np.array,
                            training_opt: Dict = {}) -> List:
      
        BATCH_SIZE = training_opt.get('BATCH_SIZE', 46)
        NB_EPOCH = training_opt.get('NB_EPOCH', 250)
        VERBOSE = training_opt.get('VERBOSE', 1)

        output_path = path.join(
            './trained_models', '{}_com_model.h5'.format(model_name))

        y = [np.where(r == 1)[0][0] for r in y_train]
        for v in np.where(~y_train.any(axis=0))[0]:
            for _ in range(1000):
                y.append(v)
        classes=np.unique(y)
        class_weights = class_weight.compute_class_weight('balanced',
                                                          classes=classes,
                                                          y=y)
        class_weights = dict(zip(classes, class_weights))
        
        checkpoint = ModelCheckpoint(
            output_path, monitor='val_accuracy', verbose=1,
            save_best_only=True, mode='max')
        
        lr_drop = ReduceLROnPlateau(
            monitor='loss', factor=0.5, patience=8, min_lr=0.000001)
        
        # stop training if signs of model convergence
        early_stopping = EarlyStopping(monitor='loss', patience=13)

        # enables tensorboard from console for diagnostic tools
        # tensor_board = TensorBoard(log_dir='Graph', histogram_freq=0,
        #                            write_graph=True, write_images=True)

        callbacks_list = [checkpoint, lr_drop, early_stopping,ClearMemory(), MemoryUsageLogger()]#, tensor_board]
        
        history = self.model.fit(
            training_generator,
            steps_per_epoch=y_train.shape[0] // BATCH_SIZE,
            validation_data=validation_data,
            validation_steps=32,
            batch_size=BATCH_SIZE,
            epochs=NB_EPOCH,
            verbose=VERBOSE,
            callbacks=callbacks_list,
            class_weight=class_weights)
        
        return history



### Load the training, validation and testing data

In [7]:
import pickle
processed_training_data_path = "./flowcam_split_data/plankton_data_101x64_final.pkl"
with open(processed_training_data_path, "rb") as file:
    trainAttrX, valAttrX, testAttrX, trainImagesX, \
        valImagesX, testImagesX, y_train, y_val, y_test = pickle.load(file)

### Calling the defined resnet model

In [10]:
dropout_rate=0.2
num_modules=3
model = cnn_residual_inspired(
            num_classes=y_train.shape[1],
            input_shape=IMAGE_SIZE,
            dropout_rate=dropout_rate,
            num_modules=num_modules)

In [11]:
keras_model= KerasModel(model)

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 101, 64, 3)  0           []                               
                                ]                                                                 
                                                                                                  
 conv2d_9 (Conv2D)              (None, 101, 64, 64)  9472        ['input_2[0][0]']                
                                                                                                  
 batch_normalization_7 (BatchNo  (None, 101, 64, 64)  256        ['conv2d_9[0][0]']               
 rmalization)                                                                                     
                                                                                            

### Defining the image augmentation parameters

In [12]:
batch_size=32
train_gen_opt: Dict = get_image_augmentation_opt(batch_size)

In [13]:
train_gen_opt

{'ROT_RANGE': 37,
 'WIDTH_SHIFT_RANGE': 0.3,
 'HEIGHT_SHIFT_RANGE': 0.3,
 'SHEAR_RANGE': 10,
 'ZOOM_RANGE': 0.4,
 'HOR_FLIP': True,
 'VER_FLIP': True,
 'BATCH_SIZE': 32}

In [14]:
from utils import create_augmented_images_generator
# create_augmented_images_generator(X_attributes: np.array,
#                                   X_images: np.array,
#                                   Y: np.array,
#                                   opt: Dict = {},
#                                   only_images: bool = False,
#                                   multiple_inputs: bool = False)

In [15]:
train_gen = create_augmented_images_generator(trainAttrX,
                                              trainImagesX,
                                              y_train,
                                              train_gen_opt,
                                              only_images=True)

### Trainining the ResNet model with image augmentation and storing the weights at "./trained_models" folder

In [16]:
batch_size=32
num_epochs=2
cnn_model_name="cnn3_with_aug"

training_opt: Dict = {'BATCH_SIZE': batch_size, 'NB_EPOCH': num_epochs}
keras_model.compile_model()

In [17]:
history = keras_model.train_model(
    train_gen,
    (valImagesX, y_val),
    cnn_model_name,
    y_train,
    training_opt=training_opt)

Epoch 1/2


2024-08-14 16:47:36.373370: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8401



Epoch 1: val_accuracy improved from -inf to 0.26087, saving model to ./trained_models/cnn3_with_aug_model.h5
Memory usage at epoch 0: 4326.42578125 MB
Epoch 2/2
Memory usage at epoch 1: 4337.48828125 MB


### Trainining the ResNet model without image augmentation and storing the weights at "./trained_models" folder

In [18]:
batch_size=32
num_epochs=2
cnn_model_name="cnn3_no_aug"

training_opt: Dict = {'BATCH_SIZE': batch_size, 'NB_EPOCH': num_epochs}
keras_model = KerasModel(model)
keras_model.compile_model()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 101, 64, 3)  0           []                               
                                ]                                                                 
                                                                                                  
 conv2d_9 (Conv2D)              (None, 101, 64, 64)  9472        ['input_2[0][0]']                
                                                                                                  
 batch_normalization_7 (BatchNo  (None, 101, 64, 64)  256        ['conv2d_9[0][0]']               
 rmalization)                                                                                     
                                                                                            

In [19]:
history = keras_model.train_model_with_no_augmentation(
                                         trainImagesX,
                                         y_train,
                                         valImagesX,
                                         y_val,
                                         model_name = cnn_model_name,
                                         training_opt = training_opt)

Epoch 1/2
Epoch 1: val_accuracy improved from -inf to 0.26087, saving model to ./trained_models/cnn3_no_aug_model.h5
Memory usage at epoch 0: 4273.578125 MB
Epoch 2/2

Epoch 2: val_accuracy did not improve from 0.26087
Memory usage at epoch 1: 4275.3046875 MB


# Combined or Fusion model

In [20]:
def combine_multiple_cnn_models(models: List[Model],
                                y: np.array,
                                dropout_rate: float = 0.5) -> Model:
    """This will combine multiple deep learning models
    (each individual should have their SoftMax layer removed)
    into a single model, by providing a concatanation layer and a fully
    connected layer
    """
    combinedInput = concatenate(
        [model.output for model in models], name="cnn_concat")

    x = Dense(1024,
              activation='relu',
              kernel_initializer=kernel_init,
              bias_initializer=bias_init)(combinedInput)
    x = BatchNormalization(name='combined_bn')(x)

    x = Dense(y.shape[1], activation='softmax', name='combined_pred')(x)

    model = Model(inputs=[model.input for model in models], outputs=x)

    return model


### loading the saved weights of ResNet model and MLP models trained earlier

In [21]:
model1=load_model('./trained_models/cnn3_with_aug_model.h5')
model2=load_model('./trained_models/mlp_model_flowcam_data.h5')

In [22]:
model1 = Model(model1.input, model1.layers[-2].output)
for l in model1.layers:
    l.trainable = False

In [23]:
model2 = Model(model2.input, model2.layers[-2].output)
for l in model2.layers:
    l.trainable = False


In [24]:
models=[]
for i, mod in enumerate([model1,model2]):
        for j, layer in enumerate(mod.layers):
            layer._name = "model_{}_layer_{}".format(i, j)
        models.append(mod)

### Calling the Fusion model 

In [25]:
model_com = combine_multiple_cnn_models(models,
                                    y_train,
                                    dropout_rate = 0.5)

In [26]:
keras_model = KerasModel(model_com)
batch_size=64
num_epochs=2

# train_gen_opt: Dict = get_image_augmentation_opt(batch_size)

val_gen_opt: Dict = {
    'ROT_RANGE': 0,
    'WIDTH_SHIFT_RANGE': 0.0,
    'HEIGHT_SHIFT_RANGE': 0.0,
    'SHEAR_RANGE': 0.0,
    'ZOOM_RANGE': 0.0,
    'HOR_FLIP': False,
    'VER_FLIP': False,
    'BATCH_SIZE': batch_size}

'''WARNING comment below line for making augmentations'''
train_gen_opt=val_gen_opt

train_gen = create_augmented_images_generator(
    trainAttrX, trainImagesX, y_train, train_gen_opt, only_images=False)
val_gen = create_augmented_images_generator(
    valAttrX, valImagesX, y_val, val_gen_opt, only_images=False)

training_opt: Dict = \
    {'BATCH_SIZE': batch_size, 'NB_EPOCH': num_epochs}
keras_model.compile_model()



Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 model_0_layer_0 (InputLayer)   [(None, 101, 64, 3)  0           []                               
                                ]                                                                 
                                                                                                  
 model_0_layer_1 (Conv2D)       (None, 101, 64, 64)  9472        ['model_0_layer_0[0][0]']        
                                                                                                  
 model_0_layer_2 (BatchNormaliz  (None, 101, 64, 64)  256        ['model_0_layer_1[0][0]']        
 ation)                                                                                           
                                                                                            

### Training and storing the weights of Fusion model at "./trained_models" folder

In [27]:
concat_model_name="cnn_aug_mlp"
hist = keras_model.train_combined_model(
    train_gen,
    val_gen,
    concat_model_name,
    y_train,
    training_opt=training_opt)

Epoch 1/2
Epoch 1: val_accuracy improved from -inf to 0.69565, saving model to ./trained_models/cnn_aug_mlp_com_model.h5
Memory usage at epoch 0: 5127.12109375 MB
Epoch 2/2
Epoch 2: val_accuracy improved from 0.69565 to 0.86957, saving model to ./trained_models/cnn_aug_mlp_com_model.h5
Memory usage at epoch 1: 5205.1484375 MB
