<h1>Comparison of Transfer Learning Models - Occipital View </h1>

<hr>

<p>Continuing with the model selection process, we train the three models that achieved the highest accuracy in the previous step for 10 epochs each, evaluating them with 5-fold stratified cross-validation. Note that no data augmentation is applied at this stage.</p>

In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
import inspect
import glob
import os
import matplotlib.image as mpimg
import numpy as np
import seaborn as sns
import pandas as pd
import inspect
from tqdm import tqdm
from keras.preprocessing import image
from sklearn import metrics
from sklearn.metrics import ConfusionMatrixDisplay
from google.colab import drive
from sklearn.model_selection import StratifiedKFold

In [None]:
from keras.preprocessing import image

In [None]:
# Mount Google Drive (Colab only) so the dataset paths under /content/drive resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Data-loading and cross-validation configuration shared by the cells below.
batch_size = 32
data_path = "/content/drive/MyDrive/sve_O - bez augumentacija/Train"
n_folds = 5

# Drive the splitter from n_folds so the fold count is configured in one place
# (previously n_splits was a second, independent hard-coded 5).
kfold = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)

# glob returns matches in arbitrary (filesystem-dependent) order; sorting makes the
# index -> file mapping stable so that random_state=42 really yields the same folds
# on every run.
all_files = sorted(glob.glob(os.path.join(data_path, '*/*')))

# The class label of each file is the name of its parent directory.
labels = [os.path.basename(os.path.dirname(fp)) for fp in all_files]

In [None]:
# List all available models
# inspect.getmembers yields (name, function) pairs, which dict() consumes directly:
# the result maps every function exposed by tf.keras.applications to its name.
model_dictionary = dict(inspect.getmembers(tf.keras.applications, inspect.isfunction))

In [None]:
# Sanity check: how many function members were collected from tf.keras.applications.
len(model_dictionary)

71

In [None]:
# Restrict the benchmark to the three architectures carried over from the previous step.
selected_model_names = ('MobileNet', 'ResNet101V2', 'ResNet50V2')
new_model_dict = {
    name: constructor
    for name, constructor in model_dictionary.items()
    if name in selected_model_names
}

In [None]:
# Sanity check: number of architectures retained by the name filter above.
len(new_model_dict)

3

In [None]:
def preprocess_data_for_fold(train_files, val_files, img_size):
    """Build the training and validation generators for one cross-validation fold.

    Args:
        train_files: paths of the images used for training in this fold.
        val_files: paths of the images held out for validation in this fold.
        img_size: (height, width) every image is resized to.

    Returns:
        A ``(train_gen, val_gen)`` tuple of Keras dataframe iterators. Both only
        rescale pixel values by 1/255 (no augmentation); the training iterator
        shuffles, the validation iterator keeps the file order.
    """
    rescaler = image.ImageDataGenerator(rescale=1./255.)

    def _flow(file_paths, shuffle):
        # The label of each image is the name of the directory that contains it.
        frame = pd.DataFrame({
            'filename': file_paths,
            'class': [os.path.basename(os.path.dirname(path)) for path in file_paths],
        })
        return rescaler.flow_from_dataframe(
            dataframe=frame,
            directory=None,
            x_col="filename",
            y_col="class",
            class_mode="binary",
            target_size=img_size,
            batch_size=batch_size,
            shuffle=shuffle,
        )

    return _flow(train_files, True), _flow(val_files, False)


In [None]:
def test_preprocces(img_size, test_dir="/content/drive/MyDrive/sve_O - bez augumentacija/Test"):
    """Create the generator that feeds the held-out test images to a model.

    Args:
        img_size: (height, width) every image is resized to; should match the
            input size the model under evaluation was built with.
        test_dir: directory containing one sub-directory per class. Defaults to
            the project's test folder on the mounted Drive, so existing
            single-argument calls behave exactly as before.

    Returns:
        A Keras directory iterator with binary labels, pixel values rescaled by
        1/255, and ``shuffle=False`` so predictions line up with ``.classes``.
    """
    test_datagen = image.ImageDataGenerator(rescale=1./255.)
    test_generator = test_datagen.flow_from_directory(
        test_dir,
        target_size=img_size,
        class_mode='binary',
        batch_size=batch_size,
        shuffle=False,
    )
    return test_generator

In [None]:
def build_model(model_1, input_shape):
    """Wrap a frozen Keras application in a single-unit sigmoid classifier.

    Args:
        model_1: constructor from ``tf.keras.applications`` (called with
            ``include_top=False`` and global average pooling).
        input_shape: (height, width, channels) of the images fed to the model.

    Returns:
        A compiled ``Sequential`` model (Adam, binary cross-entropy, accuracy
        metric) in which only the final Dense layer is trainable.
    """
    backbone = model_1(include_top=False, pooling='avg', input_shape=input_shape)
    # Freeze the backbone: only the new classification head is trained.
    backbone.trainable = False

    classifier = tf.keras.models.Sequential([
        backbone,
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])
    classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return classifier

In [None]:
def _input_size_for(model_name):
    """Return the (height, width) input resolution to use for a Keras application name."""
    if 'NASNetLarge' in model_name:
        return (331, 331)
    # Each name must be tested separately: `'A' or 'B' in name` is always truthy
    # because the non-empty literal 'A' short-circuits the `or`.
    if any(tag in model_name for tag in ('InceptionResNetV2', 'InceptionV3', 'Xception')):
        return (299, 299)
    return (224, 224)


# Benchmark every selected architecture with stratified k-fold cross-validation,
# then score it on the held-out test set.
# NOTE: validation_accuracy is stored in percent (0-100) while test_accuracy is
# stored as a fraction (0-1), exactly as the values are produced below.
model_benchmarks = {'model_name': [], 'num_model_params': [], 'validation_accuracy': [], 'test_accuracy': []}
for model_name, model_1 in tqdm(list(new_model_dict.items())):
    # The input resolution depends only on the architecture, so resolve it once
    # per model and reuse it for every fold and for the test generator.
    img_size = _input_size_for(model_name)
    input_shape = img_size + (3,)

    all_acc = []
    all_loss = []

    for fold_no, (train_indices, val_indices) in enumerate(kfold.split(all_files, labels), start=1):
        print('------------------------------------------------------------------------')
        print(f'Training for fold {fold_no} ...')

        # Build the generators for this fold from the index split.
        train_files = [all_files[i] for i in train_indices]
        val_files = [all_files[i] for i in val_indices]
        train_gen, val_gen = preprocess_data_for_fold(train_files, val_files, img_size)

        # A fresh model per fold so no weights leak between folds.
        model = build_model(model_1, input_shape)

        # Fit data to model
        history = model.fit(train_gen, epochs=10, validation_data=val_gen)

        # Evaluate the model on the validation set: scores = [loss, accuracy]
        scores = model.evaluate(val_gen, verbose=0)
        print(f'Score for fold {fold_no}: Loss of {scores[0]}; Accuracy of {round(scores[1]*100, 2)}%')
        all_acc.append(scores[1] * 100)
        all_loss.append(scores[0])

    # Provide per-fold and average scores
    print('------------------------------------------------------------------------')
    print('Score per fold')
    for i in range(len(all_acc)):
        print('------------------------------------------------------------------------')
        print(f'> Fold {i+1} - Loss: {all_loss[i]} - Accuracy: {all_acc[i]}%')
    print('------------------------------------------------------------------------')
    print('Average scores for all folds:')
    print(f'> Accuracy: {np.mean(all_acc)} (+- {np.std(all_acc)})')
    print(f'> Loss: {np.mean(all_loss)}')
    print('------------------------------------------------------------------------')

    # NOTE: `model` at this point is the one trained on the LAST fold only; the
    # test metrics below describe that single model, not an ensemble of the folds.
    test_processed = test_preprocces(img_size)

    test_acc = model.evaluate(test_processed)
    predictions = model.predict(test_processed)
    # Threshold the sigmoid output at 0.5 to obtain hard class predictions.
    predicted_classes = np.where(predictions < 0.5, 0, 1)
    true_classes = test_processed.classes
    class_labels = list(test_processed.class_indices.keys())

    report = metrics.classification_report(true_classes, predicted_classes, target_names=class_labels)
    print(report)

    # Record the summary row for this architecture.
    model_benchmarks['model_name'].append(model_name)
    model_benchmarks['num_model_params'].append(model.count_params())
    model_benchmarks['validation_accuracy'].append(np.mean(all_acc))
    model_benchmarks['test_accuracy'].append(test_acc[-1])


  0%|          | 0/3 [00:00<?, ?it/s]

------------------------------------------------------------------------
Training for fold 1 ...
Found 125 validated image filenames belonging to 2 classes.
Found 32 validated image filenames belonging to 2 classes.




Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet/mobilenet_1_0_224_tf_no_top.h5
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Score for fold 1: Loss of 0.613815426826477; Accuracy of 62.5%
------------------------------------------------------------------------
Training for fold 2 ...
Found 125 validated image filenames belonging to 2 classes.
Found 32 validated image filenames belonging to 2 classes.




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Score for fold 2: Loss of 0.6811779737472534; Accuracy of 53.12%
------------------------------------------------------------------------
Training for fold 3 ...
Found 126 validated image filenames belonging to 2 classes.
Found 31 validated image filenames belonging to 2 classes.




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Score for fold 3: Loss of 0.6309963464736938; Accuracy of 77.42%
------------------------------------------------------------------------
Training for fold 4 ...
Found 126 validated image filenames belonging to 2 classes.
Found 31 validated image filenames belonging to 2 classes.




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Score for fold 4: Loss of 0.5961403250694275; Accuracy of 74.19%
------------------------------------------------------------------------
Training for fold 5 ...
Found 126 validated image filenames belonging to 2 classes.
Found 31 validated image filenames belonging to 2 classes.




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Score for fold 5: Loss of 0.6800780296325684; Accuracy of 48.39%
------------------------------------------------------------------------
Score per fold
------------------------------------------------------------------------
> Fold 1 - Loss: 0.613815426826477 - Accuracy: 62.5%
------------------------------------------------------------------------
> Fold 2 - Loss: 0.6811779737472534 - Accuracy: 53.125%
------------------------------------------------------------------------
> Fold 3 - Loss: 0.6309963464736938 - Accuracy: 77.4193525314331%
------------------------------------------------------------------------
> Fold 4 - Loss: 0.5961403250694275 - Accuracy: 74.19354915618896%
------------------------------------------------------------------------
> Fold 5 - Loss: 0.6800780296325684 - Accuracy: 48.38709533214569%
--------------------------------------------------------------

 33%|███▎      | 1/3 [03:10<06:20, 190.48s/it]

              precision    recall  f1-score   support

           F       0.66      0.82      0.73        33
           M       0.77      0.59      0.67        34

    accuracy                           0.70        67
   macro avg       0.71      0.70      0.70        67
weighted avg       0.71      0.70      0.70        67

------------------------------------------------------------------------
Training for fold 1 ...
Found 125 validated image filenames belonging to 2 classes.
Found 32 validated image filenames belonging to 2 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet101v2_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Score for fold 1: Loss of 0.5376505851745605; Accuracy of 84.38%
------------------------------------------------------------------------
Training for fold 2 ...
Found 125 validated image filenames be

 67%|██████▋   | 2/3 [06:36<03:19, 199.71s/it]

              precision    recall  f1-score   support

           F       0.78      0.55      0.64        33
           M       0.66      0.85      0.74        34

    accuracy                           0.70        67
   macro avg       0.72      0.70      0.69        67
weighted avg       0.72      0.70      0.69        67

------------------------------------------------------------------------
Training for fold 1 ...
Found 125 validated image filenames belonging to 2 classes.
Found 32 validated image filenames belonging to 2 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50v2_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Score for fold 1: Loss of 0.5647867918014526; Accuracy of 75.0%
------------------------------------------------------------------------
Training for fold 2 ...
Found 125 validated image filenames belo

100%|██████████| 3/3 [09:06<00:00, 182.04s/it]

              precision    recall  f1-score   support

           F       0.69      0.82      0.75        33
           M       0.79      0.65      0.71        34

    accuracy                           0.73        67
   macro avg       0.74      0.73      0.73        67
weighted avg       0.74      0.73      0.73        67






In [None]:
# Tabulate the benchmark results, ordered from the smallest to the largest model
# (ascending num_model_params), and display the table.
benchmark_df = pd.DataFrame(model_benchmarks).sort_values('num_model_params')
benchmark_df

Unnamed: 0,model_name,num_model_params,validation_accuracy,test_accuracy
0,MobileNet,3229889,63.124999,0.701493
2,ResNet50V2,23566849,75.221775,0.731343
1,ResNet101V2,42628609,80.282258,0.701493
