# Imports

In [None]:
# Import necessary libraries
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt
import glob
import os
import shutil
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.utils import img_to_array, array_to_img
from tqdm import tqdm
from PIL import Image, ImageFilter
import numpy as np
import pandas as pd

# Split data into training and testing sets
from sklearn.model_selection import train_test_split

# Import additional Keras libraries
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import Callback, EarlyStopping

# Import metrics for evaluating the model
from sklearn.metrics import confusion_matrix, classification_report

# Dataset Loading

In [None]:
# Collect the JPEG files that sit one folder below the dataset root.
# glob is called without recursive=True, so '**' behaves like a plain '*'
# and matches exactly one directory level (the per-class folders).
path = '/kaggle/input/monkey-species-image-final'
path_imgs = glob.glob(path + '/**/*.jpg')

## Label Gen

In [None]:
# The class label of an image is the name of the folder that contains it.
labels = [os.path.basename(os.path.dirname(img)) for img in path_imgs]

# Wrap both columns as named Series so they can be concatenated later.
file_path = pd.Series(path_imgs, name='File_Path').astype(str)
labels = pd.Series(labels, name='Labels')

## Split Data

In [None]:
# Put paths and labels side by side, one row per image.
data = pd.concat([file_path, labels], axis=1)

# Shuffle the rows with a fixed seed. The split below is seeded as well, but
# that seed is only meaningful when the rows reach it in a repeatable order;
# with an unseeded shuffle the train/test membership changed on every run.
# NOTE(review): full reproducibility also assumes glob returns the files in
# the same order each time - confirm on the target filesystem.
data = data.sample(frac=1, random_state=2).reset_index(drop=True)

# Hold out 20% of the images for testing.
train_df, test_df = train_test_split(data, test_size=0.2, random_state=2)

# EDA & Visualization

In [None]:
# Preview up to 16 images in a 4x4 grid, each titled with its label.
fig, axes = plt.subplots(nrows=4, ncols=4, figsize=(15, 7),
                         subplot_kw={'xticks': [], 'yticks': []})

# zip() stops at the shortest input, so a dataset with fewer than 16 images
# simply leaves the remaining axes blank instead of raising on a missing row.
for ax, img_path, label in zip(axes.flat, data.File_Path, data.Labels):
    ax.imshow(plt.imread(img_path))
    ax.set_title(label)

# Adjust layout for better visualization
plt.tight_layout()

# Display the image grid
plt.show()

# Count how many images each label has
counts = data.Labels.value_counts()

# Bar plot of the per-label counts
sns.barplot(x=counts.index, y=counts)
plt.xlabel('Labels')
plt.ylabel('Count')

# Rotate x-axis labels for better readability
plt.xticks(rotation=50)

# Display the plot
plt.show()
# Define Functions

# Data Gen

In [None]:
def gen(pre, train, test, target_size=(100, 100), batch_size=32):
    """Build the training, validation and test image iterators.

    Args:
        pre: Preprocessing function applied to every image by the generators.
        train: DataFrame with 'File_Path' and 'Labels' columns; 20% of it is
            set aside as the validation subset.
        test: DataFrame with the same two columns, used for the test iterator.
        target_size: (height, width) every image is resized to.
        batch_size: Number of images per batch for all three iterators.

    Returns:
        Tuple ``(train_gen, valid_gen, test_gen)``.
    """
    # Random augmentation is configured on the ImageDataGenerator itself.
    # flow_from_dataframe does not apply these options, so passing them there
    # (as this function used to) left the training images un-augmented.
    augmentation = dict(
        rotation_range=30,
        zoom_range=0.15,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.15,
        horizontal_flip=True,
        fill_mode="nearest",
    )
    train_datagen = ImageDataGenerator(preprocessing_function=pre,
                                       validation_split=0.2,
                                       **augmentation)

    # Validation images must not be augmented, so they get their own
    # generator that shares only the preprocessing and the split fraction.
    # NOTE(review): this relies on Keras deriving the training/validation
    # boundary from the dataframe row order and validation_split alone, so the
    # two subsets stay disjoint across generators - confirm.
    valid_datagen = ImageDataGenerator(preprocessing_function=pre,
                                       validation_split=0.2)

    # Test images only get the preprocessing function.
    test_datagen = ImageDataGenerator(preprocessing_function=pre)

    # Options common to all three iterators.
    flow_options = dict(
        x_col='File_Path',
        y_col='Labels',
        target_size=target_size,
        color_mode='rgb',
        class_mode='categorical',
        batch_size=batch_size,
    )

    train_gen = train_datagen.flow_from_dataframe(
        dataframe=train,
        shuffle=True,
        seed=0,
        subset='training',
        **flow_options
    )

    # Not shuffled, so batches come out in dataframe order.
    valid_gen = valid_datagen.flow_from_dataframe(
        dataframe=train,
        shuffle=False,
        seed=0,
        subset='validation',
        **flow_options
    )

    # Not shuffled, so predictions line up row by row with `test`.
    test_gen = test_datagen.flow_from_dataframe(
        dataframe=test,
        shuffle=False,
        **flow_options
    )

    return train_gen, valid_gen, test_gen

# Model parameter

In [None]:
def func(name_model, num_classes=10, input_shape=(100, 100, 3), patience=100):
    """Build and compile a transfer-learning classifier on a frozen backbone.

    Args:
        name_model: Keras application constructor (e.g. ``VGG19``); it is
            called with ``input_shape``, ``include_top=False``,
            ``weights='imagenet'`` and ``pooling='avg'``.
        num_classes: Width of the softmax output layer.
        input_shape: Image shape fed to the backbone; must match the
            ``target_size`` used by the data generators plus 3 channels.
        patience: Epochs without ``val_loss`` improvement before training is
            stopped. The default of 100 is kept for compatibility; with a
            100-epoch run it can never trigger, so pass a smaller value to
            actually stop early.

    Returns:
        Tuple ``(model, callbacks)`` where ``callbacks`` is a list holding the
        EarlyStopping callback.
    """
    # ImageNet-pretrained feature extractor with global average pooling.
    pre_model = name_model(input_shape=input_shape,
                           include_top=False,
                           weights='imagenet',
                           pooling='avg')

    # Freeze the backbone so only the new dense head is trained.
    pre_model.trainable = False

    # Classification head: two hidden layers followed by the softmax output.
    x = Dense(100, activation='relu')(pre_model.output)
    x = Dense(100, activation='relu')(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=pre_model.input, outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])

    my_callbacks = [EarlyStopping(monitor='val_loss',
                                  min_delta=0,
                                  patience=patience,
                                  mode='auto')]

    return model, my_callbacks

# Drawing Plots

In [None]:
def plot(history, test_gen, train_gen, model, test_df):
    """Plot training curves, print a classification report and show samples.

    Args:
        history: Object returned by ``model.fit``; its ``history`` dict must
            contain 'accuracy', 'loss', 'val_accuracy' and 'val_loss'.
        test_gen: Test iterator passed to ``model.predict``.
        train_gen: Training iterator; its ``class_indices`` mapping is used to
            turn predicted indices back into label names.
        model: Trained model used for prediction.
        test_df: DataFrame with 'File_Path' and 'Labels' columns.
            NOTE(review): rows are assumed to be in the same order as the
            batches of ``test_gen`` - confirm the iterator is not shuffled.

    Returns:
        The ``history`` argument, unchanged.
    """
    # Accuracy and loss curves, training vs. validation, side by side.
    fig, curve_axes = plt.subplots(1, 2, figsize=(10, 3))
    for ax, met in zip(curve_axes.ravel(), ['accuracy', 'loss']):
        ax.plot(history.history[met])
        ax.plot(history.history['val_' + met])
        ax.set_title('Model {}'.format(met))
        ax.set_xlabel('epochs')
        ax.set_ylabel(met)
        ax.legend(['Train', 'Validation'])

    # Predicted class index for every test image.
    pred = model.predict(test_gen)
    pred = np.argmax(pred, axis=1)

    # Invert {label: index} so indices can be mapped back to label names.
    index_to_label = {index: label for label, index in train_gen.class_indices.items()}
    pred = [index_to_label[k] for k in pred]

    # Per-class precision / recall / F1.
    clr = classification_report(test_df.Labels, pred)
    print(clr)

    # Show up to 12 test images with their true and predicted labels.
    # Iteration starts at the first test sample, and zip() stops early when
    # fewer than 12 samples exist instead of indexing past the end.
    fig, axes = plt.subplots(nrows=4, ncols=3, figsize=(12, 8),
                             subplot_kw={'xticks': [], 'yticks': []})
    samples = zip(axes.flat, test_df.File_Path, test_df.Labels, pred)
    for ax, img_path, true_label, pred_label in samples:
        ax.imshow(plt.imread(img_path))
        ax.set_title(f"True: {true_label}\nPredicted: {pred_label}")
    plt.tight_layout()
    plt.show()

    return history

## Result View

In [None]:
def result_test(test, model_use):
    """Evaluate ``model_use`` on ``test`` and print its loss and accuracy.

    Args:
        test: Data handed to ``model_use.evaluate``.
        model_use: Object exposing ``evaluate(data, verbose=...)`` whose result
            is indexable, with the loss at position 0 and accuracy at 1.

    Returns:
        The value returned by ``model_use.evaluate``, unchanged.
    """
    metrics = model_use.evaluate(test, verbose=0)

    loss_value = metrics[0]
    accuracy_value = metrics[1]

    # Loss with five decimals, accuracy as a percentage with two.
    print("    Test Loss: {:.5f}".format(loss_value))
    print("Test Accuracy: {:.2f}%".format(accuracy_value * 100))

    return metrics

# EfficientNet B7

In [None]:
### Assumes the train and test DataFrames (train_df and test_df) already exist

# Generate data for EfficientNetB7.
# The backbone imported here must be EfficientNetB7 itself: this section's
# variables, preprocessing function and saved file name all refer to it.
from tensorflow.keras.applications import EfficientNetB7
from tensorflow.keras.applications.efficientnet import preprocess_input

ENet_pre = preprocess_input
train_gen_ENet, valid_gen_ENet, test_gen_ENet = gen(ENet_pre, train_df, test_df)
ENet_model, callback_ENet = func(EfficientNetB7)

# Train the dense head; verbose=1 prints training and validation metrics per epoch
history_ENet = ENet_model.fit(
    train_gen_ENet,
    validation_data=valid_gen_ENet,
    epochs=100,
    callbacks=callback_ENet,
    verbose=1,
    steps_per_epoch=len(train_gen_ENet),
    validation_steps=len(valid_gen_ENet)
)

# Curves, classification report and sample predictions, then test metrics
history_ENet = plot(history_ENet, test_gen_ENet, train_gen_ENet, ENet_model, test_df)
result_ENet = result_test(test_gen_ENet, ENet_model)

def plot_confusion_matrix(y_true, y_pred, classes, model_name):
    """Show the confusion matrix of one model as an annotated heatmap.

    Args:
        y_true: True class values, one per sample.
        y_pred: Predicted class values, one per sample.
        classes: Tick labels used on both heatmap axes.
        model_name: Text appended to the figure title.
    """
    matrix = confusion_matrix(y_true, y_pred)

    plt.figure(figsize=(8, 6))
    heatmap_ax = sns.heatmap(
        matrix,
        annot=True,
        fmt="d",
        cmap="Blues",
        xticklabels=classes,
        yticklabels=classes,
    )
    # Columns hold the predictions, rows the ground truth.
    heatmap_ax.set_title(f"Confusion Matrix - {model_name}")
    heatmap_ax.set_xlabel("Predicted")
    heatmap_ax.set_ylabel("True")
    plt.show()

# True class indices as stored on the test iterator
true_labels_ENet = test_gen_ENet.classes

# Class probabilities for every test image, reduced to the most likely index
predictions_ENet = ENet_model.predict(test_gen_ENet)
predicted_labels_ENet = np.argmax(predictions_ENet, axis=1)

# Confusion matrix titled with the full model name
full_model_name_ENet = "EfficientNetB7"
plot_confusion_matrix(
    true_labels_ENet,
    predicted_labels_ENet,
    classes=test_gen_ENet.class_indices.keys(),
    model_name=full_model_name_ENet,
)

In [None]:
# Write the model held in ENet_model to models/EfficientNetB7.h5
ENet_model.save(filepath="models/EfficientNetB7.h5")

# DenseNet 201

In [None]:
# DenseNet201 backbone together with its matching preprocessing function
from tensorflow.keras.applications import DenseNet201
from tensorflow.keras.applications.densenet import preprocess_input

DenseNet201_pre = preprocess_input
train_gen_DenseNet201, valid_gen_DenseNet201, test_gen_DenseNet201 = gen(
    DenseNet201_pre, train_df, test_df
)
DenseNet201_model, callback_DenseNet201 = func(DenseNet201)

# Train for up to 100 epochs, going through every batch of both iterators
history_DenseNet201 = DenseNet201_model.fit(
    train_gen_DenseNet201,
    steps_per_epoch=len(train_gen_DenseNet201),
    validation_data=valid_gen_DenseNet201,
    validation_steps=len(valid_gen_DenseNet201),
    epochs=100,
    callbacks=callback_DenseNet201,
    verbose=1,
)

# Curves, classification report and sample predictions, then test metrics
history_DenseNet201 = plot(
    history_DenseNet201,
    test_gen_DenseNet201,
    train_gen_DenseNet201,
    DenseNet201_model,
    test_df,
)
result_DenseNet201 = result_test(test_gen_DenseNet201, DenseNet201_model)

# True class indices from the iterator vs. the arg-max of the predictions
true_labels_DenseNet201 = test_gen_DenseNet201.classes
predictions_DenseNet201 = DenseNet201_model.predict(test_gen_DenseNet201)
predicted_labels_DenseNet201 = np.argmax(predictions_DenseNet201, axis=1)

# Confusion matrix titled with the full model name
full_model_name_DenseNet201 = "DenseNet201"
plot_confusion_matrix(
    true_labels_DenseNet201,
    predicted_labels_DenseNet201,
    classes=test_gen_DenseNet201.class_indices.keys(),
    model_name=full_model_name_DenseNet201,
)

In [None]:
# Write the model held in DenseNet201_model to models/DenseNet201.h5
DenseNet201_model.save(filepath="models/DenseNet201.h5")

# VGG 19

In [None]:
# Generate data for VGG19
from tensorflow.keras.applications import VGG19
# Import VGG19's own preprocessing under an alias. Using the bare
# `preprocess_input` name here would pick up whichever model's function an
# earlier cell imported last, and the alias avoids rebinding that shared name.
from tensorflow.keras.applications.vgg19 import preprocess_input as vgg19_preprocess_input

VGG19_pre = vgg19_preprocess_input
train_gen_VGG19, valid_gen_VGG19, test_gen_VGG19 = gen(VGG19_pre, train_df, test_df)
VGG19_model, callback_VGG19 = func(VGG19)

history_VGG19 = VGG19_model.fit(
    train_gen_VGG19,
    validation_data=valid_gen_VGG19,
    epochs=100,
    callbacks=callback_VGG19,
    verbose=1,
    steps_per_epoch=len(train_gen_VGG19),
    validation_steps=len(valid_gen_VGG19)
)

# Curves, classification report and sample predictions, then test metrics
history_VGG19 = plot(history_VGG19, test_gen_VGG19, train_gen_VGG19, VGG19_model, test_df)
result_VGG19 = result_test(test_gen_VGG19, VGG19_model)

# True class indices as stored on the test iterator
true_labels_VGG19 = test_gen_VGG19.classes

# Make predictions using the trained VGG19 model
predictions_VGG19 = VGG19_model.predict(test_gen_VGG19)

# Convert predicted probabilities to class indices
predicted_labels_VGG19 = predictions_VGG19.argmax(axis=1)

# Display confusion matrix with the full model name for VGG19
full_model_name_VGG19 = "VGG19"
plot_confusion_matrix(true_labels_VGG19, predicted_labels_VGG19, classes=test_gen_VGG19.class_indices.keys(), model_name=full_model_name_VGG19)

In [None]:
# Write the model held in VGG19_model to models/VGG19.h5
VGG19_model.save(filepath="models/VGG19.h5")

# ResNet 152

In [None]:
# Generate data for ResNet152
from tensorflow.keras.applications import ResNet152
# Import the ResNet preprocessing under an alias. Using the bare
# `preprocess_input` name here would pick up whichever model's function an
# earlier cell imported last, and the alias avoids rebinding that shared name.
from tensorflow.keras.applications.resnet import preprocess_input as resnet_preprocess_input

ResNet152_pre = resnet_preprocess_input
train_gen_ResNet152, valid_gen_ResNet152, test_gen_ResNet152 = gen(ResNet152_pre, train_df, test_df)
ResNet152_model, callback_ResNet152 = func(ResNet152)

history_ResNet152 = ResNet152_model.fit(
    train_gen_ResNet152,
    validation_data=valid_gen_ResNet152,
    epochs=100,
    callbacks=callback_ResNet152,
    verbose=1,
    steps_per_epoch=len(train_gen_ResNet152),
    validation_steps=len(valid_gen_ResNet152)
)

# Curves, classification report and sample predictions, then test metrics
history_ResNet152 = plot(history_ResNet152, test_gen_ResNet152, train_gen_ResNet152, ResNet152_model, test_df)
result_ResNet152 = result_test(test_gen_ResNet152, ResNet152_model)

# True class indices as stored on the test iterator
true_labels_ResNet152 = test_gen_ResNet152.classes

# Make predictions using the trained ResNet152 model
predictions_ResNet152 = ResNet152_model.predict(test_gen_ResNet152)

# Convert predicted probabilities to class indices
predicted_labels_ResNet152 = predictions_ResNet152.argmax(axis=1)

# Display confusion matrix with the full model name for ResNet152
full_model_name_ResNet152 = "ResNet152"
plot_confusion_matrix(true_labels_ResNet152, predicted_labels_ResNet152, classes=test_gen_ResNet152.class_indices.keys(), model_name=full_model_name_ResNet152)

In [None]:
# Write the model held in ResNet152_model to models/ResNet152.h5
ResNet152_model.save(filepath="models/ResNet152.h5")

# Inception V3

In [None]:
# InceptionV3 backbone together with its matching preprocessing function
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.applications.inception_v3 import preprocess_input

InceptionV3_pre = preprocess_input
train_gen_InceptionV3, valid_gen_InceptionV3, test_gen_InceptionV3 = gen(
    InceptionV3_pre, train_df, test_df
)
InceptionV3_model, callback_InceptionV3 = func(InceptionV3)

# Train for up to 100 epochs, going through every batch of both iterators
history_InceptionV3 = InceptionV3_model.fit(
    train_gen_InceptionV3,
    steps_per_epoch=len(train_gen_InceptionV3),
    validation_data=valid_gen_InceptionV3,
    validation_steps=len(valid_gen_InceptionV3),
    epochs=100,
    callbacks=callback_InceptionV3,
    verbose=1,
)

# Curves, classification report and sample predictions, then test metrics
history_InceptionV3 = plot(
    history_InceptionV3,
    test_gen_InceptionV3,
    train_gen_InceptionV3,
    InceptionV3_model,
    test_df,
)
result_InceptionV3 = result_test(test_gen_InceptionV3, InceptionV3_model)

# True class indices from the iterator vs. the arg-max of the predictions
true_labels_InceptionV3 = test_gen_InceptionV3.classes
predictions_InceptionV3 = InceptionV3_model.predict(test_gen_InceptionV3)
predicted_labels_InceptionV3 = np.argmax(predictions_InceptionV3, axis=1)

# Confusion matrix titled with the full model name
full_model_name_InceptionV3 = "InceptionV3"
plot_confusion_matrix(
    true_labels_InceptionV3,
    predicted_labels_InceptionV3,
    classes=test_gen_InceptionV3.class_indices.keys(),
    model_name=full_model_name_InceptionV3,
)


In [None]:
# Write the model held in InceptionV3_model to models/InceptionV3.h5
InceptionV3_model.save(filepath="models/InceptionV3.h5")

# **MobileNet V2**

In [None]:
# Generate data for MobileNetV2
from tensorflow.keras.applications import MobileNetV2
# Import MobileNetV2's own preprocessing under an alias so this cell no longer
# depends on which `preprocess_input` an earlier cell happened to import, and
# does not rebind that shared name.
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input as mobilenet_v2_preprocess_input

MobileNetV2_pre = mobilenet_v2_preprocess_input
train_gen_MobileNetV2, valid_gen_MobileNetV2, test_gen_MobileNetV2 = gen(MobileNetV2_pre, train_df, test_df)
MobileNetV2_model, callback_MobileNetV2 = func(MobileNetV2)

history_MobileNetV2 = MobileNetV2_model.fit(
    train_gen_MobileNetV2,
    validation_data=valid_gen_MobileNetV2,
    epochs=100,
    callbacks=callback_MobileNetV2,
    verbose=1,
    steps_per_epoch=len(train_gen_MobileNetV2),
    validation_steps=len(valid_gen_MobileNetV2)
)

# Curves, classification report and sample predictions, then test metrics
history_MobileNetV2 = plot(history_MobileNetV2, test_gen_MobileNetV2, train_gen_MobileNetV2, MobileNetV2_model, test_df)
result_MobileNetV2 = result_test(test_gen_MobileNetV2, MobileNetV2_model)

# True class indices from test_gen_MobileNetV2 vs. the arg-max of the predictions
true_labels_MobileNetV2 = test_gen_MobileNetV2.classes
predictions_MobileNetV2 = MobileNetV2_model.predict(test_gen_MobileNetV2)
predicted_labels_MobileNetV2 = predictions_MobileNetV2.argmax(axis=1)

# Display confusion matrix with the full model name for MobileNetV2
full_model_name_MobileNetV2 = "MobileNetV2"
plot_confusion_matrix(true_labels_MobileNetV2, predicted_labels_MobileNetV2, classes=test_gen_MobileNetV2.class_indices.keys(), model_name=full_model_name_MobileNetV2)

In [None]:
# Write the model held in MobileNetV2_model to models/MobileNetV2.h5
MobileNetV2_model.save(filepath="models/MobileNetV2.h5")

# **Xception**

In [None]:
# Generate data for Xception
from tensorflow.keras.applications import Xception
# Import Xception's own preprocessing under an alias so this cell no longer
# depends on which `preprocess_input` an earlier cell happened to import, and
# does not rebind that shared name.
from tensorflow.keras.applications.xception import preprocess_input as xception_preprocess_input

Xception_pre = xception_preprocess_input
train_gen_Xception, valid_gen_Xception, test_gen_Xception = gen(Xception_pre, train_df, test_df)
Xception_model, callback_Xception = func(Xception)

history_Xception = Xception_model.fit(
    train_gen_Xception,
    validation_data=valid_gen_Xception,
    epochs=100,
    callbacks=callback_Xception,
    verbose=1,
    steps_per_epoch=len(train_gen_Xception),
    validation_steps=len(valid_gen_Xception)
)

# Curves, classification report and sample predictions, then test metrics
history_Xception = plot(history_Xception, test_gen_Xception, train_gen_Xception, Xception_model, test_df)
result_Xception = result_test(test_gen_Xception, Xception_model)

# True class indices from test_gen_Xception vs. the arg-max of the predictions
true_labels_Xception = test_gen_Xception.classes
predictions_Xception = Xception_model.predict(test_gen_Xception)
predicted_labels_Xception = predictions_Xception.argmax(axis=1)

# Display confusion matrix with the full model name for Xception
full_model_name_Xception = "Xception"
plot_confusion_matrix(true_labels_Xception, predicted_labels_Xception, classes=test_gen_Xception.class_indices.keys(), model_name=full_model_name_Xception)

In [None]:
# Write the model held in Xception_model to models/Xception.h5
Xception_model.save(filepath="models/Xception.h5")

# Zip and Download

In [None]:
# Pack the contents of the models directory into a zip archive; the same
# path serves as the archive's base name and as the directory to archive.
z = '/kaggle/working/models'
shutil.make_archive(base_name=z, format='zip', root_dir=z)