In [None]:
#Importing relevant libraries

import numpy as np
import pandas as pd
from pathlib import Path
import os.path
import matplotlib.pyplot as plt
from IPython.display import Image, display
import matplotlib.cm as cm
from sklearn.model_selection import train_test_split
import tensorflow as tf



In [None]:
# Build a (Filepath, Label) table with one row per PNG found under the dataset root.

# Join the path components with pathlib rather than embedding backslashes in a
# plain string: a backslash-separated literal only resolves on Windows, and
# "\F" is an invalid escape sequence that newer Python versions warn about.
image_dir = Path('Data') / 'Fish_Dataset' / 'Fish_Dataset'

# Sort the matches so the row order does not depend on filesystem enumeration order.
image_paths = sorted(image_dir.glob('**/*.png'))

# The label of an image is the name of the directory that directly contains it.
labelspaths = [path.parent.name for path in image_paths]

# An explicit string dtype keeps the .str accessor usable even when no files were found.
filespaths = pd.Series([str(path) for path in image_paths], name='Filepath', dtype=str)
labels = pd.Series(labelspaths, name='Label', dtype=str)

# Create the image dataframe with the image paths and labels as the columns.
image_frame = pd.concat([filespaths, labels], axis=1)

# Remove the GT images: drop every row whose label ends in 'GT'.
image_frame = image_frame[~image_frame['Label'].str.endswith('GT')]



In [None]:
# Shuffle the rows by sampling the whole frame without replacement, then renumber
# the index from 0. Fixing random_state makes the shuffle repeatable across kernel
# restarts, so any seeded split performed on this frame is reproducible as well.
image_frame = image_frame.sample(frac=1, random_state=1).reset_index(drop=True)

image_frame.head()

In [None]:
# Split the dataset into training and test dataframes: 90% of the rows go to
# training and the remaining 10% are held out for testing. The split is shuffled
# with a fixed seed.
split_options = dict(train_size=0.9, shuffle=True, random_state=1)
df_train, df_test = train_test_split(image_frame, **split_options)


In [None]:
# Initialize the image generators. Both apply MobileNetV2's own input
# preprocessing; only the training generator reserves 20% of its data as a
# validation subset, which is used to watch the validation loss for overfitting.
mobilenet_preprocess = tf.keras.applications.mobilenet_v2.preprocess_input
generator_cls = tf.keras.preprocessing.image.ImageDataGenerator

train_gen = generator_cls(
    preprocessing_function=mobilenet_preprocess,
    validation_split=0.2,
)

test_gen = generator_cls(preprocessing_function=mobilenet_preprocess)

In [None]:
# Settings shared by the training, validation and test iterators. Keeping them in
# one place guarantees all three produce 224x224 RGB batches, which is the input
# shape the MobileNetV2 base is built with. (The training iterator previously
# requested 224x244 images, which does not match the model input.)
flow_options = dict(
    x_col='Filepath',
    y_col='Label',
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
)

# Training subset of df_train (the part not reserved by validation_split).
train_im = train_gen.flow_from_dataframe(
    dataframe=df_train,
    shuffle=True,
    seed=50,
    subset='training',
    **flow_options,
)

# Validation subset of df_train, drawn from the same generator as train_im.
val_im = train_gen.flow_from_dataframe(
    dataframe=df_train,
    shuffle=True,
    seed=50,
    subset='validation',
    **flow_options,
)

# Held-out test data; not shuffled, so batches follow the row order of df_test.
test_im = test_gen.flow_from_dataframe(
    dataframe=df_test,
    shuffle=False,
    **flow_options,
)

In [None]:
# Load MobileNetV2 with ImageNet weights, without its top (output) layers and
# with average pooling applied to the final feature maps.
mobilenet_options = {
    'input_shape': (224, 224, 3),
    'include_top': False,
    'weights': 'imagenet',
    'pooling': 'avg',
}
pre_mod = tf.keras.applications.MobileNetV2(**mobilenet_options)

# Freeze the pretrained layers so the base acts purely as a fixed feature
# extractor; only layers added on top of it will be trained.
pre_mod.trainable = False

In [None]:
# Build the classifier on top of the frozen MobileNetV2 base, compile it and train it.
inputs = pre_mod.input

# Classification head: two fully connected layers of 128 units each, fed by the
# pooled output of the pretrained base.
x = tf.keras.layers.Dense(128, activation='relu')(pre_mod.output)
x = tf.keras.layers.Dense(128, activation='relu')(x)

# Size the softmax layer from the labels the training iterator actually found
# instead of hard-coding the class count, so the head always matches the
# one-hot targets the iterator yields.
num_classes = len(train_im.class_indices)
outputs = tf.keras.layers.Dense(num_classes, activation='softmax')(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Guard against overfitting with early stopping on the validation loss:
# patience=1 halts training after a single epoch without improvement, and
# restore_best_weights rolls the model back to the best epoch seen. The 50-epoch
# setting is therefore only an upper bound.
history = model.fit(
    train_im,
    validation_data=val_im,
    epochs=50,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=1,
            restore_best_weights=True
        )
    ]
)

In [None]:
# Plot the training and validation loss recorded for each epoch.
loss_curves = pd.DataFrame(history.history).loc[:, ['loss', 'val_loss']]
loss_curves.plot()
plt.title('Loss')
plt.show()

In [None]:
# Plot the training and validation accuracy recorded for each epoch. The model is
# compiled with metrics=['accuracy'], so these curves are tracked during fit;
# this cell previously drew the loss plot a second time.
pd.DataFrame(history.history)[['accuracy', 'val_accuracy']].plot()
plt.title("Accuracy")
plt.show()

In [None]:
# Evaluate the trained model on the held-out test iterator. The first two entries
# of the result are reported as the loss and the accuracy, in that order.
results = model.evaluate(test_im, verbose=0)

for metric_label, metric_value in zip(("Loss: ", "Accuracy: "), results):
    print(metric_label, metric_value)