# Import Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import os
import cv2

from sklearn.preprocessing import LabelEncoder

from sklearn.metrics import confusion_matrix, classification_report

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.image import load_img, img_to_array

from sklearn.model_selection import train_test_split

# Ask TensorFlow to enable memory growth on every visible GPU, then report how
# many physical and logical GPU devices it sees. Any failure is only printed,
# so the notebook keeps running (e.g. on a CPU-only or already-initialized
# runtime).
gpus = tf.config.experimental.list_physical_devices("GPU")
if gpus:
    try:
        for device in gpus:
            tf.config.experimental.set_memory_growth(device, True)

        logical_gpus = tf.config.experimental.list_logical_devices("GPU")
        print(len(gpus), "Physical GPUs.")
        print(len(logical_gpus), "Logical GPUs.")

    except Exception as err:
        print(err)

# Load Data

In [None]:
# Root folder of the dataset; each sub-directory name is used as a class label.
root_dir = "/mnt/hdd/Datasets/aquatic_animals"
# Keep only sub-directories (a stray file would make the per-class listing
# fail) and sort them: os.listdir returns entries in arbitrary order, which
# would make the row order, and any seeded split built on it, irreproducible.
folders = sorted(
    entry
    for entry in os.listdir(root_dir)
    if os.path.isdir(os.path.join(root_dir, entry))
)
folders

In [None]:
# For every readable image under each class folder, collect the image resized
# to 224x224, its class label (the folder name) and its file path. The three
# lists stay index-aligned.
data = []
img_paths = []
labels = []

for label in folders:
    path = f'{root_dir}/{label}/'
    if not os.path.isdir(path):
        # Not a class folder (e.g. a stray file in the dataset root).
        continue
    # Sorted so that the row order does not depend on filesystem listing order.
    folder_data = sorted(os.listdir(path))
    for image_path in folder_data:
        img_path = os.path.join(path, image_path)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None (it does not raise) when a file cannot
            # be read as an image; resizing None would abort the whole load.
            print(f"Skipping unreadable image: {img_path}")
            continue
        data.append(cv2.resize(img, (224, 224)))
        labels.append(label)
        img_paths.append(img_path)

In [None]:
# Convert the three accumulated Python lists into NumPy arrays in one pass.
data, labels, img_paths = (
    np.array(collected) for collected in (data, labels, img_paths)
)

In [None]:
# Shapes of the image, label and path arrays; their first dimensions should agree.
tuple(arr.shape for arr in (data, labels, img_paths))

In [None]:
# One row per image: its file path and its class label.
df = pd.DataFrame(dict(Path=img_paths, Label=labels))
df.head()

In [None]:
def visualize_df(df: pd.DataFrame):
    """Show a random 4x4 grid of images from *df*, each titled with its label.

    Up to 16 rows are drawn at random without replacement. When *df* has
    fewer rows, the remaining grid cells are left blank.

    Args:
        df: Table with a "Path" column (image files readable by
            ``plt.imread``) and a "Label" column (used as the title).
    """
    _, axes = plt.subplots(4, 4, figsize=(12, 12))
    cells = axes.ravel()

    # Sample the rows once. Shuffling separately for the path and for the
    # label pairs each image with the label of an unrelated row.
    shown = df.sample(n=min(len(df), len(cells)))
    rows = list(zip(shown["Path"], shown["Label"]))

    for i, ax in enumerate(cells):
        if i < len(rows):
            img_path, label = rows[i]
            ax.imshow(plt.imread(img_path))
            ax.set_title(label)
        ax.axis("off")

    plt.tight_layout()
    plt.show()

In [None]:
# Preview a random grid of labeled images as a sanity check on the loaded data.
visualize_df(df)

# Model

In [None]:
# Rebuild the table with the column names that the flow_from_dataframe calls
# reference ("file_path" and "label").
df = pd.DataFrame(dict(file_path=img_paths, label=labels))
df.head()

In [None]:
# (number of images, number of columns) of the path/label table.
df.shape

In [None]:
# MobileNetV2's ImageNet weights expect pixels scaled to [-1, 1], which is what
# mobilenet_v2.preprocess_input produces; a plain 1/255 rescale would feed the
# network [0, 1] instead. ImageDataGenerator applies preprocessing_function
# after the image has been resized and augmented.
mobilenet_preprocess = tf.keras.applications.mobilenet_v2.preprocess_input

# Training images get random flips, shifts and rotations on top of the scaling.
train_data = ImageDataGenerator(
    preprocessing_function=mobilenet_preprocess,
    horizontal_flip=True,
    vertical_flip=True,
    width_shift_range=0.2,
    height_shift_range=0.2,
    rotation_range=45,
    fill_mode="nearest"
)

# Test and validation images are only scaled, never augmented.
test_data = ImageDataGenerator(
    preprocessing_function=mobilenet_preprocess
)

valid_data = ImageDataGenerator(
    preprocessing_function=mobilenet_preprocess
)

In [None]:
def train_validate_test_split(df, train_frac=0.8, valid_frac=0.1, seed=42):
    """Randomly split *df* into train, validation and test DataFrames.

    Rows are shuffled with a seeded permutation and cut into three
    consecutive, non-overlapping slices. With the defaults this is an
    80/10/10 split; the test part receives whatever remains.

    Args:
        df: DataFrame to split. Any index is accepted; rows are selected by
            position.
        train_frac: Fraction of rows assigned to the training part.
        valid_frac: Fraction of rows assigned to the validation part.
        seed: Seed for the shuffle.

    Returns:
        Tuple ``(train, validate, test)`` of DataFrames.

    Raises:
        ValueError: If a fraction is negative or the two fractions sum to
            more than 1.
    """
    if train_frac < 0 or valid_frac < 0 or train_frac + valid_frac > 1:
        raise ValueError(
            "train_frac and valid_frac must be non-negative and sum to at most 1"
        )

    # NOTE: this seeds NumPy's *global* generator, so NumPy randomness used
    # after this call is affected as well.
    np.random.seed(seed)

    n_rows = len(df)
    # Permute positions, not index labels: the result is fed to .iloc, and
    # labels only coincide with positions for a default RangeIndex.
    perm = np.random.permutation(n_rows)
    train_end = int(train_frac * n_rows)
    validate_end = int(valid_frac * n_rows) + train_end

    train = df.iloc[perm[:train_end]]
    validate = df.iloc[perm[train_end:validate_end]]
    test = df.iloc[perm[validate_end:]]
    return train, validate, test

# Split the table into train/validation/test parts and print the three sizes.
df_train, df_validation, df_test = train_validate_test_split(df)
print(*(len(part) for part in (df_train, df_validation, df_test)))

In [None]:
# Use one explicit class list for every labeled generator. flow_from_dataframe
# otherwise infers the classes from each dataframe separately, so a split that
# happens to miss a class would get different label indices and a different
# one-hot width than the training data.
class_names = sorted(df["label"].unique().tolist())

# Training batches: augmented, reshuffled every epoch, one-hot labels.
train_gen = train_data.flow_from_dataframe(
    dataframe=df_train,
    x_col="file_path",
    y_col="label",
    batch_size=32,
    shuffle=True,
    class_mode="categorical",
    target_size=(224, 224),
    classes=class_names,
)

# Validation batches: a fixed order, so every epoch is scored on the same
# images in the same sequence.
valid_gen = valid_data.flow_from_dataframe(
    dataframe=df_validation,
    x_col="file_path",
    y_col="label",
    batch_size=32,
    shuffle=False,
    class_mode="categorical",
    target_size=(224, 224),
    classes=class_names,
)

# Unlabeled test batches (images only). The order must stay fixed, otherwise
# predictions cannot be matched back to test_gen.filenames.
test_gen = test_data.flow_from_dataframe(
    dataframe=df_test,
    x_col="file_path",
    y_col=None,
    batch_size=32,
    shuffle=False,
    class_mode=None,
    target_size=(224, 224),
)

# MobileNetV2

In [None]:
# MobileNetV2 with ImageNet weights and without its classification top,
# taking 224x224 RGB inputs.
base_model = MobileNetV2(
    include_top=False,
    weights="imagenet",
    input_shape=(224, 224, 3),
)

# Mark every layer of the pretrained network as non-trainable so that only
# layers added on top of it are updated during training.
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False

In [None]:
# Size the output layer from the classes the training generator actually
# found, instead of a hard-coded 8 that breaks as soon as the dataset has a
# different number of class folders.
num_classes = len(train_gen.class_indices)

# Classification head: pool the feature maps to one vector per image, pass it
# through a 1024-unit ReLU layer, then a softmax over the classes.
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation="relu")(x)
predictions = Dense(num_classes, activation="softmax")(x)

model = Model(inputs=base_model.input, outputs=predictions)

In [None]:
# Categorical cross-entropy matches the one-hot ("categorical") labels the
# generators emit; accuracy is tracked alongside the loss.
model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

In [None]:
# Early stopping that watches the validation loss with a patience of 5 epochs.
callbacks = [
    EarlyStopping(patience=5, monitor="val_loss"),
]

In [None]:
# Train for up to 10 epochs, validating after each one. The callbacks list
# must be passed here: without it the EarlyStopping callback defined for this
# run is never invoked.
history = model.fit(
    train_gen,
    steps_per_epoch=train_gen.samples // train_gen.batch_size,
    epochs=10,
    validation_data=valid_gen,
    validation_steps=valid_gen.samples // valid_gen.batch_size,
    callbacks=callbacks,
)

In [None]:
#model.save("aquatic.h5")

In [None]:
# Per-epoch metrics recorded by fit, one column per metric.
history_df = pd.DataFrame.from_dict(history.history)
history_df.head()

In [None]:
# Training vs. validation accuracy per epoch.
plt.figure()
for metric_key, legend_name in (("accuracy", "train"), ("val_accuracy", "valid")):
    plt.plot(history.history[metric_key], label=legend_name)
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()
plt.show()

In [None]:
# Training vs. validation loss per epoch.
plt.figure()
for metric_key, legend_name in (("loss", "train"), ("val_loss", "valid")):
    plt.plot(history.history[metric_key], label=legend_name)
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.show()

In [None]:
# Score the model on the held-out test split. Evaluating valid_gen here would
# only repeat the validation score under a "Test accuracy" label. A labeled,
# unshuffled generator is built for this purpose, reusing the training
# generator's class order so that label indices match what the model learned.
eval_gen = test_data.flow_from_dataframe(
    dataframe=df_test,
    x_col="file_path",
    y_col="label",
    batch_size=32,
    shuffle=False,
    class_mode="categorical",
    target_size=(224, 224),
    classes=list(train_gen.class_indices),
)

test_loss, test_acc = model.evaluate(eval_gen)
print('Test accuracy:', test_acc)