### Includes

In [1]:
# I like to put all includes at the start of the script so we can make sure we have all the neccessary packages installed already 
import os
from pathlib import Path
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

import tensorflow as tf
from tensorflow.keras.optimizers import Adam

from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, Model
from keras.layers import Dense, Flatten, Dropout, GlobalAveragePooling2D

### Import Data

In [1]:
dataset_path = Path(r'../input/a-large-scale-fish-dataset/Fish_Dataset/Fish_Dataset')

file_path = list(dataset_path.glob(r'**/*.png'))

labels = list(map(lambda x: os.path.split(os.path.split(x)[0])[1], file_path))

In [1]:
file_path = pd.Series(file_path).astype(str)
labels = pd.Series(labels)

df = pd.concat([file_path, labels], axis=1)

df.columns = ['image', 'label']

df.head()

## Display Images

In [1]:
fig, axes = plt.subplots(nrows=5, ncols=5, figsize=(15,10))
for i, ax in enumerate(axes.flat):
    ax.imshow(plt.imread(df.image[i]))
    ax.set_title(df.label[i])
    
plt.show()

### List Each Directory (Fish) With The Number Of Images For That Category (Fish)

In [1]:
df = df[df['label'].apply(lambda x: x[-2:] != 'GT')].reset_index(drop=True)
df.label.value_counts()

### Create train, test and validation dataset

In [1]:
x_train, x_test = train_test_split(df, test_size=0.3,random_state=30)
x_train, x_val = train_test_split(x_train, test_size=0.2, random_state=30)

In [1]:
print("Training Data Shape", x_train.shape)
print("Test Data Shape", x_test.shape)
print("Validation Data Shape", x_val.shape)

In [1]:
image_data_generator = ImageDataGenerator(rescale = 1./255,
    rotation_range=40,
      width_shift_range=0.2,
      height_shift_range=0.2,
      shear_range=0.2,
      zoom_range=0.2,
      horizontal_flip=True,
      fill_mode='nearest')

train = image_data_generator.flow_from_dataframe(dataframe=x_train, x_col='image', y_col='label', target_size=(200,200), color_mode='rgb', class_mode='categorical', shuffle=False)
test = image_data_generator.flow_from_dataframe(dataframe=x_test, x_col='image', y_col='label', target_size=(200,200), color_mode='rgb', class_mode='categorical', shuffle=False)
val = image_data_generator.flow_from_dataframe(dataframe=x_val, x_col='image', y_col='label', target_size=(200,200), color_mode='rgb', class_mode='categorical',shuffle=False)

## Define the model

In [1]:
input_shape = (200, 200, 3)

model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(64, (3,3), activation='relu', input_shape=input_shape ),
    tf.keras.layers.MaxPool2D(pool_size = (2,2)),
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size = (2,2)),
    tf.keras.layers.Conv2D(32, (3,3), activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size = (2,2)),
    tf.keras.layers.Conv2D(32, (3,3), activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size = (2,2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(9, activation='softmax')
])

model.summary()

### Start Training

In [1]:
model.compile(optimizer="adam", 
              loss='categorical_crossentropy', 
              metrics=["accuracy"]
             )

callback = tf.keras.callbacks.EarlyStopping(monitor='accuracy', patience=4)

history = model.fit(train, 
                    validation_data=val, 
                    epochs=20, 
                    callbacks=callback)

### Save Model

In [1]:
model.save('model-1.h5')

### Check The Accuracy

In [1]:
accuracy = history.history['accuracy']
val_accuracy  = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

### Plot The Accuracy

In [1]:
plt.figure(figsize=(15,10))

plt.subplot(2, 2, 1)
plt.plot(accuracy, label = "Training_Accuracy")
plt.plot(val_accuracy, label="Validation_Accuracy")
plt.legend()
plt.title("Training Accuracy VS. Validation Accuracy")


plt.subplot(2,2,2)
plt.plot(loss, label = "Training_Loss")
plt.plot(val_loss, label="Validation_Loss")
plt.legend()
plt.title("Training Loss VS. Validation Loss")

plt.show()

In [1]:
pred = model.predict(test)

Extract the class with highest probability for all the test files

In [1]:
pred = np.argmax(pred, axis=1)

In [1]:
labels = train.class_indices

In [1]:
labels

In [1]:
labels = dict((v,k) for k, v in labels.items())

In [1]:
labels

In [1]:
y_pred = [labels[k] for k in pred]

In [1]:
print(classification_report(x_test.label, y_pred))

In [1]:
print(confusion_matrix(x_test.label, y_pred))

In [1]:
test_accuracy = model.evaluate(test)[1]

## My results from running the model for 20 Epochs is below
#### Training_Accuracy = 0.878
#### Validation_Accuracy= 0.8722

## No bad but definitly room for improvment