This notebook is a beginner's attempt at CNN-based image classification. I am a beginner who is trying her hand at Keras-based CNNs. Your feedback will help me grow and learn more, so please do leave it in the comments 👇

In [3]:
# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))


# Importing necessary Libraries

In [4]:
import numpy as np 
import pandas as pd 
import os
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
from glob import glob


# From tensorflow
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator

# Import Sklearn
from sklearn.preprocessing import StandardScaler 
from sklearn.model_selection import train_test_split 
from sklearn.metrics import classification_report, confusion_matrix


KeyboardInterrupt: 

# Reading data

In [None]:
# Root folder of the fish dataset, wrapped in a Path so it can be globbed.
base_path = Path("../input/a-large-scale-fish-dataset/Fish_Dataset")
# Recursively collect every PNG below the root as a list.
# glob() yields entries in filesystem-dependent order, so the paths are sorted:
# a stable row order is what makes the later seeded train/test split repeatable.
paths = sorted(base_path.glob(r'**/*.png'))

# Creating Dataframe

In [None]:
# Series of image file paths (as strings), named "FilePaths".
# Files whose parent folder name contains "GT" (presumably ground-truth masks --
# TODO confirm) are skipped here. The label list built from this Series drops
# those same entries, so leaving them in would make the labels shorter than the
# paths and pair every label with the wrong file when the two are concatenated.
Paths_Series = pd.Series(
    [str(path) for path in paths if "GT" not in path.parent.name],
    name="FilePaths",
).astype(str)

In [None]:
# Derive each image's class label from the name of the folder that directly
# contains it, skipping folders whose name contains "GT".
# Path(...).parent.name replaces the hard-coded path.split("/")[6], which only
# worked for one exact directory depth and split every path twice.
# NOTE(review): the result is a fresh 0..n-1 Series, so it lines up row-by-row
# with Paths_Series only if Paths_Series itself holds no "GT" entries -- confirm.
parent_dirs = (Path(path).parent.name for path in Paths_Series)
lab = [folder for folder in parent_dirs if "GT" not in folder]
label = pd.Series(lab, name="Label").astype(str)

### Concatenating the file paths and labels to get the data frame

In [None]:
# Place paths and labels side by side (aligned on index), then discard every
# row in which either column is missing.
df = pd.concat([Paths_Series, label], axis=1).dropna(axis=0)

In [None]:

# Lift the column-width limit so long file paths are displayed in full.
pd.set_option("display.max_colwidth", None)
# Peek at the first five rows.
df.head()

### Plotting the Images

In [None]:
# Preview the first 16 rows of df as a 4x4 grid, each image titled with its label.
fig, ax = plt.subplots(4, 4, figsize=(20, 15))
preview = df.head(16)
# Walk the rows positionally instead of using df.FilePaths[idx]: that lookup is
# label-based, and df's index is never reset after dropna, so any gap in the
# index would raise KeyError. zip() also stops cleanly if df has fewer than 16 rows.
for axis, file_path, fish_label in zip(ax.flat, preview.FilePaths, preview.Label):
    axis.imshow(plt.imread(file_path))
    axis.set_title(fish_label)

In [None]:
# Number of images per class.
# Author's observation: the classes look evenly represented.
df["Label"].value_counts()

### Splitting the data

In [None]:
# Hold out 20% of the rows as the test set; shuffling is seeded so the split
# is the same on every run for a given row order.
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    shuffle=True,
    random_state=0,
)

Looks right!

In [None]:
# Non-null counts per column: first the training split, then the test split.
for split_frame in (train_df, test_df):
    print(split_frame.count())

### Generating Data

In [None]:
df.info()
# Sanity check on non-null counts. Author's note: an earlier version still had a
# few null values at this point because the dropna call was not applied in place.

In [None]:
# Generator for training and validation: scale pixel values by 1/255 and reserve
# 20% of the frame it is given as the validation subset.
# Fix: rescale was missing here although the test generator applied it, so the
# model was fitted on raw pixel values but evaluated on values scaled by 1/255.
train_generator = ImageDataGenerator(rescale=1./255, validation_split=0.2)
# Generator for the test set: same pixel scaling, no validation split.
test_generator = ImageDataGenerator(rescale=1./255)

In [None]:
# Settings shared by all three image streams: column names, 224x224 RGB input,
# one-hot ("categorical") targets, and batches of 32.
common_flow_args = dict(
    x_col="FilePaths",
    y_col="Label",
    target_size=(224, 224),
    color_mode="rgb",
    class_mode="categorical",
    batch_size=32,
)

# Training subset of train_df (shuffled).
train_imgs = train_generator.flow_from_dataframe(
    dataframe=train_df,
    shuffle=True,
    subset="training",
    **common_flow_args,
)
# Validation subset of train_df, carved out by the generator's validation_split.
val_imgs = train_generator.flow_from_dataframe(
    dataframe=train_df,
    shuffle=True,
    subset="validation",
    **common_flow_args,
)

# Test stream: not shuffled, so batches come out in a fixed order.
test_imgs = test_generator.flow_from_dataframe(
    dataframe=test_df,
    shuffle=False,
    **common_flow_args,
)

### Building Model

In [None]:

layers = tf.keras.layers


def _conv3x3(filters, **kwargs):
    """Build a 3x3 Conv2D layer with "Same" padding and ReLU activation."""
    return layers.Conv2D(filters, (3, 3), padding="Same", activation="relu", **kwargs)


# Convolutional stack: four 3x3 convolutions (32, 64, 64, 128 filters) with
# 2x2 max pooling after each of the first three.
feature_extractor = [
    _conv3x3(32, input_shape=(224, 224, 3)),
    layers.MaxPooling2D((2, 2)),
    _conv3x3(64),
    layers.MaxPooling2D((2, 2)),
    _conv3x3(64),
    layers.MaxPooling2D((2, 2)),
    _conv3x3(128),
]

# Classifier head: flatten, two dense layers each followed by 20% dropout,
# then a 9-way softmax output.
classifier_head = [
    layers.Flatten(),
    layers.Dense(64, activation="relu"),
    layers.Dropout(0.2),
    layers.Dense(128, activation="relu"),
    layers.Dropout(0.2),
    layers.Dense(9, activation="softmax"),
]

model = tf.keras.models.Sequential(feature_extractor + classifier_head)

# One-hot targets -> categorical cross-entropy; accuracy is tracked as the metric.
model.compile(
    loss="categorical_crossentropy",
    optimizer="rmsprop",
    metrics=["accuracy"],
)

model.summary()



In [None]:
# Train for 10 epochs, evaluating on the validation stream after each epoch.
# batch_size is deliberately not passed: train_imgs is a generator that already
# yields batches of 32, and Keras' fit() documentation states that batch_size
# must not be specified for generator / Sequence inputs.
history = model.fit(train_imgs, validation_data=val_imgs, epochs=10)

In [None]:
# Per-epoch metrics recorded by fit().
metrics_log = history.history
epochs = range(1, len(metrics_log['accuracy']) + 1)

# Two panels side by side: accuracy on the left, loss on the right, each
# showing the training curve against the validation curve.
figure, axis = plt.subplots(1, 2, figsize=(20, 10))
for panel, metric, title in zip(axis, ('accuracy', 'loss'), ('Accuracy', 'Loss')):
    panel.plot(epochs, metrics_log[metric], label="train")
    panel.plot(epochs, metrics_log['val_' + metric], label="validation")
    panel.set_title(title)
    panel.legend()

plt.show()

### Prediction

In [None]:
# Run the trained model over the test stream. test_imgs was created with
# shuffle=False, so the predictions come back in the generator's fixed order.
pred = model.predict(test_imgs)

In [None]:
# For every sample, pick the index of the highest-scoring class.
output = pred.argmax(axis=1)

In [None]:
# Mapping of class name -> integer index. It is read from the training stream
# because the model was fitted on train_imgs, so that generator's mapping is the
# one that defines what each output column means. The test stream's mapping is
# only guaranteed to match if it contains exactly the same set of classes.
labels = train_imgs.class_indices

In [None]:
# Invert the name -> index mapping so predicted indices can be turned into names.
pred_labels = {class_index: class_name for class_name, class_index in labels.items()}

# Predicted class name for every test sample, in prediction order.
y_vals = [pred_labels[class_index] for class_index in output]

### Classification report and Confusion Matrix

In [None]:
# Compare the true test labels with the predicted class names: per-class
# precision/recall/F1 first, then the raw confusion matrix.
y_true = test_df.Label
print(classification_report(y_true, y_vals))
print(confusion_matrix(y_true, y_vals))