Identify the snake breed
https://www.hackerearth.com/challenges/competitive/hackerearth-deep-learning-challenge-snake-breed-detection/machine-learning/identify-the-snake-breed-5-66d9a9f5/

This is a challenge from HackerEarth.com, and one of the participants from HE has uploaded the dataset to Kaggle. See the details of the challenge below.

# Problem statement
The government has been facing a long-standing issue of wild animals entering residential areas due to various reasons. It's of critical importance that if any such dangerous animal is encountered, the concerned authority should be notified immediately. Reptiles, especially snakes, are among the most dangerous animals and they often enter residential areas.

Recently due to an incident of a youngster getting bitten by a snake, the government decided to install cameras at every corner of the road to detect snakes and other animals.

You have been hired as a Deep Learning engineer to create a sophisticated model that can detect the breed of a snake from its image.

# Import Library

In [None]:
import pandas as pd
import numpy as np

import random, os

import cv2

import matplotlib.pyplot as plt 
import seaborn as sns
import matplotlib.image as mpimg
%matplotlib inline

import tensorflow as tf
from tensorflow.keras import models, layers
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.optimizers import Adam

# Set Directory Path

In [None]:
# Root of the Kaggle dataset; train.csv and the image folders are resolved from it.
base_dir = '../input/hackerearth-deep-learning-identify-the-snake-breed/dataset'

# Folders holding the training and test images.
train_img_dir, test_img_dir = (
    os.path.join(base_dir, folder) for folder in ('train/', 'test/')
)

# Names of every entry found in the training image folder.
train_images = os.listdir(train_img_dir)


In [None]:
# Sanity check: the training image folder and how many entries it contains.
print(train_img_dir, len(train_images), sep='\n')

# Target Information from csv file

In [None]:
# Load the table that maps each image_id to its breed, then preview it.
train_csv_path = os.path.join(base_dir, 'train.csv')
train_df = pd.read_csv(train_csv_path)
train_df.head()

The image_id column does not include the file extension, so let's first append the .jpg extension so that each image_id refers directly to its image file.

In [None]:
def append_ext(fn):
    """Return *fn* with a ``.jpg`` extension, adding it only when missing.

    Names that already end in ``.jpg`` are returned unchanged, which makes the
    function idempotent: applying it twice (for example by re-running the
    notebook cell that maps it over ``image_id``) cannot produce
    ``*.jpg.jpg`` names that match no file on disk.

    Args:
        fn: Image identifier or file name, as a string.

    Returns:
        The file name ending in ``.jpg``.
    """
    if fn.endswith(".jpg"):
        return fn
    return fn + ".jpg"

# Rewrite every image_id through append_ext so the column holds file names,
# then preview the result.
train_df["image_id"]=train_df["image_id"].apply(append_ext)
train_df.head()

In [None]:
# Number of distinct breeds in the training table, i.e. the number of target classes.
num_classes = len(train_df["breed"].unique())
num_classes

# Converting Target (breed) into Numerical

In [None]:
# Summary of the columns (dtypes, non-null counts) before breed is converted.
train_df.info()

In [None]:
# Store breed as a pandas categorical so each breed has an integer code,
# then show the resulting column dtypes.
train_df["breed"] = train_df["breed"].astype("category")
train_df.dtypes

In [None]:
# Numeric target: the categorical code of each row's breed.
train_df["label"] = train_df["breed"].cat.codes
train_df.head()

In [None]:
# Check Categories.
# LABEL holds the breed names of the categorical column; the codes stored in
# train_df["label"] are positions in this index, so LABEL[code] is the breed name.
LABEL = train_df["breed"].cat.categories
LABEL

In [None]:
# Spot-check two code -> breed lookups.
LABEL[15], LABEL[25]

# Class Distribution
Let's visualize the class distribution.

In [None]:
# Bar chart of the number of training rows per breed; the tick labels are
# rotated by 90 degrees.
plt.figure(figsize=(20, 8))
ax = sns.countplot(data=train_df, x="breed")
plt.xticks(rotation=90)

In [None]:
# Share of training rows per breed, in percent, largest share first.
((train_df.groupby('breed').size()/train_df['breed'].count())*100 ).sort_values(ascending=False)

So we find that 9+% of the data belongs to the breed "thamnophis-sirtalis", going down to 1+% for the breed "crotalus-scutulatus".

# Check for Duplicate

In [None]:
# Each image should appear once in the table; count how many rows repeat an
# image_id that is already present.
uniqueIds = train_df['image_id'].nunique()
duplicate_count = len(train_df) - uniqueIds

if duplicate_count == 0:
    print('There are no repeating Image IDs in the dataset')
else:
    # f-string so the count is interpolated instead of printed as literal braces.
    print(f'There are {duplicate_count} repeating Image IDs')

# Visualize the Images.
Already done in my other notebooks; refer to:
1. https://www.kaggle.com/dskagglemt/identify-the-snake-breed
2. https://www.kaggle.com/dskagglemt/snake-breed-classification-vgg

# Training the Model

In [None]:
# Main parameters
BATCH_SIZE = 16
EPOCHS = 20
TARGET_SIZE = 300  # images are resized to TARGET_SIZE x TARGET_SIZE

# Step counts must be whole numbers. The rows are split 80/20 into training
# and validation, and each count is rounded up (ceil division) so the final
# partial batch is included in every epoch.
# NOTE(review): assumes every row of train_df has a matching image on disk;
# rows whose file is missing are dropped by the generators - confirm.
_num_val_samples = int(len(train_df) * 0.2)
_num_train_samples = len(train_df) - _num_val_samples
STEPS_PER_EPOCH = -(-_num_train_samples // BATCH_SIZE)
VALIDATION_STEPS = -(-_num_val_samples // BATCH_SIZE)

In [None]:
# The generators below read the target from the "label" column as strings.
# Zero-pad the codes to a common width ("5" -> "05") so that sorting the
# label strings gives the same order as the numeric codes; with unpadded
# strings "10" sorts before "2", and class indices assigned from the sorted
# strings would no longer line up with the categorical codes / LABEL.
_label_text = train_df["label"].astype(str)
train_df["label"] = _label_text.str.zfill(int(_label_text.str.len().max()))
train_df.head()

In [None]:
# Re-check the column dtypes after the label column was converted to strings.
train_df.info()

In [None]:
# Augmentation pipeline for the training subset (80% of train_df).
# featurewise_center / featurewise_std_normalization are intentionally not
# set: they only take effect after ImageDataGenerator.fit() has been called
# on sample data, which this notebook never does, so they merely produced a
# warning. Leaving them out also keeps the training inputs on the same pixel
# scale as the validation generator, which applies no normalization.
train_datagen = ImageDataGenerator(
    validation_split = 0.2,
    preprocessing_function = None,
    rotation_range = 20,
    zoom_range = 0.2,
    cval = 0.1,  # only used when fill_mode is 'constant'
    horizontal_flip = True,
    vertical_flip = True,
    fill_mode = 'nearest',
    shear_range = 0.15,
    height_shift_range = 0.15,
    width_shift_range = 0.15
)

# Batches of augmented, resized training images with their class labels.
train_generator = train_datagen.flow_from_dataframe(
    train_df,
    directory = train_img_dir,
    subset = "training",
    x_col = "image_id",
    y_col = "label",
    target_size = (TARGET_SIZE, TARGET_SIZE),
    batch_size = BATCH_SIZE,
    class_mode = "sparse"
)

In [None]:
# Validation images are only resized, with no augmentation. The generator uses
# the same 0.2 split on the same dataframe as the training generator.
validation_datagen = ImageDataGenerator(validation_split = 0.2)

validation_generator = validation_datagen.flow_from_dataframe(
    dataframe = train_df,
    directory = train_img_dir,
    x_col = "image_id",
    y_col = "label",
    subset = "validation",
    class_mode = "sparse",
    target_size = (TARGET_SIZE, TARGET_SIZE),
    batch_size = BATCH_SIZE
)

In [None]:
# EfficientNetB0 backbone with ImageNet weights and without its classification head.
conv_base = EfficientNetB0(
    include_top = False, 
    weights = 'imagenet',
    input_shape = (TARGET_SIZE, TARGET_SIZE, 3)
)

# New head: global-average-pool the backbone output and emit one softmax
# probability per breed. The output width must be num_classes (not a
# hard-coded 5), because sparse_categorical_crossentropy needs every integer
# label to index a valid output unit.
features = layers.GlobalAveragePooling2D()(conv_base.output)
outputs = layers.Dense(num_classes, activation = "softmax")(features)
model = models.Model(conv_base.input, outputs)

model.compile(
    # `learning_rate` replaces the deprecated `lr` argument.
    optimizer = Adam(learning_rate = 0.001),
    loss = "sparse_categorical_crossentropy",
    metrics = ["acc"]
)

model.summary()

In [None]:
# Stop training once val_loss has not improved by at least 0.001 for 5 epochs,
# and roll the model back to the best weights seen.
early_stop = EarlyStopping(
    monitor = 'val_loss',
    mode = 'min',
    min_delta = 0.001,
    patience = 5,
    restore_best_weights = True,
    verbose = 1
)

# Multiply the learning rate by 0.3 when val_loss has not improved by at
# least 0.001 for 2 epochs.
reduce_lr = ReduceLROnPlateau(
    monitor = 'val_loss',
    mode = 'min',
    min_delta = 0.001,
    patience = 2,
    factor = 0.3,
    verbose = 1
)

In [None]:
# Checkpoint callback: write the weights (not the full model) to disk
# whenever val_loss reaches a new minimum.
model_save = ModelCheckpoint(
    filepath = './SnakeBreed_EfficientNetB0_Model_1.h5',
    monitor = 'val_loss',
    mode = 'min',
    save_best_only = True,
    save_weights_only = True,
    verbose = 1
)

In [None]:
# Train on the augmented training generator and evaluate on the validation
# generator after each epoch; the per-epoch metrics are kept in `history`.
# NOTE(review): both `callbacks` lines are commented out, so early_stop,
# reduce_lr and model_save are not used and training always runs for EPOCHS
# epochs without saving a checkpoint - confirm this is intended.
history = model.fit(
    train_generator,
    steps_per_epoch = STEPS_PER_EPOCH,
    epochs = EPOCHS,
    validation_data = validation_generator,
    validation_steps = VALIDATION_STEPS,
#     callbacks = [early_stop, reduce_lr]
#     callbacks = [model_save, early_stop, reduce_lr]
)

# Visualize the Performance

In [None]:
# Per-epoch metrics recorded by model.fit().
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']

# Epochs are numbered from 1 on the x axis.
epochs = range(1, len(acc) + 1)

# Set the seaborn style before the figure is created so it applies to these axes.
sns.set_style("white")
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))
plt.suptitle('Train history', size = 15)

# Left panel: accuracy (dots = training, line = validation).
ax1.plot(epochs, acc, "bo", label = "Training acc")
ax1.plot(epochs, val_acc, "b", label = "Validation acc")
ax1.set_title("Training and validation acc")
ax1.legend()

# Right panel: loss in red. The colour comes from the format string alone;
# combining "bo"/"b" with color='red' defined the colour twice and made
# matplotlib emit a warning.
ax2.plot(epochs, loss, "ro", label = "Training loss")
ax2.plot(epochs, val_loss, "r", label = "Validation loss")
ax2.set_title("Training and validation loss")
ax2.legend()

plt.show()

# Prediction

Image ID : 8b492b973d	

Breed : pantherophis-vulpinus
   

In [None]:
# image_path = os.path.join(train_dir,'8b492b973d'+'.jpg')
# image_path

In [None]:
# img = plt.imread(image_path)
# plt.imshow(img)
# plt.title('Original Bree --> pantherophis-vulpinus')
# plt.show()

In [None]:
# img_for_prediction = load_img(image_path, target_size = input_dim)
# img_for_prediction = img_to_array(img_for_prediction)
# img_for_prediction = img_for_prediction.reshape((1, *img_for_prediction.shape))
# img_for_prediction = preprocess_input(img_for_prediction)

In [None]:
# predictions = model.predict(img_for_prediction)
# pred = np.argsort(predictions)[0][-5:]
# pred 
# # the Order is from 0 to 5 and 5th Position breed is highest.

In [None]:
# le.inverse_transform(pred)