Identify the snake breed
https://www.hackerearth.com/challenges/competitive/hackerearth-deep-learning-challenge-snake-breed-detection/machine-learning/identify-the-snake-breed-5-66d9a9f5/

This is a challenge from HackerEarth.com, and one of the HackerEarth participants has uploaded the dataset to Kaggle. Refer to the details of the challenge below.

# Problem statement
The government has been facing a long-standing issue of wild animals entering residential areas due to various reasons. It's of critical importance that if any such dangerous animal is encountered, the concerned authority should be notified immediately. Reptiles, especially snakes, are among the most dangerous animals and they often enter residential areas.

Recently, due to an incident in which a youngster was bitten by a snake, the government decided to install cameras at every corner of the road to detect snakes and other animals.

You have been hired as a Deep Learning engineer to create a sophisticated model that can detect the breed of a snake from its image.

# Import Library

In [None]:
import pandas as pd
import numpy as np

import random, os

import matplotlib.pyplot as plt 
%matplotlib inline

from keras.applications.resnet50 import preprocess_input
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.applications import imagenet_utils

from keras.callbacks import EarlyStopping

# Train Images

In [None]:
# Root folder of the dataset; train.csv and the train/ image folder are read from here.
base_dir = '../input/hackerearth-deep-learning-identify-the-snake-breed/dataset'
train_dir = os.path.join(base_dir, 'train')
# Names of every entry in the training image folder, in os.listdir() order.
files = os.listdir(train_dir)

# Target Information from csv file

In [None]:
# Load the label table shipped with the dataset and preview its first rows.
train_df = pd.read_csv(os.path.join(base_dir, 'train.csv'))
train_df.head()

In [None]:
# train_file_df = pd.DataFrame({'image_id':list(map(lambda x:x.replace('.jpg', ''), files))})
# train_file_df.head()

# Mapping File with Breed.

In [None]:
# label_info = pd.merge(left = train_file_df, right = train_df)
# label_info.head()

# Convert Target to One-Hot Encoding.

In [None]:
# Count of distinct breed labels; used to size the one-hot targets and the
# softmax output layer.
num_classes = len(train_df['breed'].unique())
num_classes

In [None]:
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils

In [None]:
# Map breed names to integer ids, then expand the ids into one-hot rows.
le = LabelEncoder()
le.fit(train_df.breed)
breed = le.transform(train_df.breed)
y = np_utils.to_categorical(breed, num_classes=num_classes)

In [None]:
# Shape of the one-hot target matrix built above.
y.shape

# Convert Images to numpy array

In [None]:
# Spatial size every image is resized to when it is loaded.
input_dim = (224, 224)

# Pre-allocate one RGB image slot per label row.
# The slots are float32 rather than NumPy's float64 default: Keras'
# img_to_array yields float32 by default, so storing the images as float32
# loses nothing and halves the memory held by the full training array.
X = np.zeros((y.shape[0], *input_dim, 3), dtype=np.float32)

In [None]:
# Fill X in the row order of train_df so that X[i] is the image belonging to
# label y[i]. y was built from train_df.breed, whereas os.listdir() returns
# file names in arbitrary order, so looping over `files` would pair images
# with the wrong breeds.
# Assumes train.csv has an `image_id` column holding the file name without
# the '.jpg' suffix (as with '8b492b973d' in the prediction example below).
# NOTE(review): preprocess_input is imported from keras.applications.resnet50
# although the network is VGG19 - confirm both apply the same preprocessing.
for i, image_id in enumerate(train_df.image_id):
    image = load_img(os.path.join(train_dir, image_id + '.jpg'), target_size=input_dim)
    image = img_to_array(image)
    # Add a batch axis of 1, mirroring the single-image prediction path.
    image = image.reshape((1, *image.shape))
    image = preprocess_input(image)
    X[i] = image[0]

In [None]:
# Shape of the image array filled by the loading loop.
X.shape

# Create Callbacks

In [None]:
# Callback that watches validation loss with a patience of 2 epochs and no
# minimum-improvement threshold; it is passed to the fit() calls below.
earlystop = EarlyStopping(monitor='val_loss', mode='auto', min_delta=0, patience=2, verbose=0)

In [None]:
from keras.applications.vgg19 import VGG19
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D, Flatten, Dropout

In [None]:
# VGG19 with ImageNet weights and without its top (classifier) layers; a new
# head for the snake breeds is attached below.
vgg_model = VGG19(include_top=False, weights='imagenet')

In [None]:
# Classification head on top of the VGG19 output:
# global average pooling -> dropout(0.2) -> softmax over num_classes breeds.
vgg_x = GlobalAveragePooling2D()(vgg_model.output)
vgg_x = Dropout(0.2)(vgg_x)
out = Dense(num_classes, activation='softmax')(vgg_x)

In [None]:
# End-to-end model: VGG19 input through to the new softmax head.
model = Model(inputs = vgg_model.input, outputs = out)


In [None]:
# Freeze every layer of the VGG19 base; the pooling/dropout/dense head added
# above is not part of vgg_model.layers and therefore stays trainable.
for layer in vgg_model.layers:
    layer.trainable = False

In [None]:
from keras.optimizers import Adam
# Adam optimizer constructed with its default arguments.
opt = Adam()

In [None]:
# Configure training: categorical cross-entropy matches the one-hot targets in
# y; accuracy is reported alongside the loss.
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

# Print the layer-by-layer overview of the compiled model.
model.summary()

In [None]:
# Train the head for up to 20 epochs, holding out the last 20% of the samples
# for validation. earlystop may end training before epoch 20.
hist = model.fit(
    X, y,
    epochs=20,
    batch_size=256,
    validation_split=0.2,
    callbacks=[earlystop],
    verbose=2,
)
# Optional checkpoint, left disabled:
# model.save('snake_vgg_model1.h5')

In [None]:
# List which metrics were recorded per epoch during training.
hist.history.keys()

In [None]:
# Per-epoch training and validation accuracy (None if a key was not recorded).
acc, val_acc = map(hist.history.get, ('accuracy', 'val_accuracy'))

In [None]:
# Put both accuracy curves side by side, one row per epoch, for plotting.
overfit_info = pd.DataFrame({'acc':acc, 'val_acc':val_acc})

In [None]:
# Line plot of training vs. validation accuracy; a widening gap suggests overfitting.
overfit_info.plot.line()

# Predict 
Image ID : 8b492b973d	

Breed : pantherophis-vulpinus
   

In [None]:
# Path of the sample training image (image id 8b492b973d) used for the prediction demos.
image_path = os.path.join(train_dir,'8b492b973d'+'.jpg')
image_path

In [None]:
# Display the sample image at its original resolution, titled with its breed.
img = plt.imread(image_path)
plt.imshow(img)
plt.title('Original Bree --> pantherophis-vulpinus')
plt.show()

In [None]:
# Prepare the sample image the same way the training images were prepared:
# resize to input_dim, convert to an array, add a batch axis of 1, then apply
# preprocess_input.
img_for_prediction = img_to_array(load_img(image_path, target_size=input_dim))
img_for_prediction = img_for_prediction[np.newaxis, ...]
img_for_prediction = preprocess_input(img_for_prediction)

In [None]:
predictions = model.predict(img_for_prediction)
# argsort is ascending, so the last five indices are the five most probable
# class ids, ordered from least to most probable; the final entry is the
# model's top prediction.
pred = np.argsort(predictions[0])[-5:]
pred

In [None]:
# Translate the top-5 class ids back to breed names (most probable last).
le.inverse_transform(pred)

Model has predicted "thamnophis-sirtalis" with highest probability.

# More layers to train

In [None]:
# Second experiment: a new head with more dropout, and the end of the VGG19
# base unfrozen for fine-tuning.
# NOTE: model2 is built on the same vgg_model object as `model`, so both
# models share the convolutional layers and their weights.
vgg_x = vgg_model.output
vgg_x = GlobalAveragePooling2D()(vgg_x)
vgg_x = Dropout(0.3)(vgg_x)  # Change 1: increase the dropout
out = Dense(num_classes, activation='softmax')(vgg_x)

model2 = Model(inputs=vgg_model.input, outputs=out)

# Change 2: keep every base layer except the last 2 frozen.
for layer in vgg_model.layers[:-2]:
    layer.trainable = False

# Change 3: allow the last 2 base layers to be trained.
for layer in vgg_model.layers[-2:]:
    layer.trainable = True

# Compile with a fresh Adam instance. `opt` was already used to train `model`,
# so it carries optimizer state tied to that model's variables; sharing one
# optimizer object between models with different trainable variables is
# unsafe (and rejected outright by recent Keras versions).
model2.compile(
    optimizer=Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model2.summary()

# Same training setup as the first model: 20% validation split, early stopping.
hist2 = model2.fit(
    X,
    y,
    batch_size=256,
    epochs=20,
    validation_split=0.2,
    verbose=2,
    callbacks=[earlystop]
)

# Optional checkpoint, left disabled:
# model2.save('snake_vgg_model2.h5')

In [None]:
# Top-5 breeds for the sample image according to model2 (most probable last).
predictions = model2.predict(img_for_prediction)
pred = np.argsort(predictions[0])[-5:]

le.inverse_transform(pred)

# With Image Augmentation

In [None]:
from keras.preprocessing.image import ImageDataGenerator
from keras.models import load_model

In [None]:
# Augmentation pipeline: dataset-wide mean/std normalisation plus random
# rotations (up to 20 degrees), horizontal/vertical shifts (up to 20% of the
# image size) and horizontal flips.
# NOTE(review): img_for_prediction is later fed to predict() without this
# generator's standardisation - confirm that is intended.
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    featurewise_center=True,
    featurewise_std_normalization=True,
)

# The featurewise options need statistics computed from the training data.
datagen.fit(X)

In [None]:
# Continue training the first model (`model`) on augmented batches.
aug_batch_size = 256
hist_aug = model.fit_generator(
    datagen.flow(X, y, batch_size=aug_batch_size),
    # The keyword is `steps_per_epoch`, and it must be an integer. One epoch
    # is one pass over X at the batch size actually used by the generator.
    steps_per_epoch=int(np.ceil(len(X) / aug_batch_size)),
    epochs=20,
    verbose=2
)

In [None]:
# List which metrics were recorded during the augmented training run.
hist_aug.history.keys()

In [None]:
# Persist the trained network. The History object returned by training only
# holds metric logs and has no save(); the model itself must be saved.
model.save('model_img_augement.h5')

In [None]:
# Reload the augmentation-trained model from disk under a new name.
model3 = load_model('model_img_augement.h5')

In [None]:
# Top-5 breeds for the sample image according to the reloaded model3
# (most probable last).
predictions = model3.predict(img_for_prediction)
pred = np.argsort(predictions[0])[-5:]

le.inverse_transform(pred)

# Data Augmentation with more layers.

In [None]:
# Continue training the second model (`model2`, the one with the last base
# layers unfrozen) on augmented batches.
aug_batch_size = 256
hist2_aug = model2.fit_generator(
    datagen.flow(X, y, batch_size=aug_batch_size),
    # The keyword is `steps_per_epoch`, and it must be an integer. One epoch
    # is one pass over X at the batch size actually used by the generator.
    steps_per_epoch=int(np.ceil(len(X) / aug_batch_size)),
    epochs=20,
    verbose=2
)

In [None]:
# Persist the trained network. The History object returned by training has no
# save(); the model itself must be saved.
model2.save('model2_img_augement.h5')

model4 = load_model('model2_img_augement.h5')

# Predict with the model that was just reloaded (model4, not model3).
predictions = model4.predict(img_for_prediction)
# argsort is ascending: the last five ids are the top-5, most probable last.
pred = np.argsort(predictions)[0][-5:]

le.inverse_transform(pred)