# **Load Packages**

In [None]:
import pandas as pd
import numpy as np
import glob
import os
import cv2
from IPython.display import Image
import matplotlib.pyplot as plt
import seaborn as sb
from tensorflow.keras.layers import *
from tensorflow.keras.models import * 
from tensorflow.keras.preprocessing import image
from sklearn.model_selection import train_test_split
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
from keras.applications import ResNet50
from keras import applications
from PIL import Image
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils

# EDA

In [None]:
# List the entries found directly under the dataset root directory.
base_path = '../input/hackerearths-snakes-in-the-hood/dataset/'
folders = os.listdir(base_path)
print(folders)

In [None]:
# Load the train and test CSV tables, then preview the first training rows.
df=pd.read_csv("../input/hackerearths-snakes-in-the-hood/dataset/train.csv")
df_t=pd.read_csv("../input/hackerearths-snakes-in-the-hood/dataset/test.csv")
df.head()

In [None]:
# Show the (rows, columns) dimensions of the training table.
df.shape

In [None]:
# Count the distinct values in each column of the training table.
df.nunique()

In [None]:
# Count how many training rows belong to each breed.
df['breed'].value_counts()

In [None]:
# Derive two new columns from the breed string: split it on "-" and store
# the first piece as "Genus" and the second piece as "Type".
# NOTE(review): any pieces after a second "-" are dropped — confirm that
# every breed contains exactly one hyphen.
btype = df["breed"].str.split("-",expand = True)
df["Genus"]=btype[0]
df["Type"]=btype[1]

In [None]:
# Preview the training table with the new Genus and Type columns.
df.head()

In [None]:
# Count how many training rows fall under each Genus value.
df['Genus'].value_counts()

In [None]:
# Count how many training rows fall under each Type value.
df['Type'].value_counts()

In [None]:
# Turn each training image id into a file path by prepending the train
# directory and appending the ".jpg" extension to the column.
df["image_id"]="../input/hackerearths-snakes-in-the-hood/dataset/train/" + df["image_id"] + ".jpg"
df.head()

In [None]:
# Append the ".jpg" extension to every test image id in one vectorised step.
# Element-wise chained assignment (df_t['image_id'][i] = ...) triggers
# SettingWithCopyWarning and does not modify df_t under pandas Copy-on-Write,
# so the whole column is reassigned instead.
df_t['image_id'] = df_t['image_id'] + '.jpg'

# Data Visualization

In [None]:
# Plot the first 6 training images in a 2x3 grid, each titled with its breed.
fig = plt.figure(figsize=(20, 7))

rows = 2
columns = 3

for position in range(rows * columns):
    # cv2.imread returns pixels in BGR order while plt.imshow expects RGB;
    # convert so the images are not shown with red and blue swapped.
    bgr_image = cv2.imread(df['image_id'][position])
    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

    # Subplot positions are 1-based.
    fig.add_subplot(rows, columns, position + 1)
    plt.imshow(rgb_image)
    plt.axis('off')
    plt.title(df['breed'][position])

plt.show()

In [None]:
# Bar chart of the number of training rows per breed, drawn on a black
# background with vertical x tick labels.
plt.figure(figsize = (20,10))
plt.axes(facecolor = "#000000")
plt.xticks(rotation = 'vertical')
sb.countplot(x="breed", data=df, palette="gist_rainbow_r");
plt.show()

In [None]:
# Relation between Genus and Type: strip plot with Genus on the x axis and
# Type on the y axis, drawn on a black background.
plt.figure(figsize = (20,10))
plt.axes(facecolor = "#000000")
plt.xticks(rotation = 'vertical')
sb.stripplot(x="Genus", y = "Type",data=df)
plt.show()

In [None]:
# Build the text fed to the word cloud: every Type value is converted to a
# string, split on whitespace, lower-cased, re-joined with single spaces and
# followed by one space. str.join is used instead of repeated "+=" so the
# string is assembled in a single pass.
comment_words = "".join(
    " ".join(word.lower() for word in str(snake_type).split()) + " "
    for snake_type in df.Type
)

In [None]:

# Generate an 800x800 word cloud on a black background from comment_words.
final_wordcloud = WordCloud(width = 800, height = 800, 
                background_color ='black', min_font_size = 10).generate(comment_words)

# Render the word cloud as an image with the axes hidden.
plt.figure( figsize=(20,20))
plt.tight_layout(pad=0)
plt.imshow(final_wordcloud, interpolation='spline36')
plt.axis("off")
plt.show()

# Model Work

In [None]:

# Encode each breed name as an integer class id.
llist = df['breed'].tolist()
# Iteration order of a set of strings changes between interpreter runs (hash
# randomisation), which would make the class ids irreproducible; sorting the
# unique breeds gives every run the same breed -> id mapping.
# NOTE(review): assumes every breed value is a string (no missing values).
label = {k: v for v, k in enumerate(sorted(set(llist)))}
y = np.array([label[k] for k in llist])

In [None]:
# One-hot encode the integer class ids with Keras' to_categorical helper.
from keras.utils import to_categorical
y = to_categorical(y)
# Echo the encoded matrix as the cell output.
y

In [None]:
# Show the dimensions of the one-hot label matrix.
y.shape

In [None]:
#To read each image
def read_img(img_path):
    """Load an image from disk as a 3-channel array resized to 224x224.

    Args:
        img_path: Path of the image file to read.

    Returns:
        The image as returned by ``cv2.imread`` in colour mode, resized to
        224x224 with ``cv2.resize``.

    Raises:
        OSError: If ``cv2.imread`` could not load the file (it returns
            ``None`` for a missing or undecodable image).
    """
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    # Fail with the offending path instead of an opaque error from cv2.resize.
    if img is None:
        raise OSError(f"Could not read image: {img_path}")
    return cv2.resize(img, (224, 224))

In [None]:
from tqdm import tqdm
# Read and resize every training image, showing a progress bar over the paths.
df_img = [read_img(img_path) for img_path in tqdm(df['image_id'].values)]

In [None]:
# Stack the training images into one float32 array and divide by 255.
X = np.array(df_img, np.float32) / 255

In [None]:
# Print the dimensions of the stacked training image array.
print(X.shape)

In [None]:
# Use ResNet50 with ImageNet weights as the convolutional base; the original
# classification top is excluded and the input is fixed to 224x224x3.
base_model = applications.ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

In [None]:
# Set the model architecture: a dense classification head stacked on top of
# the ResNet50 base model's output.

# One output unit per class, taken from the width of the one-hot label matrix
# so the head always matches the labels it is trained against.
num_classes = y.shape[1]

add_model = Sequential()
add_model.add(Flatten())
add_model.add(Dense(256, activation='relu'))
add_model.add(Dropout(0.2))
add_model.add(Dense(128, activation='relu'))
add_model.add(Dropout(0.3))
add_model.add(Dense(64, activation='relu'))
add_model.add(Dropout(0.5))
# categorical_crossentropy expects a probability distribution over classes,
# so the output layer uses softmax; tanh produces values in [-1, 1] that are
# not valid probabilities for this loss.
add_model.add(Dense(num_classes, activation='softmax'))

model = Model(inputs=base_model.input, outputs=add_model(base_model.output))

model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])

In [None]:
# Print the layer-by-layer summary of the combined model.
model.summary()

In [None]:
# Data augmentation: random rotations, shifts, shear, zoom, flips and channel
# shifts applied on the fly to the training images.
# No `rescale` here: X was already divided by 255 when it was built, so
# rescaling again would feed the network inputs in [0, 1/255] during training
# while the test images are in [0, 1].
train_datagen = image.ImageDataGenerator(
    rotation_range=120,
    width_shift_range=(-50, 50),
    height_shift_range=(-50, 50),
    shear_range=0.4,
    zoom_range=(0.1, 0.6),
    horizontal_flip=True,
    vertical_flip=True,
    channel_shift_range=0.3,
    fill_mode='nearest',
)

# train_datagen.fit(X) is intentionally not called: it only computes
# statistics for featurewise centering/normalisation or ZCA whitening, none
# of which are enabled, and it would copy the whole image array.

In [None]:
# Train for 20 epochs on augmented batches of 32 images.
# Model.fit accepts generators directly; fit_generator is deprecated and has
# been removed from current Keras releases.
history = model.fit(train_datagen.flow(X, y, batch_size=32),
                    epochs=20)

In [None]:
# Read and resize every test image; ids are joined onto the test directory.
test_dir = "../input/hackerearths-snakes-in-the-hood/dataset/test/"
test_img = [read_img(test_dir + img_name) for img_name in tqdm(df_t['image_id'].values)]

In [None]:
# Stack the test images into one float32 array and divide by 255, matching
# how the training array X is built.
Test_img = np.array(test_img, np.float32) / 255

In [None]:
# Prediction: run the trained model on the stacked test images.
preds = model.predict(Test_img)

In [None]:
# Pick the highest-scoring class index for each test image.
preds = preds.argmax(axis=1)
# Invert the breed -> index mapping so indices can be turned back into names.
predy = dict(zip(label.values(), label.keys()))
predlabels = list(map(predy.__getitem__, preds))