In [None]:
import pandas as pd
import numpy as np
import os.path
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from keras.applications import ResNet50
from keras.optimizers import Adam
import cv2
from tqdm import tqdm
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras import backend as K
from keras import applications
from keras.models import Model
from keras import optimizers
from keras.utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator
from keras.layers.normalization import BatchNormalization

In [None]:
# Read the train and test metadata tables, then preview the training one.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/hackerearths-snakes-in-the-hood/dataset/train.csv',
        '/kaggle/input/hackerearths-snakes-in-the-hood/dataset/test.csv',
    )
)
train.head()

In [None]:
# Report the number of rows in each metadata table.
n_train_rows, n_test_rows = len(train), len(test)
print("training data size is : ", n_train_rows)
print("testing data size is  : ", n_test_rows)

In [None]:
# Distinct values of the target column.
train.breed.unique()

In [None]:
# Bar chart of how many training rows each breed has.
label_counts = train.breed.value_counts()
plt.figure(figsize=(18, 10))
# x/y are passed by keyword: recent seaborn releases no longer accept them
# positionally, while the keyword form also works on older releases.
sns.barplot(x=label_counts.index, y=label_counts.values, alpha=0.9)
plt.xticks(rotation='vertical')  # breed names are long; keep them readable
plt.xlabel('Image Class', fontsize=12)
plt.ylabel('Counts', fontsize=12)
plt.show()

In [None]:
# Directories that hold the train / test image files.
# The trailing slash matters: later cells build each image path by plain
# string concatenation (directory + file name), not with os.path.join.
TRAIN_PATH = '/kaggle/input/hackerearths-snakes-in-the-hood/dataset/train/'
TEST_PATH = '/kaggle/input/hackerearths-snakes-in-the-hood/dataset/test/'

In [None]:
IMG_SIZE = 224  # side length, in pixels, that every image is resized to


def read_img(img_path):
    """Read an image file and resize it to IMG_SIZE x IMG_SIZE.

    Args:
        img_path: Path of the image file to load.

    Returns:
        The resized image array produced by ``cv2.resize``. The image is
        loaded with ``cv2.IMREAD_COLOR``, i.e. as a 3-channel colour image
        in OpenCV's channel order.

    Raises:
        OSError: If OpenCV could not read ``img_path`` (missing, unreadable
            or undecodable file).
    """
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the offending path instead of inside cv2.resize.
        raise OSError("cv2.imread could not read image: {!r}".format(img_path))
    return cv2.resize(img, (IMG_SIZE, IMG_SIZE))

In [None]:
# File name of every training image: "<image_id>.jpg".
train_img = [image_id + '.jpg' for image_id in tqdm(train['image_id'].values)]

In [None]:
# Store the file names alongside the metadata as an 'image' column.
train = train.assign(image=train_img)
train.head()

In [None]:
# Load and resize every training image; train_img now holds pixel arrays
# instead of the file names it held in the earlier cell.
train_img = [
    read_img(TRAIN_PATH + file_name)
    for file_name in tqdm(train['image'].values)
]

In [None]:
# Stack the images into one float32 array and scale pixel values by 1/255.
# The division is done in place so no second full-size copy of the dataset
# is allocated; the resulting values are the same as `array / 255`.
x_train = np.array(train_img, np.float32)
x_train /= 255

In [None]:
# Sanity check: shape of the stacked training image array.
print(x_train.shape)

In [None]:
# Encode each breed name as an integer class id.
label_list = train['breed'].tolist()
# Ids are zero-based so that to_categorical() yields exactly one column per
# breed; one-based ids add an unused "class 0" column, which in turn gives the
# softmax layer an extra output that has no breed to map back to.
# The labels are sorted because set iteration order for strings can differ
# between interpreter runs, which would change the mapping on every restart.
label_numeric = {breed: idx for idx, breed in enumerate(sorted(set(label_list)))}
y_train = np.array([label_numeric[breed] for breed in label_list])

In [None]:
# One-hot encode the integer class ids.
# NOTE: this rebinds y_train, so the cell is not idempotent: running it a
# second time would encode the already one-hot matrix again. Re-run the
# label-encoding cell first if this cell has to be repeated.
y_train = to_categorical(y_train)

In [None]:
# Sanity check: the second dimension is the width later used for the final
# Dense (softmax) layer.
print(y_train.shape)

In [None]:
# ResNet50 backbone with ImageNet weights and without its top (classifier)
# layers, taking IMG_SIZE x IMG_SIZE 3-channel inputs.
base_model = applications.ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
)

In [None]:
## set model architecture
# Classification head that is stacked on top of the ResNet50 feature maps:
# flatten -> two dropout-regularised dense layers -> softmax over the classes.
add_model = Sequential()
add_model.add(Flatten(input_shape=base_model.output_shape[1:]))
add_model.add(Dropout(0.3))
add_model.add(Dense(256, activation='relu'))
add_model.add(Dropout(0.3))
add_model.add(Dense(128, activation='relu'))
add_model.add(Dropout(0.3))
# One output unit per column of the one-hot label matrix.
add_model.add(Dense(y_train.shape[1], activation='softmax'))

# Full network: backbone input -> backbone features -> classification head.
model = Model(inputs=base_model.input, outputs=add_model(base_model.output))

# `learning_rate` replaces the deprecated `lr` argument name.
model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizers.SGD(learning_rate=1e-4, momentum=0.9),
    metrics=['accuracy'],
)

model.summary()

In [None]:
# Training hyper-parameters.
batch_size = 32
epochs = 50

# Random augmentation applied on the fly to every training batch.
train_datagen = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# No train_datagen.fit(x_train) call here: fit() only computes dataset
# statistics for featurewise_center / featurewise_std_normalization /
# zca_whitening, none of which are enabled above, so calling it would only
# spend time and memory on the full training array without changing anything.

In [None]:
# Train on augmented batches. Integer division means a final partial batch
# is not counted towards the steps of an epoch.
train_flow = train_datagen.flow(x_train, y_train, batch_size=batch_size)
steps_per_epoch = x_train.shape[0] // batch_size
history = model.fit_generator(
    train_flow,
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
)

In [None]:
# Training accuracy per epoch. `plt` is already imported in the imports cell
# at the top of the notebook, so it is not re-imported here.
plt.plot(history.history['accuracy'])
plt.title('Training accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.show()

In [None]:
# Training loss per epoch, labelled so the figure can be read on its own.
plt.plot(history.history['loss'])
plt.title('Training loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.show()

In [None]:
# File name of every test image: "<image_id>.jpg".
test_img = [image_id + '.jpg' for image_id in tqdm(test['image_id'].values)]

In [None]:
# Store the file names as an 'image' column, then load and resize every test
# image; test_img is rebound from file names to pixel arrays.
test = test.assign(image=test_img)

test_img = [
    read_img(TEST_PATH + file_name)
    for file_name in tqdm(test['image'].values)
]
    

In [None]:
# Stack the test images into one float32 array and scale by 1/255, the same
# scaling applied to the training images. Dividing in place avoids allocating
# a second full-size copy; the values are the same as `array / 255`.
x_test = np.array(test_img, np.float32)
x_test /= 255

In [None]:
# Run the trained model on the test images; the model's last layer is a
# softmax, so each row holds one score per output class.
predictions = model.predict(x_test)

In [None]:
# Turn the per-class scores into breed names.
# The class ids get their own variable instead of overwriting `predictions`:
# that keeps the raw model output available and lets this cell be re-run
# (argmax over axis=1 fails on an already reduced 1-D array).
pred_indices = np.argmax(predictions, axis=1)
# Invert the breed -> id mapping built for the training labels.
rev_y = {class_id: breed for breed, class_id in label_numeric.items()}
pred_labels = [rev_y[class_id] for class_id in pred_indices]

In [None]:
# Build the submission table (image id + predicted breed), write it to CSV
# without the index column, and preview the first rows.
filename = 'solution.csv'
submission_columns = {'image_id': test['image_id'], 'breed': pred_labels}
output = pd.DataFrame(submission_columns)
output.to_csv(filename, index=False)
output.head()