# CNN with Keras Starter


### If this kernel is useful, <font color='red'>please upvote!</font>

This kernel is based on: [CNN with Keras for Humpback Whale ID](https://www.kaggle.com/anezka/cnn-with-keras-for-humpback-whale-id)



### Importing Libraries

In [None]:
import numpy as np 
import pandas as pd 
import os
import gc
import sys
import matplotlib.pyplot as plt
import matplotlib.image as mplimg
from matplotlib.pyplot import imshow
from tqdm.autonotebook import tqdm

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

import keras.backend as K
from keras.models import Sequential
from keras import layers
from keras.preprocessing import image
from keras.applications.imagenet_utils import preprocess_input
from keras.layers import Input, Dense, Activation, BatchNormalization, Flatten, Conv2D
from keras.layers import AveragePooling2D, MaxPooling2D, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.models import Model



import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [None]:
# Load the training table from the competition input folder; later cells read
# its 'image' (file name) and 'individual_id' (label) columns.
train_df = pd.read_csv("../input/happy-whale-and-dolphin/train.csv")
# Disabled experiment: keep only the last row for each individual_id.
#train_df=train_df.drop_duplicates(subset=['individual_id'],keep='last')
# Preview the first rows of the table.
train_df.head()

In [None]:
# Inspect the training table's dimensions as (rows, columns).
train_df.shape

## Functions

In [None]:
def Loading_Images(data, m, dataset, target_size=(32, 32)):
    """Load the images listed in ``data['image']`` into a single NumPy array.

    Every file is read from ``../input/happy-whale-and-dolphin/<dataset>/``,
    resized to ``target_size``, converted to an array and passed through
    ``preprocess_input`` before being stored.

    Args:
        data: Table-like object whose ``'image'`` column holds file names.
        m: Number of image slots to allocate; must be at least the number of
            entries in ``data['image']``.
        dataset: Sub-directory of the competition input folder to read from
            (this file uses ``"train_images"`` and ``"test_images"``).
        target_size: ``(height, width)`` each image is resized to. Defaults
            to ``(32, 32)``, the size the model in this file is built for.

    Returns:
        Array of shape ``(m, height, width, 3)`` with the preprocessed images
        in the order they appear in ``data['image']``. Slots beyond the last
        loaded image stay zero-filled.
    """
    print("Loading images")
    height, width = target_size
    X_train = np.zeros((m, height, width, 3))
    image_dir = "../input/happy-whale-and-dolphin/" + dataset + "/"
    for index, file_name in enumerate(tqdm(data['image'])):
        # load_img expects a (height, width) pair; the channel count comes
        # from its colour mode, not from target_size.
        img = image.load_img(image_dir + file_name, target_size=(height, width))
        pixels = image.img_to_array(img)
        X_train[index] = preprocess_input(pixels)
    return X_train

def prepare_labels(y):
    """One-hot encode a sequence of class labels.

    Args:
        y: Sequence of labels (for example a pandas Series of ids).

    Returns:
        Tuple ``(onehot, label_encoder)``. ``onehot`` is a dense 2-D array
        with one row per input label and one column per distinct label.
        ``label_encoder`` is the fitted ``LabelEncoder``; its
        ``inverse_transform`` maps column indices back to the original labels.
    """
    values = np.array(y)
    label_encoder = LabelEncoder()
    # OneHotEncoder needs a 2-D column, hence the reshape to (n_samples, 1).
    integer_encoded = label_encoder.fit_transform(values).reshape(-1, 1)
    # scikit-learn renamed `sparse` to `sparse_output` and later removed the
    # old keyword; try the new spelling first and fall back for old versions.
    try:
        onehot_encoder = OneHotEncoder(sparse_output=False)
    except TypeError:
        onehot_encoder = OneHotEncoder(sparse=False)
    onehot_encoded = onehot_encoder.fit_transform(integer_encoded)
    return onehot_encoded, label_encoder

In [None]:
# Read every training image into one array, then divide all values by 255
# in place.
X = Loading_Images(train_df, len(train_df), "train_images")
X /= 255.0

In [None]:
# X = np.load('../input/image-arraynpy/image_array.npy')
# X /= 255

In [None]:
# Inspect the image array's dimensions (images, height, width, channels).
X.shape

In [None]:
# One-hot encode the individual ids; keep the fitted encoder so predicted
# class indices can be mapped back to ids at inference time.
y, label_encoder = prepare_labels(train_df['individual_id'])

In [None]:
# Inspect the label matrix dimensions (samples, distinct individuals).
y.shape

In [None]:
# Run a garbage-collection pass before building and training the model.
gc.collect()

In [None]:
# Imported in this cell so the explicit accuracy metric used in compile()
# below is in scope.
from keras.metrics import CategoricalAccuracy

# Small CNN for 32x32 RGB inputs that ends in a softmax over all individuals.
model = Sequential()

# Block 1: 6x6 convolution -> batch norm over the channel axis -> ReLU.
model.add(Conv2D(32, (6, 6), strides=(1, 1), input_shape=(32, 32, 3)))
model.add(BatchNormalization(axis=3))
model.add(Activation('relu'))

# Block 2: down-sample, 3x3 convolution -> ReLU, then average-pool.
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), strides=(1, 1)))
model.add(Activation('relu'))
model.add(AveragePooling2D((3, 3)))

# Classifier head.
model.add(Flatten())
model.add(Dense(512, activation="relu"))
# NOTE(review): a dropout rate of 0.85 is unusually aggressive - confirm it
# is intentional.
model.add(Dropout(0.85))

# One output unit per column of the one-hot label matrix.
model.add(Dense(y.shape[1], activation='softmax'))

# The bare string 'Accuracy' is not one of Keras' documented accuracy aliases
# ('accuracy' / 'acc'). Depending on the Keras version it can resolve to the
# exact-match `Accuracy` metric, which is meaningless for softmax outputs
# compared against one-hot targets. Instantiating CategoricalAccuracy makes
# the intent explicit, and naming it 'Accuracy' keeps the history keys
# 'Accuracy' / 'val_Accuracy' unchanged.
model.compile(
    loss='categorical_crossentropy',
    optimizer="adam",
    metrics=[CategoricalAccuracy(name='Accuracy'), 'Precision', 'Recall'],
)
model.summary()

In [None]:
# Train for 20 epochs in mini-batches of 200 samples, holding out 20% of
# (X, y) for validation; the per-epoch metrics are kept in `history`.
history = model.fit(
    X,
    y,
    batch_size=200,
    epochs=20,
    verbose=1,
    validation_split=0.2,
)
# Persist the trained model to the working directory.
model.save('./last.h5')

In [None]:
# The training arrays are not needed after fitting; drop both references and
# collect so their memory can be reused.
del X, y
gc.collect()

## Evaluation

In [None]:
# plt.figure(figsize=(15,5))
# plt.plot(history.history['accuracy'])
# plt.title('Model accuracy')
# plt.ylabel('Accuracy')
# plt.xlabel('Epoch')
# plt.show()

In [None]:
# plt.figure(figsize=(15,5))
# plt.plot(history.history['loss'])
# plt.title('Model loss')
# plt.ylabel('loss')
# plt.xlabel('Epoch')
# plt.show()

In [None]:
import matplotlib.pyplot as plt
def show_train_history(train_history):
    """Plot training and validation accuracy per epoch.

    Reads the ``'Accuracy'`` and ``'val_Accuracy'`` series from
    ``train_history.history`` and draws both on one chart, with one x tick
    per recorded epoch.
    """
    metrics_log = train_history.history
    train_curve = metrics_log['Accuracy']
    plt.plot(train_curve)
    plt.plot(metrics_log['val_Accuracy'])
    plt.xticks(list(range(len(train_curve))))
    plt.title('Train History')
    plt.ylabel('Accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()


show_train_history(history)

## Inference

In [None]:
# Collect the file names of all test images and report how many there are.
test = os.listdir("../input/happy-whale-and-dolphin/test_images")
print(len(test))

In [None]:
# Build the submission table: one row per test image, with an empty
# 'predictions' column that the inference loop fills in.
col = ['image']
test_df = pd.DataFrame(test, columns=col)
test_df['predictions'] = ''
# Disabled: restrict to the first 250 rows for a quick run.
#test_df=test_df.head(n=250)

In [None]:
# Number of test images loaded and scored per iteration, so only one chunk
# of pixel data is held in memory at a time.
batch_size = 5000
L = len(test_df)

# How many of the highest-scoring classes are reported per image.
TOP_K = 5
# Classes scoring above this go before 'new_individual'; the rest go after.
CONFIDENCE_THRESHOLD = 0.7


def _format_prediction(pred, encoder, top_k=TOP_K, threshold=CONFIDENCE_THRESHOLD):
    """Turn one row of class probabilities into a space-separated label string.

    The ``top_k`` highest-scoring classes are taken in descending score order.
    Labels whose score exceeds ``threshold`` come first, then the literal
    ``'new_individual'``, then the remaining labels.

    NOTE(review): this yields up to ``top_k + 1`` labels per row - confirm
    the submission format accepts that many.
    """
    top_idx = pred.argsort()[-top_k:][::-1]
    # Decode all candidate indices in a single call instead of once per label.
    top_labels = encoder.inverse_transform(top_idx)
    confident, uncertain = [], []
    for label, class_idx in zip(top_labels, top_idx):
        if pred[class_idx] > threshold:
            confident.append(label)
        else:
            uncertain.append(label)
    return ' '.join(confident + ['new_individual'] + uncertain)


# Prediction strings for every test row, in test_df row order.
all_predictions = []
for batch_start in range(0, L, batch_size):
    limit = min(batch_start + batch_size, L)
    test_df_batch = test_df.iloc[batch_start:limit]
    X = Loading_Images(test_df_batch, test_df_batch.shape[0], "test_images")
    X /= 255
#     X = np.load('../input/test-image-array/test_image_array.npy')
#     X /= 255
    # X is already an ndarray, so it is passed directly without copying.
    predictions = model.predict(X, verbose=1)
    all_predictions.extend(
        _format_prediction(pred, label_encoder) for pred in predictions
    )
    # Release this batch before loading the next one to keep peak memory low.
    del X, test_df_batch, predictions
    gc.collect()

# Assign by position in one step; this does not depend on the index labels of
# test_df and avoids a slow per-row .loc write.
test_df['predictions'] = all_predictions

In [None]:
# Write the submission file (without the index column) and preview it.
test_df.to_csv('submission.csv',index=False)
test_df.head()

In [None]:
# Display the full submission table.
test_df