In [None]:
import numpy as np 
import pandas as pd 
import os
import gc
import sys
import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mplimg
from matplotlib.pyplot import imshow
from tqdm.autonotebook import tqdm
from matplotlib.colors import Normalize
import matplotlib.cm as cm

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

import keras.backend as K
from keras.models import Sequential
from keras import layers
from keras.preprocessing import image
from keras.applications.imagenet_utils import preprocess_input
from keras.layers import Input, Dense, Activation, BatchNormalization, Flatten, Conv2D
from keras.layers import AveragePooling2D, MaxPooling2D, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.models import Model
from keras.models import load_model
import tensorflow as tf

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

**Authors:**
* 19bce128 : Mihir Prajapati
* 19bce139 : Mitul Nakrani
* 19bce163 : Dhruva Patel
* 19bce169 : Esha Patel
* 19bce292 : Mithil Vasava

In [None]:
# Load the training metadata from the competition CSV.
train_df = pd.read_csv("../input/happy-whale-and-dolphin/train.csv")

# First 50 rows of the metadata, kept as a separate small frame.
train_df_small = train_df[:50]

# Preview the first rows. This is the cell's last expression so the notebook
# actually renders it (a bare `.head()` in the middle of a cell is discarded).
train_df.head()

In [None]:
# Normalise misspelled / aliased species labels so each species has one name.
print("Total species before finding duplicates :",len(train_df.species.unique()))

# Literal (non-regex) substring fixes for two misspellings in the raw labels.
train_df['species'] = train_df['species'].str.replace('kiler_whale', 'killer_whale', regex=False)
train_df['species'] = train_df['species'].str.replace('bottlenose_dolpin', 'bottlenose_dolphin', regex=False)

# Fold the two alias labels into 'short_finned_pilot_whale'.
# A single .loc assignment replaces the original chained indexing
# (df['col'][mask] = value), which raises SettingWithCopyWarning and is not
# guaranteed to write back into train_df.
train_df.loc[train_df['species'].isin(['pilot_whale', 'globis']), 'species'] = 'short_finned_pilot_whale'

print("Total species after :",len(train_df.species.unique()))

In [None]:
# Count how many rows of the training metadata carry each species label.
animal_cnt = train_df['species'].value_counts()
print("Occurences of different species:", animal_cnt, sep="\n")
print("Total number of species: {}".format(len(animal_cnt)))

In [None]:
# Bar chart of the per-species counts held in `animal_cnt`.
specs = list(animal_cnt.keys())
values = list(animal_cnt.values)

# One colour per bar, sampled along the 'jet' colormap by bar position.
# plt.get_cmap is used because matplotlib.cm.get_cmap was deprecated in
# Matplotlib 3.7 and removed in 3.9.
cmap = plt.get_cmap('jet')
norm = Normalize(vmin=0,vmax=len(specs))
cols = np.arange(0,len(specs))

fig = plt.figure(figsize=(10,6))
ax = fig.add_subplot(1,1,1)
ax.set_axisbelow(True)  # draw the grid lines behind the bars
ax.grid(True)
ax.bar(specs, values, color=cmap(norm(cols)))
plt.xticks(rotation='vertical')
# "Times New Roman Bold" is not a font family name; request the family and
# the bold weight separately so the lookup can match.
ax.set_title('Occurences Of Different Species In The Dataset', fontsize=16,
             fontname="Times New Roman", fontweight="bold")
plt.show()

In [None]:
# Sanity check: dimensions of the full metadata frame and of the small slice.
print(train_df.shape, train_df_small.shape, sep="\n")

In [None]:
# Image directories of the competition dataset.
train_jpg_path = "../input/happy-whale-and-dolphin/train_images"
test_jpg_peth = "../input/happy-whale-and-dolphin/test_images"  # (sic) variable name kept unchanged

# File names present in the training image directory.
train_images_list = os.listdir(train_jpg_path)

In [None]:
def Loading_Images(data, m, dataset, target_size=(32, 32)):
    """Load the images named in ``data['image']`` into a single array.

    Each file is read from ``../input/happy-whale-and-dolphin/<dataset>/``,
    resized to ``target_size``, converted to an array and passed through
    ``preprocess_input`` before being stored.

    Parameters
    ----------
    data : DataFrame-like
        Object whose ``'image'`` entry iterates over image file names.
    m : int
        Number of rows allocated in the output. It should equal the number
        of file names: more names than ``m`` raises ``IndexError``, fewer
        leaves the trailing rows as zeros.
    dataset : str
        Sub-directory of the competition input folder to read from.
    target_size : tuple of int, optional
        ``(height, width)`` every image is resized to. Defaults to
        ``(32, 32)``, the size this function previously hard-coded.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(m, height, width, 3)`` created by ``np.zeros``.

    Notes
    -----
    NOTE(review): callers in this notebook divide the returned array by 255
    after ``preprocess_input`` has already been applied here; confirm that
    this combined scaling is intended.
    """
    print("Loading images")
    # load_img expects a two-element (height, width) size, so only those two
    # values are forwarded even if a longer tuple is supplied.
    height, width = target_size[0], target_size[1]
    image_dir = os.path.join("../input/happy-whale-and-dolphin", dataset)
    X_train = np.zeros((m, height, width, 3))
    for count, fig in enumerate(tqdm(data['image'])):
        img = image.load_img(os.path.join(image_dir, fig), target_size=(height, width))
        x = image.img_to_array(img)
        x = preprocess_input(x)
        X_train[count] = x
    return X_train

In [None]:
def prepare_labels(y):
    """One-hot encode a sequence of class labels.

    Parameters
    ----------
    y : array-like
        Class labels, one per sample.

    Returns
    -------
    onehot : numpy.ndarray
        Float array of shape ``(n_samples, n_classes)`` with a single 1.0 per
        row. Column ``k`` corresponds to ``label_encoder.classes_[k]``.
    label_encoder : sklearn.preprocessing.LabelEncoder
        The fitted encoder, so that predicted column indices can be mapped
        back to labels with ``inverse_transform``.
    """
    values = np.array(y)
    label_encoder = LabelEncoder()
    integer_encoded = label_encoder.fit_transform(values)

    # Build the one-hot matrix directly with NumPy. The previous
    # OneHotEncoder(sparse=False) call fails on recent scikit-learn, where the
    # `sparse` argument was renamed to `sparse_output` (1.2) and later removed
    # (1.4). LabelEncoder yields the integers 0..n_classes-1, so this produces
    # the same dense float matrix on every version.
    n_samples = len(integer_encoded)
    n_classes = len(label_encoder.classes_)
    onehot_encoded = np.zeros((n_samples, n_classes))
    onehot_encoded[np.arange(n_samples), integer_encoded] = 1.0
    return onehot_encoded, label_encoder

In [None]:
# Read every training image into one array, then divide it by 255 in place.
X = Loading_Images(data=train_df, m=len(train_df), dataset="train_images")
X /= 255

In [None]:
# One-hot targets for the individual ids; the fitted encoder is kept so that
# predicted indices can be decoded back to ids later.
y, label_encoder = prepare_labels(y=train_df['individual_id'])

In [None]:
# Show the image-array and label-matrix shapes, then run a garbage collection pass.
print(X.shape, y.shape, sep="\n")
gc.collect()

In [None]:
# Display the shape of the one-hot label matrix (rows, classes).
# The same value is already printed by the previous cell.
y.shape

In [None]:
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import GlobalAveragePooling2D, Dropout, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf

# EfficientNetB0 backbone for 32x32 RGB inputs, built without its top
# classifier and without pre-trained weights (weights=None).
base_model = EfficientNetB0(include_top=False, weights=None, input_shape=(32, 32, 3))

# Classification head: Dense(1024, relu) on the backbone output, then Flatten.
# Alternatives left disabled: GlobalAveragePooling2D, Dropout(0.5) and an
# additional Dense(512, relu).
layer = Dense(1024, activation='relu')(base_model.output)
layer = Flatten()(layer)

# Softmax output with one unit per column of the one-hot label matrix `y`.
predictions = Dense(y.shape[1], activation='softmax')(layer)
model = Model(inputs=base_model.input, outputs=predictions)

model.compile(optimizer="adam", loss='categorical_crossentropy', metrics=['accuracy'])
# model.summary()  # uncomment to print the layer table

In [None]:
# Augmentation generator: horizontal and vertical flips enabled, with a
# 20% validation split configured.
train_datagen = ImageDataGenerator(
    validation_split=0.20,
    vertical_flip=True,
    horizontal_flip=True,
)

# train_datagen.fit(X) is left disabled.

In [None]:
# Train directly on the in-memory arrays; no validation data is passed.
# A disabled alternative fed train_datagen.flow(X, y, batch_size=128) with
# subset='training' / subset='validation' for 180 epochs.
history = model.fit(x=X, y=y, batch_size=128, epochs=200, verbose=1)

In [None]:
# Persist the trained model to an HDF5 file in the working directory.
model.save(filepath='./effb0_0.h5')

In [None]:
def cnn_model(num_classes=None, input_shape=(32, 32, 3)):
    """Build and compile a small convolutional classifier.

    Architecture, in order: Conv2D(32, 6x6) -> BatchNormalization(axis=3) ->
    ReLU -> MaxPooling2D(2x2) -> Conv2D(64, 3x3) -> ReLU ->
    AveragePooling2D(3x3) -> Flatten -> Dense(512, relu) -> Dropout(0.85) ->
    Dense(num_classes, softmax).

    Parameters
    ----------
    num_classes : int, optional
        Number of units in the softmax output layer. When omitted, the
        width of the notebook-level one-hot label matrix ``y`` is used,
        which is what this function always did before the parameter existed.
    input_shape : tuple of int, optional
        Shape of one input image; defaults to ``(32, 32, 3)``.

    Returns
    -------
    keras Sequential model compiled with categorical cross-entropy loss,
    the Adam optimizer and the accuracy metric.
    """
    if num_classes is None:
        # Backward-compatible default: requires the global `y` to still exist.
        num_classes = y.shape[1]

    model = Sequential()
    model.add(Conv2D(32, (6, 6), strides=(1, 1), input_shape=input_shape))
    model.add(BatchNormalization(axis=3))
    model.add(Activation('relu'))
    model.add(MaxPooling2D((2, 2)))

    model.add(Conv2D(64, (3, 3), strides=(1, 1)))
    model.add(Activation('relu'))
    model.add(AveragePooling2D((3, 3)))

    model.add(Flatten())
    model.add(Dense(512, activation="relu"))
    model.add(Dropout(0.85))

    model.add(Dense(num_classes, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer="adam", metrics=['accuracy'])

    return model

In [None]:
# Instantiate the small CNN. With no arguments cnn_model() reads the global
# label matrix `y`, so this must run before the cell that deletes `y`.
Cnn_model = cnn_model()

In [None]:
# Drop the training arrays and collect garbage to free memory.
# Running this cell a second time raises NameError because the names are gone.
del X, y
gc.collect()

In [None]:
# Training accuracy per epoch, taken from the Keras History object.
plt.figure(figsize=(15, 5))
plt.plot(history.history['accuracy'])
plt.gca().set(title='Model accuracy', ylabel='Accuracy', xlabel='Epoch')
plt.show()

In [None]:
# Training loss per epoch, taken from the Keras History object.
plt.figure(figsize=(15, 5))
plt.plot(history.history['loss'])
plt.gca().set(title='Model loss', ylabel='loss', xlabel='Epoch')
plt.show()

In [None]:
# List the file names in the test image directory and report how many there are.
test = os.listdir("../input/happy-whale-and-dolphin/test_images")
print(len(test))

In [None]:
# Submission frame: one row per test file name, with an empty string
# placeholder in 'predictions' that the prediction loop fills in.
col = ['image']
test_df = pd.DataFrame(test, columns=col).assign(predictions='')

In [None]:
#model = load_model(r'../input/happywhaleanddolphin1/effb0_0.h5')

In [None]:
# Predict the test set in chunks so that at most `batch_size` images are held
# in memory at once.
batch_size = 5000
L = len(test_df)

for batch_start in range(0, L, batch_size):
    limit = min(batch_start + batch_size, L)
    test_df_batch = test_df.iloc[batch_start:limit]
    X = Loading_Images(test_df_batch, test_df_batch.shape[0], "test_images")
    X /= 255  # same scaling as applied to the training images
    # X is already an ndarray, so it is passed as-is (no extra copy).
    predictions = model.predict(X, verbose=1)
    for i, pred in enumerate(predictions):
        # Indices of the five highest scores, best first.
        p = pred.argsort()[-5:][::-1]
        # Decode all five labels once per image rather than once per candidate.
        names = label_encoder.inverse_transform(p)
        confident, uncertain = [], []
        for class_idx, name in zip(p, names):
            if pred[class_idx] > 0.5:
                confident.append(name)
            else:
                uncertain.append(name)
        # Output order: labels scoring above 0.5, then 'new_individual', then
        # the remaining candidates.
        # NOTE(review): this writes six labels per image (five ids plus
        # 'new_individual'); confirm the submission format accepts that many.
        s = ' '.join(confident + ['new_individual'] + uncertain)
        # Row labels are assumed to be the default 0..L-1 index of test_df.
        test_df.loc[batch_start + i, 'predictions'] = s
    # Release this chunk before loading the next one.
    del X, test_df_batch, predictions
    gc.collect()

In [None]:
# Write the submission file without the index column, then preview the first rows.
test_df.to_csv(path_or_buf='submission.csv', index=False)
test_df.head(5)

In [None]:
# Write the same frame a second time under a more descriptive file name.
test_df.to_csv('submission_whale_and_dolphin.csv', index = False)