In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import gridspec
import seaborn as sns

import os
import os, warnings
import PIL
import PIL.Image

import tensorflow as tf
from tensorflow.keras.preprocessing import image_dataset_from_directory
import tensorflow_datasets as tfds
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator
from sklearn import preprocessing

import sys

# Silence every warning category, but only when no -W options were given
# to the interpreter (sys.warnoptions is empty in that case).
if len(sys.warnoptions) == 0:
    import warnings
    warnings.simplefilter("ignore")

## HappyWhale - Whale and Dolphin Classification

In [None]:
# Load the training metadata CSV and preview its first rows.
df_train=pd.read_csv('../input/happy-whale-and-dolphin/train.csv')
df_train.head()

In [None]:
# (rows, columns) of the training metadata.
df_train.shape

In [None]:
# Column dtypes and non-null counts of the training metadata.
df_train.info()

In [None]:
# Number of images available per individual animal.
df_train['individual_id'].value_counts()

## Species Distribution

In [None]:
# Distinct species labels present in the training metadata.
df_train.species.unique()

In [None]:
# Species labels handed to the generators through `classes=`; the list order
# fixes the class index of each label.
# NOTE(review): spellings such as 'kiler_whale' and 'bottlenose_dolpin' are
# presumably the raw dataset labels - keep them byte-identical to the CSV.
classe_names = [
    'melon_headed_whale',
    'humpback_whale',
    'false_killer_whale',
    'bottlenose_dolphin',
    'beluga',
    'minke_whale',
    'fin_whale',
    'blue_whale',
    'gray_whale',
    'southern_right_whale',
    'common_dolphin',
    'kiler_whale',
    'pilot_whale',
    'dusky_dolphin',
    'killer_whale',
    'long_finned_pilot_whale',
    'sei_whale',
    'spinner_dolphin',
    'bottlenose_dolpin',
    'cuviers_beaked_whale',
    'spotted_dolphin',
    'globis',
    'brydes_whale',
    'commersons_dolphin',
    'white_sided_dolphin',
    'short_finned_pilot_whale',
    'rough_toothed_dolphin',
    'pantropic_spotted_dolphin',
    'pygmy_killer_whale',
    'frasiers_dolphin',
]

In [None]:
# Number of species labels; the softmax heads defined later use the same count (30).
len(classe_names)

In [None]:
# Per-species image counts as a table.
species_view = pd.DataFrame(df_train['species'].value_counts())

# Apply the theme before creating the figure so it styles this plot.
sns.set_theme(style="whitegrid")
fig, ax = plt.subplots(figsize=(14, 7))

# Species is a nominal category, so draw plain count bars; a KDE curve over
# category positions carries no meaning and would add a non-bar artist.
sns.histplot(df_train, x="species", color='#0B606F', ax=ax)

# Write the count above each bar.
for rect in ax.patches:
    height = rect.get_height()
    ax.annotate(f'{int(height)}', xy=(rect.get_x() + rect.get_width() / 2, height),
                xytext=(0, 5), textcoords='offset points', ha='center', va='bottom')

plt.xticks(rotation=90)
ax.set_title('Species count', x=0.54, y=1.1, fontsize=30)
plt.show()

## Preprocessing VGG16

In [None]:
# Shared pipeline for the labelled images: VGG16 preprocessing plus light
# augmentation, with 25% of the rows reserved for the "validation" subset.
datagen = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.vgg16.preprocess_input,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    validation_split=0.25,
)


In [None]:
# Batches of (image, one-hot species) pairs from the "training" subset.
# `rescale` is an ImageDataGenerator constructor option, not a
# flow_from_dataframe argument: here it was swallowed by the legacy **kwargs
# and ignored, so it is dropped. Scaling to [0, 1] would also conflict with
# vgg16.preprocess_input, which expects 0-255 pixel values.
train_generator = datagen.flow_from_dataframe(
    df_train,
    directory='../input/happy-whale-and-dolphin/train_images',
    x_col='image',
    y_col='species',
    subset="training",
    target_size=(224, 224),
    batch_size=32,
    seed=1042,
    shuffle=True,
    classes=classe_names,
    class_mode="categorical",
)

In [None]:
# Validation images must not be augmented, so they get their own generator:
# same VGG16 preprocessing, no random transforms. The split is taken from the
# dataframe row order, so using the same validation_split (0.25) as `datagen`
# selects exactly the rows that the training subset excludes.
valid_datagen = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.vgg16.preprocess_input,
    validation_split=0.25,
)

# `rescale` is not a flow_from_dataframe argument (it was silently ignored),
# so it is dropped here.
valid_generator = valid_datagen.flow_from_dataframe(
    df_train,
    directory='../input/happy-whale-and-dolphin/train_images',
    x_col='image',
    y_col='species',
    subset="validation",
    target_size=(224, 224),
    batch_size=32,
    seed=1042,
    shuffle=True,
    classes=classe_names,
    class_mode="categorical",
)

In [None]:
# Show the first image of four consecutive validation batches.
# The arrays are VGG16-preprocessed floats, so imshow clips them; the colours
# are not the original photo colours.
fig, ax = plt.subplots(nrows=1, ncols=4, figsize=(15,15))

for i in range(4):
    # Builtin next() instead of the legacy .next() method, matching the
    # test-image preview cell; the labels are not needed for display.
    images, _labels = next(valid_generator)
    ax[i].imshow(images[0])
    ax[i].axis('off')

In [None]:
# Load the sample submission; its 'image' column lists the test files fed to the test generator below.
df_submission=pd.read_csv('../input/happy-whale-and-dolphin/sample_submission.csv')
df_submission.head()

In [None]:
# (rows, columns) of the sample submission.
df_submission.shape

In [None]:
# Test-time pipeline: VGG16 preprocessing only, no augmentation.
test_datagen = ImageDataGenerator(preprocessing_function=tf.keras.applications.vgg16.preprocess_input)

# Unlabelled batches in dataframe order (shuffle=False), so predictions line
# up with the rows of df_submission.
# `rescale` is not a flow_from_dataframe argument (it was silently ignored),
# so it is dropped here.
test_generator = test_datagen.flow_from_dataframe(
    df_submission,
    directory='../input/happy-whale-and-dolphin/test_images',
    x_col='image',
    y_col=None,
    target_size=(224, 224),
    batch_size=32,
    seed=2020,
    shuffle=False,
    class_mode=None,
)

## The VGG16 preprocessing can be used to better bring out knuckles, dorsal humps, scars and pigmentation,
## which are useful for identifying species

In [None]:
# Show the first image of the first four test batches.
fig, ax = plt.subplots(nrows=1, ncols=4, figsize=(15,15))

for i in range(4):
    # Index the iterator instead of calling next(): indexing fetches batch i
    # without moving the iterator's batch cursor, so later prediction cells
    # still start from the first test batch.
    # The uint8 cast is kept from the original preview of the preprocessed array.
    image = test_generator[i][0].astype('uint8')
    ax[i].imshow(image)
    ax[i].axis('off')

In [None]:
from tensorflow import keras
from tensorflow.keras import layers
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D
from keras import regularizers
from tensorflow.keras import optimizers
from keras.layers import Dense, Conv2D , MaxPool2D , Flatten , Dropout
from tensorflow.keras.optimizers import Adam
from keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.optimizers import SGD
from keras.regularizers import l2
import tensorflow_hub as hub
import tensorflow_addons as tfa
from tensorflow.keras.models import Model

In [None]:
##################################################################

In [None]:
from tensorflow.keras.applications.vgg16 import VGG16

In [None]:
# Spatial size of the images fed to the VGG16 backbone.
IMAGE_SIZE = [224, 224]

# Convolutional base only (include_top=False), initialised with ImageNet weights.
vgg = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=[*IMAGE_SIZE, 3],
)

In [None]:
# Collect every VGG16 layer except the last one in a Sequential container.
# The layer objects are shared with `vgg`, not copied.
model = Sequential()
for vgg_layer in vgg.layers[:-1]:
    model.add(vgg_layer)

In [None]:
def count_params(model):
    """Count the scalar parameters of a model, split by trainability.

    Args:
        model: object exposing `trainable_weights` and `non_trainable_weights`,
            each an iterable of variables whose `get_shape().as_list()`
            returns the dimensions of the variable.

    Returns:
        dict with keys 'non_trainable_params' and 'trainable_params', each a
        plain Python int. An empty weight list yields 0 (the previous version
        returned the float 0.0 from np.sum([])).
    """
    def _total(weights):
        # np.prod of an empty shape is 1, so a scalar variable counts as one parameter.
        return sum(int(np.prod(w.get_shape().as_list())) for w in weights)

    return {
        'non_trainable_params': _total(model.non_trainable_weights),
        'trainable_params': _total(model.trainable_weights),
    }

In [None]:
# Freeze the pretrained VGG16 backbone so only the new classification head is
# trained. Iterating `vgg.layers` directly (instead of `model.layers`) keeps
# this cell correct no matter which network the name `model` currently refers to.
for layer in vgg.layers:
    layer.trainable = False

In [None]:
# New classification head on top of the VGG16 feature maps: flatten, then one
# softmax unit per species label (derived from classe_names instead of a hard-coded 30).
x = Flatten()(vgg.output)
prediction = Dense(units=len(classe_names), activation='softmax')(x)

In [None]:
# Assemble the full network from the VGG16 input to the new softmax head, then print its layer table.
model = Model(inputs=vgg.input, outputs=prediction)
model.summary()

In [None]:
# Configure training: Adam with learning rate 0.01, categorical cross-entropy, accuracy as the reported metric.
model.compile(optimizer=Adam(learning_rate=0.01), loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Used further down only to derive step counts (4000//batch_size, 800//batch_size).
# NOTE(review): the generators were created with batch_size=32 - confirm which value is intended.
batch_size=64

In [None]:
# Short training run: 2 epochs of 50 training batches each, validating on 100 batches per epoch.
model.fit(x=train_generator, validation_data=valid_generator, steps_per_epoch=50,validation_steps=100,epochs=2,)

In [None]:
!pip install -U tensorflow-addons

In [None]:
####################################################################

In [None]:
# Custom CNN trained from scratch: four conv/max-pool stages, two further
# convolutions, then a dense classifier with one softmax unit per species.
# The output size comes from classe_names rather than a hard-coded 30, and
# the commented-out optimizer experiments were removed as dead code.
model = Sequential([
    Conv2D(64, (3, 3), activation='relu', padding='same', name='conv_1', input_shape=(224, 224, 3)),
    MaxPooling2D((2, 2), strides=(1, 1), name='maxpool_1'),
    Conv2D(64, (3, 3), activation='relu', padding='same', name='conv_2'),
    MaxPooling2D((2, 2), strides=(2, 2), name='maxpool_2'),
    Conv2D(128, (3, 3), activation='relu', padding='same', name='conv_3'),
    MaxPooling2D((2, 2), strides=(1, 1), name='maxpool_3'),
    Conv2D(128, (3, 3), activation='relu', padding='same', name='conv_4'),
    MaxPooling2D((2, 2), strides=(2, 2), name='maxpool_4'),

    Conv2D(256, (3, 3), activation='relu', padding='same', name='conv_5'),
    Conv2D(256, (2, 2), activation='relu', padding='same', name='conv_6'),
    Flatten(),
    Dropout(0.5),

    Dense(512, activation='relu', name='dense_1'),
    Dense(256, activation='relu', name='dense_2'),
    BatchNormalization(),
    # Softmax output: the model emits class probabilities, not logits.
    Dense(len(classe_names), activation='softmax', name='output'),
])



In [None]:
# The output layer of `model` already applies softmax, so the loss receives
# probabilities: from_logits must be False. With from_logits=True the loss
# would apply a second softmax to those probabilities.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.005),
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False, label_smoothing=0.1),
    metrics=['accuracy'])

In [None]:
# Print the layer table and parameter counts of the current `model`.
model.summary()

In [None]:
# This network is trained from scratch, so every layer must stay trainable.
# The previous version of this cell (copied from the VGG16 transfer-learning
# section) froze all layers, which leaves a randomly initialised model with
# nothing to optimise.
for layer in model.layers:
    layer.trainable = True

In [None]:
# Same value as the batch_size=32 passed to the generators above.
BATCH_SIZE=32

In [None]:
# NOTE(review): len() of a Keras iterator is presumably its number of batches per epoch,
# not its number of images - confirm before dividing these values by a batch size.
train_size=len(train_generator)
valid_size=len(valid_generator)

In [None]:
# len() of a Keras iterator is already the number of batches per epoch, so it
# is used directly as the step count. Dividing it by the batch size again (as
# before) covered only about 1/32 of the data per epoch.
steps_per_epoch = len(train_generator)
validation_steps = len(valid_generator)

# Keep only the per-epoch metric dictionary for the plots below.
hist = model.fit(
    train_generator,
    epochs=5,
    steps_per_epoch=steps_per_epoch,
    validation_data=valid_generator,
    validation_steps=validation_steps,
).history

In [None]:
# One figure per metric, training and validation curves overlaid.
# `hist` holds one value per epoch, so the x-axis is labelled in epochs
# (it was previously labelled "Training Steps"), and a legend tells the two
# curves apart.
for y_label, y_limits, train_key, val_key in (
    ("Loss (training and validation)", [0, 2], "loss", "val_loss"),
    ("Accuracy (training and validation)", [0, 1], "accuracy", "val_accuracy"),
):
    plt.figure()
    plt.ylabel(y_label)
    plt.xlabel("Epoch")
    plt.ylim(y_limits)
    plt.plot(hist[train_key], label="training")
    plt.plot(hist[val_key], label="validation")
    plt.legend()

In [None]:
# Multiply the learning rate by 0.1 once val_loss has failed to improve by at least 1e-7 for 5 epochs.
# NOTE(review): the fit call that uses this callback runs only 4 epochs, so with patience=5 it likely never fires - confirm.
rlrp = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_delta=1E-7,verbose=1)


In [None]:
# Model.fit accepts generators directly; fit_generator is deprecated and
# missing from recent Keras releases.
# NOTE(review): the step counts assume batch_size=64 while the generators
# yield batches of 32 - confirm the intended number of images per epoch.
history = model.fit(
    train_generator,
    steps_per_epoch=4000 // batch_size,
    epochs=4,
    validation_data=valid_generator,
    validation_steps=800 // batch_size,
    validation_freq=1,
    verbose=1,
    callbacks=[rlrp],
)

In [None]:
# Training curves from `history`: the loss pair first, then the accuracy pair,
# each in its own figure.
history_frame = pd.DataFrame(history.history)
for metric_pair in (['loss', 'val_loss'], ['accuracy', 'val_accuracy']):
    history_frame[metric_pair].plot()

In [None]:
#########################################################################################################

In [None]:
# Second from-scratch CNN: three pairs of 4x4 convolutions (stride 1, then
# stride 2) with dropout between stages, followed by a dense classifier that
# ends in a 30-way softmax.
my_model = Sequential([
    Conv2D(64, kernel_size=4, strides=1, activation='relu', input_shape=(224, 224, 3)),
    Conv2D(64, kernel_size=4, strides=2, activation='relu'),
    Dropout(0.5),
    Conv2D(128, kernel_size=4, strides=1, activation='relu'),
    Conv2D(128, kernel_size=4, strides=2, activation='relu'),
    Dropout(0.5),
    Conv2D(256, kernel_size=4, strides=1, activation='relu'),
    Conv2D(256, kernel_size=4, strides=2, activation='relu'),
    Flatten(),
    Dropout(0.5),
    Dense(512, activation='relu'),
    Dense(30, activation='softmax'),
])

# The softmax output pairs with the default (non-logit) categorical cross-entropy.
my_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=["accuracy"],
)

In [None]:
# Three fixes over the previous version of this cell:
# - `val_generator` was never defined (NameError); the validation iterator is `valid_generator`.
# - Model.fit replaces the deprecated fit_generator.
# - The result is stored in `history` so the plotting cell that follows shows
#   this run instead of the previous model's curves.
history = my_model.fit(train_generator, epochs=5, validation_data=valid_generator)

In [None]:
# Plot whatever run is currently stored in `history`: losses in one figure,
# accuracies in another.
history_frame = pd.DataFrame(history.history)
for columns in (['loss', 'val_loss'], ['accuracy', 'val_accuracy']):
    history_frame[columns].plot()

## Save model

In [None]:

# Write the current `model` to model.h5 in the working directory.
# NOTE(review): `model` is whichever network was last bound to that name (not `my_model`) - confirm it is the one to keep.
model.save('model.h5')


## Load model

In [None]:
# Reload a previously saved model from an attached input dataset.
# NOTE(review): `new_model` is not referenced by the later cells visible here, which keep using `model` - confirm intent.
from tensorflow.keras.models import load_model
new_model=load_model('../input/model-happywhale/model.h5')

In [None]:
# Every `model` defined in this notebook already ends in a softmax layer, so
# its outputs are probabilities. Stacking another Softmax on top (as before)
# would squash them towards a uniform distribution. The name is kept so the
# prediction cells below work unchanged.
probability_model = model

In [None]:
# test_generator[i] is a plain array holding batch i, so Model.predict is the
# right call; predict_generator is deprecated and missing from recent Keras releases.
predictions = probability_model.predict(test_generator[0])
prediction2 = probability_model.predict(test_generator[10])

In [None]:
# Predict with `model` directly on the test batches at index 0 and index 10.
predictions = model.predict(test_generator[0])
prediction2 = model.predict(test_generator[10])

In [None]:
# Per-class scores for the first image of the batch at index 0.
predictions[0]

In [None]:
# Index of the highest-scoring class for that image.
np.argmax(predictions[0])


In [None]:
# Index of the highest-scoring class for the first image of the batch at index 10.
np.argmax(prediction2[0])

In [None]:
# Mapping from species label to output index, needed to read the argmax values above.
train_generator.class_indices

In [None]:
# Read one test image straight from disk and display it.
import matplotlib.image as mpimg
img = mpimg.imread('../input/happy-whale-and-dolphin/test_images/000110707af0ba.jpg')
imgplot = plt.imshow(img)

In [None]:
# Read a second test image from disk and display it.
img = mpimg.imread('../input/happy-whale-and-dolphin/test_images/00150406ce5395.jpg')
imgplot = plt.imshow(img)

In [None]:
# Predict class probabilities for every test image in one pass.
filenames = test_generator.filenames
nb_samples = len(filenames)

# Take the batch size from the generator so the step count cannot drift from it.
desired_batch_size = test_generator.batch_size

# `steps` must be an integer; np.ceil returns a float.
steps = int(np.ceil(nb_samples / desired_batch_size))

# Model.predict accepts the generator directly; predict_generator is deprecated.
predict = probability_model.predict(test_generator, steps=steps)

In [None]:
%%time
from tqdm import tqdm
import itertools

desired_batch_size=10000
filenames = test_generator.filenames
nb_samples = len(filenames)
pred=[]
for row in tqdm(itertools.islice(test_generator, 27956)):
    pred.append(probability_model.predict_generator(row,steps =  np.ceil(nb_samples/desired_batch_size)))

In [None]:
# Attach the predictions to the submission frame and write it to submission.csv.
# Assumes `pred` holds exactly one entry per row of df_submission - TODO confirm.
df_submission["predictions"] = pred
df_submission.to_csv("submission.csv", index=False)