In [None]:
!pip install visualkeras --upgrade
!git clone https://github.com/shreyas-bk/U-2-Net-Keras ./u2-net
!git clone https://github.com/ktjonsson/keras-ArcFace ./arcface

In [None]:
from arcface.src.arcface_layer import ArcFace

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load
import os, sys, glob
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import keras.backend as K
from IPython.display import Image
import random
import tqdm
from skimage import color, io, feature
import seaborn as sns
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
# Root of the competition data; `train_images/` below it is where process_imgs() reads from.
imgdir = "../input/happy-whale-and-dolphin/"
# Root of the cropped dataset (`train2.csv` and `cropped_train_images/` are read from here).
cropped = "../input/whale2-cropped-dataset/"
# Local checkout of the U-2-Net repository cloned in the first cell.
u2net = "./u2-net/"
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Use seaborn's plain white theme for every figure in the notebook.
sns.set_style("white")
# visualkeras is installed by the first cell; `vk` is used to draw the model architecture.
import visualkeras as vk

In [None]:
# Print the top-level contents of both input datasets as a small tree.
for root in (imgdir, cropped):
    print(root)
    for file in glob.glob(os.path.join(root, "*")):
        # The backslash is escaped explicitly: a bare "\_" is an invalid escape
        # sequence that newer Python versions warn about. The output is unchanged.
        print(f" \\_ {file}")

In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

import tensorflow as tf
from keras.models import Sequential
from keras import layers

from keras.preprocessing import image
from keras.applications.imagenet_utils import preprocess_input
from keras.layers import Input, Dense, Activation, BatchNormalization, Flatten, Conv2D
from keras.layers import AveragePooling2D, MaxPooling2D, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import resnet
from tensorflow.keras.preprocessing import image
from tensorflow.keras.layers import Input

In [None]:
# train = pd.read_csv(imgdir + "train.csv")
# train.head()

In [None]:
# Load the metadata table that ships with the cropped dataset and preview it.
train_cropped_csv = os.path.join(cropped, "train2.csv")
train_cropped = pd.read_csv(train_cropped_csv)
train_cropped.head()

In [None]:
# print(train.shape)
# (rows, columns) of the cropped-dataset metadata table.
print(train_cropped.shape)

Some species labels are duplicates caused by typos or other labelling errors (for example, `kiler_whale` instead of `killer_whale`). Once these are merged, the dataset actually contains 26 distinct species.

In [None]:
def clean_labels(train):
    """Merge misspelled/duplicate species names and add an integer label column.

    Parameters
    ----------
    train : pandas.DataFrame
        Metadata table with a ``species`` column. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``train`` object, with ``species`` corrected and a new
        ``species_id`` column holding the ``LabelEncoder`` code of each species.
    """
    # Misspelling / alias -> canonical species name (exact, whole-value matches).
    fixes = {
        'kiler_whale': 'killer_whale',
        'bottlenose_dolpin': 'bottlenose_dolphin',
        'globis': 'short_finned_pilot_whale',
        'pilot_whale': 'short_finned_pilot_whale',
    }
    # Assign the result back to the column instead of calling
    # ``train['species'].replace(..., inplace=True)``: that chained in-place
    # call operates on a temporary under pandas Copy-on-Write and would leave
    # ``train`` uncorrected.
    train['species'] = train['species'].replace(fixes)
    train['species_id'] = LabelEncoder().fit_transform(train['species'])
    return train

In [None]:
# train = clean_labels(train)
# Fix the species names and add the integer `species_id` column.
train_cropped = clean_labels(train_cropped)
# Integer species labels, used as the training target below.
y = train_cropped['species_id']

In [None]:
# Share of images per species, one bar per species.
fig, ax = plt.subplots(dpi=150)
sns.histplot(
    data=train_cropped,
    x='species',
    hue='species',
    stat='percent',
    discrete=True,
    ax=ax,
)
# The x axis already names every species, so the hue legend is redundant.
ax.get_legend().remove()
ax.tick_params(axis='x', labelrotation=90)
ax.set_title("Distribution of different whale species")
sns.despine(ax=ax)

In [None]:
# Image(filename= imgdir + "train_images/" + random.choice(train['image'])) 

In [None]:
# Display one randomly chosen image from the cropped training set.
Image(filename=cropped + "cropped_train_images/cropped_train_images/" + random.choice(train_cropped['image'])) 

In [None]:
# Count how many rows (images) each `individual_id` has.
train_cropped["individual_id"].value_counts()

## Processing the Images

In [None]:
# Side length in pixels of the square images loaded for, and expected by, the first model.
pix_size = 128

In [None]:
def process_imgs(train, stop=0, size=64, channels=1, resnet=False, img_dir=None):
    """Load training images into a single float32 array.

    Parameters
    ----------
    train : pandas.DataFrame
        Table whose ``image`` column holds the file names to load.
    stop : int, optional
        Maximum number of images to load, taken from the top of ``train``.
        ``0`` (the default) or ``None`` loads every image.
    size : int, optional
        Every image is resized to ``size`` x ``size`` pixels.
    channels : int, optional
        ``1`` for grayscale output, ``3`` for colour output.
    resnet : bool, optional
        When true the image is never converted to grayscale, so ``channels``
        must be ``3``.
    img_dir : str, optional
        Directory containing the image files. Defaults to
        ``imgdir + 'train_images/'``.
        NOTE(review): the cropped images are displayed from a different
        directory elsewhere in this notebook - confirm which one is intended.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(train), size, size, channels)``, dtype float32.
        Only the first ``stop`` rows are filled; the remaining rows stay zero,
        so callers that pass ``stop`` must restrict their labels to those rows.

    Raises
    ------
    ValueError
        If ``channels`` is not 1 or 3, or if ``resnet`` is requested with a
        channel count other than 3.
    """
    if channels not in (1, 3):
        raise ValueError(f"channels must be 1 or 3, got {channels}")
    if resnet and channels != 3:
        raise ValueError("resnet=True produces 3-channel images; pass channels=3")
    if img_dir is None:
        img_dir = imgdir + 'train_images/'

    total = train.shape[0]
    # A falsy `stop` means "no limit"; never ask for more rows than exist.
    limit = min(stop, total) if stop and stop > 0 else total
    to_gray = channels == 1

    # Allocate float32 directly rather than float64 followed by a full-size cast.
    X = np.zeros((total, size, size, channels), dtype='float32')
    names = train['image'].iloc[:limit]
    for n, name in enumerate(tqdm.tqdm(names, desc="processing", total=limit)):
        # target_size is (height, width); the channel count is not part of it.
        img = image.load_img(os.path.join(img_dir, name), target_size=(size, size))
        if to_gray:
            # ImageNet preprocessing indexes three colour channels, so it is
            # applied to colour images only.
            arr = color.rgb2gray(np.asarray(img)).reshape((size, size, 1))
        else:
            arr = preprocess_input(image.img_to_array(img))
        X[n] = arr
    return X

In [None]:
# X_gray = process_imgs(train, 500)

In [None]:
# Load a 500-image subset as 3-channel pix_size x pix_size arrays.
X_cropped = process_imgs(train_cropped, stop=500, size=pix_size, channels=3)

In [None]:
# plt.imshow(X_gray[0])

In [None]:
# X_cropped went through `preprocess_input` (default "caffe" mode: RGB -> BGR with
# the ImageNet mean subtracted), so its values fall outside the [0, 1] range that
# imshow expects for floats. Flip the channels back and rescale for display only.
preview = X_cropped[0][..., ::-1]
preview = (preview - preview.min()) / max(float(np.ptp(preview)), 1e-8)
plt.imshow(preview)

I'm torn between using `sigma=3` and `sigma=2` for the Canny edge detector. Some of the pictures get pretty noisy, while in others the dorsal fin barely shows up at all...

In [None]:
# edges0 = feature.canny(X_gray[0].reshape((64,64)), sigma=3)
# plt.imshow(edges0)

In [None]:
# fig, axes = plt.subplots(1, 4, figsize=(20, 20))
# for j in range(6 * 6):
#     plt.subplot(6, 6, j+1)
#     plt.axis('off')
#     plt.imshow(X_gray[j])
# plt.show()

In [None]:
# Canny needs a single-channel image, but X_cropped holds 3-channel arrays, so
# reshaping one of them to (pix_size, pix_size) fails. Convert each image to
# grayscale in [0, 1] first, and draw on a real 6x6 grid of axes.
fig, axes = plt.subplots(6, 6, figsize=(20, 20))
for j, ax in enumerate(axes.flat):
    rgb = X_cropped[j][..., ::-1]  # assumes caffe-style BGR output of preprocess_input
    rgb = (rgb - rgb.min()) / max(float(np.ptp(rgb)), 1e-8)
    ax.imshow(feature.canny(color.rgb2gray(rgb), sigma=2))
    ax.axis('off')
plt.show()

In [None]:
# X_canny = np.zeros((X_gray.shape[0], 64, 64))
# for i in range(1000):
#     X_canny[i] = feature.canny(X_gray[i].reshape((64, 64)), sigma=2) 

In [None]:
def split(X, y, size=0.3, random=69):
    """Split features and labels into two random subsets.

    ``size`` is forwarded to ``train_test_split`` as ``test_size`` and
    ``random`` as ``random_state``; the four resulting pieces are returned
    exactly as ``train_test_split`` produces them.
    """
    options = {"test_size": size, "random_state": random}
    return train_test_split(X, y, **options)

In [None]:
# X_train, X_test, y_train, y_test = split(X_gray, y)
# X_train, X_val, y_train, y_val = split(X_train, y_train)

In [None]:
# process_imgs() fills only the leading rows of X_cropped (up to its `stop` limit);
# every other row is an all-zero placeholder. Keep just the rows that were really
# loaded, together with their labels, so the model is not trained on blank images.
crop_loaded = np.flatnonzero(X_cropped.reshape(len(X_cropped), -1).any(axis=1))
X_crop_loaded, y_crop_loaded = X_cropped[crop_loaded], y.iloc[crop_loaded]

# Hold out a test set, then carve a validation set out of the remaining training data.
X_crop_train, X_crop_test, y_crop_train, y_crop_test = split(X_crop_loaded, y_crop_loaded)
X_crop_train, X_crop_val, y_crop_train, y_crop_val = split(X_crop_train, y_crop_train)

This is the ArcFace model, trained without any background removal. The goal is to see how well it performs after up to 500 epochs on a small subset of the data.

In [None]:
# ArcFace layer from the cloned keras-ArcFace repository, configured for the 26 species.
# NOTE(review): it is passed below as the `activation` of the last Dense layer -
# confirm the cloned layer supports being used that way.
af = ArcFace(output_dim=26, class_num=26)
# Small CNN: three convolution stages, then a dense head ending in a 26-way output.
model = Sequential([
    layers.Conv2D(256, 3, padding='valid', activation='relu', input_shape=[pix_size, pix_size, 3]),
    layers.MaxPooling2D(2),
    layers.Conv2D(128, 3, padding='valid', activation='relu'),
    layers.Conv2D(128, 3, padding='valid', activation='relu'),
    layers.MaxPooling2D(2),
    # NOTE(review): this convolution has no activation - confirm that is intended.
    layers.Conv2D(64, 3, padding='valid'),
    # NOTE(review): the 'leaky_relu' string alias may not be accepted by every
    # Keras version - confirm against the installed one.
    layers.Conv2D(64, 3, padding='valid', activation='leaky_relu'),
    layers.Flatten(),
    layers.Dropout(0.2),
    layers.Dense(64, activation='relu'),
    layers.BatchNormalization(),
    layers.Dense(256, activation='leaky_relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.2),
    layers.Dense(256, activation='leaky_relu'),
    layers.Dense(26, activation=af)
])

model.summary()

In [None]:
# Render the layer stack of `model` with visualkeras.
vk.layered_view(model)

In [None]:
# Initial learning rate shared by the optimizers below.
lr = 1e-3
# File that receives the best checkpoint.
best = "best.hdf5"
# The models are compiled with metrics=['sparse_categorical_accuracy'], so Keras logs the
# validation metric as 'val_sparse_categorical_accuracy'. 'val_accuracy' is never logged,
# and monitoring it would leave all three callbacks inactive.
monitor = 'val_sparse_categorical_accuracy'
callbacks = [
    # Shrink the learning rate by 5x after 3 epochs without improvement.
    ReduceLROnPlateau(monitor=monitor, mode='max', factor=0.2, patience=3, min_lr=1e-7),
    # Stop training after 5 epochs without improvement.
    EarlyStopping(monitor=monitor, mode='max', patience=5, min_delta=1e-5),
    # Keep only the best-scoring model on disk.
    ModelCheckpoint(best, monitor=monitor, verbose=1, save_best_only=True, mode='max'),
]

In [None]:
# opt = Adam(learning_rate=lr)
# model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])

# model.fit(X_train, y_train, batch_size = 16, epochs = 20, validation_data = (X_val, y_val), callbacks=callbacks)

In [None]:
# Compile with Adam at the configured learning rate. The labels are integer class ids,
# hence the sparse loss and metric.
opt = Adam(learning_rate=lr)
model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])

# Train on the cropped split for up to 500 epochs, validating on the validation split.
model.fit(X_crop_train, y_crop_train, batch_size = 32, epochs = 500, validation_data = (X_crop_val, y_crop_val), callbacks=callbacks)

In [None]:
# FIXME(review): `X_canny` is only created in a commented-out cell earlier in the notebook,
# so this cell raises NameError on a fresh kernel unless that code is restored.
# 60/40 train/test split, then 70/30 train/validation split of the training part.
X_train, X_test, y_train, y_test = train_test_split(X_canny, y, test_size=0.4, random_state=69)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.3, random_state=69)

In [None]:
# Continue training the same `model` on the split created in the previous cell.
# NOTE(review): `model` was built for (pix_size, pix_size, 3) inputs - confirm that
# X_train has that shape before running this.
model.fit(X_train, y_train, batch_size = 16, epochs = 50, validation_data = (X_val, y_val), callbacks=callbacks)

## ResNet
I thought a pretrained ResNet model might do a better job of finding the whales in the images. However, the results were not as good as I had hoped; in fact, they were worse than those of the previous model.

In [None]:
# Load a 500-image subset as 3-channel arrays at process_imgs' default size (64 x 64).
X_resnet = process_imgs(train_cropped, 500, channels=3, resnet=True)

In [None]:
# process_imgs() fills only the leading rows of X_resnet (up to its `stop` limit);
# every other row is an all-zero placeholder. Keep just the rows that were really
# loaded, together with their labels, so the model is not trained on blank images.
resnet_loaded = np.flatnonzero(X_resnet.reshape(len(X_resnet), -1).any(axis=1))
X_resnet_loaded, y_resnet_loaded = X_resnet[resnet_loaded], y.iloc[resnet_loaded]

# 60/40 train/test split, then 70/30 train/validation split of the training part.
X_train, X_test, y_train, y_test = train_test_split(X_resnet_loaded, y_resnet_loaded, test_size=0.4, random_state=69)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.3, random_state=69)

In [None]:
# Pretrained ResNet50 backbone without its classification head. `classes` only applies
# when include_top=True, so it is not passed here.
resmodel = resnet.ResNet50(input_shape=(64, 64, 3), weights='imagenet', include_top=False)
model = tf.keras.Sequential([resmodel,
                                 layers.MaxPooling2D(),
                                 # Collapse the spatial axes so the Dense layers emit one
                                 # prediction vector per image, matching the integer labels.
                                 layers.Flatten(),
                                 # NOTE(review): 3-unit hidden layers are a very narrow
                                 # bottleneck before a 26-way output - confirm intended.
                                 layers.Dense(3, activation="relu"),
                                 layers.Dropout(0.2),
                                 layers.Dense(3, activation="relu"),
                                 layers.Dropout(0.2),
                                 # One output unit per species; the labels span 26 classes.
                                 layers.Dense(26, activation="softmax")
                                ])

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
              loss='sparse_categorical_crossentropy',
              metrics=['sparse_categorical_accuracy'])

history = model.fit(X_train, y_train, epochs = 50 , validation_data = (X_val, y_val), callbacks=callbacks)
# Predict with the full classifier; the bare backbone would return feature maps instead.
preds = model.predict(X_test)