In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
from tensorflow.keras.preprocessing import image_dataset_from_directory
import shutil
import os

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
# Read the competition labels; the first CSV column (the image file name)
# becomes the index.
trainCSV = pd.read_csv("../input/happy-whale-and-dolphin/train.csv", index_col=0)

# Two-column frame (image, species) with a plain 0..n-1 integer index.
species_classes = trainCSV.loc[:, ['species']].reset_index()
species_unique = species_classes['species'].unique()

# Sanity check: show the file name stored in the first row.
species_classes.loc[0, 'image']

In [3]:
# Arrange the flat folder of resized images into one sub-folder per species,
# so that image_dataset_from_directory can infer labels from directory names.
source_dir = '../input/dolphin-resized/train_images_res_64x64/train_images_res_64x64'
target_root = '../working/temp/train_images/'
os.makedirs(target_root, exist_ok=True)

# Look the species up by file name instead of by row position. Comparing
# files[x] with row x of the CSV silently skipped every file whose sorted
# position disagreed with the CSV order, and raised KeyError as soon as the
# folder held more files than the CSV had rows.
species_by_image = dict(zip(species_classes['image'], species_classes['species']))

files = sorted(os.listdir(source_dir))
unlabelled = []

for filename in files:
    species = species_by_image.get(filename)
    if species is None:
        # No label for this file: remember it so the skip is visible.
        unlabelled.append(filename)
        continue

    species_dir = os.path.join(target_root, species)
    os.makedirs(species_dir, exist_ok=True)
    shutil.copy2(os.path.join(source_dir, filename), os.path.join(species_dir, filename))

if unlabelled:
    print(f"Skipped {len(unlabelled)} file(s) with no entry in train.csv, e.g. {unlabelled[:5]}")

In [4]:
from tensorflow import keras
from tensorflow.keras import layers, callbacks
# these are a new feature in TF 2.2
from tensorflow.keras.layers.experimental import preprocessing

# Load training and validation sets
def load_split(subset):
    """Load one side ('training' or 'validation') of the train/validation split.

    Both sides must be requested with the same `seed` and the same
    `validation_split` so that they partition the same shuffled file list.
    `validation_split` is the fraction reserved for validation, so the
    training side receives the remaining 80%. The earlier code passed 0.8 for
    the training call, which left only 20% of the images for training and
    never used the other 60%.

    Args:
        subset: 'training' or 'validation'.

    Returns:
        The batched dataset returned by image_dataset_from_directory.
    """
    return image_dataset_from_directory(
        '../working/temp/train_images/',
        labels='inferred',
        image_size=[64, 64],
        batch_size=128,
        color_mode='grayscale',
        crop_to_aspect_ratio=True,
        seed=1,
        validation_split=0.2,
        subset=subset,
    )


train_ = load_split('training')
val_ = load_split('validation')

# Data Pipeline
def convert_to_float(image, label):
    """Convert `image` to float32 with tf.image.convert_image_dtype.

    The label is passed through untouched so the function can be used
    directly with `Dataset.map` on (image, label) pairs.
    """
    # NOTE(review): if the incoming images are already float32, this call
    # does not rescale pixel values — confirm the expected value range.
    as_float = tf.image.convert_image_dtype(image, dtype=tf.float32)
    return as_float, label

AUTOTUNE = tf.data.experimental.AUTOTUNE


def _prepare(dataset):
    """Apply the shared input pipeline: float conversion, cache, prefetch."""
    converted = dataset.map(convert_to_float)
    cached = converted.cache()
    return cached.prefetch(buffer_size=AUTOTUNE)


train = _prepare(train_)
val = _prepare(val_)

# Stop early when training stalls, then restore the best weights seen.
early_stopping = callbacks.EarlyStopping(
    patience=20,  # number of epochs to wait without improvement before stopping
    min_delta=0.001,  # minimum amount of change that counts as an improvement
    restore_best_weights=True,
)

In [5]:
# Small CNN: augmentation, four Conv -> Dropout -> MaxPool blocks, dense head.
model = keras.Sequential([
    # Declare the input up front. Sequential only honours `input_shape` on
    # its first layer, so passing it to the first Conv2D (which sits behind
    # the augmentation layers) had no effect and left the model unbuilt
    # until fit() was called.
    keras.Input(shape=(64, 64, 1)),

    # Data augmentation
    preprocessing.RandomContrast(0.5),
    preprocessing.RandomRotation(factor=0.20),

    # First Convolutional Block
    layers.Conv2D(filters=64, kernel_size=5, activation="relu", padding='same'),
    layers.Dropout(0.3),
    layers.MaxPool2D(),

    # Second Convolutional Block
    layers.Conv2D(filters=128, kernel_size=3, activation="relu", padding='same'),
    layers.Dropout(0.3),
    layers.MaxPool2D(),

    # Third Convolutional Block
    layers.Conv2D(filters=256, kernel_size=3, activation="relu", padding='same'),
    layers.Dropout(0.3),
    layers.MaxPool2D(),

    # Fourth Convolutional Block
    layers.Conv2D(filters=512, kernel_size=3, activation="relu", padding='same'),
    layers.Dropout(0.3),
    layers.MaxPool2D(),

    # Classifier head
    layers.Flatten(),
    layers.Dense(units=256, activation="relu"),
    # NOTE(review): 30 output units must be at least the number of species
    # sub-folders found when loading the dataset — confirm against
    # train_.class_names.
    layers.Dense(units=30, activation="softmax"),
])

In [6]:
# Labels are integer class indices, hence the sparse loss and metric.
optimizer = keras.optimizers.Adam(epsilon=0.01)
model.compile(
    optimizer=optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['sparse_categorical_accuracy'],
)

In [7]:
# Upper bound on epochs; the early-stopping callback may end training sooner.
max_epochs = 200

history = model.fit(
    train,
    epochs=max_epochs,
    validation_data=val,
    callbacks=[early_stopping],
    verbose=1,
)

In [8]:
# One row per epoch, one column per logged metric.
history_frame = pd.DataFrame(history.history)

loss_columns = ['loss', 'val_loss']
accuracy_columns = ['sparse_categorical_accuracy', 'val_sparse_categorical_accuracy']

# Training vs. validation curves: loss first, then accuracy.
history_frame[loss_columns].plot()
history_frame[accuracy_columns].plot()