In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
from PIL import Image
import os, cv2, gc

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf 
from tensorflow import keras 
from tensorflow.keras import callbacks
from tensorflow.keras import layers, losses, optimizers, metrics
import tensorflow_hub as hub
from keras.applications import imagenet_utils

import tensorflow_addons as tfa
from keras.layers.advanced_activations import LeakyReLU

In [None]:
# Image geometry. INP_SIZE is the default decode size used by create_dataset().
# TARGET_SIZE and INTERPOLATION are not referenced in this part of the notebook
# (presumably consumed by a later resizing step -- TODO confirm).
INP_SIZE = (512, 512)
TARGET_SIZE = (224, 224)
INTERPOLATION = "bilinear"

# Number of output classes (presumably the count of distinct individual_id
# values in train.csv -- verify against the fitted label encoder).
N_CLASSES = 15587

# Training setup.
NUM_FOLDS = 5
BATCH_SIZE = 24
SEED = 42

# Dataset locations, relative to the notebook's working directory.
DATA_DIR = '../input/happy-whale-and-dolphin/'
TRAIN_DIR = f'{DATA_DIR}train_images/'
TEST_DIR = f'{DATA_DIR}test_images/'

# Let tf.data choose parallelism and prefetch buffer sizes at runtime.
AUTOTUNE = tf.data.AUTOTUNE

In [None]:
#https://www.kaggle.com/ipythonx/tf-keras-learning-to-resize-image-for-vit-model/notebook
def build_augmenter(is_labelled):
    """Build the random augmentation function applied to decoded images.

    The returned function applies random horizontal and vertical flips
    followed by random saturation, brightness, contrast and hue jitter.

    Images are expected to be float tensors scaled to [0, 1] (which is what
    build_decoder produces). Brightness and contrast jitter can push values
    outside that range, and the HSV-based hue adjustment is only well defined
    on [0, 1] input, so values are clipped back before the hue step and once
    more on the final result.

    Args:
        is_labelled: if true, the returned function takes ``(img, label)`` and
            returns ``(augmented_img, label)`` with the label untouched;
            otherwise it takes and returns the image alone.

    Returns:
        A callable suitable for ``tf.data.Dataset.map``.
    """
    def clip_to_unit_range(img):
        # Keep pixel values inside the valid [0, 1] float image range.
        return tf.clip_by_value(img, 0.0, 1.0)

    def augment(img):
        img = tf.image.random_flip_left_right(img)
        img = tf.image.random_flip_up_down(img)
        img = tf.image.random_saturation(img, 0.65, 1.05)
        img = tf.image.random_brightness(img, 0.05)
        img = tf.image.random_contrast(img, 0.75, 1.05)
        # Brightness/contrast may overshoot; the hue op needs in-range input.
        img = clip_to_unit_range(img)
        img = tf.image.random_hue(img, 0.05)
        # Guard against small numeric overshoot from the RGB<->HSV round trip.
        return clip_to_unit_range(img)

    def augment_with_labels(img, label):
        return augment(img), label

    return augment_with_labels if is_labelled else augment

def build_decoder(is_labelled, size):
    """Return a function that loads an image file as a scaled float32 tensor.

    The image is read from disk, decoded as a 3-channel JPEG, resized and
    divided by 255.

    Args:
        is_labelled: when true the returned function maps ``(path, label)`` to
            ``(image, label)`` with the label cast to int32; otherwise it maps
            ``path`` to ``image``.
        size: sequence whose first two entries are passed to
            ``tf.image.resize`` as the output size.

    Returns:
        The decoding callable, intended for ``tf.data.Dataset.map``.
    """
    def load_image(path):
        raw = tf.io.read_file(path)
        pixels = tf.image.decode_jpeg(raw, channels=3)
        pixels = tf.image.resize(pixels, (size[0], size[1]))
        scaled = tf.divide(pixels, 255.)
        return tf.cast(scaled, tf.float32)

    def load_image_and_label(path, label):
        int_label = tf.cast(label, tf.int32)
        return load_image(path), int_label

    if is_labelled:
        return load_image_and_label
    return load_image

def create_dataset(df, 
                   batch_size  = 32, 
                   is_labelled = False, 
                   augment     = False, 
                   repeat      = False, 
                   shuffle     = False,
                   size        = INP_SIZE):
    """Build a batched, prefetched ``tf.data.Dataset`` of decoded images.

    Args:
        df: DataFrame with an ``'Id'`` column of image file paths and, when
            ``is_labelled`` is true, a ``'target_value'`` column of labels.
        batch_size: number of elements per batch.
        is_labelled: yield ``(image, label)`` pairs instead of images only.
        augment: apply the random augmentations from ``build_augmenter``.
        repeat: repeat the dataset indefinitely.
        shuffle: shuffle the elements, reshuffling on every iteration.
        size: image size forwarded to ``build_decoder``.

    Returns:
        The resulting ``tf.data.Dataset``.
    """
    decode_fn    = build_decoder(is_labelled, size)
    augmenter_fn = build_augmenter(is_labelled)

    # Create Dataset
    if is_labelled:
        dataset = tf.data.Dataset.from_tensor_slices((df['Id'].values, df['target_value'].values))
    else:
        dataset = tf.data.Dataset.from_tensor_slices(df['Id'].values)

    if shuffle:
        # Shuffle the lightweight path/label records *before* decoding: the
        # buffer then holds short strings instead of decoded float images, so
        # it can span the whole frame (a uniform shuffle) at negligible memory
        # cost. The buffer size must be positive, hence the guard for an
        # empty frame.
        dataset = dataset.shuffle(max(len(df), 1), reshuffle_each_iteration = True)

    dataset = dataset.map(decode_fn, num_parallel_calls = AUTOTUNE)
    if augment:
        dataset = dataset.map(augmenter_fn, num_parallel_calls = AUTOTUNE)
    if repeat:
        # Repeat after shuffling so each pass is reshuffled on its own and
        # samples from different epochs are not mixed in one shuffle buffer.
        dataset = dataset.repeat()

    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(AUTOTUNE)
    return dataset

In [None]:
# Encoder that turns individual_id values into integer class indices; it is
# fitted on train_df['individual_id'] when the training metadata is loaded.
label_encoder = LabelEncoder()

In [None]:
# Load the training metadata and derive the full path of every training image.
train_df = pd.read_csv(DATA_DIR + 'train.csv')
train_df['Id'] = [f'{TRAIN_DIR}{image_name}' for image_name in train_df['image']]

# Normalise misspelled / inconsistent names in the "species" column
# (mapping taken from Andrada's kernel).
train_df["species"] = train_df["species"].replace({
    "bottlenose_dolpin": "bottlenose_dolphin",
    "kiler_whale": "killer_whale",
    "beluga": "beluga_whale",
    "globis": "short_finned_pilot_whale",
    "pilot_whale": "short_finned_pilot_whale",
})

# Integer-encode individual_id; this column is the label read by create_dataset().
train_df['target_value'] = label_encoder.fit_transform(train_df['individual_id'])

# Summary
print(f'train_df: {train_df.shape}')
train_df.head()

In [None]:
# Spot check: every training row recorded for one individual.
train_df.loc[train_df['individual_id'].eq('19fbb960f07d')]

In [None]:
# Number of training images per species (after the name clean-up).
train_df['species'].value_counts()

In [None]:
# Missing-value count for each column.
train_df.isna().sum()