### - First, I give all credit to the Kaggle Grandmasters such as xhulu, Chris Deotte, CPMP, etc. (I can't list them all one by one), and to everyone who shares their notebooks on Kaggle.

### - I just copied and learned from them how to build a deep learning model.

This is just for fun :)

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
import random
import math
import cv2
import os
import gc

import tensorflow as tf
import tensorflow.keras.backend as K
print('Use Tensorflow version:', tf.__version__)

from kaggle_datasets import KaggleDatasets
from tensorflow.keras.layers import Dense, Lambda, Input
from tensorflow.keras.models import Model

# Global matplotlib defaults applied to every figure drawn in this notebook.
plt.style.use('fivethirtyeight')
plt.rcParams['figure.figsize'] = [16, 8]

In [None]:
def set_seed(seed: int):
    """Seed Python, NumPy and TensorFlow RNGs and export PYTHONHASHSEED.

    Args:
        seed: value handed to every seeding call.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed, tf.random.set_seed):
        seeder(seed)


set_seed(33)

In [None]:
# Choose the tf.distribute strategy: a TPU strategy when a TPU cluster resolver
# can be constructed, otherwise TensorFlow's default strategy.
try:
    # TPU detection. No parameters necessary if TPU_NAME environment variable is
    # set: this is always the case on Kaggle.
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    # The resolver could not be created; fall back to the non-TPU path below.
    tpu = None

if tpu:
    # Connect to the cluster and initialise the TPU system before building
    # the strategy object.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    # NOTE(review): newer TensorFlow releases expose this as
    # tf.distribute.TPUStrategy - confirm against the installed version.
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
else:
    # Default distribution strategy in Tensorflow. Works on CPU and single GPU.
    strategy = tf.distribute.get_strategy()

# The replica count is reused below to scale the global batch size.
print("REPLICAS: ", strategy.num_replicas_in_sync)

In [None]:
# For tf.data pipelines: let TensorFlow pick parallelism / prefetch sizes.
AUTO = tf.data.experimental.AUTOTUNE

# Configuration
# NOTE: EPOCHS is assigned again (same value) next to the LR schedule settings.
EPOCHS = 12
# Global batch size: 16 samples per replica.
BATCH_SIZE = 16 * strategy.num_replicas_in_sync

In [None]:
def show_train_img(mode):
    """Display up to ten randomly chosen images from one dataset folder.

    Despite the name, this works for any sub-folder of the competition input
    directory; the notebook calls it with 'train' and 'test'.

    Args:
        mode: name of the sub-folder below
            /kaggle/input/dog-breed-identification/ to sample images from.
    """
    image_dir = f'/kaggle/input/dog-breed-identification/{mode}/'
    file_names = pd.Series(os.listdir(image_dir))
    # Series.sample raises when asked for more rows than exist, so cap the
    # request at the number of files actually present.
    picked = file_names.sample(min(10, len(file_names))).values

    fig, ax = plt.subplots(nrows=2, ncols=5, figsize=(24, 10))
    axes = ax.ravel()

    for axis, file_name in zip(axes, picked):
        img = cv2.imread(os.path.join(image_dir, file_name))
        if img is None:
            # cv2.imread reports an unreadable / non-image file by returning
            # None; leave that panel empty instead of crashing in cvtColor.
            continue
        # OpenCV loads BGR; matplotlib expects RGB.
        axis.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    # Hide the frame on every panel, including any left empty.
    for axis in axes:
        axis.axis('off')

    plt.tight_layout()
    plt.show()

In [None]:
# Preview random images from the training folder.
show_train_img('train')

In [None]:
# Preview random images from the test folder.
show_train_img('test')

In [None]:
# df: training labels (its 'id' and 'breed' columns are used below).
# sub: sample submission; its 'id' column defines the test images and the frame
# is later filled with predictions.
df = pd.read_csv('/kaggle/input/dog-breed-identification/labels.csv')
sub = pd.read_csv('/kaggle/input/dog-breed-identification/sample_submission.csv')

df.head()

In [None]:
# Number of distinct breeds, i.e. the number of target classes.
unique_value = df['breed'].nunique()

print(f'We have {unique_value} dog categories in this dataset')

## Data Preparation

In [None]:
# Root path of the attached dataset as reported by KaggleDatasets.
GCS_DS_PATH = KaggleDatasets().get_gcs_path()

# The ids in the CSV files carry no extension, so '.jpg' is appended here.
train_paths = np.array(
    [os.path.join(GCS_DS_PATH, 'train', image_id + '.jpg') for image_id in df['id']]
)
test_paths = np.array(
    [os.path.join(GCS_DS_PATH, 'test', image_id + '.jpg') for image_id in sub['id']]
)

In [None]:
# Sorted breed names fix the class order used for label encoding and for the
# prediction columns.
dog_categories = np.sort(df['breed'].unique())

# Forward (breed -> class index) and reverse (class index -> breed) lookups.
cat2idx = {category: idx for idx, category in enumerate(dog_categories)}
idx2cat = {idx: category for category, idx in cat2idx.items()}

In [None]:
from tensorflow.keras.utils import to_categorical

# Map each breed name to its class index, then one-hot encode the indices.
label_encoded = df['breed'].map(cat2idx)
train_labels = to_categorical(label_encoded)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 12% of the labelled images for validation, stratified on the
# one-hot labels; the fixed random_state keeps the split reproducible.
# NOTE: train_paths / train_labels are rebound to the training portion only.
train_paths, valid_paths, train_labels, valid_labels = train_test_split(train_paths,
                                                                        train_labels,
                                                                        test_size=0.12,
                                                                        stratify=train_labels,
                                                                        random_state=2020)

print(f'train size: {train_paths.shape[0]} images')
print(f'validation size {valid_paths.shape[0]} images')
print(f'\ntest size {sub.shape[0]} images')

## Image Augmentation

In [None]:
def get_mat(rotation, shear, height_zoom, width_zoom, height_shift, width_shift):
    """Return the 3x3 matrix that `transform` applies to pixel indices.

    The result is (rotation @ shear) @ (zoom @ shift).

    Args:
        rotation: rotation angle in degrees.
        shear: shear angle in degrees.
        height_zoom: zoom factor; its reciprocal is placed at [0, 0].
        width_zoom: zoom factor; its reciprocal is placed at [1, 1].
        height_shift: translation placed at [0, 2].
        width_shift: translation placed at [1, 2].

        Every argument is combined with shape-[1] float32 constants before the
        reshape to [3, 3]; `transform` passes float32 tensors of shape [1].

    Returns:
        A tensor reshaped to [3, 3].
    """
    # returns 3x3 transform matrix which transforms indices
        
    # CONVERT DEGREES TO RADIANS
    rotation = math.pi * rotation / 180.
    shear    = math.pi * shear    / 180.

    def get_3x3_mat(lst):
        # Pack the nine entries (row-major order) into a 3x3 tensor.
        return tf.reshape(tf.concat([lst],axis=0), [3,3])
    
    # ROTATION MATRIX
    c1   = tf.math.cos(rotation)
    s1   = tf.math.sin(rotation)
    one  = tf.constant([1],dtype='float32')
    zero = tf.constant([0],dtype='float32')
    
    rotation_matrix = get_3x3_mat([c1,   s1,   zero, 
                                   -s1,  c1,   zero, 
                                   zero, zero, one])    
    # SHEAR MATRIX
    c2 = tf.math.cos(shear)
    s2 = tf.math.sin(shear)    
    
    shear_matrix = get_3x3_mat([one,  s2,   zero, 
                                zero, c2,   zero, 
                                zero, zero, one])        
    # ZOOM MATRIX
    # Reciprocals are used because the matrix is applied to destination
    # indices (see `transform`), not to the source image.
    zoom_matrix = get_3x3_mat([one/height_zoom, zero,           zero, 
                               zero,            one/width_zoom, zero, 
                               zero,            zero,           one])    
    # SHIFT MATRIX
    shift_matrix = get_3x3_mat([one,  zero, height_shift, 
                                zero, one,  width_shift, 
                                zero, zero, one])
    
    return K.dot(K.dot(rotation_matrix, shear_matrix), 
                 K.dot(zoom_matrix,     shift_matrix))


def transform(image):    
    """Apply a random rotation, shear, zoom and shift to a single image.

    Works by building the grid of destination pixel indices, mapping it
    through the matrix from `get_mat`, and gathering source pixels at the
    mapped (clipped) positions.

    Args:
        image: one image indexed as [row, col, channel]. The side length is
            hard-coded as DIM = 480 below, which matches the 480x480 resize
            in `prepare_image`; the final reshape also fixes 3 channels.

    Returns:
        Tensor of shape [DIM, DIM, 3].
    """
    # input image - is one image of size [dim,dim,3] not a batch of [b,dim,dim,3]
    # output - image randomly rotated, sheared, zoomed, and shifted
    
    DIM = 480
    XDIM = DIM%2 #fix for size 331
    
    # Each parameter is a normal draw scaled as shown (rotation and shear are
    # later treated as degrees by get_mat).
    # NOTE(review): a 180-degree standard deviation makes the rotation close
    # to arbitrary - confirm this is intended.
    rot = 180.0 * tf.random.normal([1], dtype='float32')
    shr = 2.0 * tf.random.normal([1], dtype='float32') 
    h_zoom = 1.0 + tf.random.normal([1], dtype='float32') / 8.0
    w_zoom = 1.0 + tf.random.normal([1], dtype='float32') / 8.0
    h_shift = 8.0 * tf.random.normal([1], dtype='float32') 
    w_shift = 8.0 * tf.random.normal([1], dtype='float32') 

    # GET TRANSFORMATION MATRIX
    m = get_mat(rot, shr, h_zoom, w_zoom, h_shift, w_shift) 

    # LIST DESTINATION PIXEL INDICES
    # Centred coordinates: x counts down from DIM//2, y counts up from -DIM//2;
    # z is the homogeneous coordinate (all ones).
    x   = tf.repeat(tf.range(DIM//2, -DIM//2,-1), DIM)
    y   = tf.tile(tf.range(-DIM//2, DIM//2), [DIM])
    z   = tf.ones([DIM*DIM], dtype='int32')
    idx = tf.stack( [x,y,z] )
    
    # ROTATE DESTINATION PIXELS ONTO ORIGIN PIXELS
    idx2 = K.dot(m, tf.cast(idx, dtype='float32'))
    idx2 = K.cast(idx2, dtype='int32')
    # Clip so that positions mapped outside the image reuse border pixels.
    idx2 = K.clip(idx2, -DIM//2+XDIM+1, DIM//2)
    
    # FIND ORIGIN PIXEL VALUES           
    # Convert centred coordinates back to array indices for the gather.
    idx3 = tf.stack([DIM//2-idx2[0,], DIM//2-1+idx2[1,]])
    d    = tf.gather_nd(image, tf.transpose(idx3))
        
    return tf.reshape(d,[DIM, DIM, 3])

In [None]:
def prepare_image(filename, label=None, augment=True, final_size=456):
    """Load a JPEG, optionally augment it, and resize it for the model.

    Steps: read and decode (3 channels), resize to 480x480, scale by 1/255.
    With `augment`, apply `transform`, a random 468x468 crop, a random
    horizontal flip and random brightness / contrast / saturation changes
    (values are not clipped afterwards). Finally resize to
    `final_size` x `final_size`.

    Args:
        filename: path of the JPEG file to read.
        label: optional label passed through unchanged.
        augment: whether to apply the random augmentations.
        final_size: side length of the returned square image.

    Returns:
        The image alone when `label` is None, otherwise `(image, label)`.
    """
    raw = tf.io.read_file(filename)
    pixels = tf.image.decode_jpeg(raw, channels=3)
    # 480 is the side length `transform` is hard-coded for.
    pixels = tf.image.resize(pixels, (480, 480))
    pixels = tf.cast(pixels, tf.float32) / 255.0

    if augment:
        pixels = transform(pixels)
        pixels = tf.image.random_crop(pixels, [468, 468, 3])
        pixels = tf.image.random_flip_left_right(pixels)
        pixels = tf.image.random_brightness(pixels, max_delta=0.2)
        pixels = tf.image.random_contrast(pixels, lower=0.8, upper=1.2)
        pixels = tf.image.random_saturation(pixels, lower=0.75, upper=1.2)

    pixels = tf.image.resize(pixels, (final_size, final_size))
    # Pin the static shape explicitly.
    pixels = tf.reshape(pixels, [final_size, final_size, 3])

    return pixels if label is None else (pixels, label)

In [None]:
def decode_image(filename, label=None, image_size=(456, 456)):
    """Load a JPEG without augmentation: decode, scale by 1/255, resize.

    Args:
        filename: path of the JPEG file to read.
        label: optional label passed through unchanged.
        image_size: (height, width) handed to tf.image.resize.

    Returns:
        The image alone when `label` is None, otherwise `(image, label)`.
    """
    decoded = tf.image.decode_jpeg(tf.io.read_file(filename), channels=3)
    scaled = tf.cast(decoded, tf.float32) / 255.0
    resized = tf.image.resize(scaled, image_size)

    return resized if label is None else (resized, label)

In [None]:
def data_augment(image, label=None):
    """Apply the light random augmentations used for test-time augmentation.

    A random horizontal flip followed by random brightness, contrast and
    saturation changes, in that order.

    Args:
        image: image tensor to augment.
        label: optional label passed through unchanged.

    Returns:
        The augmented image alone when `label` is None, otherwise
        `(image, label)`.
    """
    augmented = tf.image.random_flip_left_right(image)
    augmented = tf.image.random_brightness(augmented, max_delta=0.2)
    augmented = tf.image.random_contrast(augmented, lower=0.8, upper=1.2)
    augmented = tf.image.random_saturation(augmented, lower=0.75, upper=1.2)

    return augmented if label is None else (augmented, label)

In [None]:
# Training pipeline.
# - No .cache() after the augmenting map: caching there would store the first
#   epoch's randomly augmented images and replay exactly those images in every
#   later epoch, so the augmentation would effectively happen only once.
# - Shuffling is done on the (path, label) pairs before decoding, so the
#   buffer can cover the whole training set without holding decoded images.
train_dataset = (
    tf.data.Dataset
    .from_tensor_slices((train_paths, train_labels))
    .shuffle(len(train_paths))
    .repeat()
    .map(prepare_image, num_parallel_calls=AUTO)
    .batch(BATCH_SIZE)
    .prefetch(AUTO)
)

# Validation pipeline: deterministic preprocessing, so caching the batches is
# safe and avoids re-reading the files every epoch.
valid_dataset = (
    tf.data.Dataset
    .from_tensor_slices((valid_paths, valid_labels))
    .map(decode_image, num_parallel_calls=AUTO)
    .batch(BATCH_SIZE)
    .cache()
    .prefetch(AUTO)
)

# Plain (non-augmented) test pipeline; yields images only.
test_dataset = (
    tf.data.Dataset
    .from_tensor_slices(test_paths)
    .map(decode_image, num_parallel_calls=AUTO)
    .batch(BATCH_SIZE)
)

## Modeling

In [None]:
!pip install -q efficientnet

from efficientnet.tfkeras import EfficientNetB5
import tensorflow_addons as tfa

In [None]:
import keras.backend as K

def categorical_focal_loss_with_label_smoothing(gamma=2.0, alpha=0.25, ls=0.1, classes=120.0):
    """
    Build a multiclass focal loss whose targets are label-smoothed.

    Formula (per sample, summed over the class axis):
        y_ls = (1 - ls) * y_hot + ls / classes
        loss = sum_c alpha * y_ls[c] * (1 - p[c])^gamma * -log(p[c])

    The smoothing is applied to the labels, as the name and formula state;
    earlier code smoothed the predictions instead. The body uses `tf` ops
    directly so it does not depend on which backend module is currently bound
    to the name `K`.

    Parameters:
        gamma   -- focusing parameter for the modulating factor (1 - p)
        alpha   -- weighting factor, as in balanced cross entropy
        ls      -- label smoothing parameter
        classes -- number of classes
    Default value:
        gamma   -- 2.0
        alpha   -- 0.25
        ls      -- 0.1
        classes -- 120.0
    Returns:
        A function `focal_loss(y_true, y_pred)` that returns one loss value
        per sample; `y_true` is expected one-hot and `y_pred` holds class
        probabilities (the model here ends in a softmax).
    """
    def focal_loss(y_true, y_pred):
        epsilon = tf.keras.backend.epsilon()
        # Smooth the targets, not the predictions.
        y_true = tf.cast(y_true, y_pred.dtype)
        y_true_ls = (1.0 - ls) * y_true + ls / classes
        # Clip so that log() never sees 0 and the gradient stays finite.
        y_pred = tf.clip_by_value(y_pred, epsilon, 1.0 - epsilon)
        # Cross entropy against the smoothed targets.
        cross_entropy = -y_true_ls * tf.math.log(y_pred)
        # Modulating factor times weighting factor.
        weight = alpha * tf.pow(1.0 - y_pred, gamma)
        # Sum over the class axis -> one value per sample.
        return tf.reduce_sum(weight * cross_entropy, axis=-1)

    return focal_loss

In [None]:
# The model is created and compiled inside strategy.scope().
with strategy.scope():
    # ImageNet weights ('noisy-student' is the noted alternative); the
    # backbone ends in global average pooling instead of its own classifier.
    backbone = EfficientNetB5(weights='imagenet',
                              include_top=False,
                              pooling='avg')
    classifier = Dense(120, activation='softmax')
    model = tf.keras.Sequential([backbone, classifier])

    # Optional, not enabled: freeze the backbone with
    #     model.layers[0].trainable = False

    focal = categorical_focal_loss_with_label_smoothing(gamma=2.0, alpha=0.75, ls=0.125, classes=120.0)
    # Optimizer alternative noted by the author: tfa.optimizers.AdamW(weight_decay=1e-4)
    model.compile(optimizer='adam',
                  loss=focal,
                  metrics=['categorical_accuracy'])

    model.summary()

In [None]:
# Peak learning rate (0.0005 was the noted alternative), total number of
# epochs, and number of warm-up epochs for the schedule below.
LR = 0.0002 # 0.0005
EPOCHS = 12
WARMUP = 4

def cosine_warmup_lr(epoch, lr, num_warmup_steps, num_training_steps, num_cycles=0.5):
    """Return the learning rate for a 0-based `epoch` index.

    During the first `num_warmup_steps` epochs the rate ramps linearly as
    (epoch + 1) / (num_warmup_steps + 1) * lr, so the first epoch already
    trains with a non-zero rate and the peak `lr` is reached at epoch
    `num_warmup_steps`. Afterwards the rate follows a cosine from `lr` towards
    0 over the remaining epochs (for the default num_cycles=0.5).
    """
    if epoch < num_warmup_steps:
        # "+ 1": the schedule is evaluated once per epoch starting at 0, and
        # a rate of exactly 0 would waste the whole first epoch.
        return float(epoch + 1) / float(num_warmup_steps + 1) * lr
    decay_span = max(1, num_training_steps - num_warmup_steps)
    progress = float(epoch - num_warmup_steps) / float(decay_span)
    return max(0.0, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress))) * lr


def get_cosine_schedule_with_warmup(lr, num_warmup_steps, num_training_steps, num_cycles=0.5):
    """
    Modified the get_cosine_schedule_with_warmup from huggingface for tensorflow
    (https://huggingface.co/transformers/_modules/transformers/optimization.html#get_cosine_schedule_with_warmup)

    Create a per-epoch schedule: a linear warm-up up to `lr`, followed by a
    decrease that follows the cosine function between 0 and `pi * cycles`.
    Unlike the per-step original, the warm-up starts above zero because the
    rate is only updated once per epoch (see `cosine_warmup_lr`).

    Args:
        lr: peak learning rate.
        num_warmup_steps: number of warm-up epochs.
        num_training_steps: total number of epochs.
        num_cycles: number of cosine cycles in the decay phase.

    Returns:
        A tf.keras.callbacks.LearningRateScheduler (verbose) wrapping the
        schedule.
    """

    def lrfn(epoch):
        return cosine_warmup_lr(epoch, lr, num_warmup_steps, num_training_steps, num_cycles)

    return tf.keras.callbacks.LearningRateScheduler(lrfn, verbose=True)

# Callback instance passed to model.fit below.
lr_schedule= get_cosine_schedule_with_warmup(lr=LR, num_warmup_steps=WARMUP, num_training_steps=EPOCHS)

In [None]:
# train_dataset repeats indefinitely, so the epoch length has to be given
# explicitly as the number of full batches in the training split.
n_steps = train_labels.shape[0] // BATCH_SIZE  # 8995 / 128 = 70

history = model.fit(
    train_dataset, 
    steps_per_epoch=n_steps,
    validation_data=valid_dataset,
    epochs=EPOCHS,
    callbacks=[lr_schedule]
)

In [None]:
# Get training and test loss histories
# Per-epoch loss curves recorded by model.fit; `test_loss` holds the
# validation loss ('val_loss').
training_loss = history.history['loss']
test_loss = history.history['val_loss']

# Epochs are numbered from 1 on the x axis.
epoch_count = range(1, len(training_loss) + 1)

# Draw both curves: dashed red for training, solid blue for validation.
for series, line_style in ((training_loss, 'r--'), (test_loss, 'b-')):
    plt.plot(epoch_count, series, line_style)

plt.legend(['Training Loss', 'Validation Loss'])
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.show()

In [None]:
# Test-time augmentation: decode once (cached), then re-apply the random
# augmentations on every pass over the dataset.
test_dataset_tta = (
    tf.data.Dataset
    .from_tensor_slices(test_paths)
    .map(decode_image, num_parallel_calls=AUTO)
    .cache()
    .map(data_augment, num_parallel_calls=AUTO)
    .batch(BATCH_SIZE)
)

tta_times = 5
probabilities = []

# One prediction array per augmented pass.
for round_no in range(1, tta_times + 1):
    print('TTA Number: ', round_no, '\n')
    round_pred = model.predict(test_dataset_tta, verbose=1)
    probabilities.append(round_pred)

# Average the class probabilities over all passes.
tta_pred = np.mean(probabilities, axis=0)

## Make submission

In [None]:
# Fill each breed column with the averaged probability of its class index,
# then write the submission file without the index column.
for breed, class_idx in cat2idx.items():
    sub[breed] = tta_pred[:, class_idx]

sub.to_csv('submission.csv', index=None)

In [None]:
# Show the first rows of the filled-in submission frame.
sub.head(10)

### Got a score of 0.38639 on both the public and private LB, not bad :)