In [None]:
import os, math
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
import pandas as pd
from PIL import Image

# Machine Learning
import tensorflow as tf
import tensorflow.keras as keras
from keras import layers, utils, preprocessing, regularizers
from sklearn.metrics import roc_auc_score, roc_curve, auc
from sklearn.model_selection import train_test_split, KFold, StratifiedKFold
from keras import backend as K
from tensorflow.keras import mixed_precision

# Report the visible GPUs and let TensorFlow allocate GPU memory on demand
# instead of reserving all of it up front.
gpu = tf.config.list_physical_devices('GPU')
print(f'Num GPUs Available: {len(gpu)}')
# The memory-growth setting must be identical on every visible GPU, so enable
# it on all of them rather than only on the first device.
for device in gpu:
    tf.config.experimental.set_memory_growth(device, True)

In [None]:
# Notebook-wide configuration.

# Passed as `num_parallel_calls` / prefetch buffer size in the tf.data pipeline.
AUTO = tf.data.experimental.AUTOTUNE
# Target [height, width] for resized images; also defines the model input shape.
IMAGE_SIZE = [256, 256]
# Number of passes over the training dataset in `model.fit`.
EPOCHS = 3
# Seed for the train/validation split.
SEED = 2021
# Number of examples per batch produced by the dataset pipeline.
BATCH_SIZE = 16
# Number of landmark classes; presumably the distinct `landmark_id` count of train.csv
# (TODO confirm). Overwritten by the debugging-subset cell further down.
NUM_CLASSES = 81313
# Starting learning rate handed to the cosine-restart schedule.
INITIAL_LEARNING_RATE = 5e-2
# Length, in optimizer steps, of the first cosine cycle of the schedule.
DECAY_STEPS = 100

In [None]:
# Locate the competition data and load the CSV tables.
input_dir = Path('../input/landmark-recognition-2021')
train_df = pd.read_csv(input_dir / 'train.csv')
# The submission template lives in its own file; the previous code re-read train.csv here.
submission = pd.read_csv(input_dir / 'sample_submission.csv')

In [None]:
# Preview the first rows of the training table as loaded from train.csv.
train_df.head()

In [None]:
%%time
# Training images are sharded by the first three characters of their id:
# <input_dir>/train/<c0>/<c1>/<c2>/<id>.jpg
train_df['path'] = [
    f"{input_dir!s}/train/{image_id[0]}/{image_id[1]}/{image_id[2]}/{image_id}.jpg"
    for image_id in train_df['id']
]

In [None]:
# Preview again to check the newly added `path` column.
train_df.head()

In [None]:
# Sanity check: open the image referenced by the row labelled 0 and display it.
with Image.open(train_df.loc[0, 'path']) as im:
    plt.imshow(im)

In [None]:
# Attach to every row the number of training images of its landmark.
train_df['id_counts'] = train_df['landmark_id'].map(train_df['landmark_id'].value_counts())

# Distinct landmark ids ordered from rarest to most frequent.
id_map = train_df.sort_values(by='id_counts').landmark_id.drop_duplicates().reset_index(drop=True)

# Use the number of landmarks actually present so the encoding cannot fail with
# a KeyError when the configured constant is out of sync with the data (or when
# this cell is re-run after NUM_CLASSES has been reassigned).
NUM_CLASSES = len(id_map)

# Dense class indices: 0 = most frequent landmark, NUM_CLASSES - 1 = rarest.
# The indices must be 0-based because the labels are one-hot encoded with depth
# NUM_CLASSES later on; the previous `NUM_CLASSES - x` mapping produced
# 1..NUM_CLASSES, so the largest id fell outside the one-hot range and index 0
# was never used.
id_dict = {landmark_id: NUM_CLASSES - 1 - rank for rank, landmark_id in enumerate(id_map)}
train_df['encode_id'] = train_df['landmark_id'].map(id_dict)

del id_map
del id_dict

train_df.head()

In [None]:
# Debug-only shortcut: keep the 100,000 rows with the lowest `encode_id`, i.e.
# the rows of the most frequent landmarks. Use this only for end-to-end smoke
# tests, never for real training or evaluation.
# NOTE(review): the cut at 100,000 rows can truncate the last landmark kept;
# the stratified split below needs at least two rows per class.
train_df = train_df.sort_values('encode_id', ascending=True).iloc[:100000,:]
# Shuffle with the notebook seed so the seeded train/validation split that
# follows is reproducible across runs.
train_df = train_df.sample(frac=1, random_state=SEED).reset_index(drop=True)
NUM_CLASSES = train_df['encode_id'].nunique()

train_df.head(), NUM_CLASSES

In [None]:
def flip(image, label):
    """Mirror `image` left-to-right and pass `label` through unchanged.

    The flip is unconditional: every image given to this function is mirrored.
    """
    return tf.image.flip_left_right(image), label

def read_image_and_label(image_path, label=None, resize=IMAGE_SIZE):
    """Load a JPEG from disk, resize it and return it as a float32 tensor.

    Args:
        image_path: Path of the JPEG file to read.
        label: Optional label tensor. When given it is cast to int32 and
            returned alongside the image.
        resize: Target [height, width] of the returned image. Defaults to
            `IMAGE_SIZE`.

    Returns:
        `image` when `label` is None, otherwise the tuple `(image, label)`.
    """
    raw_bytes = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(raw_bytes, channels=3)
    # Honour the `resize` argument; it used to be ignored in favour of the
    # global IMAGE_SIZE.
    image = tf.image.resize(image, resize)
    # Pixel values are deliberately not rescaled by 1/255 here; the model applies
    # its own `preprocess_input` step to the raw inputs.
    image = tf.cast(image, dtype=tf.float32)
    if label is None:
        return image
    return image, tf.cast(label, dtype=tf.int32)

def get_training_dataset(df):
    """Build the batched tf.data pipeline that feeds the two-input model.

    Args:
        df: DataFrame with a `path` column (image file paths) and an
            `encode_id` column (integer class indices).

    Returns:
        A dataset of `((image, one_hot_label), one_hot_label)` batches: the
        first tuple is the model input (the model takes the labels as a second
        input), the trailing label is the training target. Every image is
        horizontally flipped, elements are shuffled through a 1000-element
        buffer that is reshuffled each iteration, then batched and prefetched.
    """
    def _one_hot(class_index):
        return tf.one_hot(class_index, NUM_CLASSES)

    class_indices = df['encode_id'].values.astype(int)
    one_hot_labels = tf.data.Dataset.from_tensor_slices(class_indices).map(
        _one_hot, num_parallel_calls=AUTO
    )
    image_paths = tf.data.Dataset.from_tensor_slices(df['path'].values)

    # (path, label) -> (image, label) -> (flipped image, label)
    model_inputs = (
        tf.data.Dataset.zip((image_paths, one_hot_labels))
        .map(read_image_and_label, num_parallel_calls=AUTO)
        .map(flip, num_parallel_calls=AUTO)
    )

    # Pair the model inputs with the target labels, then shuffle/batch/prefetch.
    return (
        tf.data.Dataset.zip((model_inputs, one_hot_labels))
        .shuffle(1000, reshuffle_each_iteration=True)
        .batch(BATCH_SIZE)
        .prefetch(AUTO)
    )

# def get_test_dataset(images, batch_size=16):
#     test_dataset = tf.data.Dataset.from_tensor_slices((images))
#     test_dataset = test_dataset.map(read_image_and_label)
#     test_dataset = test_dataset.batch(batch_size, drop_remainder=True)
#     return test_dataset

In [None]:
# Seeded 60/40 split, stratified on the encoded class so both parts keep the
# same class proportions.
X_train, X_valid = train_test_split(
    train_df,
    test_size=.4,
    stratify=train_df['encode_id'],
    random_state=SEED,
)

# Both splits go through the same pipeline, so the validation data is flipped
# and shuffled exactly like the training data.
train_dataset = get_training_dataset(X_train)
valid_dataset = get_training_dataset(X_valid)

In [None]:
# Sanity check: pull the first batch out of the training pipeline, wrap it in a
# single-element dataset and print that dataset (presumably to inspect the
# element structure - verify against the printed output).
one_batch = tf.data.Dataset.from_tensors(next(iter(train_dataset)))
print(one_batch)
# Drop the reference so the batch does not stay alive in the kernel.
del one_batch

In [None]:
class GeM(layers.Layer):
    """Generalized-mean (GeM) pooling with one learnable exponent per channel.

    For every pooling window the layer computes `mean(x ** p) ** (1 / p)`,
    where `p` is a trainable weight with one entry per channel, initialised to
    `init_norm`.

    Args:
        pool_size: Window size, also used as the stride, forwarded to
            `tf.nn.avg_pool` (an int or a list of ints).
        init_norm: Initial value of the exponent `p`.
        normalize: When True, the pooled output is L2-normalised along the
            channel axis.
    """

    def __init__(self, pool_size, init_norm=3.0, normalize=False, **kwargs):
        # Initialise the base layer before assigning attributes, as ArcFace does.
        super(GeM, self).__init__(**kwargs)
        self.pool_size = pool_size
        self.init_norm = init_norm
        self.normalize = normalize

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config().copy()
        config.update({
            'pool_size': self.pool_size,
            'init_norm': self.init_norm,
            'normalize': self.normalize,
        })
        return config

    def build(self, input_shape):
        """Create the per-channel exponent `p`, sized by the last input axis."""
        feature_size = input_shape[-1]
        self.p = self.add_weight(name='norms', shape=(feature_size,),
                                 initializer=keras.initializers.constant(self.init_norm),
                                 trainable=True)
        super(GeM, self).build(input_shape)

    def call(self, inputs):
        """Apply GeM pooling to `inputs` and return the pooled feature map."""
        # Clamp to a small positive value so the fractional power is well defined.
        x = tf.math.maximum(inputs, 1e-6)
        x = tf.pow(x, self.p)

        x = tf.nn.avg_pool(x, self.pool_size, self.pool_size, 'VALID')
        x = tf.pow(x, 1.0 / self.p)

        if self.normalize:
            # Normalise each feature vector over its channels. Axis 1 of the
            # pooled 4D map is a spatial axis, which is not what is wanted here.
            x = tf.nn.l2_normalize(x, -1)
        return x

    def _pool_hw(self):
        """Return the (height, width) window implied by `pool_size`."""
        size = self.pool_size
        if isinstance(size, int):
            return size, size
        size = list(size)
        if len(size) == 1:
            return size[0], size[0]
        if len(size) == 2:
            return size[0], size[1]
        # Four entries: assumed to be laid out as [1, height, width, 1].
        return size[1], size[2]

    def compute_output_shape(self, input_shape):
        """Return the 4D shape produced by `call`.

        `call` keeps the spatial axes (VALID pooling with stride == window), so
        the output is (batch, height // pool_h, width // pool_w, channels), not
        a 2D (batch, channels) shape.
        """
        batch, height, width, channels = tuple(input_shape)
        pool_h, pool_w = self._pool_hw()
        out_h = None if height is None else height // pool_h
        out_w = None if width is None else width // pool_w
        return (batch, out_h, out_w, channels)

class ArcFace(layers.Layer):
    """ArcFace head: softmax over scaled cosine logits with an angular margin.

    The layer takes `[embeddings, one_hot_labels]`. Embeddings and class
    weights are L2-normalised, the angle of the labelled class is increased by
    the margin `m`, all logits are scaled by `s`, and a softmax is returned.

    Args:
        n_classes: Number of output classes.
        s: Scale applied to the cosine logits.
        m: Angular margin (radians) added to the labelled class.
        regularizer: Optional regularizer (anything `regularizers.get`
            accepts) for the class-weight matrix.
    """

    def __init__(self, n_classes=10, s=30.0, m=0.50, regularizer=None, **kwargs):
        super(ArcFace, self).__init__(**kwargs)
        self.n_classes = n_classes
        self.s = s
        self.m = m
        self.regularizer = regularizers.get(regularizer)

    def get_config(self):
        """Return a serialisable config of the constructor arguments."""
        config = super().get_config().copy()
        config.update({
            'n_classes': self.n_classes,
            's': self.s,
            'm': self.m,
            # Store the serialised form: a raw regularizer object cannot be
            # written into a saved model config. `regularizers.get` in
            # __init__ accepts this form when the layer is re-created.
            'regularizer': regularizers.serialize(self.regularizer),
        })
        return config

    def build(self, input_shape):
        """Create the (embedding_dim, n_classes) class-weight matrix.

        `input_shape` is a pair of shapes; the first one is the embedding shape.
        """
        super(ArcFace, self).build(input_shape[0])
        self.W = self.add_weight(name='W',
                                shape=(input_shape[0][-1], self.n_classes),
                                initializer='glorot_uniform',
                                trainable=True,
                                regularizer=self.regularizer)

    def call(self, inputs):
        """Return class probabilities for `inputs = [embeddings, one_hot_labels]`."""
        x, y = inputs
        # Cosine similarity between unit-length embeddings and unit-length class weights.
        x = tf.nn.l2_normalize(x, axis=1)
        W = tf.nn.l2_normalize(self.W, axis=0)
        logits = x @ W
        y = tf.cast(y, logits.dtype)
        # Clip away from +/-1 so acos stays finite and differentiable.
        theta = tf.acos(K.clip(logits, -1.0 + K.epsilon(), 1.0 - K.epsilon()))
        target_logits = tf.cos(theta + self.m)
        # Use the margin-penalised logit only at the labelled class.
        logits = logits * (1 - y) + target_logits * y
        logits *= self.s
        # Softmax is computed in float32, as the former float32 Activation did,
        # without constructing a new layer object inside call().
        return tf.nn.softmax(tf.cast(logits, tf.float32))

    def compute_output_shape(self, input_shape):
        """Output is one probability per class for every example."""
        return (None, self.n_classes)

In [None]:
def build_model():
    """Build the ResNet152 + GeM + ArcFace classifier.

    The model has two inputs, the image batch (`Images`) and the one-hot labels
    (`Labels`, consumed by the ArcFace head), and outputs class probabilities.

    Returns:
        An uncompiled `keras.Model`.
    """
    inputs = keras.Input(shape=(*IMAGE_SIZE, 3), name='Images')
    labels = keras.Input(shape=(NUM_CLASSES,), name='Labels')

    preprocessed = tf.keras.applications.resnet50.preprocess_input(inputs)
    backbone = tf.keras.applications.ResNet152(include_top=False, weights='imagenet', input_tensor=preprocessed)

    # Training the backbone will about double the training time.
    backbone.trainable = False

    x = backbone.output
    # Pool over the whole backbone feature map. The window is derived from the
    # feature-map shape (8x8 for 256x256 inputs, 16x16 for 512x512) so it stays
    # correct when IMAGE_SIZE changes.
    pool_size = [int(dim) for dim in x.shape[1:3]]
    x = GeM(pool_size)(x)
    x = layers.Flatten()(x)

    # Embedding block feeding the ArcFace head.
    x = layers.Dense(512, kernel_initializer='he_normal')(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    output = ArcFace(n_classes=NUM_CLASSES)([x, labels])

    model = keras.Model([inputs, labels], output)
    return model

In [None]:
# Instantiate the network; uncomment the summary line to print the layer listing.
model = build_model()
# model.summary()

In [None]:
# Write a checkpoint at the end of every epoch; the `{epoch}` placeholder in the
# file name is filled in with the epoch number.
checkpoint = keras.callbacks.ModelCheckpoint(
    filepath='glr_resnet152_gem_arcface_{epoch}.hdf5',
    monitor='val_acc',
    save_freq='epoch',
)

class CosineDecayRestarts(tf.keras.optimizers.schedules.LearningRateSchedule):
    """Cosine learning-rate decay with warm restarts.

    Within each cycle the rate follows half a cosine from its peak down to
    `alpha` times the initial rate, then restarts. After every restart the
    cycle length is multiplied by `t_mul` and the peak by `m_mul`.

    Args:
        initial_learning_rate: Learning rate at step 0.
        first_decay_steps: Length of the first cycle, in steps.
        t_mul: Factor applied to the cycle length after each restart.
        m_mul: Factor applied to the cycle's peak after each restart.
        alpha: Floor of the schedule, as a fraction of `initial_learning_rate`.
        name: Optional name scope for the ops (defaults to "SGDRDecay").
    """

    def __init__(
          self,
          initial_learning_rate,
          first_decay_steps,
          t_mul=2.0,
          m_mul=1.0,
          alpha=0.0,
          name=None):
        super(CosineDecayRestarts, self).__init__()

        self.initial_learning_rate = initial_learning_rate
        self.first_decay_steps = first_decay_steps
        self._t_mul = t_mul
        self._m_mul = m_mul
        self.alpha = alpha
        self.name = name

    def __call__(self, step):
        """Return the learning rate for optimizer step `step`."""
        # Keep every op of the schedule inside the name scope; previously only
        # the first statement was inside it.
        with tf.name_scope(self.name or "SGDRDecay") as name:
            initial_learning_rate = tf.convert_to_tensor(
                self.initial_learning_rate, name="initial_learning_rate")
            dtype = initial_learning_rate.dtype
            first_decay_steps = tf.cast(self.first_decay_steps, dtype)
            alpha = tf.cast(self.alpha, dtype)
            t_mul = tf.cast(self._t_mul, dtype)
            m_mul = tf.cast(self._m_mul, dtype)

            # Progress measured in units of the first cycle length.
            global_step_recomp = tf.cast(step, dtype)
            completed_fraction = global_step_recomp / first_decay_steps

            def compute_step(completed_fraction, geometric=False):
                """Helper for `cond` operation.

                Returns the index of the current cycle and the fraction of
                that cycle already completed.
                """
                if geometric:
                    # Cycle lengths form a geometric series with ratio t_mul;
                    # invert its partial sum to find the current cycle.
                    i_restart = tf.floor(
                        tf.math.log(1.0 - completed_fraction * (1.0 - t_mul)) /
                        tf.math.log(t_mul))

                    sum_r = (1.0 - t_mul**i_restart) / (1.0 - t_mul)
                    completed_fraction = (completed_fraction - sum_r) / t_mul**i_restart

                else:
                    # All cycles have the same length.
                    i_restart = tf.floor(completed_fraction)
                    completed_fraction -= i_restart

                return i_restart, completed_fraction

            # The geometric formula divides by log(t_mul), so t_mul == 1 needs
            # the equal-length branch.
            i_restart, completed_fraction = tf.cond(
                tf.equal(t_mul, 1.0),
                lambda: compute_step(completed_fraction, geometric=False),
                lambda: compute_step(completed_fraction, geometric=True))

            m_fac = m_mul**i_restart
            cosine_decayed = 0.5 * m_fac * (1.0 + tf.cos(
                tf.constant(math.pi, dtype=dtype) * completed_fraction))
            decayed = (1 - alpha) * cosine_decayed + alpha

            return tf.multiply(initial_learning_rate, decayed, name=name)

    def get_config(self):
        """Return the constructor arguments for serialisation."""
        return {
            "initial_learning_rate": self.initial_learning_rate,
            "first_decay_steps": self.first_decay_steps,
            "t_mul": self._t_mul,
            "m_mul": self._m_mul,
            "alpha": self.alpha,
            "name": self.name
        }

# Learning-rate schedule: cosine decay with warm restarts, first cycle DECAY_STEPS long.
lr_decay = CosineDecayRestarts(
    initial_learning_rate=INITIAL_LEARNING_RATE,
    first_decay_steps=DECAY_STEPS,
)

# SGD with momentum driven by the schedule; the targets are one-hot vectors,
# hence categorical cross-entropy.
sgd = tf.keras.optimizers.SGD(momentum=.90, learning_rate=lr_decay)
loss_object = tf.keras.losses.CategoricalCrossentropy()

In [None]:
# Reset Keras' global state before compiling.
# NOTE(review): `model`, `sgd` and `lr_decay` were all created before this call;
# clearing the session is normally done before building the model - confirm this
# ordering is intentional.
K.clear_session()

# Configure training: SGD with the cosine-restart schedule, categorical
# cross-entropy loss, accuracy reported under the name `acc`.
model.compile(
    optimizer=sgd,
    loss=loss_object,
    metrics=['acc']
)

# Train for EPOCHS epochs, evaluating on the validation dataset after each one;
# the checkpoint callback runs every epoch. The returned History object is kept
# in `history`.
history = model.fit(
    train_dataset,
    validation_data=valid_dataset,
    epochs=EPOCHS, 
    callbacks=[checkpoint]
)