Evaluated with :
- EffNet B3
- img_size 512
- 1/3 train data
- 15 epochs

|                   Augmentations                  | Val loss |
|:------------------------------------------------:|:--------:|
| baseline augs                                    | 14.50    |
| no augs                                          | 14.45    |
| central_crop(image, 0.9)                         | 14.32    |
| random_crop(image,[int(dim\*0.8), int(dim\*0.8), 3]) | 14.23    |
| transform(image, DIM=dim)                        | 14.52    |
| random_flip_left_right(image)                    | 14.41    |
| random_flip_up_down(image)                       | 14.52    |
| random_hue(image, 0.01)                          | 14.42    |
| random_saturation(image, 0.7, 1.3)               | 14.39    |
| random_contrast(image, 0.8, 1.2)                 | 14.41    |
| random_brightness(image, 0.1)                    | 14.42    |
| central_crop+random_crop+flip_left_right         | 14.20    |

Evaluated with :
- no augmentations
- img_size 512
- 1/3 train data
- 15 epochs

| EffNet w/o aug | Val loss |
|----------------|----------|
| B0             |  14.70   |
| B1             |  14.58   |
| B2             |  14.47   |
| B3             |  14.45   |
| B4             |  14.43   |
| B5             |  14.40   |
| B6             |  14.37   |
| B7             |  14.24   |

Evaluated with :
- no augmentations
- EffNet B3
- 1/3 train data
- 15 epochs

| Image size | Val loss |
|------------|----------|
| 256        |  14.95   |
| 384        |  14.55   |
| 512        |  14.45   |

Evaluated with :
- 3 augs
- EffNet B3
- 1/3 train data
- 15 epochs

| Weight        | Val loss |
|---------------|----------|
| imagenet      |   14.20  |
| noisy-student |   14.25  |

Evaluated with :
- 3 augs
- EffNet B3
- 1/3 train data
- 15 epochs

| s  | m   | Val loss |
|----|-----|----------|
| 30 | 0.2 |    8.46  |
| 30 | 0.3 |   10.40  |
| 30 | 0.4 |   12.32  |
| 30 | 0.5 |   14.20  |
| 30 | 0.6 |   16.12  |

| s  | m   | Val loss |
|----|-----|----------|
| 20 | 0.5 |   10.84  |
| 25 | 0.5 |   12.49  |
| 30 | 0.5 |   14.20  |
| 35 | 0.5 |   15.99  |

| Margin      | Val loss |
|-------------|----------|
| default     |   14.20  |
| easy_margin |   10.57  |

| ls_eps  | Val loss |
|---------|----------|
| 0.0     |   14.20  |
| 0.001   |   14.23  |
| 0.01    |   14.14  |
| 0.05    |   13.72  |
| 0.1     |   13.26  |

Evaluated with : (https://www.kaggle.com/chankhavu/keras-layers-arcface-cosface-adacos)
- 3 augs
- EffNet B3
- 1/3 train data
- 15 epochs

| Head    | Val loss |
|---------|----------|
| ArcFace |   14.20  |
| CosFace |          |
| AdaCos  |          |

Evaluated with :
- 3 augs
- 1/3 train data
- 15 epochs

| Model       | Val loss |
|-------------|----------|
| EffNet B3   |   14.20  |
| Xception    |   15.94  |
| InceptionV3 |   16.70  |
| MobileNetV2 |   22.29  |
| VGG16       |   15.10  |
| VGG19       |   24.26  |
| ResNet50    |   23.37  |
| ResNet101   |   22.85  |
| DenseNet121 |   17.07  |
| DenseNet169 |   16.59  |
| DenseNet201 |   16.30  |

In [None]:
!pip install -q efficientnet
!pip install tensorflow_addons
import re
import os
import numpy as np
import pandas as pd
import random
import math
import tensorflow as tf
import efficientnet.tfkeras as efn
from sklearn import metrics
from sklearn.model_selection import KFold, train_test_split
from tensorflow.keras import backend as K
import tensorflow_addons as tfa
from tqdm.notebook import tqdm
from kaggle_datasets import KaggleDatasets

In [None]:
# Look for a TPU. On Kaggle the TPU_NAME environment variable is always set, so
# the resolver needs no arguments; a ValueError is treated as "no TPU available".
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    tpu = None

if not tpu:
    # Fall back to TensorFlow's default strategy (CPU or a single GPU).
    strategy = tf.distribute.get_strategy()
else:
    # Attach to the TPU cluster and initialise it before building the strategy.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.TPUStrategy(tpu)

print("REPLICAS: ", strategy.num_replicas_in_sync)

In [None]:
# Scale factors for the random affine augmentation in transform().
# Each one scales (or divides) a standard-normal sample drawn per image.
# Rotation, in degrees (get_mat converts degrees to radians).
ROT_ = 180.0
# Shear, in degrees (also converted to radians by get_mat).
SHR_ = 2.0
# Zoom factors are sampled as 1.0 + normal / HZOOM_ (resp. WZOOM_),
# so larger values mean smaller zoom variation.
HZOOM_ = 8.0
WZOOM_ = 8.0
# Shift scale along height / width; presumably in pixels, since the shift is
# applied to pixel index coordinates -- TODO confirm.
HSHIFT_ = 8.0
WSHIFT_ = 8.0

def get_mat(rotation, shear, height_zoom, width_zoom, height_shift, width_shift):
    """Build the 3x3 matrix that maps destination pixel indices to source indices.

    The result is (rotation @ shear) @ (zoom @ shift).

    Args:
        rotation: rotation angle in degrees.
        shear: shear angle in degrees.
        height_zoom, width_zoom: zoom factors; the matrix uses their reciprocals.
        height_shift, width_shift: translation along each axis.

    All arguments are mixed with shape-[1] float32 constants when the matrices
    are assembled, so they are expected to be shape-[1] float32 tensors (this is
    what transform() passes).

    Returns:
        A 3x3 float32 tensor.
    """
    def _as_3x3(entries):
        # Nine shape-[1] tensors, row-major -> one 3x3 matrix.
        return tf.reshape(tf.concat([entries], axis=0), [3, 3])

    # Degrees -> radians.
    rot_rad = math.pi * rotation / 180.
    shear_rad = math.pi * shear / 180.

    one = tf.constant([1], dtype='float32')
    zero = tf.constant([0], dtype='float32')

    # Rotation.
    cos_r = tf.math.cos(rot_rad)
    sin_r = tf.math.sin(rot_rad)
    rotation_matrix = _as_3x3([
        cos_r,  sin_r, zero,
        -sin_r, cos_r, zero,
        zero,   zero,  one,
    ])

    # Shear.
    cos_s = tf.math.cos(shear_rad)
    sin_s = tf.math.sin(shear_rad)
    shear_matrix = _as_3x3([
        one,  sin_s, zero,
        zero, cos_s, zero,
        zero, zero,  one,
    ])

    # Zoom (reciprocal scale per axis).
    zoom_matrix = _as_3x3([
        one/height_zoom, zero,           zero,
        zero,            one/width_zoom, zero,
        zero,            zero,           one,
    ])

    # Shift.
    shift_matrix = _as_3x3([
        one,  zero, height_shift,
        zero, one,  width_shift,
        zero, zero, one,
    ])

    rotate_then_shear = K.dot(rotation_matrix, shear_matrix)
    zoom_then_shift = K.dot(zoom_matrix, shift_matrix)
    return K.dot(rotate_then_shear, zoom_then_shift)

def transform(image, DIM=256):    
    """Apply one random rotation, shear, zoom and shift to a single image.

    Args:
        image: one image of size [DIM, DIM, 3] (not a batch of [b, DIM, DIM, 3]).
        DIM: side length of the image in pixels.

    Returns:
        A [DIM, DIM, 3] tensor whose pixels are gathered from `image` at the
        transformed coordinates.
    """
    # XDIM is 1 for odd sizes and 0 for even ones; it adjusts the lower clip
    # bound below.
    XDIM = DIM%2 #fix for size 331
    
    # Sample the transform parameters: each is a shape-[1] standard-normal draw
    # scaled by the module-level constants.
    rot = ROT_ * tf.random.normal([1], dtype='float32')
    shr = SHR_ * tf.random.normal([1], dtype='float32') 
    h_zoom = 1.0 + tf.random.normal([1], dtype='float32') / HZOOM_
    w_zoom = 1.0 + tf.random.normal([1], dtype='float32') / WZOOM_
    h_shift = HSHIFT_ * tf.random.normal([1], dtype='float32') 
    w_shift = WSHIFT_ * tf.random.normal([1], dtype='float32') 

    # GET TRANSFORMATION MATRIX
    m = get_mat(rot,shr,h_zoom,w_zoom,h_shift,w_shift) 

    # LIST DESTINATION PIXEL INDICES
    # Coordinates are centred on the image; the row of ones makes them
    # homogeneous so the 3x3 matrix can include the shift.
    x   = tf.repeat(tf.range(DIM//2, -DIM//2,-1), DIM)
    y   = tf.tile(tf.range(-DIM//2, DIM//2), [DIM])
    z   = tf.ones([DIM*DIM], dtype='int32')
    idx = tf.stack( [x,y,z] )
    
    # ROTATE DESTINATION PIXELS ONTO ORIGIN PIXELS
    idx2 = K.dot(m, tf.cast(idx, dtype='float32'))
    idx2 = K.cast(idx2, dtype='int32')
    # Clip so that the indices computed below stay inside the image.
    idx2 = K.clip(idx2, -DIM//2+XDIM+1, DIM//2)
    
    # FIND ORIGIN PIXEL VALUES           
    # Convert the centred coordinates back to (row, column) array indices.
    idx3 = tf.stack([DIM//2-idx2[0,], DIM//2-1+idx2[1,]])
    d    = tf.gather_nd(image, tf.transpose(idx3))
        
    return tf.reshape(d,[DIM, DIM,3])

In [None]:
# Lets tf.data pick the parallelism / prefetch buffer sizes
AUTO = tf.data.experimental.AUTOTUNE

# GCS location of the Kaggle dataset that holds the TFRecord files
GCS_PATH = KaggleDatasets().get_gcs_path('shopee-tf-records-512-stratified')

# Training configuration
EPOCHS = 15
# Global batch size: 32 examples per replica
BATCH_SIZE = 32 * strategy.num_replicas_in_sync
# Images are resized to this [height, width] in decode_image
IMAGE_SIZE = [512, 512]
# Seed used for seed_everything, the train/validation file split and the checkpoint file name
SEED = 42
# Learning rate the Adam optimizer is constructed with in get_model
# (a LearningRateScheduler callback is also passed to fit)
LR = 0.001
# Verbosity passed to ModelCheckpoint and model.fit
VERBOSE = 2
# Number of classes (n_classes of the ArcMarginProduct head)
N_CLASSES = 11014
# Number of folds (not referenced elsewhere in the visible code)
FOLDS = 5

# All TFRecord files found under GCS_PATH
TRAINING_FILENAMES = tf.io.gfile.glob(GCS_PATH + '/*.tfrec')

In [None]:
# Function to get our f1 score
def f1_score(y_true, y_pred):
    """Return the row-wise F1 score between two Series of space-separated ids.

    Each entry is split on whitespace and treated as a set; the score for a row
    is 2 * |true & pred| / (|true| + |pred|). A row where both sets are empty
    divides zero by zero.

    Args:
        y_true: pandas Series of strings with the ground-truth ids.
        y_pred: pandas Series of strings with the predicted ids.

    Returns:
        A numpy array with one score per row.
    """
    true_sets = y_true.apply(lambda text: set(text.split()))
    pred_sets = y_pred.apply(lambda text: set(text.split()))

    overlap = np.array([len(t & p) for t, p in zip(true_sets, pred_sets)])
    pred_sizes = pred_sets.apply(lambda ids: len(ids)).values
    true_sizes = true_sets.apply(lambda ids: len(ids)).values

    return 2 * overlap / (pred_sizes + true_sizes)

# Function to seed everything
def seed_everything(seed):
    """Seed Python's hash, `random`, NumPy and TensorFlow with the same value."""
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed, tf.random.set_seed):
        seeder(seed)
    
def arcface_format(posting_id, image, label_group, matches):
    """Repack an example so the image and its label form the model's input dict.

    The keys 'inp1' and 'inp2' match the names of the two Input layers created
    in get_model; the label is also kept as the third element.
    """
    model_inputs = {'inp1': image, 'inp2': label_group}
    return posting_id, model_inputs, label_group, matches

# Data augmentation function
def data_augment(posting_id, image, label_group, matches):
    """Randomly augment one image; the other three fields pass through unchanged.

    Every augmentation is gated by its own uniform draw, so any combination of
    them may be applied to a given image. The image is assumed to be square
    ([dim, dim, 3]): `dim` is read from the first axis only and reused for both
    sides -- TODO confirm for non-square IMAGE_SIZE.

    Returns:
        (posting_id, augmented image, label_group, matches)
    """
    dim = image.shape[0]
    # Central crop to 90 %, then resize back to dim x dim (40 % of the time).
    if tf.random.uniform([1])[0] < 0.4:
        image = tf.image.central_crop(image, 0.9)
        image = tf.image.resize_with_pad(image, dim, dim)
    # Random crop of 80 % of each side, then resize back to dim x dim (70 % of the time).
    if tf.random.uniform([1])[0] <= 0.7:
        image = tf.image.random_crop(image,[int(dim*0.8), int(dim*0.8), 3])
        image = tf.image.resize_with_pad(image, dim, dim)
    # Random rotation / shear / zoom / shift.
    if tf.random.uniform([1])[0] < 0.5:
        image = transform(image, DIM=dim)
    # random_flip_left_right makes its own random choice inside this gate.
    if tf.random.uniform([1])[0] < 0.5:
        image = tf.image.random_flip_left_right(image)
    # Vertical flip is disabled.
#     if tf.random.uniform([1])[0] < 0.5:
#         image = tf.image.random_flip_up_down(image)
    # Colour jitter: hue, saturation, contrast, brightness. The result is not clipped afterwards.
    if tf.random.uniform([1])[0] < 0.5:
        image = tf.image.random_hue(image, 0.01)
    if tf.random.uniform([1])[0] < 0.5:
        image = tf.image.random_saturation(image, 0.7, 1.3)
    if tf.random.uniform([1])[0] < 0.5:
        image = tf.image.random_contrast(image, 0.8, 1.2)
    if tf.random.uniform([1])[0] < 0.5:
        image = tf.image.random_brightness(image, 0.1)
    return posting_id, image, label_group, matches

# Function to decode our images
def decode_image(image_data):
    """Decode JPEG bytes into a float32 RGB image of IMAGE_SIZE, divided by 255."""
    decoded = tf.image.decode_jpeg(image_data, channels = 3)
    resized = tf.image.resize(decoded, IMAGE_SIZE)
    return tf.cast(resized, tf.float32) / 255.0

# This function parse our images and also get the target variable
def read_labeled_tfrecord(example):
    """Parse one serialized TFRecord example.

    Returns:
        (posting_id, image, label_group, matches), where the image has been
        decoded by decode_image and label_group is cast to int32 (a sparse class
        index, not a one-hot vector).
    """
    feature_spec = {
        "posting_id": tf.io.FixedLenFeature([], tf.string),
        "image": tf.io.FixedLenFeature([], tf.string),
        "label_group": tf.io.FixedLenFeature([], tf.int64),
        "matches": tf.io.FixedLenFeature([], tf.string)
    }
    parsed = tf.io.parse_single_example(example, feature_spec)

    image = decode_image(parsed['image'])
    label_group = tf.cast(parsed['label_group'], tf.int32)
    return parsed['posting_id'], image, label_group, parsed['matches']

# This function loads TF Records and parse them into tensors
def load_dataset(filenames, ordered = False):
    """Read TFRecord files and parse every record with read_labeled_tfrecord.

    Args:
        filenames: TFRecord file paths.
        ordered: when False, deterministic ordering is switched off in the
            dataset options; when True the default options are used.
    """
    options = tf.data.Options()
    if not ordered:
        options.experimental_deterministic = False

    records = tf.data.TFRecordDataset(filenames, num_parallel_reads = AUTO)
    records = records.with_options(options)
    return records.map(read_labeled_tfrecord, num_parallel_calls = AUTO)

# This function is to get our training tensors
def get_training_dataset(filenames, ordered = False):
    """Build the training pipeline.

    Steps, in order: load, augment, repack for the ArcFace model, repeat
    forever, shuffle, batch, prefetch.
    """
    return (
        load_dataset(filenames, ordered = ordered)
        .map(data_augment, num_parallel_calls = AUTO)
        .map(arcface_format, num_parallel_calls = AUTO)
        .repeat()
        .shuffle(2048)
        .batch(BATCH_SIZE)
        .prefetch(AUTO)
    )

# This function is to get our validation tensors
def get_validation_dataset(filenames, ordered = True):
    """Build the validation pipeline.

    Steps, in order: load, repack for the ArcFace model, batch, prefetch.
    There is no augmentation, shuffling or repetition.
    """
    return (
        load_dataset(filenames, ordered = ordered)
        .map(arcface_format, num_parallel_calls = AUTO)
        .batch(BATCH_SIZE)
        .prefetch(AUTO)
    )

# Function to count how many photos we have in
def count_data_items(filenames):
    """Sum the item counts encoded in TFRecord file names.

    The number of data items is written in the name of each .tfrec file as
    ``-<count>.``, e.g. ``flowers00-230.tfrec`` holds 230 items. The first
    ``-<digits>.`` occurrence in the name is used, so hyphens elsewhere in the
    path are skipped.

    Args:
        filenames: iterable of file names / paths.

    Returns:
        The total item count as a numpy int64 (0 for an empty iterable).

    Raises:
        ValueError: if a file name does not contain a ``-<count>.`` part.
    """
    # Require at least one digit so that a "-." sequence is not mistaken for an empty count.
    count_pattern = re.compile(r"-([0-9]+)\.")

    counts = []
    for filename in filenames:
        match = count_pattern.search(filename)
        if match is None:
            raise ValueError(f"no '-<count>.' item count found in file name: {filename!r}")
        counts.append(int(match.group(1)))

    # An explicit dtype keeps the result an integer even when there are no files.
    return np.sum(counts, dtype=np.int64)

# Total number of examples across all TFRecord files, taken from the counts in the file names.
NUM_TRAINING_IMAGES = count_data_items(TRAINING_FILENAMES)
print(f'Dataset: {NUM_TRAINING_IMAGES} training images')

In [None]:
# Function for a custom learning rate scheduler with warmup and decay
def get_lr_callback():
    """Return a LearningRateScheduler with linear warm-up, optional hold, and exponential decay.

    The peak rate scales with BATCH_SIZE. The schedule is a function of the
    epoch index only.
    """
    start_lr = 0.000001
    peak_lr = 0.000005 * BATCH_SIZE
    floor_lr = 0.000001
    warmup_epochs = 5
    hold_epochs = 0
    decay_rate = 0.8

    def lrfn(epoch):
        # Linear ramp from start_lr towards peak_lr.
        if epoch < warmup_epochs:
            return (peak_lr - start_lr) / warmup_epochs * epoch + start_lr
        # Hold at the peak (no epochs with the current settings).
        if epoch < warmup_epochs + hold_epochs:
            return peak_lr
        # Exponential decay towards floor_lr.
        return (peak_lr - floor_lr) * decay_rate**(epoch - warmup_epochs - hold_epochs) + floor_lr

    return tf.keras.callbacks.LearningRateScheduler(lrfn, verbose = True)

# Arcmarginproduct class keras layer
class ArcMarginProduct(tf.keras.layers.Layer):
    '''
    Implements large margin arc distance.

    The layer takes ``[embeddings, labels]`` and returns scaled cosine logits in
    which the entry of the labelled class has the angular margin applied. No
    softmax is applied here.

    Arguments:
        n_classes: number of classes (columns of the weight matrix).
        s: scale multiplied into the output.
        m: angular margin in radians (it is passed to cos/sin directly).
        easy_margin: if True, apply the margin only where cosine > 0 and leave
            the plain cosine elsewhere; otherwise use the cos(pi - m) threshold
            with the ``cosine - mm`` fallback.
        ls_eps: label-smoothing factor applied to the one-hot mask (0 disables it).

    Reference:
        https://arxiv.org/pdf/1801.07698.pdf
        https://github.com/lyakaap/Landmark2019-1st-and-3rd-Place-Solution/
            blob/master/src/modeling/metric_learning.py
    '''
    def __init__(self, n_classes, s=30, m=0.50, easy_margin=False,
                 ls_eps=0.0, **kwargs):

        super(ArcMarginProduct, self).__init__(**kwargs)

        self.n_classes = n_classes
        self.s = s
        self.m = m
        self.ls_eps = ls_eps
        self.easy_margin = easy_margin
        # Constants for cos(theta + m) = cos(theta) cos(m) - sin(theta) sin(m).
        self.cos_m = tf.math.cos(m)
        self.sin_m = tf.math.sin(m)
        # Threshold and fallback offset used when easy_margin is False.
        self.th = tf.math.cos(math.pi - m)
        self.mm = tf.math.sin(math.pi - m) * m

    def get_config(self):
        '''Return the constructor arguments so the layer can be re-created.'''
        config = super().get_config().copy()
        config.update({
            'n_classes': self.n_classes,
            's': self.s,
            'm': self.m,
            'ls_eps': self.ls_eps,
            'easy_margin': self.easy_margin,
        })
        return config

    def build(self, input_shape):
        '''Create the class-weight matrix of shape (embedding_dim, n_classes).

        ``input_shape`` is a pair of shapes; the first one is the embedding shape.
        '''
        super(ArcMarginProduct, self).build(input_shape[0])

        self.W = self.add_weight(
            name='W',
            shape=(int(input_shape[0][-1]), self.n_classes),
            initializer='glorot_uniform',
            dtype='float32',
            trainable=True,
            regularizer=None)

    def call(self, inputs):
        '''Return the margin-adjusted, scaled logits for ``inputs = (X, y)``.

        X holds the embeddings (one row per example) and y the class indices.
        '''
        X, y = inputs
        y = tf.cast(y, dtype=tf.int32)
        # Cosine similarity between normalised embeddings and normalised class weights.
        cosine = tf.matmul(
            tf.math.l2_normalize(X, axis=1),
            tf.math.l2_normalize(self.W, axis=0)
        )
        # Rounding can push |cosine| slightly above 1, which would make
        # 1 - cosine^2 negative and the square root NaN; clamp it into [0, 1].
        sine = tf.math.sqrt(
            tf.clip_by_value(1.0 - tf.math.pow(cosine, 2), 0.0, 1.0)
        )
        # cos(theta + m)
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = tf.where(cosine > 0, phi, cosine)
        else:
            phi = tf.where(cosine > self.th, phi, cosine - self.mm)
        one_hot = tf.cast(
            tf.one_hot(y, depth=self.n_classes),
            dtype=cosine.dtype
        )
        if self.ls_eps > 0:
            one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.n_classes

        # Margin logit for the labelled class, plain cosine for every other class.
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s
        return output

# from tensorflow.keras.applications.xception import Xception
# from tensorflow.keras.applications.inception_v3 import InceptionV3
# from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
# from tensorflow.keras.applications.vgg16 import VGG16
# from tensorflow.keras.applications.vgg19 import VGG19
# from tensorflow.keras.applications.resnet import ResNet50, ResNet101, ResNet152
# from tensorflow.keras.applications.densenet import DenseNet121, DenseNet169, DenseNet201

def get_model():
    """Build and compile the EfficientNet-B3 + ArcFace classifier inside strategy.scope().

    The model has two inputs: 'inp1' (the image) and 'inp2' (the class index the
    ArcFace head needs to apply its margin). It outputs softmax probabilities
    over N_CLASSES.
    """
    with strategy.scope():

        # ArcFace head; label smoothing (ls_eps) is left at its default here.
        arc_head = ArcMarginProduct(
            n_classes = N_CLASSES,
            s = 30,
            m = 0.5,
            name='head/arc_margin',
            dtype='float32'
            )

        image_input = tf.keras.layers.Input(shape = (*IMAGE_SIZE, 3), name = 'inp1')
        label_input = tf.keras.layers.Input(shape = (), name = 'inp2')

        # ImageNet-pretrained backbone without its classification top.
        backbone = efn.EfficientNetB3(weights = 'imagenet', include_top = False)
        feature_map = backbone(image_input)
        embedding = tf.keras.layers.GlobalAveragePooling2D()(feature_map)
        logits = arc_head([embedding, label_input])

        probabilities = tf.keras.layers.Softmax(dtype='float32')(logits)

        model = tf.keras.models.Model(
            inputs = [image_input, label_input],
            outputs = [probabilities]
            )

        optimizer = tf.keras.optimizers.Adam(learning_rate = LR)

        model.compile(
            optimizer = optimizer,
            loss = [tf.keras.losses.SparseCategoricalCrossentropy()],
            metrics = [tf.keras.metrics.SparseCategoricalAccuracy()]
            )

        return model

def train_and_evaluate():
    """Train the model on a subset of the TFRecord files and validate on the rest.

    The split is made at file level: 35 % of the files are used for training and
    the remaining files for validation. The weights with the lowest validation
    loss are saved to ``model_{IMAGE_SIZE[0]}_{SEED}.h5``.

    Returns:
        The Keras History object returned by ``model.fit``, so the per-epoch
        losses and metrics can be inspected after the run.
    """
    # Seed everything
    seed_everything(SEED)

    print('\n')
    print('-'*50)

    # File-level train/validation split.
    train, valid = train_test_split(TRAINING_FILENAMES, shuffle = True, random_state = SEED, train_size = 0.35)

    # Keep only (model inputs, label); the model-input dict sits in the second slot after arcface_format.
    train_dataset = get_training_dataset(train, ordered = False)
    train_dataset = train_dataset.map(lambda posting_id, image, label_group, matches: (image, label_group))
    val_dataset = get_validation_dataset(valid, ordered = True)
    val_dataset = val_dataset.map(lambda posting_id, image, label_group, matches: (image, label_group))

    # The training dataset repeats forever, so the epoch length has to be given explicitly.
    STEPS_PER_EPOCH = count_data_items(train) // BATCH_SIZE

    K.clear_session()
    model = get_model()

    # Model checkpoint: keep only the weights with the best validation loss.
    checkpoint = tf.keras.callbacks.ModelCheckpoint(f'model_{IMAGE_SIZE[0]}_{SEED}.h5',
                                                    monitor = 'val_loss',
                                                    verbose = VERBOSE,
                                                    save_best_only = True,
                                                    save_weights_only = True,
                                                    mode = 'min')

    history = model.fit(train_dataset,
                        steps_per_epoch = STEPS_PER_EPOCH,
                        epochs = EPOCHS,
                        callbacks = [checkpoint, get_lr_callback()],
                        validation_data = val_dataset,
                        verbose = VERBOSE)

    # Previously the history was bound to a local and discarded.
    return history

    
# Run the training / validation experiment when this cell is executed.
train_and_evaluate()