<a href="https://colab.research.google.com/github/ssawant/kaggle-competitions/blob/main/Google_Landmark_Recognition_2021.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
# Show which TensorFlow version the runtime provides
print(tf.__version__)

2.6.0


In [None]:
# Install the Kaggle CLI that later cells use to search for and download datasets
!pip install kaggle

In [4]:
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [5]:
# Sanity check: the CLI is installed and runnable
!kaggle --version

Kaggle API 1.5.4


In [86]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import random
import pickle
import os
import shutil

from tensorflow import keras

In [83]:
# Install EfficientnetV2 model

# NOTE(review): this installs from the moving `main` branch with no pinned
# commit, so a rerun may pick up a different version of the package.
! pip install git+https://github.com/sebastian-sz/efficientnet-v2-keras@main --no-deps

from efficientnet_v2 import EfficientNetV2S as ENetV2

# efficientnet_v2s
# WEIGHTS = "imagenet++" #imagenet-21k-ft1k
# Weight set name for the ENetV2 backbone; in the code shown it is only
# referenced by the commented-out ENetV2 call in build_model.
WEIGHTS = "imagenet-21k-ft1k"
# Side length (pixels) images are resized to and the model's input resolution
IMG_SIZE = 384
# NOTE(review): only meaningful for the random-crop augmentation, which is
# currently commented out in decode_tfrecord.
IMG_PAD = 16  # for random cropping

Collecting git+https://github.com/sebastian-sz/efficientnet-v2-keras@main
  Cloning https://github.com/sebastian-sz/efficientnet-v2-keras (to revision main) to /tmp/pip-req-build-p9ywxb5t
  Running command git clone -q https://github.com/sebastian-sz/efficientnet-v2-keras /tmp/pip-req-build-p9ywxb5t


In [8]:
# Init Parameters

# Depth of the one-hot label vector and size of the ArcFace output
N_CLASSES = 81313
# Record count used to derive the number of training steps per epoch
N_RECORDS = 1580470

# NOTE(review): only Python's `random` module is seeded here (it drives the
# shuffling of the TFRecord file lists); TensorFlow's RNG is not seeded.
SEED = 4672
random.seed(SEED)

# batching and tuning strategies for TPUs
BUFFER = 2048  # for shuffling
# Batch size passed to Dataset.batch; the per-replica multiplier is disabled
BATCH = 128 #* strategy.num_replicas_in_sync
# Let tf.data choose parallelism / prefetch depth automatically
AUTO = tf.data.experimental.AUTOTUNE

In [9]:
# Kaggle download dataset
# Search Kaggle for the pre-built 384px TFRecord datasets to find their refs
!kaggle datasets list -s 'Landmark Recognition 2021 TFRecords 384'

ref                                                            title                                              size  lastUpdated          downloadCount  
-------------------------------------------------------------  ------------------------------------------------  -----  -------------------  -------------  
markwijkhuizen/landmark-recognition-2021-tfrecords-384-part-1  Landmark Recognition 2021 TFRecords 384 Part 1     17GB  2021-08-18 18:26:00             26  
markwijkhuizen/landmark-recognition-2021-tfrecords-384-part-2  Landmark Recognition 2021 TFRecords 384 Part 2     17GB  2021-08-18 18:26:29             15  
markwijkhuizen/landmark-recognition-2021-tfrecords-384-part-3  Landmark Recognition 2021 TFRecords 384 Part 3     17GB  2021-08-18 18:26:56             13  
ankursingh12/resized-plant2021                                 resized_plant2021                                   1GB  2021-03-17 08:45:09           1034  
jpmiller/connect-four-datasets                            

In [10]:
# Download the three dataset parts as zip archives into the working directory
!kaggle datasets download 'markwijkhuizen/landmark-recognition-2021-tfrecords-384-part-1'
!kaggle datasets download 'markwijkhuizen/landmark-recognition-2021-tfrecords-384-part-2'
!kaggle datasets download 'markwijkhuizen/landmark-recognition-2021-tfrecords-384-part-3'

Downloading landmark-recognition-2021-tfrecords-384-part-1.zip to /content
100% 17.2G/17.2G [02:36<00:00, 118MB/s]
100% 17.2G/17.2G [02:36<00:00, 118MB/s]
Downloading landmark-recognition-2021-tfrecords-384-part-2.zip to /content
100% 17.2G/17.2G [02:31<00:00, 169MB/s]
100% 17.2G/17.2G [02:32<00:00, 122MB/s]
Downloading landmark-recognition-2021-tfrecords-384-part-3.zip to /content
100% 17.2G/17.2G [02:32<00:00, 118MB/s]
100% 17.2G/17.2G [02:32<00:00, 121MB/s]


In [None]:
# Extract each archive into its own directory; these directories are the
# TFR_1 / TFR_2 / TFR_3 locations globbed for *.tfrecords files below
!unzip '/content/landmark-recognition-2021-tfrecords-384-part-1.zip' -d landmark-recognition-2021-tfrecords-384-part-1
!unzip '/content/landmark-recognition-2021-tfrecords-384-part-2.zip' -d landmark-recognition-2021-tfrecords-384-part-2
!unzip '/content/landmark-recognition-2021-tfrecords-384-part-3.zip' -d landmark-recognition-2021-tfrecords-384-part-3


In [None]:
# Scratch note: the same three bucket URLs are assigned to
# GCS_TFR_1 / GCS_TFR_2 / GCS_TFR_3 in a later cell.
# NOTE(review): presumably the GCS mirrors of the three dataset parts — confirm.
"""
gs://kds-3c3b1b2c873502801f5b1fd7cfde0ff9b1d2d27c43b2302c2886a4f5
gs://kds-4bc4f0d3ed65b1df5fc5b5348504a475b64efda176a19e3f6afab4dd
gs://kds-5d1316b17087454bfff62f35c28ab619b8f98fa37c7afdb8f64a0311
"""

In [12]:
# Local directories holding the extracted shards, one per dataset part
TFR_1 = '/content/landmark-recognition-2021-tfrecords-384-part-1'
TFR_2 = '/content/landmark-recognition-2021-tfrecords-384-part-2'
TFR_3 = '/content/landmark-recognition-2021-tfrecords-384-part-3'

# Collect every *.tfrecords file, part 1 first, then part 2, then part 3
TFRECORDS = [
    shard_path
    for part_dir in (TFR_1, TFR_2, TFR_3)
    for shard_path in tf.io.gfile.glob(f'{part_dir}/*.tfrecords')
]

# First shuffle: randomise file order in place (driven by the seeded `random`)
random.shuffle(TFRECORDS)

In [106]:
# GCS buckets that are globbed for the *.tfrecords shards
GCS_TFR_1 = 'gs://kds-3c3b1b2c873502801f5b1fd7cfde0ff9b1d2d27c43b2302c2886a4f5'
GCS_TFR_2 = 'gs://kds-4bc4f0d3ed65b1df5fc5b5348504a475b64efda176a19e3f6afab4dd'
GCS_TFR_3 = 'gs://kds-5d1316b17087454bfff62f35c28ab619b8f98fa37c7afdb8f64a0311'

# Concatenate the per-bucket file lists in bucket order (1, 2, 3)
GCS_TFRECORDS = sum(
    (
        tf.io.gfile.glob(f'{bucket_url}/*.tfrecords')
        for bucket_url in (GCS_TFR_1, GCS_TFR_2, GCS_TFR_3)
    ),
    [],
)

# First shuffle: randomise file order in place before building the dataset
random.shuffle(GCS_TFRECORDS)

In [13]:
# TPU Boilerplate

# Try to attach to a TPU and build a TPUStrategy. A ValueError raised anywhere
# inside the try block is treated as "no TPU runtime" and the notebook falls
# back to MirroredStrategy. `strategy` is used later to scope dataset and
# model creation.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print("Running on TPU ", tpu.cluster_spec().as_dict()["worker"])
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.TPUStrategy(tpu)
except ValueError:
    print("Not connected to a TPU runtime. Using CPU/GPU strategy")
    strategy = tf.distribute.MirroredStrategy()

Running on TPU  ['10.16.231.130:8470']
INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Initializing the TPU system: grpc://10.16.231.130:8470


INFO:tensorflow:Initializing the TPU system: grpc://10.16.231.130:8470


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Found TPU system:


INFO:tensorflow:Found TPU system:


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


In [117]:
# Define the mapping function to extract, transform and load images for training

# TF Record format defined during creation
# www.kaggle.com/markwijkhuizen/google-landmark-recognition-2021-tfrecords-res-384

# Feature spec passed to tf.io.parse_single_example: every field is a scalar.
# Only 'image' and 'label' are read by decode_tfrecord; 'width' and 'height'
# are parsed but not used in the code shown.
tfrecord_format = {
    'image': tf.io.FixedLenFeature([], tf.string),  # encoded image bytes (decoded as JPEG)
    'label': tf.io.FixedLenFeature([], tf.int64),   # integer class id
    'width': tf.io.FixedLenFeature([], tf.int64),
    'height': tf.io.FixedLenFeature([], tf.int64),
}

def decode_tfrecord(record_bytes, scale_to_unit_range=False):
  """Parse one serialized example into model inputs and a one-hot target.

  Args:
    record_bytes: scalar string tensor with one serialized example that
      follows `tfrecord_format`.
    scale_to_unit_range: when True, map pixel values from [0, 255] to
      [-1, 1]. Keep the default (False) for `tf.keras.applications`
      EfficientNet backbones: they rescale and normalise raw [0, 255] pixels
      inside the model, so pre-scaled inputs would be normalised twice.
      Enable it only for a backbone that expects [-1, 1] inputs.

  Returns:
    A tuple `({"image": img, "label": label}, label_one_hot)` where `img` is
    a bfloat16 tensor of shape (IMG_SIZE, IMG_SIZE, 3), `label` is the int32
    class index and `label_one_hot` is a uint8 vector of length N_CLASSES.
  """
  features = tf.io.parse_single_example(record_bytes, tfrecord_format)

  # force 3 channels so a grayscale JPEG still matches the model's input shape
  img = tf.io.decode_jpeg(features['image'], channels=3)

  # resize image to the required resolution (the result is float32)
  img = tf.image.resize(img, (IMG_SIZE, IMG_SIZE))

  # augment
  img = tf.image.random_flip_left_right(img)
  # img = tf.image.random_crop(img, (IMG_SIZE, IMG_SIZE, 3))

  # optional normalisation to [-1, 1]; see the docstring for when to use it
  if scale_to_unit_range:
    img = tf.math.divide(img, 127.5)
    img = tf.math.subtract(img, 1.0)

  # cast last: tf.image.resize returns float32 and would undo an earlier cast
  img = tf.cast(img, tf.bfloat16)

  # one hot encode label
  label = tf.cast(features['label'], tf.int32)
  label_one_hot = tf.one_hot(
      label,
      N_CLASSES,
      dtype= tf.uint8
  )

  return {"image": img, "label": label}, label_one_hot

with strategy.scope():

    # improve performance by ignoring order
    data_options = tf.data.Options()
    data_options.experimental_deterministic = False

    # extract dataset from TFRecords (shards are read in parallel)
    ds = tf.data.TFRecordDataset(GCS_TFRECORDS, num_parallel_reads=AUTO)
    # shuffle serialized records before decoding so the buffer holds cheap bytes
    ds = ds.with_options(data_options).shuffle(BUFFER)
    ds = ds.map(decode_tfrecord, num_parallel_calls=AUTO)
    # repeat() comes before batch(), so no partial batch ever occurs and
    # drop_remainder=True discards nothing; it only makes the batch dimension
    # static, which is what TPU execution wants. The dataset is infinite, so
    # model.fit must be given steps_per_epoch.
    ds = ds.repeat().batch(BATCH, drop_remainder=True).prefetch(AUTO)

In [118]:
# ArcFace and GeM custom layers

class ArcMarginPenalty(tf.keras.layers.Layer):
    """ ArcFace: Additive Angular Margin Loss
    Loss function to enhance discriminative power of DNNs.

    Applies an additive angular margin penalty that
    increases the geodesic distance gap (i.e. separability)
    between closest classes when applying softmax.

    The layer takes `[embeddings, labels]` as input. In training it returns
    scaled logits with the margin applied to the true class; at inference it
    returns the plain (unscaled) cosine similarities.

    https://arxiv.org/abs/1801.07698
    https://github.com/lyakaap/Landmark2019-1st-and-3rd-Place-Solution
    https://github.com/peteryuX/arcface-tf2

    n_classes -- number of unique classes
    margin -- margin magnitude
    scale -- constant scaling factor for output logits
    """

    def __init__(self, n_classes, margin=0.5, scale=64, **kwargs):
        # Initialise the Keras base class before assigning any attributes
        super(ArcMarginPenalty, self).__init__(**kwargs)
        self.n_classes = n_classes
        self.update_margin_scale(margin, scale)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super(ArcMarginPenalty, self).get_config().copy()
        config.update({
            'n_classes': self.n_classes,
            'margin': self.margin,
            'scale': self.scale,
        })
        return config

    def build(self, input_shape):
        """Create the class-centre matrix of shape (embedding_dim, n_classes).

        `input_shape` is a pair: the embedding shape first, the label shape
        second. Only the embedding's last dimension is used.
        """
        super(ArcMarginPenalty, self).build(input_shape[0])
        self.w = self.add_weight(
            name="weights",
            shape=(int(input_shape[0][-1]), self.n_classes)
        )

    def update_margin_scale(self, margin, scale):
        """Set margin and scale and recompute the constants derived from them."""
        self.margin = margin
        self.scale = scale
        self.cos_m = tf.identity(tf.math.cos(self.margin))
        self.sin_m = tf.identity(tf.math.sin(self.margin))
        # threshold cos(pi - m): below it, theta + m would exceed pi
        self.th = tf.identity(tf.math.cos(np.pi - self.margin))
        # penalty sin(m) * m subtracted from cos(theta) in that fallback case
        self.mm = tf.multiply(self.sin_m, self.margin)

    def call(self, inputs, training=None):
        """Compute ArcFace logits (training) or cosine similarities (inference).

        inputs -- pair `(embeddings, labels)`; labels are integer class ids
        training -- Keras training flag; falls back to the backend learning
                    phase when None
        """
        embds, labels = inputs

        if training is None:
            training = tf.keras.backend.learning_phase()

        # calculate cos(theta)
        cos_t = tf.matmul(
            tf.nn.l2_normalize(embds, axis=1),
            tf.nn.l2_normalize(self.w, axis=0)
        )

        # for inference return cosine similarity
        if not training:
            return cos_t

        # add margin, i.e. cos(theta+m)
        # Clip before the square root: rounding can push cos^2 slightly above 1
        # (sqrt of a negative -> NaN), and sqrt has an infinite gradient at 0.
        sin_sq = tf.clip_by_value(
            1.0 - tf.math.pow(cos_t, 2),
            tf.keras.backend.epsilon(),
            1.0
        )
        sin_t = tf.math.sqrt(sin_sq)
        cos_mt = cos_t * self.cos_m - sin_t * self.sin_m

        # ensure theta+m lies in the range [0, pi]
        cos_mt = tf.where(cos_t > self.th, cos_mt, cos_t - self.mm)

        # retrieve logits and scale: margin only on the true-class column
        mask = tf.one_hot(labels, depth=self.n_classes, dtype=cos_t.dtype)
        output = (mask * cos_mt) + ((1.0 - mask) * cos_t)
        return output * self.scale


class GeneralizedMeanPooling(tf.keras.layers.Layer):
    """ Compute the generalized mean of each channel in a tensor
    trainable parameter p increases contrast of the
    pooled feature map to focus on salient features of the
    image. (1: average pooling, infinite: max pooling)

    The output keeps the input's rank: each spatial dimension is reduced by
    the pooling window and the channel dimension is unchanged.

    pool_size -- downscale factor
    init_norm -- initial magnitude for p
    normalize -- apply L2-normalisation to output
    """

    def __init__(self, pool_size, init_norm=3.0, normalize=False, **kwargs):
        self.pool_size = pool_size
        self.init_norm = init_norm
        self.normalize = normalize
        super(GeneralizedMeanPooling, self).__init__(**kwargs)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super(GeneralizedMeanPooling, self).get_config().copy()
        config.update({
            'pool_size': self.pool_size,
            'init_norm': self.init_norm,
            'normalize': self.normalize,
        })
        return config

    def build(self, input_shape):
        """Create one trainable exponent p per channel (last input dimension)."""
        super(GeneralizedMeanPooling, self).build(input_shape)
        feature_size = input_shape[-1]
        self.p = self.add_weight(
            name='norms',
            shape=(feature_size,),
            initializer=tf.keras.initializers.constant(self.init_norm),
            trainable=True
        )

    def call(self, inputs):
        """Return (mean(x ** p)) ** (1 / p) over each pooling window."""
        x = inputs
        # clamp to a small positive value so the fractional power is defined
        x = tf.math.maximum(x, 1e-6)
        x = tf.pow(x, self.p)
        x = tf.nn.avg_pool(x, self.pool_size, self.pool_size, "VALID")
        x = tf.pow(x, 1.0 / self.p)
        if self.normalize:
            # Normalise each feature vector over its channels (last axis).
            # Axis 1 is a spatial axis of the pooled map; normalising over it
            # would collapse a 1x1 map to +/-1 everywhere.
            x = tf.nn.l2_normalize(x, axis=-1)
        return x

    def _spatial_pool_size(self):
        """Return the (height, width) window implied by `pool_size`."""
        size = self.pool_size
        if isinstance(size, int):
            return size, size
        size = tuple(size)
        if len(size) == 1:
            return size[0], size[0]
        if len(size) == 2:
            return size[0], size[1]
        # remaining accepted form carries batch/channel entries at the ends
        return size[1], size[2]

    def compute_output_shape(self, input_shape):
        """Shape produced by `call`: spatial dims shrink, channels are kept.

        Assumes a 4-D channels-last input (batch, height, width, channels).
        """
        batch, height, width, channels = input_shape
        pool_h, pool_w = self._spatial_pool_size()

        def pooled(dim, window):
            # "VALID" pooling with stride == window size
            return None if dim is None else (dim - window) // window + 1

        return tuple([batch, pooled(height, pool_h), pooled(width, pool_w), channels])

In [40]:
import tensorflow_hub as hub

# NOTE(review): presumably makes TF Hub read the model uncompressed straight
# from remote storage instead of downloading an archive — confirm against the
# TF Hub documentation.
os.environ["TFHUB_MODEL_LOAD_FORMAT"] = "UNCOMPRESSED"

# TF Hub handle of the EfficientNetV2-S ImageNet-21k classification model
efficientnet_v2s = 'https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_s/classification/2'

# NOTE(review): `feature_extractor_model` is not referenced by any of the
# later cells shown here; build_model uses EfficientNetB4 instead.
feature_extractor_model = hub.load(efficientnet_v2s)


In [119]:
from tensorflow.keras.applications.efficientnet import EfficientNetB4

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten, Dropout

In [120]:
# Define the model

# first step: build model with frozen pretrained weights
# - at this stage a large learning rate is used
# - a small arcmargin is used in the initial stages
def build_model(n_classes):
    """Build and compile the first-stage model with a frozen backbone.

    The model has two inputs, "image" and "label"; the label is needed
    because the ArcFace head applies its margin to the true class during
    training. The pipeline is: EfficientNetB4 backbone -> generalized mean
    pooling -> 512-unit dense embedding with batch norm and PReLU -> ArcFace
    logits.

    NOTE(review): tf.keras.applications EfficientNet models rescale their
    input internally and expect raw pixel values in [0, 255]; make sure the
    data pipeline feeds that range.

    n_classes -- number of output classes for the ArcFace head

    Returns the compiled keras.Model.
    """

    # two inputs: images and labels
    image = keras.layers.Input(
        shape=(IMG_SIZE, IMG_SIZE, 3),
        name="image",
        dtype=tf.bfloat16
    )

    label = keras.layers.Input(
        shape=(),
        name="label",
        dtype=tf.int32
    )

    # load backbone without top + freeze pretrained weights
    # (disabled alternative: ENetV2(input_tensor=image, include_top=False,
    #  pooling=None, weights=WEIGHTS))
    backbone = EfficientNetB4(
        include_top=False,
        weights='imagenet',
        input_tensor=image
        )

    backbone.trainable = False

    # pool model output
    # The final feature map is IMG_SIZE / 32 on a side, so a pooling window of
    # that size reduces it to 1x1. Deriving it from IMG_SIZE keeps the two in
    # step if the resolution is changed.
    pool_size = IMG_SIZE // 32
    x = GeneralizedMeanPooling(pool_size)(backbone.output)

    # squeeze
    x = keras.layers.Flatten()(x)
    x = keras.layers.Dense(
        512,
        kernel_initializer="he_normal",
        name="squeeze_dense"
    )(x)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.PReLU()(x)  # trainable leaky ReLU

    # arcface (the layer is named so unfreeze_model can look it up later)
    output = ArcMarginPenalty(
        n_classes,
        margin=0.3,
        scale=40,
        name="arcface",
        dtype=tf.float32
    )([x, label])

    # Compile
    model = keras.Model(inputs=[image, label], outputs=[output])
    # model = keras.Model(inputs=[image], outputs=[output])
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.01),
        loss=keras.losses.CategoricalCrossentropy(from_logits=True),
        metrics=["accuracy"]
    )

    return model

# after initial training all layers except for batchnorm are unfrozen
# - the learning rate is dramatically decreased
# - margin is increased to increase discriminative power
def unfreeze_model(model):
    """Switch `model` in place to its fine-tuning configuration.

    Marks every layer that is not a BatchNormalization layer as trainable
    (batch-norm layers keep whatever flag they already have), raises the
    ArcFace margin to 0.5 at scale 40, and recompiles with a lower learning
    rate. Nothing is returned; the caller keeps using the same model object.
    """
    batch_norm_cls = keras.layers.BatchNormalization
    for lyr in model.layers:
        if isinstance(lyr, batch_norm_cls):
            continue
        lyr.trainable = True

    arcface_head = model.get_layer("arcface")
    arcface_head.update_margin_scale(0.5, 40)

    # recompile so the new trainable flags and learning rate take effect
    fine_tune_optimizer = keras.optimizers.Adam(learning_rate=0.0005)
    loss_fn = keras.losses.CategoricalCrossentropy(from_logits=True)
    model.compile(
        optimizer=fine_tune_optimizer,
        loss=loss_fn,
        metrics=["accuracy"]
    )

In [121]:
# build model within scope to use TPUs
# (the model's variables are created under the distribution strategy chosen
# in the TPU boilerplate cell)
with strategy.scope():
    model = build_model(N_CLASSES)

In [None]:
# Initial training stage - no validation is used
# Number of epochs was determined from previous investigations

EPOCHS = 5
# Steps per epoch: batches needed for one pass over N_RECORDS records.
# `ds` repeats forever, so fit() needs an explicit step count.
STAGES = int(np.ceil(N_RECORDS / BATCH))

tape = model.fit(
    ds,
    epochs=EPOCHS,
    steps_per_epoch=STAGES,
)

# Training curves, one panel per metric. Titles, axis labels and a legend are
# drawn so the figure can be read on its own.
fig, ax = plt.subplots(1, 2, figsize=(8, 4))
for panel, metric_name in zip(ax, ("loss", "accuracy")):
    panel.plot(tape.history[metric_name], label="train")
    panel.set(title=metric_name, xlabel="epoch", ylabel=metric_name)
    panel.legend()
fig.tight_layout()

Epoch 1/5
Epoch 2/5
Epoch 3/5