## Importing Libraries

In [1]:
import numpy as np
import pandas as pd
import cv2
import tensorflow as tf
import pathlib
from tqdm import tqdm
# %pip install -q efficientnet
# %pip install tensorflow-addons

## Creating TF Records: To save memory consumption.

### Image Feature Conversion Functions

In [3]:

def _bytes_feature(value):
    """Returns a bytes_list from a string / byte."""
    if isinstance(value, type(tf.constant(0))):
        value = value.numpy() # BytesList won't unpack a string from an EagerTensor.
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def _float_feature(value):
    """Returns a float_list from a float / double."""
    return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))

def _int64_feature(value):
    """Returns an int64_list from a bool / enum / int / uint."""
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
def serialize_example(feature0, feature1, feature2):
    feature = {
        'id': _bytes_feature(feature0),
        'image': _bytes_feature(feature1),
        'target': _int64_feature(feature2)
    }
    example_proto = tf.train.Example(features = tf.train.Features(feature = feature))
    return example_proto.SerializeToString()

### Storing Images as TF records

In [37]:

TRAIN_IMAGE_DIR = 'landmark-recognition-2020/train'
TRAIN = 'landmark-recognition-2020/train_encoded.csv'
image_paths = [x for x in pathlib.Path(TRAIN_IMAGE_DIR).rglob('*.jpg')]

In [38]:
# # Read image and resize it
def read_image(image_path, size = (384, 384)):
    img = cv2.imread(image_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, size)
    return img
def get_tf_records(record = 0, size = (384, 384)):
    # Get only one group, this is a slow process so we need to make 50 different sessions
    df_rec = df[df['group'] == record]
    # Reset index 
    df_rec.reset_index(drop = True, inplace = True)
    # Get a list of ids
    ids_list = list(df_rec['id'].unique())
    # Write tf records
    with tf.io.TFRecordWriter('train_{}.tfrec'.format(record)) as writer:
        for image_path in tqdm(image_paths):
            image_id = image_path.name.split('.')[0]
            if image_id in ids_list:
                # Get target
                target = df_rec[df_rec['id'] == image_id]['landmark_id_encode']
                img = read_image(str(image_path), size)
                img = cv2.imencode('.jpg', img, (cv2.IMWRITE_JPEG_QUALITY, 100))[1].tostring()
                example = serialize_example(
                    str.encode(image_id), img, target.values[0]
                )
                writer.write(example)

In [23]:

df = pd.read_csv(TRAIN)
get_tf_records(record = 0, size = (384, 384))

  img = cv2.imencode('.jpg', img, (cv2.IMWRITE_JPEG_QUALITY, 100))[1].tostring()
2022-04-08 00:43:43.291446: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-04-08 00:43:43.291723: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
  0%|          | 182/1580470 [00:00<28:35, 921.33it/s]

Metal device set to: Apple M1

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB



100%|██████████| 1580470/1580470 [19:04<00:00, 1381.46it/s]


In [39]:
for i in range(1,50):
    get_tf_records(record = i, size = (384, 384))

  img = cv2.imencode('.jpg', img, (cv2.IMWRITE_JPEG_QUALITY, 100))[1].tostring()
100%|██████████| 1580470/1580470 [19:08<00:00, 1375.89it/s]
100%|██████████| 1580470/1580470 [19:09<00:00, 1374.36it/s]
100%|██████████| 1580470/1580470 [19:07<00:00, 1377.75it/s]
100%|██████████| 1580470/1580470 [18:33<00:00, 1419.74it/s]
100%|██████████| 1580470/1580470 [18:26<00:00, 1428.67it/s]
100%|██████████| 1580470/1580470 [18:33<00:00, 1419.15it/s]
100%|██████████| 1580470/1580470 [18:48<00:00, 1400.00it/s]
100%|██████████| 1580470/1580470 [18:45<00:00, 1404.05it/s]
100%|██████████| 1580470/1580470 [18:47<00:00, 1402.25it/s]
100%|██████████| 1580470/1580470 [18:52<00:00, 1396.12it/s]
100%|██████████| 1580470/1580470 [18:47<00:00, 1402.32it/s]
100%|██████████| 1580470/1580470 [18:25<00:00, 1429.01it/s]
100%|██████████| 1580470/1580470 [18:16<00:00, 1441.37it/s]
100%|██████████| 1580470/1580470 [17:58<00:00, 1465.58it/s]
100%|██████████| 1580470/1580470 [17:48<00:00, 1478.61it/s]
100%|██████████| 15

KeyboardInterrupt: 

## Modeling

In [None]:
import os
import re
import numpy as np
import pandas as pd
import random
import math
from sklearn import metrics
from sklearn.model_selection import train_test_split
import tensorflow as tf
import efficientnet.tfkeras as efn
from tensorflow.keras import backend as K
import tensorflow_addons as tfa
from tqdm.notebook import tqdm as tqdm

In [192]:
ground_truths = pd.read_csv(TRAIN)

In [43]:
AUTO = tf.data.experimental.AUTOTUNE
AUTO

-1

In [119]:
# For tf.dataset
AUTO = tf.data.experimental.AUTOTUNE

# # Data access
DICT_PATH = 'landmark-recognition-2020/train_encoded.csv'

# Configuration
EPOCHS = 5
BATCH_SIZE = 32
IMAGE_SIZE = [384, 384]
# Seed
SEED = 100
# Learning rate
LR = 0.0001
# Number of classes
NUMBER_OF_CLASSES = 66672
strategy = tf.distribute.get_strategy()

# # Training filenames directory
FILENAMES = tf.io.gfile.glob('./train*.tfrec')
# # Read csv file
df = pd.read_csv(DICT_PATH)

In [120]:
FILENAMES = FILENAMES[:5]
NUMBER_OF_CLASSES = 37874
BATCH_SIZE = 32

In [121]:
FILENAMES

['./train_5.tfrec',
 './train_10.tfrec',
 './train_12.tfrec',
 './train_7.tfrec',
 './train_3.tfrec']

In [122]:

# Using 20% of the data to validate
TRAINING_FILENAMES, VALIDATION_FILENAMES = train_test_split(FILENAMES, test_size = 0.20, random_state = SEED)
training_groups = [int(re.compile(r"_([0-9]*)\.").search(filename).group(1)) for filename in TRAINING_FILENAMES]
validation_groups = [int(re.compile(r"_([0-9]*)\.").search(filename).group(1)) for filename in VALIDATION_FILENAMES]
n_trn_classes = df[df['group'].isin(training_groups)]['landmark_id_encode'].nunique()
n_val_classes = df[df['group'].isin(validation_groups)]['landmark_id_encode'].nunique()
print(f'The number of unique training classes is {n_trn_classes} of {NUMBER_OF_CLASSES} total classes')
print(f'The number of unique validation classes is {n_val_classes} of {NUMBER_OF_CLASSES} total classes')


The number of unique training classes is 37349 of 37874 total classes
The number of unique validation classes is 24639 of 37874 total classes


In [123]:

# Seed everything
def seed_everything(seed):
    random.seed(seed)
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    tf.random.set_seed(seed)

# Function to decode our images (normalize and reshape)
def decode_image(image_data):
    image = tf.image.decode_jpeg(image_data, channels = 3)
    # Convert image to floats in [0, 1] range
    image = tf.cast(image, tf.float32) / 255.0
    # Explicit size needed for TPU
    image = tf.reshape(image, [*IMAGE_SIZE, 3])
    return image

# This function parse our images and also get the target variable
def read_tfrecord(example):
    TFREC_FORMAT = {
        # tf.string means bytestring
        "image": tf.io.FixedLenFeature([], tf.string), 
        # shape [] means single element
        "target": tf.io.FixedLenFeature([], tf.int64)
        }
    example = tf.io.parse_single_example(example, TFREC_FORMAT)
    image = decode_image(example['image'])
    target = tf.cast(example['target'], tf.int32)
    return image, target

# This function load our tf records and parse our data with the previous function
def load_dataset(filenames, ordered = False):
    # Read from TFRecords. For optimal performance, reading from multiple files at once and
    # Diregarding data order. Order does not matter since we will be shuffling the data anyway
    
    ignore_order = tf.data.Options()
    if not ordered:
        # Disable order, increase speed
        ignore_order.experimental_deterministic = False 
        
    # Automatically interleaves reads from multiple files
    dataset = tf.data.TFRecordDataset(filenames, num_parallel_reads = AUTO)
    # Use data as soon as it streams in, rather than in its original order
    dataset = dataset.with_options(ignore_order)
    # Returns a dataset of (image, label) pairs
    dataset = dataset.map(read_tfrecord, num_parallel_calls = AUTO) 
    return dataset

# This function output the data so that we can use arcface
def arcface_format(image, target):
    return {'inp1': image, 'inp2': target}, target

# Training data pipeline
def get_training_dataset(filenames, ordered = False):
    dataset = load_dataset(filenames, ordered = ordered)
    dataset = dataset.map(arcface_format, num_parallel_calls = AUTO)
    # The training dataset must repeat for several epochs
    dataset = dataset.repeat() 
    dataset = dataset.shuffle(2048)
    dataset = dataset.batch(BATCH_SIZE)
    # Prefetch next batch while training (autotune prefetch buffer size)
    dataset = dataset.prefetch(AUTO)
    return dataset

# Validation data pipeline
def get_validation_dataset(filenames, ordered = True, prediction = False):
    dataset = load_dataset(filenames, ordered = ordered)
    dataset = dataset.map(arcface_format, num_parallel_calls = AUTO)
    # If we are in prediction mode, use bigger batch size for faster prediction
    if prediction:
        dataset = dataset.batch(BATCH_SIZE * 4)
    else:
        dataset = dataset.batch(BATCH_SIZE)
    # Prefetch next batch while training (autotune prefetch buffer size)
    dataset = dataset.prefetch(AUTO) 
    return dataset

# Count the number of observations with the tabular csv
def count_data_items(filenames):
    records = [int(re.compile(r"_([0-9]*)\.").search(filename).group(1)) for filename in filenames]
    df = pd.read_csv(DICT_PATH)
    n = df[df['group'].isin(records)].shape[0]
    return n


In [124]:

NUM_TRAINING_IMAGES = count_data_items(TRAINING_FILENAMES)
NUM_VALIDATION_IMAGES  = count_data_items(VALIDATION_FILENAMES)
print(f'Training with {NUM_TRAINING_IMAGES} images')
print(f'Validating with {NUM_VALIDATION_IMAGES} images')


Training with 126440 images
Validating with 31610 images


In [125]:

# Function for a custom learning rate scheduler with warmup and decay
def get_lr_callback():
    lr_start   = 0.000001
    lr_max     = 0.0000005 * BATCH_SIZE
    lr_min     = 0.000001
    lr_ramp_ep = 5
    lr_sus_ep  = 0
    lr_decay   = 0.8
   
    def lrfn(epoch):
        if epoch < lr_ramp_ep:
            lr = (lr_max - lr_start) / lr_ramp_ep * epoch + lr_start   
        elif epoch < lr_ramp_ep + lr_sus_ep:
            lr = lr_max    
        else:
            lr = (lr_max - lr_min) * lr_decay**(epoch - lr_ramp_ep - lr_sus_ep) + lr_min    
        return lr

    lr_callback = tf.keras.callbacks.LearningRateScheduler(lrfn, verbose = False)
    return lr_callback

In [126]:
# Function to calculate global average precision score
def gap_vector(pred, conf, true, return_x = False):
    '''
    Compute Global Average Precision (aka micro AP), the metric for the
    Google Landmark Recognition competition. 
    This function takes predictions, labels and confidence scores as vectors.
    In both predictions and ground-truth, use None/np.nan for "no label".

    Args:
        pred: vector of integer-coded predictions
        conf: vector of probability or confidence scores for pred
        true: vector of integer-coded labels for ground truth
        return_x: also return the data frame used in the calculation

    Returns:
        GAP score
    '''
    x = pd.DataFrame({'pred': pred, 'conf': conf, 'true': true})
    x.sort_values('conf', ascending = False, inplace = True, na_position = 'last')
    x['correct'] = (x.true == x.pred).astype(int)
    x['prec_k'] = x.correct.cumsum() / (np.arange(len(x)) + 1)
    x['term'] = x.prec_k * x.correct
    gap = x.term.sum() / x.true.count()
    if return_x:
        return gap, x
    else:
        return gap


In [127]:
class ArcMarginProduct(tf.keras.layers.Layer):
    '''
    Implements large margin arc distance.

    Reference:
        https://arxiv.org/pdf/1801.07698.pdf
        https://github.com/lyakaap/Landmark2019-1st-and-3rd-Place-Solution/
            blob/master/src/modeling/metric_learning.py
    '''
    def __init__(self, n_classes, s=30, m=0.50, easy_margin=False,
                 ls_eps=0.0, **kwargs):

        super(ArcMarginProduct, self).__init__(**kwargs)

        self.n_classes = n_classes
        self.s = s
        self.m = m
        self.ls_eps = ls_eps
        self.easy_margin = easy_margin
        self.cos_m = tf.math.cos(m)
        self.sin_m = tf.math.sin(m)
        self.th = tf.math.cos(math.pi - m)
        self.mm = tf.math.sin(math.pi - m) * m

    def get_config(self):

        config = super().get_config().copy()
        config.update({
            'n_classes': self.n_classes,
            's': self.s,
            'm': self.m,
            'ls_eps': self.ls_eps,
            'easy_margin': self.easy_margin,
        })
        return config

    def build(self, input_shape):
        super(ArcMarginProduct, self).build(input_shape[0])

        self.W = self.add_weight(
            name='W',
            shape=(int(input_shape[0][-1]), self.n_classes),
            initializer='glorot_uniform',
            dtype='float32',
            trainable=True,
            regularizer=None)

    def call(self, inputs):
        X, y = inputs
        y = tf.cast(y, dtype=tf.int32)
        cosine = tf.matmul(
            tf.math.l2_normalize(X, axis=1),
            tf.math.l2_normalize(self.W, axis=0)
        )
        sine = tf.math.sqrt(1.0 - tf.math.pow(cosine, 2))
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = tf.where(cosine > 0, phi, cosine)
        else:
            phi = tf.where(cosine > self.th, phi, cosine - self.mm)
        one_hot = tf.cast(
            tf.one_hot(y, depth=self.n_classes),
            dtype=cosine.dtype
        )
        if self.ls_eps > 0:
            one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.n_classes

        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s
        return output


In [128]:
# Function to build our model using fine tunning (efficientnet)
def get_model():

    with strategy.scope():

        margin = ArcMarginProduct(
            n_classes = NUMBER_OF_CLASSES, 
            s = 64, 
            m = 0.05, 
            name='head/arc_margin', 
            dtype='float32'
            )

        inp = tf.keras.layers.Input(shape = (*IMAGE_SIZE, 3), name = 'inp1')
        label = tf.keras.layers.Input(shape = (), name = 'inp2')
        x0 = efn.EfficientNetB0(weights = 'imagenet', include_top = False)(inp)
        x = tf.keras.layers.GlobalAveragePooling2D()(x0)
        x = tf.keras.layers.Dropout(0.3)(x)
        x = tf.keras.layers.Dense(512)(x)
        x = margin([x, label])
        
        output = tf.keras.layers.Softmax(dtype='float32')(x)

        model = tf.keras.models.Model(inputs = [inp, label], outputs = [output])

        opt = tf.keras.optimizers.Adam(learning_rate = LR)

        model.compile(
            optimizer = opt,
            loss = [tf.keras.losses.SparseCategoricalCrossentropy()],
            metrics = [tf.keras.metrics.SparseCategoricalAccuracy()]
            ) 
        
        return model

In [129]:
# Seed everything
seed_everything(SEED)

# Build training and validation generators
train_dataset = get_training_dataset(TRAINING_FILENAMES, ordered = False)
val_dataset = get_validation_dataset(VALIDATION_FILENAMES, ordered = True, prediction = False)
STEPS_PER_EPOCH = NUM_TRAINING_IMAGES // BATCH_SIZE


In [130]:
model = get_model()
# Using a checkpoint to save best model (want the entire model, not only the weights)
checkpoint = tf.keras.callbacks.ModelCheckpoint(f'baseline_model_effb0_arcface.h5', 
                                                 monitor = 'val_loss', 
                                                 save_best_only = True, 
                                                 save_weights_only = False)
# Using learning rate scheduler
cb_lr_schedule = tf.keras.callbacks.ReduceLROnPlateau(monitor = 'val_loss', 
                                                       mode = 'min', 
                                                       factor = 0.5, 
                                                       patience = 1, 
                                                       verbose = 1, 
                                                       min_delta = 0.0001)


In [131]:

# Train and evaluate our model
history = model.fit(train_dataset,  
                    steps_per_epoch = STEPS_PER_EPOCH,
                    epochs = EPOCHS,
                    # callbacks = [get_lr_callback(), checkpoint],
                    validation_data = val_dataset,
                    verbose = 1
                    )


Epoch 1/5


2022-04-08 12:00:05.368435: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


  60/3951 [..............................] - ETA: 3:27:09 - loss: 8.1922 - sparse_categorical_accuracy: 0.0000e+00

KeyboardInterrupt: 

In [None]:

# # Restart tpu
# tf.tpu.experimental.initialize_tpu_system(tpu)
# # Load best model
# model = tf.keras.models.load_model('/content/drive/My Drive/Models/baseline_model_effb0_arcface.h5')

# # Reset val dataset, now in prediction mode
# val_dataset = get_validation_dataset(VALIDATION_FILENAMES, ordered = True, prediction = True)
# # Get ground truth target for the fold
# val_target = val_dataset.map(lambda image, target: target).unbatch()
# val_targets = list(next(iter(val_target.batch(NUM_VALIDATION_IMAGES))).numpy())

#  # Predictions
# val_image = val_dataset.map(lambda image, target: image['inp1'])
# # Transform validation dataset as a numpy iterator
# val_image = val_image.as_numpy_iterator()
# # Initiate empty list to store predictions and confidences
# target_predictions = []
# target_confidences = []
# # Iterate over validation images and predict in batches of 1024 images
# batches = math.ceil(NUM_VALIDATION_IMAGES / (BATCH_SIZE * 4))
# for image in tqdm(val_image, total = batches):
#     prediction = model.predict(image)
#     target_prediction = np.argmax(prediction, axis = -1)
#     target_confidence = np.max(prediction, axis = -1)
#     target_predictions.extend(list(target_prediction))
#     target_confidences.extend(list(target_confidence))

# # Calculate global average precision for the fold
# gap = gap_vector(target_predictions, target_confidences, val_targets)
# accuracy_score = metrics.accuracy_score(val_targets, target_predictions)
# print(f'Our global average precision score is {gap}')
# print(f'Our accuracy score is {accuracy_score}')

# New Thing

In [185]:
train_df = pd.read_csv('landmark-recognition-2020/train_encoded.csv')
sample_df = train_df.sample(frac=0.7, random_state=1234)
sample_df.head()

Unnamed: 0,id,landmark_id,landmark_id_encode,group
1042527,1c3654818c768b2c,134466,54000,32.0
613554,fed703fc407ea6cb,78875,31731,23.0
158442,a781aff8a80fe610,20409,8211,38.0
795783,7317abdb5266aaf6,102839,41351,31.0
1042519,143ad48bc9990f56,134466,54000,25.0


In [186]:
sample_df_classes = sample_df['landmark_id_encode'].value_counts()

In [187]:
sample_df_classes_with_at_least_100 = sample_df_classes[sample_df_classes >= 100]
sample_df_classes_with_at_least_100

55807    4379
50843    1578
8211     1229
33462    1199
45494     802
         ... 
54074     100
49146     100
72633     100
61231     100
40704     100
Name: landmark_id_encode, Length: 996, dtype: int64

In [188]:
sample_df_classes_with_at_least_100.sum()

175809

In [189]:
for clas in sample_df_classes_with_at_least_100.index:
    if not os.path.exists(f'Data/train/{clas}'):
        os.makedirs(f'Data/train/{clas}')
    if not os.path.exists(f'Data/test/{clas}'):
        os.makedirs(f'Data/test/{clas}')
    if not os.path.exists(f'Data/val/{clas}'):
        os.makedirs(f'Data/val/{clas}')

In [177]:
import shutil

In [190]:

for clas in sample_df_classes_with_at_least_100.index:
    train_images_df = sample_df[sample_df['landmark_id_encode'] == clas].sample(frac=0.8)
    test_df = sample_df[(sample_df['landmark_id_encode'] == clas) & ~sample_df['id'].isin(train_images_df['id'])]
    train_df = train_images_df.sample(frac=0.7)
    val_df = train_images_df[~train_images_df['id'].isin(train_df['id'])]
    
    for img in train_df['id'].values:
        f1 = img[0]
        f2 = img[1]
        f3 = img[2]
        img_path = f"landmark-recognition-2020/train/{f1}/{f2}/{f3}/{img}.jpg"
        if not os.path.exists(f'Data/train/{clas}/{img}.jpg'):
            shutil.copyfile(img_path, f'Data/train/{clas}/{img}.jpg')
    for img in test_df['id'].values:
        f1 = img[0]
        f2 = img[1]
        f3 = img[2]
        img_path = f"landmark-recognition-2020/train/{f1}/{f2}/{f3}/{img}.jpg"
        if not os.path.exists(f'Data/test/{clas}/{img}.jpg'):
            shutil.copyfile(img_path, f'Data/test/{clas}/{img}.jpg')
    for img in val_df['id'].values:
        f1 = img[0]
        f2 = img[1]
        f3 = img[2]
        img_path = f"landmark-recognition-2020/train/{f1}/{f2}/{f3}/{img}.jpg"
        if not os.path.exists(f'Data/val/{clas}/{img}.jpg'):
            shutil.copyfile(img_path, f'Data/val/{clas}/{img}.jpg')