## Importing Libraries

In [1]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
import re
import numpy as np
import pandas as pd
import random
import math
from sklearn import metrics
from sklearn.model_selection import train_test_split
import tensorflow as tf
import efficientnet.tfkeras as efn
from tensorflow.keras import backend as K
import tensorflow_addons as tfa
from tqdm.notebook import tqdm as tqdm
import cv2
import pathlib
import operator
import gc
import pathlib
import shutil
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import backend as K
from scipy import spatial
import cv2
import efficientnet.tfkeras as efn
import math

## Setting Global Configs

In [2]:
# config

# For tf.dataset
AUTO = tf.data.experimental.AUTOTUNE

# Configuration
EPOCHS = 2 # 3hrs
BATCH_SIZE = 32
IMAGE_SIZE = [32, 32] # Effecientnet accepts this format
# Seed
SEED = 100
# Learning rate
LR = 0.0001
# Number of classes
NUMBER_OF_CLASSES = 996
strategy = tf.distribute.get_strategy()



In [3]:
train_df = pd.read_csv('landmark-recognition-2020/train_encoded.csv')
sample_df = train_df.sample(frac=0.7, random_state=1234)
sample_df.head()

Unnamed: 0,id,landmark_id,landmark_id_encode,group
1042527,1c3654818c768b2c,134466,54000,32.0
613554,fed703fc407ea6cb,78875,31731,23.0
158442,a781aff8a80fe610,20409,8211,38.0
795783,7317abdb5266aaf6,102839,41351,31.0
1042519,143ad48bc9990f56,134466,54000,25.0


In [4]:
sample_df_classes = sample_df['landmark_id_encode'].value_counts()
sample_df_classes_with_at_least_100 = sample_df_classes[sample_df_classes >= 100]
# images = 175809, landmarks = 996


In [5]:
# Seed everything
random.seed(SEED)
np.random.seed(SEED)
os.environ['PYTHONHASHSEED'] = str(SEED)
tf.random.set_seed(SEED)

In [6]:
# Model Training

train_dir = 'Data/train'
test_dir = 'Data/test'
val_dir = 'Data/val'
datagen = ImageDataGenerator(rescale = 1./255)

In [7]:
def make_train_generator():
    train_generator = datagen.flow_from_directory(train_dir,target_size=IMAGE_SIZE,batch_size=BATCH_SIZE,class_mode='sparse')
    return train_generator

train_dataset = tf.data.Dataset.from_generator(make_train_generator,(tf.float32, tf.float32))

Metal device set to: Apple M1

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB



2022-04-09 16:27:51.384271: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-04-09 16:27:51.384387: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [8]:
def make_val_generator():
    val_generator = datagen.flow_from_directory(val_dir,target_size=IMAGE_SIZE,batch_size=BATCH_SIZE,class_mode='sparse', shuffle=False)
    return val_generator

val_dataset = tf.data.Dataset.from_generator(make_val_generator,(tf.float32, tf.float32))

In [9]:
NUM_TRAINING_IMAGES = 98428
NUM_VALIDATION_IMAGES = 42219
STEPS_PER_EPOCH = NUM_TRAINING_IMAGES // BATCH_SIZE

In [10]:
# This function output the data so that we can use arcface
def arcface_format(image, target):
    return {'inp1': image, 'inp2': target}, target

In [11]:
train_dataset = train_dataset.map(arcface_format,num_parallel_calls = AUTO)

In [12]:
val_dataset = val_dataset.map(arcface_format,num_parallel_calls = AUTO)

In [13]:
train_dataset

<ParallelMapDataset element_spec=({'inp1': TensorSpec(shape=<unknown>, dtype=tf.float32, name=None), 'inp2': TensorSpec(shape=<unknown>, dtype=tf.float32, name=None)}, TensorSpec(shape=<unknown>, dtype=tf.float32, name=None))>

In [14]:
class ArcMarginProduct(tf.keras.layers.Layer):
    '''
    Implements large margin arc distance.

    Reference:
        https://arxiv.org/pdf/1801.07698.pdf
        https://github.com/lyakaap/Landmark2019-1st-and-3rd-Place-Solution/
            blob/master/src/modeling/metric_learning.py
    '''
    def __init__(self, n_classes, s=30, m=0.50, easy_margin=False,
                 ls_eps=0.0, **kwargs):

        super(ArcMarginProduct, self).__init__(**kwargs)

        self.n_classes = n_classes
        self.s = s
        self.m = m
        self.ls_eps = ls_eps
        self.easy_margin = easy_margin
        self.cos_m = tf.math.cos(m)
        self.sin_m = tf.math.sin(m)
        self.th = tf.math.cos(math.pi - m)
        self.mm = tf.math.sin(math.pi - m) * m

    def get_config(self):

        config = super().get_config().copy()
        config.update({
            'n_classes': self.n_classes,
            's': self.s,
            'm': self.m,
            'ls_eps': self.ls_eps,
            'easy_margin': self.easy_margin,
        })
        return config

    def build(self, input_shape):
        super(ArcMarginProduct, self).build(input_shape[0])

        self.W = self.add_weight(
            name='W',
            shape=(int(input_shape[0][-1]), self.n_classes),
            initializer='glorot_uniform',
            dtype='float32',
            trainable=True,
            regularizer=None)

    def call(self, inputs):
        X, y = inputs
        y = tf.cast(y, dtype=tf.int32)
        cosine = tf.matmul(
            tf.math.l2_normalize(X, axis=1),
            tf.math.l2_normalize(self.W, axis=0)
        )
        sine = tf.math.sqrt(1.0 - tf.math.pow(cosine, 2))
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = tf.where(cosine > 0, phi, cosine)
        else:
            phi = tf.where(cosine > self.th, phi, cosine - self.mm)
        one_hot = tf.cast(
            tf.one_hot(y, depth=self.n_classes),
            dtype=cosine.dtype
        )
        if self.ls_eps > 0:
            one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.n_classes

        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s
        return output


In [15]:
def develop_model():

    with strategy.scope():

        margin = ArcMarginProduct(
            n_classes = NUMBER_OF_CLASSES, 
            s = 64, 
            m = 0.05, 
            name='head/arc_margin', 
            dtype='float32'
            )

        inp = tf.keras.layers.Input(shape = (*IMAGE_SIZE, 3), name = 'inp1')
        label = tf.keras.layers.Input(shape = (), name = 'inp2')
        x0 = efn.EfficientNetB0(weights = 'imagenet',include_top = False, input_shape=(*IMAGE_SIZE, 3))(inp)
        x = tf.keras.layers.GlobalAveragePooling2D()(x0)
        x = tf.keras.layers.Dropout(0.3)(x)
        x = tf.keras.layers.Dense(512)(x)
        x = margin([x, label])
        
        output = tf.keras.layers.Softmax(dtype='float32')(x)

        model = tf.keras.models.Model(inputs = [inp, label], outputs = [output])

        opt = tf.keras.optimizers.Adam(learning_rate = LR)

        model.compile(
            optimizer = opt,
            loss = [tf.keras.losses.SparseCategoricalCrossentropy()],
            metrics = [tf.keras.metrics.SparseCategoricalAccuracy()]
            ) 
        
        return model

In [16]:
model = develop_model()

In [17]:
# Train and evaluate our model
history = model.fit(train_dataset,  
                    steps_per_epoch = STEPS_PER_EPOCH,
                    # validation_steps=validation_generator.samples//BATCH_SIZE,
                    epochs = 2,
                    validation_data = val_dataset,
                    verbose=1
                    )

Epoch 1/2


2022-04-09 16:28:12.544736: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2022-04-09 16:28:15.625759: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-04-09 16:28:15.935213: W tensorflow/core/grappler/costs/op_level_cost_estimator.cc:690] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "GPU" } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }


Found 98428 images belonging to 996 classes.

2022-04-09 16:33:57.388739: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-04-09 16:33:57.473121: W tensorflow/core/grappler/costs/op_level_cost_estimator.cc:690] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "GPU" } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }


Found 42219 images belonging to 996 classes.


KeyboardInterrupt: 

In [None]:
# Try with 32x32.


In [216]:
NUM_EMBEDDING_DIMENSIONS = 512
DATASET_DIR = 'landmark-recognition-2020/train_encoded.csv'
TEST_IMAGE_DIR = 'landmark-recognition-2020/test'
TRAIN_IMAGE_DIR = 'landmark-recognition-2020/train'
MODEL1 = model
MODEL1 = tf.keras.models.Model(inputs = MODEL1.input[0], outputs = MODEL1.layers[-4].output)
NUM_TO_RERANK = 1
NUM_PUBLIC_TEST_IMAGES = 10345 # Used to detect if in session or re-run.

In [225]:

# Read image and resize it
def read_image(image_path, size = (384, 384)):
    img = cv2.imread(image_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, size)
    img = cv2.imencode('.jpg', img, (cv2.IMWRITE_JPEG_QUALITY, 100))[1].tostring()
    img = tf.image.decode_jpeg(img, channels = 3)
    img = tf.image.resize(img, (384, 384))
    img = tf.cast(img, tf.float32) / 255.0
    img = tf.reshape(img, [1, 384, 384, 3])
    return img

# Function to get training and test embeddings
def generate_embeddings(filepaths):
    image_paths = [x for x in pathlib.Path(filepaths).rglob('*.jpg')][:100]
    num_images = len(image_paths)
    ids = num_images * [None]
    # Generate an empty matrix where we can store the embeddings of each image
    embeddings = np.empty((num_images, NUM_EMBEDDING_DIMENSIONS))
    for i, image_path in enumerate(image_paths):
        ids[i] = image_path.name.split('.')[0]
        image_tensor = read_image(str(image_path), (384, 384))
        prediction = MODEL1.predict(image_tensor)
        embeddings[i, :] = prediction
    return ids, embeddings

# This function get the most similar train images for each test image based on cosine similarity
def get_similarities(train_csv, test_directory, train_directory):
    # Get target dictionary
    df = pd.read_csv(train_csv)
    df = df[['id', 'landmark_id']]
    df.set_index('id', inplace = True)
    df = df.to_dict()['landmark_id']
    # Extract the test ids and global feature for the test images
    test_ids, test_embeddings = generate_embeddings(test_directory)
    # Extract the train ids and global features for the train images
    train_ids, train_embeddings = generate_embeddings(train_directory)
    # Initiate a list were we will store the similar training images for each test image (also score)
    train_ids_labels_and_scores = [None] * test_embeddings.shape[0]
    # Using (slow) for-loop, as distance matrix doesn't fit in memory
    for test_index in range(test_embeddings.shape[0]):
        distances = spatial.distance.cdist(
            test_embeddings[np.newaxis, test_index, : ], train_embeddings, 'cosine')[0]
        # Get the indices of the closest images
        top_k = np.argpartition(distances, NUM_TO_RERANK)[:NUM_TO_RERANK]
        # Get the nearest ids and distances using the previous indices
        nearest = sorted([(train_ids[p], distances[p]) for p in top_k], key = lambda x: x[1])
        # Get the labels and score results
        train_ids_labels_and_scores[test_index] = [(df[train_id], 1.0 - cosine_distance) for \
                                                   train_id, cosine_distance in nearest]
        
    del test_embeddings
    del train_embeddings
    gc.collect()
    return test_ids, train_ids_labels_and_scores

# This function aggregate top simlarities and make predictions
def generate_predictions(test_ids, train_ids_labels_and_scores):
    targets = []
    scores = []
    
    # Iterate through each test id
    for test_index, test_id in enumerate(test_ids):
        aggregate_scores = {}
        # Iterate through the similar images with their corresponing score for the given test image
        for target, score in train_ids_labels_and_scores[test_index]:
            if target not in aggregate_scores:
                aggregate_scores[target] = 0
            aggregate_scores[target] += score
        # Get the best score
        target, score = max(aggregate_scores.items(), key = operator.itemgetter(1))
        targets.append(target)
        scores.append(score)
        
    final = pd.DataFrame({'id': test_ids, 'target': targets, 'scores': scores})
    final['landmarks'] = final['target'].astype(str) + ' ' + final['scores'].astype(str)
    final[['id', 'landmarks']].to_csv('submission.csv', index = False)
    return final

In [226]:
image_paths = [x for x in pathlib.Path(TEST_IMAGE_DIR).rglob('*.jpg')]

In [227]:
test_len = len(image_paths)
test_len

10345

In [229]:
test_ids, train_ids_labels_and_scores = get_similarities(DATASET_DIR, TEST_IMAGE_DIR, TRAIN_IMAGE_DIR)

  img = cv2.imencode('.jpg', img, (cv2.IMWRITE_JPEG_QUALITY, 100))[1].tostring()


In [230]:
final = generate_predictions(test_ids, train_ids_labels_and_scores)

In [231]:
final

Unnamed: 0,id,target,scores,landmarks
0,999dceaeeed8caf9,3835,0.313815,3835 0.31381453276392346
1,990255f7964cf539,132505,0.376544,132505 0.37654368407135674
2,990b80c71118df0a,117623,0.432461,117623 0.4324607164613049
3,997a84232ec25a53,33992,0.353848,33992 0.35384802615999456
4,997f68d90399dbe0,97345,0.520745,97345 0.5207448350738543
...,...,...,...,...
95,97dc0313f1693828,43728,0.301886,43728 0.30188577806035166
96,974867f7466fe7d9,136684,0.357139,136684 0.3571387137544382
97,9742af625b2f2b36,93712,0.501953,93712 0.5019533949269013
98,97477917f5651732,186426,0.301657,186426 0.3016572810764271


In [209]:
actuals = pd.read_csv('train_label_to_category.csv')

In [234]:
actuals[actuals['landmark_id']==2188]['category'].values[0]

'http://commons.wikimedia.org/wiki/Category:Glanfurt'

In [235]:
actuals

Unnamed: 0,landmark_id,category
0,0,http://commons.wikimedia.org/wiki/Category:Hap...
1,1,http://commons.wikimedia.org/wiki/Category:Lui...
2,2,http://commons.wikimedia.org/wiki/Category:Gra...
3,3,http://commons.wikimedia.org/wiki/Category:Twe...
4,4,http://commons.wikimedia.org/wiki/Category:San...
...,...,...
203089,203089,http://commons.wikimedia.org/wiki/Category:Isa...
203090,203090,http://commons.wikimedia.org/wiki/Category:Mar...
203091,203091,http://commons.wikimedia.org/wiki/Category:Sil...
203092,203092,http://commons.wikimedia.org/wiki/Category:Hoa_Lu


In [236]:
encoded_actuals = pd.read_csv(DATASET_DIR)
final_encoded = encoded_actuals[encoded_actuals['landmark_id'].isin(final['target'])]
final_encoded

Unnamed: 0,id,landmark_id,landmark_id_encode,group
0,17660ef415d37059,1,0,1.0
1,92b6290d571448f6,1,0,3.0
2,cd41bf948edc0340,1,0,0.0
3,fb09f1e98c6d2f70,1,0,2.0
4,25c9dfc7ea69838d,7,1,9.0
...,...,...,...,...
1580465,72c3b1c367e3d559,203092,81312,12.0
1580466,7a6a2d9ea92684a6,203092,81312,18.0
1580467,9401fad4c497e1f9,203092,81312,14.0
1580468,aacc960c9a228b5f,203092,81312,19.0


In [None]:
sample_df_classes_with_at_least_100.values