In [None]:
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline

There has been a lot of confusion on how exactly we are supposed to submit our model. As the ***Data*** section of the competition states:
> Your model must be named submission.zip and be compatible with TensorFlow 2.2. The submission.zip should contain all files and directories created by the tf.saved_model_save function using Tensorflow's SavedModel format.

Now the question is what exactly in the [SavedModel](https://www.tensorflow.org/tutorials/keras/save_and_load#savedmodel_format) format we need to submit.

Also, the majority of us don't want to use TensorFlow to train our models, and we don't know how to preprocess the input. So we'll mainly tackle two things.

1. Use our own keras model in submission.
2. How to preprocess.

Let's get started.

Let's reverse engineer the model that the organisers gave us as a baseline. We'll use saved_model_cli to visualize its structure. You may want to check out this [discussion thread](https://www.kaggle.com/c/landmark-retrieval-2020/discussion/163589).

In [None]:
# Dump all signatures of the organisers' baseline SavedModel so we can read off
# the expected input/output tensor names, dtypes and shapes.
!saved_model_cli show --dir "../input/baseline-landmark-retrieval-model/baseline_landmark_retrieval_model" --all

Important things to notice are:

    inputs['input_image'] tensor_info:
    dtype: DT_UINT8
    shape: (-1, -1, 3)
        
    outputs['global_descriptor'] tensor_info:
    dtype: DT_FLOAT
    shape: (2048)

Armed with this information, let's create our own model.

In [None]:
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt

import os
import cv2
import glob

import tensorflow as tf
import keras
from keras.models import load_model, save_model
from keras.layers import Input, GlobalAveragePooling2D, GlobalMaxPooling2D
import keras.backend as K
from keras.models import Model, load_model
from keras.applications import VGG16
from keras.applications.vgg16 import preprocess_input

In [None]:
# Read the training metadata table and preview its first five rows.
train_df = pd.read_csv(filepath_or_buffer='../input/landmark-retrieval-2020/train.csv')
train_df.head(n=5)

In [None]:
# Look up the metadata row of one specific image id.
train_df.loc[train_df["id"] == '000171b259e48280']

In [None]:
# List every image row that is labelled with landmark id 5724.
train_df.loc[train_df["landmark_id"] == 5724]

In [None]:
# Count the rows labelled with landmark id 107382
# (non-null count of the table's second column, taken positionally).
number_image = train_df.loc[train_df["landmark_id"] == 107382].count().iloc[1]
number_image

In [None]:
# Print the positional index of every image of that landmark.
for number in np.arange(number_image):
    print(number)

The location of each picture is derived from its id: the first three characters of the id (each a digit or a letter) name the three nested sub-folders that contain the file.

In [None]:
# Peek into one leaf directory of the train folder to see how image files are named.
arr = os.listdir(path='../input/landmark-retrieval-2020/train/0/0/0/')
print(arr)

In [None]:
# Id of the second image that belongs to landmark 107382.
train_df.loc[train_df["landmark_id"] == 107382, "id"].values[1]

In [None]:
# The first three characters of an image id name its three nested sub-folders;
# collect them for the first image of landmark 107382.
folder_path_number = [
    character
    for character in train_df.loc[train_df["landmark_id"] == 107382, "id"].values[0][:3]
]

folder_path_number

In [None]:
# Assemble the full path of the first image of landmark 107382:
# train/<1st char>/<2nd char>/<3rd char>/<id>.jpg
nameofimage = train_df.loc[train_df["landmark_id"] == 107382, "id"].values[0]
image_path = f'../input/landmark-retrieval-2020/train/{folder_path_number[0]}/{folder_path_number[1]}/{folder_path_number[2]}/{nameofimage}.jpg'
image_path

In [None]:
# show the image
# Load the file located at `image_path` with OpenCV.
image = cv2.imread(image_path)
plt.figure(figsize = (12,9))
# The array is converted from BGR to RGB channel order before it is handed to matplotlib.
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
# Hide the axis ticks and frame; they carry no information for a photo.
plt.axis('off')

In [None]:
# Shape of the loaded image array; the last axis holds the 3 colour channels
# (still in BGR order here, which is why the plot above converts BGR -> RGB).
image.shape

In [None]:
# Show every training image of landmark 5724 side by side in a single row.
# Filter the table once instead of re-filtering it for every character of every id.
landmark_ids = train_df.loc[train_df["landmark_id"] == 5724, "id"].values
number_image = len(landmark_ids)
length = 8*number_image
# squeeze=False keeps `axes` a 2-D array even when the landmark has only one
# image (otherwise plt.subplots returns a bare Axes and indexing it fails);
# taking row 0 gives a 1-D array of Axes in every case.
fig, axes = plt.subplots(1, number_image, figsize=(length, 6), squeeze=False)
axes = axes[0]

for number, nameofimage in enumerate(landmark_ids):
    # Images are stored under train/<1st char>/<2nd char>/<3rd char>/<id>.jpg
    folder_path_number = list(nameofimage[:3])
    image_path = f'../input/landmark-retrieval-2020/train/{folder_path_number[0]}/{folder_path_number[1]}/{folder_path_number[2]}/{nameofimage}.jpg'
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals a missing/unreadable file by returning None.
        raise FileNotFoundError(f'could not read image: {image_path}')
    # OpenCV loads BGR; matplotlib expects RGB.
    axes[number].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    axes[number].axis('off')
    axes[number].set_title(f'id of jpg image: {nameofimage}')

In [None]:
# define the function to get the path of image
def get_path(landmark_id, df=None):
    """Return the file paths of all training images of one landmark.

    Each image is stored at
    ``../input/landmark-retrieval-2020/train/<c0>/<c1>/<c2>/<id>.jpg`` where
    ``c0``, ``c1`` and ``c2`` are the first three characters of the image id.

    Parameters
    ----------
    landmark_id :
        Value to match against the ``landmark_id`` column.
    df : pandas.DataFrame, optional
        Table with ``id`` and ``landmark_id`` columns. Defaults to the
        notebook-level ``train_df``.

    Returns
    -------
    list of str
        One path per matching row, in table order; empty if nothing matches.
    """
    if df is None:
        df = train_df

    # Filter once; each path is built from its OWN id (the previous version
    # reused the first id for every file name, so all paths but one were wrong).
    image_ids = df.loc[df["landmark_id"] == landmark_id, "id"].values

    return [
        f'../input/landmark-retrieval-2020/train/{image_id[0]}/{image_id[1]}/{image_id[2]}/{image_id}.jpg'
        for image_id in image_ids
    ]

In [None]:
# print the image with the same landmark id
def print_image(landmark_id):
    """Plot all training images of one landmark side by side in a single row.

    Reads the notebook-level ``train_df`` to find the image ids, loads each
    file with OpenCV and draws it on its own subplot, titled with the image id.

    Parameters
    ----------
    landmark_id :
        Value to match against the ``landmark_id`` column of ``train_df``.

    Raises
    ------
    ValueError
        If no row of ``train_df`` has this landmark id.
    FileNotFoundError
        If one of the image files cannot be read.
    """
    # Filter the table once instead of once per character of every id.
    image_ids = train_df.loc[train_df["landmark_id"] == landmark_id, "id"].values
    number_image = len(image_ids)
    if number_image == 0:
        raise ValueError(f'no images found for landmark_id {landmark_id}')

    # build a figure subplot within a row; squeeze=False keeps `axes` an array
    # even for a single image (plt.subplots would otherwise return a bare Axes).
    length = 8*number_image
    fig, axes = plt.subplots(1, number_image, figsize=(length, 6), squeeze=False)

    for ax, nameofimage in zip(axes[0], image_ids):
        # Images are stored under train/<1st char>/<2nd char>/<3rd char>/<id>.jpg
        image_path = f'../input/landmark-retrieval-2020/train/{nameofimage[0]}/{nameofimage[1]}/{nameofimage[2]}/{nameofimage}.jpg'
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals a missing/unreadable file by returning None.
            raise FileNotFoundError(f'could not read image: {image_path}')
        # OpenCV loads BGR; matplotlib expects RGB.
        ax.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        ax.axis('off')
        ax.set_title(f'id of jpg image: {nameofimage}')

In [None]:
# Paths of all images that belong to landmark 107382.
get_path(landmark_id=107382)

In [None]:
# Plot all images that belong to landmark 107382.
print_image(landmark_id=107382)

In [None]:
# How many distinct landmarks does the training set contain?
print('The number of the unique landmark id in the dataset is', train_df["landmark_id"].nunique())

There are varying shapes of images as you can see below, meaning we'll need to resize images inside the model.

In [None]:
# Print the array shape of up to ten images from one train sub-folder to show
# that the image sizes vary.
files = glob.glob("../input/landmark-retrieval-2020/train/a/b/c/*.jpg")
for image_file in files[:10]:  # slicing avoids an IndexError when fewer than 10 files match
    im = cv2.imread(image_file)
    if im is None:
        # cv2.imread returns None for unreadable files; report and move on.
        print(f"could not read {image_file}")
        continue
    print(im.shape)

Now let's load our model. In this case the vanilla VGG16 pretrained model of Keras for demonstration purposes. Since this is not trained on any retrieval dataset, the score will most probably be zero.

In [None]:
# Backbone: VGG16 without its classifier head, built with no weights and then
# loaded from the local weights file.
vgg = VGG16(include_top=False, weights=None, input_shape=(224, 224, 3))
vgg.load_weights("../input/keras-pretrained-models/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5")

# Feature extractor: 224x224x3 image -> VGG16 feature maps -> global max pooling.
input_image = Input(shape=(224, 224, 3))
x = vgg(input_image)
output = GlobalMaxPooling2D()(x)

model = Model(inputs=[input_image], outputs=[output])
model.summary()

Now the main part! The *input_image* will arrive in its own, variable shape, and hence we need to resize it within the model.

In [None]:
import tensorflow as tf

class MyModel(tf.keras.Model):
    """Serving wrapper that turns one raw image into a global descriptor.

    The wrapped feature extractor expects a fixed 224x224 input, while the
    serving signature receives a single uint8 image of arbitrary height and
    width. ``call`` therefore resizes and preprocesses the image inside the
    graph before running the extractor.
    """

    def __init__(self):
        super(MyModel, self).__init__()
        # `model` is the notebook-level Keras feature extractor defined earlier.
        self.model = model

    @tf.function(input_signature=[
      tf.TensorSpec(shape=[None, None, 3], dtype=tf.uint8, name='input_image')
    ])
    def call(self, input_image):
        """Compute the descriptor of one image.

        Parameters
        ----------
        input_image : tf.Tensor
            uint8 tensor of shape (height, width, 3), as fixed by the
            ``input_signature`` above.

        Returns
        -------
        dict
            ``{'global_descriptor': tensor}`` holding the extractor output for
            this image.
        """
        output_tensors = {}

        # resizing (the default bilinear resize yields a float32 tensor)
        im = tf.image.resize(input_image, (224,224))

        # preprocessing
        im = preprocess_input(im)

        # Add the batch axis while keeping the float dtype. The previous
        # tf.convert_to_tensor([im], dtype=tf.uint8) cast the preprocessed
        # image back to uint8, which discards the preprocessing result
        # (negative / fractional values cannot be represented in uint8).
        batch = tf.expand_dims(im, axis=0)

        # [0] drops the batch axis again so a single descriptor is returned.
        extracted_features = self.model(batch)[0]
        output_tensors['global_descriptor'] = tf.identity(extracted_features, name='global_descriptor')
        return output_tensors

Now we create and save our model instance.

In [None]:
# Instantiate the serving wrapper.
m = MyModel()

# Export it in SavedModel format, registering `call` as the default serving signature.
served_function = m.call
tf.saved_model.save(m, export_dir="./my_model", signatures={'serving_default': served_function})

In [None]:
# List the variable files that the export wrote; these must go into the submission archive.
!ls ./my_model/variables

In [None]:
import os
from zipfile import ZipFile

# Package the exported SavedModel as submission.zip. Every file under the
# export directory is added with a path relative to that directory, so the
# archive root contains saved_model.pb, variables/..., etc. Walking the
# directory (instead of hard-coding one variables shard name) keeps the archive
# complete if the export writes differently named or additional files.
export_dir = './my_model'

# Named `archive` rather than `zip` so the builtin zip() is not shadowed.
with ZipFile('submission.zip', 'w') as archive:
    for root, _dirs, filenames in os.walk(export_dir):
        for filename in sorted(filenames):
            file_path = os.path.join(root, filename)
            archive.write(file_path, arcname=os.path.relpath(file_path, export_dir))

Last but not least, let's visualize our model to see if the structure is as per the requirements.

In [None]:
# Dump all signatures of our exported model to compare its input/output tensors
# with those of the baseline inspected at the top of the notebook.
!saved_model_cli show --dir ./my_model/ --all

Please upvote and let me know if this helps!