# Load all the required libraries

In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import multiprocessing
from PIL import Image
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers import Activation, Dropout, Flatten, Dense, Conv2D, MaxPooling2D, SeparableConv2D, BatchNormalization, GlobalMaxPooling2D
from keras import Model
from keras.callbacks import ModelCheckpoint
from tensorflow.keras.applications.xception import Xception
from keras.models import load_model

In [None]:
# Load the competition metadata. `dtype=str` makes pandas keep every column
# (image ids and landmark labels alike) as text.
_LABEL_CSV_OPTIONS = {"dtype": str}
train_label = pd.read_csv('../input/landmark-recognition-2020/train.csv', **_LABEL_CSV_OPTIONS)
test_label = pd.read_csv('../input/landmark-recognition-2020/sample_submission.csv', **_LABEL_CSV_OPTIONS)

In [None]:
# Report the size of the raw train/test tables and the number of distinct
# landmark labels in the training table.
train_picture_count = len(train_label)
train_landmark_count = train_label["landmark_id"].nunique()
test_picture_count = len(test_label)

print("The total number of pictures in the train dataset:", train_picture_count)
print("The total number of landmarks in the train dataset:", train_landmark_count)
print("The total number of pictures in the test dataset:", test_picture_count)

In [None]:
# Count the training pictures per landmark, most frequent first, and expose
# the result as a two-column table (landmark_id, count).
landmark_frequency = train_label['landmark_id'].value_counts(sort=True, ascending=False)
train_sort = landmark_frequency.to_frame().reset_index()
train_sort.columns = ['landmark_id', 'count']
train_sort

In [None]:
# Draw a reproducible 30% sample of the rows inside every landmark_id group.
def _sample_landmark_rows(group):
    """Return a seeded 30% sample of the rows belonging to one landmark."""
    return group.sample(frac = 0.3, random_state = 123)


train_label_stratified = train_label.groupby("landmark_id", group_keys=False).apply(_sample_landmark_rows)

stratified_picture_count = len(train_label_stratified)
stratified_landmark_count = train_label_stratified["landmark_id"].nunique()
print("The total number of pictures in the stratified dataset:", stratified_picture_count)
print("The total number of landmarks in the stratified dataset:", stratified_landmark_count)

In [None]:
#train_label_stratified = train_label.loc[train_label['landmark_id'].isin([x for x in list(train_sort[0:2500].landmark_id)])]
#print("The total number of pictures in the selected dataset:", len(train_label_stratified))
#print("The total number of landmarks in the selected dataset:", train_label_stratified.landmark_id.nunique())


In [None]:
# Per-landmark picture counts of the sampled training table, as a
# two-column table (landmark_id, count).
stratified_frequency = train_label_stratified['landmark_id'].value_counts()
train_sort_stratified = stratified_frequency.to_frame().reset_index()
train_sort_stratified.columns = ['landmark_id', 'count']
train_sort_stratified

In [None]:
# Rewrite every image id as its relative file path inside the train/test
# folders: "<c0>/<c1>/<c2>/<id>.jpg", where c0..c2 are the first three
# characters of the id.
def _nested_jpg_path(ids):
    """Map a Series of image ids to '<c0>/<c1>/<c2>/<id>.jpg' relative paths."""
    first, second, third = ids.str[0], ids.str[1], ids.str[2]
    return first+"/"+second+"/"+third+"/"+ids+".jpg"


train_label_stratified["id"] = _nested_jpg_path(train_label_stratified.id)
test_label["id"] = _nested_jpg_path(test_label.id)

In [None]:
# Preview the first rows of the sampled training labels.
train_label_stratified.head()

In [None]:
# Preview the first rows of the test (sample submission) table.
test_label.head()

In [None]:
# Shared settings for the data generators and the model input.
val_split = 0.25      # passed to ImageDataGenerator as validation_split
batch_size = 128      # images per generated batch
img_width = 256
img_height = img_width

## Generate training data and validation data

In [None]:
# Build the training and validation generators from the sampled label table.
# Pixel values are rescaled by 1/255 and `val_split` of the rows is reserved
# for validation.
datagen = ImageDataGenerator(validation_split=val_split, rescale=1. / 255)

# `validation_split` carves the two subsets out of the dataframe by row
# position, not at random. The sampled table comes out of a groupby on
# landmark_id, so its rows are grouped by landmark; without shuffling the rows
# first, the validation subset would contain landmarks that never appear in
# the training subset. A seeded shuffle keeps the split reproducible.
shuffled_labels = train_label_stratified.sample(frac=1.0, random_state=123)

# Arguments shared by both subsets.
_flow_options = dict(dataframe=shuffled_labels,
                     directory="/kaggle/input/landmark-recognition-2020/train/",
                     x_col="id",
                     y_col="landmark_id",
                     batch_size=batch_size,
                     seed=123,
                     shuffle=True,
                     class_mode="categorical",
                     target_size=(img_width,img_height),
                     color_mode="rgb")

train_generator = datagen.flow_from_dataframe(subset="training", **_flow_options)

valid_generator = datagen.flow_from_dataframe(subset="validation", **_flow_options)

#test_datagen=ImageDataGenerator(rescale=1. / 255)

#test_generator=test_datagen.flow_from_dataframe(dataframe=test_label,
#                                                directory="/kaggle/input/landmark-recognition-2020/test/",
#                                                x_col="id",
#                                                y_col=None,
#                                                batch_size=batch_size,
#                                                seed=123,
#                                                shuffle=False,
#                                                class_mode=None,
#                                                target_size=(img_width,img_height))

## Define the model with pretrained Xception plus extra layers

In [None]:
# model 
def my_model(input_shape, num_classes, dropout, learning_rate = 0.0002):
    """Build and compile a classifier made of a frozen Xception base plus a custom head.

    The base is Xception with ImageNet weights and no top; it is marked
    non-trainable here. The head stacks three separable convolutions, a global
    max-pool, and two dense layers ending in a softmax over ``num_classes``.

    Args:
        input_shape: Shape passed to ``Xception(input_shape=...)``.
        num_classes: Number of filters of the last separable convolution and
            number of units of the softmax output layer.
        dropout: Rate used by both ``Dropout`` layers.
        learning_rate: Learning rate of the Adagrad optimizer.

    Returns:
        The compiled ``Sequential`` model. Its summary is printed as a side
        effect.
    """
    base_model = Xception(input_shape=input_shape,weights='imagenet', include_top=False)
    # Freeze the pretrained feature extractor; only the head below is trained
    # until the caller re-enables training.
    base_model.trainable = False

    x = Sequential()
    x.add(base_model)

    # Convolutional head; fixed initializer seeds keep the starting weights
    # reproducible.
    x.add(SeparableConv2D(64, kernel_size=(3, 3), activation='relu',kernel_initializer = tf.keras.initializers.he_uniform(seed=1)))
    x.add(BatchNormalization())
    x.add(SeparableConv2D(32, kernel_size=(3, 3), activation='relu',kernel_initializer = tf.keras.initializers.he_uniform(seed=3)))
    x.add(BatchNormalization())
    x.add(SeparableConv2D(num_classes,kernel_size = (3,3), depth_multiplier=1, activation = 'relu',
                kernel_initializer = tf.keras.initializers.he_uniform(seed=0),
                kernel_regularizer=tf.keras.regularizers.l1_l2(l1=0.1, l2=0.01)
                ))
    x.add(GlobalMaxPooling2D())
    x.add(BatchNormalization())
    x.add(Dropout(dropout))

    # Dense classifier. Flatten is kept from the original architecture even
    # though the pooled output is already one vector per image.
    x.add(Flatten())
    x.add(Dense(512, activation = 'relu'))
    x.add(Dropout(dropout))
    x.add(Dense(num_classes, activation = 'softmax'))

    # `metrics` is documented as a list; a bare string is not accepted by
    # every Keras release.
    # NOTE(review): `experimental_steps_per_execution` is a version-specific
    # argument name (later releases call it `steps_per_execution`) - confirm
    # against the installed TensorFlow.
    x.compile(loss='categorical_crossentropy',
              experimental_steps_per_execution=8,
              optimizer=tf.keras.optimizers.Adagrad(learning_rate=learning_rate),
              metrics=['categorical_accuracy'])
    x.summary()
    return x

In [None]:
# One output class per landmark present in the sampled training table.
num_classes = train_sort_stratified.shape[0]
model = my_model(
    input_shape=(img_width, img_height, 3),
    num_classes=num_classes,
    dropout=0.3,
)

In [None]:
# Callbacks shared by both training stages.

# Write the model to 'basic_cnn.h5' whenever the validation categorical
# accuracy improves.
checkpointer = ModelCheckpoint(
    'basic_cnn.h5',
    monitor='val_categorical_accuracy',
    save_best_only=True,
    verbose=1,
)

# Stop training once the validation loss has not improved for 5 epochs.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    verbose=1,
)

In [None]:
# Number of epochs for each training stage, and the number of whole batches
# available in the training / validation portions of the sampled table.
epochs = 20
batch_size = 128

stratified_row_count = len(train_label_stratified)
train_samples = int(stratified_row_count*(1-val_split))//batch_size
validation_samples = int(stratified_row_count*val_split)//batch_size

for batch_count in (train_samples, validation_samples):
    print(batch_count)

In [None]:
# Stage 1: train the new head while the Xception base stays frozen
# (it is marked non-trainable inside my_model).
history = model.fit(train_generator,
                    steps_per_epoch=40,
                    epochs=epochs,
                    callbacks=[checkpointer, early_stopping],
                    use_multiprocessing=True,
                    verbose=2,
                    validation_data=valid_generator,
                    validation_steps=10)

# `checkpointer` already stores the best epoch in 'basic_cnn.h5'. Saving the
# last-epoch model under that same name would overwrite the best checkpoint,
# so the final state goes to its own file.
model.save("basic_cnn_last.h5")

In [None]:
# Stage 2 setup: make the whole model trainable again, including the
# previously frozen Xception base, and recompile so the change takes effect.
model.trainable = True

# `metrics` is documented as a list; a bare string is not accepted by every
# Keras release.
model.compile(loss='categorical_crossentropy',
              experimental_steps_per_execution=8,
              optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
              metrics=['categorical_accuracy'])
model.summary()

In [None]:
# Stage 2: continue training the recompiled model with the same generators
# and callbacks, then store the final state.
_fine_tune_options = dict(
    steps_per_epoch=100,
    epochs=epochs,
    callbacks=[checkpointer, early_stopping],
    use_multiprocessing=True,
    verbose=1,
    validation_data=valid_generator,
    validation_steps=25,
)
history = model.fit(train_generator, **_fine_tune_options)

model.save("fined_cnn.h5")

In [None]:
# Load the saved fine-tuned model and prepare an unshuffled, unlabeled
# generator over the test images.
best_model = load_model("../input/fine-cnn-model/fined_cnn.h5")

# Re-read the submission template and turn each id into its relative path
# "<c0>/<c1>/<c2>/<id>.jpg" inside the test folder.
test_label = pd.read_csv('../input/landmark-recognition-2020/sample_submission.csv', dtype=str)
raw_test_ids = test_label.id
test_label["id"] = raw_test_ids.str[0]+"/"+raw_test_ids.str[1]+"/"+raw_test_ids.str[2]+"/"+raw_test_ids+".jpg"

test_datagen = ImageDataGenerator(rescale=1. / 255)

# shuffle=False keeps the generated batches in the row order of test_label.
_test_flow_options = dict(
    dataframe=test_label,
    directory="/kaggle/input/landmark-recognition-2020/test/",
    x_col="id",
    y_col=None,
    batch_size=64,
    seed=123,
    shuffle=False,
    class_mode=None,
    target_size=(256,256),
)
test_generator = test_datagen.flow_from_dataframe(**_test_flow_options)

In [None]:
# Predict class probabilities for the test images.
# `steps` counts batches, not images: len(test_generator) is the number of
# batches needed for one pass, whereas len(test_label) asked for one batch
# per image.
y_pred = best_model.predict(test_generator, verbose=1, steps=len(test_generator))

In [None]:
# For every prediction row keep the index of the most probable class and the
# probability assigned to it.
y_pred_label = y_pred.argmax(axis=-1)
y_prob = y_pred.max(axis=-1)
print(y_pred_label.shape, y_prob.shape)

In [None]:
def get_test_image_path(img_id):
    """Load one test image and return its pixels resized to 224x224.

    Despite the name, this returns image data, not a path.

    Args:
        img_id: Path of the image relative to the competition ``test``
            directory.

    Returns:
        A numpy array of the image after a 224x224 Lanczos resize.
    """
    image_path = f"../input/landmark-recognition-2020/test/{img_id}"

    # The context manager closes the underlying file as soon as the resized
    # copy has been converted, instead of leaving the handle open.
    with Image.open(image_path) as source:
        return np.array(source.resize((224, 224), Image.LANCZOS))

In [None]:
def get_train_image_path(img_id):
    """Load one training image and return its pixels resized to 224x224.

    Despite the name, this returns image data, not a path.

    Args:
        img_id: Path of the image relative to the competition ``train``
            directory.

    Returns:
        A numpy array of the image after a 224x224 Lanczos resize.
    """
    image_path = f"../input/landmark-recognition-2020/train/{img_id}"

    # The context manager closes the underlying file as soon as the resized
    # copy has been converted, instead of leaving the handle open.
    with Image.open(image_path) as source:
        return np.array(source.resize((224, 224), Image.LANCZOS))

In [None]:
# Rebuild the training tables restricted to the 200 most frequent landmarks.
train_label = pd.read_csv('../input/landmark-recognition-2020/train.csv', dtype=str)

# Landmark frequencies, most frequent first, as (landmark_id, count).
train_sort = pd.DataFrame(train_label['landmark_id'].value_counts(sort = True, ascending = False))
train_sort.reset_index(inplace=True)
train_sort.columns=['landmark_id','count']

# Keep only the rows of the 200 most frequent landmarks. `.copy()` makes the
# subset an independent frame, so the column assignment below does not write
# into a slice of `train_label` (SettingWithCopyWarning).
top_landmark_ids = train_sort[0:200].landmark_id
train_label_stratified = train_label.loc[train_label['landmark_id'].isin(top_landmark_ids)].copy()

train_sort_stratified = pd.DataFrame(train_label_stratified['landmark_id'].value_counts())
train_sort_stratified.reset_index(inplace=True)
train_sort_stratified.columns=['landmark_id','count']

# Rewrite each id as its relative path "<c0>/<c1>/<c2>/<id>.jpg".
train_label_stratified["id"] = train_label_stratified.id.str[0]+"/"+train_label_stratified.id.str[1]+"/"+train_label_stratified.id.str[2]+"/"+train_label_stratified.id+".jpg"

In [None]:
# Translate predicted class indices into landmark ids by looking them up in
# the sorted array of distinct landmark ids.
y_uniq = np.unique(train_label_stratified.landmark_id.values)
y_pred_label = list(y_uniq[y_pred_label])

## Extracting best and worst classifications from predictions

In [None]:
# Display the prediction table.
# NOTE(review): `temp_sub` is first assigned in the following cell; running
# the cells top to bottom raises NameError here - confirm the intended order.
temp_sub

In [None]:
# Build the prediction table: one row per test image, holding the predicted
# landmark id and the probability the model assigned to it.
# Work on a copy so the submission template in `test_label` is not mutated.
temp_sub = test_label.copy()
row_count = len(temp_sub)

# Whole-column assignment replaces the row-by-row `.loc` writes. Only the
# first `row_count` predictions are used, as in the original loop.
temp_sub["landmarks"] = [str(label) for label in y_pred_label[:row_count]]

# The cells below sort and title by the `pred` column, so it has to exist.
# Plain assignment (unlike `insert`) also stays valid when the cell is re-run.
temp_sub["pred"] = y_prob[:row_count]


In [None]:
# The 12 rows with the lowest predicted probability.
worst_preds = temp_sub.sort_values(by='pred').iloc[:12]
worst_preds

In [None]:
# The 12 rows with the highest predicted probability.
best_preds = temp_sub.sort_values(by='pred', ascending=False).iloc[:12]
best_preds

## 12 Worst classifications

In [None]:
# Draw the lowest-probability test images in a 3x4 grid, each titled with its
# predicted probability.
figure = plt.figure(figsize = (14, 14))
worst_images = worst_preds.id.values
worst_scores = worst_preds.pred.values

for slot, (path, score) in enumerate(zip(worst_images, worst_scores), start=1):
    image = get_test_image_path(path)
    figure.add_subplot(3, 4, slot)
    plt.title(score)
    plt.imshow(image)

## 12 Best classifications

In [None]:
# Draw the highest-probability test images in a 3x4 grid, each titled with
# its predicted probability.
figure = plt.figure(figsize = (14, 14))
best_images = best_preds.id.values
best_scores = best_preds.pred.values

for slot, (path, score) in enumerate(zip(best_images, best_scores), start=1):
    image = get_test_image_path(path)
    figure.add_subplot(3, 4, slot)
    plt.title(score)
    plt.imshow(image)

### Visual Comparison

In [None]:
# Training rows labelled with landmark '96663', re-indexed from 0.
is_landmark_96663 = train_label_stratified["landmark_id"] == '96663'
best_train = train_label_stratified.loc[is_landmark_96663].reset_index()
best_train

In [None]:
# Show up to nine training pictures of this landmark in a 3x3 grid, each
# titled with its relative file path.
figure = plt.figure(figsize = (14, 14))
# head(9) tolerates a landmark with fewer than nine rows, where a fixed
# range(9) label lookup would raise KeyError.
for slot, path in enumerate(best_train.id.head(9), start=1):
    image = get_train_image_path(path)
    figure.add_subplot(3, 3, slot)
    plt.title(path)
    plt.imshow(image)

In [None]:
# Training rows labelled with landmark '179959', re-indexed from 0.
is_landmark_179959 = train_label_stratified["landmark_id"] == '179959'
best_train_2 = train_label_stratified.loc[is_landmark_179959].reset_index()
best_train_2

In [None]:
# Show up to nine training pictures of this landmark in a 3x3 grid, each
# titled with its relative file path.
figure = plt.figure(figsize = (14, 14))
# head(9) tolerates a landmark with fewer than nine rows, where a fixed
# range(9) label lookup would raise KeyError.
for slot, path in enumerate(best_train_2.id.head(9), start=1):
    image = get_train_image_path(path)
    figure.add_subplot(3, 3, slot)
    plt.title(path)
    plt.imshow(image)

In [None]:
# Training rows labelled with landmark '126637', re-indexed from 0.
is_landmark_126637 = train_label_stratified["landmark_id"] == '126637'
worst_train = train_label_stratified.loc[is_landmark_126637].reset_index()
worst_train

In [None]:
# Show up to nine training pictures of this landmark in a 3x3 grid, each
# titled with its relative file path.
figure = plt.figure(figsize = (14, 14))
# head(9) tolerates a landmark with fewer than nine rows, where a fixed
# range(9) label lookup would raise KeyError.
for slot, path in enumerate(worst_train.id.head(9), start=1):
    image = get_train_image_path(path)
    figure.add_subplot(3, 3, slot)
    plt.title(path)
    plt.imshow(image)