**Google Landmark Recognition 2021 VGIS Mini Project**
This notebook contains the material of the third exercise for the Research in VGIS course.
The notebook will go through the tasks of the exercise.

In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import random
import cv2
import tensorflow as tf

#print(os.listdir('/kaggle/input/landmark-recognition-2021'))

In [None]:
# Root folder of the competition data. Every other location below is derived
# from this single value so that pointing the notebook at another copy of the
# dataset only requires changing this line.
path = '/kaggle/input/landmark-recognition-2021'
os.listdir(path)  # result is discarded; raises early if the dataset folder is missing


def _imagePathFor(root, imageId):
    """Return the .jpg location of `imageId` below `root`.

    The path is built as <root>/<c0>/<c1>/<c2>/<imageId>.jpg, where c0..c2 are
    the first three characters of the id.
    """
    return os.path.join(root, imageId[0], imageId[1], imageId[2], imageId + '.jpg')


train_images = f'{path}/train'
df_train = pd.read_csv(f'{path}/train.csv')
df_train['path'] = df_train['id'].apply(lambda f: _imagePathFor(train_images, f))

test_images = f'{path}/test'
df_test = pd.read_csv(f'{path}/sample_submission.csv')
df_test['path'] = df_test['id'].apply(lambda f: _imagePathFor(test_images, f))

# Defining the amount of classes and images in the training dataset.
nr_classes = df_train["landmark_id"].nunique(dropna = False)
nr_images = len(df_train)

#print("Number of classes in training dataset: ", nr_classes)
#print("Number of images in training dataset: ", nr_images)

In [None]:
# Histogram of data distribution, to show the amount of images in each class.
# One class goes higher than the histogram top, which is due to the class containing 6272 images.
#hist = plt.figure(figsize = (10, 10))
#ax = plt.hist(df_train["landmark_id"], bins = df_train["landmark_id"].unique())
#plt.ylim([0, 100])
#plt.show()

In [None]:
# Showing the number of classes containing 5 or fewer images.
# The same is done for classes containing between 5 and 10 images.
#classes = ax[0]
#from0To5 = len(classes[classes <= 5])
#from5To10 = len(classes[classes <= 10] - from0To5)
#print("Number of classes with 0 to 5 images: ", from0To5)
#print("Number of classes with 5 to 10 images: ", from5To10)

In [None]:
# Overall picture of the data distribution: summary statistics of the
# number of images per landmark class.
landmarkIds = df_train['landmark_id']
ValueCounts = landmarkIds.value_counts()
ValueCounts.describe()

In [None]:
# Visualize 4 sample images from each of 4 randomly chosen classes.
N_CLASSES_SHOWN = 4
N_IMAGES_PER_CLASS = 4

displayImages = []
for _ in range(N_CLASSES_SHOWN):
    # ValueCounts is indexed by landmark_id, so a random position picks a random class.
    randomId = ValueCounts.index[np.random.randint(0, nr_classes)]
    randomClass = df_train[df_train['landmark_id'] == randomId]
    for _ in range(N_IMAGES_PER_CLASS):
        # Rows are drawn with replacement, so a small class can repeat an image.
        randomImage = randomClass.iloc[np.random.randint(0, len(randomClass))]
        displayImages.append(randomImage)

fig, axes = plt.subplots(N_CLASSES_SHOWN, N_IMAGES_PER_CLASS, figsize = (15, 10))
for ax, sample in zip(axes.flat, displayImages):
    ax.axis('off')
    # Columns are addressed by name; positional access on a labelled Series is deprecated.
    img = cv2.imread(sample['path'])
    if img is None:
        # cv2.imread signals a missing/corrupt file by returning None.
        ax.set_title(f'unreadable image: {sample["id"]}', fontsize = 8)
        continue
    # OpenCV loads images as BGR while matplotlib expects RGB.
    ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    ax.set_title(f'landmark id: {sample["landmark_id"]} ', fontsize = 8)
plt.show()

In [None]:
# Setting up hyperparameters and splitting training data into train and val
def imagePath(imgPath):
    """Load and resize every image listed in `imgPath`.

    Parameters
    ----------
    imgPath : iterable of str
        File paths of the images to load.

    Returns
    -------
    list
        One array per input path, read by OpenCV as a 3-channel colour image
        (OpenCV channel order, i.e. BGR) and resized to
        (img_size, img_size). `img_size` is read from the notebook's globals
        at call time.

    Raises
    ------
    OSError
        If OpenCV cannot read one of the files (missing or corrupt).
    """
    images = []
    for imgFile in imgPath:
        imgPic = cv2.imread(imgFile, cv2.IMREAD_COLOR)
        if imgPic is None:
            # cv2.imread does not raise on failure; without this check the
            # error would only surface as an opaque assertion in cv2.resize.
            raise OSError(f'Could not read image file: {imgFile}')
        images.append(cv2.resize(imgPic, (img_size, img_size)))

    return images

# Hyperparameters
# Number of passes over the training data given to model.fit.
epochs = 50
# Batch size used both for the training generator and for prediction.
batch_size = 32
# Images are resized to img_size x img_size; also the network's input size.
img_size = 128
# Fraction of the shuffled images used for training + validation.
# The remaining images are held back and used as the test set.
train_split = 0.7
# Fraction of the training portion that train_test_split sets aside for validation.
val_split = 0.2
# Maximum number of classes selected for training and width of the softmax layer.
# Not to be confused with nr_classes (all classes present in train.csv).
nrClasses = 120

# Setting up dataset for training
# Only classes with strictly more than MIN and strictly fewer than MAX images
# are used. Try lowering MIN_IMAGES_PER_CLASS to 25 to see if higher accuracy
# is achieved.
MIN_IMAGES_PER_CLASS = 50
MAX_IMAGES_PER_CLASS = 500

imgList = []      # image file paths of all selected classes
labels = []       # original landmark_id for every entry of imgList
temp_labels = []  # consecutive class index (0 .. nrClasses - 1) for every entry of imgList

# A single groupby pass replaces re-filtering the whole frame for every label.
# sort = False keeps the classes in order of first appearance, which is the
# order df_train['landmark_id'].unique() would produce.
i = 0
for lbl, classPaths in df_train.groupby('landmark_id', sort = False)['path']:
    if i == nrClasses:
        break
    nImages = classPaths.nunique()
    if MIN_IMAGES_PER_CLASS < nImages < MAX_IMAGES_PER_CLASS:
        # The loop variable is deliberately not called `path`, so the global
        # dataset root of that name is left intact.
        imgList.extend(classPaths)
        labels.extend([lbl] * len(classPaths))
        temp_labels.extend([i] * len(classPaths))
        i = i + 1

# Random shuffle dataset, so it is no longer set up in classes.
# A dedicated, seeded generator makes the train/test partition reproducible
# across kernel restarts without touching the global `random` state.
SPLIT_SEED = 101
shuff = list(zip(imgList, temp_labels))
if not shuff:
    # zip(*[]) below would otherwise fail with an unhelpful unpacking error.
    raise ValueError('No class matched the selection criteria; there is nothing to train on.')
random.Random(SPLIT_SEED).shuffle(shuff)

imgList, lbls = zip(*shuff)

# Preparing data to be split into train and val.
# The first `train_split` share of the shuffled images is used here; the rest
# is left untouched for testing.
imgNr = round(len(imgList) * train_split)

trainImages = imgList[:imgNr]
print("Images being resized: ", len(trainImages))
trainData = imagePath(trainImages)
trainLabels = lbls[:imgNr]

print("Number of training images: ", len(trainData))
print("Number of training labels: ", len(trainLabels))

# Setting images and labels to be split into x_train, y_train, x_val and y_val.
# float32 halves the memory footprint compared with NumPy's default float64.
xData = np.asarray(trainData, dtype = np.float32) / 255.0
yData = tf.keras.utils.to_categorical(trainLabels, num_classes = nrClasses)

x_train, x_val, y_train, y_val = train_test_split(xData, yData, test_size = val_split, random_state = SPLIT_SEED)

# Setting up data generator.
# Random zoom is the only augmentation that is switched on; every other
# transform is explicitly disabled.
dataGenerator = tf.keras.preprocessing.image.ImageDataGenerator(horizontal_flip = False,
                                                                vertical_flip = False,
                                                                rotation_range = 0,
                                                                zoom_range = 0.2,
                                                                width_shift_range = 0,
                                                                height_shift_range = 0,
                                                                shear_range = 0,
                                                                fill_mode = "nearest")

# Plain Adam with a constant learning rate.
opt = tf.keras.optimizers.Adam(learning_rate = 0.001)

# Adam with inverse-time learning-rate decay: lr = 0.001 / (1 + 0.0001 * step).
# This is the formula the legacy `decay = 0.0001` argument applied; newer Keras
# releases reject or ignore that argument, so the decay is expressed as a
# schedule instead.
lrSchedule = tf.keras.optimizers.schedules.InverseTimeDecay(initial_learning_rate = 0.001,
                                                            decay_steps = 1,
                                                            decay_rate = 0.0001)
opt2 = tf.keras.optimizers.Adam(learning_rate = lrSchedule, beta_1 = 0.9, beta_2 = 0.999,
                                epsilon = 1e-08)

**Creating the CNN**

In [None]:
# ResNet101 backbone with ImageNet weights and a custom classification head.
# Swap in tf.keras.applications.resnet.ResNet50 to experiment with a shallower backbone.
# NOTE(review): the images fed to this model are only scaled to [0, 1]; ImageNet
# weights are normally paired with tf.keras.applications.resnet.preprocess_input -
# confirm that this is intended.
ResNet101 = tf.keras.applications.resnet.ResNet101(input_shape = (img_size, img_size, 3),
                                                   include_top = False,
                                                   weights = 'imagenet',
                                                   pooling = 'avg')

# Classification head: two 4096-unit ReLU layers, each preceded by 20 % dropout,
# followed by a softmax over the nrClasses selected landmark classes.
inputs = ResNet101.input
# With pooling = 'avg' the backbone output should already be one vector per
# image, so Flatten is presumably a no-op; it is kept to leave the layer graph unchanged.
flatten = tf.keras.layers.Flatten()(ResNet101.output)
dropout1 = tf.keras.layers.Dropout(0.2)(flatten)
dense1 = tf.keras.layers.Dense(units = 4096, activation = "relu")(dropout1)
dropout2 = tf.keras.layers.Dropout(0.2)(dense1)
dense2 = tf.keras.layers.Dense(units = 4096, activation = "relu")(dropout2)
dropout3 = tf.keras.layers.Dropout(0.2)(dense2)
output = tf.keras.layers.Dense(units = nrClasses, activation = "softmax")(dropout3)
model = tf.keras.Model(inputs = inputs, outputs = output)

# summary() prints the table itself and returns None, so it must not be wrapped in print().
model.summary()

In [None]:
# Compile the network: categorical cross-entropy on the one-hot labels,
# optimised with opt2 and tracked by accuracy.
model.compile(
    loss = "categorical_crossentropy",
    optimizer = opt2,
    metrics = ['accuracy'],
)

#es_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

# Training batches come from the augmenting generator; validation uses the
# held-out arrays directly.
trainBatches = dataGenerator.flow(x_train, y_train, batch_size = batch_size)
history = model.fit(
    trainBatches,
    epochs = epochs,
    validation_data = (x_val, y_val),
)

In [None]:
# Plotting the performance of the model: one figure per metric, each showing
# the training curve against the validation curve.
# The titles name ResNet101 because that is the backbone the model is built on.
for metricKey, metricName in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
    fig, ax = plt.subplots()
    ax.plot(history.history[metricKey], label = 'Train')
    ax.plot(history.history['val_' + metricKey], label = 'Val')
    ax.set_title(f'ResNet101 {metricName}')
    ax.set_ylabel(metricName)
    ax.set_xlabel('Epoch')
    ax.legend(loc = 'upper left')
    plt.show()

In [None]:
# Predictions of the model -> running model on test data
from sklearn.metrics import precision_recall_fscore_support as score
from sklearn import metrics


def _showPrediction(image, trueLabel, predictedLabel, conf):
    """Display one test image with its true label, predicted label and confidence.

    `image` is expected in OpenCV channel order (BGR), as returned by
    imagePath, and is converted to RGB for matplotlib.
    """
    title = ('True label: ' + str(trueLabel) + '_' + 'Predicted label: ' + str(predictedLabel) + '_' + 'confidence: ' + str(conf))
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.title(title, fontsize = 10)
    plt.axis('Off')
    plt.show()


# Every image after the train/validation portion of the shuffled list is test data.
testPaths = imgList[len(trainImages):]
testImages = imagePath(testPaths)
testLabels = lbls[len(trainLabels):]

testData = np.array(testImages) / 255
# Predict on the array itself. Routing it through dataGenerator.flow() would
# shuffle the samples (shuffle defaults to True) and apply random zoom, so the
# predictions would no longer line up with testLabels.
testPrediction = model.predict(testData, batch_size = batch_size)

# Per-sample confidence (highest softmax score) and predicted class index.
confidence = list(testPrediction.max(axis = 1))
goodAcc = list(testPrediction.argmax(axis = 1))
badAcc = list(goodAcc)  # kept because the original cell defined it; identical to goodAcc

precision, recall, fscore, support = score(testLabels, goodAcc, labels = np.unique(goodAcc))

print(metrics.confusion_matrix(testLabels, goodAcc))
print(metrics.classification_report(testLabels, goodAcc, digits = 3))

# Show every correct prediction, plus the wrong predictions the model was
# either very sure (> 0.98) or very unsure (< 0.05) about. Other samples are
# skipped without creating a figure.
for i in range(len(goodAcc)):
    if testLabels[i] == goodAcc[i]:
        category = "Perfect Label Match"
    elif confidence[i] > 0.98:
        category = "High Confidence"
    elif confidence[i] < 0.05:
        category = "Poor Confidence"
    else:
        continue

    print(category)
    _showPrediction(testImages[i], testLabels[i], goodAcc[i], confidence[i])