In [None]:
import os
import pandas as pd
import numpy as np
import random
import seaborn as sns
import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import tensorflow as tf #ignore error if no GPU set up on machine
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.python.keras.preprocessing.image import ImageDataGenerator
from tensorflow.python.keras import layers
#from tensorflow.python.keras.layers import Input, Conv2D, MaxPo

In [None]:
# Columns of train.csv:
#   id          -> image identifier
#   landmark_id -> class label identifying the landmark shown in the image
mainpath = '../input/landmark-recognition-2021/train'
traindf = pd.read_csv("../input/landmark-recognition-2021/train.csv")


def _image_path(image_id):
    """Return the file path for `image_id`.

    The path is built as <mainpath>/<1st char>/<2nd char>/<3rd char>/<id>.jpg,
    i.e. the first three characters of the id are used as nested folder names.
    """
    return os.path.join(mainpath, image_id[0], image_id[1], image_id[2], image_id + '.jpg')


# Attach the file path of every image to its row.
traindf['img_path'] = traindf['id'].map(_image_path)

# Distinct class labels, in order of first appearance in the csv.
landmark_unique = traindf['landmark_id'].unique()

traindf.head()

In [None]:
# Select the classes to train on, load their images and build the shuffled
# training set (train_data / labels_data).
img_paths = []    # file path of every selected image
labels = []       # original landmark_id of every selected image
temp_labels = []  # contiguous 0-based class index of every selected image
classSize = 1000  # number of landmark ids scanned; 1000 & 700 classes allocated too much memory and the notebook was restarted
imageSize = 224   # images are resized to imageSize x imageSize
minImgs = 100     # a class is kept only if it has more than minImgs images
maxImgs = 1000    # upper bound per class; the corresponding filter is currently disabled
i = 0             # running class index, incremented once per accepted class

for u_id in landmark_unique[:classSize]:
    # Compute the per-class selection once and reuse it for the size check and the loop.
    class_paths = traindf.loc[traindf['landmark_id'] == u_id, 'img_path']
    if class_paths.nunique() > minImgs:  # and class_paths.nunique() < maxImgs
        for path in class_paths:
            img_paths.append(path)
            labels.append(u_id)
            temp_labels.append(i)
        i = i + 1

# Load every selected image. Each image is stored together with its own path
# and class index so that the three always stay aligned.
transformed_imgs = []
samples = []  # (path, resized image, class index)
for img_path, class_idx in zip(img_paths, temp_labels):
    img_pix = cv2.imread(img_path, 1)  # flag 1 = colour image, channels in BGR order
    if img_pix is None:
        # cv2.imread returns None (no exception) for missing/corrupt files;
        # resizing None would crash, so such files are skipped.
        print("Skipping unreadable image: ", img_path)
        continue
    resized = cv2.resize(img_pix, (imageSize, imageSize))  # uniform size for training
    transformed_imgs.append(resized)
    samples.append((img_path, resized, class_idx))

# Mix up the training data so it is not organised in classes anymore.
# Images and labels are shuffled TOGETHER: shuffling only the (path, label)
# pairs while taking the images in their original order would pair every
# image with a label belonging to a different image.
random.shuffle(samples)

# Kept for backward compatibility: shuffled (path, class index) pairs.
lbl_path_set = [(path, class_idx) for path, _, class_idx in samples]

if samples:
    img_data, shuffled_imgs, labels_data = zip(*samples)
    train_data = list(shuffled_imgs)
else:
    # No class passed the filter (or no image could be read).
    img_data, labels_data = (), ()
    train_data = []

In [None]:
# Exploratory data analysis: dataset size, class statistics and a grid of
# random sample images.

# 1. How many images does the dataset consist of?
print("1. Dataset shape: ",traindf.shape)
# The training dataset consists of 1580470 images
#---------------------------------------------------
# 2. How many classes?
# Counting unique labels - nr. of classes
print("2. Total Nr. classes: ",len(landmark_unique))
# The training dataset consists of 81313 classes
#----------------------------------------------------
# 3. How many images per class?
# Computed once and reused below (including for the random class selection).
class_counts = traindf['landmark_id'].value_counts()
print("3. Images per class: ",class_counts)
# Number of images per class varies - from only 2 images/class up to 6272 images/class
#----------------------------------------------------
# 4. How many classes have less than 5 training samples?
newdf = pd.DataFrame(class_counts.reset_index().values, columns=["landmark_id", "images"])
vals = [n for n in newdf['images'] if n < 5]
print("4. ",len(vals),"categories with <5 samples")

# 17297 classes have less than 5 training samples
#-----------------------------------------------------------
# 5. How many classes have between 5 and 10 training samples?
vals = [n for n in newdf['images'] if n >= 5 and n <= 10]
print("5. ", len(vals)," categories with 5-10 samples")
# 27349 classes have between 5 and 10 training samples
#----------------------------------------------------------------
#Plot Data distribution
#plt.figure(figsize = (8, 8))
#binsize = 500#len(traindf.landmark_id.unique())
#plt.title('Landmark id density plot')
#sns.histplot(traindf['landmark_id'], color="tomato", kde=True, bins=binsize)
#plt.show()
#----------------------------------------------------------------
# 6. Show 4 random images from each of 4 random classes
n_classes = 4
n_per_class = 4

# replace=False guarantees the sampled classes are really different ones.
rand_classes = np.random.choice(class_counts.index, size=min(n_classes, len(class_counts)), replace=False)

images = []
for rand_class in rand_classes:
    rand_landmark = traindf[traindf['landmark_id'] == rand_class]
    for j in range(n_per_class):
        # Drawn with replacement: small classes may have fewer than 4 images.
        rand_nr = np.random.randint(0, len(rand_landmark))
        images.append(rand_landmark.iloc[rand_nr])

# One row per class, one column per sample.
fig, axes = plt.subplots(n_classes, n_per_class, figsize=(10, 10), squeeze=False)
for sp in axes.ravel():
    sp.axis('Off')

for sp, sample in zip(axes.ravel(), images):
    img = sample['id']
    cl = sample['landmark_id']
    new_path = mainpath+"/"+img[:1]+"/"+img[1]+"/"+img[2]+"/"+img+".jpg"
    imag = cv2.imread(new_path)
    if imag is None:
        # cv2.imread returns None for missing/corrupt files; leave the cell empty.
        continue
    # OpenCV loads BGR while matplotlib expects RGB - convert so colours are correct.
    sp.imshow(cv2.cvtColor(imag, cv2.COLOR_BGR2RGB))
    sp.set_title(str(cl), fontsize=8)

print("6. Samples from 4 different categories")
plt.show()

In [None]:
# Prepare the training data: stack the images into one array and one-hot
# encode the class indices.
# NOTE(review): the pixel values are not converted or rescaled here, so the
# network receives them exactly as stored in train_data.
X_data = np.array(train_data)
# to_categorical converts the integer class vector into a binary (one-hot) matrix.
Y_data = to_categorical(labels_data, num_classes = classSize)

# Split into training and validation sets (70 to 30 ratio).
testSize = 0.3
# Fixed random_state so the split is reproducible.
X_train, X_val, Y_train, Y_val = train_test_split(X_data, Y_data, test_size = testSize, random_state=42)

# Each label now names the array whose shape is actually printed.
print("X train data : ", X_train.shape)
print("Y train labels : ", Y_train.shape)
print("X val data : ",  X_val.shape)
print("Y val labels : ", Y_val.shape)

In [None]:
# Hyper-parameters to experiment with.

# --- training loop ---
batchSize = 64   # also tried: 128
epochSize = 10
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)  # also tried: 0.0001

# --- layer settings ---
kernelSize = 3
strideSize = 2
filterSize = 7   # also tried: 3
paddingSize = 'same'
activation_fct = 'relu'
poolSize = (2,2)  # 2x2 max pooling

# --- preprocessing ---
# Settings for the ImageDataGenerator, which performs image augmentation
# during training.
augmentation_args = {
    "horizontal_flip": True,
    "vertical_flip": True,
    "rotation_range": 20,
    "zoom_range": 0.2,
    "width_shift_range": 0.2,
    "height_shift_range": 0.2,
    "shear_range": 0.2,
    "fill_mode": "nearest",
}
datagen = ImageDataGenerator(**augmentation_args)

# Alternative (disabled): stream the images straight from the directory.
#train_data_gen = datagen.flow_from_directory(directory=mainpath, subset="training")
#val_dat_gen = datagen.flow_from_directory(directory=mainpath,subset="validation")



In [None]:
# Convolutional network: conv/max-pool blocks with growing filter counts
# (64 -> 128 -> 256 -> 512), followed by two 4096-unit dense layers and a
# softmax output with one unit per class.
model = keras.Sequential(name="my_model")

# The input shape is derived from imageSize so it always matches the size the
# training images were resized to (it was hard-coded to 244 while the images
# are 224x224).
model.add(layers.Conv2D(64, (3, 3),input_shape = (imageSize,imageSize,3),activation = 'relu'))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))
model.add(layers.Conv2D(64, (3, 3), padding="same", activation="relu"))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))

# 128-filter block
model.add(layers.Conv2D(128, (3, 3), padding="same", activation="relu"))
model.add(layers.Conv2D(128, (3, 3), padding="same", activation="relu"))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))

# 256-filter block
model.add(layers.Conv2D(256, (3, 3), padding="same", activation="relu"))
model.add(layers.Conv2D(256, (3, 3), padding="same", activation="relu"))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))

# first 512-filter block
model.add(layers.Conv2D(512, (3, 3), padding="same", activation="relu"))
model.add(layers.Conv2D(512, (3, 3), padding="same", activation="relu"))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))

# second 512-filter block
model.add(layers.Conv2D(512, (3, 3), padding="same", activation="relu"))
model.add(layers.Conv2D(512, (3, 3), padding="same", activation="relu"))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))

# classifier head
model.add(layers.Flatten())
model.add(layers.Dense(4096, activation="relu"))
model.add(layers.Dense(4096, activation="relu"))
# One output per one-hot column (the targets are encoded with num_classes = classSize).
model.add(layers.Dense(classSize, activation='softmax'))



In [None]:
#model leaning on VGG16 architecture
#model = keras.Sequential(name="my_model")
#first conv Block
#model.add(layers.Conv2D(filters=64,kernel_size=(3,3), padding = "same",activation = activation_fct, input_shape = (imageSize,imageSize,3)))#filters, kernel_size, stide(x,x),padding, 
#model.add(layers.Dropout(0.1))
#model.add(layers.Conv2D(filters=64,kernel_size=(3,3), padding="same", activation=activation_fct))
#model.add(layers.Dropout(0.1))
#model.add(layers.MaxPooling2D(pool_size=poolSize,strides=(2,2),padding="same"))
#second conv Block
#model.add(layers.Conv2D(filters=128, kernel_size=(3,3), padding="same", activation=activation_fct))
#model.add(layers.Dropout(0.1))
#model.add(layers.Conv2D(filters=128, kernel_size=(3,3), padding="same", activation=activation_fct))
#model.add(layers.Dropout(0.2))
#model.add(layers.MaxPooling2D(pool_size=poolSize,strides=(2,2),padding="same"))
#third Conv Block
#model.add(layers.BatchNormalization())
#model.add(layers.Conv2D(filters=256, kernel_size=(3,3), padding="same", activation=activation_fct))
#model.add(layers.Conv2D(filters=256, kernel_size=(3,3), padding="same", activation=activation_fct))
#model.add(layers.Conv2D(filters=256, kernel_size=(3,3), padding="same", activation=activation_fct))
#model.add(layers.MaxPooling2D(pool_size=poolSize,strides=(2,2),padding="same"))
#fourth Conv Block
#model.add(layers.BatchNormalization())
#model.add(layers.Conv2D(filters=512, kernel_size=(3,3), padding="same", activation=activation_fct))
#model.add(layers.Dropout(0.1))
#model.add(layers.BatchNormalization())
#model.add(layers.Conv2D(filters=512, kernel_size=(3,3), padding="same", activation=activation_fct))
#model.add(layers.Dropout(0.2))
#model.add(layers.BatchNormalization())
#model.add(layers.Conv2D(filters=512, kernel_size=(3,3), padding="same", activation=activation_fct))
#model.add(layers.Dropout(0.2))
#model.add(layers.MaxPooling2D(pool_size=poolSize,strides=(2,2),padding="same"))
#fifth Conv Block
#model.add(layers.BatchNormalization())
#model.add(layers.Conv2D(filters=512, kernel_size=(3,3), padding="same", activation=activation_fct))
#model.add(layers.Conv2D(filters=512, kernel_size=(3,3), padding="same", activation=activation_fct))
#model.add(layers.Conv2D(filters=512, kernel_size=(3,3), padding="same", activation=activation_fct))
#model.add(layers.MaxPooling2D(pool_size=poolSize,strides=(2,2),padding="same"))

          
#Fully Connected Layers
#model.add(layers.Flatten())
#model.add(layers.BatchNormalization())
#model.add(layers.Dense(4096, activation=activation_fct)) #4096
#model.add(layers.Dropout(0.5))
#model.add(layers.BatchNormalization())
#model.add(layers.Dense(4096, activation=activation_fct))
#model.add(layers.Dropout(0.5))
#model.add(layers.BatchNormalization())
#model.add(layers.Dense(classSize, activation='softmax'))

# Print a summary of `model`, i.e. the active network built in the previous
# cell (the VGG16-style variant above is entirely commented out).
model.summary()
# Earlier compile call, kept for reference; compilation now happens in the next cell.
#model.compile(loss='categorical_crossentropy', optimizer='adam',metrics=['accuracy']) 


In [None]:

# Configure the model for training on one-hot encoded targets.
model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

# Training batches come from the augmenting generator; the validation data is
# passed to fit() directly as arrays.
train_batches = datagen.flow(X_train, Y_train, batch_size=batchSize)
history = model.fit(
    train_batches,
    epochs=epochSize,
    validation_data=(X_val, Y_val),
)

In [None]:
# Learning curves recorded by model.fit(), one value per epoch.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))

# Plot training/validation accuracy.
# Each curve pair gets its own figure, created BEFORE plotting; calling
# plt.figure() after plotting leaves empty figures behind.
fig_acc, ax_acc = plt.subplots()
ax_acc.plot(epochs, acc, 'r', label='Training accuracy')
ax_acc.plot(epochs, val_acc, 'b', label='Validation accuracy')
ax_acc.set_title('Training and validation accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend(loc=0)

# Plot training/validation loss.
fig_loss, ax_loss = plt.subplots()
ax_loss.plot(epochs, loss, 'r', label='Training loss')
ax_loss.plot(epochs, val_loss, 'b', label='Validation loss')
ax_loss.set_title('Training and validation loss')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.legend(loc=0)

plt.show()


# Final metrics of the trained model on the un-augmented arrays.
# Note: from here on loss/acc/val_loss/val_acc hold single numbers instead of
# the per-epoch lists above.
loss, acc = model.evaluate(X_train,Y_train,verbose=0)
val_loss, val_acc = model.evaluate(X_val,Y_val,verbose=0)

print(f'Train acc: {acc*100:.3f} % || Validation acc: {val_acc*100:.3f} %')
# The cross-entropy loss is not a percentage, so it is reported as a plain value.
print(f'Train loss: {loss:.4f} || Validation loss: {val_loss:.4f}')

#V01:1000 classes (100-300), 5 conv layers, Batchsize 100, 30 epochs, lr 0.0001
#V001:1000 classes (100-500), 5 conv layers, Batchsize 100, 30 epochs, lr 0.0001 -> something like Baseline Architecture 
#V0001:1000 classes (50-550), 5 conv layers, Batchsize 100, 30 epochs, lr 0.0001 
#Batchnorm:
#V02:1000 classes (100-500), 5 conv layers, Batchsize 32, 30 epochs, w/ Batchnormalization after each activationFct lr 0.0075 
#Layers
#V03: 1000 classes (100-500), 3 conv layers (64,128,256,2048), Batchsize 32, 40 epochs, lr 0.001
#V04: 1000 classes (100-500), 8 conv layers (64*2,128*2,256*2,568,568,4096), Batchsize 32, 40 epochs, lr 0.001
#V05: 1000 classes (100-500), 5 conv layers (reduced stride to 2), Batchsize 64, 30 epochs, lr 0.001
#V06: 1000 classes (100-500), 5 conv layers (added Dropoutlayer (0.2 after conv-relu and 0.5 after dense)), Batchsize 100, 30 epochs, lr 0.001
#V07: 1000 classes (100-500), 5 conv layers (added Dropoutlayer (0.2 after conv-relu and 0.5 after dense)), Batchsize 100, 30 epochs, lr 0.001
#V08: 1000 classes (100-500), 5 conv layers (added Dropoutlayer (only one dropout 0.5 after dense)), Batchsize 100, 50 epochs, lr 0.00005)
#V09: 1000 classes (100-500), 5 conv layers, Batchsize 100, 40 epochs, lr 0.0001 - horizontal_flip=True, vertical_flip=True, rotation_range=20, zoom_range=0.2, width_shift_range=0.2, height_shift_range=0.2,
#V23: 1000 classes (100-500), 5 conv layers (added Dropoutlayers after each convolutional layer (0.1-0.2)), Batchsize 32, 40 epochs, lr 0.0001) + imageDataGen applied

#500 classes, Batchsize 32, 100 Epochs, LR 0.001 - 2.30h
#500 classes, Batchsize 32, 20 Epochs, LR 0.001 - 1 h
#500 classes, Batchsize 64, 20 Epochs, LR 0.001 - 30 min
#500 classes, 3 Conv Layers, Batchsize 64, 20 Epochs, LR 0.001 -> ResourceExhaustedError: OOM when allocating tensor with shape[401408,4096] on GPU -> reducing conv layers caused that (maybe jump from 128 to 512 was too much...having 256 inbetween solves the issue )
#500 classes, 3 Conv Layers, Batchsize 64, 20 Epochs, LR 0.0001 -> Ran out of memory
#500 classes, 3 Conv Layers, Batchsize 128, 10 Epochs, LR 0.001
#V6:500 classes (100-300), 5 conv layers, Batchsize 64,10 epochs, lr 0.001
#V7:500 classes (100-300), 5 conv layers, Batchsize 64,10 epochs, lr 0.0001 -> filtering the classes got rid of the memory issue
#V8:1000 classes (100-300), 5 conv layers, Batchsize 64,10 epochs, lr 0.0001 
#V9:10000 classes (100-300), 5 conv layers, Batchsize 285,10 epochs, lr 0.0001 ->  too big for memory
#V9:3000 classes (100-300), 5 conv layers, Batchsize 285,10 epochs, lr 0.0001 ->  too big for memory when running network
#V9:1000 classes (100-300), 5 conv layers, Batchsize 128,10 epochs, lr 0.0001 
#V10:1000 classes (100-300), 5 conv layers, Batchsize 128,10 epochs, lr 0.001 
#V11:1000 classes (100-300), 5 conv layers, Batchsize 64,10 epochs, lr 0.001 
#V12:1000 classes (100-300), 5 conv layers, Batchsize 64,10 epochs, lr 0.00001 
#V13:1000 classes (100-max), 5 conv layers, Batchsize 128, 10 epochs, lr 0.0001 -> ran out of memory
#V13:1000 classes (100-max), 5 conv layers, Batchsize 64, 10 epochs, lr 0.0001 
#V14:1000 classes (100-max), 5 conv layers, Batchsize 64, 20 epochs, lr 0.0001 
#V15:1000 classes (100-max), 5 conv layers, Batchsize 100, 30 epochs, lr 0.00001 
#V16:1000 classes (100-max), 5 conv layers, Batchsize 100, 30 epochs, lr 0.00001 w/ Batchnormalization -> best yet but probably overfitting?
#V17:1000 classes (100-max), 5 conv layers, Batchsize 128, 40 epochs, lr 0.001 w/ Batchnormalization
#V18:1000 classes (100-max), 5 conv layers, Batchsize 128, 40 epochs, lr 0.0001 w/ Batchnormalization -> train & test acc = 11%, loss = 11%
#V19:1000 classes (100-max), 5 conv layers, Batchsize 128, 40 epochs, lr 0.00001 w/ Batchnormalization -> both at 3%
#V20:1000 classes (100-max), 5 conv layers, Batchsize 80, 40 epochs, lr 0.00001 w/ Batchnormalization -> really bad
#V21:1000 classes (100-max), 5 conv layers, Batchsize 100, 40 epochs, lr 0.00001 w/ Batchnormalization -> really bad
#V22:1000 classes (100-max), 5 conv layers, Batchsize 64, 10 epochs, lr 0.0001 w/ Batchnormalization -> max 10%
#V22:1000 classes (100-max), 5 conv layers, Batchsize 64, 10 epochs, lr 0.0001 w/ Batchnormalization only before dense layers F-B-D-B-D-B-D
#Batch Norm:
#V01:1000 classes (100-300), 5 conv layers, Batchsize 100, 30 epochs, lr 0.0001 -> something like the baseline architecture run
#V02:1000 classes (100-300), 5 conv layers, Batchsize 100, 30 epochs, w/ Batchnormalization lr 0.0075 
#V03:1000 classes (100-300), 5 conv layers, Batchsize 100, 30 epochs, w/ Batchnormalization lr 0.0001 (take V18)
