# Imports

In [None]:
!pip install -q efficientnet

In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
import math
import tensorflow as tf
from tensorflow import keras
from sklearn import model_selection
from PIL import Image
import matplotlib.pyplot as plt
import tensorflow_addons as tfa
import efficientnet.tfkeras as efn
import h5py

# Directories

In [None]:
# Root folder of the competition data.
data_dir = Path('../input/landmark-recognition-2021')

# Image folders for the training and test sets.
train_data_dir = data_dir.joinpath('train')
test_data_dir = data_dir.joinpath('test')

# CSV files: training labels and the sample submission.
train_label_file = data_dir.joinpath('train.csv')
sample_file = data_dir.joinpath('sample_submission.csv')

In [None]:
original_label = pd.read_csv(train_label_file, index_col='id')
sub = pd.read_csv(sample_file, index_col='id')

# Preprocessing of the Data

In [None]:
original_label

In [None]:
# Shuffle all rows before the positional train/hold-out split.
# A fixed random_state keeps the split (and everything derived from it:
# the selected classes, the saved class mapping, the reported accuracy)
# identical from one notebook run to the next.
original_label = original_label.sample(frac=1, random_state=42)

In [None]:
original_label # Display the shuffled data

In [None]:
# Hold out the first 20% of the shuffled rows as a local test set and keep
# the remaining 80% for training.
leny = len(original_label)
border = int(0.2 * leny)
# .copy() gives each split its own data.  Later cells overwrite
# label['landmark_id']; on a plain slice that triggers pandas'
# SettingWithCopyWarning and may write through to original_label.
test_label = original_label[:border].copy()
label = original_label[border:].copy()

In [None]:
len(label)

In [None]:
label['landmark_id'].value_counts()

In [None]:
# Count the images per landmark once and split the result into the counts
# (`values`) and the matching landmark ids (`indexes`).
landmark_counts = label['landmark_id'].value_counts()
values, indexes = landmark_counts.values, landmark_counts.index

In [None]:
values = np.array(values)
indexes = np.array(indexes)

In [None]:
values

In [None]:
cutter = 50 # How many labels we are trying to predict correctly

In [None]:
values[:cutter]

In [None]:
indexes[:cutter]

In [None]:
# Alternative rule (disabled): discard every class with fewer than 1000 images.
# get_rid = indexes[values < 1000]
# Landmark ids to discard: everything after the first `cutter` entries of the
# value_counts() ordering.
get_rid = indexes[cutter:]

In [None]:
get_rid

In [None]:
label

In [None]:
len(get_rid)

In [None]:
# Take an explicit copy: this array is modified in place further down.
# Without copy=True, to_numpy() may return a view of the DataFrame's buffer,
# which is either silently written through or read-only (pandas
# copy-on-write).
label_numpy = label['landmark_id'].to_numpy(copy=True)

In [None]:
label_numpy

In [None]:
mask = np.isin(label_numpy, get_rid)

In [None]:
mask

In [None]:
# Collapse every discarded landmark id into the placeholder class 0.
label_numpy[mask] = 0

In [None]:
label['landmark_id'] = label_numpy

In [None]:
label

In [None]:
# Keep at most the last 100 rows of every landmark_id group.
label = label.groupby('landmark_id').tail(100)

In [None]:
label.landmark_id.value_counts()

In [None]:
label

In [None]:
# Remove the rows that were collapsed into the placeholder class 0.
placeholder_rows = label.index[label['landmark_id'] == 0]
label = label.drop(index=placeholder_rows)

In [None]:
label.landmark_id.value_counts()

In [None]:
indexes = label.landmark_id.value_counts().index

In [None]:
indexes

In [None]:
index_numpy = np.array(indexes)

In [None]:
landmark_id_numpy = label.landmark_id.to_numpy()

In [None]:
landmark_id_numpy

In [None]:
# Re-encode the original landmark ids as contiguous class indices, where
# class i corresponds to index_numpy[i].
# A lookup table is used instead of repeated in-place replacement
# (arr[arr == landmark] = i): that form re-matches entries that were already
# converted whenever a landmark id is numerically equal to a class index
# assigned earlier, silently merging two classes.  Building a new array also
# avoids writing into a buffer that may be shared with the DataFrame.
class_of_landmark = {landmark: i for i, landmark in enumerate(index_numpy)}
landmark_id_numpy = np.array(
    [class_of_landmark[landmark] for landmark in landmark_id_numpy],
    dtype=landmark_id_numpy.dtype,
)

In [None]:
label['landmark_id'] = landmark_id_numpy

In [None]:
label.landmark_id.value_counts()

In [None]:
# Number of distinct classes left after filtering; used as the size of the
# network's output layer.
no_classes = label['landmark_id'].nunique()

In [None]:
def id_to_path(s, train=True):
    """Return the path of the JPEG file for image id ``s``.

    Files are nested three directories deep, one level per leading character
    of the id: ``<root>/<s[0]>/<s[1]>/<s[2]>/<s>.jpg``.  ``<root>`` is
    ``train_data_dir`` when ``train`` is truthy and ``test_data_dir``
    otherwise.
    """
    if train:
        root = train_data_dir
    else:
        root = test_data_dir
    return root.joinpath(s[0], s[1], s[2], f'{s}.jpg')

In [None]:
# Shape handed to the network as `input_shape`.
input_size = (300, 300, 3)
# Batch size for the data generators, the nominal epoch count, and the
# random seed used for the train/validation split.
batch_size, n_epoch, seed = 32, 10, 42

In [None]:
newsize = (300, 300)
# # x = np.asarray((Image.open(id_to_path('fd80b73a476ae8a1')).resize(newsize)))
# x = np.array(Image.open(id_to_path('fd80b73a476ae8a1')).resize(newsize))

In [None]:
# x.shape

In [None]:
# plt.imshow(x)

In [None]:
class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that loads images from disk one batch at a time.

    Parameters
    ----------
    x_set : sequence of str
        Image ids; each one is resolved to a file with ``id_to_path``.
    y_set : sequence, optional
        Labels aligned with ``x_set``.  When omitted, images are read from
        the test directory and ``__getitem__`` returns the inputs only.
    batch_size : int
        Number of images per batch (the last batch may be smaller).
    image_size : tuple of int
        ``(width, height)`` every image is resized to.
    """

    def __init__(self, x_set, y_set=None, batch_size=32, image_size=(300, 300)):
        super().__init__()
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size
        self.image_size = image_size
        # Labelled data lives in the training folder, unlabelled in the test one.
        self.is_train = y_set is not None

    def __len__(self):
        """Return the number of batches per epoch."""
        return math.ceil(len(self.x) / self.batch_size)

    def _load_image(self, image_id):
        """Read one image as a ``(height, width, 3)`` uint8 array."""
        # The context manager closes the file handle as soon as the pixels
        # have been copied into the resized image.
        with Image.open(id_to_path(image_id, self.is_train)) as img:
            # convert("RGB") guarantees exactly three channels; without it a
            # grayscale, CMYK or RGBA file yields a differently shaped array
            # and the batch can no longer be stacked.
            return np.asarray(img.convert("RGB").resize(self.image_size))

    def __getitem__(self, idx):
        """Return batch ``idx``: ``(batch_x, batch_y)`` or just ``batch_x``.

        ``batch_x`` is a float32 array scaled to ``[0, 1]``.
        """
        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_ids = self.x[start:stop]

        batch_x = np.array([self._load_image(image_id) for image_id in batch_ids])
        # float32 is the dtype the network computes in; float64 would only
        # double the memory of every batch.
        batch_x = batch_x.astype("float32") / 255

        if self.is_train:
            return batch_x, self.y[start:stop]
        return batch_x

In [None]:
# Number of distinct classes.  The column is selected by name:
# `label.nunique()[0]` relies on positional indexing of a label-indexed
# Series, which pandas has deprecated.
label['landmark_id'].nunique()

# Architecture of the Model

In [None]:
# ImageNet-pretrained EfficientNetB3 feature extractor (no classifier head).
backbone = efn.EfficientNetB3(
    input_shape=input_size,
    weights='imagenet',
    include_top=False,
)

# Backbone -> global average pooling -> softmax over the selected classes.
model = tf.keras.Sequential([
    backbone,
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dense(no_classes, activation='softmax'),
])

model.summary()

In [None]:
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Dense, Dropout, Flatten
# from tensorflow.keras.layers import Conv2D, MaxPooling2D

# input_shape = x.shape

# # Create the model
# model = Sequential()
# model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
# model.add(MaxPooling2D(pool_size=(2, 2)))
# model.add(Dropout(0.25))
# model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
# model.add(MaxPooling2D(pool_size=(2, 2)))
# model.add(Dropout(0.25))
# model.add(Flatten())
# model.add(Dense(256, activation='relu'))
# model.add(Dense(no_classes, activation='softmax'))

In [None]:
# model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-3),
#               loss= tf.keras.losses.SparseCategoricalCrossentropy(
#     from_logits=False, reduction="auto", name="sparse_categorical_crossentropy"
# ), metrics=[keras.metrics.AUC()])

In [None]:
# model.compile(optimizer='adam',
# #               loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
#               loss=tfa.losses.SigmoidFocalCrossEntropy(),
#               metrics=['accuracy'])

# Compiling the Model

In [None]:
# Labels are integer class indices and the network ends in a softmax, hence
# sparse categorical cross-entropy; Adam is used with its default settings.
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.sparse_categorical_crossentropy,
    metrics=['accuracy'],
)

# Fitting the Model

In [None]:
# Image ids (x0) and re-encoded class indices (y0) of the filtered training data.
x0 = label.index.values
y0 = label['landmark_id'].values
# Image ids and landmark ids of the local hold-out split. These values are not
# used in this cell; x2/y2 are reassigned in a later cell.
x2 = test_label.index.values
y2 = test_label['landmark_id'].values

# Image ids of the submission file (no labels).
x1 = sub.index.values

# 80/20 train/validation split of the filtered data, seeded for reproducibility.
x_train, x_val, y_train, y_val = model_selection.train_test_split(x0, y0, test_size=.2, random_state=seed)
# Alternative (disabled): train on everything and validate on the hold-out split.
# x_train, y_train = x0, y0
# x_val, y_val = x2, y2

# Generators with labels read from the training folder; `test` has no labels
# and therefore reads from the test folder.
train = DataGenerator(x_train, y_train, batch_size=batch_size)
val = DataGenerator(x_val, y_val, batch_size=batch_size)
test = DataGenerator(x1, batch_size=batch_size)

# NOTE: the epoch count is hard-coded to 5 here; n_epoch is not used.
# history = model.fit(train, validation_data=val, epochs=n_epoch)
history = model.fit(train, validation_data=val, epochs=5)

# Learning Curves of the Model

In [None]:
import matplotlib.pyplot as plt

# One figure per tracked metric, each showing the training and validation
# curve over the epochs.
for fig_num, metric in enumerate(('loss', 'accuracy')):
    plt.figure(num=fig_num)
    plt.title(metric)
    plt.plot(history.history[metric], label='train')
    plt.plot(history.history['val_' + metric], label='validation')
    plt.legend()

In [None]:
model.save("model_name.h5")

In [None]:
label

In [None]:
test_label

In [None]:
# Evaluate on the first 1000 rows of the local hold-out split.
valid_label = test_label[:1000]
x2 = valid_label.index.values
# y2 holds landmark ids as stored in test_label, which is not passed through
# the class re-encoding applied to `label`.
y2 = valid_label['landmark_id'].values
# Replaces the validation arrays and generator used during training.
x_val, y_val = x2, y2
val = DataGenerator(x_val, y_val, batch_size=batch_size)

# Predictions on the validation set

In [None]:
# prediction = model.predict(test).flatten()
# Class probabilities for the hold-out images, one row per image.
prediction = model.predict(val)
# Most likely class per image, translated back to the original landmark id
# with a single array lookup (class i -> index_numpy[i]).  Replacing values
# in place one class at a time would re-match entries that were already
# translated whenever a landmark id equals a later class index.
predictions = index_numpy[np.argmax(prediction, axis=1)]

In [None]:
# Number of distinct landmark ids in the validation sample (column selected
# by name; `.nunique()[0]` relies on deprecated positional indexing).
valid_label['landmark_id'].nunique()

In [None]:
# Accuracy of a uniform random guess over the landmark ids present in the
# validation sample (column selected by name; `.nunique()[0]` relies on
# deprecated positional indexing).
1 / valid_label['landmark_id'].nunique()

In [None]:
# Fraction of validation images whose predicted landmark id is correct.
np.mean(predictions == y_val)

# Predictions on the test set

In [None]:
# prediction = model.predict(test).flatten()
prediction = model.predict(test)

In [None]:
scores = prediction.max(axis=1)

In [None]:
scores.shape

In [None]:
predictions = np.argmax(prediction, axis=1)

In [None]:
predictions.shape

In [None]:
predictions

In [None]:
index_numpy

In [None]:
# Translate class indices back to the original landmark ids with a single
# array lookup (class i -> index_numpy[i]).  Replacing values in place one
# class at a time would re-match entries that were already translated
# whenever a landmark id equals a later class index.
predictions = index_numpy[predictions]

In [None]:
predictions

In [None]:
# NOTE(review): my_pred is not referenced anywhere else in the visible source.
# It concatenates the printed form of the two whole arrays, not one string
# per image -- likely a leftover experiment; confirm before removing.
my_pred = str(predictions) + ' ' + str(scores)

In [None]:
# Fill the submission column with one "<landmark_id> <score>" string per
# image, the score printed with two decimals.  Formatting the pairs directly
# replaces the temporary 'scores'/'space' columns and the row-wise apply()
# calls, and leaves `sub` with the same single 'landmarks' column.
sub['landmarks'] = [
    f"{landmark} {float(score):.2f}"
    for landmark, score in zip(predictions, scores)
]

In [None]:
sub

In [None]:
sub.reset_index(inplace=True)

In [None]:
sub

In [None]:
# `id` was already moved from the index into a regular column by the
# preceding reset_index(), so the remaining 0..N-1 index must not be written:
# without index=False the CSV gets a spurious unnamed first column.
sub.to_csv('submission.csv', index=False)

In [None]:
s = pd.read_csv('../input/landmark-recognition-2021/sample_submission.csv')
s

In [None]:
# Persist the class-index -> landmark-id mapping next to the saved model.
# The context manager closes the file even if create_dataset() raises.
with h5py.File('data.h5', 'w') as file:
    file.create_dataset('dataset', data=index_numpy)