# EfficientNet Tuned for Landmark Classification
Baseline image classifier: additional layers are added on top of the pretrained **EfficientNetB0** model. The model is trained on augmented data until the validation loss stops decreasing.

In [None]:
import pandas as pd
import numpy as np
import os
import glob
import gc
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

In [None]:
# Matplotlib defaults for every chart in the notebook:
# 12 x 8 inch figures and a 14 pt base font
plt.rcParams.update({'figure.figsize': (12, 8), 'font.size': 14})

In [None]:
# Pandas output: show every column and row, print floats with 4 decimals
pd.options.display.max_columns = None
pd.options.display.max_rows = None
pd.set_option('display.float_format', '{:.4f}'.format)

In [None]:
# Locations of the competition data, built with relative paths
# to avoid errors during notebook submission
INPUT_DIR = os.path.join('..', 'input')
DATASET_DIR = os.path.join(INPUT_DIR, 'landmark-recognition-2021')

# Train metadata file, train image tree and test image tree
TRAIN_METADATA, TRAIN_DIRECTORY, TEST_DIRECTORY = (
    os.path.join(DATASET_DIR, entry) for entry in ('train.csv', 'train', 'test')
)

In [None]:
# Pretrained image classification model EfficientNetB0
# from tf.keras.applications with global average pooling as a final layer.
# In this notebook the model is loaded from a public dataset on Kaggle
# at https://www.kaggle.com/ekaterinadranitsyna/keras-applications-models
IMG_MODEL = '../input/keras-applications-models/EfficientNetB0.h5'

In [None]:
# --- Input pipeline ---
AUTOTUNE = tf.data.experimental.AUTOTUNE  # passed to map() and prefetch() in get_dataset
IMG_SIZE = 224  # side length (pixels) images are resized and padded to
BATCH_SIZE = 256  # number of images per dataset batch

# --- Model and optimisation ---
DROPOUT_RATE = 0.1  # rate of the dropout layer before the classification head
LEARNING_RATE = 1e-3  # learning rate of the Adam optimizer
EPOCHS = 1000  # upper bound on epochs; early stopping normally ends training first
PATIENCE = 3  # early-stopping patience on validation loss

# --- Data selection ---
VAL_SIZE = 0.15  # fraction of the train data held out for validation
TOP_CLASSES = 10  # number of most frequent classes kept in limited-training mode

### Functions

In [None]:
def id_to_path(image_id: str, directory: str) -> str:
    """Build the path to an image file from the image ID.

    The file is located in three nested sub-directories named after
    the first, second and third character of the ID.
    :param image_id: String id from 'train.csv'
    :param directory: Path to the directory with images
    :return: Path to the image file
    """
    # Index character by character so that a too-short ID still raises IndexError
    nested_dirs = '/'.join(image_id[position] for position in range(3))
    return f'{directory}/{nested_dirs}/{image_id}.jpg'


def path_to_id(path: str) -> str:
    """Function transforms path to the image into image ID.

    The ID is the file name without its directory and extension, so the
    result does not depend on the ID length or on the extension length.
    :param path: Path to image file
    :return: String representing image ID
    """
    file_name = os.path.basename(path)
    image_id, _extension = os.path.splitext(file_name)
    return image_id


def samples_distribution(value_counts: pd.Series) -> None:
    """Function displays distribution of images per class
    and prints out basic statistics.
    :param value_counts: Series object where index represents class IDs, values - number of samples
    :return: None
    """
    mean_images = round(value_counts.mean(), 0)
    median_images = round(value_counts.median(), 0)
    print(f'Total number of classes: {len(value_counts)}')
    print(f'{value_counts.min()} - {value_counts.max()} samples per class')
    print(f'Mean value: {mean_images} samples\n'
          f'Median value: {median_images} samples')

    # Plot the argument itself, not a notebook-level global, so the function
    # works for any Series passed in
    value_counts.hist(bins=20, log=True)
    # axvline spans the whole height of the axes, so no hard-coded y limit is needed
    plt.axvline(mean_images, color='red', label='Mean number')
    plt.axvline(median_images, color='green', label='Median number')
    plt.title('Train samples per class')
    plt.xlabel('Number of images')
    plt.ylabel('Frequency')
    # Without a legend the line labels above are never displayed
    plt.legend()
    plt.show()

    
@tf.function
def get_image(path: str) -> tf.Tensor:
    """Read a JPEG file and turn it into a model-ready tensor.

    The image is decoded with 3 channels, resized with padding to
    IMG_SIZE x IMG_SIZE, cast to int32 and passed through the
    EfficientNet `preprocess_input` function.
    :param path: Path to image file
    :return: Tensor with preprocessed image
    """
    raw_bytes = tf.io.read_file(path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    padded = tf.image.resize_with_pad(decoded, IMG_SIZE, IMG_SIZE)
    pixels = tf.cast(padded, dtype=tf.int32)
    return tf.keras.applications.efficientnet.preprocess_input(pixels)


@tf.function
def process_dataset(path: str, label: int) -> tuple:
    """Load and preprocess one labelled sample.

    The image is produced by `get_image`; the label is passed through unchanged.
    :param path: Path to image file
    :param label: Class label
    :return: tf.Tensor with preprocessed image, numeric label
    """
    image = get_image(path)
    return image, label


def get_dataset(x, y=None) -> tf.data.Dataset:
    """Function creates batched optimized dataset for the model
    out of an array of file paths and (optionally) class labels.

    The function is deliberately NOT wrapped in `tf.function`: it only
    assembles a `tf.data` pipeline, and its arguments are Python/pandas
    objects, so a traced version would be re-traced on every call
    (for example once per chunk of test images during inference).
    :param x: Input data for the model (array of file paths)
    :param y: Target values for the model (array of class indexes)
    :return: TensorFlow Dataset object
    """
    if y is None:
        # Inference mode: the dataset yields images only
        samples = tf.data.Dataset.from_tensor_slices(x)
        samples = samples.map(get_image, num_parallel_calls=AUTOTUNE)
    else:
        # Training / validation mode: the dataset yields (image, label) pairs
        samples = tf.data.Dataset.from_tensor_slices((x, y))
        samples = samples.map(process_dataset, num_parallel_calls=AUTOTUNE)

    # Batching and prefetching are the same for both modes
    return samples.batch(BATCH_SIZE).prefetch(buffer_size=AUTOTUNE)
    
    
def plot_history(hist):
    """Draw training and validation curves on two stacked charts.

    The upper chart shows the loss, the lower one the sparse categorical
    accuracy; both share the epoch axis.
    :param hist: Tensorflow history object from model.fit()
    """
    metrics = hist.history
    train_loss = metrics['loss']
    valid_loss = metrics['val_loss']
    train_acc = metrics['sparse_categorical_accuracy']
    valid_acc = metrics['val_sparse_categorical_accuracy']

    # Epoch numbers start from 1 on the x axis
    epochs = range(1, len(train_loss) + 1)

    _, (loss_ax, acc_ax) = plt.subplots(nrows=2, ncols=1, sharex=True)

    # (axes, training values, validation values, title, y label) for each chart
    panels = (
        (loss_ax, train_loss, valid_loss, 'Training and validation loss', 'Loss'),
        (acc_ax, train_acc, valid_acc, 'Training and validation accuracy', 'Accuracy'),
    )
    for axis, train_values, valid_values, title, y_label in panels:
        axis.plot(epochs, train_values, 'bo', label='Training')
        axis.plot(epochs, valid_values, 'ro', label='Validation')
        axis.set_title(title)
        axis.set_ylabel(y_label)
        axis.legend()

    # Only the bottom chart carries the shared x-axis label
    acc_ax.set_xlabel('Epochs')

    plt.tight_layout()
    plt.show()

### Data processing

In [None]:
# Collect every .jpg file stored three directory levels below the test root
test_paths = glob.glob(f'{TEST_DIRECTORY}/*/*/*/*.jpg')

# Table for predicted values: one row per test image, with path and image ID
test_prediction = pd.DataFrame({'path': test_paths})
test_prediction['id'] = test_prediction['path'].map(path_to_id)

n_test_samples = len(test_paths)
print(f'Test data: {n_test_samples} images')
test_prediction.head()

In [None]:
# Read the train metadata and attach the image file path to every row
data_train = pd.read_csv(TRAIN_METADATA)
data_train['path'] = [
    id_to_path(image_id, TRAIN_DIRECTORY) for image_id in data_train['id']
]
n_train_samples = data_train.shape[0]
print(f'Train data: {n_train_samples} images')
data_train.head()

In [None]:
# To save time when saving the notebook, the total number of train images
# decides the mode: with exactly 1_580_470 images training is limited,
# with any other count the full pipeline is executed.
full_size_training = n_train_samples != 1_580_470

if full_size_training:
    # Full size training and inference mode
    print('INFO: Full size training and inference will be executed.')
else:
    # Limited mode: fewer epochs (the train data is reduced further below)
    EPOCHS = 10
    print('WARNING: Training will be limited to a portion of the train data.')

In [None]:
# Count images per landmark class, then print statistics and plot the distribution
images_per_class = data_train['landmark_id'].value_counts()
n_classes = images_per_class.shape[0]
samples_distribution(images_per_class)

In [None]:
# Compare the largest landmark ID with the number of distinct classes
# to see how many IDs are missing in the train set
largest_id = data_train['landmark_id'].max()
n_missing_classes = largest_id - n_classes
print(f'Largest landmark ID: {largest_id}')
print(f'Number of missing classes: {n_missing_classes}')

In [None]:
# This trick is to save the notebook faster.
# It would not apply in real inference execution.
if not full_size_training:
    # value_counts() sorts by frequency, so head() keeps the most frequent classes
    selected_classes = data_train['landmark_id'].value_counts().head(TOP_CLASSES).index
    # .copy() makes the filtered frame independent of the original, so adding
    # columns to it later does not trigger pandas' SettingWithCopyWarning
    data_train = data_train.loc[data_train['landmark_id'].isin(selected_classes)].copy()
    # Use the number of classes actually kept: it can be smaller than
    # TOP_CLASSES when the data contains fewer distinct classes
    n_classes = len(selected_classes)
    print(f'Limited train data to {n_classes} most frequent landmark IDs: {len(data_train)} samples.')

In [None]:
# Map landmark IDs to consecutive integer class IDs (0, 1, 2, ...).
# The fitted encoder is kept to translate predictions back to landmark IDs.
encoder = LabelEncoder()
encoder.fit(data_train['landmark_id'])
data_train['class_id'] = encoder.transform(data_train['landmark_id'])
data_train.head()

In [None]:
# Shuffled split stratified by class, so that every class
# is present in both the train and the validation set
class_ids = data_train['class_id']
train_paths, valid_paths, train_labels, valid_labels = train_test_split(
    data_train['path'],
    class_ids,
    test_size=VAL_SIZE,
    shuffle=True,
    stratify=class_ids,
)
print(f'Train set: {len(train_paths)} samples\n'
      f'Validation set: {len(valid_paths)} samples')

In [None]:
# Build the batched TensorFlow datasets: first for training, then for validation
train_ds, valid_ds = (
    get_dataset(paths, labels)
    for paths, labels in ((train_paths, train_labels), (valid_paths, valid_labels))
)

### Model architecture

In [None]:
# Short alias for the Keras preprocessing layers used for augmentation
preprocessing = tf.keras.layers.experimental.preprocessing

# Block for data augmentation inside the model:
# random rotation, translation, contrast, crop and zoom, applied in this order
augmentation_layers = [
    preprocessing.RandomRotation(factor=(-0.15, 0.15)),
    preprocessing.RandomTranslation(height_factor=(-0.1, 0.1), width_factor=(-0.1, 0.1)),
    preprocessing.RandomContrast(factor=(0., 0.1)),
    preprocessing.RandomCrop(IMG_SIZE, IMG_SIZE),
    preprocessing.RandomZoom(height_factor=(-0.15, 0.15), width_factor=None),
]
augmentation = tf.keras.models.Sequential(augmentation_layers, name='image_augmentation')

# Pretrained image classification model with all weights frozen
image_model = tf.keras.models.load_model(IMG_MODEL)
image_model.trainable = False

# Full classifier: input -> augmentation -> frozen backbone -> new trainable head
classifier_layers = [
    tf.keras.layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3)),
    augmentation,  # This block works only in training mode
    image_model,  # Pretrained model with all layers frozen
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(DROPOUT_RATE, name='top_dropout'),
    tf.keras.layers.Dense(n_classes, activation='softmax', name='class_proba'),
]
model = tf.keras.models.Sequential(classifier_layers)

# Compile with Adam, sparse categorical cross-entropy loss and accuracy metric
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
)

model.summary()

### Model training

In [None]:
print('Started training the model with original layers frozen.')

# Stop when validation loss has not improved for PATIENCE epochs
# and roll the model back to the best weights seen so far
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=PATIENCE, restore_best_weights=True)

# `use_multiprocessing` / `workers` are intentionally not passed: Keras applies
# them to generator / keras.utils.Sequence input only, so they have no effect
# on tf.data datasets, and newer Keras releases no longer accept them.
# Input parallelism is already handled inside the dataset (AUTOTUNE).
history = model.fit(train_ds, validation_data=valid_ds,
                    epochs=EPOCHS, callbacks=[early_stop])

In [None]:
# Run a garbage-collection pass now that training has finished
gc.collect()

In [None]:
# Plot the loss and accuracy values recorded by model.fit()
plot_history(history)

### Prediction for the test set

In [None]:
# Report how many test images were found before inference starts
print(f'Started inference for {n_test_samples} images.')

In [None]:
# The number of test images could be much larger and cause memory errors.
# To avoid it process the test data in chunks of CHUNK_SIZE rows.
CHUNK_SIZE = 5_000

# To add predicted classes and probabilities for all chunks of images
chunks = []

for start_idx in range(0, n_test_samples, CHUNK_SIZE):
    # Exclusive upper bound, clipped to the number of available rows
    stop_idx = min(start_idx + CHUNK_SIZE, n_test_samples)
    print(f'Processing rows: {start_idx} - {stop_idx - 1}')

    # Positional slicing keeps the chunking independent of the index labels
    test_ds = get_dataset(test_prediction['path'].iloc[start_idx:stop_idx])
    # `use_multiprocessing` / `workers` are omitted: Keras applies them to
    # generator / Sequence input only, so they do nothing for a tf.data dataset
    pred_proba = model.predict(test_ds)

    # Most probable class per image, translated back to the original landmark ID
    class_ids = np.argmax(pred_proba, axis=1)
    cur_chunk = pd.DataFrame({
        'class_id': class_ids,
        'landmark_id': encoder.inverse_transform(class_ids),
        'proba': np.max(pred_proba, axis=1),
    })
    chunks.append(cur_chunk)

    gc.collect()

In [None]:
# Stack the per-chunk predictions into one table with a fresh 0..n-1 index,
# then attach its columns next to the test image paths and IDs
all_predictions = pd.concat(chunks, ignore_index=True)
test_prediction = pd.concat([test_prediction, all_predictions], axis=1)

In [None]:
# Order rows from the most to the least confident prediction
test_prediction = test_prediction.sort_values(by='proba', ascending=False)

# Build the "<landmark_id> <probability>" string for every image,
# with the probability rounded to 3 decimals
test_prediction['landmark_id'] = test_prediction['landmark_id'].astype('str')
test_prediction['landmarks'] = [
    f'{landmark_id} {round(proba, 3)}'
    for landmark_id, proba in zip(test_prediction['landmark_id'], test_prediction['proba'])
]
test_prediction.head()

In [None]:
# Write image IDs with their prediction strings to the submission file
submission = test_prediction[['id', 'landmarks']]
submission.to_csv('submission.csv', index=False)
submission.head()