- This Notebook is an attempt to do a simple image search using autoencoders.
- The reconstructed images are still blurry.

In [None]:
%matplotlib inline

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
import re
import math, random

from tqdm import tqdm
from glob import glob
import gc

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

import cv2
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display

plt.rcParams["figure.figsize"] = (8, 8)
plt.rcParams['axes.titlesize'] = 8

from kaggle_datasets import KaggleDatasets

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras import Input, Model
import tensorflow.keras.layers as L
import tensorflow.keras.backend as K

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
from tensorflow.keras.metrics import categorical_accuracy


from time import time, strftime, gmtime

# Wall-clock start time (seconds since the epoch); the last cell subtracts it
# from the finish time to report the notebook's total runtime.
start = time()
print(start)

# Also log a human-readable timestamp for the start of the run.
import datetime
print(str(datetime.datetime.now()))

In [None]:
# Training metadata of the Landmark Retrieval 2020 dataset; the cells below
# read its `id` and `landmark_id` columns.
train = pd.read_csv('../input/landmark-retrieval-2020/train.csv')
print(train.shape)
train

In [None]:
# Number of images per landmark.
# rename_axis(...).reset_index(name = ...) names both output columns explicitly.
# Renaming the default columns afterwards breaks on pandas >= 2.0, where
# value_counts() already produces `landmark_id` / `count` and the old rename
# mapping would create two columns called `count`.
landmark_counts = (
    train['landmark_id']
    .value_counts()
    .rename_axis('landmark_id')
    .reset_index(name = 'count')
)
# Ascending order: the most frequent landmarks end up at the tail.
landmark_counts = landmark_counts.sort_values('count')
landmark_counts

__Taking only the top 100 classes__

In [None]:
# landmark_counts is sorted ascending by count, so its last 100 rows are the
# 100 most frequent landmarks. Keep only the training rows of those landmarks.
most_frequent_ids = landmark_counts.tail(100).reset_index(drop = True)['landmark_id']
is_top_landmark = train['landmark_id'].isin(most_frequent_ids)
top_100 = train.loc[is_top_landmark].reset_index(drop = True)
top_100

> __Create a dataframe from the top 100 classes, sampling 300 images per class for simplicity__

In [None]:
# Draw 300 images from each selected landmark.
# The per-class samples are collected in a list and concatenated once; calling
# pd.concat inside the loop re-copies the growing frame on every iteration.
# A fixed random_state (the same seed the train/validation split uses) makes
# the sampled subset reproducible across runs.
sampled_frames = [
    top_100.loc[top_100['landmark_id'] == each].sample(300, random_state = 2019)
    for each in tqdm(top_100['landmark_id'].unique())
]
df = pd.concat(sampled_frames, ignore_index = True)
print(df.shape)
df

In [None]:
# Number of distinct landmarks in the sampled frame; used later as the width
# of the one-hot targets.
num_classes = df['landmark_id'].nunique()
num_classes

In [None]:
# Replace the raw landmark ids by the integer codes LabelEncoder assigns.
# NOTE: df['landmark_id'] is overwritten in place, so re-running this cell
# refits `lbl` on the already-encoded values instead of the original ids.
lbl = LabelEncoder()

df['landmark_id'] = lbl.fit_transform(df['landmark_id'])
df

__Adding Image path to the df as a column__
- Thanks to this notebook for the below code https://www.kaggle.com/derinformatiker/landmark-retrieval-all-paths

In [None]:
def get_paths(sub):
    """List the files of one dataset subset as paths relative to the dataset root.

    The dataset stores its files under three levels of single-hex-digit
    directories (``<sub>/<a>/<b>/<c>/<file>``). Every such directory is listed
    and each file is returned as ``"<sub>/<a>/<b>/<c>/<file>"``.

    Parameters
    ----------
    sub : str
        Name of the subset directory below
        ``/kaggle/input/landmark-retrieval-2020`` (the notebook uses
        ``'test'`` and ``'index'``).

    Returns
    -------
    list of str
        Relative file paths. Directories that do not exist are skipped, so an
        unknown subset yields an empty list.
    """
    hex_digits = "0123456789abcdef"
    root = "/kaggle/input/landmark-retrieval-2020"

    paths = []

    for a in hex_digits:
        for b in hex_digits:
            for c in hex_digits:
                rel_dir = f"{sub}/{a}/{b}/{c}"
                try:
                    names = os.listdir(f"{root}/{rel_dir}")
                except (FileNotFoundError, NotADirectoryError):
                    # A missing prefix directory is expected and skipped; any
                    # other error (e.g. permissions) is no longer swallowed.
                    continue
                # os.listdir returns names in arbitrary order; sorting keeps the
                # path list (and everything indexed by it) reproducible.
                paths.extend(f"{rel_dir}/" + x for x in sorted(names))

    return paths

In [None]:
# Turn every image id into its path relative to the dataset root:
#   "<id>" -> "train/<id[0]>/<id[1]>/<id[2]>/<id>.jpg"
# Vectorised string operations replace the original row-by-row loop, which
# fetched every row with .iloc and wrote the path into that temporary row
# before collecting it.
df_path = df.copy()

image_ids = df["id"]
df_path["id"] = (
    "train/"
    + image_ids.str[0] + "/"
    + image_ids.str[1] + "/"
    + image_ids.str[2] + "/"
    + image_ids + ".jpg"
)

In [None]:
# Sanity check: `id` should now hold relative image paths.
print(df_path.shape)
df_path.head()

In [None]:
def plot_images_random(data, nrows, ncols, title = None):
    """Plot a grid of randomly chosen images.

    Parameters
    ----------
    data : DataFrame-like
        Must provide an ``'id'`` column of image paths relative to
        ``/kaggle/input/landmark-retrieval-2020/``.
    nrows, ncols : int
        Grid shape; ``nrows * ncols`` paths are drawn with
        ``np.random.choice`` (which samples with replacement by default, so
        the same image can appear twice).
    title : str, optional
        Super-title of the figure.

    Images that cannot be read are skipped and leave their grid cell empty.
    """
    plt.rcParams["axes.grid"] = False
    # Create the figure first: calling suptitle before plt.figure() puts the
    # title on a different (implicitly created) figure than the image grid.
    fig = plt.figure(figsize = (16, 16))
    if title is not None:
        fig.suptitle(title, fontsize = 16)

    for i, img_id in enumerate(np.random.choice(data['id'], nrows * ncols)):
        img = cv2.imread('/kaggle/input/landmark-retrieval-2020/' + img_id)
        if img is None:
            # cv2.imread returns None for a missing or unreadable file.
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (224, 224))
        plt.subplot(nrows, ncols, i + 1)
        plt.imshow(img)

In [None]:
# Show a 3 x 4 grid of random images from the sampled training frame.
plot_images_random(df_path, 3, 4, 'Images from Train folder')

In [None]:
# Image size used as the default resize target in decode_image and as the
# spatial part of the model's input shape.
dim = [224, 224]

In [None]:
# Detect hardware and pick the matching distribution strategy.
try:
    # TPUClusterResolver raises ValueError when no TPU is found; no arguments
    # are needed when the TPU_NAME environment variable is set.
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection. No parameters necessary if TPU_NAME environment variable is set. On Kaggle this is always the case.
    print('Running on TPU ', tpu.master())
except ValueError:
    tpu = None

if tpu:
    # Order matters: connect to the cluster, initialise the TPU system, then
    # build the strategy.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
else:
    strategy = tf.distribute.get_strategy() # default distribution strategy in Tensorflow. Works on CPU and single GPU.

# Lets tf.data choose the parallelism / prefetch buffer sizes itself.
AUTO = tf.data.experimental.AUTOTUNE

print("REPLICAS: ", strategy.num_replicas_in_sync)

# Global batch size: 16 samples per replica.
BATCH_SIZE = 16 * strategy.num_replicas_in_sync
print(BATCH_SIZE)

# GCS location of the attached dataset; format_path prefixes every image path with it.
GCS_DS_PATH = KaggleDatasets().get_gcs_path()
print(GCS_DS_PATH)

In [None]:
#Check the GCS path bucket - helps alot
!gsutil ls $GCS_DS_PATH

In [None]:
def format_path(pt):
    """Prefix the relative path `pt` with the dataset's GCS root (`GCS_DS_PATH`)."""
    gcs_path = os.path.join(GCS_DS_PATH, pt)
    return gcs_path

In [None]:
# Full GCS paths of the sampled training images.
train_paths = df_path['id'].apply(format_path).values

from tensorflow.keras.utils import to_categorical

# One-hot class targets. They travel through the tf.data pipeline, but
# decode_image replaces them with the image itself (autoencoder target).
train_targets = to_categorical(df_path['landmark_id'].values, num_classes = num_classes)

train_paths[:2], train_targets.shape

In [None]:
# Hold out 5% of the sampled images for validation; the seed fixes the split.
train_path, valid_path, train_label, valid_label = train_test_split(train_paths, train_targets, test_size = 0.05, random_state = 2019)
print(train_path.shape, train_label.shape, valid_path.shape, valid_label.shape)

In [None]:
def decode_image(filename, label = None, image_size = dim):
    """Read a JPEG file and return it as a resized float32 RGB tensor.

    The decoded 3-channel image is cast to float32, divided by 255 and
    resized to `image_size`.

    When `label` is given, the pair ``(image, image)`` is returned instead of
    ``(image, label)``: the autoencoder is trained to reproduce its input, so
    the label is only a switch and its value is discarded.
    """
    raw_bytes = tf.io.read_file(filename)
    pixels = tf.image.decode_jpeg(raw_bytes, channels = 3)
    pixels = tf.cast(pixels, tf.float32) / 255.0
    pixels = tf.image.resize(pixels, image_size)

    if label is not None:
        # Training / validation mode: the input doubles as the target.
        return pixels, pixels
    return pixels

def data_augment(image, label = None):
    """Randomly flip `image` horizontally and vertically.

    As in decode_image, a non-None `label` only selects the return form:
    ``(flipped, flipped)`` is returned so that the augmented image is both the
    input and the reconstruction target. Without a label only the flipped
    image is returned.
    """
    flipped = tf.image.random_flip_left_right(image)
    flipped = tf.image.random_flip_up_down(flipped)

    if label is not None:
        return flipped, flipped
    return flipped

In [None]:
# Training pipeline: decode -> cache decoded images -> repeat indefinitely ->
# shuffle -> batch. Because of repeat(), fit() needs steps_per_epoch to know
# where an epoch ends. decode_image turns each (path, label) pair into
# (image, image).
train_dataset = (
    tf.data.Dataset
    .from_tensor_slices((train_path, train_label))
    .map(decode_image, num_parallel_calls = AUTO)
    .cache()
    .repeat()
    .shuffle(1024)
    .batch(BATCH_SIZE)
    .prefetch(AUTO)
)

# Validation pipeline: a single, unshuffled pass; whole batches are cached.
valid_dataset = (
    tf.data.Dataset
    .from_tensor_slices((valid_path, valid_label))
    .map(decode_image, num_parallel_calls = AUTO)
    .batch(BATCH_SIZE)
    .cache()
    .prefetch(AUTO)
)

In [None]:
# Display the dataset objects to check element shapes and dtypes.
train_dataset, valid_dataset

__Visualize images in the train and valid datasets__

In [None]:
# decode_image maps every (path, label) pair to (image, image), so the second
# element of a batch is the reconstruction target (the image itself), not a
# class label. Taking np.argmax of it produced meaningless titles.
image_batch, label_batch = next(iter(train_dataset))

plt.figure(figsize = (10, 10))
for i in range(9):
    ax = plt.subplot(3, 3, i + 1)
    plt.imshow(image_batch[i].numpy())
    plt.title('Train image {}'.format(i))
    plt.axis("off")

In [None]:
# decode_image maps every (path, label) pair to (image, image), so the second
# element of a batch is the reconstruction target (the image itself), not a
# class label. Taking np.argmax of it produced meaningless titles.
image_batch, label_batch = next(iter(valid_dataset))

plt.figure(figsize = (10, 10))
for i in range(9):
    ax = plt.subplot(3, 3, i + 1)
    plt.imshow(image_batch[i].numpy())
    plt.title('Validation image {}'.format(i))
    plt.axis("off")

In [None]:
LR = 1e-4      # initial learning rate passed to Adam
EPOCHS = 80    # maximum number of training epochs
ch_dim = -1    # axis handed to BatchNormalization (last axis)
hidden = 16    # units of the Dense 'Encoder' layer, i.e. the embedding size
ch = 3         # number of image channels (model input and output)

__Simple Autoencoder__

In [None]:
# Build and compile the convolutional autoencoder inside the strategy scope so
# that its variables are created for the selected device(s).
with strategy.scope():
    #Encoder: three stride-2 convolutions (128 -> 64 -> 32 filters), each
    #followed by LeakyReLU and batch normalisation.
    inputs = Input(shape = (*dim, ch))

    x = L.Conv2D(128, (3, 3), strides = 2, padding = 'same')(inputs)
    x = L.LeakyReLU(alpha = 0.2)(x)
    x = L.BatchNormalization(axis = ch_dim)(x)

    x = L.Conv2D(64, (3, 3), strides = 2, padding = 'same')(x)
    x = L.LeakyReLU(alpha = 0.2)(x)
    x = L.BatchNormalization(axis = ch_dim)(x)

    x = L.Conv2D(32, (3, 3), strides = 2, padding = 'same')(x)
    x = L.LeakyReLU(alpha = 0.2)(x)
    x = L.BatchNormalization(axis = ch_dim)(x)

    # Shape of the last feature map; the decoder needs it to undo the Flatten.
    enc_size = K.int_shape(x)
    #print(enc_size)

    x = L.Flatten()(x)
    # Bottleneck: this `hidden`-dimensional vector is the image embedding.
    encoder_output = L.Dense(hidden, name = 'Encoder')(x)

    encoder_model = Model(inputs = inputs, outputs = encoder_output, name = 'encoder_model')

    #Decoder: expand the embedding back to the encoder's feature-map size and
    #upsample with three stride-2 transposed convolutions.
    x = L.Dense(np.prod(enc_size[1: ]))(encoder_output)

    x = L.Reshape((enc_size[1], enc_size[2], enc_size[3]))(x)

    x = L.Conv2DTranspose(32, (3, 3), strides = 2, padding = 'same')(x)
    x = L.LeakyReLU(alpha = 0.2)(x)
    x = L.BatchNormalization(axis = ch_dim)(x)

    x = L.Conv2DTranspose(64, (3, 3), strides = 2, padding = 'same')(x)
    x = L.LeakyReLU(alpha = 0.2)(x)
    x = L.BatchNormalization(axis = ch_dim)(x)

    x = L.Conv2DTranspose(128, (3, 3), strides = 2, padding = 'same')(x)
    x = L.LeakyReLU(alpha = 0.2)(x)
    x = L.BatchNormalization(axis = ch_dim)(x)

    # Back to `ch` channels; the sigmoid keeps the output in (0, 1), the same
    # range as the inputs, which decode_image divides by 255.
    x = L.Conv2D(ch, (3, 3), padding = 'same')(x)
    out = L.Activation('sigmoid', name = 'Decoder')(x)

    # Shares `inputs` and all encoder layers with encoder_model, so training
    # the autoencoder also trains the encoder.
    autoencoder = Model(inputs = inputs, outputs = out, name = 'autoencoder')

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    opt = Adam(learning_rate = LR, decay = LR / EPOCHS)
    autoencoder.compile(loss = tf.keras.losses.MeanSquaredError(), optimizer = opt)

    autoencoder.summary()

In [None]:
# Layer overview of the encoder half (image -> embedding).
encoder_model.summary()

In [None]:
# train_dataset repeats forever, so an epoch is defined as one pass over the
# training samples.
STEPS_PER_EPOCH = train_label.shape[0] // BATCH_SIZE

# Keep the autoencoder weights with the lowest validation loss on disk.
checkpoint = ModelCheckpoint('auto_tpu_model.h5', monitor = 'val_loss', save_best_only = True, verbose = 1)

# min_lr has to lie below the initial learning rate: with min_lr = 0.0001 (== LR)
# the callback could never reduce anything. Allow two reductions by factor 0.1.
reduce_lr = ReduceLROnPlateau(monitor = 'loss', factor = 0.1, patience = 5, min_lr = LR * 1e-2, verbose = 1)

# Stop when the validation loss has not improved for 10 epochs.
early = EarlyStopping(monitor = 'val_loss', patience = 10, verbose = 1, mode = 'auto')

In [None]:
# train_dataset is already batched, so `batch_size` is not passed here: Keras
# documents that it must not be specified for tf.data.Dataset inputs.
history = autoencoder.fit(train_dataset, epochs = EPOCHS,
                    steps_per_epoch = STEPS_PER_EPOCH,
                    validation_data = valid_dataset,
                    verbose = 1, callbacks = [checkpoint, early, reduce_lr]
                   )
gc.collect()

In [None]:
# Save the encoder with the weights currently in memory (those of the last
# epoch that ran). NOTE(review): the best-val_loss weights are only stored for
# the full autoencoder by the ModelCheckpoint callback - confirm this is intended.
encoder_model.save('./EncoderModel_tpu.h5')

In [None]:
# Names of the metrics recorded during training.
history.history.keys()

In [None]:
# Training vs. validation loss per epoch, on a linear y-axis.
loss_history = pd.DataFrame(history.history)
loss_ax = loss_history.plot(y = ['loss', 'val_loss'], logy = False)
loss_ax.set_xlabel("Epochs")
loss_ax.set_ylabel("Loss")

__Visualize Autoencoder outputs__

In [None]:
def visualize_autoencoder(img, encoder, decoder):
    """Show an image, its embedding and its reconstruction side by side.

    img     : a single image without batch axis; ``img[None]`` adds the batch
              axis the models expect.
    encoder : model whose prediction for the image is drawn as a small
              2-D array (embedding reshaped to ``(len // 2, -1)``).
    decoder : model that is applied to the *image*, not to the embedding; the
              notebook passes the full autoencoder here.
    """
    batch = img[None]
    code = encoder.predict(batch)
    recon = decoder.predict(batch)

    # (title, array) for the three panels, left to right.
    panels = [
        ("Original", img),
        ("Encoder Output", code.reshape([code.shape[-1] // 2, -1])),
        ("Reconstructed", recon.squeeze()),
    ]

    plt.figure(figsize = (12, 12))
    for position, (panel_title, panel) in enumerate(panels, start = 1):
        plt.subplot(1, 3, position)
        plt.title(panel_title)
        plt.imshow(panel)
    plt.show()

# Take one training batch (the target half of the pair is not needed) and
# visualise the first five images.
image_batch, _ = next(iter(train_dataset))

for i in range(5):
    # The full autoencoder is passed as `decoder`: it maps image -> reconstruction.
    visualize_autoencoder(image_batch[i], encoder_model, autoencoder)

__Create test and index dataframes with their respective paths__

In [None]:
# Relative paths of all files found under the `test` subset.
test_paths = get_paths('test')
test_df = pd.DataFrame(test_paths, columns = ['id'])
test_df

In [None]:
# Relative paths of all files found under the `index` subset (the images to
# retrieve from).
index_paths = get_paths('index')
index_df = pd.DataFrame(index_paths, columns = ['id'])
index_df

In [None]:
# NOTE: `test_paths` is rebound here from relative paths to full GCS paths.
test_paths = test_df['id'].apply(format_path).values
# No labels and no shuffling: decode_image yields plain images, in the same
# order as the rows of test_df.
test_dataset = (
    tf.data.Dataset
    .from_tensor_slices(test_paths)
    .map(decode_image, num_parallel_calls = AUTO)
    .batch(BATCH_SIZE)
)
test_dataset

In [None]:
# Preview the first nine images of the first test batch.
test_batch = next(iter(test_dataset))

plt.figure(figsize = (10, 10))
for i in range(9):
    ax = plt.subplot(3, 3, i + 1)
    plt.imshow(test_batch[i].numpy())
    plt.title('Test Images')
    plt.axis("off")

__Test Image Embeddings obtained by making predictions using Encoder Model__

In [None]:
# One embedding per test image; the dataset is not shuffled, so row k should
# correspond to test_df row k. The array is saved for reuse.
test_emb = encoder_model.predict(test_dataset, verbose = 1)
np.save('./test_embs.npy', test_emb)

In [None]:
# NOTE: `index_paths` is rebound here from relative paths to full GCS paths.
index_paths = index_df['id'].apply(format_path).values

# No labels and no shuffling: decode_image yields plain images, in the same
# order as the rows of index_df.
index_dataset = (
    tf.data.Dataset
    .from_tensor_slices(index_paths)
    .map(decode_image, num_parallel_calls = AUTO)
    .batch(BATCH_SIZE)
)
index_dataset

In [None]:
# Preview the first nine images of the first index batch.
index_batch = next(iter(index_dataset))

plt.figure(figsize = (10, 10))
for i in range(9):
    ax = plt.subplot(3, 3, i + 1)
    plt.imshow(index_batch[i].numpy())
    plt.title('Index Images')
    plt.axis("off")

__Index Image Embeddings obtained by making predictions using Encoder Model__

In [None]:
# One embedding per index image; the dataset is not shuffled, so row k should
# correspond to index_df row k. The array is saved for reuse.
index_emb = encoder_model.predict(index_dataset, verbose = 1)
np.save('./index_embs.npy', index_emb)

In [None]:
# Check the shapes of the two embedding arrays.
test_emb.shape, index_emb.shape

__Finding the embedding distances__

In [None]:
def euclidean(a, b):
    """Return the Euclidean (L2) distance between the vectors `a` and `b`."""
    difference = a - b
    return np.linalg.norm(difference)

In [None]:
# For every test embedding, find the `knn` nearest index embeddings by
# Euclidean distance.
# The distances from one test vector to ALL index vectors are computed in a
# single vectorised call; the original issued one np.linalg.norm call per
# (test, index) pair from a Python loop.
dist = []
test_ret = {}   # test row number -> array with the row numbers of its nearest index images
knn = 10
for i, test_img in enumerate(test_emb):
    dist = np.linalg.norm(index_emb - test_img, axis = 1)
    test_ret[i] = np.argsort(dist)[:knn]
dist = []
# Show only a preview; the full dictionary has one entry per test image.
len(test_ret), dict(list(test_ret.items())[:5])

In [None]:
def _load_landmark_image(rel_path, size = (224, 224)):
    """Read a dataset image (path relative to the dataset root) as RGB, resized to `size`."""
    full_path = '/kaggle/input/landmark-retrieval-2020/' + rel_path
    img = cv2.imread(full_path)
    if img is None:
        # cv2.imread returns None instead of raising for a missing/unreadable file.
        raise FileNotFoundError('Could not read image: {}'.format(full_path))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return cv2.resize(img, size)


def plot_predictions(keys, values, nrows = 2, ncols = 5):
    """Plot one test image followed by a grid of its retrieved index images.

    Parameters
    ----------
    keys : int
        Row position of the query image in ``test_df``.
    values : iterable of int
        Row positions of the retrieved images in ``index_df``; at most
        ``nrows * ncols`` of them fit into the grid.
    nrows, ncols : int
        Shape of the grid used for the retrieved images.

    Raises
    ------
    FileNotFoundError
        If one of the images cannot be read from disk.
    """
    plt.rcParams["axes.grid"] = False

    # Query image on its own, freshly created figure.
    plt.figure()
    plt.title('Test Image {}'.format(keys), fontsize = 12)
    plt.imshow(_load_landmark_image(test_df['id'].iloc[keys]))

    # Retrieved index images on a second, larger figure.
    plt.figure(figsize = (16, 16))
    for i, ind in enumerate(values):
        img = _load_landmark_image(index_df['id'].iloc[ind])
        plt.subplot(nrows, ncols, i + 1)
        plt.imshow(img)
        plt.title('Index Images {}'.format(ind), fontsize = 16)

In [None]:
# Pick five test images at random to inspect.
# random.sample needs a sequence: passing dict_keys directly raises TypeError
# on Python >= 3.11, so the keys are materialised as a list first.
keys = random.sample(list(test_ret), 5)

In [None]:
# Retrieval result for the first sampled test image.
plot_predictions(keys[0], test_ret[keys[0]])

In [None]:
# Retrieval result for the second sampled test image.
plot_predictions(keys[1], test_ret[keys[1]])

In [None]:
# Retrieval result for the third sampled test image.
plot_predictions(keys[2], test_ret[keys[2]])

In [None]:
# Retrieval result for the fourth sampled test image.
plot_predictions(keys[3], test_ret[keys[3]])

In [None]:
# Retrieval result for the fifth sampled test image.
plot_predictions(keys[4], test_ret[keys[4]])

In [None]:
# Total runtime since `start` was recorded in the first cell. The %H:%M:%S
# format wraps around after 24 hours.
finish = time()
print(strftime("%H:%M:%S", gmtime(finish - start)))