In [14]:
# SIFT Features, ORB Features, Deep
# Cosine, euclidean, etc
# BoW, VLAD
# Different k (# of clusters)

In [15]:
import json
import numpy as np
import h5py
import matplotlib.pyplot as plt
import os
from tqdm.notebook import tqdm
from skimage.color import rgb2gray
from skimage.feature import SIFT, ORB
from sklearn.cluster import KMeans
from sklearn import preprocessing
import pickle
import math

In [16]:
# Map (database) index: image paths and the "loc" entry stored next to them.
with open("data02/database/database_lite.json", "r") as f:
    m_idx = json.load(f)
m_imgs = np.array(m_idx["im_paths"])
m_loc = np.array(m_idx["loc"])

# Query index: same layout as the map index.
with open("data02/query/query_lite.json", "r") as f:
    q_idx = json.load(f)
q_imgs = np.array(q_idx["im_paths"])
q_loc = np.array(q_idx["loc"])

# Ground truth. `sim` is indexed as sim[query_idx, map_idx] further down, where
# a value of 1 marks a relevant map image. `fovs` is only loaded here.
# NOTE(review): presumably "fov" holds field-of-view overlap scores -- confirm.
with h5py.File("data02/london_lite_gt.h5", "r") as f:
    # The datasets must be materialised with [:] while the file is still open.
    fovs = f["fov"][:]
    sim = f["sim"][:].astype(np.uint8)

In [17]:
def extract_all_descriptors(extractor):
    """Extract local descriptors from every map image and stack them.

    Each path in the module-level ``m_imgs`` is read from the ``data02``
    directory, converted with ``rgb2gray`` and passed to ``extractor``.

    Parameters
    ----------
    extractor : callable
        Takes the grayscale image and returns that image's descriptors as a
        2-D array (one descriptor per row).

    Returns
    -------
    numpy.ndarray or None
        All descriptors stacked row-wise, or ``None`` when ``m_imgs`` is empty.
    """
    per_image_descriptors = []

    for img_name in tqdm(m_imgs):
        img = plt.imread(os.path.join('data02', img_name))
        img = rgb2gray(img)
        per_image_descriptors.append(extractor(img))

    if not per_image_descriptors:
        return None

    # Stack once at the end: calling vstack inside the loop re-copies the whole
    # accumulated array on every iteration (quadratic in the number of images).
    return np.vstack(per_image_descriptors)

In [18]:
def find_centroids(descriptors, k):
    """Cluster ``descriptors`` into ``k`` groups and return the cluster centres.

    KMeans is run with a fixed ``random_state`` of 0 and 5 initialisations.
    """
    model = KMeans(n_clusters=k, random_state=0, n_init=5, verbose=0)
    model.fit(descriptors)
    return model.cluster_centers_


In [19]:
def manhatten(matrix, vector):
    """Return the L1 (Manhattan) distance from every row of ``matrix`` to ``vector``."""
    offsets = matrix - vector
    return np.linalg.norm(offsets, ord=1, axis=1)

def infinity(matrix, vector):
    """Return the L-infinity (largest absolute component) distance per row of ``matrix``."""
    offsets = matrix - vector
    return np.linalg.norm(offsets, ord=np.inf, axis=1)

def eucledian(matrix, vector):
    """Return the L2 (Euclidean) distance from every row of ``matrix`` to ``vector``."""
    offsets = matrix - vector
    return np.linalg.norm(offsets, axis=1)

def cosine(matrix, vector):
    """Return the negated cosine similarity between each row of ``matrix`` and ``vector``.

    The sign is flipped so the result can be used like a distance: smaller
    values mean "more similar", matching the other metrics in this notebook
    that are consumed through ``argmin`` / ``argsort``.

    Parameters
    ----------
    matrix : array-like, shape (n, d)
    vector : array-like, shape (d,)

    Returns
    -------
    numpy.ndarray, shape (n,)
        ``-cos(row, vector)`` per row. Where either the row or ``vector`` has
        zero norm the similarity is defined as 0 rather than NaN, so the value
        cannot corrupt a later ``argmin`` / ``argsort``.
    """
    # Cast to float so boolean inputs are multiplied numerically rather than
    # combined logically.
    matrix = np.asarray(matrix, dtype=float)
    vector = np.asarray(vector, dtype=float)

    dots = matrix @ vector
    norms = np.linalg.norm(matrix, axis=1) * np.linalg.norm(vector)

    similarity = np.zeros(matrix.shape[0])
    # Divide only where the denominator is non-zero; other entries keep 0.
    np.divide(dots, norms, out=similarity, where=norms > 0)

    return -similarity

In [20]:
def extract_with_sift(image):
    """Run a freshly constructed SIFT detector on ``image`` and return its descriptors.

    Callers in this notebook pass the output of ``rgb2gray``.
    """
    detector = SIFT()
    detector.detect_and_extract(image)
    return detector.descriptors

def extract_with_orb(image):
    """Run a freshly constructed ORB detector on ``image`` and return its descriptors.

    Callers in this notebook pass the output of ``rgb2gray``.
    """
    detector = ORB()
    detector.detect_and_extract(image)
    return detector.descriptors

In [21]:
def bag_of_words(centroids, img_descriptors, distance_metric):
    """Build the bag-of-words histogram of one image.

    Every descriptor votes for the centroid that ``distance_metric`` ranks
    closest (smallest value); the histogram holds the vote count per centroid.

    Parameters
    ----------
    centroids : numpy.ndarray, one centroid per row
    img_descriptors : numpy.ndarray, one descriptor per row
    distance_metric : callable
        ``distance_metric(centroids, descriptor)`` returning one value per
        centroid.

    Returns
    -------
    numpy.ndarray
        1-D float array with one count per centroid.
    """
    word_counts = np.zeros(centroids.shape[0])

    for row in range(img_descriptors.shape[0]):
        nearest_word = np.argmin(distance_metric(centroids, img_descriptors[row]))
        word_counts[nearest_word] += 1

    return word_counts

In [22]:
def create_all_bow_vectors(extractor, centroids, distance_metric):
    """Compute the bag-of-words vector of every map image.

    Each path in the module-level ``m_imgs`` is read from ``data02``, converted
    with ``rgb2gray``, described with ``extractor`` and turned into a histogram
    by ``bag_of_words``.

    Parameters
    ----------
    extractor : callable
        Maps a grayscale image to its descriptors (one per row).
    centroids : numpy.ndarray
        Visual words, one per row.
    distance_metric : callable
        Metric used to assign each descriptor to its nearest centroid.

    Returns
    -------
    numpy.ndarray, shape (n_images, n_centroids)
        One BoW vector per map image. The result is always 2-D, including for
        a single image or no images.
    """
    bow_rows = []
    for img_name in tqdm(m_imgs):
        img = plt.imread(os.path.join('data02', img_name))
        img = rgb2gray(img)

        img_descriptors = extractor(img)  # descriptors (the feature vectors)

        # BoW representation of the image w.r.t. the visual words (centroids)
        bow_rows.append(bag_of_words(centroids, img_descriptors, distance_metric))

    if not bow_rows:
        return np.empty((0, centroids.shape[0]))

    # Stack once: avoids re-copying the growing matrix on every iteration and
    # yields a (1, k) matrix rather than a bare 1-D vector for a single image.
    return np.vstack(bow_rows)

In [23]:
def recall_at_k(relevant, retrieved, k):
    """Fraction of the relevant items that appear in the top ``k`` of ``retrieved``.

    Parameters
    ----------
    relevant : array-like of relevant item ids
    retrieved : sequence of item ids ordered best-first
    k : int, cut-off rank

    Returns
    -------
    float
        Recall@k; ``0.0`` when ``relevant`` is empty (instead of a 0/0 NaN).
    """
    if len(relevant) == 0:
        return 0.0
    # np.isin is the supported replacement for the deprecated np.in1d.
    hits = np.isin(relevant, retrieved[:k])
    return np.sum(hits) / len(relevant)

In [24]:
def precision_at_k(relevant, retrieved, k):
    """Number of relevant items found in the top ``k`` of ``retrieved``, divided by ``k``.

    Parameters
    ----------
    relevant : array-like of relevant item ids
    retrieved : sequence of item ids ordered best-first
    k : int, cut-off rank (expected to be positive)
    """
    # np.isin is the supported replacement for the deprecated np.in1d.
    tp = np.sum(np.isin(relevant, retrieved[:k]))
    return tp / k

def average_precision(relevant, retrieved):
    """Average precision of one ranked result list.

    For every relevant item, precision is measured at the rank where that item
    first appears in ``retrieved``; the values are averaged over all relevant
    items.

    Parameters
    ----------
    relevant : array-like of relevant item ids
    retrieved : array-like of item ids ordered best-first

    Returns
    -------
    float
        Average precision. ``0.0`` when ``relevant`` is empty. A relevant item
        that never appears in ``retrieved`` contributes a precision of 0.
    """
    relevant = np.asarray(relevant)
    retrieved = np.asarray(retrieved)

    if relevant.size == 0:
        return 0.0

    precision_sum = 0.0
    for doc in relevant:
        positions = np.flatnonzero(retrieved == doc)
        if positions.size == 0:
            # Never retrieved: counts towards the denominator only.
            continue
        k = positions[0] + 1  # 1-based rank of the first occurrence
        hits_in_top_k = np.sum(np.isin(relevant, retrieved[:k]))
        precision_sum += hits_in_top_k / k

    return precision_sum / len(relevant)

def mean_average_precision(all_relevant, all_retrieved):
    """Mean of ``average_precision`` over all queries.

    ``all_relevant[i]`` and ``all_retrieved[i]`` belong to the same query; the
    number of queries is taken from ``all_retrieved``.
    """
    n_queries = len(all_retrieved)
    ap_sum = sum(
        average_precision(all_relevant[qid], all_retrieved[qid])
        for qid in range(n_queries)
    )
    return ap_sum / n_queries

def average_recall_at_k(all_relevant, all_retrieved, k):
    """Mean of ``recall_at_k`` over all queries.

    The two lists are paired element by element; the sum is divided by the
    number of entries in ``all_relevant``.
    """
    recall_sum = sum(
        recall_at_k(query_relevant, query_retrieved, k)
        for query_relevant, query_retrieved in zip(all_relevant, all_retrieved)
    )
    return recall_sum / len(all_relevant)

In [25]:
def run_all_queries(feature_extractor, centroids, distance_metric, scaler, map_bow_vectors=None):
    """Rank all map images for every query image and collect the ground truth.

    Each path in the module-level ``q_imgs`` is read from ``data02``, converted
    with ``rgb2gray``, described with ``feature_extractor``, turned into a BoW
    vector, scaled with ``scaler`` and compared against the map BoW vectors.

    Parameters
    ----------
    feature_extractor : callable
        Maps a grayscale image to its descriptors (one per row).
    centroids : numpy.ndarray
        Visual words, one per row.
    distance_metric : callable
        Used both to assign descriptors to centroids and to compare the query
        BoW vector with the map BoW vectors (smaller = more similar).
    scaler : fitted transformer exposing ``transform``
        Applied to the query BoW vector; it should be the scaler that was
        fitted on the map BoW vectors.
    map_bow_vectors : numpy.ndarray, optional
        Scaled BoW vectors of the map images, one per row. When omitted, the
        notebook-level variable ``bow_map_images`` is used (the original
        behaviour).

    Returns
    -------
    (all_retrieved_images, all_relevant_images) : tuple of lists
        Per query: the map indices sorted from most to least similar, and the
        map indices marked with 1 in ``sim``. Note the order is retrieved
        first, relevant second.
    """
    if map_bow_vectors is None:
        # Backward compatibility: earlier callers relied on this global.
        map_bow_vectors = bow_map_images

    all_relevant_images = []
    all_retrieved_images = []
    for query_idx in tqdm(range(len(q_imgs))):
        img = plt.imread(os.path.join("data02", q_imgs[query_idx]))
        img = rgb2gray(img)

        # Bag-of-words vector of the query image
        query_img_descriptors = feature_extractor(img)
        bow = bag_of_words(centroids, query_img_descriptors, distance_metric)

        # The scaler works on (n_samples, n_features): wrap the vector as a
        # single-row matrix, then flatten it again.
        bow = scaler.transform(bow.reshape(1, -1)).reshape(-1)

        # Full ranking of the map images, most similar first
        dists = distance_metric(map_bow_vectors, bow)
        retrieved_images = np.argsort(np.array(dists))
        all_retrieved_images.append(retrieved_images)

        # Ground-truth relevance judgements for this query
        relevant_images = np.where(sim[query_idx, :] == 1)[0]
        all_relevant_images.append(relevant_images)

    return all_retrieved_images, all_relevant_images

In [26]:
# Grid evaluation over feature extractor x vocabulary size x distance metric.
# Descriptors are extracted once per extractor and reused for every vocabulary size.
for feature_extractor in [extract_with_orb, extract_with_sift]:
    descriptors = extract_all_descriptors(feature_extractor)
    for n_centroids in [10, 30, 50]:
        # Visual vocabulary: one KMeans centroid per word.
        centroids = find_centroids(descriptors, n_centroids)

        for distance_metric in [manhatten, infinity, cosine, eucledian]:
            # The same metric is used twice: to assign descriptors to centroids
            # (here and inside run_all_queries) and to rank map images per query.
            # NOTE: bow_map_images is also read as a global by run_all_queries.
            bow_map_images = create_all_bow_vectors(feature_extractor, centroids, distance_metric)

            # Standardise each BoW dimension; the scaler is fitted on the map
            # vectors and then passed on so query vectors get the same transform.
            scaler = preprocessing.StandardScaler()
            bow_map_images = scaler.fit_transform(bow_map_images)

            # run_all_queries returns (retrieved, relevant) in that order.
            all_retrieved_images, all_relevant_images = run_all_queries(feature_extractor, centroids, distance_metric, scaler)

            print("-------------")
            print(f"Feature type: {feature_extractor.__name__}")
            print(f"# of centroids: {n_centroids}")
            print(f"Distance metric: {distance_metric.__name__}")
            print("")
            mAP = mean_average_precision(all_relevant_images, all_retrieved_images)
            print(f"mAP: {mAP}")
            for k in [1, 5, 10]:
                r_k = average_recall_at_k(all_relevant_images, all_retrieved_images, k)
                print(f"Recall@{k}: {r_k}")

            print("-------------")

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

-------------
Feature type: extract_with_orb
# of centroids: 10
Distance metric: manhatten

mAP: 0.0251674880713988
Recall@1: 0.004842857142857142
Recall@5: 0.013752331002331005
Recall@10: 0.02000429178664473
-------------


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

-------------
Feature type: extract_with_orb
# of centroids: 10
Distance metric: infinity

mAP: 0.022966680297401634
Recall@1: 0.004462698412698412
Recall@5: 0.01111870525988173
Recall@10: 0.02212039017627252
-------------


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

-------------
Feature type: extract_with_orb
# of centroids: 10
Distance metric: cosine

mAP: 0.022769958098497165
Recall@1: 0.0027429217840982544
Recall@5: 0.010071036262212736
Recall@10: 0.021821596486302355
-------------


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

-------------
Feature type: extract_with_orb
# of centroids: 10
Distance metric: eucledian

mAP: 0.022677356197821914
Recall@1: 0.0019
Recall@5: 0.013665021906198378
Recall@10: 0.025646954679307613
-------------


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

-------------
Feature type: extract_with_orb
# of centroids: 30
Distance metric: manhatten

mAP: 0.027507417274331626
Recall@1: 0.003638095238095238
Recall@5: 0.019941456582633046
Recall@10: 0.030829089211442142
-------------


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

-------------
Feature type: extract_with_orb
# of centroids: 30
Distance metric: infinity

mAP: 0.021719312233618112
Recall@1: 0.002753571428571428
Recall@5: 0.013058397974574443
Recall@10: 0.020098404046933454
-------------


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

-------------
Feature type: extract_with_orb
# of centroids: 30
Distance metric: cosine

mAP: 0.029978298575872902
Recall@1: 0.0073026455026455024
Recall@5: 0.013801058201058206
Recall@10: 0.02525137155872448
-------------


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

-------------
Feature type: extract_with_orb
# of centroids: 30
Distance metric: eucledian

mAP: 0.026328925852484877
Recall@1: 0.0048095238095238095
Recall@5: 0.012530952380952387
Recall@10: 0.028375461466637924
-------------


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

-------------
Feature type: extract_with_orb
# of centroids: 50
Distance metric: manhatten

mAP: 0.027522937205240027
Recall@1: 0.005042250233426706
Recall@5: 0.017774183006535944
Recall@10: 0.025145059134765002
-------------


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

-------------
Feature type: extract_with_orb
# of centroids: 50
Distance metric: infinity

mAP: 0.02213998345551891
Recall@1: 0.004523809523809524
Recall@5: 0.009372397645927058
Recall@10: 0.017741154379389674
-------------


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

-------------
Feature type: extract_with_orb
# of centroids: 50
Distance metric: cosine

mAP: 0.030719792837923515
Recall@1: 0.005094872393401805
Recall@5: 0.01826861428332016
Recall@10: 0.03249136560607146
-------------


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

-------------
Feature type: extract_with_orb
# of centroids: 50
Distance metric: eucledian

mAP: 0.026260310247298583
Recall@1: 0.0025
Recall@5: 0.017478944911297856
Recall@10: 0.030577357609710542
-------------


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

-------------
Feature type: extract_with_sift
# of centroids: 10
Distance metric: manhatten

mAP: 0.03450626281542902
Recall@1: 0.0050728937728937735
Recall@5: 0.022197569097569095
Recall@10: 0.03605120516738161
-------------


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

-------------
Feature type: extract_with_sift
# of centroids: 10
Distance metric: infinity

mAP: 0.031180127122121978
Recall@1: 0.00479471916971917
Recall@5: 0.021100427350427345
Recall@10: 0.03477085218702864
-------------


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

-------------
Feature type: extract_with_sift
# of centroids: 10
Distance metric: cosine

mAP: 0.04174589207407814
Recall@1: 0.00651031746031746
Recall@5: 0.026438411261940672
Recall@10: 0.04679875418698946
-------------


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

-------------
Feature type: extract_with_sift
# of centroids: 10
Distance metric: eucledian

mAP: 0.0375270778727779
Recall@1: 0.006586580086580086
Recall@5: 0.02481887556887556
Recall@10: 0.04408958263958261
-------------


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

-------------
Feature type: extract_with_sift
# of centroids: 30
Distance metric: manhatten

mAP: 0.0408721346602413
Recall@1: 0.009054761904761904
Recall@5: 0.028164472455648914
Recall@10: 0.0475313258636788
-------------


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

-------------
Feature type: extract_with_sift
# of centroids: 30
Distance metric: infinity

mAP: 0.03016475203586035
Recall@1: 0.004585714285714287
Recall@5: 0.019653846153846157
Recall@10: 0.03610562019679666
-------------


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

-------------
Feature type: extract_with_sift
# of centroids: 30
Distance metric: cosine

mAP: 0.05485412416593428
Recall@1: 0.014490266106442579
Recall@5: 0.03549752567693743
Recall@10: 0.052732536417830524
-------------


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

-------------
Feature type: extract_with_sift
# of centroids: 30
Distance metric: eucledian

mAP: 0.04369101592253944
Recall@1: 0.010204761904761906
Recall@5: 0.03235912698412698
Recall@10: 0.04909548947784239
-------------


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

-------------
Feature type: extract_with_sift
# of centroids: 50
Distance metric: manhatten

mAP: 0.04243599542954771
Recall@1: 0.007950793650793651
Recall@5: 0.0288876984126984
Recall@10: 0.05014428882664175
-------------


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

-------------
Feature type: extract_with_sift
# of centroids: 50
Distance metric: infinity

mAP: 0.030491810255927787
Recall@1: 0.006600094350094351
Recall@5: 0.01905046783282077
Recall@10: 0.032184781558310956
-------------


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

-------------
Feature type: extract_with_sift
# of centroids: 50
Distance metric: cosine

mAP: 0.06279949662429402
Recall@1: 0.01472219887955182
Recall@5: 0.039534502588914354
Recall@10: 0.06838142853878147
-------------


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

-------------
Feature type: extract_with_sift
# of centroids: 50
Distance metric: eucledian

mAP: 0.04725453894508526
Recall@1: 0.011248412698412697
Recall@5: 0.03212289377289377
Recall@10: 0.0571637814799579
-------------
