In [14]:
import json
import numpy as np
import h5py
import matplotlib.pyplot as plt
import os
from tqdm.notebook import tqdm
from skimage.color import rgb2gray
from skimage.feature import SIFT
from sklearn.cluster import KMeans

In [2]:
# map
with open("data02/database/database_lite.json","r") as f:
    m_idx = json.load(f)
    m_imgs = np.array(m_idx["im_paths"])
    m_loc=np.array(m_idx["loc"])

# query
with open("data02/query/query_lite.json","r") as f:
    q_idx=json.load(f)
    q_imgs=np.array(q_idx["im_paths"])
    q_loc=np.array(q_idx["loc"])

In [3]:
with h5py.File("data02/london_lite_gt.h5","r") as f:
    fovs = f["fov"][:]
    sim = f["sim"][:].astype(np.uint8)

In [4]:
def bag_of_words(centroids, img_descriptors):
    n_centroids = centroids.shape[0]  # number of centroids found with the KMeans clustering
    n_descriptors = img_descriptors.shape[0]  # number of descriptors extracted from the image

    bow_vector = np.zeros(n_centroids)

    for i in range(n_descriptors):
        idx = np.argmin(np.linalg.norm(centroids - img_descriptors[i], axis=1), axis=0)
        bow_vector[idx] += 1
    return bow_vector

In [12]:
descriptors = None
sift = SIFT()

for img_name in tqdm(m_imgs):
    img = plt.imread(os.path.join('data02', img_name))
    img = rgb2gray(img)

    sift.detect_and_extract(img)
    print(sift.descriptors.shape)

    if descriptors is None:
        descriptors = sift.descriptors
    else:
        descriptors = np.vstack((descriptors, sift.descriptors))



  0%|          | 0/1000 [00:00<?, ?it/s]

(761, 128)
(988, 128)
(964, 128)
(881, 128)
(933, 128)
(1000, 128)
(1450, 128)
(1344, 128)
(1454, 128)
(1574, 128)
(1390, 128)
(1568, 128)
(1535, 128)
(1572, 128)
(1540, 128)
(1564, 128)
(1561, 128)
(1571, 128)
(1587, 128)
(1553, 128)
(1532, 128)
(1335, 128)
(1337, 128)
(1026, 128)
(972, 128)
(1103, 128)
(1080, 128)
(1017, 128)
(1189, 128)
(1025, 128)
(750, 128)
(791, 128)
(968, 128)
(922, 128)
(515, 128)
(576, 128)
(381, 128)
(112, 128)
(241, 128)
(324, 128)
(267, 128)
(265, 128)
(313, 128)
(419, 128)
(492, 128)
(567, 128)
(610, 128)
(491, 128)
(564, 128)
(561, 128)
(506, 128)
(597, 128)
(359, 128)
(588, 128)
(702, 128)
(537, 128)
(554, 128)
(523, 128)
(524, 128)
(535, 128)
(528, 128)
(393, 128)
(486, 128)
(675, 128)
(829, 128)
(898, 128)
(672, 128)
(934, 128)
(1051, 128)
(652, 128)
(841, 128)
(678, 128)
(575, 128)
(614, 128)
(529, 128)
(344, 128)
(444, 128)
(599, 128)
(777, 128)
(790, 128)
(458, 128)
(638, 128)
(612, 128)
(275, 128)
(972, 128)
(786, 128)
(443, 128)
(303, 128)
(403, 1

In [13]:
import pickle
# save descriptors (uncomment if you want to save the computed descriptors)
f = open('data02/SIFT-descriptors-map.bin', 'wb')
data = pickle.dump(descriptors, f)
f.close()

# load pre-computed descriptors
# f = open('data02/ORB-descriptors-map.bin', 'rb')
# descriptors = pickle.load(f)
# f.close()

In [15]:
K = 32
num_initialization = 5

kmeans = KMeans(n_clusters=K, random_state=0, n_init=num_initialization, verbose=1)
clusters = kmeans.fit(descriptors)
centroids = clusters.cluster_centers_

Initialization complete


Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at
the same time. Both libraries are known to be incompatible and this
can cause random crashes or deadlocks on Linux when loaded in the
same Python program.
Using threadpoolctl may cause crashes or deadlocks. For more
information and possible workarounds, please see
    https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md



Iteration 0, inertia 107218109386.0.
Iteration 1, inertia 71426241942.62704.
Iteration 2, inertia 70141469465.20734.
Iteration 3, inertia 69546428627.36285.
Iteration 4, inertia 69193179570.66953.
Iteration 5, inertia 68965577197.79855.
Iteration 6, inertia 68814847059.17447.
Iteration 7, inertia 68715185267.23842.
Iteration 8, inertia 68648507141.49531.
Iteration 9, inertia 68600669792.47072.
Iteration 10, inertia 68563703273.752426.
Iteration 11, inertia 68533110814.37239.
Iteration 12, inertia 68505831241.02156.
Iteration 13, inertia 68480476017.62596.
Iteration 14, inertia 68456862018.86252.
Iteration 15, inertia 68433933378.50716.
Iteration 16, inertia 68412088655.69012.
Iteration 17, inertia 68391594521.75092.
Iteration 18, inertia 68372762229.10057.
Iteration 19, inertia 68356172778.47818.
Iteration 20, inertia 68342127666.788055.
Iteration 21, inertia 68329894610.33148.
Iteration 22, inertia 68319355432.65659.
Iteration 23, inertia 68309854061.42056.
Iteration 24, inertia 68301

In [16]:
bow_map_images = None

for img_name in tqdm(m_imgs):
    img = plt.imread(os.path.join('data02', img_name))
    img = rgb2gray(img)

    sift.detect_and_extract(img)
    img_descriptors = sift.descriptors  # descriptors (the feature vectors)

    # compute BoW representation of the image (using the basic 'words', i.e. centroids, computed earlier)
    bow = bag_of_words(centroids, img_descriptors)
    # add the computed BoW vector to the set of map representations
    if bow_map_images is None:
        bow_map_images = bow
    else:
        bow_map_images = np.vstack( (bow_map_images, bow))

  0%|          | 0/1000 [00:00<?, ?it/s]

In [17]:
from sklearn import preprocessing
orig_bow_map_images = bow_map_images

# Compute z-score statistics
scaler = preprocessing.StandardScaler().fit(bow_map_images)
# Normalize the vectors of the map collection (0 mean and 1 std)
bow_map_images = scaler.transform(bow_map_images)

In [18]:
def retrieve_images(map_bow_vectors, query_bow):
    return np.argsort(np.linalg.norm(map_bow_vectors - query_bow, axis=1))

In [19]:
# Retrieve the most similar images to query image 221 (index 221-1=220)
query_idx = 220

img = plt.imread("data02/" + q_imgs[query_idx])
img = rgb2gray(img)
# compute bag of words
sift.detect_and_extract(img)
query_img_descriptors = sift.descriptors
bow = bag_of_words(centroids, query_img_descriptors)

# Normalize the query BoW vector using the mean and variance of the map (computed earlier and saved into the scaler object)
bow = scaler.transform(bow.reshape(-1, 1).transpose())
bow = bow.transpose().reshape(-1)

# Retrieve the indices of the top-10 similar images from the map
retrieved_images = retrieve_images(bow_map_images, bow)
print('Indices of similar images retrieved: ', retrieved_images[:10])
# Indices of the relevant map images for the query: we have the relevance judgements (Ground truth)
relevant_images = np.where(sim[query_idx, :] == 1)[0]
print('Indices of relevant images (given in the GT relevance judgements): ', relevant_images)

Indices of similar images retrieved:  [321 946  48 336  82  74 340 320  47 315]
Indices of relevant images (given in the GT relevance judgements):  [310 311 312 313 314 315 316 317 318 319 320 321 322 323 324]


In [20]:
## BEGIN ANSWER
def precision_at_k(relevant, retrieved, k):
    tp = np.sum(np.in1d(relevant, retrieved[:k]))
    fp = len(relevant) - tp
    return tp / (tp + fp)

In [21]:
## BEGIN ANSWER
## BEGIN ANSWER
def average_precision(relevant, retrieved):
    # BEGIN ANSWER
    precisions = []
    for k in range(len(retrieved)):
        prec_at_k = precision_at_k(relevant, retrieved, k)
        precisions.append(prec_at_k)
    return sum(precisions)/len(precisions)

def mean_average_precision(all_relevant, all_retrieved):
    # BEGIN ANSWER
    total = 0
    count = len(all_retrieved)
    for qid in range(len(all_retrieved)):
        avg_precision = average_precision(all_relevant[qid], all_retrieved[qid])
        total += avg_precision
    # END ANSWER
    return total / count

all_relevant_images = []
all_retrieved_images = []
for query_idx in tqdm(range(len(q_imgs))):
    img = plt.imread("data02/" + q_imgs[query_idx])
    img = rgb2gray(img)
    # compute bag of words
    sift.detect_and_extract(img)
    query_img_descriptors = sift.descriptors
    bow = bag_of_words(centroids, query_img_descriptors)

    # Normalize the query BoW vector using the mean and variance of the map (computed earlier and saved into the scaler object)
    bow = scaler.transform(bow.reshape(-1, 1).transpose())
    bow = bow.transpose().reshape(-1)

    # Retrieve the indices of the top-10 similar images from the map
    retrieved_images = retrieve_images(bow_map_images, bow)
#     print('Indices of similar images retrieved: ', retrieved_images[:10])
    all_retrieved_images.append(retrieved_images)
    # Indices of the relevant map images for the query: we have the relevance judgements (Ground truth)
    relevant_images = np.where(sim[query_idx, :] == 1)[0]
#     print('Indices of relevant images (given in the GT relevance judgements): ', relevant_images)
    all_relevant_images.append(relevant_images)

mapr = mean_average_precision(all_relevant_images, all_retrieved_images)
mapr
## END ANSWER

  0%|          | 0/500 [00:00<?, ?it/s]

0.6271227203620999