In [None]:
%pip install sklearn scikit-image h5py

In [48]:
# load data
import json
import numpy as np
import h5py
import matplotlib.pyplot as plt
import tqdm

# map
with open("data02/database/database_lite.json","r") as f:
    m_idx = json.load(f)
    m_imgs = np.array(m_idx["im_paths"])
    m_loc=np.array(m_idx["loc"])

# query
with open("data02/query/query_lite.json","r") as f:
    q_idx=json.load(f)
    q_imgs=np.array(q_idx["im_paths"])
    q_loc=np.array(q_idx["loc"])


In [51]:
# Create descriptors
import os
import skimage
from skimage.feature import ORB
from skimage.color import rgb2gray

# Initialize the ORB descriptor
descriptor_extractor = ORB(n_keypoints=10000)
# Initialize the data structure that will contain all the descriptors
descriptors = []
responses = []

# Loop over map images
for img_name in m_imgs:
    img = plt.imread(os.path.join('data02/', img_name))
    img = rgb2gray(img)
    
    # Extract ORB descriptors
    descriptor_extractor.detect_and_extract(img)  
    # keypoints1 = descriptor_extractor.keypoints  # position of the points (not interesting for us)
    descriptors_img = descriptor_extractor.descriptors  # descriptors (the feature vectors)
    responses_img = descriptor_extractor.responses
    # Accumulate the computed descriptors
    descriptors.append(descriptors_img)
    responses.append(responses_img)

In [1]:
import pickle
# save descriptors (uncomment if you want to save the computed descriptors)
#f = open('./ORB-descriptors-original.bin', 'wb')
#data = pickle.dump(descriptors, f)
#f.close()

# load pre-computed descriptors
f = open('./ORB-descriptors-original.bin', 'rb')
descriptors = pickle.load(f)
f.close()

# Ns for which a subset is created
ns = [50, 100, 250, 500, 750, 1000, 1250, 1500, 1750, 2000]
descriptors_ns = [] # will contain subsets of the descriptors for each N in Ns
for n in ns:
    descriptors_n = []
    for index in range(len(descriptors)):
        best_indices = responses[index].argsort()[::-1][:n]
        descriptors_n.extend(descriptors[index][best_indices])
    descriptors_ns.append(descriptors_n)


In [35]:
import sklearn
from sklearn.cluster import KMeans

# clustering
K = 32  # number of clusters (equivalent to the number of words) we want to estimate
num_initialization = 5 # Number of time the k-means algorithm will be run with different centroid seeds.

# Run the k-means clustering
kmeans = KMeans(n_clusters=K, random_state=0, n_init=num_initialization, verbose=1)
clusters = kmeans.fit(descriptors_ns[8])  # we use the descriptors extracted from the map (training) images before
centroids = clusters.cluster_centers_

print("Shape of the centroids matrix: ", centroids.shape)
print("We computed ", centroids.shape[0], "centroids of lengh ", centroids.shape[1], " (the same of the descriptor)")
# Rememeber: the centroids can be considered as the words that compose our documents 
# -> in this case the basic components of the images

Initialization complete
Iteration 0, inertia 149062686.0.
Iteration 1, inertia 88076495.58068186.
Iteration 2, inertia 87372735.89294986.
Iteration 3, inertia 87147671.92668228.
Iteration 4, inertia 87031906.93448861.
Iteration 5, inertia 86963244.80328077.
Iteration 6, inertia 86919177.95827934.
Iteration 7, inertia 86888134.20810387.
Iteration 8, inertia 86863958.19671434.
Iteration 9, inertia 86843316.40239117.
Iteration 10, inertia 86824924.80558933.
Iteration 11, inertia 86808103.19610372.
Iteration 12, inertia 86792437.63879672.
Iteration 13, inertia 86777551.02332123.
Iteration 14, inertia 86763224.77799484.
Iteration 15, inertia 86749461.93085279.
Iteration 16, inertia 86736288.65100043.
Iteration 17, inertia 86723836.44838306.
Iteration 18, inertia 86712270.80122522.
Iteration 19, inertia 86701721.20736068.
Iteration 20, inertia 86692178.34561649.
Iteration 21, inertia 86683605.97125459.
Iteration 22, inertia 86675766.35670102.
Iteration 23, inertia 86668549.10512197.
Iteratio

In [76]:
#f = open('./knn_centroids_8.bin', 'wb')
#data = pickle.dump(centroids, f)
#f.close()

# load pre-computed descriptors
f = open('./knn_32_centroids_9.bin', 'rb')
centroids = pickle.load(f)
f.close()

In [77]:
from sklearn.metrics import pairwise_distances_argmin_min
# compute the bag of word vector for an image
def bag_of_words(centroids, img_descriptors):
    n_centroids = centroids.shape[0]  # number of centroids found with the KMeans clustering
    n_descriptors = img_descriptors.shape[0]  # number of descriptors extracted from the image
    
    # initialization of the bag of words (BoW) vector
    # Note that the BoW vector has length equal to the number of cluster centroids
    # The cluster centroids are indeed our visual words, and the BoW will be the histogram of these words found in the given image
    bow_vector = np.zeros(n_centroids)  
    for i in range(n_descriptors):
        ## BEGIN ANSWER
        descriptor = img_descriptors[i]
        
        centroid_index, _ = pairwise_distances_argmin_min(descriptor.reshape(1, -1), centroids)
        bow_vector[centroid_index[0]] += 1
            
        ## END ANSWER
    return bow_vector

In [84]:
from skimage.feature import ORB
from tqdm import tqdm
from skimage.color import rgb2gray
descriptor_extractor = ORB(n_keypoints=10000)
ns = [50, 100, 500, 1000, 1500, 2000]
bow_map_images = None
bow_map_images_ns = {}
# loop over the images in the map set
for img_name in tqdm(m_imgs):
    # load image
    img = plt.imread(os.path.join('data02/', img_name))
    img = rgb2gray(img)
    
    # Extract ORB descriptors
    descriptor_extractor.detect_and_extract(img)  
    # keypoints1 = descriptor_extractor.keypoints  # position of the points (not interesting for us)
    descriptors_img = descriptor_extractor.descriptors  # descriptors (the feature vectors)
    responses_img = descriptor_extractor.responses
    
    # Accumulate the computed descriptors
    for n in ns:
        best_indices = responses_img.argsort()[::-1][:n]
        img_descriptors = descriptors_img[best_indices]
        # compute BoW representation of the image (using the basic 'words', i.e. centroids, computed earlier)
        bow = bag_of_words(centroids, img_descriptors)
        # add the computed BoW vector to the set of map representations
        if n in bow_map_images_ns:
            arr = bow_map_images_ns[n]
            arr = np.vstack((arr, bow))
            bow_map_images_ns[n] = arr
        else:
            bow_map_images_ns[n] = bow

100%|██████████| 1000/1000 [5:35:27<00:00, 20.13s/it] 


In [87]:
#f = open('./bow_map_images_ns_k32.bin', 'wb')
#data = pickle.dump(bow_map_images_ns, f)
#f.close()

# load pre-computed descriptors
f = open('./bow_map_images_ns_k32.bin', 'rb')
bow_map_images_ns = pickle.load(f)
f.close()

In [89]:
from sklearn import preprocessing

for key, value in bow_map_images_ns.items():
    # Compute z-score statistics
    scaler = preprocessing.StandardScaler().fit(value)
    # Normalize the vectors of the map collection (0 mean and 1 std)
    bow_map_images = scaler.transform(value)
    bow_map_images_ns[key] = bow_map_images

In [90]:
import h5py
# loading the relevance judgements
with h5py.File("data02/london_lite_gt.h5","r") as f:
    fovs = f["fov"][:]
    sim = f["sim"][:].astype(np.uint8)

In [93]:
# receives as input the:
#   - bag of words vectors of the map images
#   - the bag of work vector of the query image
from sklearn.metrics import pairwise_distances

def retrieve_images(map_bow_vectors, query_bow):
    n_map_bow_vectors = map_bow_vectors.shape[0]
    bow_distances = np.zeros(n_map_bow_vectors)
    most_similar = None  # use this to 
    
    ## BEGIN ANSWER
    bow_distances = pairwise_distances(query_bow.reshape(1, -1), map_bow_vectors)[0]
    most_similar = bow_distances.argsort()
    ## END ANSWER
    
    return most_similar



# Retrieve the most similar images to query image 221 (index 221-1=220)
query_idx = 220
img = plt.imread("data02/" + q_imgs[query_idx])
img = rgb2gray(img)
# compute bag of words
descriptor_extractor.detect_and_extract(img)  
query_img_descriptors = descriptor_extractor.descriptors 
bow = bag_of_words(centroids, query_img_descriptors)

# Normalize the query BoW vector using the mean and variance of the map (computed earlier and saved into the scaler object)
bow = scaler.transform(bow.reshape(-1, 1).transpose())
bow = bow.transpose().reshape(-1)

# Retrieve the indices of the top-10 similar images from the map
for n in ns:
    retrieved_images = retrieve_images(bow_map_images_ns[n], bow)
    print('n = ', n, ': Indices of similar images retrieved: ', retrieved_images[:10])
    # Indices of the relevant map images for the query: we have the relevance judgements (Ground truth)
relevant_images = np.where(sim[query_idx, :] == 1)[0]
print('Indices of relevant images (given in the GT relevance judgements): ', relevant_images)

n =  50 : Indices of similar images retrieved:  [663 897 484 937 380 916 263   5 970 673]
n =  100 : Indices of similar images retrieved:  [897 653 523 866 514 916 977 528 976 961]
n =  500 : Indices of similar images retrieved:  [904 928 918 810   5 827 399 960 916 442]
n =  1000 : Indices of similar images retrieved:  [791 796 213 904 929 523 596 637 895  30]
n =  1500 : Indices of similar images retrieved:  [791 895 794 810 929 524 213 523 928 912]
n =  2000 : Indices of similar images retrieved:  [929 810   5 738 928 524 779 722 523   4]
Indices of relevant images (given in the GT relevance judgements):  [310 311 312 313 314 315 316 317 318 319 320 321 322 323 324]
