In [44]:
# SIFT Features, ORB Features, Deep
# Cosine, euclidean, etc
# BoW, VLAD
# Different k (# of clusters)

In [45]:
import json
import numpy as np
import h5py
import matplotlib.pyplot as plt
import os
from tqdm.notebook import tqdm
from skimage.color import rgb2gray
from skimage.feature import SIFT, ORB
from sklearn.cluster import KMeans
from sklearn import preprocessing
import pickle
import math

In [46]:
# Map (database) index: image paths and their locations.
with open("data02/database/database_lite.json", "r") as f:
    m_idx = json.load(f)
m_imgs = np.array(m_idx["im_paths"])
m_loc = np.array(m_idx["loc"])

# Query index: same layout as the map index.
with open("data02/query/query_lite.json", "r") as f:
    q_idx = json.load(f)
q_imgs = np.array(q_idx["im_paths"])
q_loc = np.array(q_idx["loc"])

# Ground truth: `sim[q, m] == 1` marks map image m as relevant for query q
# (this is how run_all_queries reads it). Both datasets are read fully into
# memory while the HDF5 file is still open.
with h5py.File("data02/london_lite_gt.h5", "r") as f:
    fovs = f["fov"][:]
    sim = f["sim"][:].astype(np.uint8)

In [47]:
def extract_all_descriptors(extractor):
    """Extract local descriptors from every map image and stack them row-wise.

    Parameters
    ----------
    extractor : callable
        Called with one grayscale image (the output of ``rgb2gray``); its
        return values are stacked with ``np.vstack``, so it is expected to
        return an array with one descriptor per row.

    Returns
    -------
    numpy.ndarray or None
        The descriptors of all images in ``m_imgs`` order, or ``None`` when
        ``m_imgs`` is empty.
    """
    per_image = []

    for img_name in tqdm(m_imgs):
        img = plt.imread(os.path.join('data02', img_name))
        img = rgb2gray(img)
        per_image.append(extractor(img))

    if not per_image:
        return None

    # Stack once at the end: calling np.vstack inside the loop re-copies the
    # whole accumulated array for every image (quadratic in total size).
    return np.vstack(per_image)

In [48]:
def find_centroids(descriptors, k):
    """Cluster ``descriptors`` into ``k`` groups with k-means and return the centres.

    The clustering is seeded (``random_state=0``) and restarted 5 times
    (``n_init=5``); the fitted model's ``cluster_centers_`` is returned.
    """
    model = KMeans(n_clusters=k, random_state=0, n_init=5, verbose=0)
    model.fit(descriptors)
    return model.cluster_centers_


In [49]:
def manhatten(matrix, vector):
    """Manhattan (L1) distance between ``vector`` and each row of ``matrix``."""
    offsets = matrix - vector
    return np.linalg.norm(offsets, ord=1, axis=1)

def infinity(matrix, vector):
    """Infinity-norm distance (largest absolute coordinate difference) per row."""
    offsets = matrix - vector
    return np.linalg.norm(offsets, ord=np.inf, axis=1)

def eucledian(matrix, vector):
    """Euclidean (L2) distance between ``vector`` and each row of ``matrix``."""
    offsets = matrix - vector
    return np.linalg.norm(offsets, axis=1)

def cosine(matrix, vector):
    """Negated cosine similarity between ``vector`` and each row of ``matrix``.

    The similarity is negated so that, like the other metrics in this file,
    a smaller value means "closer" and the result can be fed to
    ``argmin`` / ``argsort``.

    Parameters
    ----------
    matrix : array-like, shape (n_rows, n_features)
    vector : array-like, shape (n_features,)

    Returns
    -------
    numpy.ndarray, shape (n_rows,)
        One value per row of ``matrix``. Rows (or a ``vector``) with zero norm
        get similarity 0 instead of NaN.
    """
    matrix = np.asarray(matrix, dtype=float)
    vector = np.asarray(vector, dtype=float)

    dots = matrix @ vector
    norms = np.linalg.norm(matrix, axis=1) * np.linalg.norm(vector)

    # The result has one entry per matrix row (not per feature); entries
    # whose norm product is zero keep the pre-filled similarity of 0.
    similarities = np.zeros(matrix.shape[0])
    np.divide(dots, norms, out=similarities, where=norms > 0)

    return -similarities

In [50]:
def extract_with_sift(image):
    """Run SIFT detection and extraction on ``image`` and return the descriptors."""
    detector = SIFT()
    detector.detect_and_extract(image)
    descriptors = detector.descriptors
    return descriptors

def extract_with_orb(image):
    """Run ORB detection and extraction on ``image`` and return the descriptors."""
    detector = ORB()
    detector.detect_and_extract(image)
    descriptors = detector.descriptors
    return descriptors

In [51]:
def bag_of_words(centroids, img_descriptors, distance_metric):
    """Build the bag-of-words histogram of one image.

    Each descriptor votes for the centroid that ``distance_metric`` ranks
    closest (smallest value); the returned vector has one count per centroid.
    """
    histogram = np.zeros(centroids.shape[0])  # one bin per visual word

    # Index of the nearest centroid for every descriptor of the image.
    nearest_words = [
        np.argmin(distance_metric(centroids, img_descriptors[row]))
        for row in range(img_descriptors.shape[0])
    ]

    for word in nearest_words:
        histogram[word] += 1

    return histogram

In [52]:
def create_all_bow_vectors(extractor, centroids, distance_metric):
    """Compute the bag-of-words vector of every map image.

    Parameters
    ----------
    extractor : callable
        Called with one grayscale image; returns that image's descriptors.
    centroids : numpy.ndarray
        The visual words (cluster centres), one per row.
    distance_metric : callable
        ``distance_metric(centroids, descriptor)`` -> one distance per
        centroid; used to assign each descriptor to its nearest word.

    Returns
    -------
    numpy.ndarray or None
        A 2-D array with one BoW row per image in ``m_imgs`` order (also when
        there is a single image), or ``None`` when ``m_imgs`` is empty.
    """
    bow_rows = []
    for img_name in tqdm(m_imgs):
        img = plt.imread(os.path.join('data02', img_name))
        img = rgb2gray(img)

        img_descriptors = extractor(img)  # descriptors (the feature vectors)

        # BoW representation of the image in terms of the given centroids.
        bow_rows.append(bag_of_words(centroids, img_descriptors, distance_metric))

    if not bow_rows:
        return None

    # Stack once instead of growing the matrix with np.vstack per image,
    # which copied the whole matrix on every iteration.
    return np.vstack(bow_rows)

In [53]:
def recall_at_k(relevant, retrieved, k):
    """Fraction of the relevant items that appear in the first ``k`` retrieved.

    Parameters
    ----------
    relevant : array-like
        Ids of the relevant items.
    retrieved : sequence
        Ranked ids, best match first.
    k : int
        Cut-off rank.

    Returns
    -------
    float
        Recall@k; ``0.0`` when ``relevant`` is empty (instead of the NaN that
        a 0/0 division would produce).
    """
    n_relevant = len(relevant)
    if n_relevant == 0:
        return 0.0
    # np.isin replaces the deprecated np.in1d.
    return np.sum(np.isin(relevant, retrieved[:k])) / n_relevant

In [54]:
def precision_at_k(relevant, retrieved, k):
    """Precision@k: relevant items found in the first ``k`` retrieved, divided by ``k``.

    Parameters
    ----------
    relevant : array-like
        Ids of the relevant items.
    retrieved : sequence
        Ranked ids, best match first.
    k : int
        Cut-off rank; used as the denominator.
    """
    # np.isin replaces the deprecated np.in1d.
    hits = np.sum(np.isin(relevant, retrieved[:k]))
    return hits / k

def average_precision(relevant, retrieved):
    """Average precision of one ranked result list.

    For every relevant item found in ``retrieved`` the precision at its rank
    is taken; the sum is divided by the total number of relevant items.
    Relevant items that are never retrieved contribute 0.

    Parameters
    ----------
    relevant : array-like
        Ids of the relevant items (assumed to contain no duplicates).
    retrieved : array-like
        Ranked ids, best match first (assumed to contain no duplicates).

    Returns
    -------
    float
        The average precision; ``0.0`` when ``relevant`` is empty.
    """
    relevant = np.asarray(relevant)
    retrieved = np.asarray(retrieved)

    if relevant.size == 0:
        return 0.0

    # 1-based ranks at which a relevant item shows up in the ranking.
    hit_ranks = np.flatnonzero(np.isin(retrieved, relevant)) + 1

    # At the i-th hit exactly i relevant items have been seen, so the
    # precision at that rank is i / rank.
    precisions = np.arange(1, hit_ranks.size + 1) / hit_ranks

    return precisions.sum() / relevant.size

def mean_average_precision(all_relevant, all_retrieved):
    """Mean of ``average_precision`` over all queries.

    ``all_relevant[i]`` and ``all_retrieved[i]`` belong to the same query; the
    number of queries is taken from ``all_retrieved``.
    """
    n_queries = len(all_retrieved)

    ap_sum = 0
    for qid, retrieved in enumerate(all_retrieved):
        ap_sum += average_precision(all_relevant[qid], retrieved)

    return ap_sum / n_queries

def average_recall_at_k(all_relevant, all_retrieved, k):
    """Mean Recall@k over all queries.

    Queries are paired up with ``zip``; the sum of the per-query recalls is
    divided by ``len(all_relevant)``.
    """
    recall_sum = 0
    for query_relevant, query_retrieved in zip(all_relevant, all_retrieved):
        recall_sum += recall_at_k(query_relevant, query_retrieved, k)

    n_queries = len(all_relevant)
    return recall_sum / n_queries

In [55]:
def run_all_queries(feature_extractor, centroids, distance_metric, scaler, map_bows=None):
    """Rank all map images for every query image.

    Parameters
    ----------
    feature_extractor : callable
        Called with one grayscale image; returns that image's descriptors.
    centroids : numpy.ndarray
        The visual words used to build the query's bag-of-words vector.
    distance_metric : callable
        ``distance_metric(matrix, vector)`` -> one value per matrix row,
        smaller meaning closer. Used both for word assignment and for
        ranking the map images.
    scaler : object with ``transform``
        Applied to the query BoW vector before it is compared to the map.
    map_bows : numpy.ndarray, optional
        BoW matrix of the map images, one row per image. When omitted, the
        module-level ``bow_map_images`` is used (the original behaviour).

    Returns
    -------
    (all_retrieved_images, all_relevant_images) : tuple of lists
        Per query: the full ranking of map-image indices (closest first) and
        the indices of the map images marked relevant in ``sim``.
    """
    if map_bows is None:
        # Backward-compatible fallback to the notebook-level variable.
        map_bows = bow_map_images

    all_relevant_images = []
    all_retrieved_images = []
    for query_idx in tqdm(range(len(q_imgs))):
        img = plt.imread(os.path.join("data02", q_imgs[query_idx]))
        img = rgb2gray(img)

        # Bag-of-words vector of the query image.
        query_img_descriptors = feature_extractor(img)
        bow = bag_of_words(centroids, query_img_descriptors, distance_metric)

        # The scaler is given the vector as a single row; flatten it again
        # afterwards.
        bow = scaler.transform(bow.reshape(1, -1)).reshape(-1)

        # Full ranking of the map images, closest first (not only a top-10).
        dists = distance_metric(map_bows, bow)
        retrieved_images = np.argsort(np.array(dists))
        all_retrieved_images.append(retrieved_images)

        # Ground-truth relevance judgements for this query.
        relevant_images = np.where(sim[query_idx, :] == 1)[0]
        all_relevant_images.append(relevant_images)

    return all_retrieved_images, all_relevant_images

In [56]:
# Experiment grid: every combination of feature extractor, vocabulary size
# and distance metric is evaluated, and its mAP and Recall@{1,5,10} printed.
for feature_extractor in [extract_with_orb, extract_with_sift]:
    # Map descriptors are extracted once per extractor and reused for every
    # vocabulary size below.
    descriptors = extract_all_descriptors(feature_extractor)
    for n_centroids in [10, 30, 50]:
        centroids = find_centroids(descriptors, n_centroids)

        for distance_metric in [manhatten, infinity, cosine, eucledian]:
            # The same metric assigns descriptors to visual words and ranks
            # the map images. NOTE: `bow_map_images` is read as a global by
            # run_all_queries, so it has to be (re)bound here before the call.
            bow_map_images = create_all_bow_vectors(feature_extractor, centroids, distance_metric)

            # Standardise the map BoW vectors; the fitted scaler is handed to
            # run_all_queries so query vectors get the same transform.
            scaler = preprocessing.StandardScaler()
            bow_map_images = scaler.fit_transform(bow_map_images)

            all_retrieved_images, all_relevant_images = run_all_queries(feature_extractor, centroids, distance_metric, scaler)

            print("-------------")
            print(f"Feature type: {feature_extractor.__name__}")
            print(f"# of centroids: {n_centroids}")
            print(f"Distance metric: {distance_metric.__name__}")
            print("")
            mAP = mean_average_precision(all_relevant_images, all_retrieved_images)
            print(f"mAP: {mAP}")
            for k in [1, 5, 10]:
                r_k = average_recall_at_k(all_relevant_images, all_retrieved_images, k)
                print(f"Recall@{k}: {r_k}")

            print("-------------")

  0%|          | 0/1000 [00:00<?, ?it/s]

KeyboardInterrupt: 