In [1]:
import math
import os
import warnings

import cv2 as cv
import faiss
import numpy as np

In [2]:
def get_sift_keypoints(img, nfeatures=5000):
    """Detect SIFT keypoints in an image and compute their descriptors.

    Parameters
    ----------
    img : numpy.ndarray
        Image as returned by ``cv.imread`` (colour, BGR channel order), or
        an image that is already single-channel (2-D) grayscale.
    nfeatures : int, optional
        Value forwarded to ``cv.SIFT_create(nfeatures=...)``; the default
        of 5000 matches the previously hard-coded setting.

    Returns
    -------
    kp
        The keypoints returned by ``detectAndCompute``.
    des
        The matching descriptors, one row per keypoint. May be ``None``
        when OpenCV finds no keypoints, so callers should check for it.

    Raises
    ------
    ValueError
        If ``img`` is ``None``, which is what ``cv.imread`` returns for a
        missing or unreadable file.
    """
    if img is None:
        raise ValueError(
            'img is None - cv.imread returns None when the file is missing or unreadable'
        )

    # A 2-D array is already grayscale; cvtColor(BGR2GRAY) would reject it.
    if getattr(img, 'ndim', None) == 2:
        gray = img
    else:
        gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

    sift = cv.SIFT_create(nfeatures=nfeatures)
    kp, des = sift.detectAndCompute(gray, None)
    return kp, des

In [3]:
def _building_name_from_filename(image_name):
    """Return the building label encoded in an image file name.

    The extension is removed with ``os.path.splitext`` (so dots inside the
    name are harmless), then a trailing image number and the separator in
    front of it are stripped: ``Swift_Hall_1.jpg`` and ``Swift_Hall_12.jpg``
    both give ``Swift_Hall``. When the stem has no numeric suffix, the last
    two characters are dropped, as the previous implementation always did.
    """
    stem = os.path.splitext(image_name)[0]
    without_number = stem.rstrip('0123456789')
    if without_number != stem:
        name = without_number.rstrip('_- ')
        if name:
            return name
    return stem[:-2]


def create_sift_database(image_dir='images'):
    """Extract SIFT descriptors for every JPEG image in a directory.

    Parameters
    ----------
    image_dir : str, optional
        Directory to scan (non-recursive). Defaults to ``'images'``.

    Returns
    -------
    labels : list of str
        One building name per descriptor, in the same order as the rows
        obtained by stacking ``features``.
    features : list
        One descriptor array per image that produced descriptors.

    Notes
    -----
    Files are processed in sorted order so the database layout is
    reproducible (``os.listdir`` order is arbitrary). Images that cannot be
    read, or that yield no descriptors, are skipped with a warning instead
    of aborting the whole build.
    """
    labels = []
    features = []
    for image_name in sorted(os.listdir(image_dir)):
        extension = os.path.splitext(image_name)[1].lower()
        if extension not in ('.jpg', '.jpeg'):
            continue

        image_path = os.path.join(image_dir, image_name)
        img = cv.imread(image_path)
        if img is None:
            # cv.imread signals failure by returning None, not by raising.
            warnings.warn(f'Skipping unreadable image: {image_path}')
            continue

        _, des = get_sift_keypoints(img)
        if des is None or len(des) == 0:
            warnings.warn(f'Skipping image without SIFT features: {image_path}')
            continue

        building_name = _building_name_from_filename(image_name)
        # One label per descriptor row keeps labels aligned with features.
        labels.extend([building_name] * len(des))
        features.append(des)
    return labels, features

In [4]:
# Build the SIFT database from the image folder: `labels` holds one building
# name per descriptor, `features` holds one descriptor array per image.
labels, features = create_sift_database()

In [5]:
# Number of labels collected, i.e. one per keypoint across all database images.
len(labels)

98399

In [6]:
# Stack the per-image descriptor arrays into a single 2-D matrix.
features_ar = np.vstack(features)
# Row i must describe labels[i]: the nearest-neighbour lookup maps a row id
# straight back to a label, so a mismatch would silently mislabel matches.
assert features_ar.shape[0] == len(labels), 'labels and descriptors are misaligned'
features_ar.shape

(98399, 128)

In [7]:
# Exact L2 index; take the dimensionality from the data rather than
# hard-coding 128.
index = faiss.IndexFlatL2(features_ar.shape[1])
# faiss expects C-contiguous float32 input (a no-op when already the case).
index.add(np.ascontiguousarray(features_ar, dtype=np.float32))
print(index.ntotal)

98399


In [8]:
def get_percentage_scores(top_tuple_list, softmax_temp=3):
    """Convert raw match scores into softmax percentages.

    Parameters
    ----------
    top_tuple_list : list of (score, name) tuples
        Raw score and label for each candidate, in the order they should be
        reported.
    softmax_temp : float, optional
        Softmax temperature; each score is divided by it before
        exponentiation. Defaults to 3.

    Returns
    -------
    top_list : list
        The names, in input order.
    percentage_scores : list of float
        Softmax of ``score / softmax_temp`` scaled to sum to 100, in input
        order. Both lists are empty for empty input.
    """
    if not top_tuple_list:
        return [], []

    top_list = [name for _, name in top_tuple_list]
    logits = [score / softmax_temp for score, _ in top_tuple_list]

    # Subtract the largest logit before exponentiating: the result is
    # mathematically unchanged, but math.exp can no longer overflow
    # (it raises OverflowError for arguments above roughly 709).
    peak = max(logits)
    weights = [math.exp(logit - peak) for logit in logits]
    total = sum(weights)

    percentage_scores = [weight * 100 / total for weight in weights]
    return top_list, percentage_scores

In [9]:
def find_closest_image_match(img, k=5, method='sift'):
    """Rank database buildings by similarity to a query image.

    Every SIFT descriptor of ``img`` is matched against the module-level
    faiss ``index``; each of its ``k`` nearest database descriptors casts a
    vote of ``1 / rank`` (rank starting at 1) for the building found at the
    same position in the module-level ``labels`` list. Votes are summed per
    building, sorted in descending order and converted to percentages with
    ``get_percentage_scores``.

    Parameters
    ----------
    img : numpy.ndarray
        Query image, passed unchanged to ``get_sift_keypoints``.
    k : int, optional
        Number of nearest neighbours retrieved per descriptor (default 5).
    method : str, optional
        Only ``'sift'`` is implemented.

    Returns
    -------
    (top_list, percentage_scores) or None
        Building names and their percentage scores, best match first.
        Both lists are empty when the image yields no descriptors.
        ``None`` is returned for ``'surf'`` or any other method.
    """
    if method != 'sift':
        # 'surf' and unknown methods are not implemented; callers get None.
        return None

    _, des = get_sift_keypoints(img)
    if des is None or len(des) == 0:
        return [], []

    # One batched search replaces a separate faiss call per descriptor.
    queries = np.ascontiguousarray(des, dtype=np.float32)
    _, neighbour_ids = index.search(queries, k)

    preds = {}
    for row in neighbour_ids:
        for rank, idx in enumerate(row):
            if idx < 0:
                # faiss pads with -1 when fewer than k neighbours exist;
                # labels[-1] would wrongly credit the last label.
                continue
            pred = labels[idx]
            preds[pred] = preds.get(pred, 0) + 1 / (rank + 1)

    # Tuples sort by score first, then by name, both descending.
    top_tuple_list = sorted(((score, name) for name, score in preds.items()), reverse=True)
    top_list, percentage_scores = get_percentage_scores(top_tuple_list)
    return top_list, percentage_scores

In [10]:
search_image_path = 'search.jpg'
img = cv.imread(search_image_path)
# cv.imread returns None instead of raising when the file is missing or
# unreadable; fail here with a clear message rather than later in OpenCV.
if img is None:
    raise FileNotFoundError(f'Could not read image: {search_image_path}')

In [11]:
# Rank the database buildings against the query image and report each
# candidate with its softmax confidence, best match first.
top_list, percentage_scores = find_closest_image_match(img)
for name, percentage_score in zip(top_list, percentage_scores):
    print(f'{name} with {percentage_score:.2f}% confidence')

Bienen_School_of_Music with 100.00% confidence
The_Virginia_Wadsworth_Wirtz_Center with 0.00% confidence
Pick_Staiger_Concert_Hall with 0.00% confidence
Norris_University_Center with 0.00% confidence
Swift_Hall with 0.00% confidence
The_Rock with 0.00% confidence
Fisk_Hall with 0.00% confidence
Harris_Hall with 0.00% confidence
Cook_Hall with 0.00% confidence
Segal_Visitors_Center with 0.00% confidence
Kellogg_Global_Hub with 0.00% confidence
Block_Museum_of_Art with 0.00% confidence
Garrett-Evangelical_Theological_Seminary with 0.00% confidence
Mudd_Library with 0.00% confidence
Kresge_Centennial_Hall with 0.00% confidence
James_L_Allen_Center with 0.00% confidence
Department_of_Mathematics with 0.00% confidence
Crowe_Hall with 0.00% confidence
Dearborn_Observatory with 0.00% confidence
Henry_Crown_Sports_Pavilion with 0.00% confidence
Deering_Library with 0.00% confidence
Sargent_Hall with 0.00% confidence
Donald_P_Jacobs_Center with 0.00% confidence
Walter_Annenberg_Hall with 0.00% 