In [4]:
import torch
import numpy as np
import glob
import os
from torch_snippets import *
import glob
from IPython.display import clear_output
import pandas as pd
from sklearn.cluster import MiniBatchKMeans
from PIL import Image
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt
import cv2 as cv
import tqdm
from tqdm import tqdm
import time
import scipy
import timeit

N = 20

In [5]:
class DataLoader:
    def __init__(self, image_dir="\\data\\oxford_buildings\\oxbuild_images", gt_dir="\\data\\oxford_buildings\\gt_files"):
        self.root = os.getcwd()
        self.image_dir = image_dir
        self.gt_dir = gt_dir
        self.images = glob.glob(self.root + image_dir + "\\*.jpg")
        self.ground_truths = glob.glob(self.root + gt_dir + '\\*.txt')
        self.label_types =['_good.txt', '_junk.txt', '_ok.txt', '_query.txt']
        self.places = self.get_place_queries(self.ground_truths)
        self.places_dict = self.get_place_query_results(self.places)
        self.query_images = self.get_query_images()
        
    

    def get_place_queries(self, gt_labels):
        places = []
        for gt_label in gt_labels:
            for label in self.label_types:
                if gt_label.endswith(label):
                    x = gt_label.replace(label, " ")
                    x = x.replace(self.root + self.gt_dir, ' ').replace('\\', ' ')
                    places.append(x.strip())
                    break
        return places
    
    

    def get_place_query_results(self, places):
        all_places = {}
        for place in places:
            place_dict = {}
            fpath = self.root + '/data/oxford_buildings/gt_files\\' +  place 
            for label_type in self.label_types:
                query_path = fpath + label_type
                query_images =pd.read_csv(query_path, header=None, delimiter=' ').to_numpy().squeeze()
                x = label_type.replace('_', ' ').replace('.txt', ' ').strip()
                place_dict[x] = query_images
            all_places[place] = place_dict
        return all_places
    
    def get_query_images(self):
        query_images = []
        for k, v in self.places_dict.items():
            query_im = v["query"][0].replace('oxc1_', '')
            for img in self.images:
                if query_im in img:
                    query_images.append(img)
        return query_images
        
        


    

In [6]:
dataset = DataLoader()

In [7]:
class SIFT_VPR:
    def __init__(self, dataset, max_features_per_image=1000, n_visual_words=1000):
        
        self.ds = dataset
        self.sift_detector = cv.SIFT_create(nfeatures=max_features_per_image)
        self.neighbors = NearestNeighbors(n_neighbors=10)
        self.cluster = MiniBatchKMeans(n_clusters=n_visual_words)
        self.n_visual_words = n_visual_words
        
    def compute_query_desc(self, img_path):
        img = cv.imread(img_path)
        gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
        _, desc = self.sift_detector.detectAndCompute(gray, None)
        words = self.cluster.predict(desc)
        Fq = np.bincount(words, minlength=self.n_visual_words)
        return Fq
    
    def compute_map_features(self):
        print(" ============= Computing Local Descriptors ================ ")
        for img_path in tqdm(self.ds.images[:N]):
            if img_path in self.ds.query_images:
                continue
            img = cv.imread(img_path)
            gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
            _, desc = self.sift_detector.detectAndCompute(gray, None)
            try:
                all_desc = np.concatenate((all_desc, desc))
            except: 
                all_desc = desc
        
        print(" ============= Computing Visual Words ===================== ")
        delta = self.n_visual_words * 2
        i = 0
        n_batches = int(all_desc.shape[0]/delta)
        batch_n = 0
        pbar = tqdm(total=n_batches)
        while(i < all_desc.shape[0]):
            clear_output(wait=True)
            batch = all_desc[i:i + delta, :]
            self.cluster.partial_fit(batch)
            i = i+delta
            batch_n += 1
            pbar.update(1)
        pbar.close()
        self.visual_words = self.cluster.cluster_centers_
    
        print(" ============= Computing Visual Word Representations ================ ")
        for img_path in tqdm(self.ds.images[:N]):
            img = cv.imread(img_path)
            gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
            _, desc = self.sift_detector.detectAndCompute(gray, None)
            try:
                nearest_words = self.cluster.predict(desc)
            except:
                continue
                
            F = np.bincount(nearest_words, minlength=self.n_visual_words)
            F = np.concatenate((np.array([img_path]), np.array(F)))
            try:
                Fm = np.vstack((Fm, F))
            except:
                Fm = F
        self.Fm = Fm
        return Fm
                
        
    def perform_VPR(self, imQ_path, Fm, top_n=None):
        Fq = self.compute_query_desc(imQ_path)
        reps = Fm[:, 1:]
        paths = Fm[:, 0]
        try:
            norms = np.array([np.linalg.norm(Fq.astype(int) - word.astype(int)) for word in reps])
        except:
            print("you are testing an image already in the dataset")
        
        sort_args = np.argsort(-norms)
        scores = (np.exp(norms)/np.exp(norms).sum())
        paths = paths[sort_args]
        scores = scores[sort_args]
        if top_n:
            if top_n > 0:
                P = paths[:top_n]
                S = scores
                return P, S
            else:
                P = paths
                S = scores
                return P, S
        P = paths[0]
        S = scores[0] 
        return P, S

In [8]:
vpr = SIFT_VPR(dataset)

In [9]:
Fm = vpr.compute_map_features()

10it [00:00, 10.42it/s]                                                                                                


100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:02<00:00,  9.56it/s]


In [10]:
query_im = dataset.query_images[1]
query_im

'C:\\Users\\olive\\OneDrive - University of Southampton\\Documents\\GitHub\\Visual-Place-Recognition\\data\\oxford_buildings\\oxbuild_images\\all_souls_000026.jpg'

In [11]:
hr = vpr.compute_query_desc(query_im)

In [12]:
P, S = vpr.perform_VPR(query_im, Fm, top_n=-1)


In [19]:
class Evaluate:
    def __init__(self, dataset, vpr_module):
        self.ds = dataset
        self.vpr = vpr
        try:
            self.Fm = self.vpr.Fm
        except: 
            self.Fm = self.vpr.compute_map_features()
            
        
        self.place_matches = {}
        for k, v in self.ds.places_dict.items():
                self.place_matches[k] = v["good"]
    
    def precision(self, threshold=0.8):
        Tp = 0
        Fp = 0
        for place, matches in tqdm(self.place_matches.items()):
            place = self.ds.places_dict[place]["query"][0]
            place = place.replace('oxc1_', ' ').strip()
            place = self.ds.root + self.ds.image_dir + "\\" + place + ".jpg"
            P, S = self.vpr.perform_VPR(place, self.Fm, top_n=-1)
            preds = [self.prediction(prob, threshold) for prob in S]
            for i, pred in enumerate(preds):
                if pred:
                    match_flag = False
                    for match in matches:
                        if str(match) in str(P[i]):
                            match_flag = True
                            break
                    if match_flag == True:
                        Tp += 1
                    else: 
                        Fp += 1 
        print("Tp; ", Tp, " Fp; ", Fp)
        return (Tp/(Tp + Fp))
        
    def recall(self, threshold=0.8):
        Tp = 0
        Fn = 0
        for place, matches in tqdm(self.place_matches.items()):
            place = self.ds.places_dict[place]["query"][0]
            place = place.replace('oxc1_', ' ').strip()
            place = self.ds.root + self.ds.image_dir + "\\" + place + ".jpg"
            P, S = self.vpr.perform_VPR(place, self.Fm, top_n=-1)
            preds = [self.prediction(prob, threshold) for prob in S]
            for i, pred in enumerate(preds):
                if pred:
                    for match in matches:
                        if str(match) in str(P[i]):
                            Tp += 1
                            break
                else:
                    for match in matches:
                        if str(match) in str(P[i]):
                            Fn += 1
                            break 
        return (Tp/(Tp + Fn))
        

    def recall_N(self):
        return 0
        
    def precision_recall(self, delta=0.1):
        precisions = []
        recalls = []
        for threshold in np.arange(0, 1, delta):
            precisions.append(self.precision(threshold=threshold))
            recalls.append(self.recall(threshold=threshold))
        return precisions, recalls
        
    def auc_precision_recall(self, delta=0.1):
        area = 0
        precision, recall = self.precision_recall(delta=delta)
        for i in range(len(precision)):
            if i == 0:
                area += precision[i] * recall[i]
            else: 
                area += precision[i] * abs(recall[i-1] - recall[i])
        return area
        
    def ImageRetrievalTime(self):
        times = []
        for im in self.ds.query_images:
            start = time.time()
            P, S = self.vpr.perform_VPR(im, self.Fm)
            end = time.time()
            times.append(end - start)
        return np.array(times).mean(), np.array(times).var()
    
    def prediction(self, prob, threshold):
        if prob > threshold:
            return True
        else: 
            return False
                               

In [20]:
evaluate = Evaluate(dataset, vpr)

In [None]:
precision = evaluate.precision(threshold=0.5)
recall = evaluate.recall(threshold=0.5)

  0%|                                                                                           | 0/55 [00:00<?, ?it/s]

  2%|█▌                                                                                 | 1/55 [00:00<00:05,  9.67it/s]

  4%|███                                                                                | 2/55 [00:00<00:06,  8.83it/s]

  5%|████▌                                                                              | 3/55 [00:00<00:05,  9.03it/s]

  7%|██████                                                                             | 4/55 [00:00<00:05,  8.55it/s]

  9%|███████▌                                                                           | 5/55 [00:00<00:05,  8.91it/s]

 11%|█████████                                                                          | 6/55 [00:00<00:05,  8.71it/s]

 13%|██████████▌                                                                        | 7/55 [00:00<00:05,  8.23it/s]

 15%|████████████                                                                       | 8/55 [00:00<00:05,  8.48it/s]

 16%|█████████████▌                                                                     | 9/55 [00:01<00:05,  8.74it/s]

 18%|██████████████▉                                                                   | 10/55 [00:01<00:05,  8.33it/s]

 20%|████████████████▍                                                                 | 11/55 [00:01<00:05,  8.46it/s]

 24%|███████████████████▍                                                              | 13/55 [00:01<00:04,  8.74it/s]

 25%|████████████████████▊                                                             | 14/55 [00:01<00:04,  8.77it/s]

 27%|██████████████████████▎                                                           | 15/55 [00:01<00:04,  8.83it/s]

 29%|███████████████████████▊                                                          | 16/55 [00:01<00:04,  8.71it/s]

 31%|█████████████████████████▎                                                        | 17/55 [00:01<00:04,  8.54it/s]

 33%|██████████████████████████▊                                                       | 18/55 [00:02<00:04,  8.87it/s]

 35%|████████████████████████████▎                                                     | 19/55 [00:02<00:04,  7.99it/s]

 36%|█████████████████████████████▊                                                    | 20/55 [00:02<00:04,  8.12it/s]

 38%|███████████████████████████████▎                                                  | 21/55 [00:02<00:04,  7.77it/s]

 40%|████████████████████████████████▊                                                 | 22/55 [00:02<00:04,  7.85it/s]

 42%|██████████████████████████████████▎                                               | 23/55 [00:02<00:03,  8.37it/s]

 45%|█████████████████████████████████████▎                                            | 25/55 [00:02<00:03,  8.96it/s]

 47%|██████████████████████████████████████▊                                           | 26/55 [00:03<00:03,  8.79it/s]

 49%|████████████████████████████████████████▎                                         | 27/55 [00:03<00:03,  9.01it/s]

 51%|█████████████████████████████████████████▋                                        | 28/55 [00:03<00:03,  8.87it/s]

 53%|███████████████████████████████████████████▏                                      | 29/55 [00:03<00:03,  8.32it/s]

 55%|████████████████████████████████████████████▋                                     | 30/55 [00:03<00:03,  8.30it/s]

 56%|██████████████████████████████████████████████▏                                   | 31/55 [00:03<00:02,  8.06it/s]

 58%|███████████████████████████████████████████████▋                                  | 32/55 [00:03<00:02,  8.09it/s]

 60%|█████████████████████████████████████████████████▏                                | 33/55 [00:03<00:02,  8.20it/s]

 62%|██████████████████████████████████████████████████▋                               | 34/55 [00:04<00:02,  7.93it/s]

 64%|████████████████████████████████████████████████████▏                             | 35/55 [00:04<00:02,  8.15it/s]

 65%|█████████████████████████████████████████████████████▋                            | 36/55 [00:04<00:02,  8.19it/s]

 67%|███████████████████████████████████████████████████████▏                          | 37/55 [00:04<00:02,  8.21it/s]

 69%|████████████████████████████████████████████████████████▋                         | 38/55 [00:04<00:02,  8.14it/s]

 71%|██████████████████████████████████████████████████████████▏                       | 39/55 [00:04<00:01,  8.46it/s]

 73%|███████████████████████████████████████████████████████████▋                      | 40/55 [00:04<00:01,  8.53it/s]

 75%|█████████████████████████████████████████████████████████████▏                    | 41/55 [00:04<00:01,  8.84it/s]

 76%|██████████████████████████████████████████████████████████████▌                   | 42/55 [00:04<00:01,  8.84it/s]

 78%|████████████████████████████████████████████████████████████████                  | 43/55 [00:05<00:01,  8.81it/s]

 80%|█████████████████████████████████████████████████████████████████▌                | 44/55 [00:05<00:01,  8.76it/s]

 82%|███████████████████████████████████████████████████████████████████               | 45/55 [00:05<00:01,  9.05it/s]

 84%|████████████████████████████████████████████████████████████████████▌             | 46/55 [00:05<00:01,  8.26it/s]

 85%|██████████████████████████████████████████████████████████████████████            | 47/55 [00:05<00:00,  8.25it/s]

 87%|███████████████████████████████████████████████████████████████████████▌          | 48/55 [00:05<00:00,  8.25it/s]

 89%|█████████████████████████████████████████████████████████████████████████         | 49/55 [00:05<00:00,  8.52it/s]

 91%|██████████████████████████████████████████████████████████████████████████▌       | 50/55 [00:05<00:00,  8.60it/s]

 93%|████████████████████████████████████████████████████████████████████████████      | 51/55 [00:05<00:00,  8.94it/s]

 95%|█████████████████████████████████████████████████████████████████████████████▌    | 52/55 [00:06<00:00,  9.21it/s]

 96%|███████████████████████████████████████████████████████████████████████████████   | 53/55 [00:06<00:00,  9.11it/s]

 98%|████████████████████████████████████████████████████████████████████████████████▌ | 54/55 [00:06<00:00,  9.31it/s]

100%|██████████████████████████████████████████████████████████████████████████████████| 55/55 [00:06<00:00,  8.60it/s]
 27%|██████████████████████▎                                                           | 15/55 [00:01<00:04,  8.92it/s]

In [None]:
precision

In [18]:
recall

0.0

In [64]:
paths = np.array(["world", "hello"])
p = paths[[1,0]]

In [65]:
p

array(['hello', 'world'], dtype='<U5')