In [9]:
import json
import os

import h5py
import matplotlib.pyplot as plt
import numpy as np
import torch
from sklearn import preprocessing
from torch.utils.data import Dataset, DataLoader
from torchvision import models
from torchvision import transforms
from tqdm.notebook import tqdm

In [10]:
# Read the ground-truth arrays from the HDF5 file fully into memory so the
# file handle can be closed right away.
with h5py.File("data02/london_lite_gt.h5", "r") as gt_file:
    # "sim" is cast to uint8; it is later compared against 1 to find the
    # relevant database images of each query.
    sim = gt_file["sim"][:].astype(np.uint8)
    fovs = gt_file["fov"][:]

In [11]:
class CustomDataset(Dataset):
    """Image dataset driven by a JSON index file.

    The JSON file must contain an ``"im_paths"`` entry listing image paths
    relative to ``root_dir``. Indexing the dataset reads the corresponding
    image from disk and, when a ``transform`` was supplied, returns the
    transformed image instead of the raw one.

    Args:
        root_dir: Directory the paths in the index are relative to.
        json_path: Path of the JSON index file.
        transform: Optional callable applied to every loaded image.
        n: Stored on the instance as ``self.n``; not used by any method here.
    """

    def __init__(self, root_dir, json_path, transform=None, n=8):
        with open(json_path, "r") as index_file:
            index = json.load(index_file)

        self.root_dir = root_dir
        self.transform = transform
        self.n = n
        self.m_imgs = np.array(index["im_paths"])

    def __len__(self):
        """Return the number of image paths listed in the index."""
        return len(self.m_imgs)

    def __getitem__(self, idx):
        """Load image ``idx`` from disk and apply the transform, if any."""
        image_path = os.path.join(self.root_dir, self.m_imgs[idx])
        image = plt.imread(image_path)
        return self.transform(image) if self.transform else image

In [12]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Loaded device: {device}")

Loaded device: cpu


In [13]:
# ImageNet-pretrained DenseNet-201 used as a fixed feature extractor.
# `weights=` replaces the deprecated `pretrained=True` flag and selects the
# same default ImageNet checkpoint.
model = models.densenet201(weights=models.DenseNet201_Weights.DEFAULT)
# Drop the last child module (the classifier head in torchvision's DenseNet)
# so the network returns convolutional feature maps instead of class logits.
model = torch.nn.Sequential(*list(model.children())[:-1])
# Inference only: eval() makes BatchNorm use its stored running statistics.
# In the default training mode the features would depend on the other images
# in the batch, so database features (batches of 64) and query features
# (batches of 1) would not be comparable, and every forward pass would also
# modify the running statistics.
model = model.to(device).eval()

Downloading: "https://download.pytorch.org/models/densenet201-c1103571.pth" to /Users/kjwdamme/.cache/torch/hub/checkpoints/densenet201-c1103571.pth

KeyboardInterrupt



In [None]:
# o = model(torch.zeros((16, 3, 224, 224)))
# # o = model(torch.zeros((64, 3, 512, 512)))
# o.shape

In [None]:
# model = models.resnet101(weights=models.ResNet101_Weights.DEFAULT)
# model = torch.nn.Sequential(*(list(model.children())[:-1]))
# model = model.to(device)

In [None]:
# model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
# model = torch.nn.Sequential(*(list(model.children())[:-1]))
# model = model.to(device)

In [None]:
# list(model.children())

In [None]:
# Preprocessing applied to every image: convert the loaded array to a tensor,
# then resize it to 512x512 (antialiasing disabled).
# NOTE(review): no Normalize step is applied. torchvision's pretrained
# ImageNet weights are documented as expecting mean/std-normalised inputs --
# confirm that omitting it is intentional.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Resize(size=(512, 512), antialias=False),
    ]
)

# Database (map) images, served in batches of 64 in index order.
database = CustomDataset(
    root_dir="data02",
    json_path="data02/database/database_lite.json",
    transform=transform,
)
loader = DataLoader(dataset=database, batch_size=64)

In [None]:
# Finding global features
# Finding global features: one descriptor per database image.
# Per-batch results are collected in a list and stacked once at the end;
# stacking inside the loop would re-copy the whole matrix for every batch.
batch_features = []
with torch.no_grad():
    for img_batch in tqdm(loader):
        img_batch = img_batch.to(device)
        output = model(img_batch)
        # Global max pooling: taking the max over dim=2 twice removes both
        # spatial axes (assumes the model output is (batch, channels, H, W)),
        # leaving a (batch, channels) matrix.
        output, _ = output.max(dim=2)
        output, _ = output.max(dim=2)
        # No squeeze(): a batch holding a single image must stay 2-D so that
        # every row of the final matrix is one image descriptor.
        batch_features.append(output.cpu().numpy())

# Stays None when the loader yields no batches.
global_features = np.vstack(batch_features) if batch_features else None

In [None]:
# `preprocessing` (scikit-learn) is imported in the notebook's import cell.

# Compute per-feature z-score statistics (mean and std) on the database descriptors
scaler = preprocessing.StandardScaler().fit(global_features)
# Normalize the vectors of the map collection (0 mean and 1 std per feature);
# the same fitted scaler is reused later for the query descriptors.
scaled_features = scaler.transform(global_features)

# Sanity check: overall mean and std of the scaled matrix.
np.mean(scaled_features), np.std(scaled_features)

In [None]:
def cosine(map_bow_vectors, query_bow):
    """Cosine similarity between every map vector and a single query vector.

    Args:
        map_bow_vectors: Array of shape (n_vectors, dim), one vector per row.
        query_bow: Array of shape (dim,).

    Returns:
        Float array of shape (n_vectors,) with the cosine similarity of each
        row to ``query_bow``. Rows with zero norm (or a zero-norm query) get a
        similarity of 0.0 instead of NaN, so the result can be sorted safely.
    """
    map_bow_vectors = np.asarray(map_bow_vectors, dtype=np.float64)
    query_bow = np.asarray(query_bow, dtype=np.float64)

    dots = map_bow_vectors @ query_bow
    norms = np.linalg.norm(map_bow_vectors, axis=1) * np.linalg.norm(query_bow)

    cosine_sim = np.zeros(map_bow_vectors.shape[0])
    # Divide only where the denominator is non-zero; other entries keep 0.0.
    np.divide(dots, norms, out=cosine_sim, where=norms > 0)
    return cosine_sim

In [None]:
def retrieve_images(map_bow_vectors, query_bow):
    """Rank the map vectors by Euclidean distance to the query.

    Args:
        map_bow_vectors: Array of shape (n_vectors, dim), one vector per row.
        query_bow: Array broadcastable against the rows of ``map_bow_vectors``.

    Returns:
        Indices of the map vectors ordered from nearest to farthest.
    """
    differences = map_bow_vectors - query_bow
    distances = np.linalg.norm(differences, axis=1)
    # Ascending sort: the smallest distance (best match) comes first.
    # Note: ranking by a similarity such as cosine() instead would need the
    # opposite (descending) order.
    return np.argsort(distances)

In [None]:
## BEGIN ANSWER
def precision_at_k(relevant, retrieved, k):
    """Precision of the first ``k`` retrieved items.

    Args:
        relevant: Indices of the items that are relevant for the query.
        retrieved: Ranked indices returned by the retrieval, best match first.
        k: Number of top-ranked items to evaluate (expected to be >= 1).

    Returns:
        Number of relevant items found in ``retrieved[:k]`` divided by ``k``.
    """
    # np.isin replaces np.in1d, which is deprecated since NumPy 2.0.
    tp = np.sum(np.isin(relevant, retrieved[:k]))
    return tp / k

In [None]:
## BEGIN ANSWER
## BEGIN ANSWER


def average_precision(relevant, retrieved):
    """Average precision (AP) of one ranked result list.

    AP is the mean of precision@k taken only at the ranks k where a relevant
    item appears, normalised by the number of relevant items:

        AP = (1 / |relevant|) * sum_k precision@k * [retrieved[k-1] is relevant]

    Args:
        relevant: Indices of the items that are relevant for the query.
        retrieved: 1-D ranking of item indices, best match first.

    Returns:
        The average precision as a float in [0, 1]; 0.0 when there are no
        relevant items or the ranking is empty.
    """
    # BEGIN ANSWER
    relevant = np.unique(np.asarray(relevant))
    retrieved = np.asarray(retrieved)
    if relevant.size == 0 or retrieved.size == 0:
        return 0.0

    # hits[i] is True when the item at rank i + 1 is relevant.
    hits = np.isin(retrieved, relevant)
    # precision@k for every rank k = 1..len(retrieved), computed in one pass.
    precisions = np.cumsum(hits) / np.arange(1, retrieved.size + 1)
    return float(precisions[hits].sum() / relevant.size)

def mean_average_precision(all_relevant, all_retrieved):
    """Mean of the per-query average precision over all queries.

    Args:
        all_relevant: Sequence where entry ``q`` holds the relevant item
            indices of query ``q``.
        all_retrieved: Sequence where entry ``q`` holds the ranked item
            indices retrieved for query ``q``.

    Returns:
        The mean average precision; 0.0 when there are no queries.
    """
    # BEGIN ANSWER
    count = len(all_retrieved)
    if count == 0:
        # No queries: avoid dividing by zero.
        return 0.0
    total = sum(
        average_precision(all_relevant[qid], all_retrieved[qid])
        for qid in range(count)
    )
    # END ANSWER
    return total / count

# Evaluate retrieval: rank the database for every query image and score the
# rankings against the ground-truth relevance matrix `sim`.
q_database = CustomDataset(root_dir="data02", json_path="data02/query/query_lite.json", transform=transform)
all_relevant_images = []
all_retrieved_images = []
for query_idx in tqdm(range(len(q_database))):
    # Compute the global descriptor of the query image (loaded once per query).
    with torch.no_grad():
        img = q_database[query_idx].to(device)
        o = model(img[None, :])  # add the batch dimension
        # Global max pooling over both spatial dimensions, as for the database
        o, _ = o.max(dim=2)
        o, _ = o.max(dim=2)

    # The scaler expects a 2-D (n_samples, n_features) array: one row here.
    query_repr = o.cpu().numpy().reshape(1, -1)

    # Apply the z-score statistics fitted on the database, back to a 1-D vector
    new_repr = scaler.transform(query_repr)[0]

    # Full ranking of the database images, most similar first
    retrieved_images = retrieve_images(scaled_features, new_repr)
    all_retrieved_images.append(retrieved_images)
    # Indices of the relevant map images for the query: we have the relevance judgements (Ground truth)
    relevant_images = np.where(sim[query_idx, :] == 1)[0]
    all_relevant_images.append(relevant_images)

mapr = mean_average_precision(all_relevant_images, all_retrieved_images)

In [None]:
# Display the mean average precision computed over all queries.
mapr