In [19]:
import pickle

from pathlib import Path
from tqdm.auto import notebook_tqdm

from pytorch_metric_learning import losses
from pytorch_metric_learning import distances
from pytorch_metric_learning import losses
from pytorch_metric_learning import samplers

import numpy as np

import torch
import torch.nn as nn
from torch import optim
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torchvision import models
from torchvision.utils import make_grid

In [20]:
def create_headless_resnet18():
    model = models.resnet18(pretrained=True, progress=False)
    model = nn.Sequential(*list(model.children())[:-1])
    return model

In [21]:
model = create_headless_resnet18()

In [22]:
data_path = Path("/home/pau/Documents/datasets/MIT_split")
feature_path = Path("./results/retrieval")

In [23]:
transfs = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((224, 224)),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

catalogue = ImageFolder(str(data_path / "train"), transform=transfs)
queries = ImageFolder(str(data_path / "test"), transform=transfs)

In [24]:
catalogue_meta = [(x[0].split('/')[-1], x[1]) for x in catalogue.imgs]
query_meta = [(x[0].split('/')[-1], x[1]) for x in queries.imgs]

with (feature_path / "catalogue_meta.pkl").open('wb') as f_meta:
    pickle.dump(catalogue_meta, f_meta)

with (feature_path / "query_meta.pkl").open('wb') as f_meta:
    pickle.dump(query_meta, f_meta)

In [25]:
catalogue_data = np.empty((len(catalogue), 512))
with torch.no_grad():
    for ii, (img, _) in enumerate(catalogue):
        catalogue_data[ii, :] = model(img.unsqueeze(0)).squeeze().numpy()

with open(feature_path / "catalogue.npy", "wb") as f:
    np.save(f, catalogue_data)

In [26]:
query_data = np.empty((len(queries), 512))
with torch.no_grad():
    for ii, (img, _) in enumerate(queries):
        query_data[ii, :] = model(img.unsqueeze(0)).squeeze().numpy()

with open(feature_path / "queries.npy", "wb") as f:
    np.save(f, query_data)

In [3]:
import pickle
from pathlib import Path
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import f1_score, average_precision_score
from ml_metrics import mapk,apk

data_path = Path("/home/group01/mcv/datasets/MIT_split")
feature_path = Path("./results/retrieval")

In [4]:
with open(feature_path / "queries.npy", "rb") as f:
    query_data = np.load(f)
with open(feature_path / "catalogue.npy", "rb") as f:
    catalogue_data = np.load(f)

with (feature_path / "catalogue_meta.pkl").open('rb') as fc, \
        (feature_path / "query_meta.pkl").open('rb') as fq:
    catalogue_meta = pickle.load(fc)
    query_meta = pickle.load(fq)

In [5]:
from sklearn.neighbors import KNeighborsClassifier

catalogue_labels = np.asarray([x[1] for x in catalogue_meta])
query_labels = np.asarray([x[1] for x in query_meta])

knn = KNeighborsClassifier(n_neighbors=5)
knn = knn.fit(catalogue_data, catalogue_labels)
predictions = knn.predict(query_data)
pr_prob = knn.predict_proba(query_data)

In [6]:
from sklearn.metrics import f1_score, average_precision_score

one_hot = np.zeros((predictions.shape[0], max(predictions) + 1), dtype=int)
one_hot[predictions] = 1

f1 = f1_score(query_labels, predictions, average="macro")
ap = average_precision_score(one_hot, pr_prob)

In [9]:
pr_prob

array([[0. , 0. , 0.8, ..., 0. , 0. , 0. ],
       [0.8, 0.2, 0. , ..., 0. , 0. , 0. ],
       [0.2, 0. , 0.6, ..., 0.2, 0. , 0. ],
       ...,
       [0. , 0. , 0. , ..., 0. , 0. , 1. ],
       [0. , 0. , 0. , ..., 0. , 0.2, 0.6],
       [0. , 0. , 0.2, ..., 0. , 0. , 0.6]])

In [23]:
query_meta

[('cdmc109.jpg', 0),
 ('cdmc518.jpg', 0),
 ('cdmc753.jpg', 0),
 ('cdmc795.jpg', 0),
 ('cdmc935.jpg', 0),
 ('fie10.jpg', 0),
 ('fie12.jpg', 0),
 ('fie14.jpg', 0),
 ('fie15.jpg', 0),
 ('fie21.jpg', 0),
 ('fie24.jpg', 0),
 ('fie25.jpg', 0),
 ('fie30.jpg', 0),
 ('fie36.jpg', 0),
 ('fie46.jpg', 0),
 ('fie6.jpg', 0),
 ('fie8.jpg', 0),
 ('for68.jpg', 0),
 ('land147.jpg', 0),
 ('land228.jpg', 0),
 ('land233.jpg', 0),
 ('land271.jpg', 0),
 ('land276.jpg', 0),
 ('land295.jpg', 0),
 ('land297.jpg', 0),
 ('land358.jpg', 0),
 ('land359.jpg', 0),
 ('land412.jpg', 0),
 ('land514.jpg', 0),
 ('land534.jpg', 0),
 ('land556.jpg', 0),
 ('land564.jpg', 0),
 ('land572.jpg', 0),
 ('land588.jpg', 0),
 ('land605.jpg', 0),
 ('land617.jpg', 0),
 ('land631.jpg', 0),
 ('land638.jpg', 0),
 ('land643.jpg', 0),
 ('land653.jpg', 0),
 ('land655.jpg', 0),
 ('land657.jpg', 0),
 ('land659.jpg', 0),
 ('land660.jpg', 0),
 ('land669.jpg', 0),
 ('land68.jpg', 0),
 ('land687.jpg', 0),
 ('land688.jpg', 0),
 ('land701.jpg', 0),


In [10]:
one_hot

array([[1, 1, 1, ..., 1, 1, 1],
       [1, 1, 1, ..., 1, 1, 1],
       [1, 1, 1, ..., 1, 1, 1],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [44]:
print(ap, f1)

0.014844389654713325 0.3793890191797535


In [24]:
from ml_metrics import mapk

def evaluation_mapk(actual,predicted):
    k_1=mapk(actual, predicted, k=1)
    k_5=mapk(actual, predicted, k=5)
    return (k_1,k_5)

In [34]:
with open(feature_path / "top10_results.pkl", "rb") as f:
        top10 = pickle.load(f)

In [37]:
results = []
for i in range(len(top10)):
    results_class = [catalogue_meta[j][1] for j in top10[i]]
    results.append(results_class)
query_labels = [[x[1]] for x in query_meta]

In [32]:
# Compute the depth of a list (of lists (of lists ...) ...)
# Empty list not allowed!!
#https://stackoverflow.com/questions/6039103/counting-depth-or-the-deepest-level-a-nested-list-goes-to
list_depth = lambda L: isinstance(L, list) and max(map(list_depth, L))+1

def add_list_level(input_list):
    out = []
    for ll in input_list:
        tmp = []
        for q in ll:
            tmp.append([q])
        out.append(tmp)
    return (out)

def compute_mapk(gt,hypo,k_val):

    hypo = list(hypo)
    if list_depth(hypo) == 2:
        hypo = add_list_level(hypo.copy())

    apk_list = []
    for ii,query in enumerate(gt):
        for jj,sq in enumerate(query):
            apk_val = 0.0
            if len(hypo[ii]) > jj:
                apk_val = apk([sq],hypo[ii][jj], k_val)
            apk_list.append(apk_val)
            
    return np.mean(apk_list)

In [43]:
k_1 = compute_mapk(query_labels, results, 1)
k_5 = compute_mapk(query_labels, results, 5)
print('MAP@1=',k_1)
print('MAP@5=',k_5)

MAP@1= 0.34076827757125155
MAP@5= 0.34076827757125155


In [39]:
k_1, k_5 = evaluation_mapk(list(query_labels),results)
print('MAP@1=',k_1)
print('MAP@5=',k_5)

MAP@1= 0.34076827757125155
MAP@5= 0.4858323007021892
