In [10]:
import json
from fire import Fire
from pycocotools.coco import COCO


data_root='./coco'
task='instances'
data_type='val2017'

coco = COCO(f'{data_root}/annotations/captions_{data_type}.json')
coco_instance = COCO(f'{data_root}/annotations/instances_{data_type}.json')
coco_stuff = COCO(f'{data_root}/annotations/stuff_{data_type}.json')


# image features
with open(f'features/vit_L_14_336px_imgs_{data_type}.json', 'r') as f:
    imgs_feature = json.load(f)

# caption features
with open(f'features/vit_L_14_336px_captions_{data_type}.json', 'r') as f:
    caps_feature = json.load(f)

# instances
with open('features/vit_L_14_336px_instances_category.json', 'r') as f:
    instances_cat_features = json.load(f)

# instances
with open('features/vit_L_14_336px_stuff_category.json', 'r') as f:
    stuff_cat_features = json.load(f)

img_id = coco.getImgIds()[0]
print(coco.loadImgs(img_id))

cap_id = coco.getAnnIds()[0]
print(cap_id)
print(coco.loadAnns(cap_id))

loading annotations into memory...
Done (t=0.03s)
creating index...
index created!
loading annotations into memory...
Done (t=0.70s)
creating index...
index created!
loading annotations into memory...
Done (t=0.24s)
creating index...
index created!
[{'license': 4, 'file_name': '000000397133.jpg', 'coco_url': 'http://images.cocodataset.org/val2017/000000397133.jpg', 'height': 427, 'width': 640, 'date_captured': '2013-11-14 17:02:52', 'flickr_url': 'http://farm7.staticflickr.com/6116/6255196340_da26cf2c9e_z.jpg', 'id': 397133}]
38
[{'image_id': 179765, 'id': 38, 'caption': 'A black Honda motorcycle parked in front of a garage.'}]


In [11]:
cat_id = coco_instance.getCatIds()[0]
print(cat_id)
print(coco_instance.loadCats(cat_id))
cat_id = coco_stuff.getCatIds()[0]
print(cat_id)
print(coco_stuff.loadCats(cat_id))

1
[{'supercategory': 'person', 'id': 1, 'name': 'person'}]
92
[{'supercategory': 'textile', 'id': 92, 'name': 'banner'}]


In [12]:
caption_train = json.load(open('./coco/annotations/captions_train2017.json'))
caption_val = json.load(open('./coco/annotations/captions_val2017.json'))

In [13]:
len(caption_train['images']), len(caption_train['annotations'])

(118287, 591753)

In [14]:
len(caption_train['images']), len(caption_train['annotations'])
# np.array(imgs['397133']).shape, np.array(captions['38']).shape

(118287, 591753)

# Start of eval functions

In [37]:
# 1. For nearest neighbor queries (NN), we consider Precision, Recall, and F-1 to measure the effectiveness of similarity search;
# 2.  For kNN queries (kNN), we use precision@k, and NDCG to measure the effectiveness of similarity search;

from sklearn.metrics import ndcg_score
import numpy as np
from api import faiss_search
import json

def eval_nn(true_values, predictions):
    N = true_values.shape[1]
    accuracy = (true_values == predictions).sum() / N
    TP = ((predictions == 1) & (true_values == 1)).sum()
    FP = ((predictions == 1) & (true_values == 0)).sum()
    FN = ((predictions == 0) & (true_values == 1)).sum()
    precision = TP / (TP+FP)
    recall = TP / (TP+FN)
    return {
        'accuracy': accuracy,
        'precision': precision,
        'f1': 2 * (precision * recall) / (precision + recall)
    }


def eval_knn(true_values, predicted_scores, k):
    hits = 0
    ndcg = []
    results = []
    for value, scores in zip(true_values, predicted_scores):
        indices = scores.argsort()[-k:][::-1]
        if value in indices:
            hits += 1
        true_prob_distribution = np.zeros_like(indices)
        true_prob_distribution[value] = 1
        results.append(true_prob_distribution)
    # results = np.swapaxes(results, 0, 1)
    results = np.array(results)
    print(predicted_scores.shape, results.shape)
    ndcg.append(ndcg_score(predicted_scores, results))
    return {
        'p@k': hits / len(true_values),
        'ndcg': sum(ndcg) / len(ndcg)
    }


def evaluate(preds, gts, kNN=True, k=10):
    if not kNN:
        return eval_nn(preds, gts)
    else:
        return eval_knn(preds, gts, k=k)


true_values = np.array([[1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0]])
predictions = np.array([[1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0]])
print(evaluate(true_values, predictions, kNN=False))

# true_values = np.array([2, 11, 4])
true_values = np.array([9, 1, 3])
predictions = np.array([[0.05, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.15], [0.1, 0.05, 0.15, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1], [0.15, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.02, 0.13]])

print(evaluate(true_values, predictions, kNN=True))


{'accuracy': 0.3333333333333333, 'precision': 0.375, 'f1': 0.42857142857142855}
(3, 10) (3, 10)
{'p@k': 1.0, 'ndcg': 0.9211020043354664}


# Plug in dataset and fasii_search

In [17]:
# @member who looks at dataset: provide the val dataset
# dataset = ...
for _id, repre in dataset.values():
    pass
    # ensemble faiss and call evaluate