In [3]:
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
from tqdm.auto import tqdm
from playground.load import dataset, clip_model, clip_processor, model_device, extract_image_vectors

In [4]:
# Load the cached image metadata and the cached image vectors from disk.
#
# To regenerate the cache (will take a few minutes), uncomment and run:
# df, image_vectors = extract_image_vectors(dataset)
# df.to_parquet('data/objectnet/image_meta.parquet')
# np.save('data/objectnet/image_vectors.npy', image_vectors)
#
# NOTE(review): the earlier version of this recipe wrote to 'data/...' while the
# loads below read from 'data/objectnet/...'; the recipe paths were aligned with
# the load paths here -- confirm that is the intended location.
# NOTE(review): later cells index `image_vectors` with values taken from
# `df.index`, so row i of `image_vectors` is presumably the vector for the row of
# `df` whose index label is i -- confirm against extract_image_vectors.
df = pd.read_parquet('data/objectnet/image_meta.parquet')
image_vectors = np.load('data/objectnet/image_vectors.npy')

In [5]:
def get_y_true(df, label):
    """Build a binary relevance vector for `label`, aligned with the rows of `df`.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``label`` column.
    label : object
        The label value to mark as positive. Must occur in ``df['label']``.

    Returns
    -------
    numpy.ndarray
        1-D float array of length ``len(df)``; element ``i`` is 1.0 when the
        i-th *row* of ``df`` (by position) carries ``label`` and 0.0 otherwise.

    Raises
    ------
    AssertionError
        If ``label`` does not occur in ``df['label']``.
    """
    labels = df['label']
    assert label in labels.unique(), f'unknown label: {label!r}'
    # Compare by row position, not by index label: writing through
    # `df.index.values` only works when the index happens to be 0..n-1 and
    # breaks (IndexError or misplaced ones) on filtered or re-indexed frames.
    return (labels == label).to_numpy().astype(float)

def get_query_vector(text):
    """Embed a text query and return it as a flat, unit-length NumPy vector.

    The text is passed through `clip_processor`, its ``input_ids`` are moved to
    `model_device` and fed to ``clip_model.get_text_features``; the result is
    L2-normalized and flattened.

    Parameters
    ----------
    text : str
        The query text, e.g. ``'a photo of a banana'``.

    Returns
    -------
    numpy.ndarray
        1-D array holding the normalized text features.
    """
    query_tokens = clip_processor(text=[text], return_tensors='pt')
    input_ids = query_tokens['input_ids'].to(model_device)
    # Pure inference: disable autograd so no graph is built or kept alive for
    # the forward pass (the result was detached afterwards anyway).
    with torch.no_grad():
        text_features = clip_model.get_text_features(input_ids)
        # F.normalize defaults to p=2 along dim=1, i.e. per-row unit length.
        text_features = F.normalize(text_features)
    return text_features.detach().cpu().numpy().reshape(-1)

In [6]:
from sklearn.metrics import average_precision_score

In [7]:
# Sanity check on a single label: score every image against a text prompt and
# measure how well the ranking recovers the images labelled 'banana'.
y_true = get_y_true(df, 'banana')
query_vector = get_query_vector('a photo of a banana')
# One dot product per image. `query_vector` is unit-length (normalized inside
# get_query_vector); this is a cosine similarity only if the rows of
# `image_vectors` are unit-length too -- TODO confirm.
image_scores = image_vectors @ query_vector
# Last expression of the cell, so the score is displayed as the cell output.
average_precision_score(y_true, image_scores)

0.8783107024897996

In [8]:
# For each label, a few randomly chosen images become the 'query' examples used
# for searching; the remaining images form the 'test' set used to measure
# performance.
# Seed NumPy's global RNG so that draws from `np.random` are repeatable. The
# global RNG state persists across cells, so a cell that draws from it only
# repeats its result when this cell is re-run first.
np.random.seed(0)

In [32]:
# Randomly pick a fixed number of 'query' images per label; everything else is 'test'.
SPLIT_SEED = 0          # seed for the shuffle that decides the split
N_QUERY_PER_LABEL = 5   # images per label assigned to the 'query' split

# Use a private, freshly seeded generator rather than the global `np.random`
# state: the split is then identical on every (re-)execution of this cell, no
# matter which cells ran before it. RandomState(0) produces the same stream as
# `np.random.seed(0)` followed by `np.random.permutation`.
df = df.assign(
    # A unique random integer per row.
    random_id=np.random.RandomState(SPLIT_SEED).permutation(df.shape[0]),
    # Rank of that integer within the row's label group: 1, 2, 3, ...
    group_rank=lambda d: d.groupby('label')['random_id'].rank(method='first').astype('int'),
    # The lowest-ranked rows of each label form the 'query' split.
    split=lambda d: np.where(d['group_rank'] <= N_QUERY_PER_LABEL, 'query', 'test'),
)

In [33]:
# Separate the frame by its `split` column: the 'query' rows are the per-label
# example images, the 'test' rows are the images held out for evaluation.
query_df = df.loc[df['split'].eq('query')]
test_df = df.loc[df['split'].eq('test')]


In [36]:
# Gather the vectors of the 'test' images, in `test_df` row order. The index
# labels of `test_df` are used as row numbers into `image_vectors`.
test_vec_db = np.take(image_vectors, test_df.index.to_numpy(), axis=0)

In [38]:
# Gather the vectors of the 'query' images, in `query_df` row order. The index
# labels of `query_df` are used as row numbers into `image_vectors`.
image_query_vecs = np.take(image_vectors, query_df.index.to_numpy(), axis=0)