In [1]:
import pandas as pd
import numpy as np
import torch.nn.functional as F
from tqdm.auto import tqdm
from playground.load import clip_model, clip_processor, model_device

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
## compute embedding first time: this will take a few minutes
#from playground.load import dataset, extract_image_vectors
#df = extract_image_vectors(dataset)
#df.to_parquet('data/objectnet/embeddings.parquet')

In [3]:
# load precomputed embeddings
df = pd.read_parquet('data/objectnet/embeddings.parquet')

In [4]:
def get_ys(df, label : str):
    ''' get binary labels for a given class '''
    assert label in df.label.unique()
    return np.where(df.label == label, 1, 0)

def get_text_embedding(text : str, prompt_template='A picture of a {}'):
    ''' get CLIP vector representation of text query '''
    text = text.replace('_', ' ')
    text = prompt_template.format(text)
    query_tokens = clip_processor(text=[text], return_tensors='pt')
    query_vector = clip_model.get_text_features(query_tokens['input_ids'].to(model_device))
    query_vector = F.normalize(query_vector)
    query_vector = query_vector.cpu().detach().numpy().reshape(-1)
    return query_vector

In [5]:
from sklearn.metrics import average_precision_score

In [6]:
y_true = get_ys(df, 'banana')
query_vector = get_text_embedding('banana')
image_vectors = np.stack(df.vectors.values)
image_scores = image_vectors @ query_vector
average_precision_score(y_true, image_scores)

0.8389910760803937

In [7]:
# work with unit length vectors for dot product (works better for CLIP embeddings)
df = df.assign(normalized_vectors=[vec for vec in np.stack(df['vectors']) / np.linalg.norm(df['vectors'])])

# pick a random image from each class to be the query to be used as the positive example
np.random.seed(10)
df = df.assign(random_id=np.random.permutation(df.shape[0]))
df = df.assign(group_rank=df.groupby('label')['random_id'].rank(method='first').astype('int'))
df = df.assign(split=df.group_rank.apply(lambda x: 'query' if x <= 1 else 'test'))

query_df = df[df.split == 'query']
test_df = df[df.split == 'test']

# from the test set, take a random sample of the DB which we will use as pseudo-negative examples
# while training some of the linear models
number_svm_train_examples = 2000
random_sample = np.random.permutation(test_df.shape[0])[:number_svm_train_examples]
Xneg = np.stack(test_df.iloc[random_sample].normalized_vectors.values)
yneg = np.zeros(Xneg.shape[0])

# the full test set used for evaluation
Xtest = np.stack(test_df.normalized_vectors.values)

In [8]:
def get_vector_from_text(row):
    ''' get CLIP vector representation of text query, aka zero-shot search '''
    return get_text_embedding(row.label)

def get_vector_from_knn(row):
    ''' get the vector representation of the row, aka nearest neighbor search '''
    return row.normalized_vectors

from sklearn import svm
def get_vector_from_svm(row):
    ''' ExemplarSVM: get the vector representation from using one positive example, and a random sample
    labeled as negative, train using SVM and use this for the vector lookup '''
    clf = svm.LinearSVC(class_weight='balanced', verbose=False, max_iter=10000, tol=1e-6, C=0.1)
    Xpos = row.normalized_vectors.reshape(1, -1)
    X = np.concatenate([Xpos, Xneg], axis=0)
    y = np.concatenate([np.ones(1), yneg])
    clf.fit(X, y) # train
    return clf.coef_.reshape(-1)

from sklearn import linear_model
def get_vector_from_logistic_reg(row):
    ''' Similar to ExemplarSVM, but using logistic regression instead '''
    clf = linear_model.LogisticRegression(class_weight='balanced', fit_intercept=False, verbose=False, max_iter=10000, tol=1e-6, C=0.1)
    Xpos = row.normalized_vectors.reshape(1, -1)
    X = np.concatenate([Xpos, Xneg], axis=0)
    y = np.concatenate([np.ones(1), yneg])
    clf.fit(X, y) # train
    return clf.coef_.reshape(-1)

import importlib
import playground.logistic_regression
importlib.reload(playground.logistic_regression)
from playground.logistic_regression import LinearModel

def get_vector_svm_reg(row):
    regularizer_vector  = get_vector_from_text(row)
    clf = LinearModel(class_weight='balanced', label_loss_type='hinge_squared_loss', reg_norm_lambda=10.,
                      verbose=False,
                      regularizer_vector=regularizer_vector, reg_vector_lambda=1000.)
    Xpos = row.normalized_vectors.reshape(1, -1)
    X = np.concatenate([Xpos, Xneg], axis=0)
    y = np.concatenate([np.ones(1), yneg])
    clf.fit(X, y) # train
    coeff = clf._module.weight.detach().cpu().numpy().reshape(-1)
    return coeff

In [9]:
def eval_method(query_df, vector_fn):
    ''' run the evaluation for a given method over different categories on the dataset '''
    aps = []
    for (idx, row) in tqdm(query_df.iterrows(), total=query_df.shape[0]):
        query_vector = vector_fn(row)
        scores = Xtest @ query_vector
        y_true = get_ys(test_df, row.label)
        ap = average_precision_score(y_true, scores)
        aps.append(ap)
    return np.array(aps)

In [10]:
text_ap = eval_method(query_df, get_vector_from_text)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
  0%|          | 0/313 [00:00<?, ?it/s]TOKENIZERS_PARALLELISM=(true | false)
100%|██████████| 313/313 [00:16<00:00, 19.52it/s]


In [11]:
svm_ap = eval_method(query_df, get_vector_from_svm)

100%|██████████| 313/313 [00:35<00:00,  8.93it/s]


In [12]:
knn_ap = eval_method(query_df, get_vector_from_knn)

100%|██████████| 313/313 [00:06<00:00, 46.19it/s]


In [13]:
svm_reg_ap = eval_method(query_df, get_vector_svm_reg)

100%|██████████| 313/313 [01:47<00:00,  2.92it/s]


In [14]:
query_df = query_df.assign(svm_ap=svm_ap, text_ap=text_ap, knn_ap=knn_ap, svm_reg_ap=svm_reg_ap)
query_df[['svm_ap', 'text_ap', 'knn_ap', 'svm_reg_ap']].mean()

svm_ap        0.095897
text_ap       0.235935
knn_ap        0.091348
svm_reg_ap    0.253193
dtype: float64