In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
from tqdm.auto import tqdm
from playground.load import dataset, clip_model, clip_processor, model_device, extract_image_vectors

  from .autonotebook import tqdm as notebook_tqdm


In [11]:
#will take a few minutes
#df = extract_image_vectors(dataset)
#df.to_parquet('data/objectnet/embeddings.parquet')

In [12]:
df = pd.read_parquet('data/objectnet/embeddings.parquet')
image_vectors = np.stack(df['vectors'])

In [13]:
def get_y_true(df, label):
    assert label in df.label.unique()
    return np.where(df.label == label, 1, 0)

def get_query_vector(text):
    query_tokens = clip_processor(text=[text], return_tensors='pt')
    query_vector = clip_model.get_text_features(query_tokens['input_ids'].to(model_device))
    query_vector = F.normalize(query_vector)
    query_vector = query_vector.cpu().detach().numpy().reshape(-1)
    return query_vector

In [14]:
from sklearn.metrics import average_precision_score

In [15]:
y_true = get_y_true(df, 'banana')
query_vector = get_query_vector('a photo of a banana')
image_scores = image_vectors @ query_vector
average_precision_score(y_true, image_scores)

0.8448669959968025

In [7]:
# for each label, we will pick a few random 'train' images we will use for querying, and the rest will be used for measuring performance


In [44]:
np.random.seed(0)
df = df.assign(random_id=np.random.permutation(df.shape[0]))
df = df.assign(group_rank=df.groupby('label')['random_id'].rank(method='first').astype('int'))
df = df.assign(split=df.group_rank.apply(lambda x: 'query' if x <= 1 else 'test'))
df = df.assign(normalized_vectors=[vec for vec in np.stack(df['vectors'])/np.linalg.norm(df['vectors'])])

# now, we will compute the average precision for each train example
query_df = df[df.split == 'query']
test_df = df[df.split == 'test']

ntrain = 2000
random_sample = np.random.permutation(test_df.shape[0])[:ntrain]
# take a random sample of the DB, make it larger than the dimension?
Xneg = np.stack(test_df.iloc[random_sample].normalized_vectors.values)
yneg = np.zeros(ntrain)

Xtest = np.stack(test_df.normalized_vectors.values)

In [45]:
def make_query(label : str) -> str:
    remove_under = label.replace('_', ' ')
    return f'a photo of a {remove_under}'

In [57]:
def get_vector_from_text(row):
    return get_query_vector(make_query(row.label))

def get_vector_from_knn(row):
    return row.normalized_vectors

from sklearn import svm
def get_vector_from_svm(row):
    clf = svm.LinearSVC(class_weight='balanced', verbose=False, max_iter=10000, tol=1e-6, C=0.1)
    Xpos = row.normalized_vectors.reshape(1, -1)
    X = np.concatenate([Xpos, Xneg], axis=0)
    y = np.concatenate([np.ones(1), yneg])
    clf.fit(X, y) # train
    return clf.coef_.reshape(-1)

from sklearn import linear_model
def get_vector_from_logistic_reg(row):
    clf = linear_model.LogisticRegression(class_weight='balanced', fit_intercept=False, verbose=False, max_iter=10000, tol=1e-6, C=0.1)
    Xpos = row.normalized_vectors.reshape(1, -1)
    X = np.concatenate([Xpos, Xneg], axis=0)
    y = np.concatenate([np.ones(1), yneg])
    clf.fit(X, y) # train
    return clf.coef_.reshape(-1)

In [96]:
def eval_method(query_df, vector_fn):
    aps = []
    for (idx, row) in tqdm(query_df.iterrows(), total=query_df.shape[0]):
        query_vector = vector_fn(row)
        scores = Xtest @ query_vector
        y_true = get_y_true(test_df, row.label)
        ap = average_precision_score(y_true, scores)
        aps.append(ap)
    return np.array(aps)

In [48]:
text_ap = eval_method(query_df, get_vector_from_text)

100%|██████████| 313/313 [00:13<00:00, 23.55it/s]


In [50]:
svm_ap = eval_method(query_df, get_vector_from_svm)

100%|██████████| 313/313 [00:25<00:00, 12.41it/s]


In [55]:
lr_ap = eval_method(query_df, get_vector_from_logistic_reg)

100%|██████████| 313/313 [00:21<00:00, 14.51it/s]


In [58]:
knn_ap = eval_method(query_df, get_vector_from_knn)

100%|██████████| 313/313 [00:07<00:00, 44.14it/s]


In [59]:
query_df = query_df.assign(svm_ap=svm_ap, text_ap=text_ap, lr_ap=lr_ap, knn_ap=knn_ap)
query_df[['svm_ap', 'text_ap', 'lr_ap', 'knn_ap']].mean()

svm_ap     0.092111
text_ap    0.236096
lr_ap      0.091111
knn_ap     0.087740
dtype: float64

In [106]:
import importlib
import playground.logistic_regression
importlib.reload(playground.logistic_regression)
from playground.logistic_regression import LinearModel

def get_vector_svm_reg(row):
    regularizer_vector  = get_vector_from_text(row)
    clf = LinearModel(class_weight='balanced', label_loss_type='hinge_squared_loss', reg_norm_lambda=10.,
                      verbose=False,
                      regularizer_vector=regularizer_vector, reg_vector_lambda=1000.)
    Xpos = row.normalized_vectors.reshape(1, -1)
    X = np.concatenate([Xpos, Xneg], axis=0)
    y = np.concatenate([np.ones(1), yneg])
    clf.fit(X, y) # train
    coeff = clf._module.weight.detach().cpu().numpy().reshape(-1)
    return coeff

In [107]:
svm_reg_ap = eval_method(query_df, get_vector_svm_reg)

100%|██████████| 313/313 [01:47<00:00,  2.90it/s]


In [108]:
query_df = query_df.assign(svm_ap=svm_ap, text_ap=text_ap, lr_ap=lr_ap, knn_ap=knn_ap, svm_reg_ap=svm_reg_ap)
query_df[['svm_ap', 'text_ap', 'lr_ap', 'knn_ap', 'svm_reg_ap']].mean()

svm_ap        0.092111
text_ap       0.236096
lr_ap         0.091111
knn_ap        0.087740
svm_reg_ap    0.246876
dtype: float64

In [78]:
clf2 = svm.LinearSVC(class_weight='balanced', verbose=False, max_iter=10000, tol=1e-6, C=0.1)

In [85]:
ys = (y == 2).astype('float')

In [94]:
X = X.astype('float32')

In [95]:
clf.fit(X,ys)

[{'k': 'total_loss',
  'loss': 492.4753375472613,
  'grad_norm': 1010.6362915039062},
 {'k': 'total_loss',
  'loss': 212.23198399093678,
  'grad_norm': 140.1540985107422},
 {'k': 'total_loss',
  'loss': 169.39621252645577,
  'grad_norm': 119.19063568115234},
 {'k': 'total_loss',
  'loss': 109.26077046315245,
  'grad_norm': 226.56996154785156},
 {'k': 'total_loss',
  'loss': 95.10838870593969,
  'grad_norm': 123.43391418457031},
 {'k': 'total_loss',
  'loss': 80.21454797855716,
  'grad_norm': 37.132747650146484},
 {'k': 'total_loss',
  'loss': 69.63078859392593,
  'grad_norm': 24.307775497436523},
 {'k': 'total_loss',
  'loss': 56.410178121361014,
  'grad_norm': 21.143489837646484},
 {'k': 'total_loss',
  'loss': 47.40404180311625,
  'grad_norm': 23.538684844970703},
 {'k': 'total_loss',
  'loss': 45.194081021262576,
  'grad_norm': 19.654163360595703},
 {'k': 'total_loss',
  'loss': 44.51854413228839,
  'grad_norm': 13.927041053771973},
 {'k': 'total_loss',
  'loss': 44.251566352470576,

Unnamed: 0,label,path,random_id,group_rank,split,text_ap,knn_ap,reg_ap,svm_ap,svm2_ap,svm_squared_ap,svm_reg_ap
110,air_freshener,air_freshener/912a742e5b16421.png,2079,3,query,0.037112,0.011462,0.032078,0.011674,0.011502,0.011684,0.025183
132,air_freshener,air_freshener/af2cd7fef2f8477.png,913,1,query,0.037112,0.043908,0.032065,0.053525,0.044473,0.053093,0.025152
144,air_freshener,air_freshener/c175510563a849f.png,2369,4,query,0.037112,0.013797,0.032078,0.016202,0.016233,0.016015,0.025153
166,air_freshener,air_freshener/e5f1811779644f8.png,2953,5,query,0.037112,0.048782,0.032078,0.063500,0.049320,0.064074,0.025196
174,air_freshener,air_freshener/f30f586d53d9490.png,1734,2,query,0.037112,0.026023,0.032078,0.033975,0.025814,0.033882,0.025168
...,...,...,...,...,...,...,...,...,...,...,...,...
50077,ziploc_bag,ziploc_bag/02ebd941b75048c.png,469,1,query,0.421889,0.011251,0.416618,0.012339,0.011406,0.012279,0.410664
50086,ziploc_bag,ziploc_bag/0d66b8101246457.png,1913,5,query,0.421889,0.211507,0.416618,0.271601,0.211481,0.270142,0.410388
50162,ziploc_bag,ziploc_bag/67c864b8a64a4af.png,973,3,query,0.421889,0.046303,0.416618,0.048199,0.048654,0.048422,0.410673
50233,ziploc_bag,ziploc_bag/cd885ba732ab4c8.png,797,2,query,0.421889,0.027874,0.416618,0.039746,0.030127,0.039202,0.410494


In [99]:
np.bincount(y)

array([50, 50, 50])

In [100]:
y.shape[0] / (3 * np.bincount(y))

array([1., 1., 1.])

In [105]:
mys = np.array([1, 0, 0, 0, 0, 0])

In [106]:
mys.shape[0] / (2 * np.bincount(mys))

array([0.6, 3. ])

6.0

In [96]:
clf2.fit(X,ys)

In [97]:
average_precision_score(ys, clf.decision_function(X))

0.9970782542372485

In [98]:
average_precision_score(ys, clf2.decision_function(X))

0.9948175232429083

In [126]:
F.cosine_similarity(torch.tensor([1., 0.]).reshape(1,-1), torch.tensor([0., 1.]).reshape(1,-1))

tensor([0.])

In [131]:
query_df

Unnamed: 0,label,path,random_id,group_rank,split,text_ap,knn_ap,reg_ap,svm_ap,svm2_ap,svm_squared_ap,svm_reg_ap
110,air_freshener,air_freshener/912a742e5b16421.png,2079,3,query,0.037112,0.011462,0.032078,0.011674,0.011502,0.011684,0.025183
132,air_freshener,air_freshener/af2cd7fef2f8477.png,913,1,query,0.037112,0.043908,0.032065,0.053525,0.044473,0.053093,0.025152
144,air_freshener,air_freshener/c175510563a849f.png,2369,4,query,0.037112,0.013797,0.032078,0.016202,0.016233,0.016015,0.025153
166,air_freshener,air_freshener/e5f1811779644f8.png,2953,5,query,0.037112,0.048782,0.032078,0.063500,0.049320,0.064074,0.025196
174,air_freshener,air_freshener/f30f586d53d9490.png,1734,2,query,0.037112,0.026023,0.032078,0.033975,0.025814,0.033882,0.025168
...,...,...,...,...,...,...,...,...,...,...,...,...
50077,ziploc_bag,ziploc_bag/02ebd941b75048c.png,469,1,query,0.421889,0.011251,0.416618,0.012339,0.011406,0.012279,0.410664
50086,ziploc_bag,ziploc_bag/0d66b8101246457.png,1913,5,query,0.421889,0.211507,0.416618,0.271601,0.211481,0.270142,0.410388
50162,ziploc_bag,ziploc_bag/67c864b8a64a4af.png,973,3,query,0.421889,0.046303,0.416618,0.048199,0.048654,0.048422,0.410673
50233,ziploc_bag,ziploc_bag/cd885ba732ab4c8.png,797,2,query,0.421889,0.027874,0.416618,0.039746,0.030127,0.039202,0.410494


In [137]:
import playground.basic_trainer
import playground.logistic_regression

importlib.reload(playground.logistic_regression)
importlib.reload(playground.basic_trainer)
from playground.logistic_regression import LinearModel
aps = []

for (idx, row), query_vector in tqdm(zip(query_df.iterrows(), image_query_vecs), total=query_df.shape[0]):
    query_vector = get_query_vector(make_query(row.label))
    clf = LinearModel(class_weight='balanced', label_loss_type='hinge_squared_loss', reg_norm_lambda=10.,
                      verbose=False, regularizer_vector=query_vector, reg_vector_lambda=100.)
    Xpos = query_vector.reshape(1, -1)
    X = np.concatenate([Xpos, Xneg], axis=0)
    y = np.concatenate([np.ones(1), np.zeros(Xneg.shape[0])])
    clf.fit(X, y) # train
    image_scores = clf.decision_function(test_vec_db)
    y_true = get_y_true(test_df, row.label)
    ap = average_precision_score(y_true, image_scores)
    aps.append(ap)

100%|██████████| 1565/1565 [02:26<00:00, 10.67it/s]


In [135]:
#clf2 = svm.LinearSVC(class_weight='balanced', verbose=False, max_iter=10000, tol=1e-6, C=0.1)A

In [115]:
# image_scores2 = clf2.decision_function(test_vec_db)
# ap2 = average_precision_score(y_true, image_scores2)


In [138]:
query_df = query_df.assign(svm_reg_ap3=aps)
totals = query_df.groupby('label')[['knn_ap', 'text_ap', 'svm_reg_ap', 'svm_reg_ap2', 'svm_reg_ap3', 'svm_ap',  'svm_squared_ap']].mean()
totals.mean()

knn_ap            0.076140
text_ap           0.232334
svm_reg_ap        0.231584
svm_reg_ap2       0.230460
svm_reg_ap3       0.233197
svm_ap            0.088182
svm_squared_ap    0.087306
dtype: float64

In [23]:
totals = query_df.groupby('label')[['svm_ap']].mean()
totals.mean()

svm_ap    0.059137
dtype: float64

In [38]:
query_df = query_df.assign(reg_ap=aps)
totals = query_df.groupby('label')[['knn_ap', 'text_ap', 'reg_ap', 'svm_ap']].mean()
totals.mean()a

knn_ap     0.076140
text_ap    0.232334
reg_ap     0.233089
svm_ap     0.088182
dtype: float64

In [99]:
query_df = query_df.assign(lrap=aps)
totals = query_df.groupby('label')[['ap', 'svap', 'lrap']].mean()
totals.mean()

ap      0.076140
svap    0.088269
lrap    0.078470
dtype: float64

In [101]:
query_df = query_df.assign(lrap=aps)
totals = query_df.groupby('label')[['ap', 'svap', 'lrap']].mean()
totals.mean()

ap      0.076140
svap    0.088269
lrap    0.079271
dtype: float64