In [2]:
import typing as tp
import json
from pathlib import Path

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [3]:
# Climb three directory levels from the current working directory to find
# the "outputs" folder, then select the log file of the run to inspect.
outputs_dir = Path('.').resolve().parent.parent.parent.joinpath("outputs")
log_file_path = outputs_dir.joinpath("2022-10-15/21-28-48/image_attention.log")

In [4]:
def load_records(log_path: Path) -> tp.List[tp.Dict[str, tp.Any]]:
    """Read a JSON-lines log file into a list of parsed records.

    Every non-blank line of the file is parsed as one JSON document.
    Blank or whitespace-only lines (for example a trailing empty line at
    the end of the log) are skipped instead of raising
    ``json.JSONDecodeError``.

    Args:
        log_path: Path to the log file containing one JSON document per line.

    Returns:
        The parsed records, in file order. Empty list for an empty file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(log_path, encoding="utf-8") as f:
        stripped_lines = (line.strip() for line in f)
        return [json.loads(line) for line in stripped_lines if line]

In [5]:
# Parse the log, flatten the records into a table, and remove the
# 'message' and 'asctime' columns before previewing the first rows.
files_records = load_records(log_file_path)
flattened_records_df = pd.json_normalize(files_records)
log_records_df = flattened_records_df.drop(['message', 'asctime'], axis='columns')
log_records_df.head()

Unnamed: 0,name,levelname,acc1,acc5,preds_path,type,cache_size,cache_inds_path,cache_strategy._target_,cache_strategy.topk,alpha,cache_value_strategy._target_,cache_weights_strategy._target_,cache_weights_strategy.beta
0,image_attention,INFO,,,,,,,,,,,,
1,image_attention,INFO,55.62741,89.764955,/home/myurachinskiy/CLIP/summer-clip/outputs/2...,zero_shot,,,,,,,,
2,image_attention,INFO,69.387755,96.428571,,cache_info,196.0,/home/myurachinskiy/CLIP/summer-clip/outputs/2...,summer_clip.clip_searcher.cache_strategy.TopKS...,1.0,,,,
3,image_attention,INFO,55.62741,89.764955,/home/myurachinskiy/CLIP/summer-clip/outputs/2...,searcher_result,,,summer_clip.clip_searcher.cache_strategy.TopKS...,1.0,0.0,summer_clip.clip_searcher.cache_value_strategy...,summer_clip.clip_searcher.cache_weights_strate...,0.1
4,image_attention,INFO,55.62741,89.764955,/home/myurachinskiy/CLIP/summer-clip/outputs/2...,searcher_result,,,summer_clip.clip_searcher.cache_strategy.TopKS...,1.0,0.1,summer_clip.clip_searcher.cache_value_strategy...,summer_clip.clip_searcher.cache_weights_strate...,0.1


In [6]:
# The log is expected to hold exactly one 'zero_shot' record; its
# 'preds_path' field locates the saved zero-shot predictions array.
is_zero_shot = log_records_df['type'].eq('zero_shot')
zero_shot_records = log_records_df.loc[is_zero_shot]
assert zero_shot_records.shape[0] == 1

zero_shot_preds_path = Path(zero_shot_records['preds_path'].iloc[0])
zero_shot_preds = np.load(zero_shot_preds_path)
zero_shot_preds.shape

(8041,)

In [7]:
# The gold labels are read from the "gold_labels" folder located in the
# grandparent directory of the zero-shot predictions file.
gold_labels_dir = zero_shot_preds_path.parent.parent / "gold_labels"
test_labels_path = gold_labels_dir / "test_labels.npy"
test_labels = np.load(test_labels_path)
test_labels.shape

(8041,)

In [11]:
# Keep only the 'searcher_result' records, rank them by 'acc1' (highest
# first) and load the predictions saved for the top-ranked record.
is_searcher_result = log_records_df['type'].eq('searcher_result')
searcher_results_df = log_records_df[is_searcher_result]
ranked_by_acc1 = searcher_results_df.sort_values(by='acc1', ascending=False)
searcher_preds_path = ranked_by_acc1.iloc[0]['preds_path']
searcher_preds = np.load(searcher_preds_path)
searcher_preds.shape

(8041,)

In [12]:
# Predictions and labels must align one-to-one. With mismatched shapes the
# `==` comparison would either broadcast to an unexpected shape or fail,
# silently corrupting any counts derived from the masks.
assert zero_shot_preds.shape == test_labels.shape, (zero_shot_preds.shape, test_labels.shape)
assert searcher_preds.shape == test_labels.shape, (searcher_preds.shape, test_labels.shape)

# Boolean masks: True where the respective predictions equal the gold label.
zero_shot_mask = (zero_shot_preds == test_labels)
searcher_mask = (searcher_preds == test_labels)

In [14]:
# 2x2 agreement table between the two sets of predictions.
# Rows:    zero-shot prediction correct (T) / wrong (F).
# Columns: searcher prediction correct (T) / wrong (F).
both_wrong_mask = ~(zero_shot_mask | searcher_mask)  # same as ~a & ~b (De Morgan)

tt = np.sum(zero_shot_mask & searcher_mask)
tf = np.sum(zero_shot_mask & ~searcher_mask)
ft = np.sum(~zero_shot_mask & searcher_mask)
ff = np.sum(both_wrong_mask)

pd.DataFrame({'T': [tt, ft], 'F': [tf, ff]}, index=['T', 'F'])

Unnamed: 0,T,F
T,4135,337
F,467,3102


In [16]:
# Fraction of test samples the zero-shot predictions get right. Derived from
# the agreement counts (tt + tf = zero-shot correct; all four = total) rather
# than from numbers copied out of a previous output, so it stays correct when
# the inputs change.
float(tt + tf) / float(tt + tf + ft + ff)

0.5561497326203209

In [18]:
# Indices of test samples where the zero-shot prediction is wrong but the
# searcher prediction is right.
searcher_only_correct = np.logical_and(np.logical_not(zero_shot_mask), searcher_mask)
np.nonzero(searcher_only_correct)

(array([   0,   29,   65,  105,  122,  136,  140,  144,  155,  181,  191,
         243,  246,  297,  334,  339,  373,  374,  405,  421,  451,  453,
         455,  470,  471,  514,  520,  528,  539,  552,  556,  577,  579,
         626,  632,  655,  693,  696,  724,  727,  733,  743,  746,  773,
         781,  823,  840,  913,  938,  986, 1010, 1017, 1018, 1019, 1041,
        1090, 1091, 1097, 1112, 1152, 1158, 1174, 1181, 1192, 1203, 1240,
        1272, 1321, 1347, 1353, 1375, 1397, 1422, 1431, 1438, 1439, 1456,
        1466, 1472, 1473, 1478, 1488, 1531, 1539, 1545, 1551, 1570, 1574,
        1580, 1602, 1628, 1635, 1645, 1650, 1657, 1673, 1680, 1689, 1690,
        1696, 1701, 1781, 1785, 1789, 1798, 1836, 1867, 1890, 1924, 1930,
        1959, 1984, 2000, 2003, 2012, 2080, 2135, 2174, 2187, 2210, 2212,
        2215, 2228, 2231, 2238, 2296, 2367, 2387, 2409, 2418, 2436, 2439,
        2468, 2471, 2492, 2505, 2506, 2533, 2540, 2564, 2584, 2591, 2596,
        2600, 2619, 2630, 2641, 2642, 