Notebook explaining step by step how we build the `compute_likelihood_map` function.

In [1]:
import retinoto_py as fovea
args = fovea.Params()
args

Params(image_size=224, do_mask=False, do_fovea=False, rs_min=0.0, rs_max=-6.0, padding_mode='zeros', seed=2018, batch_size=80, num_workers=1, in_memory=False, model_name='convnext_base', num_epochs=24, subset_factor=1, lr=1e-06, delta1=0.1, delta2=0.007, weight_decay=0.03, label_smoothing=0.2, shuffle=True, verbose=False)

In [2]:
import torch
from torchvision.io import read_image

alpha = .5
s_max = 1000

In [3]:
idx_to_label = fovea.get_idx_to_label(args)
label2idx = fovea.get_label_to_idx(args)
label2idx['impala']

Loading labels from local cache cached_data/imagenet_class_index.json...
Loading labels from local cache cached_data/imagenet_class_index.json...


352

In [4]:
for key in label2idx.keys(): 
    if 'owl' in key: print(key)

great_grey_owl
howler_monkey
mixing_bowl
soup_bowl


In [5]:
dataset = 'bbox'
model_filename = args.data_cache / f'32_fovea_model_name={args.model_name}_dataset={dataset}.pth'
model = fovea.load_model(args, model_filename=model_filename)
model_filename

PosixPath('cached_data/32_fovea_model_name=convnext_base_dataset=bbox.pth')

In [None]:
from torchvision.transforms.functional import InterpolationMode, resize
image_size_full = 1024
resolution = (20, 20)
resolution = (60, 60)
resolution = (50, 50)
resolution = 256
size_ratio = .3
N_repeats = 10

for image_url in  [
            #    'wolf',
               'fluffy-white-clouds',
               'fractal',
            #    'farm_animals', # to debug
               'jwst-carina-nebula',
               'e2-PIA26210-Curiosity',
               'L3HYK4hkM2gUTjBQbBMVkG', # mars rover
               'Nanedi_Valles_valley_system_on_Mars_ESA199848',
               'my_jackson_pollock_painting_by_amau41200-d4vjeut',
               'JamesWebbSpaceTelescope',
               'jXwhLmwUt9XyJ9LGTSkVoD-1200-80',
               'stsci-01gf49bhzj1wx8w803281hrbcw',
               'hubble_ngc1858_1670218444546_1670218460358_1670218460358',
               'green-jungle-trees-plants-hd-1080P-wallpaper',
                                ]:
   print(50*'=-')
   print(f'{image_url=}')
   print(50*'.-')
   full_image = read_image('./images/' + image_url + '.jpg')[:3, :, :]/255
   full_image = resize(full_image, image_size_full, interpolation=InterpolationMode.BILINEAR, antialias=True)
   # print(f"{type(full_image) = }, {full_image.dtype = }, {full_image.shape = }")
   # Initialize a dictionary to store scores for each label
   label_scores = {label: [] for label in idx_to_label}


   args = fovea.Params(do_fovea=True)
   for repeat in range(N_repeats):
      with fovea.torch.no_grad():
         pos_H, pos_W, probas = fovea.compute_likelihood_map(args, model, full_image, size_ratio=size_ratio, resolution=resolution)
         probas = probas.cpu()

      proba_max, preds = torch.max(probas, dim=1)

      # Populate the dictionary
      for proba, pred, pos_h, pos_w in zip(proba_max, preds, pos_H, pos_W): 
         label = idx_to_label[pred]
         label_scores[label].append([proba.item(), pos_h, pos_w])

   # Calculate mean scores for each label
   total_scores = {label: fovea.np.max(scores) for label, scores in label_scores.items()}

   # Sort labels by mean score in descending order
   sorted_labels = sorted(total_scores.items(), key=lambda x: x[1], reverse=True)

   # Print the top labels with their scores
   print("Top 10 labels by mean score:")
   for label, total_score in sorted_labels[:15]:
      print(f"{label}: {total_score:.4f}")

   print(50*'.-')




=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
image_url='fluffy-white-clouds'
.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-


ValueError: zero-size array to reduction operation maximum which has no identity

In [10]:
label_scores['vulture']

[[0.148212268948555, np.int64(940), np.int64(905)],
 [0.2911164164543152, np.int64(1016), np.int64(377)],
 [0.21295669674873352, np.int64(1002), np.int64(259)],
 [0.11280938982963562, np.int64(1012), np.int64(464)],
 [0.24047112464904785, np.int64(983), np.int64(562)],
 [0.19969357550144196, np.int64(947), np.int64(453)],
 [0.17317454516887665, np.int64(938), np.int64(627)],
 [0.4295402765274048, np.int64(1014), np.int64(308)],
 [0.21902118623256683, np.int64(855), np.int64(435)],
 [0.15497525036334991, np.int64(913), np.int64(224)]]

In [7]:
probas.shape, proba_max.shape

(torch.Size([256, 1000]), torch.Size([256]))

In [None]:

for image_url, true_label in  [
               ('fluffy-white-clouds', 'airship'),
               ('fractal', 'peacock'),
               ('jwst-carina-nebula', 'brown_bear'),
               ('L3HYK4hkM2gUTjBQbBMVkG', 'horned_viper'), 
               ('e2-PIA26210-Curiosity', 'Arabian_camel'), 
               ('Nanedi_Valles_valley_system_on_Mars_ESA199848', 'water_bottle'),
               ('my_jackson_pollock_painting_by_amau41200-d4vjeut', 'parachute'),
               ('JamesWebbSpaceTelescope', 'sea_urchin'),
               ('green-jungle-trees-plants-hd-1080P-wallpaper', 'peacock'),
                                ]:
    print(50*'=-')
    print(f'{image_url=} contains a {true_label}?')
    print(50*'.-')

    full_image = read_image('./images/' + image_url + '.jpg')[:3, :, :]/255
    full_image = resize(full_image, image_size_full, interpolation=InterpolationMode.BILINEAR, antialias=True)
    # print(f"{type(full_image) = }, {full_image.dtype = }, {full_image.shape = }")

    args = fovea.Params(do_fovea=True)

    pos_H, pos_W, outputs = fovea.compute_likelihood_map(args, model, full_image, size_ratio=size_ratio, resolution=resolution)
    pos_H, pos_W, outputs = fovea.compute_likelihood_map(args, model, full_image, size_ratio=size_ratio)
    outputs = outputs.cpu()
    logit_label = outputs[:, label2idx[true_label]]
    idx_max = logit_label.argmax()

    fig, ax = fovea.plt.subplots()
    full_image_np = torch.movedim(full_image, (1, 2, 0), (0, 1, 2)).numpy()
    ax.imshow(full_image_np)
    ax.scatter(pos_W.ravel(), pos_H.ravel(), s=logit_label.abs()*s_max, c=logit_label, alpha=alpha, edgecolors='none', cmap='coolwarm', vmin=-max(abs(logit_label)), vmax=max(abs(logit_label)),)

    ax.scatter(pos_W.ravel()[idx_max], pos_H.ravel()[idx_max], s=logit_label[idx_max]*s_max, marker='*', c='white', alpha=alpha)
    fig.set_facecolor(color='white')
    fovea.plt.show()