In [1]:
import os
import numpy as np
import torch

from tqdm.notebook import tqdm
from src.imagenet_labels import lab_dict
from lavis.processors.blip_processors import BlipCaptionProcessor
from src.blip_lavis import define_model, get_transform
from src.dataloaders import imagenet_c_dataloader, imagenet_dataloader


# TODO: Read: Important notices:
# Note that one transformation is happening as preprocessing goes (the blip one)
# Note that only a low-dimensional projection of the features is used, namely, 'proj' in the next cell... the same for images... Consider checking the full dimension embeddings??

# Note that blip1 is used and not blip2... naturally lavis variant and not huggingface
# Note that 256 batch size is used due to memory limitations
# Note that things work, with initial proof... they just need to be fully run tomorrow morning 27.07.2023
# Note that each severity level takes 4 minutes to complete with batch size of 256 on a 3060TI 8GB

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [3]:
cls_names = [lab_dict[i].replace('_', ' ') for i in os.listdir('../data/imagenet')]

text_processor = BlipCaptionProcessor(prompt="A picture of ")

cls_prompt = [text_processor(cls_nm) for cls_nm in cls_names]

In [4]:
cls_prompt

['A picture of tench',
 'A picture of goldfish',
 'A picture of great white shark',
 'A picture of tiger shark',
 'A picture of hammerhead',
 'A picture of electric ray',
 'A picture of stingray',
 'A picture of cock',
 'A picture of hen',
 'A picture of ostrich',
 'A picture of brambling',
 'A picture of goldfinch',
 'A picture of house finch',
 'A picture of junco',
 'A picture of indigo bunting',
 'A picture of robin',
 'A picture of bulbul',
 'A picture of jay',
 'A picture of magpie',
 'A picture of chickadee',
 'A picture of water ouzel',
 'A picture of kite',
 'A picture of bald eagle',
 'A picture of vulture',
 'A picture of great grey owl',
 'A picture of european fire salamander',
 'A picture of common newt',
 'A picture of eft',
 'A picture of spotted salamander',
 'A picture of axolotl',
 'A picture of bullfrog',
 'A picture of tree frog',
 'A picture of tailed frog',
 'A picture of loggerhead',
 'A picture of leatherback turtle',
 'A picture of mud turtle',
 'A picture of 

In [5]:
def get_acc(gt, preds = None):
    if preds is not None:
        return ((preds.argmax(1)==gt).sum()/len(preds)).cpu().numpy()
    return ((preds.argmax(1)==gt).sum()/len(preds)).cpu().numpy()


def get_test_acc(model, loader, device='cuda'):
    eval_acc = []
    for batch in tqdm(loader, leave=False):
        ims, labels = batch
        ims, labels = ims.to(device), labels.to(device)

        image_features = model.extract_features( {"image": ims}, mode="image").image_embeds_proj[:, 0]

        sims = (image_features @ text_features.t()) / model.temp
        preds = torch.nn.Softmax(dim=1)(sims)

        val_acc = get_acc(labels.view(-1,), preds)
        eval_acc.append(val_acc)

    return np.mean(eval_acc)

In [6]:
model = define_model(device)
transform = get_transform(device)

In [7]:
text_features = model.extract_features({"text_input": cls_prompt}, mode="text").text_embeds_proj[:, 0]

In [8]:
gaussian_noise_acc = []
for sev in tqdm([1, 2, 3, 4, 5]):
    loader = imagenet_c_dataloader(corruption_name='gaussian_noise', severity=sev, batch_size=256, transform=transform)
    gaussian_noise_acc.append(get_test_acc(model, loader, device))

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/196 [00:00<?, ?it/s]

  0%|          | 0/196 [00:00<?, ?it/s]

  0%|          | 0/196 [00:00<?, ?it/s]

  0%|          | 0/196 [00:00<?, ?it/s]

  0%|          | 0/196 [00:00<?, ?it/s]

In [9]:
gaussian_noise_acc

[0.45566803, 0.41914064, 0.3546875, 0.26296237, 0.14225924]

In [10]:
impulse_noise_acc = []
for sev in tqdm([1, 2, 3, 4, 5]):
    loader = imagenet_c_dataloader(corruption_name='impulse_noise', severity=sev, batch_size=256, transform=transform)
    impulse_noise_acc.append(get_test_acc(model, loader, device))

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/196 [00:00<?, ?it/s]

  0%|          | 0/196 [00:00<?, ?it/s]

  0%|          | 0/196 [00:00<?, ?it/s]

  0%|          | 0/196 [00:00<?, ?it/s]

  0%|          | 0/196 [00:00<?, ?it/s]

In [18]:
impulse_noise_acc

[0.42414305, 0.3770767, 0.3341797, 0.23130979, 0.12862723]

In [12]:
shot_noise_acc = []
for sev in tqdm([1, 2, 3, 4, 5]):
    loader = imagenet_c_dataloader(corruption_name='shot_noise', severity=sev, batch_size=256, transform=transform)
    shot_noise_acc.append(get_test_acc(model, loader, device))

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/196 [00:00<?, ?it/s]

  0%|          | 0/196 [00:00<?, ?it/s]

  0%|          | 0/196 [00:00<?, ?it/s]

  0%|          | 0/196 [00:00<?, ?it/s]

  0%|          | 0/196 [00:00<?, ?it/s]

In [13]:
shot_noise_acc

[0.45365512, 0.40867347, 0.34552374, 0.23026147, 0.1415617]

In [14]:
speckle_noise_acc = []
for sev in tqdm([1, 2, 3, 4, 5]):
    loader = imagenet_c_dataloader(corruption_name='speckle_noise', severity=sev, batch_size=256, transform=transform)
    speckle_noise_acc.append(get_test_acc(model, loader, device))

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/196 [00:00<?, ?it/s]

  0%|          | 0/196 [00:00<?, ?it/s]

  0%|          | 0/196 [00:00<?, ?it/s]

  0%|          | 0/196 [00:00<?, ?it/s]

  0%|          | 0/196 [00:00<?, ?it/s]

In [15]:
speckle_noise_acc

[0.46370775, 0.44168526, 0.3695472, 0.31965482, 0.25065768]

In [16]:
loader = imagenet_dataloader(batch_size=256, transform=transform)
clean_acc = get_test_acc(model, loader, device)

  0%|          | 0/196 [00:00<?, ?it/s]

In [17]:
clean_acc

0.4355668