In [1]:
import torch
import os

import numpy as np
import sys 
sys.path.append('..')
from imagebind import data
from imagebind.models import imagebind_model
from src.imagenet_labels import lab_dict
from tqdm.notebook import tqdm
from imagebind.models.imagebind_model import ModalityType



In [2]:
# Run on the first CUDA device when one is available; otherwise fall back to CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

In [3]:
# Build the pretrained ImageBind "huge" model, switch it to eval mode and move
# it to the selected device. `eval()` and `to()` both return the module itself,
# so the calls can be chained.
model = imagebind_model.imagebind_huge(pretrained=True).eval().to(device)

In [4]:
# One text prompt per entry of the image root directory: the entry name is
# looked up in `lab_dict`, underscores become spaces, and the result is
# prefixed with "a ". Later code strips that two-character prefix with `[2:]`
# to recover the class name, so the prefix must stay exactly "a ".
# Prompt order follows os.listdir(), i.e. it is not sorted.
text_list = [
    "a " + lab_dict[class_dir].replace('_', ' ')
    for class_dir in os.listdir('../imagenet_data/imagenet')
]

In [5]:
# Display the generated prompts as a sanity check.
text_list

['a carton',
 'a brain coral',
 'a wool',
 'a carbonara',
 'a triceratops',
 'a ballplayer',
 'a pug',
 'a Lhasa',
 'a cowboy boot',
 'a rock python',
 'a dial telephone',
 'a binoculars',
 'a Irish setter',
 'a gas pump',
 'a gown',
 'a pole',
 'a scuba diver',
 'a dragonfly',
 'a paddle',
 'a missile',
 'a mask',
 'a pineapple',
 'a computer keyboard',
 'a church',
 'a king penguin',
 'a wreck',
 'a catamaran',
 'a ant',
 'a dalmatian',
 'a planetarium',
 'a shopping basket',
 'a wire-haired fox terrier',
 'a garbage truck',
 'a pay-phone',
 'a spiny lobster',
 'a komondor',
 'a beacon',
 'a American lobster',
 'a ostrich',
 'a badger',
 'a doormat',
 'a racer',
 'a wine bottle',
 'a wombat',
 'a jaguar',
 'a tripod',
 'a strawberry',
 'a golf ball',
 'a tile roof',
 'a birdhouse',
 'a honeycomb',
 'a toyshop',
 'a West Highland white terrier',
 'a screwdriver',
 'a miniature poodle',
 'a face powder',
 'a bannister',
 'a hammerhead',
 'a frying pan',
 'a pool table',
 'a paddlewheel

In [15]:
def get_acc(gt, preds = None):
    """Return the top-1 accuracy of ``preds`` against the labels ``gt``.

    Args:
        gt: Tensor of ground-truth class indices; compared element-wise with
            ``preds.argmax(1)``.
        preds: Tensor of per-class scores with the class axis at dimension 1.
            Required in practice; the ``None`` default is kept only so that
            existing call signatures stay valid.

    Returns:
        A 0-d NumPy array holding the fraction of rows of ``preds`` whose
        argmax equals the corresponding entry of ``gt``.

    Raises:
        ValueError: If ``preds`` is ``None``. Previously this case fell through
            to ``preds.argmax`` and failed with an opaque AttributeError.
    """
    if preds is None:
        raise ValueError("get_acc() requires `preds`, but got None")
    n_correct = (preds.argmax(1) == gt).sum()
    return (n_correct / len(preds)).cpu().numpy()


def compute(model, text, images, labels, device):
    """Score a batch of images against the text prompts and return its accuracy.

    Args:
        model: Callable that takes a dict keyed by ``ModalityType`` and returns
            embeddings indexable by the same keys.
        text: Prompts handed to ``data.load_and_transform_text``.
        images: Image inputs handed to ``data.load_and_transform_vision_data``.
        labels: Tensor of ground-truth indices; moved to ``device`` before the
            comparison. Presumably indices into ``text`` -- confirm with caller.
        device: Device passed to both loaders and used for ``labels``.

    Returns:
        The value returned by ``get_acc`` for this batch.
    """
    text_inputs = data.load_and_transform_text(text, device)
    vision_inputs = data.load_and_transform_vision_data(images, device)

    # Forward pass only; no gradients are tracked.
    with torch.no_grad():
        embeddings = model({
            ModalityType.TEXT: text_inputs,
            ModalityType.VISION: vision_inputs,
        })

    # Image-to-text similarity, normalised over the prompts of each image.
    similarity = embeddings[ModalityType.VISION] @ embeddings[ModalityType.TEXT].T
    probs = torch.softmax(similarity, dim=-1)
    return get_acc(labels.to(device), probs)
    
def get_image_paths(root):
    """Map each class name to the file paths found in its sub-directory.

    Every entry of ``root`` is treated as a class directory. Its name is
    translated through ``lab_dict`` (underscores replaced by spaces) to form
    the dictionary key.

    Args:
        root: Directory whose entries are per-class directories.

    Returns:
        dict mapping class name -> list of paths ``root/<entry>/<file>``,
        in ``os.listdir`` order.
    """
    paths_by_class = {}
    for class_dir in tqdm(os.listdir(root)):
        class_root = os.path.join(root, class_dir)
        files = [os.path.join(class_root, fname) for fname in os.listdir(class_root)]
        class_name = lab_dict[class_dir].replace('_', ' ')
        paths_by_class[class_name] = files
    return paths_by_class

def get_test_acc(image_paths, device, classes_per_batch=20):
    """Return the overall accuracy over all images listed in ``image_paths``.

    Classes are processed ``classes_per_batch`` at a time. Each batch of
    images is scored against every prompt in the notebook-level ``text_list``
    using the notebook-level ``model``.

    Args:
        image_paths: dict mapping class name -> list of image paths, keyed by
            the prompts in ``text_list`` with their leading "a " removed.
        device: Device forwarded to ``compute``.
        classes_per_batch: Number of classes whose images are evaluated in a
            single ``compute`` call. Defaults to 20, the value that used to be
            hard-coded.

    Returns:
        Accuracy over all evaluated images: the per-batch accuracies averaged
        with weights equal to each batch's image count. This equals the plain
        mean when every batch holds the same number of images. Returns NaN
        when there is nothing to evaluate.

    Raises:
        ValueError: If ``classes_per_batch`` is smaller than 1.
    """
    if classes_per_batch < 1:
        raise ValueError("classes_per_batch must be at least 1")

    n_classes = len(text_list)
    batch_accs = []
    batch_sizes = []
    # Step through the classes in chunks; the final chunk may be shorter, so
    # trailing classes are no longer dropped when the class count is not a
    # multiple of the chunk size.
    for start in tqdm(range(0, n_classes, classes_per_batch)):
        image_paths_batch = []
        labels = []
        for j in range(start, min(start + classes_per_batch, n_classes)):
            # The first 2 characters of every prompt are "a "; strip them to
            # recover the class-name key used in `image_paths`.
            class_paths = image_paths[text_list[j][2:]]
            image_paths_batch += class_paths
            labels += [j] * len(class_paths)

        if not image_paths_batch:
            # Nothing to score for this chunk of classes.
            continue

        batch_accs.append(
            compute(model, text_list, image_paths_batch, torch.tensor(labels), device)
        )
        batch_sizes.append(len(labels))

    if not batch_accs:
        return float("nan")
    # Weight by image count so batches of different sizes do not bias the result.
    return np.average(batch_accs, weights=batch_sizes)

In [9]:
# NOTE(review): leftover scratch cell -- it cannot run on a fresh kernel.
# `i` is never assigned at notebook level (comprehension variables do not leak),
# which is the NameError shown below, and `image_paths` is only assigned in a
# later cell. Consider deleting this cell.
image_paths_batch = []
for j in range(i, i+10):
    image_paths_batch.append(image_paths[text_list[j][2:]])

NameError: name 'i' is not defined

In [10]:
# Root of the clean image folders (one sub-directory per class); this is the
# same directory the prompt list is built from.
path_to_imagenet = '../imagenet_data/imagenet'
# Parent directory that the corruption cells join with '<corruption>/<severity>'.
path_to_imagenet_c = '../imagenet_data/'

In [11]:
# Index the clean images: class name -> list of file paths.
image_paths = get_image_paths(path_to_imagenet)


  0%|          | 0/1000 [00:00<?, ?it/s]

In [12]:
# Accuracy on the clean images. get_test_acc also reads the notebook-level
# `model` and `text_list`, so those cells must have been run first.
clean_acc = get_test_acc(image_paths, device)

  0%|          | 0/50 [00:00<?, ?it/s]

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49


In [13]:
# Accuracy returned by get_test_acc for the clean image set.
clean_acc

0.72628

In [16]:
# Accuracy on the 'gaussian_noise' images under `path_to_imagenet_c`, one value
# per severity level 1-5. The per-severity paths get their own variable so the
# clean-image index in `image_paths` is not overwritten (re-running the
# clean-accuracy cell afterwards would otherwise evaluate on noisy images).
gaussian_noise_acc = []
for sev in tqdm([1, 2, 3, 4, 5]):
    gaussian_noise_paths = get_image_paths(os.path.join(path_to_imagenet_c, 'gaussian_noise', str(sev)))
    gaussian_noise_acc.append(get_test_acc(gaussian_noise_paths, device))

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

In [None]:
# Gaussian-noise accuracies, one per severity level in the order 1-5.
gaussian_noise_acc

In [None]:
# Accuracy on the 'impulse_noise' images under `path_to_imagenet_c`, one value
# per severity level 1-5. The per-severity paths get their own variable so the
# clean-image index in `image_paths` is not overwritten (re-running the
# clean-accuracy cell afterwards would otherwise evaluate on noisy images).
impulse_noise_acc = []
for sev in tqdm([1, 2, 3, 4, 5]):
    impulse_noise_paths = get_image_paths(os.path.join(path_to_imagenet_c, 'impulse_noise', str(sev)))
    impulse_noise_acc.append(get_test_acc(impulse_noise_paths, device))

In [None]:
# Impulse-noise accuracies, one per severity level in the order 1-5.
impulse_noise_acc

In [None]:
# Accuracy on the 'shot_noise' images under `path_to_imagenet_c`, one value
# per severity level 1-5. The per-severity paths get their own variable so the
# clean-image index in `image_paths` is not overwritten (re-running the
# clean-accuracy cell afterwards would otherwise evaluate on noisy images).
shot_noise_acc = []
for sev in tqdm([1, 2, 3, 4, 5]):
    shot_noise_paths = get_image_paths(os.path.join(path_to_imagenet_c, 'shot_noise', str(sev)))
    shot_noise_acc.append(get_test_acc(shot_noise_paths, device))

In [None]:
# Shot-noise accuracies, one per severity level in the order 1-5.
shot_noise_acc

In [None]:
# Accuracy on the 'speckle_noise' images under `path_to_imagenet_c`, one value
# per severity level 1-5. The per-severity paths get their own variable so the
# clean-image index in `image_paths` is not overwritten (re-running the
# clean-accuracy cell afterwards would otherwise evaluate on noisy images).
speckle_noise_acc = []
for sev in tqdm([1, 2, 3, 4, 5]):
    speckle_noise_paths = get_image_paths(os.path.join(path_to_imagenet_c, 'speckle_noise', str(sev)))
    speckle_noise_acc.append(get_test_acc(speckle_noise_paths, device))

In [None]:
# Speckle-noise accuracies, one per severity level in the order 1-5.
speckle_noise_acc

In [None]:
# Collect every accuracy result in one dict, keyed by the name of the variable
# that holds it. The four noise entries are per-severity lists; `clean_acc` is
# a single value.
res = dict(
    gaussian_noise_acc=gaussian_noise_acc,
    impulse_noise_acc=impulse_noise_acc,
    shot_noise_acc=shot_noise_acc,
    speckle_noise_acc=speckle_noise_acc,
    clean_acc=clean_acc,
)

In [None]:
# Display the collected clean and per-corruption accuracies.
res