# Evaluate CLIPpy on ImageNet (classification) and PASCAL VOC (semantic segmentation)

## Common
Set the paths to checkpoints and datasets, and load the pre-trained CLIPpy model.

In [1]:
import os
import sys

# Switch the working directory to the sibling `src` directory; presumably this
# is what lets the project-local imports used in later cells (open_clip,
# training, utils, evaluation) resolve — confirm against the repo layout.
os.chdir("../src")

In [2]:
# Checkpoint file handed to create_model_and_transforms via `pretrained=`.
pretrained_ckpt = "/raid/kanchana/checkpoints/open_clip/clippy_best/clippy_5k.pt"
# Directory handed to the ImageNet evaluation arguments as `imagenet_val`.
inet_path = "/raid/datasets/img1k_k/val"

In [3]:
import torch

from open_clip import create_model_and_transforms, get_tokenizer
from training.data import get_imagenet
from training.zero_shot import zero_shot_eval

from utils import DummyArgs


# The tokenizer and the model are both looked up under the "clippy-B-16" name.
tokenizor = get_tokenizer("clippy-B-16")

# Create the model on cuda:0 from the pre-trained checkpoint; the call also
# returns the train and val preprocessing transforms.
clippy, preprocess_train, preprocess_val = create_model_and_transforms(
    "clippy-B-16",
    pretrained=pretrained_ckpt,
    device="cuda:0",
    precision="amp",
)

## ImageNet Classification
We load the ImageNet validation set and evaluate CLIPpy on it. Top-1 and top-5 accuracies are reported.

In [4]:
# Arguments consumed by the ImageNet data loader and the zero-shot evaluation.
args = DummyArgs(
    # model / runtime
    model="clippy-B-16",
    device="cuda:0",  # ensure same device for model (above) and data (here)
    precision="amp",  # ensure same precision for model (above) and data (here)
    # data loading
    imagenet_val=inet_path,
    batch_size=64,
    workers=1,
    # single-process run
    distributed=False,
    horovod=False,
    # evaluation schedule
    zeroshot_frequency=1,
    epochs=0,
)

# Build the "val" split and register it under the "imagenet-val" key.
dataset = get_imagenet(args, (preprocess_train, preprocess_val), "val")
data = {"imagenet-val": dataset}

In [5]:
# Run zero-shot evaluation of the model on the datasets registered in `data`.
# The third positional argument (1) is presumably the epoch index expected by
# zero_shot_eval — confirm against training.zero_shot.
zero_shot_metrics = zero_shot_eval(clippy, data, 1, args)
print(zero_shot_metrics)

100%|███████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:35<00:00, 28.03it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████| 50048/50048 [1:30:20<00:00,  9.23it/s]

{'imagenet-zeroshot-val-top1': 0.44976, 'imagenet-zeroshot-val-top5': 0.72826}





## PASCAL VOC Semantic Segmentation
Online evaluation script for fast, approximate evaluation.

In [7]:
from evaluation.datasets import PascalDataset
from evaluation.segmentation import PascalMIoU, PascalEvaluator

In [8]:
# Options for the PASCAL VOC evaluator. NOTE: this rebinds `args`, replacing the
# ImageNet arguments defined earlier in the notebook.
args = DummyArgs(
    model="clippy-B-16",
    device="cuda:0",
    precision="amp",  # ensure same precision for model (above) and data (here)
    batch_size=64,
    workers=1,
)

In [9]:
# PASCAL VOC 2012 root; images go through the same validation transform as the
# one returned when the model was created.
data_dir = "/raid/datasets/pascal_voc/VOC2012"
pascal_dataset = PascalDataset(data_dir, transform=preprocess_val)

# Wire the model, dataset and mIoU metric into a single evaluator object.
metric = PascalMIoU()
evaluator = PascalEvaluator(
    model=clippy,
    dataset=pascal_dataset,
    metric=metric,
    opts=args,
)

In [10]:
# Run the evaluation; the results are read back from `evaluator.metric` in the
# next cell.
evaluator.evaluate()

100%|███████████████████████████████████████████████████████████████████████████████████████████████| 1472/1472 [04:12<00:00,  5.82it/s]


In [11]:
# Compute the final metrics. The printed result is a dict with an overall
# 'mIoU' value and a 21-entry 'per_class' list.
res = evaluator.metric.compute()
print(res)

{'mIoU': 0.4210667844036943, 'per_class': [0.7725213189576166, 0.3466781139113337, 0.2781234242346894, 0.38985045635578225, 0.38366327467925143, 0.36911746718162414, 0.5677382659622576, 0.5011231132193903, 0.6720588265609756, 0.16085995911895945, 0.5352997603574671, 0.27677756803172054, 0.6050781162315146, 0.5568816202049942, 0.5501736274433078, 0.1500900159722362, 0.2045999185819136, 0.5984697807708667, 0.2853137003254783, 0.44365273885047407, 0.1943314055257269]}


In [None]:
import pandas as pd
import numpy as np
from glob import glob
import os
import json
import cv2 as cv
from tqdm import tqdm
from PIL import Image

from utils import *

def IOU(a, b):
    """Return the intersection-over-union of two 2-D masks.

    Non-zero entries are treated as foreground.

    Args:
        a: 2-D array-like mask.
        b: 2-D array-like mask with the same shape as ``a``.

    Returns:
        ``count(a AND b) / count(a OR b)`` as a float. When both masks are
        empty the union is zero; 0.0 is returned instead of dividing by zero.

    Raises:
        AssertionError: if the shapes differ or the masks are not 2-D.
    """
    a, b = np.asarray(a), np.asarray(b)
    assert a.shape == b.shape and a.ndim == 2, (
        f"expected two 2-D masks of equal shape, got {a.shape} and {b.shape}"
    )
    union = np.count_nonzero(np.logical_or(a, b))
    if union == 0:
        # Both masks are empty: the ratio is undefined, report zero overlap.
        return 0.0
    return np.count_nonzero(np.logical_and(a, b)) / union


In [None]:
# Directory that receives the predicted masks.
save_path = "/home/kanchana/repo/temp_peekaboo"

# Dataset root and the text file listing one entry per line.
data_path = "/nfs/ws1/datasets/RefVOC"
file_list = f"{data_path}/refvoc_files.txt"

# Each listed entry becomes an image file name of the form "cropped-<entry>.jpg".
with open(file_list, "r") as fo:
    files = [f"cropped-{line.strip()}.jpg" for line in fo]

In [None]:
# Fail fast if any listed image is missing under data_path; the assertion
# message is the offending file name.
for f in files:
    image_path = f"{data_path}/{f}"
    assert os.path.exists(image_path), f

In [None]:
# Predict a label map for every listed image and save it as a PNG in save_path.
# enumerate(tqdm(...)) instead of tqdm(enumerate(...)): tqdm can only infer the
# total (and show a real progress bar) when it wraps the list itself.
for idx, file_path in enumerate(tqdm(files)):
    im = Image.open(f"{data_path}/{file_path}")
    im_orig = im  # keep the un-preprocessed image for inspection in later cells
    im = preprocess_val(im)
    im = im.unsqueeze(0).cuda()

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        # [:, 1:] drops index 0 along dim 1 — presumably the class token, leaving
        # only patch tokens; confirm against encode_image.
        image_features = clippy.encode_image(im, normalize=True, pool=False)[:, 1:]
        similarity = get_similarity(image_features, evaluator.class_embeddings, (224, 224), do_argmax=True)
    res = similarity[0, 0, :, :]
    # Values above 20 are reset to 0 — presumably anything outside the 21 PASCAL
    # VOC classes is mapped to background; confirm against get_similarity.
    res[res > 20] = 0

    # Strip the "cropped-" prefix and swap only the extension for ".png".
    stem = os.path.splitext(file_path[len("cropped-"):])[0]
    out_file = f"{save_path}/{stem}.png"
    # .cpu() is required before .numpy() if the tensor lives on the GPU, and
    # cv.imwrite signals failure through its return value rather than raising.
    if not cv.imwrite(out_file, res.to(torch.uint8).cpu().numpy()):
        raise OSError(f"cv.imwrite failed to write {out_file}")

In [None]:
# Scratch check: length of the "cropped-" file-name prefix (8).
len("cropped-")

In [None]:
# Display the last image opened by the prediction loop, before preprocessing.
im_orig

In [None]:
# Distinct values present in the last `res` map produced by the prediction loop.
res.unique()

In [None]:
# Render the last `res` map as an image. .cpu() is needed before .numpy() when
# the tensor lives on the GPU and is a no-op otherwise.
Image.fromarray(res.to(torch.uint8).cpu().numpy())

In [None]:
# Shape of the last `res` map produced by the prediction loop.
res.shape

In [None]:
# NOTE(review): this rebinds `save_path` from an output directory to a
# "{}"-style file-name template. Re-running the prediction loop after this cell
# would build its output paths from the template — confirm this is intended.
save_path = "/home/kanchana/repo/temp_peekaboo/image-{}.png"
