### Initialize CLIP

In [1]:
import evaluation

# Pull the model and preprocess objects out of the project's `evaluation`
# module so that later cells can refer to them directly.
model, preprocess = evaluation.model, evaluation.preprocess

### Get images and titles for evaluation

In [4]:
# Input locations as configured in the `evaluation` module.
images_path, texts_file = evaluation.images_path, evaluation.text_file_brands

# Read the text entries, then derive the image and title sequences from them.
texts_list = evaluation.read_text(texts_file)
images, titles = evaluation.get_image_title(texts_list)

# Print both counts side by side so a mismatch between images and titles
# is immediately visible.
print(f"{len(images)} | {len(titles)}")

210 | 210


### Create dataset

In [5]:
# Combine the images and titles loaded above into one iterable; the accuracy
# cell iterates over it and unpacks each item as an (img, title) pair.
dataset = evaluation.image_title_dataset(images, titles)

### Compute zero-shot brand weights

In [6]:
# Brand names and their prompt template, both defined in the `evaluation` module.
brands, brands_template = evaluation.brands, evaluation.brands_template

# Weight matrix built from the brands and the template; the accuracy cell
# multiplies the normalised image features by this matrix to obtain logits.
zeroshot_brand_weights = evaluation.zeroshot_weight_calculator_tmpl(
    brands,
    brands_template,
)

# Show the shape of the resulting matrix as a quick sanity check.
print(zeroshot_brand_weights.shape)

torch.Size([512, 40])


### Calculate top-1 and top-5 accuracy

In [7]:
import numpy as np
from tqdm.notebook import tqdm
import torch

# Zero-shot brand classification: score every image against the brand weight
# matrix and accumulate the top-1 / top-5 results over the whole dataset.
with torch.no_grad():  # inference only, so gradient tracking is switched off
    top1, top5, n = 0., 0., 0.

    for img, title in tqdm(dataset):
        # np.stack on a one-element list adds a leading axis, turning the
        # single image into a batch of one.
        image = torch.tensor(np.stack([img]))
        image_features = model.encode_image(image)
        # Rescale each feature vector to unit norm along the last dimension.
        image_features /= image_features.norm(dim=-1, keepdim=True)
        logits = 100. * image_features @ zeroshot_brand_weights
        # The ground-truth class is the position of the title within `brands`.
        target_index = brands.index(title)

        # NOTE(review): assumes evaluation.accuracy returns the number of hits
        # for each requested k (the totals are divided by n below) - confirm.
        acc1, acc5 = evaluation.accuracy(logits, target_index, topk=(1, 5))
        top1 += acc1
        top5 += acc5
        n += image.size(0)

# Fail with a clear message instead of a bare ZeroDivisionError when the
# dataset yielded no samples.
if n == 0:
    raise ValueError("dataset is empty; cannot compute accuracy")

# Convert the accumulated totals into percentages.
top1 = (top1 / n) * 100
top5 = (top5 / n) * 100

print(f"Top-1 accuracy: {top1:.2f}")
print(f"Top-5 accuracy: {top5:.2f}")

  0%|          | 0/210 [00:00<?, ?it/s]

Top-1 accuracy: 27.62
Top-5 accuracy: 55.24
