<a href="https://colab.research.google.com/github/oz-e/applied-ml/blob/main/CuPL_food.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Change the path if necessary
# Root directory passed to the aml.datasets loader further down.
datasets_path = 'datasets'

import sys
import os

# Deploy AML code in colab
# The branch below only runs when the google.colab module is loaded;
# outside Colab this cell just defines datasets_path.
if 'google.colab' in sys.modules:
  # Skip authentication and cloning when the checkout already exists.
  if not os.path.exists('/content/applied-ml/'):
    # Download the colab_github helper script and run its GitHub auth step
    # (non-persistent key). What github_auth does exactly is defined in that
    # helper, not here — presumably it sets up the SSH key used by the clone.
    !wget -q https://raw.githubusercontent.com/tsunrise/colab-github/main/colab_github.py
    import colab_github
    colab_github.github_auth(persistent_key=False)

    !git clone git@github.com:oz-e/applied-ml.git
    # A shell failure does not raise in Python, so success of the clone is
    # checked by testing for the checkout directory.
    if not os.path.exists('/content/applied-ml/'):
      raise Exception('Please follow the instructions to add the SSH key to your account in order to clone private repo')
  # Work from the repository root so relative paths and `import aml` resolve.
  %cd /content/applied-ml/

  # Install any other requirements (to be converted to requirements.txt)
  !pip install openai-clip

/content/applied-ml


In [2]:
import os

# Fetch the CuPL repository once, into the current working directory.
# Later cells import the prompt templates from it and read its prompt JSON.
if not os.path.exists("CuPL"):
  !git clone https://github.com/sarahpratt/CuPL.git

In [3]:
import numpy as np
import torch
import clip
from pkg_resources import packaging
from CuPL.imagenet_prompts.standard_image_prompts import imagenet_templates
import pdb
from collections import defaultdict
from PIL import Image
import PIL
import json
from tqdm import tqdm

In [4]:
# Load CLIP ViT-B/16: `model` encodes text and images in the cells below,
# `preprocess` is handed to the dataset as its image transform.
(model, preprocess) = clip.load("ViT-B/16")

In [5]:
import aml.datasets

# Food101 test split, with every image passed through CLIP's `preprocess`.
all_images = aml.datasets.Food101(
  datasets_path,
  split='test',
  transform=preprocess,
)

# Batched iteration over the test split for the evaluation loop.
loader = torch.utils.data.DataLoader(
  all_images,
  batch_size=512,
  num_workers=8,
)



In [6]:
# Location of the CuPL prompt file for Food101 inside the cloned CuPL repo.
PATH_TO_PROMPTS = './CuPL/all_prompts/full_prompts/food_prompts_full.json'

# Decode explicitly as UTF-8: without it, open() falls back to the locale's
# default encoding, which can fail on any non-ASCII character in the prompts.
with open(PATH_TO_PROMPTS, encoding='utf-8') as f:
  # The loop that follows iterates this as a mapping from class name to prompts.
  gpt3_prompts = json.load(f)

In [7]:
# Optional renames from a class name as spelled in the CuPL prompt file to
# the spelling that should be used instead. Left empty here, so every key
# falls through to the generic normalisation unchanged.
special_class_mapping = dict()

In [8]:
# Bridge the class names used by the dataset with the names used as keys in
# the CuPL prompts: apply any special-case rename first, then replace spaces
# with underscores and lower-case the result.
new_gpt3_prompts = {
  special_class_mapping.get(prompt_class, prompt_class).replace(' ', '_').lower(): prompt_list
  for prompt_class, prompt_list in gpt3_prompts.items()
}

In [9]:
def zeroshot_classifier(classnames, templates):
    """Build zero-shot classifier weights from hand-written prompt templates.

    For each class, every template is formatted with the class name and
    encoded with the module-level CLIP ``model``. The embeddings are
    L2-normalised, averaged over templates, and normalised again.

    Args:
        classnames: Iterable of class-name strings. Column ``i`` of the
            result corresponds to the ``i``-th name.
        templates: Iterable of format strings with one ``{}`` placeholder
            that receives the class name.

    Returns:
        A tensor with one column per class (stacked along ``dim=1``),
        located on the same device as ``model``.
    """
    # Follow the model's device instead of assuming CUDA, so the function
    # also works when CLIP was loaded on CPU or on a non-default GPU.
    device = next(model.parameters()).device

    with torch.no_grad():
        zeroshot_weights = []
        for classname in tqdm(classnames):
            texts = [template.format(classname) for template in templates]  # format with class
            tokens = clip.tokenize(texts).to(device)  # tokenize
            class_embeddings = model.encode_text(tokens)  # embed with text encoder
            # Normalise each prompt embedding before averaging so every
            # prompt contributes equally to the class direction.
            class_embeddings /= class_embeddings.norm(dim=-1, keepdim=True)
            class_embedding = class_embeddings.mean(dim=0)
            class_embedding /= class_embedding.norm()
            zeroshot_weights.append(class_embedding)
        zeroshot_weights = torch.stack(zeroshot_weights, dim=1).to(device)
    return zeroshot_weights

In [10]:
def zeroshot_classifier_gpt(classnames, templates, use_both):
    """Build zero-shot classifier weights from the GPT-3 (CuPL) prompts.

    For each class the prompts are read from the module-level
    ``new_gpt3_prompts`` mapping, optionally preceded by the formatted
    templates. They are encoded with the module-level CLIP ``model``,
    L2-normalised, averaged, and normalised again.

    Args:
        classnames: Iterable of class-name strings. Each must be a key of
            ``new_gpt3_prompts``. Column ``i`` of the result corresponds to
            the ``i``-th name.
        templates: Iterable of format strings with one ``{}`` placeholder.
            Only used when ``use_both`` is true.
        use_both: If true, combine the formatted templates with the GPT-3
            prompts; otherwise use the GPT-3 prompts alone.

    Returns:
        A tensor with one column per class (stacked along ``dim=1``),
        located on the same device as ``model``.

    Raises:
        KeyError: If a class name has no entry in ``new_gpt3_prompts``.
    """
    # Follow the model's device instead of assuming CUDA, so the function
    # also works when CLIP was loaded on CPU or on a non-default GPU.
    device = next(model.parameters()).device

    with torch.no_grad():
        zeroshot_weights = []
        for classname in tqdm(classnames):
            if use_both:
                texts = [template.format(classname) for template in templates]
            else:
                texts = []

            # `texts` is a fresh list either way, so extending it never
            # mutates the stored prompt lists.
            texts.extend(new_gpt3_prompts[classname])

            # truncate=True is kept from the original call; the template-only
            # classifier does not pass it.
            tokens = clip.tokenize(texts, truncate=True).to(device)  # tokenize
            class_embeddings = model.encode_text(tokens)  # embed with text encoder
            class_embeddings /= class_embeddings.norm(dim=-1, keepdim=True)
            class_embedding = class_embeddings.mean(dim=0)
            class_embedding /= class_embedding.norm()
            zeroshot_weights.append(class_embedding)

        zeroshot_weights = torch.stack(zeroshot_weights, dim=1).to(device)
    return zeroshot_weights

In [11]:
print("\nCreating standard text embeddings...")
zeroshot_weights_base = zeroshot_classifier(all_images.classnames, imagenet_templates)
print("Done.\n")

print("Creating CuPL text embeddings...")
zeroshot_weights_cupl = zeroshot_classifier_gpt(all_images.classnames, imagenet_templates, False)
print("Done.\n")

print("Creating combined text embeddings...")
zeroshot_weights_gpt_both = zeroshot_classifier_gpt(all_images.classnames, imagenet_templates, True)
print("Done.\n")


Creating standard text embeddings...


100%|██████████| 101/101 [00:06<00:00, 16.25it/s]


Done.

Creating CuPL text embeddings...


100%|██████████| 101/101 [00:03<00:00, 32.27it/s]


Done.

Creating combined text embeddings...


100%|██████████| 101/101 [00:08<00:00, 12.17it/s]

Done.






In [12]:
# Evaluate the three zero-shot classifiers (templates only, CuPL prompts
# only, both combined) on every batch from `loader`, counting top-1 hits.
# The counters stay floats so the accuracy cell's divisions are unchanged.
total = 0.
correct_base = 0.
correct_cupl = 0.
correct_both = 0.

print("Classifying Food101...")

# Follow the model's device rather than assuming CUDA.
device = next(model.parameters()).device

with torch.no_grad():

    for images, target in tqdm(loader):
        images = images.to(device)
        target = target.to(device)

        # predict
        image_features = model.encode_image(images)
        image_features /= image_features.norm(dim=-1, keepdim=True)

        # Both sides are normalised before the product; one logit per class.
        logits_base = image_features @ zeroshot_weights_base
        logits_cupl = image_features @ zeroshot_weights_cupl
        logits_both = image_features @ zeroshot_weights_gpt_both

        pred_base = torch.argmax(logits_base, dim=1)
        pred_cupl = torch.argmax(logits_cupl, dim=1)
        pred_both = torch.argmax(logits_both, dim=1)

        # Count hits per batch with one reduction each, instead of comparing
        # tensor elements one at a time in Python (a device sync per sample).
        total += target.size(0)
        correct_base += (pred_base == target).sum().item()
        correct_cupl += (pred_cupl == target).sum().item()
        correct_both += (pred_both == target).sum().item()

Classifying ImageNet...


100%|██████████| 60/60 [03:13<00:00,  3.22s/it]


In [13]:
# Report top-1 accuracy, as a percentage of all evaluated images, for each
# of the three text-embedding variants.
print()

# Hand-written templates only.
top1 = 100 * (correct_base / total)
print(f"Top-1 accuracy standard: {top1:.2f}")

# GPT-3 (CuPL) prompts only.
top1 = 100 * (correct_cupl / total)
print(f"Top-1 accuracy CuPL: {top1:.2f}")

# Templates and CuPL prompts combined.
top1 = 100 * (correct_both / total)
print(f"Top-1 accuracy both: {top1:.2f}")


Top-1 accuracy standard: 83.84
Top-1 accuracy CuPL: 86.02
Top-1 accuracy both: 85.06
