# Setup

In [20]:
# Mount Google Drive at /content/drive; the data directory entered a few cells
# below lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%%capture
! pip install git+https://github.com/openai/CLIP.git
!pip install transformers
!pip install yacs

In [1]:
cd /content/drive/Shareddrives/cis630/CoOp/data

/content/drive/Shareddrives/cis630/CoOp/data


In [2]:
import gc
import os

import clip
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torch.utils.data import DataLoader, Subset, TensorDataset
from tqdm import tqdm

In [4]:
# from config import get_cfg_defaults
# cfg = get_cfg_defaults()

In [5]:
def free_gpu_cache():
    """Run a garbage-collection pass, then empty PyTorch's CUDA cache.

    The collection runs first so that tensors which are no longer referenced
    are dropped before the cache is emptied.
    """
    gc.collect()
    torch.cuda.empty_cache()

In [6]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cuda


In [9]:
# List the model names that the installed `clip` package offers.
clip.available_models()

['RN50',
 'RN101',
 'RN50x4',
 'RN50x16',
 'RN50x64',
 'ViT-B/32',
 'ViT-B/16',
 'ViT-L/14',
 'ViT-L/14@336px']

In [10]:
# Name of the CLIP checkpoint to load; must be one of clip.available_models().
# Keeping it in one variable makes the log line below report what is actually loaded
# (it previously said "RN50" while loading 'ViT-B/32').
# NOTE(review): the recorded outputs further down show 1024-wide embeddings, which
# does not look like ViT-B/32 output — confirm which checkpoint produced the saved files.
CLIP_MODEL_NAME = 'ViT-B/32'

print(f"Loading {CLIP_MODEL_NAME}")
# clip.load returns the model together with the image preprocessing transform
# that the dataset cells pass to torchvision. Passing `device` places the model
# directly, so no separate .to(device) call is needed.
clip_model, transform = clip.load(CLIP_MODEL_NAME, device=device)
print(f"Model in {device}")

Loading RN50
Model in cuda


## Flower

In [11]:
# Flowers102 training split, preprocessed with the transform returned by clip.load.
# download=False: nothing is fetched here; root='' is relative to the current directory.
dataset = torchvision.datasets.Flowers102(
    root='',
    split='train',
    transform=transform,
    download=False,
)

In [12]:
# GPU constrained (avoid OOM error): encode the dataset in a few chunks so each
# chunk's embeddings can be written to disk before the next chunk starts.
NUM_CHUNKS = 5
# Window length and stride are the same number, so chunks never overlap or leave gaps
# (the window used to be a hard-coded 204, which only matched a 1020-image dataset).
chunk_size = max(1, len(dataset) // NUM_CHUNKS)
subsets = [
    # Clamp the end: when the length is not a multiple of NUM_CHUNKS the final
    # (shorter) chunk must not index past the dataset.
    Subset(dataset, range(start, min(start + chunk_size, len(dataset))))
    for start in range(0, len(dataset), chunk_size)
]
dataloaders = [DataLoader(subset, batch_size=16, shuffle=False) for subset in subsets]

In [13]:
# Encode every chunk with CLIP and checkpoint it to its own pair of .pt files.
for i, dataloader in enumerate(tqdm(dataloaders)):
    img_embs, lbls = [], []
    # Inference only: under no_grad() no autograd graph is recorded, so the
    # activations of each batch are not kept alive alongside the embeddings.
    with torch.no_grad():
        for images, labels in dataloader:
            img_embs.append(clip_model.encode_image(images.to(device)))
            lbls.append(labels)
    free_gpu_cache()
    # Batches -> one (num_images, dim) tensor and one flat label tensor.
    img_embs, lbls = torch.vstack(img_embs), torch.hstack(lbls)
    torch.save(img_embs, f'flower_vit_image_embs_{i}.pt')
    torch.save(lbls,     f'flower_vit_labels_{i}.pt')

100%|██████████| 5/5 [06:23<00:00, 76.75s/it]


In [15]:
# Read the per-chunk checkpoints back in chunk order and stack them into one
# embedding tensor and one label tensor.
img_embs = torch.vstack([
    torch.load(f'flower_vit_image_embs_{chunk_id}.pt')
    for chunk_id in range(len(dataloaders))
])
lbls = torch.hstack([
    torch.load(f'flower_vit_labels_{chunk_id}.pt')
    for chunk_id in range(len(dataloaders))
])

# Store the combined tensors next to the per-chunk files.
torch.save(img_embs, 'flower_vit_image_embs.pt')
torch.save(lbls, 'flower_vit_labels.pt')

# Other Datasets

## Food101

In [None]:
# Food101: embed the first batch (12 images) of every class-sized slice with CLIP,
# checkpoint each slice separately, then merge the checkpoints into one pair of tensors.
dataset = torchvision.datasets.Food101(root='data/',
                                       transform=transform,download=False,split='train')

# Prefix for every checkpoint file written below. This used to be read from
# `cfg.DATASET.NAME`, but no `cfg` object is ever created in this notebook
# (its setup cell is commented out), so the cell could not run.
dataset_name = 'food101'

# torch.save does not create missing directories.
os.makedirs('checkpoints', exist_ok=True)

# GPU constrained (avoid OOM error): one Subset/DataLoader per class-sized slice.
# Window and stride are the same number so slices cannot overlap; the end is
# clamped so a trailing partial slice cannot index past the dataset.
# Assumes the split stores the images of one class contiguously — TODO confirm.
per_class = max(1, len(dataset) // len(dataset.classes))
subsets = [
    Subset(dataset, range(start, min(start + per_class, len(dataset))))
    for start in range(0, len(dataset), per_class)
]
dataloaders = [DataLoader(subset, batch_size=12, shuffle=False) for subset in subsets]

# create image embeddings
for i, dataloader in enumerate(tqdm(dataloaders)):
    img_embs, lbls = [], []
    # Inference only: no autograd graph is needed for the saved embeddings.
    with torch.no_grad():
        for images, labels in dataloader:
            # `clip_model` is the model loaded in the setup section
            # (this line previously referenced an undefined `model`).
            img_embs.append(clip_model.encode_image(images.to(device)))
            lbls.append(labels)
            # Only the first batch of each slice is kept.
            break
    free_gpu_cache()
    img_embs, lbls = torch.vstack(img_embs), torch.hstack(lbls)
    print(f"Image embedding shape: {img_embs.shape}")
    print(f"Labels shape:          {lbls.shape}")
    torch.save(img_embs, f'checkpoints/{dataset_name}_image_embs_{i}.pt')
    torch.save(lbls,     f'checkpoints/{dataset_name}_labels_{i}.pt')


# create classnames txt files
with open('food101_names.txt', 'w') as fp:
    for item in dataset.classes:
        # write each item on a new line
        fp.write("%s\n" % item)
    print('Done')

# Merge the per-slice checkpoints and save the combined tensors.
img_embs = torch.vstack([torch.load(f'checkpoints/{dataset_name}_image_embs_{i}.pt') for i in range(len(dataloaders))])
lbls = torch.hstack([torch.load(f'checkpoints/{dataset_name}_labels_{i}.pt') for i in range(len(dataloaders))])

torch.save(img_embs, f'checkpoints/{dataset_name}_image_embs.pt')
torch.save(lbls,     f'checkpoints/{dataset_name}_labels.pt')

In [None]:
# Sanity check: the distinct labels in `lbls` and how many entries carry each one.
torch.unique(lbls,return_counts=True)

(tensor([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
          14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,
          28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,
          42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,
          56,  57,  58,  59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
          70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,
          84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,
          98,  99, 100]),
 tensor([12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
         12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
         12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
         12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
         12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
         12, 12,

In [None]:
# Reload the merged Food101 tensors from the checkpoint directory.
# The file prefix is spelled out here because `cfg` (previously used to build
# these paths) is never created in this notebook.
dataset_name = 'food101'
img_embs = torch.load(f'checkpoints/{dataset_name}_image_embs.pt')
lbls = torch.load(f'checkpoints/{dataset_name}_labels.pt')
print("Original Dataset:\t ",img_embs.shape, lbls.shape, '\n')

Original Dataset:	  torch.Size([1212, 1024]) torch.Size([1212]) 

Subset Dataset: Considering n-shot = 4
Test set:  torch.Size([202, 1024]) torch.Size([202])
Train set:  torch.Size([404, 1024]) torch.Size([404])



## DTD

In [None]:
# DTD test split, preprocessed with the transform returned by clip.load.
# download=True lets torchvision fetch the archive into data/ when needed.
dataset = torchvision.datasets.DTD(
    root='data/',
    split='test',
    transform=transform,
    download=True,
)

Downloading https://thor.robots.ox.ac.uk/datasets/dtd/dtd-r1.0.1.tar.gz to data/dtd/dtd-r1.0.1.tar.gz


100%|██████████| 625239812/625239812 [00:45<00:00, 13633603.16it/s]


Extracting data/dtd/dtd-r1.0.1.tar.gz to data/dtd


In [None]:
# Display the dataset summary (size, root, split, transform).
dataset

Dataset DTD
    Number of datapoints: 1880
    Root location: data/
    split=test, partition=1
    StandardTransform
Transform: Compose(
               Resize(size=224, interpolation=bicubic, max_size=None, antialias=warn)
               CenterCrop(size=(224, 224))
               <function _convert_image_to_rgb at 0x7fc9982280d0>
               ToTensor()
               Normalize(mean=(0.48145466, 0.4578275, 0.40821073), std=(0.26862954, 0.26130258, 0.27577711))
           )

In [None]:
# Write the DTD class names to a text file, one name per line.
with open('dtd_names.txt', 'w') as names_file:
    names_file.writelines(f"{class_name}\n" for class_name in dataset.classes)
    print('Done')

Done


In [None]:
# DTD: embed the first batch (12 images) of every class-sized slice with CLIP,
# checkpoint each slice separately, then merge the checkpoints.

# Prefix for every checkpoint file written below. This used to be read from
# `cfg.DATASET.NAME`, but no `cfg` object is ever created in this notebook.
dataset_name = 'dtd'

# torch.save does not create missing directories.
os.makedirs('checkpoints', exist_ok=True)

# Window and stride are the same number so slices cannot overlap (the window was
# a hard-coded 40); the end is clamped so it cannot index past the dataset.
# Assumes the split stores the images of one class contiguously — TODO confirm.
per_class = max(1, len(dataset) // len(dataset.classes))
subsets = [
    Subset(dataset, range(start, min(start + per_class, len(dataset))))
    for start in range(0, len(dataset), per_class)
]
dataloaders = [DataLoader(subset, batch_size=12, shuffle=False) for subset in subsets]

for i, dataloader in enumerate(tqdm(dataloaders)):
    img_embs, lbls = [], []
    # Inference only: no autograd graph is needed for the saved embeddings.
    with torch.no_grad():
        for images, labels in dataloader:
            # `clip_model` is the model loaded in the setup section
            # (this line previously referenced an undefined `model`).
            img_embs.append(clip_model.encode_image(images.to(device)))
            lbls.append(labels)
            # Only the first batch of each slice is kept.
            break
    free_gpu_cache()
    img_embs, lbls = torch.vstack(img_embs), torch.hstack(lbls)
    torch.save(img_embs, f'checkpoints/{dataset_name}_image_embs_{i}.pt')
    torch.save(lbls,     f'checkpoints/{dataset_name}_labels_{i}.pt')

# Merge the per-slice checkpoints and save the combined tensors.
img_embs = torch.vstack([torch.load(f'checkpoints/{dataset_name}_image_embs_{i}.pt') for i in range(len(dataloaders))])
lbls = torch.hstack([torch.load(f'checkpoints/{dataset_name}_labels_{i}.pt') for i in range(len(dataloaders))])

torch.save(img_embs, f'checkpoints/{dataset_name}_image_embs.pt')
torch.save(lbls,     f'checkpoints/{dataset_name}_labels.pt')

img_embs.shape,lbls.shape

torch.Size([564, 1024])

In [None]:
# Sanity check: the distinct labels in `lbls` and how many entries carry each one.
torch.unique(lbls,return_counts=True)

(tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
         18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
         36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46]),
 tensor([12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
         12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
         12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12]))

## StanfordCars

In [None]:
# StanfordCars training split, preprocessed with the transform returned by clip.load.
# download=False: nothing is fetched here.
dataset = torchvision.datasets.StanfordCars(
    root='data/',
    split='train',
    transform=transform,
    download=False,
)

## OxfordPet

In [16]:
cd data

/content/drive/Shareddrives/cis630/CoOp/data


In [9]:
# Oxford-IIIT Pet trainval split, preprocessed with the transform returned by clip.load.
# download=True lets torchvision fetch the archives when needed; root='' is
# relative to the current directory.
dataset = torchvision.datasets.OxfordIIITPet(
    root='',
    split='trainval',
    transform=transform,
    download=True,
)

Downloading https://thor.robots.ox.ac.uk/datasets/pets/images.tar.gz to oxford-iiit-pet/images.tar.gz


100%|██████████| 791918971/791918971 [00:27<00:00, 28682247.52it/s]


Extracting oxford-iiit-pet/images.tar.gz to oxford-iiit-pet
Downloading https://thor.robots.ox.ac.uk/datasets/pets/annotations.tar.gz to oxford-iiit-pet/annotations.tar.gz


100%|██████████| 19173078/19173078 [00:01<00:00, 15266608.98it/s]


Extracting oxford-iiit-pet/annotations.tar.gz to oxford-iiit-pet


In [10]:
# Show the class names exposed by the dataset object.
dataset.classes

['Abyssinian',
 'American Bulldog',
 'American Pit Bull Terrier',
 'Basset Hound',
 'Beagle',
 'Bengal',
 'Birman',
 'Bombay',
 'Boxer',
 'British Shorthair',
 'Chihuahua',
 'Egyptian Mau',
 'English Cocker Spaniel',
 'English Setter',
 'German Shorthaired',
 'Great Pyrenees',
 'Havanese',
 'Japanese Chin',
 'Keeshond',
 'Leonberger',
 'Maine Coon',
 'Miniature Pinscher',
 'Newfoundland',
 'Persian',
 'Pomeranian',
 'Pug',
 'Ragdoll',
 'Russian Blue',
 'Saint Bernard',
 'Samoyed',
 'Scottish Terrier',
 'Shiba Inu',
 'Siamese',
 'Sphynx',
 'Staffordshire Bull Terrier',
 'Wheaten Terrier',
 'Yorkshire Terrier']

In [11]:
# Write the class names to a text file, one name per line.
with open('oxfordpet_names.txt', 'w') as names_file:
    names_file.writelines(f"{class_name}\n" for class_name in dataset.classes)
    print('Done')

Done


In [12]:
# Split the dataset into consecutive, non-overlapping slices of roughly one
# class's worth of images each.
# The window used to be a fixed 200 images while the stride was
# len(dataset) // len(dataset.classes). Whenever that stride is below 200,
# neighbouring slices re-encode the same images and the last windows index past
# the end of the dataset. Using one number for both removes the duplicates, and
# clamping the end keeps the final slice inside the dataset.
chunk_size = max(1, len(dataset) // len(dataset.classes))
subsets = [
    Subset(dataset, range(start, min(start + chunk_size, len(dataset))))
    for start in range(0, len(dataset), chunk_size)
]
dataloaders = [DataLoader(subset, batch_size=12, shuffle=False) for subset in subsets]

In [29]:
cd /content

/content


In [60]:
# The per-chunk checkpoints merged below were produced by this loop, which is
# kept disabled so the merge can be re-run without re-encoding the dataset:
#
# for i,dataloader in enumerate(tqdm(dataloaders)):
#     img_embs, lbls = [], []
#     for images, labels in dataloader:
#         img_embs.append(clip_model.encode_image(images.to(device)))
#         lbls.append(labels)
#     free_gpu_cache()
#     img_embs, lbls = torch.vstack(img_embs), torch.hstack(lbls)
#     # print(f"Image embedding shape: {img_embs.shape}")
#     # print(f"Labels shape:          {lbls.shape}")
#     torch.save(img_embs, f'oxfordpet_image_embs_{i}.pt')
#     torch.save(lbls,     f'oxfordpet_labels_{i}.pt')

# Count the consecutive chunk checkpoints (0, 1, 2, ...) that exist on disk rather
# than assuming a fixed number of them (this was a hard-coded 36).
num_chunks = 0
while (os.path.exists(f'oxfordpet_image_embs_{num_chunks}.pt')
       and os.path.exists(f'oxfordpet_labels_{num_chunks}.pt')):
    num_chunks += 1
if num_chunks == 0:
    raise FileNotFoundError("no oxfordpet_image_embs_<i>.pt / oxfordpet_labels_<i>.pt checkpoints found in the current directory")

img_embs = torch.vstack([torch.load(f'oxfordpet_image_embs_{i}.pt') for i in range(num_chunks)])
lbls = torch.hstack([torch.load(f'oxfordpet_labels_{i}.pt') for i in range(num_chunks)])

# Store the merged tensors.
torch.save(img_embs, 'oxfordpet_image_embs.pt')
torch.save(lbls,     'oxfordpet_labels.pt')

img_embs.shape,lbls.shape

(torch.Size([7200, 1024]), torch.Size([7200]))

In [61]:
# Sanity check: the distinct labels in `lbls` and how many entries carry each one.
torch.unique(lbls,return_counts=True)

(tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
         18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
         36]),
 tensor([152, 151, 202, 202, 202, 202, 202, 194, 202, 202, 202, 188, 194, 202,
         202, 202, 202, 202, 202, 202, 202, 202, 194, 202, 202, 202, 202, 202,
         202, 202, 202, 202, 200, 202, 186, 152, 135]))

In [62]:
# Draw a fixed number of embeddings per class from the merged tensors.
N_SHOTS = 20   # embeddings kept per class
SEED = 0       # fixed seed so re-running the cell selects the same rows
rng = np.random.default_rng(SEED)

img = []
label = []

# Iterate over the labels that are actually present instead of a hard-coded
# class count, so no class can come back with an empty candidate pool.
for class_id in torch.unique(lbls).tolist():
    candidates = torch.where(lbls == class_id)[0].numpy()
    # Sample without replacement so the same row is not picked twice
    # (np.random.choice defaults to replace=True). Fall back to replacement
    # only when a class has fewer than N_SHOTS rows, which keeps N_SHOTS
    # entries per class in every case.
    index = rng.choice(candidates, size=N_SHOTS, replace=len(candidates) < N_SHOTS)
    label.append(lbls[index])
    img.append(img_embs[index])

In [63]:
# Stack the per-class selections and write them out. These are the same file
# names the merged tensors were saved under, so those files are replaced.
torch.save(torch.vstack(img), 'oxfordpet_image_embs.pt')
torch.save(torch.hstack(label), 'oxfordpet_labels.pt')

In [64]:
# Read the saved embedding file back and show its shape.
torch.load('oxfordpet_image_embs.pt').shape

torch.Size([740, 1024])

In [65]:
# Read the saved label file back and list the distinct labels it contains.
torch.unique(torch.load('oxfordpet_labels.pt'))

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
        36])

In [43]:
# dataset.classes

In [44]:
# Display whichever dataset object is currently bound to `dataset`.
dataset

Dataset Flowers102
    Number of datapoints: 1020
    Root location: 
    split=train
    StandardTransform
Transform: Compose(
               Resize(size=224, interpolation=bicubic, max_size=None, antialias=warn)
               CenterCrop(size=(224, 224))
               <function _convert_image_to_rgb at 0x7f0dd5240dc0>
               ToTensor()
               Normalize(mean=(0.48145466, 0.4578275, 0.40821073), std=(0.26862954, 0.26130258, 0.27577711))
           )