Feature extraction with the image encoder of a fine-tuned CLIP model (fine-tuned on a few of the images); the resulting 512-dimensional embeddings are used to train the ML models.

In [1]:
import os
import clip
import torch
import numpy as np
from tqdm import tqdm
import pandas as pd
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from skimage import io, transform

In [2]:
# Load the base CLIP model, then overwrite its weights with the fine-tuned checkpoint.
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device, jit=False)  # Must set jit=False for training
# map_location remaps tensors stored in the checkpoint onto the device selected above,
# so a checkpoint saved from a GPU run can still be loaded on a CPU-only machine.
checkpoint = torch.load("./model_30_5e7_001_fixed.pt", map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [3]:
# Few-shot size: how many training images are sampled for each class
# when the few-shot training frame is built below.
number = 10 # No. of samples per class

In [4]:
# Locations of the CUB-200-2011 images and of its space-separated metadata files.
root_pth = './birds/CUB_200_2011/images/'
imgID_pth_df = pd.read_csv('./birds/CUB_200_2011/images.txt', sep=' ', header=None, names=['img_id', 'sub_pth'])
train_split_df = pd.read_csv('./birds/CUB_200_2011/train_test_split.txt', sep=' ', names=['img_id', 'is_training'])
class_names = pd.read_csv('./birds/CUB_200_2011/classes.txt', sep=' ', header=None, names=['class_id', 'class_name'])
# Class names look like "001.Black_footed_Albatross": keep the part after the dot, lower-cased.
class_names.class_name = class_names.class_name.map(lambda x: x.split('.')[1].lower())

# One row per image: id, relative path and train/test flag.
cub_dataset_df = imgID_pth_df.merge(train_split_df, on='img_id', how='inner')

# The numeric class id is the prefix of the image's sub-path (e.g. "001.<name>/<file>" -> 1).
target = [int(pth.split('.')[0]) for pth in cub_dataset_df['sub_pth']]
df = pd.DataFrame(target, columns=['class'])
train_df = cub_dataset_df[cub_dataset_df.is_training == 1]
# `df` shares the row index of `cub_dataset_df`, so the index-based join attaches
# the matching class id to every training row.
final_df = train_df.join(df)

# Draw `number` training images per class with a fixed seed. The per-class samples
# are collected in a list and concatenated once: DataFrame.append is deprecated
# (removed in pandas 2.0) and re-copies the whole frame on every iteration.
fewshot_df = pd.concat(
    [
        final_df[final_df['class'] == i].sample(n=number, random_state=42)
        for i in range(1, 201)
    ],
    ignore_index=True,
)

fewshot_df


  fewshot_df=fewshot_df.append(few_shot, ignore_index=True)


Unnamed: 0,img_id,sub_pth,is_training,class
0,56,001.Black_footed_Albatross/Black_Footed_Albatr...,1,1
1,35,001.Black_footed_Albatross/Black_Footed_Albatr...,1,1
2,49,001.Black_footed_Albatross/Black_Footed_Albatr...,1,1
3,37,001.Black_footed_Albatross/Black_Footed_Albatr...,1,1
4,18,001.Black_footed_Albatross/Black_Footed_Albatr...,1,1
...,...,...,...,...
1995,11750,200.Common_Yellowthroat/Common_Yellowthroat_01...,1,200
1996,11784,200.Common_Yellowthroat/Common_Yellowthroat_00...,1,200
1997,11776,200.Common_Yellowthroat/Common_Yellowthroat_00...,1,200
1998,11754,200.Common_Yellowthroat/Common_Yellowthroat_00...,1,200


In [5]:

class CUBDataset(Dataset):
    """Map-style dataset that yields (image, class id) pairs for CUB images."""

    def __init__(self, dataframe, root_dir, transform=None):
        """
        Args:
            dataframe (pd.DataFrame): Frame whose second column (position 1)
                holds each image's path relative to ``root_dir``.
            root_dir (string): Directory that contains all the images.
            transform (callable, optional): Applied to the loaded image
                before it is returned.
        """
        self.cub_img_df = dataframe
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Number of rows (images) in the backing dataframe."""
        return len(self.cub_img_df)

    def __getitem__(self, idx):
        """Load the image at position ``idx`` and return ``(image, target)``.

        ``target`` is the integer that precedes the first '.' of the
        relative path.
        """
        # Tensor indices are converted to plain Python values before indexing.
        position = idx.tolist() if torch.is_tensor(idx) else idx

        relative_path = self.cub_img_df.iloc[position, 1]
        picture = io.imread(os.path.join(self.root_dir, relative_path))
        label = int(relative_path.split('.')[0])

        if not self.transform:
            return picture, label
        return self.transform(picture), label

# Build the CUB-200-2011 datasets: the few-shot sample for training and the
# full held-out split (is_training == 0) for testing. Both use the same
# pipeline: convert the loaded image to PIL, then apply CLIP's preprocessing.
clip_transform = transforms.Compose([transforms.ToPILImage(), preprocess])
test_df = cub_dataset_df[cub_dataset_df.is_training==0]

train_set = CUBDataset(fewshot_df, root_pth, transform=clip_transform)
test_set = CUBDataset(test_df, root_pth, transform=clip_transform)

In [6]:
def get_features(dataset, batch_size=16):
    """Encode every image of `dataset` with the CLIP image encoder.

    Args:
        dataset: Dataset yielding (image tensor, integer label) pairs.
        batch_size (int): Number of images encoded per forward pass.

    Returns:
        Tuple ``(features, labels)`` of numpy arrays with one row / entry
        per sample, in dataset order.
    """
    all_features = []
    all_labels = []

    with torch.no_grad():
        # No shuffling: order is irrelevant for feature extraction, and a fixed
        # order keeps the returned arrays reproducible between runs.
        for images, labels in tqdm(DataLoader(dataset, batch_size=batch_size, shuffle=False)):
            features = model.encode_image(images.to(device))
            #features /= features.norm(dim=-1, keepdim=True)
            # Move each batch off the accelerator right away so that device
            # memory is not held for the whole dataset.
            all_features.append(features.cpu())
            all_labels.append(labels)
    return torch.cat(all_features).cpu().numpy(), torch.cat(all_labels).cpu().numpy()

In [7]:
# Run the image encoder over both datasets; each call returns the feature
# matrix and the matching label vector as numpy arrays.
train_features, train_labels = get_features(train_set)
test_features, test_labels = get_features(test_set)

100%|██████████| 125/125 [00:20<00:00,  6.09it/s]
100%|██████████| 363/363 [00:59<00:00,  6.05it/s]


In [8]:
# Sanity check: show the shapes of the train and test feature matrices.
print(train_features.shape, test_features.shape)

(2000, 512) (5794, 512)


In [9]:
from sklearn import neighbors
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC


def fit_and_report(estimator, label):
    """Fit `estimator` on the training features and print its test accuracy.

    Args:
        estimator: scikit-learn style classifier exposing `fit` and `predict`.
        label (str): Text placed between "Accuracy " and "= " in the report.

    Returns:
        Tuple ``(predictions, accuracy)``: predicted test labels and the
        accuracy in percent.
    """
    estimator.fit(train_features, train_labels)
    predictions = estimator.predict(test_features)
    # The builtin `float` replaces the `np.float` alias, which is deprecated
    # since NumPy 1.20 and removed in NumPy 1.24.
    accuracy = np.mean((test_labels == predictions).astype(float)) * 100.
    print(f"Accuracy {label}= {accuracy:.3f}")
    return predictions, accuracy


# Each classifier is trained on the few-shot features and scored on the test
# features. NOTE(review): the recorded output shows LogisticRegression stopping
# at its default iteration limit; consider raising max_iter or scaling the features.
classifier = LogisticRegression()
predictions, accuracy = fit_and_report(classifier, "Logistic ")  # trailing space keeps "Logistic = "

clf = neighbors.KNeighborsClassifier()
predictions, accuracy = fit_and_report(clf, "KNN")

scl = SVC(kernel='linear')
predictions, accuracy = fit_and_report(scl, "SVM-Linear")

sc = SVC(kernel='rbf')
predictions, accuracy = fit_and_report(sc, "SVM-RBF")

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  accuracy = np.mean((test_labels == predictions).astype(np.float)) * 100.


Accuracy Logistic = 66.310


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  accuracy = np.mean((test_labels == predictions).astype(np.float)) * 100.


Accuracy KNN= 52.313


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  accuracy = np.mean((test_labels == predictions).astype(np.float)) * 100.


Accuracy SVM-Linear= 65.809
Accuracy SVM-RBF= 60.459


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  accuracy = np.mean((test_labels == predictions).astype(np.float)) * 100.
