## Kaggle Notebook:
https://www.kaggle.com/code/naderyouhanna/task1-clip/edit

In [1]:
import torch
import clip
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


### Hyperparameters

In [2]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DEVICE

'cuda'

### Dataset

In [3]:
# Load the CUB-200-2011 dataset
def load_cub_dataset(data_dir):
    images = pd.read_csv(os.path.join(data_dir, 'images.txt'), sep=' ', names=['image_id', 'file_path'])
    labels = pd.read_csv(os.path.join(data_dir, 'image_class_labels.txt'), sep=' ', names=['image_id', 'class_id'])
    classes = pd.read_csv(os.path.join(data_dir, 'classes.txt'), sep=' ', names=['class_id', 'class_name'])
    return images, labels, classes
data_dir = 'data'

images, labels, classes = load_cub_dataset(data_dir)

print(images.head())
print(labels.head())
print(classes.head())

print(images.shape)
print(labels.shape)
print(classes.shape)

   image_id                                          file_path
0         1  001.Black_footed_Albatross/Black_Footed_Albatr...
1         2  001.Black_footed_Albatross/Black_Footed_Albatr...
2         3  001.Black_footed_Albatross/Black_Footed_Albatr...
3         4  001.Black_footed_Albatross/Black_Footed_Albatr...
4         5  001.Black_footed_Albatross/Black_Footed_Albatr...
   image_id  class_id
0         1         1
1         2         1
2         3         1
3         4         1
4         5         1
   class_id                  class_name
0         1  001.Black_footed_Albatross
1         2        002.Laysan_Albatross
2         3         003.Sooty_Albatross
3         4       004.Groove_billed_Ani
4         5          005.Crested_Auklet
(11788, 2)
(11788, 2)
(200, 2)


In [4]:
len(images)

11788

## Task 1

### CLIP

In [4]:
clip_model, clip_preprocess = clip.load("ViT-B/32", device=DEVICE, jit=False)
clip_model.eval()

def get_clip_features(img_path):
    img = Image.open(img_path)
    img_input = clip_preprocess(img).unsqueeze(0).to(DEVICE)
    with torch.no_grad():
        img_features = clip_model.encode_image(img_input)
    return img_features

def get_clip_text_features(text):
    text_input = clip.tokenize([text]).to(DEVICE)
    with torch.no_grad():
        text_features = clip_model.encode_text(text_input)
    return text_features

def get_clip_similarity_score(img_features, text_features):
    img_features /= img_features.norm(dim=-1, keepdim=True)
    text_features /= text_features.norm(dim=-1, keepdim=True)
    similarity_score = (100.0 * img_features @ text_features.T).softmax(dim=-1)
    return similarity_score.item()

def recognize_bird_species(img_path):
    img_features = get_clip_features(img_path)
    similarities = []
    for class_name in classes['class_name']:
        text_features = get_clip_text_features(class_name)
        similarity_score = get_clip_similarity_score(img_features, text_features)
        similarities.append((class_name, similarity_score))
    similarities.sort(key=lambda x: x[1], reverse=True)
    return similarities[0][0]

In [5]:
classes['class_name']

0      001.Black_footed_Albatross
1            002.Laysan_Albatross
2             003.Sooty_Albatross
3           004.Groove_billed_Ani
4              005.Crested_Auklet
                  ...            
195                196.House_Wren
196                197.Marsh_Wren
197                 198.Rock_Wren
198               199.Winter_Wren
199       200.Common_Yellowthroat
Name: class_name, Length: 200, dtype: object

In [6]:
img_id = 1
image_name = images.iloc[img_id]['file_path']
gt_class = labels.iloc[img_id]['class_id']
gt_class_name = classes[classes['class_id'] == gt_class]['class_name'][0]
bird_species = recognize_bird_species(os.path.join(os.path.join(data_dir, 'images'), image_name))
print(bird_species)
pred_class = classes[classes['class_name'] == bird_species]['class_id'][0]
print(f'Predicted class: {pred_class}, class name: {bird_species}')
print(f"GT class: {gt_class}, class name: {gt_class_name}")

001.Black_footed_Albatross
Predicted class: 1, class name: 001.Black_footed_Albatross
GT class: 1, class name: 001.Black_footed_Albatross


#### Run CLIP on all images

In [None]:
accuracy = 0
with open('clip_predictions.txt', 'w') as f:
    for img_id in tqdm(range(1034, 2500)):
        image_name = images.iloc[img_id]['file_path']
        gt_class = labels.iloc[img_id]['class_id']
        bird_species = recognize_bird_species(os.path.join(os.path.join(data_dir, 'images'), image_name))
        pred_class = classes[classes['class_name'] == bird_species]['class_id'][0]
        f.write(f'{img_id} {pred_class} {int(gt_class == pred_class)}\n')
        accuracy += int(gt_class == pred_class)
    accuracy /= len(images)
    print(f'Accuracy: {accuracy}')

In [4]:
def measure_accuracy(file_path):
    accuracy = 0
    nlines = 0
    with open(file_path, 'r') as f:
        for line in f.readlines():
            accuracy += int(line.split(' ')[2])
            nlines += 1
    return accuracy / nlines
accuracy = measure_accuracy('./clip_predictions_1_9000.txt')
print(f'Accuracy = {np.round(accuracy*100, 2)}%')

Accuracy = 0.66%


In [5]:
def count_class_percentage(file_path, class_id):
    num_class = 0
    nlines = 0
    with open(file_path, 'r') as f:
        for line in f.readlines():
            if (int(line.split(' ')[1]) == class_id):
                num_class += 1
            nlines += 1
    return num_class / nlines
percentage_1 = count_class_percentage('./clip_predictions_1_8000.txt', 1)
print(f'Percentage of 1 = {np.round(percentage_1*100, 2)}%')

FileNotFoundError: [Errno 2] No such file or directory: './clip_predictions_1_8000.txt'