In [1]:
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import os, json
import pandas as pd
from tqdm import tqdm
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [2]:
# Load the CUB-200-2011 dataset
def load_cub_dataset(data_dir):
    """Read the CUB-200-2011 metadata tables and the LLaVA captions under ``data_dir``.

    Parameters
    ----------
    data_dir : str
        Directory containing ``images.txt``, ``image_class_labels.txt``,
        ``classes.txt``, ``bounding_boxes.txt``, the ``parts/`` and
        ``attributes/`` sub-directories, and ``llava_captions.json``.

    Returns
    -------
    tuple
        ``(images, labels, classes, bounding_boxes, parts, part_locs,
        parts_click_locs, attributes, certainties, image_attribute_labels,
        llava_captions)``. The first ten entries are pandas DataFrames; the
        last is whatever ``json.load`` yields for ``llava_captions.json``.

    Raises
    ------
    FileNotFoundError
        If any of the expected files is missing.
    """
    def read_space_separated(relative_path, columns):
        # Single-space separated table without a header row.
        return pd.read_csv(os.path.join(data_dir, relative_path), sep=' ', names=columns)

    images = read_space_separated('images.txt', ['image_id', 'file_path'])
    labels = read_space_separated('image_class_labels.txt', ['image_id', 'class_id'])
    classes = read_space_separated('classes.txt', ['class_id', 'class_name'])
    bounding_boxes = read_space_separated('bounding_boxes.txt', ['image_id', 'x', 'y', 'width', 'height'])
    # NOTE: this table's key column is named 'img_id' (not 'image_id' like the
    # others); the name is kept so existing consumers keep working.
    part_locs = read_space_separated('parts/part_locs.txt', ['img_id', 'part_id', 'x', 'y', 'visible'])
    # Fixed-width parsing instead of a space-delimited read: the id sits in the
    # first two characters and the rest of the line is taken as the name
    # (presumably because names themselves contain spaces).
    parts = pd.read_fwf(os.path.join(data_dir, 'parts/parts.txt'), colspecs=[(0, 2), (2, None)], header=None, names=['part_id', 'part_name'])
    parts_click_locs = read_space_separated('parts/part_click_locs.txt', ['image_id', 'part_id', 'x', 'y', 'visible', 'time'])
    attributes = read_space_separated('attributes/attributes.txt', ['attribute_id', 'attribute_name'])
    certainties = pd.read_fwf(os.path.join(data_dir, 'attributes/certainties.txt'), colspecs=[(0, 1), (2, None)], names=["certainty_id", "certainty_name"])
    # sep=r'\s+' is the documented equivalent of the deprecated
    # delim_whitespace=True. usecols keeps only the first five fields of each row.
    image_attribute_labels = pd.read_csv(os.path.join(data_dir, 'attributes/image_attribute_labels.txt'),
                                         names=['image_id', 'attribute_id', 'is_present', 'certainty_id', 'time'],
                                         sep=r'\s+', usecols=range(5)
                                         )
    # Decode explicitly as UTF-8 so the result does not depend on the platform locale.
    with open(os.path.join(data_dir, 'llava_captions.json'), 'r', encoding='utf-8') as f:
        llava_captions = json.load(f)
    return images, labels, classes,  bounding_boxes, parts, part_locs, parts_click_locs, attributes, certainties, image_attribute_labels, llava_captions
# Colab location used previously:
# '/content/drive/MyDrive/Bird-Species-Exploration-and-Retrieval/Dataset/CUB_200_2011'
data_dir = './data'
images_dir, parts_dir = (os.path.join(data_dir, sub) for sub in ('images', 'parts'))

# Unpack every table returned by the loader; the label table is bound to
# `labels_df` here.
(images, labels_df, classes, bounding_boxes, parts, part_locs,
 parts_click_locs, attributes, certainties, image_attribute_labels,
 llava_captions) = load_cub_dataset(data_dir)

# Preview the three core tables: first rows, then shapes (one per line).
print(*(frame.head() for frame in (images, labels_df, classes)), sep='\n')

print(*(frame.shape for frame in (images, labels_df, classes)), sep='\n')

  image_attribute_labels = pd.read_csv(os.path.join(data_dir, 'attributes/image_attribute_labels.txt'),


   image_id                                          file_path
0         1  001.Black_footed_Albatross/Black_Footed_Albatr...
1         2  001.Black_footed_Albatross/Black_Footed_Albatr...
2         3  001.Black_footed_Albatross/Black_Footed_Albatr...
3         4  001.Black_footed_Albatross/Black_Footed_Albatr...
4         5  001.Black_footed_Albatross/Black_Footed_Albatr...
   image_id  class_id
0         1         1
1         2         1
2         3         1
3         4         1
4         5         1
   class_id                  class_name
0         1  001.Black_footed_Albatross
1         2        002.Laysan_Albatross
2         3         003.Sooty_Albatross
3         4       004.Groove_billed_Ani
4         5          005.Crested_Auklet
(11788, 2)
(11788, 2)
(200, 2)


In [3]:
# Load the precomputed CLIP embedding matrices (image side and text side)
# from disk; the shapes are printed image-first as a quick sanity check.
clip_embeds_imgs = np.load('./data/clip_embeds_imgs.npy')
clip_embeds_text = np.load('./data/clip_embeds_text.npy')
print(*(matrix.shape for matrix in (clip_embeds_imgs, clip_embeds_text)))

(11788, 512) (11788, 512)


In [4]:
# Fused representation: elementwise mean of the image and text embeddings.
clip_embeds = np.add(clip_embeds_imgs, clip_embeds_text) / 2.0

In [5]:
# Classification targets stored alongside the embeddings; print the shape to
# confirm it is a flat vector.
labels = np.load('./data/labels.npy')
print(np.shape(labels))

(11788,)


## Classifier

In [6]:
# Text-only baseline: logistic regression on the CLIP text embeddings.
# The size variables below are informational; the actual 80/20 partition is
# made by train_test_split.
dataset_size = len(clip_embeds)
train_size = int(0.8 * dataset_size)
test_size = dataset_size - train_size

# Fixed random_state so the split matches the one used for the other feature sets.
# NOTE(review): the split is not stratified by class - confirm that is intended.
X_train, X_test, y_train, y_test = train_test_split(clip_embeds_text, labels, test_size=0.2, random_state=42)

# `multi_class="multinomial"` is intentionally not passed: the parameter is
# deprecated since scikit-learn 1.5 and lbfgs already fits the multinomial
# model when the target has more than two classes.
text_clf = LogisticRegression(max_iter=1000, solver="lbfgs")
text_clf.fit(X_train, y_train)

train_preds = text_clf.predict(X_train)
test_preds = text_clf.predict(X_test)

# Compute accuracy for each set
train_acc = accuracy_score(y_train, train_preds)
test_acc = accuracy_score(y_test, test_preds)

# Print results
print(f"Train Accuracy: {train_acc:.4f}")
print(f"Test Accuracy: {test_acc:.4f}")



Train Accuracy: 0.8637
Test Accuracy: 0.7926


In [58]:
# Fused-feature model: logistic regression on the averaged image/text embeddings.
# The size variables below are informational; the actual 80/20 partition is
# made by train_test_split.
dataset_size = len(clip_embeds)
train_size = int(0.8 * dataset_size)
test_size = dataset_size - train_size

# Fixed random_state so the split matches the one used for the other feature sets.
# NOTE(review): the split is not stratified by class - confirm that is intended.
X_train, X_test, y_train, y_test = train_test_split(clip_embeds, labels, test_size=0.2, random_state=42)

# `multi_class="multinomial"` is intentionally not passed: the parameter is
# deprecated since scikit-learn 1.5 and lbfgs already fits the multinomial
# model when the target has more than two classes.
clf = LogisticRegression(max_iter=1000, solver="lbfgs")
clf.fit(X_train, y_train)

train_preds = clf.predict(X_train)
test_preds = clf.predict(X_test)

# Compute accuracy for each set
train_acc = accuracy_score(y_train, train_preds)
test_acc = accuracy_score(y_test, test_preds)

# Print results
print(f"Train Accuracy: {train_acc:.4f}")
print(f"Test Accuracy: {test_acc:.4f}")



Train Accuracy: 0.9688
Test Accuracy: 0.8075


In [55]:
# Image-only baseline: logistic regression on the CLIP image embeddings.
# The size variables below are informational; the actual 80/20 partition is
# made by train_test_split.
dataset_size = len(clip_embeds)
train_size = int(0.8 * dataset_size)
test_size = dataset_size - train_size

# Fixed random_state so the split matches the one used for the other feature sets.
# NOTE(review): the split is not stratified by class - confirm that is intended.
X_train, X_test, y_train, y_test = train_test_split(clip_embeds_imgs, labels, test_size=0.2, random_state=42)

# `multi_class="multinomial"` is intentionally not passed: the parameter is
# deprecated since scikit-learn 1.5 and lbfgs already fits the multinomial
# model when the target has more than two classes.
img_clf = LogisticRegression(max_iter=1000, solver="lbfgs")
img_clf.fit(X_train, y_train)

train_preds = img_clf.predict(X_train)
test_preds = img_clf.predict(X_test)

# Compute accuracy for each set
train_acc = accuracy_score(y_train, train_preds)
test_acc = accuracy_score(y_test, test_preds)

# Print results
print(f"Train Accuracy: {train_acc:.4f}")
print(f"Test Accuracy: {test_acc:.4f}")



Train Accuracy: 0.9945
Test Accuracy: 0.7583
