# Import libs

In [1]:
import os
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file
import openai
import torch
from torchvision import datasets
from torchvision.transforms import ToTensor
from torchvision.datasets import OxfordIIITPet
from PIL import Image
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
import torchmetrics
from torch.utils.data import DataLoader, Subset
from torch.nn import functional as F
import logging
import clip
import json
openai.api_key  = os.getenv('OPENAI_API_KEY')
from collections import OrderedDict
import numpy as np
from tqdm import tqdm

# Load Dataset using the Mean and std dev of training dataset

In [2]:
oxfordpet_path = '/home/kush/Desktop/CLIP/'
transform = transforms.Compose([
    transforms.Resize(224, interpolation=Image.BICUBIC),
    transforms.CenterCrop(224),
    lambda image: image.convert("RGB"),
    transforms.ToTensor(),
    transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)),
])
oxfordpet_dataset_train = OxfordIIITPet(oxfordpet_path, split="trainval", transform = transform)
oxfordpet_dataset_test = OxfordIIITPet(oxfordpet_path, split="test", transform = transform)

  "Argument 'interpolation' of type int is deprecated since 0.13 and will be removed in 0.15. "


In [3]:
oxfordpet_dataset_train.classes

['Abyssinian',
 'American Bulldog',
 'American Pit Bull Terrier',
 'Basset Hound',
 'Beagle',
 'Bengal',
 'Birman',
 'Bombay',
 'Boxer',
 'British Shorthair',
 'Chihuahua',
 'Egyptian Mau',
 'English Cocker Spaniel',
 'English Setter',
 'German Shorthaired',
 'Great Pyrenees',
 'Havanese',
 'Japanese Chin',
 'Keeshond',
 'Leonberger',
 'Maine Coon',
 'Miniature Pinscher',
 'Newfoundland',
 'Persian',
 'Pomeranian',
 'Pug',
 'Ragdoll',
 'Russian Blue',
 'Saint Bernard',
 'Samoyed',
 'Scottish Terrier',
 'Shiba Inu',
 'Siamese',
 'Sphynx',
 'Staffordshire Bull Terrier',
 'Wheaten Terrier',
 'Yorkshire Terrier']

# Change Class names -> They are not proper

In [4]:
def get_full_class_labels(file_path):
    classes_full_names = set()
    with open(file_path, 'r') as file:
        for line in file:
            if line.startswith('#'):
                continue
            class_id, _, species, _ = line.strip('\n').split(' ', 3)
            class_id = ' '.join(class_id.split('_')[0: -1]).title()
            if int(species) == 1:
                class_id += ' Cat'
            else:
                class_id += ' Dog'
            classes_full_names.add(class_id)
    return classes_full_names

In [5]:
def fix_class_names(dataset, complete_class_names):
    for class_name in complete_class_names:
        incomplete_class_name = ' '.join(class_name.split(' ')[0: -1]).strip()
        find_index = lambda l, x: l.index(x) if x in l else -1
        index_of_incorrect_class = find_index(dataset.classes, incomplete_class_name)
        if index_of_incorrect_class != -1:
            dataset.classes[index_of_incorrect_class] = class_name
        idx = dataset.class_to_idx.pop(incomplete_class_name, -1)
        if idx != -1:
            dataset.class_to_idx[class_name] = idx

In [6]:
# list_txt_path = 'oxford-iiit-pet/annotations/list.txt'
# full_class_labels = get_full_class_labels(os.path.join(oxfordpet_path,list_txt_path))
# fix_class_names(oxfordpet_dataset_train, full_class_labels)
# fix_class_names(oxfordpet_dataset_test, full_class_labels)
# print(oxfordpet_dataset_train.classes)
# print(oxfordpet_dataset_train.class_to_idx)

# Calc mean and std dev from training DS

In [7]:
# mean = torch.zeros(3)
# std_dev = torch.zeros(3)

# for img, _ in oxfordpet_dataset:
#     mean += img.mean(dim=(1, 2))
#     std_dev += img.std(dim=(1, 2))
# mean /= len(oxfordpet_dataset)
# std_dev /= len(oxfordpet_dataset)
# print("Mean values:", mean)
# print("Std Dev values:", std_dev)

# Load model

In [8]:
oxfordpet_dataset_train.classes

['Abyssinian',
 'American Bulldog',
 'American Pit Bull Terrier',
 'Basset Hound',
 'Beagle',
 'Bengal',
 'Birman',
 'Bombay',
 'Boxer',
 'British Shorthair',
 'Chihuahua',
 'Egyptian Mau',
 'English Cocker Spaniel',
 'English Setter',
 'German Shorthaired',
 'Great Pyrenees',
 'Havanese',
 'Japanese Chin',
 'Keeshond',
 'Leonberger',
 'Maine Coon',
 'Miniature Pinscher',
 'Newfoundland',
 'Persian',
 'Pomeranian',
 'Pug',
 'Ragdoll',
 'Russian Blue',
 'Saint Bernard',
 'Samoyed',
 'Scottish Terrier',
 'Shiba Inu',
 'Siamese',
 'Sphynx',
 'Staffordshire Bull Terrier',
 'Wheaten Terrier',
 'Yorkshire Terrier']

In [9]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load('ViT-B/32', device=device, jit=False)
model = model.eval()
model = model.requires_grad_(False)

# Function for computing Label text emebddings

In [10]:
def compute_encodings(model, labels):
    with torch.no_grad():
        label_encodings = F.normalize(model.encode_text(clip.tokenize(labels, truncate=True).to(device)))
    return label_encodings

# Function for index to label

In [11]:
def idx_to_label(label_to_idx):
    dataset_items = list(label_to_idx.items())
    return dict(map(lambda x: x[::-1], dataset_items))

In [12]:
def initialize_counter_dict(labels):
    dict_ = dict()
    for label1 in labels:
        dict_[label1] = dict()
        for label2 in labels:
            dict_[label1][label2] = 0
    return dict_

In [13]:
def increase_count(counter_dict, actual_class, top_classes):
    for class_ in top_classes:
        counter_dict[actual_class][class_] += 1
    return counter_dict

In [24]:
def add_to_counter_dict(counter_dict, index_to_class_dict, actual_index_list, 
                        pred_index_list, topK_is_one = False):
    print(pred_index_list.size())
    for idx, actual_class_index in enumerate(actual_index_list):
        actual_class = index_to_class_dict[int(actual_class_index)]
        if topK_is_one == True:
            top_classes = list(map(lambda x: index_to_class_dict[int(x)], 
                               list(pred_index_list[idx][0: 1])))
        else:
            top_classes = list(map(lambda x: index_to_class_dict[int(x)], 
                               pred_index_list[idx].squeeze()))
        counter_dict = increase_count(counter_dict, actual_class, top_classes)
    return counter_dict

In [25]:
def calc_acc(model, dataset, descriptions, batch_size = 64*10, count_classes = False, 
             topK = 5):
    topK_is_one = False
    if topK == 1:
        topK = 5
        topK_is_one = True
    elif topK < 1:
        return "Please use k >= 1!"
    
    count_classes_dict = initialize_counter_dict(dataset.classes)
    index_to_class_dict = idx_to_label(dataset.class_to_idx)
    
    
    encodings = compute_encodings(model, dataset.classes)
    clip_accuracy_metric = torchmetrics.Accuracy(task="multiclass", num_classes=len(dataset.classes)).to(device)
    clip_accuracy_metric_topk = torchmetrics.Accuracy(top_k=topK, task="multiclass", num_classes=len(dataset.classes)).to(device)
    dataloader = DataLoader(dataset, batch_size, shuffle=True, num_workers=16, pin_memory=True)
    for batch_number, batch in enumerate(tqdm(dataloader)):
        images, labels = batch

        images = images.to(device)
        labels = labels.to(device)

        image_encodings = model.encode_image(images)
        image_encodings = F.normalize(image_encodings)

        image_labels_similarity = image_encodings @ encodings.T
        topk_values, topk_indices = image_labels_similarity.topk(topK, dim=1)
        clip_predictions = image_labels_similarity.argmax(dim=1)


        clip_acc = clip_accuracy_metric(image_labels_similarity, labels)
        clip_acc_topk = clip_accuracy_metric_topk(image_labels_similarity, labels)
        
        
        if count_classes == True:
            count_classes_dict = add_to_counter_dict(count_classes_dict, index_to_class_dict,
                                                 labels, topk_indices, topK_is_one)

    # Make Top k 1 again
    
    if topK_is_one == True:
        topK = 1

    accuracy_logs = {}
    accuracy_logs["Total CLIP-Standard Top-1 Accuracy: "] = 100*clip_accuracy_metric.compute().item()
    if topK > 1:
        accuracy_logs[f"Total CLIP-Standard Top-{topK} Accuracy: "] = 100*clip_accuracy_metric_topk.compute().item()
    
    if count_classes == True:
        for actual_key in count_classes_dict.keys():
            count_classes_dict[actual_key] = sorted(count_classes_dict[actual_key].items(),
                                                key=lambda x: x[1], reverse=True)

        json_string = json.dumps(count_classes_dict, indent=4)  # indent for pretty formatting

        if topK_is_one == True:
            topK = 1
        # Write JSON string to a text file
        with open(f'top{topK}.txt', 'w') as file:
            file.write(json_string)

    
    return accuracy_logs

In [23]:
calc_acc(model, oxfordpet_dataset_train, oxfordpet_dataset_train.classes,
         count_classes = True, topK=1)

 17%|███████████████████▏                                                                                               | 1/6 [00:11<00:58, 11.62s/it]

torch.Size([640, 5])
torch.Size([640, 5])


 50%|█████████████████████████████████████████████████████████▌                                                         | 3/6 [00:11<00:08,  2.75s/it]

torch.Size([640, 5])
torch.Size([640, 5])


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:12<00:00,  1.21it/s]

torch.Size([640, 5])
torch.Size([480, 5])


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:12<00:00,  2.13s/it]


{'Total CLIP-Standard Top-1 Accuracy: ': 76.52173638343811}

# Graphs

In [19]:
def create_graph(actual_label, top5_pred):
    top5labels = list(map(lambda x: x[0], top5_pred))
    top5counts = list(map(lambda x: x[1], top5_pred))
    colors = ['royalblue' if label == actual_label else 'lightsteelblue' for label in top5labels]
    plt.barh(top5labels, top5counts, color=colors)

    num_ticks = 6  # Adjust the number of ticks as needed
    tick_positions = np.linspace(0, max(top5counts), num_ticks)
    plt.xticks(tick_positions, rotation=45, ha='right')  # Set the rotation and horizontal alignment

    plt.xlabel('Top-5 counts')
    plt.ylabel('Classes')
    plt.title('Score-wise Diagram')
    
    # Show the plot
    plt.show()


In [22]:
with open('top1_pets.json', 'r') as file:
    top5 = json.load(file)

for class_ in top5.keys():
    create_graph(class_, top5[class_][0:5])

FileNotFoundError: [Errno 2] No such file or directory: 'top1_pets.json'

In [18]:
top5[class_][0:5]

[['Yorkshire Terrier Dog', 100],
 ['Havanese Dog', 98],
 ['Scottish Terrier Dog', 88],
 ['Wheaten Terrier Dog', 66],
 ['Miniature Pinscher Dog', 57]]

# Calc Top-5 using labels

In [20]:
encodings = compute_encodings(model, oxfordpet_dataset_train.classes)
encodings_similarity = encodings @ encodings.T
topkvalues, topkidx = encodings_similarity.topk(5, dim=1)
# print(topkvalues)
idx_to_label_dict = idx_to_label(oxfordpet_dataset_train.class_to_idx)
for labels in topkidx:
    print(list(map(lambda x: idx_to_label_dict[int(x)], labels[1: 6])))

['Miniature Pinscher Dog', 'Sphynx Cat', 'Siamese Cat', 'Bengal Cat']
['American Pit Bull Terrier Dog', 'Staffordshire Bull Terrier Dog', 'Boxer Dog', 'Pug Dog']
['Staffordshire Bull Terrier Dog', 'American Bulldog Dog', 'Shiba Inu Dog', 'Boxer Dog']
['Beagle Dog', 'American Bulldog Dog', 'Shiba Inu Dog', 'American Pit Bull Terrier Dog']
['Basset Hound Dog', 'Shiba Inu Dog', 'American Bulldog Dog', 'Chihuahua Dog']
['Egyptian Mau Cat', 'Siamese Cat', 'Persian Cat', 'Sphynx Cat']
['Ragdoll Cat', 'Siamese Cat', 'Persian Cat', 'Keeshond Dog']
['Russian Blue Cat', 'Siamese Cat', 'Sphynx Cat', 'Scottish Terrier Dog']
['American Bulldog Dog', 'American Pit Bull Terrier Dog', 'Staffordshire Bull Terrier Dog', 'Pug Dog']
['Russian Blue Cat', 'Persian Cat', 'Egyptian Mau Cat', 'Siamese Cat']
['Pomeranian Dog', 'Shiba Inu Dog', 'Pug Dog', 'Persian Cat']
['British Shorthair Cat', 'Bengal Cat', 'Russian Blue Cat', 'Sphynx Cat']
['Havanese Dog', 'Chihuahua Dog', 'Pomeranian Dog', 'English Setter Do

# Create Attributes of Pairs from Top-5 Distribution

# Generate attributes functions

In [21]:
def logs():
    logging.basicConfig(
        level=logging.DEBUG,            # Set the minimum log level to DEBUG
        filename='gpt_3_my_cub.log',        # Specify the log file name
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )

    logger = logging.getLogger(__name__)
    return logger


In [22]:
def get_completion(prompt, logger, model="gpt-3.5-turbo", temperature=0., max_tokens=300):
    messages = [{'role':'system', 'content':"""
    Use this as an example to generate you answers for user's query. Give the answer in a JSON format just like given in the example

    ```Q: What are useful visual features for distinguishing a tiger from a lion in a photo?
       A: There are several useful visual features to tell the difference between a tiger and a lion in a photo:-


        {
            "tiger": [
                     "have orange coats with vertical 'black' stripes",
                     "are generally larger and robust",
                     "often have a more rounded face with prominent cheekbones",
                     "ears are usually smaller and rounded",
                     "tails are long and have a characteristic 'white spot' at the tip",
                     "are found in a variety of habitats, including dense forests and grasslands"
                     ],

            "lion": [
                    "typically have a tawny or 'beige coat', and their fur is often smoother and they have a mane",
                    "have a more compact and muscular build",
                    "lions may have a more squared-off face",
                    "ears are large and often have a more pointed shape",
                    "tails are shorter, ending in a tuft of hair",
                    "habitats are savannas and open woodlands"
                    ]
        }
        ```
    Remember not to use the first class name in the response of second class and vice-a-versa. For example here, in the answer for tiger do not use lion and in answer for lion do not use the word tiger.
"""},
    {"role": "user", "content": prompt}]
    
    try:
        logger.debug(f'Input: {messages}')
        response = openai.ChatCompletion.create(
            model=model,
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens
        )
        logger.debug(f'Output: {response}')
    except Exception as e:
        logger.error(f'error: {str(e)}')
        raise e
    return response.choices[0].message["content"]

In [None]:
()

# Create Attributes of Pairs from Top-5 Distribution

In [23]:
def generate_attributes(class1, class2):
    logger = logs()
    
    prompt = f"""
    Q: What are useful features for distinguishing a {class1} from {class2} in a photo?
    A: There are several useful visual features to tell the difference between a {class1} and a {class2} in a photo
    """
    print(prompt)
    response = get_completion(prompt, logger)
#     response = get_completion_dummy(prompt)
    answer = response
    print(response)
    
    return answer

In [24]:
def generate_attributes_dummy(class1, class2):
    logger = logs()
    
    prompt = f"""
    Q: What are useful features for distinguishing a {class1} from {class2} in a photo?
    A: There are several useful visual features to tell the difference between a {class1} and a {class2} in a photo
    """
#     print(prompt)
    return """aasdasd
    asfasfasfdas
    {
    "Laysan Albatross": [
    "have a white head and neck with a dark eye patch",
    "have a black upperwing with a white trailing edge",
    "underside is mostly white with dark edges on the underwing",
    "have a pink bill with a dark tip",
    "are generally medium-sized with a wingspan of around 6 feet",
    "nest in the Northwestern Hawaiian Islands"
    ],

    "Sooty Albatross": [
        "have a dark plumage overall, including the head and neck",
        "have a uniformly dark upperwing without a distinct pattern",
        "underside is dark, often with paler edges on the underwing",
        "have a dark bill without significant markings",
        "are larger in size with a wingspan of around 7 feet",
        "nest on sub-Antarctic islands and some isolated islands in the Southern Ocean"
    ]
    }sfsdfsdfsdf"""

In [25]:
def process_response(response):
    try:
        json_response = response
        json.loads(json_response)
    except Exception as e:
        lines = response.split('\n')
        json_response = ''
        for index, line in enumerate(lines):
            if line.find('{') != -1:
                json_response = line[line.find('{'):] + "\n"
                index += 1
                while index < len(lines) and lines[index].find('}') == -1:
                    json_response += lines[index]
                    index += 1
                if index < len(lines) and lines[index].find('}') != -1:
                    json_response += lines[index][:lines[index].find('}')+1]
                    break
                else:
                    html_string = f'<font color="red">{str(response)}</font>'
                    display(HTML(html_string))
    return json.loads(json_response)

In [26]:
def convert_to_json(dict_set):
    json_dict = dict_set.copy()
    for key in dict_set.keys():
        json_dict[key] = list(dict_set[key])
    return json.dumps(json_dict, indent=4)

In [36]:
descriptor_dict = dict()

In [37]:
already_seen = set()
total_pairs = 0

for class_ in tqdm(top5.keys()):
    number_of_pairs = 0
    for top_class, count in top5[class_][0:2]:
        if class_ == top_class or (class_, top_class) in already_seen or number_of_pairs >=5:
            continue
        response = generate_attributes(class_, top_class)
        try:
            json_response = process_response(response)
        except:
            print("Could not process the input, continue...")
            continue
        for key in json_response.keys():
            if key in descriptor_dict:
                descriptor_dict[key] |= set(json_response[key])
            else:
                descriptor_dict[key] = set(json_response[key])
        already_seen.add((class_, top_class))
        already_seen.add((top_class, class_))
        number_of_pairs += 1
        total_pairs += 1
        
        descriptor_json = convert_to_json(descriptor_dict)
                
        with open(f'descriptors.txt', 'w') as file:
            file.write(descriptor_json)
    

  0%|                                                    | 0/37 [00:00<?, ?it/s]


    Q: What are useful features for distinguishing a Abyssinian Cat from Siamese Cat in a photo?
    A: There are several useful visual features to tell the difference between a Abyssinian Cat and a Siamese Cat in a photo
    


  3%|█▏                                          | 1/37 [00:06<03:39,  6.11s/it]

{
    "Abyssinian Cat": [
        "have a short, ticked coat with a warm reddish-brown color",
        "have a slender and athletic build",
        "have almond-shaped eyes that are usually green or gold",
        "have a wedge-shaped head with large ears that are set wide apart",
        "have a short tail that tapers to a point"
    ],
    "Siamese Cat": [
        "have a short, sleek coat with a color point pattern",
        "have a slim and elegant build",
        "have striking blue almond-shaped eyes",
        "have a triangular-shaped head with large ears that are set close together",
        "have a long, slender tail"
    ]
}

    Q: What are useful features for distinguishing a American Bulldog Dog from American Pit Bull Terrier Dog in a photo?
    A: There are several useful visual features to tell the difference between a American Bulldog Dog and a American Pit Bull Terrier Dog in a photo
    


  5%|██▍                                         | 2/37 [00:14<04:10,  7.16s/it]

{
    "American Bulldog Dog": [
        "have a large and muscular build",
        "have a broad and square-shaped head",
        "often have a wrinkled forehead",
        "ears are typically medium-sized and can be either rose-shaped or drop-shaped",
        "have a short coat that can come in various colors, including white, brindle, or fawn",
        "have a strong and powerful jaw"
    ],
    "American Pit Bull Terrier Dog": [
        "have a medium-sized and athletic build",
        "have a more streamlined and wedge-shaped head",
        "ears are usually medium-sized and can be either cropped or left natural",
        "have a short coat that can come in various colors, including brindle, black, or blue",
        "have a muscular neck and shoulders",
        "have a strong and determined expression"
    ]
}

    Q: What are useful features for distinguishing a Basset Hound Dog from Beagle Dog in a photo?
    A: There are several useful visual features to tell the difference betwe

 11%|████▊                                       | 4/37 [00:19<02:20,  4.25s/it]

{
    "Basset Hound Dog": [
        "have long, droopy ears that hang down",
        "have a long body with short legs",
        "have a wrinkled face with loose skin",
        "have a deep, mournful expression",
        "have a short, smooth coat",
        "are generally larger and heavier"
    ],
    "Beagle Dog": [
        "have shorter, more upright ears",
        "have a compact body with medium-length legs",
        "have a more alert and curious expression",
        "have a short, dense coat",
        "are generally smaller and lighter"
    ]
}

    Q: What are useful features for distinguishing a Bengal Cat from Abyssinian Cat in a photo?
    A: There are several useful visual features to tell the difference between a Bengal Cat and a Abyssinian Cat in a photo
    


 16%|███████▏                                    | 6/37 [00:25<01:57,  3.81s/it]

{
    "Bengal Cat": [
        "have distinctive spotted or marbled coats",
        "coats are usually brown or gold with black markings",
        "have a muscular and athletic build",
        "often have a sleek and shiny coat",
        "have a more rounded face with prominent cheekbones",
        "ears are usually small and rounded",
        "tails are long and have a characteristic ringed pattern"
    ],
    "Abyssinian Cat": [
        "have short and ticked coats",
        "coats are usually a warm reddish-brown color",
        "have a slender and graceful build",
        "often have a soft and silky coat",
        "have a more triangular face with large almond-shaped eyes",
        "ears are large and often have a pointed shape",
        "tails are long and taper towards the tip"
    ]
}

    Q: What are useful features for distinguishing a Birman Cat from Persian Cat in a photo?
    A: There are several useful visual features to tell the difference between a Birman Cat and a Persi

 19%|████████▎                                   | 7/37 [00:32<02:20,  4.69s/it]

{
    "Birman Cat": [
        "have a medium-sized body with a muscular build",
        "have a semi-long, silky coat that comes in a variety of colors, including seal, blue, chocolate, and lilac",
        "have striking blue eyes",
        "have a distinctive color pattern called 'points', where the face, ears, paws, and tail are darker than the rest of the body",
        "have a sweet and gentle expression"
    ],
    "Persian Cat": [
        "have a large, round body with a stocky build",
        "have a long, thick coat that comes in a variety of colors, including white, black, blue, and cream",
        "have large, round eyes that can be various colors",
        "have a flat face with a short nose",
        "have a calm and placid expression"
    ]
}

    Q: What are useful features for distinguishing a Bombay Cat from Russian Blue Cat in a photo?
    A: There are several useful visual features to tell the difference between a Bombay Cat and a Russian Blue Cat in a photo
    


 22%|█████████▌                                  | 8/37 [00:37<02:17,  4.75s/it]

{
    "Bombay Cat": [
        "have a sleek and shiny black coat",
        "have a muscular and medium-sized body",
        "have round and expressive copper or gold eyes",
        "have a short and straight tail",
        "have a rounded head shape"
    ],
    "Russian Blue Cat": [
        "have a dense and plush blue-gray coat",
        "have a slender and medium-sized body",
        "have large and round green eyes",
        "have a long and tapering tail",
        "have a wedge-shaped head with straight profile"
    ]
}

    Q: What are useful features for distinguishing a Boxer Dog from American Bulldog Dog in a photo?
    A: There are several useful visual features to tell the difference between a Boxer Dog and a American Bulldog Dog in a photo
    


 24%|██████████▋                                 | 9/37 [00:44<02:29,  5.34s/it]

{
    "Boxer Dog": [
        "have a square-shaped head with a strong jawline",
        "have a short coat that is smooth and shiny",
        "have a muscular build with a deep chest",
        "have a distinctive underbite",
        "have a docked tail that is set high",
        "have a brachycephalic (short-nosed) face"
    ],
    "American Bulldog Dog": [
        "have a large, square-shaped head with a broad muzzle",
        "have a short coat that is dense and coarse",
        "have a stocky and muscular build",
        "have a pronounced underbite",
        "have a natural, medium-length tail",
        "have a mesocephalic (medium-nosed) face"
    ]
}

    Q: What are useful features for distinguishing a British Shorthair Cat from Russian Blue Cat in a photo?
    A: There are several useful visual features to tell the difference between a British Shorthair Cat and a Russian Blue Cat in a photo
    


 27%|███████████▌                               | 10/37 [00:50<02:30,  5.58s/it]

{
    "British Shorthair Cat": [
        "have a round face with chubby cheeks",
        "have a dense and plush coat",
        "come in a variety of colors and patterns, including solid colors like blue, black, and white",
        "have a stocky and muscular build",
        "have round and large eyes",
        "have a short and thick tail"
    ],
    "Russian Blue Cat": [
        "have a triangular-shaped face with a straight profile",
        "have a short and dense coat that is bluish-gray in color",
        "have a slender and graceful build",
        "have almond-shaped green eyes",
        "have a long and slender tail",
        "have a more elegant and sleek appearance"
    ]
}

    Q: What are useful features for distinguishing a Chihuahua Dog from Miniature Pinscher Dog in a photo?
    A: There are several useful visual features to tell the difference between a Chihuahua Dog and a Miniature Pinscher Dog in a photo
    


 30%|████████████▊                              | 11/37 [00:58<02:38,  6.10s/it]

{
    "Chihuahua Dog": [
        "have a small size, typically weighing less than 6 pounds",
        "have a rounded head with large, round eyes",
        "ears are large and stand upright",
        "have a short coat that can be smooth or long",
        "coat colors can vary, including tan, black, white, and brown",
        "have a distinct apple-shaped head with a short muzzle"
    ],
    "Miniature Pinscher Dog": [
        "are slightly larger than Chihuahuas, weighing between 8 and 12 pounds",
        "have a more slender and athletic build",
        "have a more pointed head with almond-shaped eyes",
        "ears are medium-sized and stand upright",
        "have a short, smooth coat that is usually black and tan",
        "have a more elongated muzzle compared to Chihuahuas"
    ]
}

    Q: What are useful features for distinguishing a Egyptian Mau Cat from Bengal Cat in a photo?
    A: There are several useful visual features to tell the difference between a Egyptian Mau Cat an

 32%|█████████████▉                             | 12/37 [01:05<02:43,  6.53s/it]

{
    "Egyptian Mau Cat": [
        "have a medium-sized body with a muscular build",
        "have a short, dense coat with a spotted or marbled pattern",
        "often have a distinctive 'M' marking on their forehead",
        "have almond-shaped eyes that are usually green",
        "ears are medium-sized and slightly pointed",
        "tails are medium-length and taper towards the tip"
    ],
    "Bengal Cat": [
        "have a medium to large-sized body with a muscular build",
        "have a short, dense coat with a spotted or marbled pattern",
        "often have a distinctive 'wild' appearance",
        "have large, round eyes that can be various colors",
        "ears are medium to large-sized and have a slightly rounded tip",
        "tails are medium to long and have a thick base, tapering towards the tip"
    ]
}

    Q: What are useful features for distinguishing a English Cocker Spaniel Dog from English Setter Dog in a photo?
    A: There are several useful visual featur

 35%|███████████████                            | 13/37 [01:13<02:42,  6.79s/it]

{
    "English Cocker Spaniel Dog": [
        "have a medium-sized build with a sturdy and compact body",
        "have a long, silky coat that is usually solid in color or with some markings",
        "have long, droopy ears that hang down close to their face",
        "have a rounded head with a well-defined stop",
        "have a shorter muzzle with a square-shaped nose",
        "have a docked tail or a naturally short tail"
    ],
    "English Setter Dog": [
        "have a larger build with a lean and athletic body",
        "have a medium to long, silky coat that is usually white with patches of color",
        "have long, feathered ears that hang down close to their face",
        "have a long, narrow head with a defined stop",
        "have a longer muzzle with a pointed nose",
        "have a long, feathered tail"
    ]
}

    Q: What are useful features for distinguishing a German Shorthaired Dog from English Setter Dog in a photo?
    A: There are several useful visual feat

 41%|█████████████████▍                         | 15/37 [01:19<01:52,  5.11s/it]

{
    "German Shorthaired Dog": [
        "have short, dense coats that are usually solid liver or liver and white",
        "have a more athletic and streamlined build",
        "ears are medium-sized and set high on the head",
        "tails are usually docked to a medium length",
        "are known for their hunting abilities and versatility in the field"
    ],
    "English Setter Dog": [
        "have longer, silky coats that are usually white with speckles or patches of color",
        "have a more elegant and graceful build",
        "ears are long and hang down close to the head",
        "tails are long and feathered",
        "are known for their gentle and friendly nature"
    ]
}

    Q: What are useful features for distinguishing a Great Pyrenees Dog from Samoyed Dog in a photo?
    A: There are several useful visual features to tell the difference between a Great Pyrenees Dog and a Samoyed Dog in a photo
    


 43%|██████████████████▌                        | 16/37 [01:26<01:58,  5.66s/it]

{
    "Great Pyrenees Dog": [
        "have a thick double coat that is usually white in color",
        "are large and muscular dogs with a strong build",
        "have a broad head with a slightly rounded skull",
        "ears are medium-sized and set high on the head",
        "have a long, fluffy tail that curls over the back",
        "are known for their calm and gentle temperament"
    ],
    "Samoyed Dog": [
        "also have a thick double coat, but it can be white, cream, or biscuit in color",
        "are slightly smaller and more compact than Great Pyrenees Dogs",
        "have a wedge-shaped head with a slightly domed skull",
        "ears are erect and set high on the head",
        "have a long, bushy tail that curls over the back",
        "are known for their friendly and outgoing personality"
    ]
}

    Q: What are useful features for distinguishing a Havanese Dog from Yorkshire Terrier Dog in a photo?
    A: There are several useful visual features to tell the dif

 46%|███████████████████▊                       | 17/37 [01:32<01:51,  5.56s/it]

{
    "Havanese Dog": [
        "have a longer and silkier coat",
        "often have a more rounded face with a shorter snout",
        "ears are usually longer and hang down",
        "tails are carried over the back",
        "are generally larger in size"
    ],
    "Yorkshire Terrier Dog": [
        "have a shorter and smoother coat",
        "often have a more angular face with a longer snout",
        "ears are usually smaller and stand up",
        "tails are docked or naturally short",
        "are generally smaller in size"
    ]
}

    Q: What are useful features for distinguishing a Japanese Chin Dog from Pomeranian Dog in a photo?
    A: There are several useful visual features to tell the difference between a Japanese Chin Dog and a Pomeranian Dog in a photo
    


 49%|████████████████████▉                      | 18/37 [01:38<01:48,  5.72s/it]

{
    "Japanese Chin Dog": [
        "have a small and compact body",
        "have a flat face with large, round eyes",
        "ears are set high on the head and are feathered",
        "have a silky, straight coat that comes in various colors",
        "have a plumed tail that curls over the back"
    ],
    "Pomeranian Dog": [
        "have a small and compact body",
        "have a fox-like face with small, almond-shaped eyes",
        "ears are set high on the head and stand erect",
        "have a thick double coat that comes in various colors",
        "have a plumed tail that curls over the back"
    ]
}

    Q: What are useful features for distinguishing a Keeshond Dog from Pomeranian Dog in a photo?
    A: There are several useful visual features to tell the difference between a Keeshond Dog and a Pomeranian Dog in a photo
    


 51%|██████████████████████                     | 19/37 [01:44<01:44,  5.80s/it]

{
    "Keeshond Dog": [
        "have a thick double coat with a dense undercoat",
        "have a distinct ruff around their neck",
        "ears are medium-sized and erect",
        "have a fox-like face with a wedge-shaped head",
        "tails are plumed and carried over their back",
        "are generally larger in size"
    ],
    "Pomeranian Dog": [
        "have a thick double coat with a fluffy appearance",
        "may have a lion-like mane around their neck",
        "ears are small and erect",
        "have a teddy bear-like face with a rounded head",
        "tails are plumed and carried over their back",
        "are generally smaller in size"
    ]
}

    Q: What are useful features for distinguishing a Leonberger Dog from Newfoundland Dog in a photo?
    A: There are several useful visual features to tell the difference between a Leonberger Dog and a Newfoundland Dog in a photo
    


 54%|███████████████████████▏                   | 20/37 [01:51<01:45,  6.21s/it]

{
    "Leonberger Dog": [
        "have a thick, double coat that is usually a golden or red color",
        "are large and muscular, with a sturdy build",
        "have a broad head with a slightly arched forehead",
        "ears are medium-sized and hang down close to the head",
        "have a long, bushy tail that hangs down when at rest",
        "are known for their friendly and gentle temperament"
    ],
    "Newfoundland Dog": [
        "have a thick, water-resistant double coat that is usually black, brown, or gray",
        "are also large and muscular, but have a more massive build",
        "have a broad head with a straight or slightly convex profile",
        "ears are medium-sized and hang down close to the head",
        "have a long, thick tail that hangs down when at rest",
        "are known for their calm and gentle nature"
    ]
}

    Q: What are useful features for distinguishing a Maine Coon Cat from Persian Cat in a photo?
    A: There are several useful visual

 57%|████████████████████████▍                  | 21/37 [01:56<01:35,  5.97s/it]

{
    "Maine Coon Cat": [
        "have a large and muscular build",
        "have tufted ears with lynx-like tips",
        "have a long and bushy tail",
        "have a shaggy and water-resistant coat",
        "have a rectangular-shaped body",
        "have a more rugged and wild appearance"
    ],
    "Persian Cat": [
        "have a small and round face",
        "have a flat and pushed-in nose",
        "have large and round eyes",
        "have a dense and luxurious coat",
        "have a stocky and cobby body",
        "have a more delicate and refined appearance"
    ]
}

    Q: What are useful features for distinguishing a Pomeranian Dog from Chihuahua Dog in a photo?
    A: There are several useful visual features to tell the difference between a Pomeranian Dog and a Chihuahua Dog in a photo
    


 68%|█████████████████████████████              | 25/37 [02:03<00:40,  3.34s/it]

{
    "Pomeranian Dog": [
        "have a fluffy double coat with a thick ruff around the neck",
        "are generally larger in size",
        "often have a fox-like face with a pointed snout",
        "ears are small and erect",
        "tails are plumed and carried over the back",
        "come in a variety of colors, including orange, red, cream, and black"
    ],
    "Chihuahua Dog": [
        "typically have a short and smooth coat",
        "are generally smaller in size",
        "have a rounded apple-shaped head",
        "ears are large and stand upright",
        "tails are long and carried high or curled over the back",
        "come in a variety of colors, including fawn, black, white, and tan"
    ]
}

    Q: What are useful features for distinguishing a Pug Dog from American Bulldog Dog in a photo?
    A: There are several useful visual features to tell the difference between a Pug Dog and a American Bulldog Dog in a photo
    


 70%|██████████████████████████████▏            | 26/37 [02:09<00:41,  3.81s/it]

{
    "Pug Dog": [
        "have a wrinkled face with a flat nose",
        "have a compact and muscular build",
        "ears are small and folded",
        "have a short and curly tail",
        "coat colors can vary, but often include fawn, black, or silver"
    ],
    "American Bulldog Dog": [
        "have a square-shaped head with a pronounced jaw",
        "have a large and muscular build",
        "ears are medium-sized and can be either cropped or left natural",
        "have a medium-length tail that is straight or slightly curved",
        "coat colors can vary, but often include white with patches of brindle, fawn, or brown"
    ]
}

    Q: What are useful features for distinguishing a Ragdoll Cat from Birman Cat in a photo?
    A: There are several useful visual features to tell the difference between a Ragdoll Cat and a Birman Cat in a photo
    


 73%|███████████████████████████████▍           | 27/37 [02:15<00:42,  4.27s/it]

{
    "Ragdoll Cat": [
        "have semi-long hair with a silky texture",
        "are known for their striking blue eyes",
        "have a large and muscular body",
        "have a broad head with a flat profile",
        "have a soft and plush coat",
        "come in a variety of colors and patterns"
    ],
    "Birman Cat": [
        "have medium-length hair with a silky texture",
        "also have striking blue eyes",
        "have a medium-sized and muscular body",
        "have a rounded head with a Roman nose",
        "have a silky and glossy coat",
        "have a color-point pattern with darker points on the ears, face, paws, and tail"
    ]
}

    Q: What are useful features for distinguishing a Saint Bernard Dog from Great Pyrenees Dog in a photo?
    A: There are several useful visual features to tell the difference between a Saint Bernard Dog and a Great Pyrenees Dog in a photo
    


 78%|█████████████████████████████████▋         | 29/37 [02:21<00:30,  3.80s/it]

{
    "Saint Bernard Dog": [
        "have a larger and heavier build",
        "have a broader head with a more pronounced forehead",
        "have droopy jowls and a more wrinkled face",
        "have shorter and thicker fur",
        "have a shorter and wider muzzle",
        "have a barrel-shaped body",
        "have a shorter and more muscular neck"
    ],
    "Great Pyrenees Dog": [
        "have a more slender and athletic build",
        "have a narrower head with a flatter forehead",
        "have a more alert expression",
        "have longer and thicker fur",
        "have a longer and narrower muzzle",
        "have a more elongated body",
        "have a longer and more graceful neck"
    ]
}

    Q: What are useful features for distinguishing a Scottish Terrier Dog from Wheaten Terrier Dog in a photo?
    A: There are several useful visual features to tell the difference between a Scottish Terrier Dog and a Wheaten Terrier Dog in a photo
    


 84%|████████████████████████████████████       | 31/37 [02:27<00:20,  3.41s/it]

{
    "Scottish Terrier Dog": [
        "have a wiry and dense double coat",
        "have a distinctive beard and eyebrows",
        "have a more compact and muscular build",
        "ears are small and erect",
        "tails are short and carried upright",
        "coat colors are usually black or brindle"
    ],
    "Wheaten Terrier Dog": [
        "have a soft and silky single coat",
        "have a more slender and graceful build",
        "ears are medium-sized and folded",
        "tails are long and carried straight or slightly curved",
        "coat color is usually wheaten, ranging from pale beige to golden"
    ]
}

    Q: What are useful features for distinguishing a Shiba Inu Dog from Pomeranian Dog in a photo?
    A: There are several useful visual features to tell the difference between a Shiba Inu Dog and a Pomeranian Dog in a photo
    


 86%|█████████████████████████████████████▏     | 32/37 [02:33<00:19,  3.95s/it]

{
    "Shiba Inu Dog": [
        "have a distinct fox-like face with a pointed snout",
        "ears are small and triangular-shaped",
        "have a thick double coat that comes in various colors including red, sesame, black, and tan",
        "tails are curled and carried high over the back",
        "are medium-sized dogs with a sturdy build"
    ],
    "Pomeranian Dog": [
        "have a compact and fluffy appearance",
        "ears are small and erect",
        "have a thick double coat that comes in various colors including orange, cream, black, and sable",
        "tails are plumed and carried over the back",
        "are small-sized dogs with a dainty build"
    ]
}

    Q: What are useful features for distinguishing a Siamese Cat from Birman Cat in a photo?
    A: There are several useful visual features to tell the difference between a Siamese Cat and a Birman Cat in a photo
    


 89%|██████████████████████████████████████▎    | 33/37 [02:39<00:17,  4.49s/it]

{
    "Siamese Cat": [
        "have a distinctive color pattern with a light-colored body and darker points on the ears, face, paws, and tail",
        "have almond-shaped blue eyes",
        "have a sleek and slender body",
        "have a short coat",
        "have a triangular-shaped head",
        "have a vocal and talkative nature"
    ],
    "Birman Cat": [
        "have a medium to large size with a sturdy build",
        "have a semi-long silky coat",
        "have a colorpoint pattern with darker points on the ears, face, paws, and tail",
        "have round blue eyes",
        "have a broad and rounded head",
        "have a gentle and affectionate nature"
    ]
}

    Q: What are useful features for distinguishing a Sphynx Cat from Abyssinian Cat in a photo?
    A: There are several useful visual features to tell the difference between a Sphynx Cat and a Abyssinian Cat in a photo
    


 92%|███████████████████████████████████████▌   | 34/37 [02:45<00:14,  4.74s/it]

{
    "Sphynx Cat": [
        "have a hairless or very short coat",
        "often have wrinkled skin",
        "have large ears that are wide at the base",
        "have a muscular and sturdy build",
        "have a round head shape",
        "have prominent cheekbones"
    ],
    "Abyssinian Cat": [
        "have a short and dense coat with a ticked pattern",
        "have a slender and graceful build",
        "have a wedge-shaped head",
        "have almond-shaped eyes",
        "have large ears that are pointed at the tips",
        "have a bushy tail"
    ]
}

    Q: What are useful features for distinguishing a Staffordshire Bull Terrier Dog from American Bulldog Dog in a photo?
    A: There are several useful visual features to tell the difference between a Staffordshire Bull Terrier Dog and a American Bulldog Dog in a photo
    


 95%|████████████████████████████████████████▋  | 35/37 [02:52<00:10,  5.38s/it]

{
    "Staffordshire Bull Terrier Dog": [
        "have a muscular and stocky build",
        "have a broad and short head with a pronounced cheekbone",
        "ears are small and rose-shaped",
        "have a short and smooth coat",
        "coat colors can vary, but often include brindle, black, or white",
        "have a medium-sized tail that is set low"
    ],
    "American Bulldog Dog": [
        "have a large and powerful build",
        "have a square-shaped head with a wide muzzle",
        "ears are medium-sized and can be either rose-shaped or drop-shaped",
        "have a short and dense coat",
        "coat colors can vary, but often include white with patches of brindle, fawn, or red",
        "have a medium-sized tail that is set high"
    ]
}

    Q: What are useful features for distinguishing a Wheaten Terrier Dog from Havanese Dog in a photo?
    A: There are several useful visual features to tell the difference between a Wheaten Terrier Dog and a Havanese Dog in a p

100%|███████████████████████████████████████████| 37/37 [03:00<00:00,  4.89s/it]

{
    "Wheaten Terrier Dog": [
        "have a medium-sized body with a square-shaped build",
        "have a soft, wavy coat that is typically a pale wheaten color",
        "often have a longer muzzle and a more pronounced stop",
        "ears are medium-sized and set high on the head",
        "tails are usually docked to a medium length",
        "are known for their friendly and outgoing personality"
    ],
    "Havanese Dog": [
        "have a small-sized body with a longer, rectangular-shaped build",
        "have a long, silky coat that can come in a variety of colors",
        "often have a shorter muzzle and a more rounded head",
        "ears are large and drop down to the sides of the head",
        "tails are usually carried over the back in a plume",
        "are known for their affectionate and playful nature"
    ]
}





In [38]:
len(descriptor_dict.keys())

37

In [21]:
def load_json(filename):
    if not filename.endswith('.txt'):
        filename += '.txt'
    with open(filename, 'r') as fp:
        return json.load(fp)
    

In [22]:
gpt_descriptions = load_json('descriptors.txt')

In [47]:
def compute_description_encodings(model, gpt_descriptions):
    description_encodings = []
    for k, v in gpt_descriptions.items():
        print(list(map(lambda x: f"{k} {x}", v)))
        print("--"*20)
        tokens = clip.tokenize(list(map(lambda x: f"{k} {x}", v))).to(device)
        description_encodings.append(F.normalize(model.encode_text(tokens).sum(dim=0, keepdim=True)))
        print(description_encodings[-1].size())
    return torch.stack(description_encodings, dim = 1).squeeze()

In [48]:
def aggregate_similarity(similarity_matrix_chunk, aggregation_method='mean'):
    if aggregation_method == 'max': return similarity_matrix_chunk.max(dim=1)[0]
    elif aggregation_method == 'sum': return similarity_matrix_chunk.sum(dim=1)
    elif aggregation_method == 'mean': return similarity_matrix_chunk.mean(dim=1)
    else: raise ValueError("Unknown aggregate_similarity")

In [50]:
def calc_acc_complete(model, dataset, descriptions, batch_size = 10, count_classes = False, 
             topK = 5):
    
    count_classes_dict = initialize_counter_dict(dataset.classes)
    index_to_class_dict = idx_to_label(dataset.class_to_idx)
    
    
    description_encodings = compute_description_encodings(model, gpt_descriptions)
    print(description_encodings.size())
    label_encodings = compute_encodings(model, dataset.classes)
    
    
    clip_accuracy_metric = torchmetrics.Accuracy(task="multiclass", num_classes=len(dataset.classes)).to(device)
    clip_accuracy_metric_top5 = torchmetrics.Accuracy(top_k=5, task="multiclass", num_classes=len(dataset.classes)).to(device)
    
    
    lang_accuracy_metric = torchmetrics.Accuracy(task="multiclass", num_classes=len(dataset.classes)).to(device)
    lang_accuracy_metric_top5 = torchmetrics.Accuracy(top_k=5, task="multiclass", num_classes=len(dataset.classes)).to(device)

    
    
    dataloader = DataLoader(dataset, batch_size, shuffle=True, num_workers=16, pin_memory=True)
    for batch_number, batch in enumerate(tqdm(dataloader)):
        images, labels = batch

        images = images.to(device)
        labels = labels.to(device)

        image_encodings = model.encode_image(images)
        image_encodings = F.normalize(image_encodings)

        image_labels_similarity = image_encodings @ label_encodings.T


        clip_acc = clip_accuracy_metric(image_labels_similarity, labels)
        clip_acc_top5 = clip_accuracy_metric_top5(image_labels_similarity, labels)
        
        dot_product_matrix = image_encodings @ description_encodings.T
        
        lang_acc = lang_accuracy_metric(dot_product_matrix, labels)
        lang_acc_top5 = lang_accuracy_metric_top5(dot_product_matrix, labels)



    print("\n")

    accuracy_logs = {}
    accuracy_logs["Total Description-based Top-1 Accuracy: "] = 100*lang_accuracy_metric.compute().item()
    accuracy_logs["Total Description-based Top-5 Accuracy: "] = 100*lang_accuracy_metric_top5.compute().item()

    accuracy_logs["Total CLIP-Standard Top-1 Accuracy: "] = 100*clip_accuracy_metric.compute().item()
    accuracy_logs["Total CLIP-Standard Top-5 Accuracy: "] = 100*clip_accuracy_metric_top5.compute().item()

    # print the dictionary
    print("\n")
    for key, value in accuracy_logs.items():
        print(key, value)
    
    return accuracy_logs

In [51]:
calc_acc_complete(model, oxfordpet_dataset_test, oxfordpet_dataset_train.classes,
         count_classes = False)

['Abyssinian Cat have a long and slender tail', 'Abyssinian Cat have almond-shaped eyes that are usually green or gold', 'Abyssinian Cat have a short, dense coat that is soft to the touch', 'Abyssinian Cat have a wedge-shaped head with large ears that are set wide apart', 'Abyssinian Cat are known for their playful and active nature', 'Abyssinian Cat typically have a ticked coat pattern, with each hair having multiple bands of color', 'Abyssinian Cat have a more triangular face with a slightly rounded forehead', 'Abyssinian Cat have almond-shaped eyes that are usually green or gold in color', 'Abyssinian Cat tails are medium-length and have a characteristic dark tip', 'Abyssinian Cat have large, almond-shaped eyes', 'Abyssinian Cat have a wedge-shaped head with large ears', 'Abyssinian Cat have a more slender and graceful build', 'Abyssinian Cat usually have a warm, reddish-brown coat color', 'Abyssinian Cat have a short, ticked coat with a warm reddish-brown color', 'Abyssinian Cat ha

torch.Size([1, 512])
['Sphynx Cat have a medium to large size with a slender body', 'Sphynx Cat have large, lemon-shaped eyes that can be any color', 'Sphynx Cat have large ears that are wide at the base and taper to a point', 'Sphynx Cat have a rounded head with prominent cheekbones', 'Sphynx Cat have a long and slender tail', 'Sphynx Cat have a muscular and sturdy build', 'Sphynx Cat have a distinctive facial expression', 'Sphynx Cat have a long, slender tail', 'Sphynx Cat often have large ears', 'Sphynx Cat have a round face with prominent cheekbones', 'Sphynx Cat have a hairless or nearly hairless coat', 'Sphynx Cat have a hairless coat', 'Sphynx Cat have a sturdy and solid appearance', 'Sphynx Cat have a hairless or very short coat', 'Sphynx Cat have a distinctive facial expression with prominent cheekbones', 'Sphynx Cat have a long and whip-like tail', 'Sphynx Cat have a wedge-shaped head with prominent cheekbones', 'Sphynx Cat have large ears', 'Sphynx Cat have wrinkled skin', '

torch.Size([1, 512])
['Saint Bernard Dog have a strong and muscular body', 'Saint Bernard Dog have a calm and gentle expression', 'Saint Bernard Dog have a large and powerful build', 'Saint Bernard Dog have a distinctive white and red or white and mahogany coat color', 'Saint Bernard Dog have shorter and more rounded ears', 'Saint Bernard Dog have a large, floppy ears that hang down', 'Saint Bernard Dog have a long tail that is usually carried low', 'Saint Bernard Dog have a thick, dense coat that is usually red and white or mahogany and white', 'Saint Bernard Dog have a broad head with a gentle expression', 'Saint Bernard Dog have a broader head with a more pronounced forehead', 'Saint Bernard Dog are generally larger and heavier', 'Saint Bernard Dog have a shorter tail that hangs down', 'Saint Bernard Dog ears are set high on the head and hang down', 'Saint Bernard Dog have a white coat with patches of brown, red, or brindle', 'Saint Bernard Dog tails are long and usually held low', 

torch.Size([1, 512])
['English Setter Dog come in a variety of colors including white with black, orange, or liver markings', 'English Setter Dog have a medium to long coat that is usually white with patches of color', 'English Setter Dog have a deep chest and a long, feathered tail', 'English Setter Dog have large, expressive eyes that are usually dark in color', 'English Setter Dog ears are long and hang down', 'English Setter Dog have a longer and more flowing coat', 'English Setter Dog tails are long and feathered', 'English Setter Dog have a long, feathered tail that is carried high when in motion', 'English Setter Dog have a medium-sized, athletic build', 'English Setter Dog have a long, elegant face with a pointed snout', 'English Setter Dog have a medium-length coat that is usually white with patches of color', 'English Setter Dog have a well-defined stop between the forehead and the muzzle', 'English Setter Dog have a long, feathered tail that is usually held low', 'English Se

torch.Size([1, 512])
['Ragdoll Cat have a docile and relaxed temperament', 'Ragdoll Cat have striking blue eyes', 'Ragdoll Cat have a large, muscular body with a broad chest', 'Ragdoll Cat have a soft and plush coat that is easy to groom', 'Ragdoll Cat have a large and sturdy body with a semi-long coat', 'Ragdoll Cat have a medium to large size', 'Ragdoll Cat have a round face with full cheeks', 'Ragdoll Cat have semi-long, silky coats that are usually lighter in color', 'Ragdoll Cat come in a variety of colors and patterns, including pointed, mitted, and bicolor', 'Ragdoll Cat have a triangular face with a long nose and large oval eyes', 'Ragdoll Cat have a broad and rounded head', 'Ragdoll Cat have a bushy tail', 'Ragdoll Cat have a semi-long, silky coat', 'Ragdoll Cat have a large and muscular build with long legs', 'Ragdoll Cat have large, oval-shaped eyes that are usually blue', 'Ragdoll Cat have a soft and plush fur', "Ragdoll Cat have a tendency to go limp when picked up, hence 

torch.Size([1, 512])
['Yorkshire Terrier Dog are known for their elegant and glamorous appearance', 'Yorkshire Terrier Dog have a confident and alert expression', 'Yorkshire Terrier Dog have a small size and compact body', 'Yorkshire Terrier Dog ears are medium-sized and carried erect', 'Yorkshire Terrier Dog have a smooth and rounded head', 'Yorkshire Terrier Dog tails are usually docked or carried low', 'Yorkshire Terrier Dog are small dogs with a delicate build', 'Yorkshire Terrier Dog often have a topknot of hair on the head', 'Yorkshire Terrier Dog are generally smaller in size', 'Yorkshire Terrier Dog have a long, silky coat', 'Yorkshire Terrier Dog tails are usually docked to a medium length', 'Yorkshire Terrier Dog ears are usually erect and V-shaped', 'Yorkshire Terrier Dog ears are usually smaller and stand up', 'Yorkshire Terrier Dog ears are small and V-shaped', 'Yorkshire Terrier Dog tails are docked to a medium length', 'Yorkshire Terrier Dog are generally smaller and mor

torch.Size([1, 512])
['Newfoundland Dog have a wide, deep chest', 'Newfoundland Dog have a large and powerful build', 'Newfoundland Dog have a long, thick tail that hangs down when at rest', 'Newfoundland Dog have a slightly smaller and more balanced build', 'Newfoundland Dog have a thick, muscular neck', 'Newfoundland Dog have droopy ears that are set high on the head', 'Newfoundland Dog have a narrower head with a more tapered forehead', 'Newfoundland Dog have a thick, strong tail that hangs down', 'Newfoundland Dog have a broad head with a straight or slightly convex profile', 'Newfoundland Dog have a thick double coat that is usually black or brown', 'Newfoundland Dog are also large and muscular dogs, but with a more stocky build', 'Newfoundland Dog have longer and more triangular ears', 'Newfoundland Dog have a broad head with a square-shaped muzzle', 'Newfoundland Dog are known for their webbed feet, which are helpful for swimming', 'Newfoundland Dog have a more alert expression 

torch.Size([1, 512])
['Shiba Inu Dog have small, triangular ears that stand erect', 'Shiba Inu Dog typically have a compact and muscular build', 'Shiba Inu Dog are medium-sized dogs with a sturdy and muscular build', 'Shiba Inu Dog tails are curled and carried high over the back', 'Shiba Inu Dog have erect ears that are triangular in shape', 'Shiba Inu Dog have a curly or sickle-shaped tail that is carried high over the back', 'Shiba Inu Dog have a curled or sickle-shaped tail that is carried high over the back', 'Shiba Inu Dog have a short, dense coat in colors like red, sesame, or black and tan', 'Shiba Inu Dog are generally small to medium-sized dogs with a sturdy build', 'Shiba Inu Dog are known for their independent and spirited nature', 'Shiba Inu Dog ears are small and triangular', 'Shiba Inu Dog have a curled tail that is carried high over the back', 'Shiba Inu Dog have a more fox-like face with a confident expression', 'Shiba Inu Dog have a thick double coat that comes in vari

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 367/367 [00:55<00:00,  6.57it/s]





Total Description-based Top-1 Accuracy:  2.562006004154682
Total Description-based Top-5 Accuracy:  17.4161896109581
Total CLIP-Standard Top-1 Accuracy:  89.01607990264893
Total CLIP-Standard Top-5 Accuracy:  99.75470304489136





{'Total Description-based Top-1 Accuracy: ': 2.562006004154682,
 'Total Description-based Top-5 Accuracy: ': 17.4161896109581,
 'Total CLIP-Standard Top-1 Accuracy: ': 89.01607990264893,
 'Total CLIP-Standard Top-5 Accuracy: ': 99.75470304489136}