# Md Jakaria Mashud Shahria (2431751)

**Task 1**

The first task will require the realization of:

*   usage of existing pre-trained (pre-trained) image classification model adaptation to new task using few-shot,one-shot and zero-shot learning.
*   calculate accuracy, precision, recovery and F1 statistics for selected new class on unseen 1000 images from OpenImages,
*   to implement threshold value (threshold) change, enabling classification of images for each assigned class by changing T∈[0,1]. Statistics must be recalculated after changing the threshold value.







First I tried without gpu, used tensorflow dataset and this method to load dataset:

```
dataset = tfds.load(‘open_images/v7’, split='train')
```

Both did not work. Enabling GPU in colab and use FiftyOne package to load openimages_v7 dataset.

In [10]:
!pip install "sse-starlette<1"
!pip install -q fiftyone transformers datasets scikit-learn tqdm torch



Use CUDA to get GPU Power, and use OpenAI's ClipModel

In [11]:
import torch
from transformers import CLIPProcessor, CLIPModel
from datasets import load_dataset
from PIL import Image
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tqdm import tqdm
import random
from huggingface_hub import login
from google.colab import userdata

# Use a GPU if available (which we enabled in Colab's runtime settings)
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {DEVICE}")

# This line securely retrieves the secret you just created
hf_token = userdata.get('HF_TOKEN')
login(token=hf_token)

print("Successfully logged in to Hugging Face!")

# Load the pre-trained CLIP model and its processor
MODEL_NAME = "openai/clip-vit-base-patch32"
print(f"Loading model: {MODEL_NAME}...")
model = CLIPModel.from_pretrained(MODEL_NAME).to(DEVICE)
processor = CLIPProcessor.from_pretrained(MODEL_NAME)
print("Model loaded successfully!")

Using device: cuda
Successfully logged in to Hugging Face!
Loading model: openai/clip-vit-base-patch32...


Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Model loaded successfully!


In [12]:
# DATA PREPARATION (Using FiftyOne)
import fiftyone as fo
import fiftyone.zoo as foz
from PIL import Image
from tqdm import tqdm
import random

TARGET_CLASSES = ["Horse", "Cat", "Dog"]  # Changed to a list of target classes
NUM_EVAL_IMAGES_PER_CLASS = 50 # Reduced number of evaluation images per class (positive)
NUM_NEGATIVE_IMAGES_PER_CLASS = 20 # Reduced number of negative images per class
NUM_FEW_SHOT_EXAMPLES = 3  # Reduced number of examples for few-shot learning per class

def prepare_dataset():
    """
    Loads and filters the OpenImages v7 dataset using the FiftyOne Zoo.
    Collects positive and negative samples for multiple target classes.
    """
    print("Preparing dataset from the FiftyOne Zoo...")

    # We'll load a larger number of random samples and then filter them.
    # This is an easy way to get both positive and negative examples.
    num_samples_to_load = (NUM_EVAL_IMAGES_PER_CLASS + NUM_NEGATIVE_IMAGES_PER_CLASS + NUM_FEW_SHOT_EXAMPLES) * len(TARGET_CLASSES) * 2 # Load more to ensure we find enough of each class

    # Load a random subset of the dataset from the zoo
    # This downloads only the images and metadata we need.
    dataset = foz.load_zoo_dataset(
        "open-images-v7",
        split="test",
        label_types=["detections"],
        max_samples=num_samples_to_load,
        shuffle=True,
    )

    positive_samples_by_class = {cls: [] for cls in TARGET_CLASSES}
    negative_samples = []
    support_samples_by_class = {cls: [] for cls in TARGET_CLASSES}

    print("Filtering for positive and negative samples...")
    # Use a view to make processing faster
    view = dataset.select_fields("ground_truth")

    total_samples_needed = (NUM_EVAL_IMAGES_PER_CLASS * len(TARGET_CLASSES)) + (NUM_NEGATIVE_IMAGES_PER_CLASS * len(TARGET_CLASSES)) + (NUM_FEW_SHOT_EXAMPLES * len(TARGET_CLASSES))
    pbar = tqdm(total=total_samples_needed)

    for sample in view.iter_samples(autosave=True, progress=False):
        # Get all labels for the current sample
        if not sample.ground_truth:
            continue

        labels = [d.label for d in sample.ground_truth.detections]

        # Load the image from its filepath
        try:
            pil_image = Image.open(sample.filepath).convert("RGB")
        except Exception as e:
            # Skip images that cannot be opened
            # print(f"Could not open image {sample.filepath}: {e}")
            continue


        # Check for support samples
        for target_class in TARGET_CLASSES:
            if target_class in labels and len(support_samples_by_class[target_class]) < NUM_FEW_SHOT_EXAMPLES:
                support_samples_by_class[target_class].append(pil_image)
                pbar.update(1)

        # Check for positive and negative evaluation samples
        is_target_class = any(cls in labels for cls in TARGET_CLASSES)

        if is_target_class:
            for target_class in TARGET_CLASSES:
                if target_class in labels and len(positive_samples_by_class[target_class]) < NUM_EVAL_IMAGES_PER_CLASS:
                     positive_samples_by_class[target_class].append(pil_image)
                     pbar.update(1)
        elif len(negative_samples) < NUM_NEGATIVE_IMAGES_PER_CLASS * len(TARGET_CLASSES): # Collect negative samples for all classes
             negative_samples.append(pil_image)
             pbar.update(1)


        # Check if we have enough samples of all types
        have_enough_support = all(len(support_samples_by_class[cls]) >= NUM_FEW_SHOT_EXAMPLES for cls in TARGET_CLASSES)
        have_enough_positives = all(len(positive_samples_by_class[cls]) >= NUM_EVAL_IMAGES_PER_CLASS for cls in TARGET_CLASSES)
        have_enough_negatives = len(negative_samples) >= NUM_NEGATIVE_IMAGES_PER_CLASS * len(TARGET_CLASSES)


        if have_enough_positives and have_enough_negatives and have_enough_support:
            break

    pbar.close()

    # Clean up the downloaded dataset to save space
    dataset.delete()

    eval_images = []
    true_labels = []
    support_images = {cls: support_samples_by_class[cls] for cls in TARGET_CLASSES}

    # Combine positive and negative samples for evaluation
    for i, target_class in enumerate(TARGET_CLASSES):
        eval_images.extend(positive_samples_by_class[target_class])
        true_labels.extend([target_class] * len(positive_samples_by_class[target_class]))

    eval_images.extend(negative_samples)
    true_labels.extend(["Negative"] * len(negative_samples)) # Assign a generic "Negative" label


    combined = list(zip(eval_images, true_labels))
    random.shuffle(combined)
    eval_images, true_labels = zip(*combined)

    print(f"\nDataset prepared: {len(eval_images)} evaluation images and {NUM_FEW_SHOT_EXAMPLES} support images per class.")
    return list(eval_images), list(true_labels), support_images

In [13]:
# CLASSIFICATION METHODS

import torch
from transformers import CLIPProcessor, CLIPModel
from datasets import load_dataset
from PIL import Image
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tqdm import tqdm
import random



def predict_zero_shot(image, text_labels):
    """
    Classifies an image using zero-shot learning with text prompts.
    Returns the probability score for each provided text label.
    """
    with torch.no_grad():
        inputs = processor(text=text_labels, images=image, return_tensors="pt", padding=True).to(DEVICE)
        outputs = model(**inputs)
        logits_per_image = outputs.logits_per_image
        probs = logits_per_image.softmax(dim=1)
        return probs[0].tolist() # Return probabilities for all text labels

def get_image_embedding(image):
    """Helper function to get the embedding for a single image."""
    with torch.no_grad():
        inputs = processor(images=image, return_tensors="pt").to(DEVICE)
        embedding = model.get_image_features(**inputs)
        return torch.nn.functional.normalize(embedding, p=2, dim=-1)

def predict_few_shot(query_image, support_embeddings_by_class):
    """
    Classifies an image by comparing it to the average embedding of support images for each class.
    Returns the cosine similarity score for each class.
    """
    with torch.no_grad():
        query_embedding = get_image_embedding(query_image)
        similarities = {}
        for class_name, support_embeddings in support_embeddings_by_class.items():
            if support_embeddings.numel() > 0:  # Check if tensor is not empty
                # Ensure avg_support_embedding is a single vector
                avg_support_embedding = torch.mean(support_embeddings, dim=0, keepdim=True)

                # Calculate cosine similarity manually
                dot_product = torch.sum(query_embedding * avg_support_embedding, dim=-1)
                # Since embeddings are already L2 normalized, the dot product is the cosine similarity
                similarity = dot_product

                # Get the scalar value
                similarities[class_name] = similarity.item()
            else:
                 similarities[class_name] = 0.0  # Assign 0.0 if no support embeddings
        return similarities

In [14]:
# EVALUATION

def calculate_and_print_metrics(scores, true_labels, threshold):
    """
    Calculates and prints classification metrics based on a given threshold.
    """
    predictions = [1 if score >= threshold else 0 for score in scores]

    accuracy = accuracy_score(true_labels, predictions)
    precision = precision_score(true_labels, predictions, zero_division=0)
    recall = recall_score(true_labels, predictions, zero_division=0)
    f1 = f1_score(true_labels, predictions, zero_division=0)

    print(f"Threshold: {threshold:.2f}")
    print(f"  Accuracy:  {accuracy:.4f}")
    print(f"  Precision: {precision:.4f}")
    print(f"  Recall (Recovery): {recall:.4f}")
    print(f"  F1-Score:  {f1:.4f}")
    print("-" * 30)

In [15]:
#Mani Execution

eval_images, true_labels, support_images = prepare_dataset()

#ZERO-SHOT LEARNING
print("\n" + "="*50)
print("Starting Zero-Shot Classification...")
print("="*50)

for target_class in TARGET_CLASSES:
    print(f"\nEvaluating Zero-Shot for class: {target_class}")
    zero_shot_labels = [f"a photo of a {target_class}", "a photo of something else"]
    # Filter eval_images and true_labels for the current target class and negative samples
    class_eval_images = [img for img, label in zip(eval_images, true_labels) if label == target_class or label == "Negative"]
    class_true_labels = [1 if label == target_class else 0 for label in true_labels if label == target_class or label == "Negative"]

    # Extract the probability of the target class
    zero_shot_scores = [predict_zero_shot(img, zero_shot_labels)[0] for img in tqdm(class_eval_images, desc=f"Zero-Shot ({target_class})")]

    print(f"\nZero-Shot Evaluation Results for {target_class}:")

    for T in [0.1, 0.3, 0.5, 0.7, 0.9]:
        calculate_and_print_metrics(zero_shot_scores, class_true_labels, threshold=T)


#ONE-SHOT LEARNING
print("\n" + "="*50)
print("Starting One-Shot Classification...")
print("="*50)

for target_class in TARGET_CLASSES:
    print(f"\nEvaluating One-Shot for class: {target_class}")
    if support_images[target_class]:
        one_shot_support_embedding = get_image_embedding(support_images[target_class][0])

        # Filter eval_images and true_labels for the current target class and negative samples
        class_eval_images = [img for img, label in zip(eval_images, true_labels) if label == target_class or label == "Negative"]
        class_true_labels = [1 if label == target_class else 0 for label in true_labels if label == target_class or label == "Negative"]

        # Extract the similarity score for the target class
        one_shot_scores = [predict_few_shot(img, {target_class: one_shot_support_embedding.unsqueeze(0)})[target_class] for img in tqdm(class_eval_images, desc=f"One-Shot ({target_class})")]


        print(f"\nOne-Shot Evaluation Results for {target_class}:")

        for T in [0.20, 0.25, 0.30, 0.35, 0.40]:
            calculate_and_print_metrics(one_shot_scores, class_true_labels, threshold=T)
    else:
        print(f"  No support images found for {target_class}. Skipping One-Shot evaluation.")


#FEW-SHOT LEARNING
print("\n" + "="*50)
print(f"Starting Few-Shot ({NUM_FEW_SHOT_EXAMPLES} examples) Classification...")
print("="*50)

for target_class in TARGET_CLASSES:
    print(f"\nEvaluating Few-Shot for class: {target_class}")
    if support_images[target_class]:
        few_shot_support_embeddings = torch.cat([get_image_embedding(img) for img in support_images[target_class]], dim=0)

        # Filter eval_images and true_labels for the current target class and negative samples
        class_eval_images = [img for img, label in zip(eval_images, true_labels) if label == target_class or label == "Negative"]
        class_true_labels = [1 if label == target_class else 0 for label in true_labels if label == target_class or label == "Negative"]

        # Extract the similarity score for the target class
        few_shot_scores = [predict_few_shot(img, {target_class: few_shot_support_embeddings})[target_class] for img in tqdm(class_eval_images, desc=f"Few-Shot ({target_class})")]


        print(f"\nFew-Shot ({NUM_FEW_SHOT_EXAMPLES} examples) Evaluation Results for {target_class}:")

        for T in [0.20, 0.25, 0.30, 0.35, 0.40]:
            calculate_and_print_metrics(few_shot_scores, class_true_labels, threshold=T)
    else:
        print(f"  No support images found for {target_class}. Skipping Few-Shot evaluation.")

Preparing dataset from the FiftyOne Zoo...
Downloading split 'test' to '/root/fiftyone/open-images-v7/test' if necessary


INFO:fiftyone.zoo.datasets:Downloading split 'test' to '/root/fiftyone/open-images-v7/test' if necessary


Necessary images already downloaded


INFO:fiftyone.utils.openimages:Necessary images already downloaded


Existing download of split 'test' is sufficient


INFO:fiftyone.zoo.datasets:Existing download of split 'test' is sufficient


Loading 'open-images-v7' split 'test'


INFO:fiftyone.zoo.datasets:Loading 'open-images-v7' split 'test'


 100% |█████████████████| 438/438 [7.3s elapsed, 0s remaining, 72.6 samples/s]       


INFO:eta.core.utils: 100% |█████████████████| 438/438 [7.3s elapsed, 0s remaining, 72.6 samples/s]       


Dataset 'open-images-v7-test-438' created


INFO:fiftyone.zoo.datasets:Dataset 'open-images-v7-test-438' created


Filtering for positive and negative samples...


 42%|████▏     | 93/219 [00:10<00:13,  9.19it/s]



Dataset prepared: 85 evaluation images and 3 support images per class.

Starting Zero-Shot Classification...

Evaluating Zero-Shot for class: Horse


Zero-Shot (Horse): 100%|██████████| 65/65 [00:03<00:00, 20.09it/s]



Zero-Shot Evaluation Results for Horse:
Threshold: 0.10
  Accuracy:  0.4769
  Precision: 0.1282
  Recall (Recovery): 1.0000
  F1-Score:  0.2273
------------------------------
Threshold: 0.30
  Accuracy:  0.8308
  Precision: 0.3125
  Recall (Recovery): 1.0000
  F1-Score:  0.4762
------------------------------
Threshold: 0.50
  Accuracy:  0.9231
  Precision: 0.5000
  Recall (Recovery): 1.0000
  F1-Score:  0.6667
------------------------------
Threshold: 0.70
  Accuracy:  0.9692
  Precision: 0.7143
  Recall (Recovery): 1.0000
  F1-Score:  0.8333
------------------------------
Threshold: 0.90
  Accuracy:  0.9846
  Precision: 0.8333
  Recall (Recovery): 1.0000
  F1-Score:  0.9091
------------------------------

Evaluating Zero-Shot for class: Cat


Zero-Shot (Cat): 100%|██████████| 62/62 [00:06<00:00,  9.44it/s]



Zero-Shot Evaluation Results for Cat:
Threshold: 0.10
  Accuracy:  0.9032
  Precision: 0.2500
  Recall (Recovery): 1.0000
  F1-Score:  0.4000
------------------------------
Threshold: 0.30
  Accuracy:  0.9677
  Precision: 0.5000
  Recall (Recovery): 1.0000
  F1-Score:  0.6667
------------------------------
Threshold: 0.50
  Accuracy:  0.9839
  Precision: 0.6667
  Recall (Recovery): 1.0000
  F1-Score:  0.8000
------------------------------
Threshold: 0.70
  Accuracy:  1.0000
  Precision: 1.0000
  Recall (Recovery): 1.0000
  F1-Score:  1.0000
------------------------------
Threshold: 0.90
  Accuracy:  1.0000
  Precision: 1.0000
  Recall (Recovery): 1.0000
  F1-Score:  1.0000
------------------------------

Evaluating Zero-Shot for class: Dog


Zero-Shot (Dog): 100%|██████████| 78/78 [00:04<00:00, 18.95it/s]



Zero-Shot Evaluation Results for Dog:
Threshold: 0.10
  Accuracy:  0.6154
  Precision: 0.3750
  Recall (Recovery): 1.0000
  F1-Score:  0.5455
------------------------------
Threshold: 0.30
  Accuracy:  0.9231
  Precision: 0.7500
  Recall (Recovery): 1.0000
  F1-Score:  0.8571
------------------------------
Threshold: 0.50
  Accuracy:  0.9872
  Precision: 0.9474
  Recall (Recovery): 1.0000
  F1-Score:  0.9730
------------------------------
Threshold: 0.70
  Accuracy:  0.9872
  Precision: 0.9474
  Recall (Recovery): 1.0000
  F1-Score:  0.9730
------------------------------
Threshold: 0.90
  Accuracy:  0.9872
  Precision: 1.0000
  Recall (Recovery): 0.9444
  F1-Score:  0.9714
------------------------------

Starting One-Shot Classification...

Evaluating One-Shot for class: Horse


One-Shot (Horse): 100%|██████████| 65/65 [00:02<00:00, 25.43it/s]



One-Shot Evaluation Results for Horse:
Threshold: 0.20
  Accuracy:  0.0769
  Precision: 0.0769
  Recall (Recovery): 1.0000
  F1-Score:  0.1429
------------------------------
Threshold: 0.25
  Accuracy:  0.0769
  Precision: 0.0769
  Recall (Recovery): 1.0000
  F1-Score:  0.1429
------------------------------
Threshold: 0.30
  Accuracy:  0.0769
  Precision: 0.0769
  Recall (Recovery): 1.0000
  F1-Score:  0.1429
------------------------------
Threshold: 0.35
  Accuracy:  0.0769
  Precision: 0.0769
  Recall (Recovery): 1.0000
  F1-Score:  0.1429
------------------------------
Threshold: 0.40
  Accuracy:  0.1077
  Precision: 0.0794
  Recall (Recovery): 1.0000
  F1-Score:  0.1471
------------------------------

Evaluating One-Shot for class: Cat


One-Shot (Cat): 100%|██████████| 62/62 [00:03<00:00, 15.93it/s]



One-Shot Evaluation Results for Cat:
Threshold: 0.20
  Accuracy:  0.0323
  Precision: 0.0323
  Recall (Recovery): 1.0000
  F1-Score:  0.0625
------------------------------
Threshold: 0.25
  Accuracy:  0.0323
  Precision: 0.0323
  Recall (Recovery): 1.0000
  F1-Score:  0.0625
------------------------------
Threshold: 0.30
  Accuracy:  0.0323
  Precision: 0.0323
  Recall (Recovery): 1.0000
  F1-Score:  0.0625
------------------------------
Threshold: 0.35
  Accuracy:  0.0484
  Precision: 0.0328
  Recall (Recovery): 1.0000
  F1-Score:  0.0635
------------------------------
Threshold: 0.40
  Accuracy:  0.1129
  Precision: 0.0351
  Recall (Recovery): 1.0000
  F1-Score:  0.0678
------------------------------

Evaluating One-Shot for class: Dog


One-Shot (Dog): 100%|██████████| 78/78 [00:05<00:00, 14.88it/s]



One-Shot Evaluation Results for Dog:
Threshold: 0.20
  Accuracy:  0.2308
  Precision: 0.2308
  Recall (Recovery): 1.0000
  F1-Score:  0.3750
------------------------------
Threshold: 0.25
  Accuracy:  0.2308
  Precision: 0.2308
  Recall (Recovery): 1.0000
  F1-Score:  0.3750
------------------------------
Threshold: 0.30
  Accuracy:  0.2308
  Precision: 0.2308
  Recall (Recovery): 1.0000
  F1-Score:  0.3750
------------------------------
Threshold: 0.35
  Accuracy:  0.2564
  Precision: 0.2368
  Recall (Recovery): 1.0000
  F1-Score:  0.3830
------------------------------
Threshold: 0.40
  Accuracy:  0.3077
  Precision: 0.2500
  Recall (Recovery): 1.0000
  F1-Score:  0.4000
------------------------------

Starting Few-Shot (3 examples) Classification...

Evaluating Few-Shot for class: Horse


Few-Shot (Horse): 100%|██████████| 65/65 [00:01<00:00, 39.69it/s]



Few-Shot (3 examples) Evaluation Results for Horse:
Threshold: 0.20
  Accuracy:  0.0769
  Precision: 0.0769
  Recall (Recovery): 1.0000
  F1-Score:  0.1429
------------------------------
Threshold: 0.25
  Accuracy:  0.0769
  Precision: 0.0769
  Recall (Recovery): 1.0000
  F1-Score:  0.1429
------------------------------
Threshold: 0.30
  Accuracy:  0.0769
  Precision: 0.0769
  Recall (Recovery): 1.0000
  F1-Score:  0.1429
------------------------------
Threshold: 0.35
  Accuracy:  0.0923
  Precision: 0.0781
  Recall (Recovery): 1.0000
  F1-Score:  0.1449
------------------------------
Threshold: 0.40
  Accuracy:  0.2000
  Precision: 0.0877
  Recall (Recovery): 1.0000
  F1-Score:  0.1613
------------------------------

Evaluating Few-Shot for class: Cat


Few-Shot (Cat): 100%|██████████| 62/62 [00:01<00:00, 50.74it/s]



Few-Shot (3 examples) Evaluation Results for Cat:
Threshold: 0.20
  Accuracy:  0.0323
  Precision: 0.0323
  Recall (Recovery): 1.0000
  F1-Score:  0.0625
------------------------------
Threshold: 0.25
  Accuracy:  0.0323
  Precision: 0.0323
  Recall (Recovery): 1.0000
  F1-Score:  0.0625
------------------------------
Threshold: 0.30
  Accuracy:  0.0323
  Precision: 0.0323
  Recall (Recovery): 1.0000
  F1-Score:  0.0625
------------------------------
Threshold: 0.35
  Accuracy:  0.0484
  Precision: 0.0328
  Recall (Recovery): 1.0000
  F1-Score:  0.0635
------------------------------
Threshold: 0.40
  Accuracy:  0.1613
  Precision: 0.0370
  Recall (Recovery): 1.0000
  F1-Score:  0.0714
------------------------------

Evaluating Few-Shot for class: Dog


Few-Shot (Dog): 100%|██████████| 78/78 [00:01<00:00, 50.39it/s]


Few-Shot (3 examples) Evaluation Results for Dog:
Threshold: 0.20
  Accuracy:  0.2308
  Precision: 0.2308
  Recall (Recovery): 1.0000
  F1-Score:  0.3750
------------------------------
Threshold: 0.25
  Accuracy:  0.2308
  Precision: 0.2308
  Recall (Recovery): 1.0000
  F1-Score:  0.3750
------------------------------
Threshold: 0.30
  Accuracy:  0.2308
  Precision: 0.2308
  Recall (Recovery): 1.0000
  F1-Score:  0.3750
------------------------------
Threshold: 0.35
  Accuracy:  0.2564
  Precision: 0.2368
  Recall (Recovery): 1.0000
  F1-Score:  0.3830
------------------------------
Threshold: 0.40
  Accuracy:  0.3462
  Precision: 0.2609
  Recall (Recovery): 1.0000
  F1-Score:  0.4138
------------------------------





In [26]:
# Choose sample images from the evaluation set
if eval_images:
    num_demonstration_images = 5 # Number of images to demonstrate

    # Filter for images that are one of the target classes for demonstration
    target_class_eval_images = [img for img, label in zip(eval_images, true_labels) if label in TARGET_CLASSES]
    target_class_true_labels = [label for label in true_labels if label in TARGET_CLASSES]

    if not target_class_eval_images:
        print("No images from target classes available for demonstration.")
    else:
        demonstration_true_labels = target_class_true_labels[:min(num_demonstration_images, len(target_class_true_labels))]
        zero_shot_predictions = []
        one_shot_predictions = []
        few_shot_predictions = []

        for i in range(min(num_demonstration_images, len(target_class_eval_images))):
            sample_image = target_class_eval_images[i]
            sample_true_label = target_class_true_labels[i]
            print(f"\n--- Demonstration Image {i+1} ---")
            print(f"Sample True Label: {sample_true_label}")


            # --- Zero-Shot Probabilities ---
            print("\nZero-Shot Probabilities:")
            # Create labels for all target classes
            zero_shot_labels = [f"a photo of a {cls}" for cls in TARGET_CLASSES]
            zero_shot_probs = predict_zero_shot(sample_image, zero_shot_labels)

            # Print probabilities for each target class
            for j, class_name in enumerate(TARGET_CLASSES):
                print(f"  {class_name}: {zero_shot_probs[j]:.4f}")

            # Zero-shot prediction
            predicted_index = zero_shot_probs.index(max(zero_shot_probs))
            predicted_label_zero_shot = zero_shot_labels[predicted_index].replace("a photo of a ", "")
            print(f"Predicted (Zero-Shot): {predicted_label_zero_shot}")
            zero_shot_predictions.append(predicted_label_zero_shot)


            # --- One-Shot Probabilities ---
            print("\nOne-Shot Probabilities:")
            one_shot_similarities = {}
            one_shot_support_embeddings = {}
            for target_class in TARGET_CLASSES:
                if support_images[target_class]:
                    one_shot_support_embeddings[target_class] = get_image_embedding(support_images[target_class][0]).unsqueeze(0) # Keep embeddings as [1, 512] tensors
                else:
                     one_shot_support_embeddings[target_class] = torch.tensor([]).to(DEVICE) # Handle case with no support images

            one_shot_similarities = predict_few_shot(sample_image, one_shot_support_embeddings)

            # Print similarity scores for each target class
            for class_name in TARGET_CLASSES:
                 print(f"  {class_name}: {one_shot_similarities[class_name]:.4f}")

            # One-shot prediction (based on highest similarity)
            predicted_label_one_shot = max(one_shot_similarities, key=one_shot_similarities.get)
            print(f"Predicted (One-Shot): {predicted_label_one_shot}")
            one_shot_predictions.append(predicted_label_one_shot)


            # --- Few-Shot Probabilities ---
            print(f"\nFew-Shot ({NUM_FEW_SHOT_EXAMPLES} examples) Probabilities):")
            few_shot_similarities = {}
            support_embeddings_for_few_shot = {}
            for target_class in TARGET_CLASSES:
                if support_images[target_class]:
                    support_embeddings_for_few_shot[target_class] = torch.cat([get_image_embedding(img) for img in support_images[target_class]], dim=0)
                else:
                     support_embeddings_for_few_shot[target_class] = torch.tensor([]).to(DEVICE) # Handle case with no support images


            few_shot_similarities = predict_few_shot(sample_image, support_embeddings_for_few_shot)

            # Print similarity scores for each target class
            for class_name in TARGET_CLASSES:
                 print(f"  {class_name}: {few_shot_similarities[class_name]:.4f}")

            # Few-shot prediction (based on highest similarity)
            predicted_label_few_shot = max(few_shot_similarities, key=few_shot_similarities.get)
            print(f"Predicted (Few-Shot): {predicted_label_few_shot}")
            few_shot_predictions.append(predicted_label_few_shot)


          # Calculate and print overall accuracies
        overall_zero_shot_accuracy = accuracy_score(demonstration_true_labels, zero_shot_predictions)
        overall_one_shot_accuracy = accuracy_score(demonstration_true_labels, one_shot_predictions)
        overall_few_shot_accuracy = accuracy_score(demonstration_true_labels, few_shot_predictions)

        print(f"\n--- Overall Accuracy on Demonstration {num_demonstration_images} Images ---")
        print(f"Zero-Shot Accuracy: {overall_zero_shot_accuracy:.4f}")
        print(f"One-Shot Accuracy: {overall_one_shot_accuracy:.4f}")
        print(f"Few-Shot ({NUM_FEW_SHOT_EXAMPLES} examples) Accuracy: {overall_few_shot_accuracy:.4f}")



else:
    print("No evaluation images available to demonstrate.")


--- Demonstration Image 1 ---
Sample True Label: Dog

Zero-Shot Probabilities:
  Horse: 0.0025
  Cat: 0.0041
  Dog: 0.9934
Predicted (Zero-Shot): Dog

One-Shot Probabilities:
  Horse: 0.6153
  Cat: 0.7313
  Dog: 0.6746
Predicted (One-Shot): Cat

Few-Shot (3 examples) Probabilities):
  Horse: 0.4968
  Cat: 0.7246
  Dog: 0.7542
Predicted (Few-Shot): Dog

--- Demonstration Image 2 ---
Sample True Label: Dog

Zero-Shot Probabilities:
  Horse: 0.0395
  Cat: 0.3386
  Dog: 0.6218
Predicted (Zero-Shot): Dog

One-Shot Probabilities:
  Horse: 0.5286
  Cat: 0.5716
  Dog: 0.4929
Predicted (One-Shot): Cat

Few-Shot (3 examples) Probabilities):
  Horse: 0.4843
  Cat: 0.5617
  Dog: 0.4693
Predicted (Few-Shot): Cat

--- Demonstration Image 3 ---
Sample True Label: Cat

Zero-Shot Probabilities:
  Horse: 0.0012
  Cat: 0.9909
  Dog: 0.0078
Predicted (Zero-Shot): Cat

One-Shot Probabilities:
  Horse: 0.7122
  Cat: 1.0000
  Dog: 0.7898
Predicted (One-Shot): Cat

Few-Shot (3 examples) Probabilities):
  Hor