<a href="https://colab.research.google.com/github/sayanbanerjee32/multimodal_llm/blob/main/compare_image_embeddings.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install git+https://github.com/openai/CLIP.git

Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-3yl2swx7
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-3yl2swx7
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting ftfy (from clip==1.0)
  Downloading ftfy-6.3.0-py3-none-any.whl.metadata (7.1 kB)
Downloading ftfy-6.3.0-py3-none-any.whl (44 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.8/44.8 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: clip
  Building wheel for clip (setup.py) ... [?25l[?25hdone
  Created wheel for clip: filename=clip-1.0-py3-none-any.whl size=1369489 sha256=7ebdbe6aab6e42b267f7bda6500c23983e9442831d3bcf800b917d6b51ed82db
  Stored in directory: /tmp/pip-ephem-wheel-cache-_ofzbpws/wheels/da/2b/4c/d6691fa9597aac8bb

In [None]:
# Mount Google Drive at /content/drive (Colab only). The embeddings archive
# used later in this notebook is read from /content/drive/MyDrive/...
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import torch
import numpy as np
from PIL import Image
import clip

In [None]:
def load_saved_embedding(npz_file, image_name):
    """Load one stored embedding from an ``.npz`` archive as a torch tensor.

    Args:
        npz_file: Path (or file-like object) of the ``.npz`` archive, passed
            straight to ``np.load``.
        image_name: Key of the array to read from the archive.

    Returns:
        torch.Tensor: Tensor built from the stored array with
        ``torch.from_numpy`` (same dtype and shape as the stored array).

    Raises:
        KeyError: If ``image_name`` is not present in the archive.
    """
    # NOTE(security): allow_pickle=True lets np.load unpickle object arrays,
    # which can execute arbitrary code. Only load archives you trust; switch to
    # allow_pickle=False if the archive holds plain numeric arrays.
    # The context manager closes the underlying zip file handle, which the
    # archive object otherwise keeps open until it is garbage collected.
    with np.load(npz_file, allow_pickle=True) as archive:
        stored_array = archive[image_name]
    return torch.from_numpy(stored_array)

def generate_embedding(image_path, clip_model_name="ViT-B/32"):
    """Encode a single image file with a CLIP model.

    The model is loaded with ``clip.load`` on every call, on CUDA when
    available and on CPU otherwise.

    Args:
        image_path: Path of the image file to encode (opened with PIL).
        clip_model_name: Model name passed to ``clip.load``.

    Returns:
        torch.Tensor: Output of ``encode_image`` for the image with the batch
        dimension removed, located on the device selected above.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    clip_model, clip_preprocess = clip.load(clip_model_name, device=device)

    # Image.open keeps the file handle open; the context manager releases it
    # as soon as preprocessing has turned the image into a tensor.
    with Image.open(image_path) as pil_image:
        image = clip_preprocess(pil_image).unsqueeze(0).to(device)

    with torch.no_grad():
        # squeeze(0) drops only the batch axis added by unsqueeze(0) above.
        image_embedding = clip_model.encode_image(image).squeeze(0)

    return image_embedding

def cosine_similarity(embedding1, embedding2):
    """Return the cosine similarity of two tensors, reduced along dimension 0.

    Args:
        embedding1: First tensor.
        embedding2: Second tensor.

    Returns:
        torch.Tensor: Result of ``torch.nn.functional.cosine_similarity`` with
        ``dim=0``.
    """
    functional = torch.nn.functional
    similarity = functional.cosine_similarity(embedding1, embedding2, dim=0)
    return similarity

def euclidean_distance(embedding1, embedding2):
    """Return the Euclidean (L2) distance between two tensors.

    Args:
        embedding1: First tensor.
        embedding2: Second tensor; must be broadcastable against the first.

    Returns:
        torch.Tensor: Scalar tensor holding the 2-norm of the element-wise
        difference, taken over all elements.
    """
    difference = embedding1 - embedding2
    # torch.norm is deprecated; vector_norm with ord=2 and no dim flattens the
    # input and gives the same value as the old default.
    return torch.linalg.vector_norm(difference, ord=2)

def compare_embeddings(npz_file, image_name, image_path, threshold=0.95,
                       clip_model_name="ViT-B/32"):
    """Compare a stored embedding with one freshly computed from an image.

    Loads the embedding stored under ``image_name`` in ``npz_file``, encodes
    ``image_path`` with CLIP, then prints the cosine similarity, the Euclidean
    distance and a verdict against ``threshold``.

    Args:
        npz_file: Path of the ``.npz`` archive holding saved embeddings.
        image_name: Key of the saved embedding inside the archive.
        image_path: Path of the image file to encode.
        threshold: Cosine-similarity value above which the two embeddings are
            reported as approximately close.
        clip_model_name: CLIP model used for the fresh embedding; it should be
            the model that produced the saved embeddings.

    Returns:
        None. Results are printed.

    Raises:
        ValueError: If the two embeddings do not have the same number of
            elements once flattened.
    """
    # Load saved embedding
    saved_embedding = load_saved_embedding(npz_file, image_name)

    # Generate new embedding
    new_embedding = generate_embedding(image_path, clip_model_name)

    # Compare in float32 on the new embedding's device. Casting the saved
    # embedding down to the new embedding's dtype would lose precision when
    # the model output is lower precision (CLIP presumably runs in float16 on
    # GPU - TODO confirm). Flattening lets (1, D) and (D,) line up.
    new_embedding = new_embedding.flatten().float()
    saved_embedding = saved_embedding.flatten().to(
        device=new_embedding.device, dtype=torch.float32
    )

    # Without this check a mismatch would broadcast or fail with an unclear
    # error further down.
    if saved_embedding.shape != new_embedding.shape:
        raise ValueError(
            f"Embedding size mismatch: saved has {saved_embedding.numel()} elements, "
            f"new has {new_embedding.numel()} elements."
        )

    # Calculate cosine similarity
    similarity = cosine_similarity(saved_embedding, new_embedding)

    # Calculate Euclidean distance
    distance = euclidean_distance(saved_embedding, new_embedding)

    print(f"Cosine Similarity: {similarity.item():.4f}")
    print(f"Euclidean Distance: {distance.item():.4f}")

    if similarity.item() > threshold:
        print(f"The embeddings are approximately close (similarity > {threshold}).")
    else:
        print(f"The embeddings are not very close (similarity <= {threshold}).")

In [None]:
# Inputs for the check: the saved-embeddings archive on Drive, the key of the
# embedding to look up in it, and the image file to re-encode for comparison.
npz_file = '/content/drive/MyDrive/multimodal_llm/image_embedding/coco_image_embeddings.npz'
image_name = "000000024380.jpg"  # Replace with an actual image name from your dataset
image_path = "1.jpg"  # Replace with the actual path to the image file

compare_embeddings(
    npz_file=npz_file,
    image_name=image_name,
    image_path=image_path,
)

Cosine Similarity: 0.9951
Euclidean Distance: 0.9849
The embeddings are approximately close (similarity > 0.95).
