In [None]:
import torch
import clip
from PIL import Image
import os

# Load the CLIP model
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# clip.load hands back the network together with the image transform that
# is applied to PIL images before they are fed to the model.
model, preprocess = clip.load("ViT-B/32", device=device)

# Encode all images in a folder
def encode_images(folder_path):
    """Encode every PNG/JPEG image found directly in ``folder_path`` with CLIP.

    Files are selected by extension (``.png``, ``.jpg``, ``.jpeg``,
    case-insensitive) and visited in sorted name order, so the returned
    dict has a reproducible ordering regardless of how the OS lists the
    directory.

    Args:
        folder_path: Directory to scan. Sub-directories are not descended into.

    Returns:
        dict mapping file name -> unit-length (L2-normalised) float32
        embedding produced by ``model.encode_image`` for a batch of one image.
        An empty dict is returned when no file has a matching extension.

    Raises:
        Any error raised by ``os.listdir`` (e.g. missing folder) or by
        ``Image.open`` (e.g. unreadable image) propagates unchanged.
    """
    embeddings = {}
    # os.listdir order is arbitrary; sorting keeps results deterministic.
    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue
        image_path = os.path.join(folder_path, file_name)
        # The context manager closes the underlying file as soon as the pixel
        # data has been copied out by convert(); the original left one open
        # handle per image until garbage collection.
        with Image.open(image_path) as raw_image:
            rgb_image = raw_image.convert("RGB")
        # unsqueeze(0) adds the batch dimension expected by the model.
        image = preprocess(rgb_image).unsqueeze(0).to(device)
        with torch.no_grad():
            image_embedding = model.encode_image(image).float()
            # Normalise so that dot products equal cosine similarity.
            image_embedding /= image_embedding.norm(dim=-1, keepdim=True)
        embeddings[file_name] = image_embedding
    return embeddings

# Find the closest image match to the text
def find_best_match(text_query, image_embeddings):
    """Return the image whose embedding is most similar to ``text_query``.

    Args:
        text_query: Free-text description to search for.
        image_embeddings: dict mapping image name -> image embedding tensor,
            as produced by ``encode_images``.

    Returns:
        ``(best_match, best_score)`` where ``best_match`` is the name of the
        highest-scoring image and ``best_score`` its cosine similarity as a
        Python float. When there is nothing to rank (empty mapping, or no
        comparable score), ``(None, -1)`` is returned.
    """
    # Nothing to compare against: skip the text encoder entirely and return
    # the same "no match" sentinel callers have always received.
    if not image_embeddings:
        return None, -1

    # Encode the text
    text_tokens = clip.tokenize([text_query]).to(device)
    with torch.no_grad():
        text_embedding = model.encode_text(text_tokens).float()
        text_embedding /= text_embedding.norm(dim=-1, keepdim=True)

    # Compare text to each image embedding.
    # Start from -inf rather than -1: cosine similarity can legitimately be
    # -1.0 (or round marginally below it), and a strict ">" against -1 would
    # then never select that image.
    best_match = None
    best_score = float("-inf")

    for image_name, image_embedding in image_embeddings.items():
        similarity = torch.cosine_similarity(text_embedding, image_embedding).item()
        if similarity > best_score:
            best_score = similarity
            best_match = image_name

    # No score was comparable (e.g. every similarity was NaN): keep the
    # historical sentinel instead of leaking -inf to callers.
    if best_match is None:
        return None, -1

    return best_match, best_score

# Example usage
if __name__ == "__main__":
    image_folder = "lost_items"  # Folder with lost item images
    text_query = "red backpack with a water bottle"  # User's search description

    # Encode images and search
    image_embeddings = encode_images(image_folder)
    best_match, score = find_best_match(text_query, image_embeddings)

    # Show result
    if best_match:
        print(f"Best Match: {best_match} (Similarity: {score:.4f})")
        # Open the winning image inside a context manager so its file handle
        # is released once the viewer has been handed the picture.
        with Image.open(os.path.join(image_folder, best_match)) as matched_image:
            matched_image.show()
    else:
        print("No match found!")


  0%|                                     | 880k/338M [00:13<2:34:30, 38.1kiB/s]

In [2]:
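# One-time setup: uncomment and run this before the first cell to install the
# CLIP package that `import clip` depends on.
# %pip install git+https://github.com/openai/CLIP.git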

Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to c:\users\trrsh\appdata\local\temp\pip-req-build-0wkqviz5
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting ftfy (from clip==1.0)
  Obtaining dependency information for ftfy from https://files.pythonhosted.org/packages/ab/6e/81d47999aebc1b155f81eca4477a616a70f238a2549848c38983f3c22a82/ftfy-6.3.1-py3-none-any.whl.metadata
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Downloading ftfy-6.3.1-py3-none-any.whl (44 kB)
   ---------------------------------------- 0.0/44.8 kB ? eta -:--:--
   ---------------------------------------- 0.0/44.8 kB ? eta -:--:--
   ---------------------------------------- 0.0/44.8 kB ? eta -:--:--
   ---------------------------------------- 0.0/44.8 kB ? eta -:--:--
   -----------------------------

  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git 'C:\Users\trrsh\AppData\Local\Temp\pip-req-build-0wkqviz5'
