<a href="https://colab.research.google.com/github/testgithubprecious/Ml_projects/blob/main/Image_text.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install dependencies if not already installed:
# pip install torch torchvision matplotlib pillow openai-clip

import torch
import clip
from PIL import Image
import matplotlib.pyplot as plt

# ----- Load CLIP model -----
# Use the GPU when PyTorch can see one; otherwise run on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# `preprocess` is the image transform returned alongside the model; it is
# applied to the input image below.
model, preprocess = clip.load("ViT-B/32", device=device)

# ----- Input image -----
image_path = "example.jpg"  # Replace with your image path
# Open the file exactly once inside a context manager so the handle is
# closed, and keep an in-memory RGB copy that is used both as the model
# input and for display (so the plot shows what the model actually sees).
with Image.open(image_path) as image_file:
    pil_image = image_file.convert("RGB")
image = preprocess(pil_image).unsqueeze(0).to(device)  # add a batch dimension

# ----- Candidate text descriptions -----
text_descriptions = [
    "a dog playing in the park",
    "a cat sitting on a sofa",
    "a person riding a bike",
    "a beautiful sunset over mountains",
]

# Tokenize text
text_tokens = clip.tokenize(text_descriptions).to(device)

# ----- Encode image and text -----
with torch.no_grad():
    image_features = model.encode_image(image)
    text_features = model.encode_text(text_tokens)

    # Normalize to unit length so the dot product below is a cosine similarity.
    image_features /= image_features.norm(dim=-1, keepdim=True)
    text_features /= text_features.norm(dim=-1, keepdim=True)

    # Raw cosine similarities lie in [-1, 1]; a softmax over them is almost
    # uniform. Multiply by the model's learned temperature first so the
    # resulting probabilities are meaningful.
    logit_scale = model.logit_scale.exp()
    logits_per_image = logit_scale * (image_features @ text_features.T)
    # Softmax in float32 to avoid half-precision rounding when running on GPU.
    probs = logits_per_image.float().softmax(dim=-1).cpu().numpy()

# ----- Display the input image -----
plt.imshow(pil_image)
plt.axis("off")
plt.title("📷 Input Image")
plt.show()

# ----- Print top matching results -----
print("🧠 Top Text Matches for Image:")
# Rank descriptions from most to least likely so the best match comes first.
ranked_matches = sorted(
    zip(text_descriptions, probs[0]),
    key=lambda match: match[1],
    reverse=True,
)
for rank, (desc, prob) in enumerate(ranked_matches, start=1):
    print(f"{rank}. {desc} – {prob:.2%}")