In [2]:
import clip
import torch
from PIL import Image
import os
import numpy as np

# Choose the compute device: use CUDA when torch reports it, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# clip.load returns the model together with the image preprocessing callable
# that is applied to each PIL image before it is fed to the model.
model, preprocess = clip.load("ViT-B/32", device=device)

# Step 1: Candidate tag vocabulary.
# A broad pool of general categories and common objects that every image is
# scored against. All entries are single words, so they are kept in one
# whitespace-separated string and split into a list; order is significant
# because selected tags are reported in this order. Extend as needed (a
# multi-word tag would have to be appended to the list separately).
potential_tags = (
    "person woman man child dog cat sofa car tree sky "
    "flower shoe bag laptop phone house building animal "
    "beach mountain river food fruit book desk chair lamp "
    "bottle keyboard plant bird sunset rain street bicycle "
    "clouds sand road city forest water painting camera "
    "clothes glasses accessories hat vehicle computer"
).split()

# Step 2: Function to Dynamically Generate Tags for Each Image in a Folder
def generate_dynamic_tags(image_folder, *, threshold=0.2, tags=None):
    """Tag every image file in ``image_folder`` using CLIP.

    Each image is scored against all candidate tags; the image/text logits
    are turned into probabilities with a softmax over the candidate tags, and
    every tag whose probability is strictly greater than ``threshold`` is
    kept. Because of the softmax, probabilities are relative to the candidate
    set, not absolute confidences.

    Args:
        image_folder: Directory to scan (not recursive). Only entries whose
            name ends in ``.jpg``, ``.jpeg``, ``.png`` or ``.webp``
            (case-insensitive) are processed.
        threshold: Minimum softmax probability a tag must exceed to be
            selected. Defaults to 0.2.
        tags: Optional iterable of candidate tag strings. Defaults to the
            module-level ``potential_tags``.

    Returns:
        A dict mapping each processed file name to its list of selected tags
        (in candidate order). Empty when the folder holds no image files.
    """
    # Require the leading dot so names such as "readmejpg" are not mistaken
    # for images, and sort so that output order is reproducible.
    image_extensions = (".jpg", ".jpeg", ".png", ".webp")
    image_files = sorted(
        name
        for name in os.listdir(image_folder)
        if name.lower().endswith(image_extensions)
    )
    if not image_files:
        return {}

    candidate_tags = list(potential_tags if tags is None else tags)
    # The text side is identical for every image, so tokenize it only once.
    text_inputs = clip.tokenize(candidate_tags).to(device)

    tags_dict = {}
    for img_file in image_files:
        image_path = os.path.join(image_folder, img_file)
        # The context manager closes the underlying file handle; convert()
        # returns a fully loaded copy that remains valid afterwards.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")
        image_tensor = preprocess(image).unsqueeze(0).to(device)

        # Inference only: no gradients are needed.
        with torch.no_grad():
            logits_per_image, _ = model(image_tensor, text_inputs)
            probs = logits_per_image.softmax(dim=-1).cpu().numpy()

        # probs has a single row because exactly one image was passed in.
        selected_tags = [
            tag for tag, prob in zip(candidate_tags, probs[0]) if prob > threshold
        ]

        tags_dict[img_file] = selected_tags
        print(f"Image: {img_file}, Tags: {selected_tags}")

    return tags_dict

# Usage: run the tagger over every image in the project's image directory
# and print the collected filename -> tags mapping.
image_folder = "D:/SNU/Semester VII/CSD358 Information Retrieval/Project/images"
tags_dict = generate_dynamic_tags(image_folder)

summary_label = "Generated tags for all images in the folder:"
print(summary_label, tags_dict)

Image: 1.webp, Tags: ['clothes']
Image: 2.webp, Tags: ['shoe']
Generated tags for all images in the folder: {'1.webp': ['clothes'], '2.webp': ['shoe']}
