In [2]:
import os
import nltk
import torch
import clip
from PIL import Image
from nltk.corpus import wordnet
from transformers import BlipProcessor, BlipForConditionalGeneration
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Download NLTK data for POS tagging and WordNet if not already downloaded
nltk.download("punkt")
nltk.download("averaged_perceptron_tagger")
nltk.download("wordnet")

# Load CLIP model
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

# Load BLIP model for captioning
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
caption_model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-base"
).to(device)


# Function to generate caption for an image
def generate_caption(image_path):
    image = Image.open(image_path).convert("RGB")
    inputs = processor(image, return_tensors="pt").to(device)
    out = caption_model.generate(**inputs)
    caption = processor.decode(out[0], skip_special_tokens=True)
    return caption


# Function to extract nouns from text
def extract_nouns(text):
    words = nltk.word_tokenize(text)
    pos_tags = nltk.pos_tag(words)
    nouns = [word for word, pos in pos_tags if pos.startswith("NN")]
    return nouns


# Function to find synonyms of a word using WordNet
def find_synonyms(word):
    synonyms = set()
    for syn in wordnet.synsets(word):
        for lemma in syn.lemmas():
            synonyms.add(
                lemma.name().replace("_", " ")
            )  # Replace underscores with spaces for readability
    return list(synonyms)


# Function to generate tags for an image and expand tags with synonyms
def generate_tags_with_synonyms(image_folder):
    tags_dict = {}

    for img_file in os.listdir(image_folder):
        if img_file.lower().endswith(("jpg", "jpeg", "png", "webp")):
            image_path = os.path.join(image_folder, img_file)

            # Step 1: Generate caption for the image
            caption = generate_caption(image_path)
            print(f"Caption for {img_file}: {caption}")

            # Step 2: Extract nouns from the caption
            initial_tags = extract_nouns(caption)
            expanded_tags = set()

            # Step 3: Expand each noun with synonyms and add to tag set
            for tag in initial_tags:
                expanded_tags.add(tag)  # Include the original noun
                synonyms = find_synonyms(tag)
                expanded_tags.update(synonyms)  # Add synonyms to the tag set

            tags_dict[img_file] = list(expanded_tags)
            print(f"Tags for {img_file}: {tags_dict[img_file]}")

    return tags_dict


# Run the tagging process on your image folder
image_folder = "images"  # Replace with your image folder path
tags_dict = generate_tags_with_synonyms(image_folder)

# Display tags for each image
for img_file, tags in tags_dict.items():
    print(f"Image: {img_file}, Tags: {tags}")

  from .autonotebook import tqdm as notebook_tqdm
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\punya\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\punya\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\punya\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Caption for 1.webp: two women in red and yellow dresses posing for the camera
Tags for 1.webp: ['fair sex', 'lop', 'tog', 'get dressed', 'cleaning woman', 'groom', 'preen', 'camera', 'charwoman', 'habilitate', 'clothes', 'dress out', 'cut back', 'primp', 'clip', 'garment', 'coif', 'garnish', 'plume', 'coiffure', 'women', 'char', 'dress', 'frock', 'clothe', 'adult female', 'dresses', 'snip', 'raiment', 'garb', 'apparel', 'line up', 'dress up', 'television camera', 'prune', 'crop', 'curry', 'attire', 'fit out', 'woman', 'coiffe', 'set', 'arrange', 'trim', 'enclothe', 'photographic camera', 'wearing apparel', 'decorate', 'tv camera', 'do', 'womanhood', 'cleaning lady']
Caption for 2.webp: a pair of white shoes on a blue and pink background
Tags for 2.webp: ['distich', 'racy', 'couplet', 'blue angel', 'dyad', 'tap', 'span', 'grim', 'amobarbital sodium', 'naughty', 'scope', 'downhearted', 'pink', 'background signal', 'mate', 'blueness', 'rap', 'gloomy', 'desktop', 'play down', 'pinko', 'duo