In [18]:
import os
import json
import nltk
from nltk.corpus import wordnet
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import torch

# Download NLTK data if not already downloaded.
# NOTE(review): these packages presumably back nltk.word_tokenize ("punkt"),
# nltk.pos_tag ("averaged_perceptron_tagger") and the WordNet corpus used by
# the functions below - confirm the resource names against the installed NLTK
# release.
nltk.download("punkt")
nltk.download("averaged_perceptron_tagger")
nltk.download("wordnet")

# Load the BLIP model for captioning.
# The model is moved to the GPU when CUDA is available, otherwise it stays on
# the CPU. `processor` prepares images for the model and decodes generated
# token ids back to text (see generate_caption).
device = "cuda" if torch.cuda.is_available() else "cpu"
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
caption_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)

# File to store image captions and tags (path is relative to the working
# directory of the kernel).
DATA_FILE = "image_tags.json"

# Load or initialize the JSON file for storing processed images.
# `image_data` is a module-level dict keyed by image file name; it is extended
# by generate_tags_with_expansion and read by retrieve_images. It is loaded
# once, when this cell runs.
if os.path.exists(DATA_FILE):
    with open(DATA_FILE, "r") as file:
        image_data = json.load(file)
else:
    image_data = {}

# Function to generate caption for an image
def generate_caption(image_path):
    """Generate a caption for a single image with the BLIP captioning model.

    Args:
        image_path: Path of the image file to caption.

    Returns:
        The decoded caption string with special tokens removed.

    Raises:
        Whatever ``Image.open`` raises for a missing or unreadable file.
    """
    # Image.open is lazy and keeps the underlying file open. The context
    # manager releases the handle as soon as the pixels have been converted;
    # convert() returns an independent image, so it stays usable afterwards.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert("RGB")

    inputs = processor(image, return_tensors="pt").to(device)
    out = caption_model.generate(**inputs)
    caption = processor.decode(out[0], skip_special_tokens=True)
    return caption

# Function to extract nouns from text
def extract_nouns(text):
    """Return the noun tokens of ``text`` in their order of appearance.

    The text is tokenized and part-of-speech tagged with NLTK; a token is kept
    when its tag starts with "NN". Repeated nouns are kept as repeats.
    """
    tagged_tokens = nltk.pos_tag(nltk.word_tokenize(text))

    noun_tokens = []
    for token, tag in tagged_tokens:
        if tag.startswith("NN"):
            noun_tokens.append(token)
    return noun_tokens

# Function to find a large set of related words for a tag using WordNet
def expand_with_synonyms(tag, min_tags=200):
    """Collect WordNet terms related to ``tag``.

    For every synset of ``tag`` this gathers the lemma names of the synset
    itself (synonyms), of its direct hypernyms and of its direct hyponyms.
    Underscores in lemma names are replaced with spaces and duplicates are
    dropped.

    Args:
        tag: Word to look up in WordNet.
        min_tags: Maximum number of terms to return. Despite its name this is
            a cap, not a guarantee: WordNet may simply know fewer related
            terms. Values of zero or below yield an empty list.

    Returns:
        A list of unique related terms, at most ``min_tags`` long, in the
        order they were discovered (synsets in the order WordNet returns
        them; within a synset: synonyms, then hypernyms, then hyponyms). The
        order is therefore reproducible between runs.
    """
    # A negative value would otherwise slice from the end of the list.
    limit = max(min_tags, 0)

    # dict keys de-duplicate while remembering insertion order, so truncating
    # keeps the earliest-discovered terms instead of an arbitrary subset.
    related_words = {}
    for syn in wordnet.synsets(tag):
        for related_synset in [syn, *syn.hypernyms(), *syn.hyponyms()]:
            for lemma in related_synset.lemmas():
                related_words.setdefault(lemma.name().replace('_', ' '), None)

    return list(related_words)[:limit]

# Function to generate a large tag set for new images
def generate_tags_with_expansion(image_folder, min_tags=200):
    """Caption and tag every not-yet-processed image in ``image_folder``.

    For each image file (.jpg, .jpeg, .png, .webp; case-insensitive) whose
    file name is not already a key of the module-level ``image_data`` dict,
    this generates a caption, extracts the caption's nouns, expands each noun
    with related WordNet terms and stores ``{"caption": ..., "tags": [...]}``
    under the file name. ``image_data`` is then written to ``DATA_FILE``.

    Args:
        image_folder: Directory to scan (not recursive).
        min_tags: Approximate tag budget per image. It is split evenly across
            the caption's distinct nouns, with at least one related term
            requested per noun. The nouns themselves are always stored as
            tags, so the final count can be smaller or slightly larger than
            this value.

    Raises:
        Any error raised while listing the folder or captioning an image is
        propagated. Images processed before the failure are still written to
        ``DATA_FILE``.
    """
    image_extensions = {".jpg", ".jpeg", ".png", ".webp"}

    # Listed outside the try block so a bad folder path fails before anything
    # is written.
    file_names = os.listdir(image_folder)

    try:
        for img_file in file_names:
            # Compare the real extension: a bare suffix test would also accept
            # names such as "notajpg".
            if os.path.splitext(img_file)[1].lower() not in image_extensions:
                continue

            # Skip images that are already processed and stored in the JSON file
            if img_file in image_data:
                print(f"Skipping already processed image: {img_file}")
                continue

            image_path = os.path.join(image_folder, img_file)

            # Step 1: Generate caption for the image
            caption = generate_caption(image_path)
            print(f"Caption for {img_file}: {caption}")

            # Step 2: Extract nouns from the caption (repeats removed, order kept)
            initial_tags = list(dict.fromkeys(extract_nouns(caption)))

            # Step 3: Expand each noun with related words. A dict is used as an
            # ordered set so the stored tag list is reproducible.
            expanded_tags = {}
            if initial_tags:
                # Never let the per-noun share floor to zero when a caption
                # has more nouns than the requested budget.
                per_noun_budget = max(1, min_tags // len(initial_tags))
                for tag in initial_tags:
                    # The caption noun is itself the most direct tag.
                    expanded_tags.setdefault(tag, None)
                    for word in expand_with_synonyms(tag, min_tags=per_noun_budget):
                        expanded_tags.setdefault(word, None)

            # Store the caption and tags in the image data dictionary
            image_data[img_file] = {
                "caption": caption,
                "tags": list(expanded_tags)
            }
            print(f"Tags for {img_file}: {len(image_data[img_file]['tags'])} tags generated.")
    finally:
        # Save even when an image fails, so captions already computed in this
        # run are not lost.
        with open(DATA_FILE, "w") as file:
            json.dump(image_data, file, indent=4)
        print(f"Updated data saved to {DATA_FILE}")

# Function to expand the search query with related words
def expand_query(query):
    """Turn a free-text query into a set of search terms.

    The query is lower-cased and tokenized with NLTK. The result contains
    every token plus the related words that ``expand_with_synonyms`` returns
    for it.

    Returns:
        A set of strings; empty when the query yields no tokens.
    """
    search_terms = set()
    for token in nltk.word_tokenize(query.lower()):
        # Limit query expansion to 20 words per term
        search_terms |= {token, *expand_with_synonyms(token, min_tags=20)}
    return search_terms

# Function to retrieve images based on expanded query
def retrieve_images(query):
    """Rank stored images by how many of their tags match the expanded query.

    Args:
        query: Free-text search string.

    Returns:
        File names (keys of the module-level ``image_data``) that share at
        least one tag with the expanded query, ordered by number of matching
        tags, highest first. Images with equal scores keep their order in
        ``image_data``.
    """
    # Stored tags are lower-cased below, but the expansion terms come from
    # WordNet lemma names that nothing in this file lower-cases. Normalise
    # both sides so a capitalised term can still match.
    expanded_query = {term.lower() for term in expand_query(query)}
    results = {}

    # Check each image's tags to see if they match the expanded query terms
    for img_file, data in image_data.items():
        # Tolerate entries without a "tags" list (e.g. a hand-edited JSON file).
        tags = {tag.lower() for tag in data.get("tags", [])}
        match_count = len(expanded_query & tags)
        if match_count:
            # Count of matching tags is used as the score
            results[img_file] = match_count

    # Sort images by the number of matching tags (descending order); sorted()
    # is stable, so ties keep their original relative order.
    sorted_results = sorted(results.items(), key=lambda x: x[1], reverse=True)
    return [img for img, _score in sorted_results]

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\punya\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\punya\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\punya\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [25]:
# Usage Example
# Caption and tag every image in the folder that is not yet a key of
# image_data, then write the results to DATA_FILE. Images that are already
# stored are skipped, so re-running this cell is cheap.
image_folder = "images"  # Replace with your actual image folder path
generate_tags_with_expansion(image_folder, min_tags=200)

# Free-text query example
# The query is expanded with related WordNet terms and matched against each
# image's stored tags; file names come back ordered by number of matching
# tags, highest first.
query = "two men"
top_images = retrieve_images(query)
print("Top retrieved images:", top_images)

Skipping already processed image: 1.webp
Skipping already processed image: 2.webp
Skipping already processed image: 3.jpg
Skipping already processed image: 360_F_555879079_fg6dTHFT29m5B7qgWei45WbYYle3pCFA.jpg
Skipping already processed image: Designer2.png
Skipping already processed image: IMG-20230113-WA0352.jpg
Skipping already processed image: redfront.jpg
Skipping already processed image: WhatsApp Image 2022-11-03 at 17.24.45.jpeg
Updated data saved to image_tags.json
Top retrieved images: ['3.jpg', 'IMG-20230113-WA0352.jpg', '2.webp']
