<a href="https://colab.research.google.com/github/punyamsingh/IRIS/blob/Colab/IRIS_TAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
! pip install ftfy regex tqdm
! pip install git+https://github.com/openai/CLIP.git

In [None]:
! pip install torch torchvision
! pip install yolov5


In [None]:
! pip install torch torchvision transformers sentence-transformers nltk


In [1]:
import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
import os
import json

# Load the pre-trained BLIP model and processor
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

# Function to generate long captions using BLIP
def get_image_tags(image_path):
    """Use BLIP to generate detailed captions for the image."""
    image = Image.open(image_path).convert("RGB")

    # Preprocess the image
    inputs = processor(images=image, return_tensors="pt")

    # Generate a caption with increased max_length for more verbosity
    out = model.generate(**inputs, max_length=50, num_beams=5, temperature=0.7)  # num_beams and temperature can control output quality

    # Decode the output caption (this should be more verbose)
    caption = processor.decode(out[0], skip_special_tokens=True)
    print(f"Generated Caption for {image_path}: {caption}")

    # Extract tags (split caption into words or use NLP techniques to extract key objects)
    tags = set(caption.lower().split())

    return list(tags)

# Process all images in a directory and save results to JSON
def process_directory(directory_path, output_file='image_tags.json'):
    """Process all images in a directory, generate tags using BLIP, and save to JSON file."""
    image_tags_mapping = {}

    for filename in os.listdir(directory_path):
        if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            image_path = os.path.join(directory_path, filename)
            # Step 1: Get detailed tags from the image using BLIP
            tags = get_image_tags(image_path)
            # Step 2: Store the mapping
            image_tags_mapping[filename] = tags
            print(f"Processed {filename} with tags: {tags}")

    # Save the image-to-tags mapping to a JSON file
    with open(output_file, 'w') as f:
        json.dump(image_tags_mapping, f, indent=4)

# Example usage
process_directory('/content')


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/287 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/506 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/4.56k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/990M [00:00<?, ?B/s]



Generated Caption for /content/IMG_9350.JPG: a car driving down a dirt road with trees in the background
Processed IMG_9350.JPG with tags: ['background', 'car', 'trees', 'the', 'a', 'down', 'driving', 'with', 'dirt', 'road', 'in']
Generated Caption for /content/IMG_9344.JPG: green bushes in front of the building
Processed IMG_9344.JPG with tags: ['in', 'building', 'front', 'the', 'green', 'bushes', 'of']
Generated Caption for /content/IMG_9466.JPG: a man riding a bike down a sidewalk
Processed IMG_9466.JPG with tags: ['riding', 'man', 'a', 'sidewalk', 'down', 'bike']
Generated Caption for /content/IMG_9327.JPG: a tree in the middle of a park
Processed IMG_9327.JPG with tags: ['of', 'the', 'a', 'middle', 'park', 'tree', 'in']
Generated Caption for /content/IMG_9476.jpg: a person standing in front of a wall
Processed IMG_9476.jpg with tags: ['wall', 'of', 'front', 'a', 'person', 'in', 'standing']
Generated Caption for /content/IMG_9351.JPG: a truck driving down a dirt road with trees in 

In [2]:
import json
from collections import defaultdict

# Load the JSON file with image tags
def load_image_tags(file_path):
    """Load image tags from a JSON file."""
    with open(file_path, 'r') as f:
        image_tags_mapping = json.load(f)
    return image_tags_mapping

# Create an inverted index from the image tags JSON file
def create_inverted_index(image_tags_mapping):
    """Create an inverted index from image-to-tags mapping."""
    inverted_index = defaultdict(list)

    for image, tags in image_tags_mapping.items():
        for tag in tags:
            inverted_index[tag].append(image)

    return dict(inverted_index)  # Convert to a regular dict for easier handling

# Save the inverted index to a new JSON file
def save_inverted_index(inverted_index, output_file='inverted_index.json'):
    """Save the inverted index to a JSON file."""
    with open(output_file, 'w') as f:
        json.dump(inverted_index, f, indent=4)
    print(f"Inverted index saved to {output_file}")

# Example usage
# Step 1: Load the image-to-tags JSON file generated previously
image_tags_mapping = load_image_tags('image_tags.json')

# Step 2: Create the inverted index
inverted_index = create_inverted_index(image_tags_mapping)

# Step 3: Save the inverted index to a new JSON file
save_inverted_index(inverted_index)


Inverted index saved to inverted_index.json


In [5]:
import json
import nltk
from nltk.corpus import stopwords
from sentence_transformers import SentenceTransformer, util
import numpy as np

# Download NLTK stopwords if not already downloaded
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

# Load the SentenceTransformer model for semantic similarity
model = SentenceTransformer('all-MiniLM-L6-v2')  # Lightweight and accurate for similarity tasks

# Load inverted index
def load_inverted_index(file_path='inverted_index.json'):
    """Load the inverted index from a JSON file."""
    with open(file_path, 'r') as f:
        inverted_index = json.load(f)
    return inverted_index

# Preprocess the user query
def preprocess_query(query):
    """Lowercase and remove stop words from the query."""
    words = query.lower().split()
    filtered_words = [word for word in words if word not in stop_words]
    return ' '.join(filtered_words)

# Perform query on inverted index with similarity matching
def search_images(query, inverted_index, threshold=0.5):
    """
    Search for images based on a user query.
    Combines literal matching from inverted index and semantic similarity.
    """
    processed_query = preprocess_query(query)

    # Get tags and filenames for semantic matching
    tags = list(inverted_index.keys())
    tag_filenames = {tag: inverted_index[tag] for tag in tags}

    # Encode the processed query and tags for semantic matching
    query_embedding = model.encode(processed_query, convert_to_tensor=True)
    tag_embeddings = model.encode(tags, convert_to_tensor=True)

    # Calculate cosine similarities
    cosine_scores = util.pytorch_cos_sim(query_embedding, tag_embeddings)[0]

    # Filter tags based on similarity threshold
    relevant_tags = [tags[i] for i, score in enumerate(cosine_scores) if score >= threshold]
    relevant_scores = [score.item() for score in cosine_scores if score >= threshold]

    # Aggregate images from relevant tags
    image_results = {}
    for tag, score in zip(relevant_tags, relevant_scores):
        for image in tag_filenames[tag]:
            if image not in image_results or image_results[image] < score:
                image_results[image] = score

    # Sort images by their highest similarity score
    sorted_images = sorted(image_results.items(), key=lambda x: x[1], reverse=True)

    # Display results or message if no images found
    if sorted_images:
        print("Images found:")
        for image, score in sorted_images:
            print(f"Image: {image}, Similarity Score: {score:.2f}")
        return [image for image, _ in sorted_images]  # Return only image names
    else:
        print("No relevant images found.")
        return []

# Example usage
# Step 1: Load the inverted index
inverted_index = load_inverted_index('inverted_index.json')

# Step 2: Enter a query
user_query = "Man talking on phone"

# Step 3: Search for relevant images
search_images(user_query, inverted_index, threshold=0.2)


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Images found:
Image: IMG_9476.jpg, Similarity Score: 0.28
Image: IMG_9352.JPG, Similarity Score: 0.28
Image: IMG_9466.JPG, Similarity Score: 0.25
Image: IMG_9335.JPG, Similarity Score: 0.25
Image: IMG_9337.JPG, Similarity Score: 0.25
Image: IMG_9413.JPG, Similarity Score: 0.25
Image: IMG_9333.JPG, Similarity Score: 0.25
Image: IMG_9889.JPG, Similarity Score: 0.25
Image: IMG_9332.JPG, Similarity Score: 0.25
Image: IMG_9350.JPG, Similarity Score: 0.20


['IMG_9476.jpg',
 'IMG_9352.JPG',
 'IMG_9466.JPG',
 'IMG_9335.JPG',
 'IMG_9337.JPG',
 'IMG_9413.JPG',
 'IMG_9333.JPG',
 'IMG_9889.JPG',
 'IMG_9332.JPG',
 'IMG_9350.JPG']