In [1]:
# pip install torch torchvision transformers pillow matplotlib
!pip install ipywidgets
!jupyter nbextension enable --py widgetsnbextension


Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: OK


In [None]:
# If an image is captioned as:

# "a man with eyeglasses reading a book"
# → It gets sorted into the with_eyeglasses folder.

# If it's:

# "a woman smiling"
# → It goes to without_eyeglasses.

import os # Imports necessary libraries for image processing, model loading, and visualization.
import torch # PyTorch is used for deep learning tasks.
from PIL import Image # Python Imaging Library for image processing.
from transformers import BlipProcessor, BlipForConditionalGeneration #Loads a pre-trained AI model to describe what's in an image (caption generation).
from tqdm.notebook import tqdm # tqdm is used for displaying progress bars in Jupyter notebooks.
import shutil # shutil is used for file operations like copying files.
import matplotlib.pyplot as plt # Matplotlib is used for image visualization.

# AI captioning (BLIP) → transformers, torch

# Image handling → PIL, shutil

# Folder management → os

# Progress display → tqdm

# Visual output → matplotlib

# Select the compute device: CUDA GPU when one is available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Hugging Face checkpoint used for both the pre-processor and the captioning model.
blip_checkpoint = "Salesforce/blip-image-captioning-base"

# The processor turns PIL images into model inputs and decodes generated token IDs.
processor = BlipProcessor.from_pretrained(blip_checkpoint)

# Load the pre-trained captioning model, then place its weights on the chosen device.
model = BlipForConditionalGeneration.from_pretrained(blip_checkpoint)
model = model.to(device)

# Folder that is scanned for input images, and the two output folders the
# images are copied into depending on their caption.
input_folder = "images"
with_glasses_folder = "with_eyeglasses"
without_glasses_folder = "without_eyeglasses"

# Create both output folders up front; exist_ok avoids an error on re-runs.
os.makedirs(with_glasses_folder, exist_ok=True)
os.makedirs(without_glasses_folder, exist_ok=True)

# Paths of the source images whose caption mentioned eyeglasses.
with_glasses_images = []

# Collect the .jpg/.jpeg/.png files (case-insensitive) from the input folder.
# os.listdir returns entries in arbitrary order, so the names are sorted to
# make the processing order reproducible; directories that merely happen to
# carry an image extension are excluded because they cannot be opened.
image_files = sorted(
    f
    for f in os.listdir(input_folder)
    if f.lower().endswith((".jpg", ".jpeg", ".png"))
    and os.path.isfile(os.path.join(input_folder, f))
)

# Caption substrings that mark an image as showing eyeglasses.
# NOTE(review): matching is by substring, so captions containing words such
# as "sunglasses" or "wine glasses" also count as a hit - confirm this is
# the intended behaviour.
glasses_keywords = ("eyeglasses", "glasses", "spectacles")

# Caption every image and copy it into the matching output folder.
for filename in tqdm(image_files, desc="Processing images"):
    # Full path of the current image, e.g. "images" + "pic1.jpg".
    image_path = os.path.join(input_folder, filename)

    # Open the image and convert it to 3-channel RGB for the model. The
    # context manager releases the file handle once the pixels are loaded.
    # An unreadable or corrupt file is reported and skipped so that a single
    # bad file does not abort the rest of the batch.
    try:
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")
    except OSError as exc:
        print(f"{filename}: skipped, could not read image ({exc})")
        continue

    # Pre-process the image into PyTorch tensors on the same device as the model.
    inputs = processor(images=image, return_tensors="pt").to(device)

    # Inference only: disabling gradient tracking saves memory and computation.
    with torch.no_grad():
        out = model.generate(**inputs)

    # Decode the generated token IDs into a lowercase caption string.
    caption = processor.decode(out[0], skip_special_tokens=True).lower()

    print(f"{filename}: Caption = {caption}")

    # Copy the original file into the folder selected by the caption keywords,
    # and remember the images that were classified as having eyeglasses.
    has_glasses = any(word in caption for word in glasses_keywords)
    destination = with_glasses_folder if has_glasses else without_glasses_folder
    shutil.copy(image_path, os.path.join(destination, filename))
    if has_glasses:
        with_glasses_images.append(image_path)

def show_images(image_paths, title):
    """Display up to the first six images in a 2x3 grid under a shared title.

    Args:
        image_paths: Sequence of image file paths; only the first six are shown.
        title: Text used as the overall figure title.
    """
    plt.figure(figsize=(12, 6))
    plt.suptitle(title, fontsize=16)
    for i, path in enumerate(image_paths[:6]):
        plt.subplot(2, 3, i + 1)  # Slots are numbered from 1 in the 2x3 grid.
        # Open through a context manager so the file handle is released once
        # the image has been handed over to matplotlib.
        with Image.open(path) as img:
            plt.imshow(img)
        plt.title(os.path.basename(path))  # Label each tile with its file name.
        plt.axis('off')  # Hide the axes for a cleaner look.
    plt.tight_layout()
    plt.show()

# Preview the eyeglasses matches, or report that none were found.
if not with_glasses_images:
    print("No eyeglasses detected in images.")
else:
    show_images(with_glasses_images, "Images with Eyeglasses")

