In [1]:
!pip install git+https://github.com/openai/CLIP.git

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import psutil
import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
from sklearn.metrics.pairwise import cosine_similarity
import clip

# Function to check memory usage
def get_memory_usage():
    process = psutil.Process(os.getpid())
    return process.memory_info().rss / (1024 ** 3)  # Convert bytes to GB

# Load the model
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)



^C


  from .autonotebook import tqdm as notebook_tqdm


Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to c:\users\aks\appdata\local\temp\pip-req-build-widtqcml
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'


  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git 'C:\Users\aks\AppData\Local\Temp\pip-req-build-widtqcml'


In [2]:
# Function to preprocess images
def load_and_preprocess_image(image_path):
    image = Image.open(image_path).convert("RGB")
    return preprocess(image).unsqueeze(0).to(device)

# Function to preprocess text
def preprocess_text(text):
    return clip.tokenize([text]).to(device)

# Paths
images_folder = r"C:\Users\aks\Desktop\zigguratss\artwork"
memory_limit_gb = 10  # Set memory limit to 10 GB

# Load your CSV file
file_path = r"C:\Users\aks\Desktop\zigguratss\output_csv_file.csv"
data = pd.read_csv(file_path)

In [4]:
# Function to preprocess a single text query and return its embedding
def get_single_text_embedding(text):
    text_input = preprocess_text(text)
    with torch.no_grad():
        text_embedding = model.encode_text(text_input)
    text_embedding /= text_embedding.norm(dim=-1, keepdim=True)
    return text_embedding.cpu().numpy()

# Function to compute ORB similarity
def orb_sim(img1, img2):
    orb = cv2.ORB_create()
    kp_a, desc_a = orb.detectAndCompute(img1, None)
    kp_b, desc_b = orb.detectAndCompute(img2, None)
    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    matches = bf.match(desc_a, desc_b)
    similar_regions = [i for i in matches if i.distance < 50]
    if len(matches) == 0:
        return 0
    return len(similar_regions) / len(matches)

# Function to compute SSIM
def structural_sim(img1, img2):
    sim, diff = structural_similarity(img1, img2, full=True)
    return sim

# Load images from a folder
def load_image(image_folder, image_id):
    image_path = os.path.join(image_folder, image_id)
    return cv2.imread(image_path, 0)

# Find most similar images to a text query
def find_most_similar_images_text(csv_path, image_folder, query_text, top_n=3):
    data = pd.read_csv(csv_path)
    filenames = data['image_id'].tolist()
    
    # Load and preprocess images
    images = []
    for filename in filenames:
        image_path = os.path.join(image_folder, filename)
        image = preprocess(Image.open(image_path)).unsqueeze(0).to(device)
        images.append(image)
    
    # Encode images using CLIP model
    with torch.no_grad():
        image_embeddings = torch.cat([model.encode_image(image) for image in images])
    
    # Normalize features
    image_embeddings /= image_embeddings.norm(dim=-1, keepdim=True)
    
    query_text_embedding = get_single_text_embedding(query_text)
    
    # Compute similarity scores using cosine_similarity from sklearn
    similarity_scores = cosine_similarity(image_embeddings.cpu().numpy(), query_text_embedding).squeeze()
    
    # Get indices of the top N most similar images
    top_indices = similarity_scores.argsort()[-top_n:][::-1]
    
    results = []

    # Display the most similar images and compute ORB and SSIM
    plt.figure(figsize=(15, 5))

    for i, idx in enumerate(top_indices):
        similar_image_path = os.path.join(image_folder, filenames[idx])
        similar_image = Image.open(similar_image_path).convert('L')
        img = cv2.imread(similar_image_path, 0)
        orb_similarity = orb_sim(img, img)  # Self-similarity
        if img.shape != img.shape:
            img_resized = resize(img, (img.shape[0], img.shape[1]), anti_aliasing=True, preserve_range=True).astype(img.dtype)
        else:
            img_resized = img
        ssim = structural_sim(img, img_resized)

        results.append({
            'img_id': filenames[idx],
            'orb_similarity': orb_similarity,
            'ssim': ssim
        })

        plt.subplot(1, top_n, i + 1)
        plt.imshow(similar_image)
        plt.title(f"Similar Image {i+1}\nScore: {similarity_scores[idx]:.4f}\nORB: {orb_similarity:.4f}\nSSIM: {ssim:.4f}")
        plt.axis('off')

    plt.show()

    return results

# Define paths
csv_path = r"C:\Users\aks\Desktop\zigguratss\output_csv_file.csv"
image_folder = r"C:\Users\aks\Desktop\zigguratss\artwork"

# Example usage
query_text = 'girl'
results = find_most_similar_images_text(csv_path, image_folder, query_text, top_n=3)

# Print results
for result in results:
    print(f"Image ID: {result['img_id']}")
    print(f"ORB Similarity: {result['orb_similarity']}")
    print(f"SSIM: {result['ssim']}")
    print("-" * 50)

NameError: name 'cv2' is not defined

<Figure size 1500x500 with 0 Axes>