In [1]:
import sqlite3

In [2]:
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
# connect to the SQLite database (or create it if it doesn't exist)
conn = sqlite3.connect('image_metadata.db')
c = conn.cursor()

In [4]:
# get image metadata from the database
c.execute("SELECT file_path, file_name, caption, tags FROM images")  
data = c.fetchall()

In [5]:
# separate the file_paths, file_names, captions, and tags
file_paths = [row[0] for row in data]
file_names = [row[1] for row in data]
captions = [row[2] for row in data]
tags = [row[3] for row in data]

In [6]:
# combine captions and tags into a single list of documents
documents = [f"{caption} {tag}" for caption, tag in zip(captions, tags)]

In [7]:
# create a TfidfVectorizer and fit it to the documents
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(documents)

In [25]:
def find_similar_images(query, threshold=0.05):
    # vectorize the query
    query_vec = vectorizer.transform([query])
    
    # calculate cosine similarities between the query and the documents
    similarities = cosine_similarity(query_vec, X).flatten()
    
    # get indices of images above the threshold
    above_threshold_indices = [i for i, score in enumerate(similarities) if score > threshold]
    
    # print out the file paths of the images above the threshold
    for index in above_threshold_indices:
        print(f"File name: {file_names[index]}, File path: {file_paths[index]}, Similarity: {similarities[index]}")

In [27]:
# prompt the user to enter a query and run the similarity search
query = "man"
find_similar_images(query)

File name: laion2b_en_part_00000_000007.png, File path: D:\AIML\gimmick\Scene-Sense\test100\laion2b_en_part_00000_000007.png, Similarity: 0.18630055746048146
File name: laion2b_en_part_00000_000438.png, File path: D:\AIML\gimmick\Scene-Sense\test100\laion2b_en_part_00000_000438.png, Similarity: 0.1260450349713539
File name: laion2b_en_part_00000_000940.png, File path: D:\AIML\gimmick\Scene-Sense\test100\laion2b_en_part_00000_000940.png, Similarity: 0.30796071456718993
File name: laion2b_en_part_00000_000998.png, File path: D:\AIML\gimmick\Scene-Sense\test100\laion2b_en_part_00000_000998.png, Similarity: 0.2462405840188553
File name: laion2b_en_part_00001_000036.png, File path: D:\AIML\gimmick\Scene-Sense\test100\laion2b_en_part_00001_000036.png, Similarity: 0.22640848782745157
File name: laion2b_en_part_00001_000203.png, File path: D:\AIML\gimmick\Scene-Sense\test100\laion2b_en_part_00001_000203.png, Similarity: 0.13894008234869507
File name: laion2b_en_part_00001_000246.png, File path