In [1]:
import os
from shutil import copyfile
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from scipy.spatial.distance import cosine
import numpy as np

In [2]:
from google.colab import drive
drive.mount("/Similar_IMG_search/")

Mounted at /Similar_IMG_search/


In [3]:

# Load VGG16 model without the top classification layer
model = VGG16(weights='imagenet', include_top=False)

def extract_features(image_path, model):
    # Load and preprocess the image
    img = load_img(image_path, target_size=(224, 224))
    img_array = img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    img_array = preprocess_input(img_array)

    # Extract features
    features = model.predict(img_array)
    return features.flatten()


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [4]:
def find_top_similar_images(query_img_path, folder_path, output_folder, top_n=20):
    # Extract features of the query image
    query_features = extract_features(query_img_path, model)

    # Store distances for all images
    similarity_scores = []

    # Loop through all images in the folder
    for file_name in os.listdir(folder_path):
        file_path = os.path.join(folder_path, file_name)

        # Extract features of the image
        try:
            img_features = extract_features(file_path, model)

            # Compute cosine distance
            distance = cosine(query_features, img_features)
            similarity_scores.append((file_path, distance))
        except:
            continue

    # Sort by distance (lower distance means more similar)
    similarity_scores = sorted(similarity_scores, key=lambda x: x[1])

    # Get top N similar images
    top_similar_images = similarity_scores[:top_n]

    # Save the top similar images to the specified output folder with renamed filenames
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)  # Create the folder if it doesn't exist

    for idx, (img_path, distance) in enumerate(top_similar_images):
        # Format the new filename to include cosine similarity
        new_filename = f"{idx + 1:02d}_{distance:.4f}.jpg"
        output_path = os.path.join(output_folder, new_filename)
        copyfile(img_path, output_path)

    return top_similar_images

In [5]:
# Example usage
query_image = "/Similar_IMG_search/MyDrive/Find_similarity/014.jpg"  # Path to the query image
image_folder = "/Similar_IMG_search/MyDrive/Find_similarity/ascending_cluster/cluster_0/"  # Folder containing images
output_folder = "/Similar_IMG_search/MyDrive/Find_similarity/output_searched_Img/"  # Folder to save the result

top_images = find_top_similar_images(query_image, image_folder, output_folder, top_n=20)

# Print the results
print("Top 20 similar images:")
for img_path, distance in top_images:
    print(f"Image: {img_path} (Distance: {distance})")
print(f"Top 20 images saved to: {output_folder}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 809ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 639ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 713ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 718ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 614ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 694ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 992ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 917ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 521ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 544ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 537ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 523ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 532ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 