In [1]:
import os
import numpy as np
import cv2

# Function to calculate MSE (Mean Squared Error)
def mean_squared_error(image1, image2):
    return np.sum((image1 - image2) ** 2) / float(image1.shape[0] * image1.shape[1])

# Function to calculate NCC (Normalized Cross-Correlation)
def normalized_cross_correlation(image1, image2):
    # Flatten both images to 1D arrays
    image1 = image1.flatten()
    image2 = image2.flatten()
    
    # Calculate the means
    mean1 = np.mean(image1)
    mean2 = np.mean(image2)
    
    # Calculate the numerator and denominator for NCC
    numerator = np.sum((image1 - mean1) * (image2 - mean2))
    denominator = np.sqrt(np.sum((image1 - mean1) ** 2) * np.sum((image2 - mean2) ** 2))
    
    # Return the NCC value
    return numerator / denominator if denominator != 0 else 0

# Function to compare all images in a folder to a template
def find_best_match(template_path, image_folder, metric='mse'):
    # Load the template image
    template = cv2.imread(template_path, cv2.IMREAD_GRAYSCALE)
    image_scores = []  # List to hold tuples of (image_path, score)

    # Get all image files from the folder (case insensitive check for extensions)
    image_paths = [
        os.path.join(image_folder, f) for f in os.listdir(image_folder) 
        if f.lower().endswith(('.png', '.jpg', '.jpeg'))
    ]
    
    if not image_paths:
        print("No valid image files found in the folder!")
        return []

    # Compare each image in the folder with the template
    for path in image_paths:
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            print(f"Skipping invalid image: {path}")
            continue  # Skip invalid images that couldn't be read
        
        if metric == 'mse':
            score = mean_squared_error(template, img)  # Calculate MSE
        elif metric == 'ncc':
            score = normalized_cross_correlation(template, img)  # Calculate NCC
        else:
            print(f"Unknown metric: {metric}")
            continue
        
        image_scores.append((path, score))

    # Sort the images by score in ascending order for MSE, descending for NCC
    image_scores.sort(key=lambda x: x[1], reverse=(metric == 'ncc'))

    return image_scores

if __name__ == "__main__":
    images_folder = r"C:\Users\MAINAK\Desktop\opencv\dataset\charts"
    reference_image = r"C:\Users\MAINAK\Desktop\opencv\dataset\charts\page.png"

    # Choose the metric: 'mse' for Mean Squared Error or 'ncc' for Normalized Cross-Correlation
    image_matches = find_best_match(reference_image, images_folder, metric='mse')

    # Display all images and their similarity scores sorted
    if image_matches:
        for match, score in image_matches:
            print(f"Image: {match} - Similarity score: {score:.2f}")
    else:
        print("No valid images found for comparison.")


Image: C:\Users\MAINAK\Desktop\opencv\dataset\charts\page.png - Similarity score: 0.00
Image: C:\Users\MAINAK\Desktop\opencv\dataset\charts\page2.png - Similarity score: 0.00
Image: C:\Users\MAINAK\Desktop\opencv\dataset\charts\page3.png - Similarity score: 1.52
Image: C:\Users\MAINAK\Desktop\opencv\dataset\charts\nc_page_67.png - Similarity score: 15.56
Image: C:\Users\MAINAK\Desktop\opencv\dataset\charts\nc_page_82.png - Similarity score: 15.57
Image: C:\Users\MAINAK\Desktop\opencv\dataset\charts\nc_page_93.png - Similarity score: 15.59
Image: C:\Users\MAINAK\Desktop\opencv\dataset\charts\nc_page.png - Similarity score: 15.61
