# Assignment 1 - Building a simple image search algorithm

In [2]:
# Importing packages
import os
import cv2
import numpy as np

# Utility functions
import sys
sys.path.append(os.path.join(".."))

# Plotting tool
import matplotlib.pyplot as plt

In [15]:
# Defining function for making 
def extract_color_hist(image_path):
    """
    Extracts color histograms for a single image.

    Args:
        image_path (str): Path to the image file.

    Returns:
        numpy.ndarray: A numpy array containing histograms for each channel (red, green, blue).
    """
    
    image = cv2.imread(image_path)

    # Split channels
    channels = cv2.split(image)

    histograms = []

    for channel in channels:
        # Calculate histogram
        hist = cv2.calcHist([channel], [0], None, [256], [0, 256]).flatten()

        # Normalize histogram
        hist = cv2.normalize(hist, hist, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX)

        histograms.append(hist)

    # Concatenate histograms for all channels
    histograms = np.concatenate(histograms)

    return histograms

In [25]:
# Making function for comparing histograms
def compare_histograms(target_histogram, histograms_list):
    """
    Compares the histogram of a target image to other histograms using Chi-Squared distance.

    Args:
        target_histogram (numpy.ndarray): Histogram of the target image.
        histograms_list (list): List containing tuples of (filename, histograms) for all images.

    Returns:
        list: A list of tuples where each tuple contains the filename and the Chi-Squared distance
              between the target image histogram and the histogram of each image in the dataset.
    """

    distances = []

    for filename, histogram in histograms_list:
        # Compute Chi-Squared distance
        distance = cv2.compareHist(target_histogram, histogram, cv2.HISTCMP_CHISQR)
        distances.append((filename, distance))

    return distances


In [19]:
# Defining folder path
folder_path = os.path.join("..","in", "flowers")

# Initialize an empty list to store (filename, histograms) tuples
histograms_list = []

# Iterate over each image in the folder
for image_filename in os.listdir(folder_path):
    image_path = os.path.join(folder_path, image_filename)

    # Call the function to extract color histograms for the current image
    histograms = extract_color_hist(image_path)

    # Append the filename and histograms tuple to the list
    histograms_list.append((image_filename, histograms))

In [32]:
# Defining image path
image_path = os.path.join("..","in", "flowers","image_0009.jpg")

# Using function for extracting image histogram for chosen image
hist_chosen = extract_color_hist(image_path)

In [36]:
# Comparing histograms
distances = compare_histograms(hist_chosen,histograms_list)

In [38]:
# Sort the distances list based on the distances (second element of each tuple)
sorted_distances = sorted(distances, key=lambda x: x[1])

# Get the top 5 closest histograms
top_5_closest = sorted_distances[:6]

# Print the top 5 closest histograms
for filename, distance in top_5_closest:
    print(f"Distance from {filename}: {distance}")


Distance from image_0009.jpg: 0.0
Distance from image_0408.jpg: 113.69013464591076
Distance from image_0890.jpg: 169.60291780962874
Distance from image_1247.jpg: 179.33006948297367
Distance from image_0114.jpg: 182.7166582250731
Distance from image_1323.jpg: 191.6750983593898


In [1]:
import csv

# Define the output CSV file path
output_path = os.path.join("out", "similar_images.csv")

# Write the top 5 closest histograms to the CSV file
with open(output_csv_path, mode='w', newline='') as file:
    writer = csv.writer(file)
    
    # Write the header
    writer.writerow(["Filename", "Distance"])

    # Write the top 5 closest histograms
    for filename, distance in top_5_closest:
        writer.writerow([filename, distance])

print(f"CSV file saved to: {output_csv_path}")


NameError: name 'os' is not defined