In [1]:
import numpy as np
import cv2
import os
from scipy import ndimage
from scipy.spatial import distance
from sklearn.cluster import KMeans

import time

In [2]:
filepath = os.path.join('benchmark_data', '20231106-182012')
filepath

'benchmark_data\\20231106-182012'

In [3]:
def load_images_from_folder(folder):
    images = {}
    img_files = os.listdir(folder)
    for image_file in img_files:
        image_path = os.path.join(folder, image_file)
        if image_path == os.path.join(filepath, '676_-1.999885817130722_30.978629028113254_115.png'):
            print("skipped one image so we can test with it")
            continue
        if os.path.isfile(image_path) and image_file.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp')):
            img = cv2.imread(image_path)
            if img is not None:
                images[image_file]=img
    return images

In [4]:
images = load_images_from_folder(filepath)

skipped one image so we can test with it


In [5]:
def sift_features(images):
    descriptor_list = []
    image_to_descriptors = {}
    sift = cv2.SIFT_create()

    for key, img in images.items():
        _kp, des = sift.detectAndCompute(img, None)
        if des is not None:
            descriptor_list.extend(des)
            image_to_descriptors[key] = des
    return descriptor_list, image_to_descriptors

In [6]:
start_time = time.time()

descriptor_list, image_to_descriptors = sift_features(images) 

elapsed_time = time.time() - start_time

print(f"Took {elapsed_time}s detect features from {len(images)}; found {len(descriptor_list)} descriptors")

Took 47.087950468063354s detect features from 4199; found 1077840 descriptors


In [7]:
def unsupervised_kmeans(k, descriptor_list):
    kmeans = KMeans(n_clusters=k)
    kmeans.fit(descriptor_list)
    visual_words = kmeans.cluster_centers_
    return visual_words

In [8]:
num_clusters = 150

start_time = time.time()

visual_words_unsupervised = unsupervised_kmeans(num_clusters, descriptor_list)

elapsed_time = time.time() - start_time
print(f"Took {elapsed_time}s to group {len(descriptor_list)} descriptors from {len(images)} into {num_clusters} clusters")

  super()._check_params_vs_input(X, default_n_init=10)


Took 701.6489021778107s to group 1077840 descriptors from 4199 into 150 clusters


In [9]:
def calculate_histogram(descriptors, visual_words):
    # Create an array to store the histogram
    histogram = np.zeros(len(visual_words))

    for descriptor in descriptors:
        # Find the nearest visual word for the descriptor
        nearest_word = np.argmin(np.linalg.norm(visual_words - descriptor, axis=1))
        # Increment the corresponding bin in the histogram
        histogram[nearest_word] += 1

    return histogram



In [10]:
start_time = time.time()

# Calculate the target image's histogram using unsupervised visual words
image_to_histogram = {}
for key, desc in image_to_descriptors.items():
  hist = calculate_histogram(desc, visual_words_unsupervised)
  image_to_histogram[key] = hist

elapsed_time = time.time() - start_time
print(f"Took {elapsed_time}s to calculate histograms for {len(images)} training images")

image_to_histogram




Took 43.549728870391846s to calculate histograms for 4199 training images


{'10001_65.83862105342794_55.90920360993461_94.png': array([2., 0., 1., 0., 2., 0., 0., 1., 1., 0., 1., 6., 0., 4., 1., 0., 8.,
        3., 0., 0., 0., 1., 5., 1., 1., 1., 3., 1., 2., 2., 0., 6., 0., 2.,
        0., 1., 0., 1., 3., 2., 1., 1., 1., 0., 0., 1., 2., 0., 1., 1., 0.,
        2., 0., 3., 5., 3., 0., 1., 0., 0., 6., 0., 2., 0., 0., 0., 2., 0.,
        0., 0., 2., 2., 1., 2., 3., 0., 4., 0., 1., 0., 1., 2., 1., 2., 2.,
        2., 0., 1., 1., 2., 1., 0., 0., 1., 1., 0., 1., 2., 3., 0., 2., 1.,
        0., 3., 2., 1., 1., 0., 0., 3., 0., 8., 2., 0., 5., 1., 2., 2., 4.,
        2., 0., 1., 2., 3., 0., 1., 2., 0., 2., 0., 0., 1., 3., 0., 0., 1.,
        1., 4., 1., 0., 0., 4., 0., 3., 7., 3., 2., 0., 0., 4.]),
 '10006_65.83862105342794_55.90920360993461_89.png': array([2., 0., 4., 0., 3., 0., 0., 1., 0., 0., 1., 4., 1., 3., 0., 2., 6.,
        2., 1., 0., 0., 0., 9., 0., 1., 0., 1., 4., 1., 2., 0., 1., 2., 1.,
        0., 5., 2., 1., 2., 2., 2., 1., 2., 1., 0., 0., 3., 1., 0., 0.

In [11]:
def match_target_image(target_histogram, dataset_histograms):
    best_match = None
    min_distance = float('inf')

    for key, histogram in dataset_histograms.items():
        distance = np.linalg.norm(target_histogram - histogram)
        if distance < min_distance:
            min_distance = distance
            best_match = key

    return best_match

In [13]:
query_image_path = os.path.join(filepath, '676_-1.999885817130722_30.978629028113254_115.png')
query_img = cv2.imread(query_image_path)
sift = cv2.SIFT_create()
_kp, desc = sift.detectAndCompute(query_img, None)
query_hist = calculate_histogram(desc, visual_words_unsupervised)
query_hist


array([ 1.,  1.,  0.,  1.,  0.,  5.,  5.,  1.,  1.,  0.,  1.,  6.,  2.,
        3.,  1.,  1.,  2.,  0.,  0.,  3.,  1.,  2.,  8.,  4.,  2.,  1.,
        2.,  1.,  4.,  0.,  2.,  0.,  1.,  4.,  2.,  1.,  2.,  0.,  4.,
        4.,  5.,  0.,  1.,  3.,  0.,  2.,  0.,  1.,  0.,  0.,  3.,  1.,
        0.,  4.,  3.,  2.,  1.,  3.,  0.,  6.,  0.,  0.,  1.,  0.,  1.,
        5.,  1.,  1.,  0.,  2.,  0.,  4.,  2.,  5.,  2.,  0.,  1.,  0.,
        3.,  1.,  6.,  0.,  0.,  3.,  2.,  1.,  5.,  2.,  6.,  1.,  0.,
        3.,  1.,  6.,  0.,  0.,  0.,  0.,  0.,  0.,  7.,  3.,  2.,  0.,
        0.,  0.,  3.,  0.,  3.,  2.,  1.,  1.,  1.,  2.,  2.,  1.,  0.,
        5.,  2.,  0.,  4.,  4., 11.,  1.,  0.,  1.,  1.,  0.,  2.,  0.,
        5.,  9.,  6.,  1.,  1.,  3.,  0.,  2.,  9.,  2.,  7.,  0.,  1.,
        3.,  1.,  1.,  2.,  1.,  0.,  2.])

In [14]:
match = match_target_image(query_hist, image_to_histogram)
match

'701_-1.999885817130722_30.978629028113254_115.png'