In [2]:
import pickle
import random
from PIL import Image, ImageStat
from torchvision.transforms import transforms
import os
import numpy as np
import cv2
from tqdm.notebook import tqdm

h,w = 134,100
# h,w = 1333, 1000
transform = transforms.Resize((h,w))
images_path = 'D:\My Docs/University\Applied Data Science\Project/uob_image_set'

In [5]:
def full_path(dir_name):
    image_folder = os.path.join(images_path, dir_name)

    img_name = os.listdir(image_folder)[0]
    return os.path.join(image_folder, img_name)


def get_images():
    chosen = os.listdir(images_path)

    imgs = [Image.open(full_path(k)) for k in tqdm(chosen)]
    imgs = [transform(i) for i in tqdm(imgs)]
    print("LOADED")
    return np.array(imgs)

def get_fft(img):

    gray = np.array(img.convert("LA"))
    dft = cv2.dft(np.float32(gray), flags=cv2.DFT_COMPLEX_OUTPUT)
    dft_shift = np.fft.fftshift(dft)

    magnitude_spectrum = 20 * np.log(cv2.magnitude(dft_shift[:, :, 0], dft_shift[:, :, 1]))
    return magnitude_spectrum

def get_closest(images, fft_diff, idx, k):
    images = np.array(images)
    row = fft_diff[idx]
    k_smallest_idx = np.argsort(row)[1:k+1]

    return images[k_smallest_idx]

def showImages(images):

    dst = Image.new('RGB', (len(images) * w, h))
    x = 0
    y = 0
    for i in images:
        dst.paste(i, (x,y))
        x+= w

    dst.show()


In [6]:
def get_fourier_matrix(images):
    fft_images = [get_fft(i) for i in tqdm(images)]
    n = len(images)
    fft_diff = np.zeros((n,n))

    for i in tqdm(range(0, n)):
        for j in range(0, i):

            if i != j:
                fft_1 = fft_images[i]
                fft_2 = fft_images[j]

                diff = np.sum(abs(fft_1 - fft_2))
                fft_diff[i][j] = diff
                fft_diff[j][i] = diff

    return fft_diff

In [7]:
def get_col(image):
    return ImageStat.Stat(image).median

def get_colour_matrix(images):
    col_images = [get_col(i) for i in tqdm(images)]
    n = len(images)
    col_diff = np.zeros((n,n))

    for i in tqdm(range(0, n)):
        for j in range(0, i):

            if i != j:
                col_1 = np.array(col_images[i])
                col_2 = np.array(col_images[j])

                diff = np.sum(abs(col_1 - col_2))
                col_diff[i][j] = diff
                col_diff[j][i] = diff

    return col_diff


In [8]:
images = get_images()

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  imgs = [Image.open(full_path(k)) for k in tqdm(chosen)]
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  imgs = [transform(i) for i in tqdm(imgs)]
  return np.array(imgs)
  return np.array(imgs)


  0%|          | 0/1500 [00:00<?, ?it/s]

  0%|          | 0/1500 [00:00<?, ?it/s]

LOADED


In [9]:
fft_diff_matrix = get_fourier_matrix(images)
col_diff_matrix = get_colour_matrix(images)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  fft_images = [get_fft(i) for i in tqdm(images)]
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for i in tqdm(range(0, n)):
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  col_images = [get_col(i) for i in tqdm(images)]
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for i in tqdm(range(0, n)):


  0%|          | 0/1500 [00:00<?, ?it/s]

  0%|          | 0/1500 [00:00<?, ?it/s]

  0%|          | 0/1500 [00:00<?, ?it/s]

  0%|          | 0/1500 [00:00<?, ?it/s]

In [10]:
def get_measures(w_matrix):
    w_matrix = w_matrix.flatten()
    above_zero_idxs = np.where(w_matrix > 0)
    above_zero = w_matrix[above_zero_idxs]
    min_w = np.amin(above_zero)
    max_w = np.amax(above_zero)
    average = np.mean(above_zero)
    # variance = np.var(above_zero)
    measures = [min_w, max_w, average]
    return measures

fft_min, fft_max, fft_mean = get_measures(fft_diff_matrix)
col_min, col_max, col_mean = get_measures(col_diff_matrix)
transformed_fft = (fft_diff_matrix - fft_min) / (fft_max - fft_min)
transformed_col = (col_diff_matrix - col_min) / (col_max - col_min)
print(fft_min,fft_max, fft_mean)
print(col_min,col_max, col_mean)
print(get_measures(transformed_col))
print(get_measures(transformed_fft))

159024.09375 996289.625 280059.3180783161
1.0 745.0 305.6397282233402
[0.0013440860215053765, 1.0, 0.4103231648787418]
[0.0019714661160548045, 1.0, 0.1445602708693262]


In [19]:
error_matrix = 3 * transformed_fft + transformed_col
idx = random.randint(0, 1500)
chosen_img = images[idx]
closest = get_closest(images, error_matrix, idx, 10)
showImages([chosen_img] + list(closest))

