## Find nearest neighbours and find cosine similarity

In [1]:
from sklearn.neighbors import NearestNeighbors
import cv2
from matplotlib import pyplot as plt
import numpy as np
from numpy.linalg import norm
from skimage.transform import resize
import glob, os, random

def HOG_CV(image):
    
    winSize = (28,28)
    blockSize = (4,4)
    blockStride = (2,2)
    cellSize = (2,2)
    nbins = 9
    derivAperture = 1
    winSigma = 4.
    histogramNormType = 0
    L2HysThreshold = 2.0000000000000001e-01
    gammaCorrection = 0
    nlevels = 64
    hog = cv2.HOGDescriptor(winSize,blockSize,blockStride,cellSize,nbins,derivAperture,winSigma,
                            histogramNormType,L2HysThreshold,gammaCorrection,nlevels)
    #compute(img[, winStride[, padding[, locations]]]) -> descriptors
    winStride = (8,8)
    padding = (8,8)
    locations = ((10,20),)
    hist = hog.compute(image,winStride,padding,locations)
    return hist


def find_hog(image):
    #image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image= image.astype(np.uint8)
    hist_cv = HOG_CV(image)
    return hist_cv

## Create training and testing data

In [2]:
data = glob.glob(str(os.getcwd())+"\images\images\*")
random.shuffle(data)
test_data = data[:500]
train_data = data[501:]

## Find discriptors and run KNN

In [3]:
from tqdm.notebook import tqdm
#print (os.getcwd())
#os.chdir("images")
limit = len(train_data)
dic_hog = {}
bins_list = []
for file in tqdm(train_data):
    a= find_hog(cv2.imread(file))
    dic_hog[file] = [x[0] for x in a]
    bins_list.append(dic_hog[file])
    if len(bins_list) == limit:
        break
        
N_Wanted = 10 #Number of neighbours wanted
bins_list = np.array(bins_list)
bins_list = np.reshape(bins_list,(limit,len(bins_list[0])))
nbrs = NearestNeighbors(n_neighbors=N_Wanted, algorithm='ball_tree').fit(bins_list)

HBox(children=(FloatProgress(value=0.0, max=43940.0), HTML(value='')))

In [51]:
from skimage.measure import compare_nrmse
from sklearn.metrics.pairwise import cosine_similarity

def find_img_cosine(path_img_1, path_img_2):
    img1 = cv2.imread(path_img_1)
    img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
    img1 = np.reshape(img1,(len(img1)*len(img1[0]),1))
    img1 = [x[0] for x in img1]
    
    img2 = cv2.imread(path_img_2)
    img2 =  cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
    img2 = np.reshape(img2,(len(img2)*len(img2[0]),1))
    img2 = [x[0] for x in img2]
    
    try:
        return [cosine_similarity([img1],[img2])[0][0]]
    except:
        print ("Different Dimensions, return empty array")
        return []
    
test_descriptor_mean_cosine = []
test_image_cosine = []
for test in tqdm(test_data):
    hogs = find_hog(cv2.imread(test))
    hogs = [x[0] for x in hogs]
    distances, indexes = nbrs.kneighbors([hogs])
    test_img = cv2.imread(test)
    local_cos_sim = []
    local_image_cosine = []
    for i in range(N_Wanted):
        nearest = list(dic_hog.keys())[list(indexes)[0][i]]
        local_cos_sim.append(cosine_similarity([hogs],[dic_hog[nearest]])[0][0])
        local_image_cosine+=find_img_cosine(test,nearest)
    test_descriptor_mean_cosine.append(np.mean(local_cos_sim)) 
    test_image_cosine.append(np.mean(local_image_cosine))
    #print ('\n\n\n')
    
print ("Global Mean Cosine Similarity Descriptor is :", np.mean(test_descriptor_mean_cosine))
print ("Global Mean Cosine Similarity Image for is :", np.mean(test_image_cosine))

HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))

Different Dimensions, return empty array
Different Dimensions, return empty array
Different Dimensions, return empty array

Global Mean Cosine Similarity Descriptor is : 0.6779431
Global Mean Cosine Similarity Image for is : 0.9734942164468434
