In [40]:
import numpy as np 
from matplotlib import pyplot as plt 
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs 
import os
import cv2 
from sklearn.neighbors import BallTree
from sklearn.neighbors import NearestNeighbors
from scipy.spatial import distance

## SURF feature descriptors for the images stored in rootDir

In [41]:
# Folder that is walked (recursively) for the training images.
rootDir='./images/' # give the path of folder which contains images

# Build the SURF extractor once instead of once per image.
surf = cv2.xfeatures2d.SURF_create()

# Descriptor rows pooled over every readable image; the clustering cell
# below turns this list into the k-means training matrix.
list_of_features=[]
for dirName, subdirList, fileList in os.walk(rootDir):
    for fname in fileList:
        # os.walk descends into subfolders, so the file lives in dirName,
        # which is not necessarily rootDir itself.
        path = os.path.join(dirName, fname)
        image = cv2.imread(path)
        if image is None:
            # cv2.imread returns None for files it cannot decode
            # (non-images, corrupt files); cvtColor would crash on that.
            print('skipping unreadable file: ' + path)
            continue
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        (kps, descs) = surf.detectAndCompute(gray, None)
        if descs is None:
            # No keypoints were detected, so there is nothing to pool.
            continue
        # Append every descriptor row of this image.
        list_of_features.extend(descs)

In [42]:
# Sanity check: number of components in the first pooled descriptor.
first_descriptor = list_of_features[0]
print(len(first_descriptor))

64


## K-means clustering

In [43]:
# Stack the pooled descriptors into a single array (one descriptor per row).
list_of_features=np.asarray(list_of_features)

# Vocabulary size: number of cluster centres (visual words) to learn.
n_clusters = 128

# n_init is pinned to 10 (the value shown in the recorded output) because the
# library default differs between scikit-learn releases; leaving it implicit
# would make the learned centres depend on the installed version.
ret = KMeans(
    n_clusters=n_clusters,
    n_init=10,
    random_state=0,
    max_iter=1000,
    tol=0.000001,
)
ret.fit(list_of_features)



KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=1000,
    n_clusters=128, n_init=10, n_jobs=1, precompute_distances='auto',
    random_state=0, tol=1e-06, verbose=0)

In [44]:
# Show the first learned cluster centre as a quick look at the vocabulary.
first_centroid = ret.cluster_centers_[0]
print(first_centroid)

[-2.83567677e-03 -1.37770479e-03  4.87155840e-03  2.74285022e-03
  3.22117843e-03 -1.07750930e-02  2.46662479e-02  1.73719563e-02
  5.31249046e-02 -6.44174078e-03  7.07725286e-02  3.75084728e-02
 -2.52410769e-03  1.58289960e-03  5.56176808e-03  3.41248768e-03
 -1.79791600e-02 -7.23057892e-03  2.48872489e-02  1.55861275e-02
 -8.52647424e-03 -8.60666391e-03  8.17057490e-02  6.76196516e-02
  4.58290637e-01 -3.49903181e-02  5.36710680e-01  1.60641387e-01
 -1.98856052e-02 -9.53910314e-03  3.82402502e-02  2.43780650e-02
 -8.09153821e-03  7.02414103e-03  2.18066704e-02  1.58510488e-02
  2.75222138e-02  8.02217200e-02  1.15207419e-01  1.28969520e-01
  2.78370559e-01 -5.45306467e-02  4.10054117e-01  2.67193794e-01
 -4.19792160e-03 -1.98711045e-02  4.35161442e-02  3.90344411e-02
 -1.14251464e-03  8.25572875e-04  3.63575877e-03  4.64881770e-03
 -4.54118475e-04  7.87001103e-03  2.57906634e-02  4.04500812e-02
  1.56342685e-02  1.99487619e-02  3.18058059e-02  3.74261290e-02
  1.18869601e-03 -2.17909

In [45]:
# Bag-of-visual-words histogram for every readable image under rootDir.
# histogram_per_image[i] is the histogram of the image whose key in
# my_dict maps to i; the two are kept aligned by only assigning an index
# once the image has actually been processed.
histogram_per_image=[]
my_dict = {}
count=0

# Build the SURF extractor once instead of once per image.
surf = cv2.xfeatures2d.SURF_create()

for dirName, subdirList, fileList in os.walk(rootDir):
    for fname in fileList:
        # os.walk descends into subfolders, so join with the directory that
        # is currently being walked rather than with rootDir.
        path = os.path.join(dirName, fname)
        image = cv2.imread(path)
        if image is None:
            # Unreadable / non-image file: skip it without consuming an
            # index, otherwise my_dict would point past the histogram list.
            print('skipping unreadable file: ' + path)
            continue

        # Path relative to rootDir: equals fname for a flat folder, and stays
        # unique when files in different subfolders share a name.
        key = os.path.relpath(path, rootDir)
        print(key)

        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        (kps, descs) = surf.detectAndCompute(gray, None)

        histogram = np.zeros(n_clusters)
        if descs is not None and len(descs):
            # Euclidean distance from every descriptor to every centre, then
            # the closest centre per descriptor. On an exact distance tie
            # argmin keeps the lowest centre index.
            nearest = distance.cdist(descs, ret.cluster_centers_).argmin(axis=1)
            # Count how many descriptors fell on each visual word.
            histogram += np.bincount(nearest, minlength=n_clusters)

        my_dict[key]=count
        count+=1
        histogram_per_image.append(histogram)



dog.jpeg
man.jpg
woman.jpg
lion.jpeg
elephant.jpeg


In [46]:
# Bag-of-visual-words histogram of the query image, built with the same
# vocabulary (ret.cluster_centers_) as the indexed images. hist is a
# one-element list so it can be passed directly as a batch of query rows.
hist=[]
test_image='lion4.png'
image = cv2.imread(test_image)
if image is None:
    # cv2.imread returns None instead of raising when the file is missing or
    # cannot be decoded; fail here with a message that names the file.
    raise FileNotFoundError('could not read test image: ' + test_image)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
surf = cv2.xfeatures2d.SURF_create()
(kps, descs) = surf.detectAndCompute(gray, None)
histogram = np.zeros(n_clusters)
print(histogram)
if descs is not None and len(descs):
    # Euclidean distance from every descriptor to every centre, then the
    # closest centre per descriptor. On an exact distance tie argmin keeps
    # the lowest centre index.
    nearest = distance.cdist(descs, ret.cluster_centers_).argmin(axis=1)
    # Count how many descriptors fell on each visual word.
    histogram += np.bincount(nearest, minlength=n_clusters)

hist.append(histogram)

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0.]


## BallTree algorithm for fast, exact nearest-neighbour search

In [48]:

# Index the per-image histograms and look up the two entries closest to the
# query histogram.
tree = BallTree(histogram_per_image)
dist, ind = tree.query(hist, k=2)
print(ind)
print(dist)

# Translate the best-matching row index back to its key in my_dict.
best_match = ind[0][0]
for name, position in my_dict.items():
    if position == best_match:
        print(name)
        break

[[3 0]]
[[87.59566199 92.09777413]]
lion.jpeg
