In [10]:
# Refer https://medium.com/machine-learning-world/feature-extraction-and-similar-image-search-with-opencv-for-newbies-3c59796bf774
import numpy as np
import urllib
import cv2
import os
import pickle
import random
import scipy
from scipy.misc import imread
import matplotlib.pyplot as plt
%matplotlib inline

In [11]:
base_globalpath = '/visual/folder/'
base_querypath = '/visual/query_images/'

In [12]:
def get_valid_image_files(image_path):
    files = []
    for p in sorted(os.listdir(image_path)):
            if p.endswith(".jpg"): 
                files.append(image_path+"/"+p)
    return files
    

In [16]:
def extract_features(image_path, vector_size=32):
   
    image = imread(image_path, mode="RGB")
    try:
#     Using KAZE
        alg = cv2.KAZE_create()
#     Using SIFT
#         alg = cv2.xfeatures2d.SIFT_create()
#     Using SURF
#         alg = cv2.xfeatures2d.SURF_create()
        # Dinding image keypoints
        kps = alg.detect(image)
        # Getting first 32 of them. 
        # Number of keypoints is varies depend on image size and color pallet
        # Sorting them based on keypoint response value(bigger is better)
        kps = sorted(kps, key=lambda x: -x.response)[:vector_size]
        # computing descriptors vector
        kps, dsc = alg.compute(image, kps)
        # Flatten all of them in one big vector - our feature vector
        dsc = dsc.flatten()
        # Making descriptor of same size
        # Descriptor vector size is 64
        needed_size = (vector_size * 64)
        if dsc.size < needed_size:
            # if we have less the 32 descriptors then just adding zeros at the
            # end of our feature vector
            dsc = np.concatenate([dsc, np.zeros(needed_size - dsc.size)])
    except cv2.error as e:
        print('Error: ', e)
        return None

    return dsc


def batch_extractor(images_path, pickled_db_path=base_globalpath+'features.pck'):
    files = get_valid_image_files(images_path)

    result = {}
    for f in files: 
#         print('Extracting features from image %s' % f)
        name = f.split('/')[-1].lower()
        result[name] = extract_features(f)
        print(result)
    # saving all our feature vectors in pickled file
    with open(pickled_db_path, 'wb') as fp:
        pickle.dump(result, fp)

In [17]:
class Matcher(object):

    def __init__(self, pickled_db_path=base_globalpath+'features.pck'):
        with open(pickled_db_path,"rb") as fp:
            self.data = pickle.load(fp)
        self.names = []
        self.matrix = []
        for k, v in self.data.items():
            self.names.append(k)
            self.matrix.append(v)
        self.matrix = np.array(self.matrix)
        self.names = np.array(self.names)

    def cos_cdist(self, vector):
        # getting cosine distance between search image and images database
        v = vector.reshape(1, -1)
        return scipy.spatial.distance.cdist(self.matrix, v, 'cosine').reshape(-1)
#     distance.cdist(ref_1d[numpy.newaxis, :], query_2d)
    
#     def nearest_neighbors(values, all_values, nbr_neighbors=10):
#     nn = NearestNeighbors(nbr_neighbors, metric='cosine', algorithm='brute').fit(all_values)
#     dists, idxs = nn.kneighbors(values)

    def match(self, image_path, topn=5):
        features = extract_features(image_path)
        img_distances = self.cos_cdist(features)
        # getting top 5 records
        nearest_ids = np.argsort(img_distances)[:topn].tolist()
        nearest_img_paths = self.names[nearest_ids].tolist()
        return nearest_img_paths, img_distances[nearest_ids].tolist()

In [18]:

def show_img(path):
    img =imread(path, mode="RGB")
    plt.imshow(img)
    plt.show()

def run():
#     Query images to test
#     query_path = base_querypath
    category = 'all'
#     images_path = []
#     for category in categoryList
    images_path = base_globalpath+category
    files = get_valid_image_files(images_path)
#     Predicted dataset
#     for f in files:
#         print(f)
#     images_path = base_globalpath+category
#     Store the features of the predicted dataset in the dataset
    batch_extractor(images_path)
    
    ma = Matcher(base_globalpath+'features.pck')
    
    for s in files:
       
        print('Query image ==========================================')
        show_img(s)
        names, match = ma.match(s, topn=5)
        print('Result images ========================================')
        for i in range(5):
            # we got cosine distance, less cosine distance between vectors
            # more they similar, thus we subtruct it from 1 to get match value
            print('Match %s' % (1-match[i]) )
            if names[i].endswith(".jpg"): 
                show_img(images_path+"/"+names[i])

run()

`imread` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imread`` instead.
  This is separate from the ipykernel package so we can avoid doing imports until


{'18959_comsumer_03.jpg': array([ 0.08627279,  0.01172121,  0.08972239, ..., -0.10453288,
        0.05046642,  0.10882016], dtype=float32)}
{'18959_comsumer_03.jpg': array([ 0.08627279,  0.01172121,  0.08972239, ..., -0.10453288,
        0.05046642,  0.10882016], dtype=float32), '18964_comsumer_02.jpg': array([-0.00239559, -0.00965965,  0.01038153, ...,  0.05233049,
        0.06544095,  0.17841244], dtype=float32)}
{'18959_comsumer_03.jpg': array([ 0.08627279,  0.01172121,  0.08972239, ..., -0.10453288,
        0.05046642,  0.10882016], dtype=float32), '18964_comsumer_02.jpg': array([-0.00239559, -0.00965965,  0.01038153, ...,  0.05233049,
        0.06544095,  0.17841244], dtype=float32), '18970_comsumer_03.jpg': array([0.00870385, 0.01287152, 0.06038076, ..., 0.21215613, 0.04619482,
       0.2966293 ], dtype=float32)}
{'18959_comsumer_03.jpg': array([ 0.08627279,  0.01172121,  0.08972239, ..., -0.10453288,
        0.05046642,  0.10882016], dtype=float32), '18964_comsumer_02.jpg': arra

{'18959_comsumer_03.jpg': array([ 0.08627279,  0.01172121,  0.08972239, ..., -0.10453288,
        0.05046642,  0.10882016], dtype=float32), '18964_comsumer_02.jpg': array([-0.00239559, -0.00965965,  0.01038153, ...,  0.05233049,
        0.06544095,  0.17841244], dtype=float32), '18970_comsumer_03.jpg': array([0.00870385, 0.01287152, 0.06038076, ..., 0.21215613, 0.04619482,
       0.2966293 ], dtype=float32), '18974_comsumer_03.jpg': array([ 0.00897055, -0.00632166,  0.10848335, ..., -0.48856297,
        0.1927459 ,  0.5071647 ], dtype=float32), '18980_comsumer_03.jpg': array([-0.00321387,  0.01427927,  0.03294564, ..., -0.06479373,
        0.08860894,  0.15339503], dtype=float32), '18984_shop_02.jpg': array([-0.05382007,  0.05487039,  0.06864368, ...,  0.04951368,
        0.05732642,  0.04968405], dtype=float32), '194_comsumer_03.jpg': array([ 0.04465114,  0.00654576,  0.07107775, ..., -0.03807499,
        0.01496757,  0.05396954], dtype=float32), '201_comsumer_03.jpg': array([-0.04151

{'18959_comsumer_03.jpg': array([ 0.08627279,  0.01172121,  0.08972239, ..., -0.10453288,
        0.05046642,  0.10882016], dtype=float32), '18964_comsumer_02.jpg': array([-0.00239559, -0.00965965,  0.01038153, ...,  0.05233049,
        0.06544095,  0.17841244], dtype=float32), '18970_comsumer_03.jpg': array([0.00870385, 0.01287152, 0.06038076, ..., 0.21215613, 0.04619482,
       0.2966293 ], dtype=float32), '18974_comsumer_03.jpg': array([ 0.00897055, -0.00632166,  0.10848335, ..., -0.48856297,
        0.1927459 ,  0.5071647 ], dtype=float32), '18980_comsumer_03.jpg': array([-0.00321387,  0.01427927,  0.03294564, ..., -0.06479373,
        0.08860894,  0.15339503], dtype=float32), '18984_shop_02.jpg': array([-0.05382007,  0.05487039,  0.06864368, ...,  0.04951368,
        0.05732642,  0.04968405], dtype=float32), '194_comsumer_03.jpg': array([ 0.04465114,  0.00654576,  0.07107775, ..., -0.03807499,
        0.01496757,  0.05396954], dtype=float32), '201_comsumer_03.jpg': array([-0.04151

{'18959_comsumer_03.jpg': array([ 0.08627279,  0.01172121,  0.08972239, ..., -0.10453288,
        0.05046642,  0.10882016], dtype=float32), '18964_comsumer_02.jpg': array([-0.00239559, -0.00965965,  0.01038153, ...,  0.05233049,
        0.06544095,  0.17841244], dtype=float32), '18970_comsumer_03.jpg': array([0.00870385, 0.01287152, 0.06038076, ..., 0.21215613, 0.04619482,
       0.2966293 ], dtype=float32), '18974_comsumer_03.jpg': array([ 0.00897055, -0.00632166,  0.10848335, ..., -0.48856297,
        0.1927459 ,  0.5071647 ], dtype=float32), '18980_comsumer_03.jpg': array([-0.00321387,  0.01427927,  0.03294564, ..., -0.06479373,
        0.08860894,  0.15339503], dtype=float32), '18984_shop_02.jpg': array([-0.05382007,  0.05487039,  0.06864368, ...,  0.04951368,
        0.05732642,  0.04968405], dtype=float32), '194_comsumer_03.jpg': array([ 0.04465114,  0.00654576,  0.07107775, ..., -0.03807499,
        0.01496757,  0.05396954], dtype=float32), '201_comsumer_03.jpg': array([-0.04151

{'18959_comsumer_03.jpg': array([ 0.08627279,  0.01172121,  0.08972239, ..., -0.10453288,
        0.05046642,  0.10882016], dtype=float32), '18964_comsumer_02.jpg': array([-0.00239559, -0.00965965,  0.01038153, ...,  0.05233049,
        0.06544095,  0.17841244], dtype=float32), '18970_comsumer_03.jpg': array([0.00870385, 0.01287152, 0.06038076, ..., 0.21215613, 0.04619482,
       0.2966293 ], dtype=float32), '18974_comsumer_03.jpg': array([ 0.00897055, -0.00632166,  0.10848335, ..., -0.48856297,
        0.1927459 ,  0.5071647 ], dtype=float32), '18980_comsumer_03.jpg': array([-0.00321387,  0.01427927,  0.03294564, ..., -0.06479373,
        0.08860894,  0.15339503], dtype=float32), '18984_shop_02.jpg': array([-0.05382007,  0.05487039,  0.06864368, ...,  0.04951368,
        0.05732642,  0.04968405], dtype=float32), '194_comsumer_03.jpg': array([ 0.04465114,  0.00654576,  0.07107775, ..., -0.03807499,
        0.01496757,  0.05396954], dtype=float32), '201_comsumer_03.jpg': array([-0.04151