In [1]:
#!pip install opencv-python



In [2]:
#!pip install opencv-python==3.4.2.16
#!pip install opencv-contrib-python==3.4.2.16

In [3]:
#!conda install -c anaconda scipy=1.2.1

#!pip install Pillow



In [4]:
import cv2
import numpy as np
import scipy
from scipy.spatial import distance
#from scipy.misc import imread
from imageio import imread
import pickle as pickle
import random
import os
import matplotlib.pyplot as plt

In [5]:
# Feature extractor
def extract_features(image_path, vector_size=32):
    """Build a fixed-length KAZE feature vector for one image.

    The image is read as RGB, KAZE keypoints are detected, the
    ``vector_size`` keypoints with the highest response are kept, and their
    descriptors are flattened into a single 1-D vector that is zero-padded
    up to ``vector_size * 64`` values.

    Args:
        image_path: Path of the image file to read.
        vector_size: Maximum number of keypoints whose descriptors are kept.

    Returns:
        A 1-D numpy array of ``vector_size * 64`` values, or ``None`` when
        OpenCV raises an error or no descriptor could be computed (for
        example on a blank image).
    """
    image = imread(image_path, pilmode="RGB")
    try:
        # Using KAZE, because SIFT, ORB and others were moved to an additional
        # module, which adds extra pain during install.
        alg = cv2.KAZE_create()
        # Finding image keypoints.
        kps = alg.detect(image)
        # The number of keypoints varies with image size and colour palette,
        # so keep only the strongest ones (bigger response is better).
        kps = sorted(kps, key=lambda x: -x.response)[:vector_size]
        # Computing the descriptor vectors.
        kps, dsc = alg.compute(image, kps)
        # With no usable keypoints there is no descriptor array to flatten;
        # report it through the same "return None" channel as an OpenCV error
        # instead of crashing with AttributeError on dsc.flatten().
        if dsc is None or dsc.size == 0:
            print('Error: ', 'no descriptors found in %s' % image_path)
            return None
        # Flatten all of them into one big vector - our feature vector.
        dsc = dsc.flatten()
        # Make every feature vector the same size.
        # The code assumes each descriptor holds 64 values.
        needed_size = (vector_size * 64)
        if dsc.size < needed_size:
            # Fewer than vector_size descriptors: pad the end with zeros.
            dsc = np.concatenate([dsc, np.zeros(needed_size - dsc.size)])
    except cv2.error as e:
        print ('Error: ', e)
        return None

    return dsc


def batch_extractor(images_path, pickled_db_path="all_features.pck"):
    """Extract features for every image under ``images_path`` and pickle them.

    ``images_path`` is expected to contain one sub-folder per group of
    images. The alphabetically last entry is ignored, every remaining
    sub-folder is scanned, and the resulting ``{key: feature_vector}``
    dictionary is written to ``pickled_db_path``.

    Images for which ``extract_features`` returns ``None`` are skipped, so
    the stored database only contains usable vectors.

    Args:
        images_path: Root folder holding the image sub-folders.
        pickled_db_path: Output file for the pickled feature dictionary.
    """
    print(images_path)
    folders = [os.path.join(images_path, p) for p in sorted(os.listdir(images_path))]
    # NOTE(review): the alphabetically last entry is dropped on purpose(?) -
    # kept as in the original; confirm whether it is a hold-out folder.
    # Plain files next to the image folders are skipped so that os.listdir
    # below is never called on a non-directory.
    image_folders = [d for d in folders[:-1] if os.path.isdir(d)]
    image_files = [
        [os.path.join(dir_path, p) for p in sorted(os.listdir(dir_path))]
        for dir_path in image_folders
    ]

    result = {}
    for f in image_files:
        print ('Extracting features from image %s' % f)
        for image in f:
            # Key = text after the last '/', lower-cased. With Windows-style
            # joins this still contains the backslash-separated sub-folders;
            # with '/' joins it is just the file name, so equal file names in
            # different folders overwrite each other.
            name = image.split('/')[-1].lower()
            print('feature extraction, ', image, name)
            features = extract_features(image)
            if features is None:
                # A None entry would make the feature matrix ragged and break
                # the distance computation later on.
                print('Skipping %s: no features extracted' % image)
                continue
            result[name] = features

    # saving all our feature vectors in pickled file
    with open(pickled_db_path, 'wb') as fp:
        pickle.dump(result, fp)

In [6]:
class Matcher(object):
    """Nearest-neighbour image lookup over a pickled feature database.

    The database is a ``{name: feature_vector}`` dictionary. Names and
    vectors are unpacked into two parallel numpy arrays (``self.names`` and
    ``self.matrix``) so that distances to all stored images can be computed
    in a single call.
    """

    def __init__(self, pickled_db_path="all_features.pck"):
        """Load the feature database from ``pickled_db_path``.

        Entries whose feature vector is ``None`` are ignored.
        """
        # SECURITY: pickle.load can execute arbitrary code; only open
        # database files that were produced locally / come from a trusted
        # source.
        with open(pickled_db_path, 'rb') as fp:
            self.data = pickle.load(fp, encoding='bytes')
        self.names = []
        self.matrix = []
        # Print only the entry count; dumping every vector floods the output.
        print('Matcher.., ', len(self.data))
        for k, v in self.data.items():
            if v is None:
                # No features for this image: keeping it would turn the
                # matrix into an object array that cdist cannot process.
                continue
            self.names.append(k)
            self.matrix.append(v)
        self.matrix = np.array(self.matrix)
        self.names = np.array(self.names)

    def cos_cdist(self, vector):
        """Return the cosine distance from ``vector`` to every stored vector."""
        # cdist wants 2-D inputs, so the query becomes a single-row matrix.
        v = vector.reshape(1, -1)
        return distance.cdist(self.matrix, v, 'cosine').reshape(-1)

    def match(self, image_path, topn=5):
        """Find the ``topn`` stored images closest to the image at ``image_path``.

        Returns:
            ``(names, distances)``: two lists ordered from the smallest
            cosine distance to the largest. Both are empty when the database
            holds no vectors.

        Raises:
            ValueError: if no features could be extracted from the query
                image.
        """
        features = extract_features(image_path)
        if features is None:
            raise ValueError(
                'Could not extract features from %s' % image_path)
        if len(self.names) == 0:
            return [], []
        img_distances = self.cos_cdist(features)
        # getting the topn closest records
        nearest_ids = np.argsort(img_distances)[:topn].tolist()
        nearest_img_paths = self.names[nearest_ids].tolist()

        return nearest_img_paths, img_distances[nearest_ids].tolist()

In [None]:
def show_img(path):
    """Read the image at ``path`` as RGB and display it with matplotlib."""
    pixels = imread(path, pilmode="RGB")
    plt.imshow(pixels)
    # Render the figure right away so it appears next to the printed text.
    plt.show()

import os
import random
import scipy
#from scipy.misc import imread
from imageio import imread

def run(images_path='C:/Users/User/Downloads/shell_dataset/dataset/train',
        n_samples=3, topn=3):
    """Demo: index a folder of images, then query it with random samples.

    Steps:
      1. list the images of every sub-folder of ``images_path`` except the
         alphabetically last one (the same selection ``batch_extractor``
         makes),
      2. pick up to ``n_samples`` random images from the first sub-folder,
      3. (re)build the feature database ``all_features.pck``,
      4. for each sample, show the query image and its ``topn`` best matches
         with their similarity (1 - cosine distance).

    Args:
        images_path: Root folder holding one sub-folder per image group.
        n_samples: Number of random query images to draw.
        topn: Number of matches to display per query.
    """
    print(images_path)
    folders = [os.path.join(images_path, p) for p in sorted(os.listdir(images_path))]
    # Same selection as batch_extractor: skip the last entry; plain files are
    # ignored so os.listdir is only called on directories.
    image_folders = [d for d in folders[:-1] if os.path.isdir(d)]
    image_files = [
        [os.path.join(dir_path, p) for p in sorted(os.listdir(dir_path))]
        for dir_path in image_folders
    ]
    if not image_files or not image_files[0]:
        print('No images found under %s' % images_path)
        return
    print(len(image_files[0]), image_files[0][0])
    print(image_files[0])

    # Map database keys back to real paths. The key derivation mirrors
    # batch_extractor (text after the last '/', lower-cased), so results can
    # be displayed regardless of the platform's path separator or the
    # lower-casing of the key.
    path_by_key = {}
    for folder_files in image_files:
        for path in folder_files:
            path_by_key[path.split('/')[-1].lower()] = path
    parent_dir = '/'.join(images_path.split('/')[:-1])

    # getting random query images (never more than are available)
    sample = random.sample(image_files[0], min(n_samples, len(image_files[0])))

    batch_extractor(images_path)

    ma = Matcher('all_features.pck')

    for s in sample:
        print ('Query image ==========================================')
        print(s)
        show_img(s)
        names, match = ma.match(s, topn=topn)
        print ('Result images ========================================')
        # Iterate over what was actually returned: the database may hold
        # fewer than topn images.
        for name, dist in zip(names, match):
            # we got cosine distance: the smaller the distance between two
            # vectors, the more similar they are, so subtract it from 1 to
            # get the match value
            print ('Match %s' % (1-dist))
            show_img(path_by_key.get(name, os.path.join(parent_dir, name)))
    
# Execute the demo defined by run() when this cell is evaluated.
run()

C:/Users/User/Downloads/shell_dataset/dataset/train
55 C:/Users/User/Downloads/shell_dataset/dataset/train\0101\0101074000.jpg
['C:/Users/User/Downloads/shell_dataset/dataset/train\\0101\\0101074000.jpg', 'C:/Users/User/Downloads/shell_dataset/dataset/train\\0101\\0101075000.jpg', 'C:/Users/User/Downloads/shell_dataset/dataset/train\\0101\\0101080000.jpg', 'C:/Users/User/Downloads/shell_dataset/dataset/train\\0101\\0101081000.jpg', 'C:/Users/User/Downloads/shell_dataset/dataset/train\\0101\\0101082000.jpg', 'C:/Users/User/Downloads/shell_dataset/dataset/train\\0101\\0101083000.jpg', 'C:/Users/User/Downloads/shell_dataset/dataset/train\\0101\\0101084000.jpg', 'C:/Users/User/Downloads/shell_dataset/dataset/train\\0101\\0101085000.jpg', 'C:/Users/User/Downloads/shell_dataset/dataset/train\\0101\\0101090000.jpg', 'C:/Users/User/Downloads/shell_dataset/dataset/train\\0101\\0101091000.jpg', 'C:/Users/User/Downloads/shell_dataset/dataset/train\\0101\\0101092000.jpg', 'C:/Users/User/Downloads/

feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0101\0101075000.jpg train\0101\0101075000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0101\0101080000.jpg train\0101\0101080000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0101\0101081000.jpg train\0101\0101081000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0101\0101082000.jpg train\0101\0101082000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0101\0101083000.jpg train\0101\0101083000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0101\0101084000.jpg train\0101\0101084000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0101\0101085000.jpg train\0101\0101085000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0101\0101090000.jpg train\0101\0101090000.jpg
feature extraction,  C:/Users/User/Downloads/she

feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0102\0102075000.jpg train\0102\0102075000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0102\0102080000.jpg train\0102\0102080000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0102\0102081000.jpg train\0102\0102081000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0102\0102082000.jpg train\0102\0102082000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0102\0102083000.jpg train\0102\0102083000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0102\0102084000.jpg train\0102\0102084000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0102\0102085000.jpg train\0102\0102085000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0102\0102090000.jpg train\0102\0102090000.jpg
feature extraction,  C:/Users/User/Downloads/she

feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0103\0103075000.jpg train\0103\0103075000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0103\0103080000.jpg train\0103\0103080000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0103\0103081000.jpg train\0103\0103081000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0103\0103082000.jpg train\0103\0103082000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0103\0103083000.jpg train\0103\0103083000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0103\0103084000.jpg train\0103\0103084000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0103\0103085000.jpg train\0103\0103085000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0103\0103090000.jpg train\0103\0103090000.jpg
feature extraction,  C:/Users/User/Downloads/she

feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0104\0104075000.jpg train\0104\0104075000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0104\0104080000.jpg train\0104\0104080000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0104\0104081000.jpg train\0104\0104081000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0104\0104082000.jpg train\0104\0104082000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0104\0104083000.jpg train\0104\0104083000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0104\0104084000.jpg train\0104\0104084000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0104\0104085000.jpg train\0104\0104085000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0104\0104090000.jpg train\0104\0104090000.jpg
feature extraction,  C:/Users/User/Downloads/she

feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0105\0105075000.jpg train\0105\0105075000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0105\0105080000.jpg train\0105\0105080000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0105\0105081000.jpg train\0105\0105081000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0105\0105082000.jpg train\0105\0105082000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0105\0105083000.jpg train\0105\0105083000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0105\0105084000.jpg train\0105\0105084000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0105\0105085000.jpg train\0105\0105085000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0105\0105090000.jpg train\0105\0105090000.jpg
feature extraction,  C:/Users/User/Downloads/she

feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0106\0106075000.jpg train\0106\0106075000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0106\0106080000.jpg train\0106\0106080000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0106\0106081000.jpg train\0106\0106081000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0106\0106082000.jpg train\0106\0106082000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0106\0106083000.jpg train\0106\0106083000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0106\0106084000.jpg train\0106\0106084000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0106\0106085000.jpg train\0106\0106085000.jpg
feature extraction,  C:/Users/User/Downloads/shell_dataset/dataset/train\0106\0106090000.jpg train\0106\0106090000.jpg
feature extraction,  C:/Users/User/Downloads/she