# 用Python和OpenCV創建一個圖片搜索引擎的完整指南
- http://python.jobbole.com/80860/

- 計算出每張圖5個區塊的顏色直方圖,中間區塊是一個橢圓形

In [20]:
# import the necessary packages
import numpy as np
import cv2
import glob
 
class ColorDescriptor:
    """Describe an image with region-based 3D HSV color histograms.

    The image is split into five regions: the four corner rectangles
    (each with the central ellipse removed) and the central ellipse
    itself. One histogram is computed per region and all five are
    concatenated into a single feature vector.
    """

    def __init__(self, bins):
        # number of histogram bins per HSV channel, e.g. (8, 12, 3)
        self.bins = bins

    def describe(self, image):
        """Return the concatenated region histograms of a BGR image.

        Args:
            image: image array as loaded by ``cv2.imread`` (BGR order).

        Returns:
            A flat list holding the five flattened, normalized
            histograms: the four corners first, then the center ellipse.
        """
        # work in HSV and start with an empty feature vector
        image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        features = []

        # image dimensions and center point
        (h, w) = image.shape[:2]
        (cX, cY) = (int(w * 0.5), int(h * 0.5))

        # four rectangles as (startX, endX, startY, endY): top-left,
        # top-right, bottom-right, bottom-left
        segments = [(0, cX, 0, cY), (cX, w, 0, cY), (cX, w, cY, h),
            (0, cX, cY, h)]

        # elliptical mask for the image center; its axes are 75% of the
        # image size, halved. Floor division keeps them integers under
        # Python 3 as well -- cv2.ellipse does not accept float axes.
        (axesX, axesY) = (int(w * 0.75) // 2, int(h * 0.75) // 2)
        ellipMask = np.zeros(image.shape[:2], dtype="uint8")
        cv2.ellipse(ellipMask, (cX, cY), (axesX, axesY), 0, 0, 360, 255, -1)

        for (startX, endX, startY, endY) in segments:
            # white rectangle for this corner, minus the central ellipse
            cornerMask = np.zeros(image.shape[:2], dtype="uint8")
            cv2.rectangle(cornerMask, (startX, startY), (endX, endY), 255, -1)
            cornerMask = cv2.subtract(cornerMask, ellipMask)

            # histogram of the corner region, appended to the features
            features.extend(self.histogram(image, cornerMask))

        # finally the histogram of the elliptical center region
        features.extend(self.histogram(image, ellipMask))

        return features

    def histogram(self, image, mask):
        """Return the flattened, normalized 3D HSV histogram under ``mask``.

        Args:
            image: HSV image array.
            mask: uint8 mask; only non-zero pixels are counted.
        """
        # H spans [0, 180), S and V span [0, 256)
        hist = cv2.calcHist([image], [0, 1, 2], mask, self.bins,
            [0, 180, 0, 256, 0, 256])

        # normalize so the descriptor does not depend on the image size;
        # the destination is passed explicitly because the OpenCV 3+
        # binding requires it (OpenCV 2.4 accepts it as well)
        hist = cv2.normalize(hist, hist).flatten()

        return hist
    
if __name__ == '__main__':
    # Build the CSV index: one row per dataset image, holding the image
    # ID followed by its color-histogram features.

    # 8 hue bins, 12 saturation bins, 3 value bins
    cd = ColorDescriptor((8, 12, 3))

    # the context manager closes the index file even when describing
    # one of the images raises
    with open('index1.csv', "w") as output:
        # loop over every JPEG in the dataset directory
        for imagePath in glob.glob('dataset' + "/*.jpg"):
            # the image ID is whatever follows the last "/" in the path
            imageID = imagePath[imagePath.rfind("/") + 1:]
            image = cv2.imread(imagePath)

            # cv2.imread returns None for files it cannot decode; skip
            # them instead of crashing inside describe()
            if image is None:
                continue

            # describe the image and write "<id>,<f1>,<f2>,..." to the index
            features = [str(f) for f in cd.describe(image)]
            output.write("%s,%s\n" % (imageID, ",".join(features)))

- 用query資料夾中的圖片，計算出該圖片5個區塊的顏色直方圖，和dataset中圖片的顏色直方圖做比較，取前三名

In [21]:
# import the necessary packages
import numpy as np
import csv

class ColorDescriptor:
    """Quantify an image as five concatenated 3D HSV color histograms.

    The regions are the four corner rectangles, each with the central
    ellipse cut out, followed by the central ellipse itself.
    """

    def __init__(self, bins):
        # bins per channel of the 3D histogram, in (H, S, V) order
        self.bins = bins

    def describe(self, image):
        """Compute the feature vector of a BGR image.

        Args:
            image: image array as loaded by ``cv2.imread`` (BGR order).

        Returns:
            A flat list with the normalized histogram values of the
            four corner regions followed by those of the center ellipse.
        """
        # convert to HSV and initialize the feature vector
        image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        features = []

        # dimensions and center of the image
        (h, w) = image.shape[:2]
        (cX, cY) = (int(w * 0.5), int(h * 0.5))

        # corner rectangles as (startX, endX, startY, endY): top-left,
        # top-right, bottom-right, bottom-left
        segments = [(0, cX, 0, cY), (cX, w, 0, cY), (cX, w, cY, h),
            (0, cX, cY, h)]

        # ellipse axes: 75% of each dimension, halved. "//" is used so
        # the axes stay integers on Python 3 too; cv2.ellipse rejects
        # float axes.
        (axesX, axesY) = (int(w * 0.75) // 2, int(h * 0.75) // 2)
        ellipMask = np.zeros(image.shape[:2], dtype="uint8")
        cv2.ellipse(ellipMask, (cX, cY), (axesX, axesY), 0, 0, 360, 255, -1)

        for (startX, endX, startY, endY) in segments:
            # mask of this corner with the elliptical center removed
            cornerMask = np.zeros(image.shape[:2], dtype="uint8")
            cv2.rectangle(cornerMask, (startX, startY), (endX, endY), 255, -1)
            cornerMask = cv2.subtract(cornerMask, ellipMask)

            # add this corner's histogram to the feature vector
            features.extend(self.histogram(image, cornerMask))

        # add the histogram of the elliptical center last
        features.extend(self.histogram(image, ellipMask))

        return features

    def histogram(self, image, mask):
        """Compute the normalized, flattened 3D histogram of a masked region.

        Args:
            image: HSV image array.
            mask: uint8 mask selecting the pixels to count.
        """
        # ranges: H in [0, 180), S and V in [0, 256)
        hist = cv2.calcHist([image], [0, 1, 2], mask, self.bins,
            [0, 180, 0, 256, 0, 256])

        # OpenCV 3+ requires the destination argument of cv2.normalize;
        # passing it explicitly also works on OpenCV 2.4
        hist = cv2.normalize(hist, hist).flatten()

        return hist

class Searcher:
    """Rank the images of a CSV index by similarity to a query vector."""

    def __init__(self, indexPath):
        # path of the CSV index; each row is "<image ID>,<f1>,<f2>,..."
        self.indexPath = indexPath

    def search(self, queryFeatures, limit = 10):
        """Return the best matches for ``queryFeatures``.

        Args:
            queryFeatures: feature vector of the query image.
            limit: NOTE(review): accepted but not applied -- the three
                best matches are always returned, which the search
                script in this file relies on. Confirm before wiring
                it into the slice.

        Returns:
            Up to three ``(distance, imageID)`` tuples sorted by
            ascending chi-squared distance (smaller = more similar).
        """
        # image ID -> distance to the query
        results = {}

        # convert the query once instead of once per index row
        query = np.asarray(queryFeatures, dtype="float64")

        # the context manager closes the file; no explicit close needed
        with open(self.indexPath) as f:
            for row in csv.reader(f):
                # a blank line parses to an empty row and has no image ID
                if not row:
                    continue

                # first column is the image ID, the rest are features
                features = [float(x) for x in row[1:]]
                results[row[0]] = self.chi2_distance(features, query)

        # smallest distance (most relevant image) first
        ranked = sorted((d, imageID) for (imageID, d) in results.items())

        # keep only the three closest images
        return ranked[:3]

    def chi2_distance(self, histA, histB, eps = 1e-10):
        """Return the chi-squared distance between two histograms.

        ``eps`` is added to every denominator to avoid division by
        zero. If the lengths differ, only the common leading part is
        compared (the same truncation ``zip`` would apply).
        """
        a = np.asarray(histA, dtype="float64")
        b = np.asarray(histB, dtype="float64")
        n = min(a.size, b.size)
        a, b = a[:n], b[:n]

        return 0.5 * np.sum(((a - b) ** 2) / (a + b + eps))

if __name__ == '__main__':
    # Describe one query image, search the index for the most similar
    # images and show the query followed by each match.
    import os

    # same bin layout as the one used to build the index
    cd = ColorDescriptor((8, 12, 3))

    # load the query image; cv2.imread returns None when the file is
    # missing or unreadable, so fail with a clear message
    queryPath = "queries/119605.jpg"
    query = cv2.imread(queryPath)
    if query is None:
        raise IOError("could not read query image: %s" % queryPath)
    features = cd.describe(query)

    # perform the search
    searcher = Searcher('index1.csv')
    results = searcher.search(features)

    # display the query in a resizable window until a key is pressed
    cv2.namedWindow("Query", cv2.WINDOW_NORMAL)
    cv2.imshow("Query", query)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    # loop over the results
    for (score, resultID) in results:
        # the index may store a bare file name; when the ID does not
        # resolve as given, look for it in the dataset directory
        resultPath = resultID
        if not os.path.exists(resultPath):
            resultPath = os.path.join('dataset', os.path.basename(resultID))

        result = cv2.imread(resultPath)
        if result is None:
            print("could not read result image: %s" % resultID)
            continue

        # display the match until a key is pressed
        cv2.namedWindow("Result", cv2.WINDOW_NORMAL)
        cv2.imshow("Result", result)
        cv2.waitKey(0)
        cv2.destroyAllWindows()