# 圖像指紋與漢明距離
- 參考網站：https://segmentfault.com/a/1190000004467183
- 圖像指紋：將圖像按照特定的哈希算法，經過運算後得到的一組二進位數字
- 漢明距離：將一組二進位數據變成另一組數據所需的步驟數，這個數值可以衡量兩張圖片的差異，若漢明距離越小，代表相似度越高
- 平均哈希法(aHash)：比較灰度圖每個像速與平均值來實現

In [5]:
from PIL import Image
from PIL import ImageFilter
from PIL import ImageOps

#This module can classfy the image by Average Hash Method
#The Hash Method is too strict,so this moudel suitable for finding image by Thumbnail


def getCode(img,size):

    pixel = []
    for x in range(0,size[0]):
        for y in range(0,size[1]):
            pixel_value = img.getpixel((x,y))
            pixel.append(pixel_value)  #找出每一個像素值，並加入一個list

    avg = sum(pixel)/len(pixel)  #計算像素值平均數

    cp = []

    for px in pixel:
        if px > avg:  #該像素值大於平均像數值，則為1,並加入到一個list
            cp.append(1)
        else:
            cp.append(0)
    print cp
    return cp



def compCode(code1,code2):
    num = 0
    for index in range(0,len(code1)):
        if code1[index] != code2[index]:
            num+=1
    print num
    return num 

def classfiy_aHash(image1,image2,size=(8,8),exact=25):
    ''' 'image1' and 'image2' is a Image Object.
    You can build it by 'Image.open(path)'.
    'Size' is parameter what the image will resize to it and then image will be compared by the algorithm.
    It's 8 * 8 when it default.  
    'exact' is parameter for limiting the Hamming code between 'image1' and 'image2',it's 25 when it default.
    The result become strict when the exact become less. 
    This function return the true when the 'image1'  and 'image2' are similar. 
    '''
    image1 = image1.resize(size).convert('L').filter(ImageFilter.BLUR)  #縮小圖片為8*8，64個像素值並轉為灰度圖
    image1 = ImageOps.equalize(image1)  #均衡圖像的直方圖，為了讓灰色值均勻分佈
    code1 = getCode(image1, size)  #利用getCode方法取的0或1的list
    image2 = image2.resize(size).convert('L').filter(ImageFilter.BLUR)
    image2 = ImageOps.equalize(image2)
    code2 = getCode(image2, size)

    assert len(code1) == len(code2),"error"  
    
    return compCode(code1, code2)<=exact  #若code1和code2比較後的數值小於等於25,則兩張圖為相似圖



__all__=[classfiy_aHash]


In [6]:
from PIL import Image
image1 = Image.open("img/0004.jpg")
image2 = Image.open("img/0007.jpg")

n =classfiy_aHash(image1,image2,size=(8,8),exact=25)
print n

[1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0]
[0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1]
41
False


- 感知哈希法(pHash)：若用感知哈希法，可獲得更精確的結果，是利用DCT(離散餘弦變化)來降低頻率，DCT參考網址
https://zh.wikipedia.org/wiki/%E7%A6%BB%E6%95%A3%E4%BD%99%E5%BC%A6%E5%8F%98%E6%8D%A2
http://blog.csdn.net/luoweifu/article/details/8214959

In [20]:
from PIL import Image
from PIL import ImageFilter
from PIL import ImageOps
import math
import copy
#This module classify the image by  Discrete Cosine Transform and the  accurate rate has a little improve.

def get_code(List,middle):

    result = []
    for index in range(0,len(List)):  
        if List[index] > middle:  #像素值list大於中位數，則為1，並加到一個list裡
            result.append("1")
        else:
            result.append("0")
    print result
    return result



def comp_code(code1,code2):
    num = 0
    for index in range(0,len(code1)):
        if str(code1[index]) != str(code2[index]):   #兩個圖像
            num+=1
    return num 



def get_middle(List):
    li = copy.deepcopy(List)
    li.sort()  #排序
    value = 0
    if len(li)%2==0:  #若像素值list為偶數
        index = int((len(li)/2)) - 1  #取中間數減一的直

        value = li[index]
    else:
        index = int((len(li)/2))
        value = (li[index]+li[index-1])/2  #中間兩個數的平均值
    print value
    return value


def get_matrix(image):

    matrix = []
    size = image.size
    for height in range(0,size[1]):
        pixel = []
        for width in range(0,size[0]):
            pixel_value = image.getpixel((width,height))
            pixel.append(pixel_value)
        matrix.append(pixel)  #找出每個像素值並加進list

    return matrix


def get_coefficient(n):
    matrix = []
    PI = math.pi #圓周率
    sqr = math.sqrt(1/n) #開根號
    value = []
    for i in range(0,n):
        value.append(sqr)
    matrix.append(value)

    for i in range(1,n):
        value=[]
        for j in range (0,n):
            data = math.sqrt(2.0/n) * math.cos(i*PI*(j+0.5)/n);  
            value.append(data)
        matrix.append(value)

    return matrix


def get_transposing(matrix):
    new_matrix = []

    for i in range(0,len(matrix)):
        value = []
        for j in range(0,len(matrix[i])):
            value.append(matrix[j][i])
        new_matrix.append(value)

    return new_matrix

def get_mult(matrix1,matrix2):
    new_matrix = []

    for i in range(0,len(matrix1)):
        value_list = []
        for j in range(0,len(matrix1)): 
            t = 0.0
            for k in range(0,len(matrix1)):
                t += matrix1[i][k] * matrix2[k][j]
            value_list.append(t)
        new_matrix.append(value_list)

    return new_matrix

def DCT(double_matrix):
    n = len(double_matrix)
    A = get_coefficient(n)
    AT = get_transposing(A)

    temp = get_mult(double_matrix, A)
    DCT_matrix = get_mult(temp, AT)

    return DCT_matrix

def sub_matrix_to_list(DCT_matrix,part_size):
    w,h = part_size
    List = []
    for i in range(0,h):
        for j in range(0,w):
            List.append(DCT_matrix[i][j])
    return List



def classify_DCT(image1,image2,size=(32,32),part_size=(8,8)):
    """ 'image1' and 'image2' is a Image Object.
    You can build it by 'Image.open(path)'.
    'Size' is parameter what the image will resize to it and then image will be compared by the pHash.
    It's 32 * 32 when it default. 
    'part_size' is a size of a part of the matrix after Discrete Cosine Transform,which need to next steps.
    It's 8 * 8 when it default. 
    The function will return the hamming code,less is correct. 
    """
    assert size[0]==size[1],"size error"
    assert part_size[0]==part_size[1],"part_size error"

    image1 = image1.resize(size).convert('L').filter(ImageFilter.BLUR)  #縮小圖片至32*32並轉為灰度圖
    image1 = ImageOps.equalize(image1)
    matrix = get_matrix(image1)
    DCT_matrix = DCT(matrix)  #計算DCT，將圖片分離成分率的集合
    List = sub_matrix_to_list(DCT_matrix, part_size)  #縮小DCT,DCT計算後的矩陣為32*32,只保留左上角的8*8，這些代表圖片的最低頻率
    middle = get_middle(List)  #計算平均值
    code1 = get_code(List, middle)  #減小DCT,大於平均為1，小於平均為0

    image2 = image2.resize(size).convert('L').filter(ImageFilter.BLUR)
    image2 = ImageOps.equalize(image2)
    matrix = get_matrix(image2)
    DCT_matrix = DCT(matrix)
    List = sub_matrix_to_list(DCT_matrix, part_size)
    middle = get_middle(List)
    code2 = get_code(List, middle)



    return comp_code(code1, code2)  #此值越小越好

__all__ = [classify_DCT]

In [21]:
from PIL import Image
image1 = Image.open("img/0004.jpg")
image2 = Image.open("img/0002.jpg")

n =classify_DCT(image1,image2,size=(32,32),part_size=(8,8))
print n

177.0
['0', '0', '0', '1', '0', '0', '0', '1', '0', '0', '0', '0', '0', '1', '0', '1', '0', '0', '0', '0', '0', '1', '1', '1', '0', '0', '0', '1', '1', '1', '1', '1', '0', '1', '0', '0', '1', '1', '1', '1', '0', '0', '0', '0', '1', '1', '1', '1', '0', '0', '0', '1', '1', '1', '1', '1', '0', '0', '1', '1', '1', '1', '1', '1']
228.0
['0', '1', '1', '1', '1', '1', '1', '1', '0', '1', '1', '1', '1', '1', '1', '1', '0', '1', '1', '1', '1', '1', '1', '1', '0', '0', '1', '1', '1', '1', '1', '1', '0', '0', '1', '0', '0', '1', '1', '1', '0', '0', '0', '0', '0', '1', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0']
32


- 跟pHash相比，dHash速度較快，跟aHash比，dHash在效率幾乎相同的情況下效過更好，dHash是基於漸變實現的

In [26]:
from PIL import Image
from PIL import ImageFilter
from PIL import ImageOps
#This module can classfy the image by dHash


def getCode(img,size):

    result = []
    # print("x==",size[0])
    # print("y==",size[1]-1)
    
    x_size = size[0]-1#width
    y_size = size[1] #high
    for x in range(0,x_size):
        for y in range(0,y_size):
            now_value = img.getpixel((x,y))
            next_value = img.getpixel((x+1,y))

            if next_value < now_value:  #下一個像素值小於現在的像素值，則為一，反之，則為0
                result.append(1)
            else:
                result.append(0)

    print result
    return result



def compCode(code1,code2):
    num = 0
    for index in range(0,len(code1)):
        if code1[index] != code2[index]:
            num+=1
    return num 

def classfiy_dHash(image1,image2,size=(9,8)):
    ''' 'image1' and 'image2' is a Image Object.
    You can build it by 'Image.open(path)'.
    'Size' is parameter what the image will resize to it and then image will be compared to another image by the dHash.
    It's 9 * 8 when it default.  
    The function will return the hamming code,less is correct. 
    '''
    image1 = image1.resize(size).convert('L')  #縮小圖片到9*8並灰度化
    code1 = getCode(image1, size)


    image2 = image2.resize(size).convert('L')
    code2 = getCode(image2, size)

    assert len(code1) == len(code2),"error"
    
    return compCode(code1, code2)  #比較後，數值越小越好



__all__=[classfiy_dHash]

In [27]:
from PIL import Image
image1 = Image.open("img/0004.jpg")
image2 = Image.open("img/0002.jpg")

n =classfiy_dHash(image1,image2,size=(9,8))
print n

[0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1]
[1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0]
31
