In [20]:
from PIL import Image
from PIL import ImageFilter
from PIL import ImageOps
import math

def get_code(List,middle):
	result = []
	for index in range(0,len(List)):
		if List[index] > middle:
			result.append("1")
		else:
			result.append("0")
	return result


def comp_code(code1,code2):
	num = 0
	for index in range(0,len(code1)):
		if str(code1[index]) != str(code2[index]):
			num+=1
	return num 

def get_middle(List):
	li = List.copy()
	li.sort()
	if len(li)%2==0:
		index = int((len(li)/2)) - 1
		value = li[index]
	else:
		index = int((len(li)/2))
		value = (li[index]+li[index-1])/2
	return value


def get_matrix(image):
	matrix = []
	size = image.size
	for height in range(0,size[1]):
		pixel = []
		for width in range(0,size[0]):
			pixel_value = image.getpixel((width,height))
			pixel.append(pixel_value)
		matrix.append(pixel)	
	return matrix


def get_coefficient(n):
	matrix = []
	PI = math.pi
	sqr = math.sqrt(1/n)
	value = []
	for i in range(0,n):
		value.append(sqr)
	matrix.append(value)

	for i in range(1,n):
		value=[]
		for j in range (0,n):
			data = math.sqrt(2.0/n) * math.cos(i*PI*(j+0.5)/n)
			value.append(data)
		matrix.append(value)
	return matrix


def get_transposing(matrix):
	new_matrix = []
	for i in range(0,len(matrix)):
		value = []
		for j in range(0,len(matrix[i])):
			value.append(matrix[j][i])
		new_matrix.append(value)
	return new_matrix

def get_mult(matrix1,matrix2):
	new_matrix = []

	for i in range(0,len(matrix1)):
		value_list = []
		for j in range(0,len(matrix1)): 
			t = 0.0
			for k in range(0,len(matrix1)):
				t += matrix1[i][k] * matrix2[k][j]
			value_list.append(t)
		new_matrix.append(value_list)
	return new_matrix

def DCT(double_matrix):
	n = len(double_matrix)
	A = get_coefficient(n)
	AT = get_transposing(A)
	temp = get_mult(double_matrix, A)
	DCT_matrix = get_mult(temp, AT)
	return DCT_matrix
	
def sub_matrix_to_list(DCT_matrix,part_size):
	w,h = part_size
	List = []
	for i in range(0,h):
		for j in range(0,w):
			List.append(DCT_matrix[i][j])
	return List


def classify_DCT(image1,image2,size=(32,32),part_size=(8,8)):
	assert size[0]==size[1],"size error"
	assert part_size[0]==part_size[1],"part_size error"

	image1 = image1.convert('L').resize(size,Image.ANTIALIAS).filter(ImageFilter.BLUR)
	image1 = ImageOps.equalize(image1)
	matrix = get_matrix(image1)
	DCT_matrix = DCT(matrix)
	List = sub_matrix_to_list(DCT_matrix, part_size)
	middle = get_middle(List)
	code1 = get_code(List, middle)

	image2 = image2.convert('L').resize(size,Image.ANTIALIAS).filter(ImageFilter.BLUR)
	image2 = ImageOps.equalize(image2)
	matrix = get_matrix(image2)
	DCT_matrix = DCT(matrix)
	List = sub_matrix_to_list(DCT_matrix, part_size)
	middle = get_middle(List)
	code2 = get_code(List, middle)
	return comp_code(code1, code2)

In [21]:
path1='./img/1.png'
img1=Image.open(path1,'r')
for j in range(3,15):
    path2='./img/%s.png'%j
    img2=Image.open(path2,'r')
    # 感知hash算法，如果值小于5，说明两张图片相似，如果大于10，两张图片不相似
    print(j,classify_DCT(img1,img2))

3 16
4 20


5 31
6 0


7 24


8 26


9 2
10 42
11 20


12 36
13 20
14 44


In [30]:
from PIL import Image #导入pillow库下的image模块，主要用于图片缩放、图片灰度化、获取像素灰度值

def dhash(filepath, hash_size = 8):
  image=Image.open(filepath,'r')
  # Grayscale and shrink the image in one step.
  image = image.convert('L').resize(
    (hash_size + 1, hash_size),
    # Image.ANTIALIAS,
    #   Image.BICUBIC #双立方滤波。在输入图像的4*4矩阵上进行立方插值。
      Image.BILINEAR #双线性内插滤波。在输入图像的2*2矩阵上进行线性插值。
  )
  pixels = list(image.getdata())
  
  # Compare adjacent pixels.
  difference = []
  for row in range(hash_size):
    for col in range(hash_size):
      pixel_left = image.getpixel((col, row))
      pixel_right = image.getpixel((col + 1, row))
      difference.append(pixel_left > pixel_right)
  
  # Convert the binary array to a hexadecimal string.
  decimal_value = 0
  hex_string = []
  for index, value in enumerate(difference):
    if value:
      decimal_value += 2**(index % 8)
    if (index % 8) == 7:
      hex_string.append(hex(decimal_value)[2:].rjust(2, '0'))
      decimal_value = 0
  return ''.join(hex_string)

def Diff(dhash1, dhash2):
    num=0
    for i,j in zip(dhash1,dhash2):
        if i!=j:
            num+=1
    return num

def DHash(imagepath1, imagepath2):
    hash1 = dhash(imagepath1)
    hash2 = dhash(imagepath2)
    num = 0
    for i, j in zip(hash1, hash2):
        if i != j:
            num += 1
    return num

In [49]:
i=16
filepath1='./img/%s.png'%i
for j in range(i+1,19):
    filepath2='./img/%s.png'%j
    # 感知hash算法，如果值小于5，说明两张图片相似，如果大于10，两张图片不相似
    print(j,DHash(filepath1,filepath2))

17 12
18 10


In [7]:
from collections import defaultdict
import time
def drop_same_image(images):
    drop_image_index=[]
    for i in range(0,len(images)-1):
        image1=images[i]
        for j in range(i+1,len(images)):
            image2=images[j]
            score=DHash(image1,image2)
            if score:
                drop_image_index.append(images[j])
    imagess=[a for a in images if a not in drop_image_index]
    return imagess

scores=defaultdict(int)
a=time.time()
for id,image in imagesdict.items():
    author_image=image["author_image"]
    author_image=drop_same_image(author_image)
    content_image=image["content_image"]
    content_image=drop_same_image(content_image)
    for i,image1 in enumerate(author_image):
        for j,image2 in enumerate(content_image):
            score=DHash(image1,image2)
            if score:
                scores[id]+=1
                print(image1,image2)
                del content_image[j]
                continue
print(a-time.time())

['./img/13.png', './img/14.png', './img/15.png', './img/16.png', './img/17.png', './img/18.png', './img/19.png', './img/20.png', './img/21.png']


['./img/1.png', './img/2.png', './img/3.png', './img/4.png', './img/5.png', './img/6.png', './img/7.png', './img/8.png', './img/9.png']
31017 9 9


./img/13.png ./img/9.png
./img/14.png ./img/1.png


./img/16.png ./img/2.png


./img/17.png ./img/4.png


./img/20.png ./img/7.png
./img/21.png ./img/3.png


-12.655050039291382


In [191]:
from queue import PriorityQueue 
from functools import partial
q = PriorityQueue() 
import time
a=time.time()
for id,image in imagesdict.items():
    author_image=image["author_image"]
    print(len(author_image))
    for i in range(len(author_image)-1): 
        func=partial(DHash,author_image[i])
        iters=map(func,author_image[i+1:])
        print(list(iters))
print(a-time.time())

10


[False, False, False, False, False, False, False, False, False]


[False, False, False, False, False, False, False, False]


[False, False, False, False, False, False, False]


[False, False, False, False, False, False]


[False, False, False, False, False]


[False, False, False, False]
[False, False, False]


[False, False]
[True]
-3.67590594291687


In [32]:
imagesdict={31017: {'content_image': ['./img/1.png', './img/2.png', './img/3.png', './img/4.png', './img/5.png', './img/6.png', './img/7.png', './img/8.png', './img/9.png', './img/10.png', './img/11.png', './img/12.png'], 'author_image': ['./img/13.png', './img/14.png', './img/15.png', './img/16.png', './img/17.png', './img/18.png', './img/19.png', './img/20.png', './img/21.png', './img/22.png']}}


In [53]:
import cv2 as cv
import math
import numpy as np
from collections import defaultdict
#定义最大灰度级数
gray_level = 8

def maxGrayLevel(img):
    max_gray_level=0
    (height,width)=img.shape
    for y in range(height):
        for x in range(width):
            if img[y][x] > max_gray_level:
                max_gray_level = img[y][x]
    return max_gray_level+1

def getGlcm(input,d_x,d_y):
    if d_x<0:
        start=-d_x
    else:start=0
    srcdata=input.copy()
    ret=[[0.0 for i in range(gray_level)] for j in range(gray_level)]
    (height,width) = input.shape
    max_gray_level=maxGrayLevel(input)

    #若灰度级数大于gray_level，则将图像的灰度级缩小至gray_level，减小灰度共生矩阵的大小
    if max_gray_level > gray_level:
        for j in range(height):
            for i in range(width):
                srcdata[j][i] = srcdata[j][i]*gray_level / max_gray_level

    for j in range(height-d_y):
        for i in range(start,width-d_x):
            rows = srcdata[j][i-start]
            cols = srcdata[j + d_y][i+d_x]
            ret[rows][cols]+=1.0

    for i in range(gray_level):
        for j in range(gray_level):
            ret[i][j]/=float(height*width)

    return ret

def feature_computer(p):
    Con=0.0# 对比度反映图像的清晰程度。
    Ent=0.0 # 熵是对图像纹理复杂程度和非均匀程度的一种数学度量。
    Asm=0.0 #角二阶矩是灰度共生矩阵各个元素值的平方和，反映图像纹理的粗细程度和灰度的分布情况。
    Idm=0.0 # 反差分矩阵用来度量图像局部纹理变化的多少。
    for i in range(gray_level):
        for j in range(gray_level):
            Con+=(i-j)*(i-j)*p[i][j]
            Asm+=p[i][j]*p[i][j]
            Idm+=p[i][j]/(1+(i-j)*(i-j))
            if p[i][j]>0.0:
                Ent+=p[i][j]*math.log(p[i][j])
    return [Asm,Con,-Ent,Idm]

def GrayCoOccur(filepath):
    """基于灰度共生矩阵的图像相似度算法"""
    img_gray=changeGray(filepath)
    glcms=[]
    d=1 #像素移动距离
    for x,y in [[d,0],[0,d],[d,d],[-d,d]]:
        glcm=getGlcm(img_gray, x,y)
        glcms.append(glcm)
    
    scores=[]
    for score in glcms:
        feature=feature_computer(score)
        scores.extend(feature)
    return scores

def SimGrayCoOccur(score1,score2):
    vector1=np.array(score1)
    vector2=np.array(score2)
    # sim=np.dot(vector1,vector2)/(np.linalg.norm(vector1)*(np.linalg.norm(vector2)))
    sim=np.linalg.norm(vector1-vector2,ord=1)
    return sim
    

def changeGray(filepath,size=256):
    img = cv.imread(filepath)
    img_shape=img.shape
    # int(img_shape[1]/2),int(img_shape[0]/2)
    img=cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    img_gray=cv.resize(img,(size,size),interpolation=cv.INTER_CUBIC)
    return img_gray

def GrayHis(filepath,size):
    """基于灰度颜色直方图的相似度算法"""
    img_gray=changeGray(filepath,size=size)
    h,w=img_gray.shape
    grayhis=defaultdict(int)
    for i in range(h):
        for j in range(w):
            grayhis[img_gray[i][j]]+=1
    
    grayhis=sorted(grayhis.items(),key=lambda key:key[0])
    grayhis={a:b/float(h*w) for a,b in grayhis}
    return grayhis

def SimGrayHis(gray1,gray2):
    gray1_sum=sum(gray1.values())
    gray2_sum=sum(gray2.values())
    graysum=0
    for i in range(256):
        graysum+=math.sqrt(gray2.get(i,0)*gray1.get(i,0))
    sim=1-math.sqrt(1-graysum/float(gray1_sum*gray2_sum))
    return sim


In [63]:
filepath1='./img/9.png'
filepath2='./img/8.png'
gray1=GrayHis(filepath1,128)
gray2=GrayHis(filepath2,128)
sim1=SimGrayHis(gray1,gray2)
sim2=SimGrayCoOccur(GrayCoOccur(filepath1),GrayCoOccur(filepath2))
k=0.5
print(sim1,sim2)
sim=k*sim1+(1-k)*(1/float(0.01+sim2))
print(sim)


0.8992923025824633 0.16871218099194485
3.2474408915531536
