All credits for these algorithms go to:  
https://blog.iconfinder.com/detecting-duplicate-images-using-python-cb240b05a3b6  
http://www.hackerfactor.com/blog/index.php?/archives/529-Kind-of-Like-That.html  
https://en.wikipedia.org/wiki/Hamming_distance#Algorithm_example

In [1]:
def dhash(image, hash_size=16):
    """Return the difference hash (dHash) of *image* as a hexadecimal string.

    The image is converted to grayscale and resized to
    ``(hash_size + 1) x hash_size`` pixels. Every pixel is then compared with
    its right-hand neighbour, giving ``hash_size * hash_size`` bits (a bit is
    set when the left pixel is brighter than the right one).

    Args:
        image: An object offering ``convert``, ``resize`` and ``getpixel``
            the way a PIL image does.
        hash_size: Number of comparisons per row and number of rows.

    Returns:
        A lowercase hex string with two characters per byte. Bits are packed
        least-significant-bit first within each byte, in row-major order.
        When ``hash_size * hash_size`` is not a multiple of 8, the final
        partial byte is emitted too instead of being dropped.
    """
    # Grayscale and shrink the image in one step.
    # Image.LANCZOS is the filter formerly also exposed as Image.ANTIALIAS;
    # the ANTIALIAS alias no longer exists in Pillow 10 and later.
    image = image.convert('L').resize((hash_size + 1, hash_size), Image.LANCZOS)

    # Compare adjacent pixels, row by row.
    difference = [
        image.getpixel((col, row)) > image.getpixel((col + 1, row))
        for row in range(hash_size)
        for col in range(hash_size)
    ]

    # Pack the bits into bytes, 8 at a time; stepping over slices means a
    # trailing group of fewer than 8 bits still produces a byte.
    hex_string = []
    for start in range(0, len(difference), 8):
        byte_value = sum(
            1 << bit
            for bit, is_set in enumerate(difference[start:start + 8])
            if is_set
        )
        hex_string.append(format(byte_value, '02x'))

    return ''.join(hex_string)

In [2]:
def hamming_distance(s1, s2):
    """Count the positions at which two equal-length sequences differ.

    The comparison is element by element, so for two strings the result is
    the number of differing characters.

    Raises:
        ValueError: If the two sequences do not have the same length.
    """
    if len(s1) == len(s2):
        mismatches = 0
        for left, right in zip(s1, s2):
            mismatches += left != right
        return mismatches
    raise ValueError("Undefined for sequences of unequal length")

In [8]:
import os
from PIL import Image
# Open the four sample images used by the following cells.
test1, test2, test3, test4 = (
    Image.open(path)
    for path in ('test1.jpg', 'test2.jpg', 'test3.jpg', 'test4.jpg')
)

# Show the on-disk size of the first sample, rounded to whole KB (bytes / 1024).
print(
    int(round(os.path.getsize('test1.jpg') / 1024)),
    "KB",
)

1006 KB


In [4]:
# Compute the difference hash of each sample image with the default hash size.
dash1, dash2, dash3, dash4 = (
    dhash(sample) for sample in (test1, test2, test3, test4)
)

# Print one hash per line, in order.
print(dash1, dash2, dash3, dash4, sep='\n')

78bce34f414bb54fbd4c7f5e273337239f65f3457312e143f947690e2f0dc603
78bce34f414bb54fbd4c7f5e273337239f65e3457312e1437947690e2f0dc603
38bce34f414b954fbd4d7f5a273337239f65e34d7312e5437947690e2f0dc601
12d22e924293499b64db64dba4da24da26daa6dab2d992dcd2f44af769f73871


In [5]:
# Compare selected pairs of hash strings; each distance is the number of
# character positions at which the two hex strings differ.
dis12, dis13, dis23, dis24 = (
    hamming_distance(first, second)
    for first, second in (
        (dash1, dash2),
        (dash1, dash3),
        (dash2, dash3),
        (dash2, dash4),
    )
)

# Print one distance per line, in order.
print(dis12, dis13, dis23, dis24, sep='\n')

2
9
7
63
