## Image Hashing

In [1]:
# import the necessary packages
import numpy as np
import cv2
from imutils import paths
import pickle
import vptree
import time
 
def dhash(image, hashSize=8):
    """Compute the difference hash (dHash) of a BGR image.

    The image is reduced to a ``hashSize`` x ``hashSize + 1`` grayscale
    thumbnail; each bit of the result records whether a pixel is brighter
    than its left-hand neighbour.

    Args:
        image: image array in BGR channel order (as passed to
            ``cv2.cvtColor`` with ``cv2.COLOR_BGR2GRAY``).
        hashSize: number of rows, and of gradient columns, in the thumbnail.

    Returns:
        A Python ``int`` whose bit ``i`` is set when the ``i``-th entry of
        the flattened gradient mask is true (``hashSize * hashSize`` bits).
    """
    # drop the colour information; only brightness matters for the hash
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # one extra column of width so that every row yields hashSize
    # left/right comparisons
    shrunk = cv2.resize(grayscale, (hashSize + 1, hashSize))

    # True wherever a pixel is brighter than the pixel to its left
    brighter_than_left = shrunk[:, 1:] > shrunk[:, :-1]

    # pack the boolean mask into an integer, least-significant bit first
    value = 0
    for position, is_set in enumerate(brighter_than_left.flatten()):
        if is_set:
            value |= 1 << position
    return value
def convert_hash(h):
    """Round-trip a hash through a 64-bit float and return a Python int.

    The conversion is intentionally kept lossy: float64 has a 53-bit
    mantissa, so hashes above 2**53 are rounded to the nearest
    representable value. Do not "fix" this without also regenerating any
    stored hashes, since the dictionary keys are produced by this function.
    NOTE(review): presumably done so the keys match the values the VP-Tree
    hands back -- confirm against the vptree library.

    Args:
        h: the integer hash to convert.

    Returns:
        The hash as a built-in ``int`` after float64 rounding.
    """
    as_float = np.asarray(h, dtype=np.float64).item()
    return int(as_float)

def hamming(a, b):
    """Return the Hamming distance between two integer hashes.

    Both arguments are coerced with ``int()`` first, so integer-valued
    floats are accepted as well.

    Args:
        a: first hash.
        b: second hash.

    Returns:
        The number of bit positions at which ``a`` and ``b`` differ.
    """
    # XOR leaves a 1 exactly where the two hashes disagree
    differing_bits = int(a) ^ int(b)
    return format(differing_bits, "b").count("1")

In [2]:

# locations of the reference images and of the serialized index artifacts
args = {'images' : 'referenceImages/',
        'tree' : 'VP-Tree.pickle',
        'hashes' : 'hashes.pickle'}

# grab the paths to the input images and initialize the dictionary
# of hashes (hash -> list of image paths that produced that hash)
imagePaths = list(paths.list_images(args["images"]))
hashes = {}

# loop over the image paths
for (i, imagePath) in enumerate(imagePaths):
    # load the input image
    print("[INFO] processing image {}/{}".format(i + 1,
        len(imagePaths)))
    image = cv2.imread(imagePath)

    # cv2.imread signals an unreadable/corrupt file by returning None
    # rather than raising; skip it so one bad file does not abort the run
    if image is None:
        print("[WARN] could not read {}, skipping".format(imagePath))
        continue

    # compute the hash for the image and convert it
    h = convert_hash(dhash(image))

    # update the hashes dictionary; images sharing a hash are grouped
    hashes.setdefault(h, []).append(imagePath)

# build the VP-Tree over the distinct hashes, using Hamming distance
print("[INFO] building VP-Tree...")
points = list(hashes.keys())
tree = vptree.VPTree(points, hamming)

# serialize the VP-Tree to disk (context manager closes the file even
# if pickling fails)
print("[INFO] serializing VP-Tree...")
with open(args["tree"], "wb") as f:
    pickle.dump(tree, f)

# serialize the hashes dictionary to disk
print("[INFO] serializing hashes...")
with open(args["hashes"], "wb") as f:
    pickle.dump(hashes, f)

[INFO] processing image 1/754
[INFO] processing image 2/754
[INFO] processing image 3/754
[INFO] processing image 4/754
[INFO] processing image 5/754
[INFO] processing image 6/754
[INFO] processing image 7/754
[INFO] processing image 8/754
[INFO] processing image 9/754
[INFO] processing image 10/754
[INFO] processing image 11/754
[INFO] processing image 12/754
[INFO] processing image 13/754
[INFO] processing image 14/754
[INFO] processing image 15/754
[INFO] processing image 16/754
[INFO] processing image 17/754
[INFO] processing image 18/754
[INFO] processing image 19/754
[INFO] processing image 20/754
[INFO] processing image 21/754
[INFO] processing image 22/754
[INFO] processing image 23/754
[INFO] processing image 24/754
[INFO] processing image 25/754
[INFO] processing image 26/754
[INFO] processing image 27/754
[INFO] processing image 28/754
[INFO] processing image 29/754
[INFO] processing image 30/754
[INFO] processing image 31/754
[INFO] processing image 32/754
[INFO] processing

[INFO] processing image 261/754
[INFO] processing image 262/754
[INFO] processing image 263/754
[INFO] processing image 264/754
[INFO] processing image 265/754
[INFO] processing image 266/754
[INFO] processing image 267/754
[INFO] processing image 268/754
[INFO] processing image 269/754
[INFO] processing image 270/754
[INFO] processing image 271/754
[INFO] processing image 272/754
[INFO] processing image 273/754
[INFO] processing image 274/754
[INFO] processing image 275/754
[INFO] processing image 276/754
[INFO] processing image 277/754
[INFO] processing image 278/754
[INFO] processing image 279/754
[INFO] processing image 280/754
[INFO] processing image 281/754
[INFO] processing image 282/754
[INFO] processing image 283/754
[INFO] processing image 284/754
[INFO] processing image 285/754
[INFO] processing image 286/754
[INFO] processing image 287/754
[INFO] processing image 288/754
[INFO] processing image 289/754
[INFO] processing image 290/754
[INFO] processing image 291/754
[INFO] p

[INFO] processing image 518/754
[INFO] processing image 519/754
[INFO] processing image 520/754
[INFO] processing image 521/754
[INFO] processing image 522/754
[INFO] processing image 523/754
[INFO] processing image 524/754
[INFO] processing image 525/754
[INFO] processing image 526/754
[INFO] processing image 527/754
[INFO] processing image 528/754
[INFO] processing image 529/754
[INFO] processing image 530/754
[INFO] processing image 531/754
[INFO] processing image 532/754
[INFO] processing image 533/754
[INFO] processing image 534/754
[INFO] processing image 535/754
[INFO] processing image 536/754
[INFO] processing image 537/754
[INFO] processing image 538/754
[INFO] processing image 539/754
[INFO] processing image 540/754
[INFO] processing image 541/754
[INFO] processing image 542/754
[INFO] processing image 543/754
[INFO] processing image 544/754
[INFO] processing image 545/754
[INFO] processing image 546/754
[INFO] processing image 547/754
[INFO] processing image 548/754
[INFO] p

In [2]:
# search configuration: serialized index artifacts, the query image and
# the maximum Hamming distance passed to the range search
args = {'tree': 'VP-Tree.pickle',
        'hashes': 'hashes.pickle',
        'query': 'queryImages/sample_1.jpg',
        'distance': 21}

# load the VP-Tree and hashes dictionary
# NOTE: unpickling can execute arbitrary code -- only load files produced
# by the indexing cell of this notebook, never untrusted ones. The tree
# was built with the `hamming` helper, so the cell defining it must have
# been run before loading.
print("[INFO] loading VP-Tree and hashes...")
with open(args["tree"], "rb") as f:
    tree = pickle.load(f)
with open(args["hashes"], "rb") as f:
    hashes = pickle.load(f)

# load the input query image; cv2.imread returns None on failure, so
# fail early with a clear message instead of a cryptic OpenCV error
image = cv2.imread(args["query"])
if image is None:
    raise OSError("could not read query image: {}".format(args["query"]))
cv2.imshow("query", image)

# compute the hash for the query image, then convert it
queryHash = dhash(image)
queryHash = convert_hash(queryHash)

# perform the search
print("[INFO] performing search...")
start = time.time()
results = tree.get_all_in_range(queryHash, args["distance"])
results = sorted(results)
end = time.time()
print("[INFO] search took {} seconds".format(end - start))

try:
    # loop over the results, which are (distance, hash) pairs
    for (d, h) in results:
        # grab all image paths in our dataset with the same hash
        resultPaths = hashes.get(h, [])
        print("[INFO] {} total image(s) with d: {}, h: {}".format(len(resultPaths), d, h))

        # loop over the result paths
        for resultPath in resultPaths:
            # load the result image and display it to our screen
            result = cv2.imread(resultPath)
            if result is None:
                # the file may have moved since the index was built
                print("[WARN] could not read {}, skipping".format(resultPath))
                continue
            cv2.imshow("Result", result)
            cv2.waitKey(0)
finally:
    # close the OpenCV windows so they do not linger after the cell ends
    cv2.destroyAllWindows()

[INFO] loading VP-Tree and hashes...
[INFO] performing search...
[INFO] search took 0.0005230903625488281 seconds
