In [1]:
from PIL import Image
import numpy as np
import os, re

In [2]:
# 파일 경로 지정하기
search_dir = "./image/101_ObjectCategories"
cache_dir = "./image/cache_avhash"
if not os.path.exists(cache_dir):
    os.mkdir(cache_dir)

In [3]:
# 이미지 데이터를 Average Hash로 변환하기 --- (※1)
def average_hash(fname, size = 16):
    fname2 = fname[len(search_dir):]
    # 이미지 캐시하기
    cache_file = cache_dir + "/" + fname2.replace('/', '_') + ".csv"
    if not os.path.exists(cache_file): # 해시 생성하기
        img = Image.open(fname)
        img = img.convert('L').resize((size, size), Image.ANTIALIAS)
        pixels = np.array(img.getdata()).reshape((size, size))
        avg = pixels.mean()
        px = 1 * (pixels > avg)
        np.savetxt(cache_file, px, fmt="%.0f", delimiter=",")
    else: # 캐시돼 있다면 읽지 않기
        px = np.loadtxt(cache_file, delimiter=",")
    return px

In [4]:
# 해밍 거리 구하기 --- (※2)
def hamming_dist(a, b):
    aa = a.reshape(1, -1) # 1차원 배열로 변환하기
    ab = b.reshape(1, -1)
    dist = (aa != ab).sum()
    return dist

In [5]:
# 모든 폴더에 처리 적용하기 --- (※3)
def enum_all_files(path):
    for root, dirs, files in os.walk(path):
        for f in files:
            #fname = os.path.join(root, f)
            fname = root + '/' + f
            print("fname : ",fname)
            if re.search(r'\.(jpg|jpeg|png)$', fname):
                #print(fname)
                yield fname

In [6]:
# 이미지 찾기 --- (※4)
def find_image(fname, rate):
    src = average_hash(fname)
    #print(src)
    for fname in enum_all_files(search_dir):
        dst = average_hash(fname)
        diff_r = hamming_dist(src, dst) / 256
        print("[check] ",fname)
        print("[check] ",fname)
        if diff_r < rate:
            yield (diff_r, fname)

In [7]:
# 찾기 --- (※5)
srcfile = search_dir + "/chair/image_0016.jpg"
html = ""
sim = list(find_image(srcfile, 0.25))
sim = sorted(sim, key=lambda x:x[0])
for r, f in sim:
    print(r, ">", f)
    s = '<div style="float:left;"><h3>[ 차이 :' + str(r) + '-' + \
        os.path.basename(f) + ']</h3>'+ \
        '<p><a href="' + f + '"><img src="' + f + '" width=400>'+ \
        '</a></p></div>'
    html += s

fname :  ./image/101_ObjectCategories\chair/image_0001.jpg
[check]  ./image/101_ObjectCategories\chair/image_0001.jpg
[check]  ./image/101_ObjectCategories\chair/image_0001.jpg
fname :  ./image/101_ObjectCategories\chair/image_0002.jpg
[check]  ./image/101_ObjectCategories\chair/image_0002.jpg
[check]  ./image/101_ObjectCategories\chair/image_0002.jpg
fname :  ./image/101_ObjectCategories\chair/image_0003.jpg
[check]  ./image/101_ObjectCategories\chair/image_0003.jpg
[check]  ./image/101_ObjectCategories\chair/image_0003.jpg
fname :  ./image/101_ObjectCategories\chair/image_0004.jpg
[check]  ./image/101_ObjectCategories\chair/image_0004.jpg
[check]  ./image/101_ObjectCategories\chair/image_0004.jpg
fname :  ./image/101_ObjectCategories\chair/image_0005.jpg
[check]  ./image/101_ObjectCategories\chair/image_0005.jpg
[check]  ./image/101_ObjectCategories\chair/image_0005.jpg
fname :  ./image/101_ObjectCategories\chair/image_0006.jpg
[check]  ./image/101_ObjectCategories\chair/image_0006.j

In [8]:
# HTML로 출력하기
html = """<html><head><meta charset="utf8"></head>
<body><h3>원래 이미지</h3><p>
<img src='{0}' width=400></p>{1}
</body></html>""".format(srcfile, html)
with open("./avhash-search-output.html", "w", encoding="utf-8") as f:
    f.write(html)
print("ok")

ok
