In [1]:
# Returns an iterator over the paths of all *.jpg files in a directory (searched recursively)
def get_jpegs(path):
    """Iterate over every ``*.jpg`` file found under ``path``, recursively.

    Args:
        path: Directory to search. A trailing path separator is optional.

    Returns:
        The lazy iterator produced by ``glob.iglob`` over the matching paths.
    """
    import glob
    import os
    # Join with a real separator: plain concatenation turned 'dir' into the
    # pattern 'dir**/*.jpg', where '**' is no longer a whole path component,
    # so it matched sibling names such as 'dir2/' instead of recursing.
    return glob.iglob(os.path.join(path, '**', '*.jpg'), recursive=True)

In [2]:
# Converts an EXIF date string to a storage sub-path such as "2010/09 September"
def date_to_path(data):
    """Turn an EXIF timestamp into a "<year>/<month number> <month name>" sub-path.

    Args:
        data: Timestamp text in the form ``'YYYY:MM:DD HH:MM:SS'``.

    Returns:
        The sub-path string, e.g. ``'2010/09 September'`` (the month name is
        whatever ``strftime('%B')`` produces).

    Raises:
        ValueError: If ``data`` does not match the expected format.
    """
    from datetime import datetime
    taken_at = datetime.strptime(data, '%Y:%m:%d %H:%M:%S')
    return taken_at.strftime('%Y/%m %B')

In [3]:
# Returns EXIF datestamp
def get_exif_date(pic):
    """Read EXIF tag 36867 (DateTimeOriginal) from an image file.

    Args:
        pic: Path of the image file to inspect.

    Returns:
        The raw tag value, or ``None`` when the file cannot be opened as an
        image, the image object has no ``_getexif`` reader, the image carries
        no EXIF data, or the tag is absent.
    """
    import PIL.Image
    try:
        img = PIL.Image.open(pic)
    except Exception:
        # Best effort: anything that cannot be opened counts as "no date".
        # Unlike a bare ``except``, this lets KeyboardInterrupt through.
        return None

    # Release the file handle as soon as the tag has been read, so scanning
    # a large directory does not accumulate open files.
    with img:
        if not hasattr(img, '_getexif'):
            return None
        exif_data = img._getexif()
        # _getexif() yields None when the image has no EXIF block at all.
        if not exif_data:
            return None
        return exif_data.get(36867)

In [4]:
# Generates a list of move commands from an SD card to the storage
def get_move_commands(src, dst):
    """Plan where each dated photo under ``src`` should go inside ``dst``.

    Args:
        src: Directory scanned for JPEG files via ``get_jpegs``.
        dst: Root directory of the photo storage.

    Yields:
        ``(source_path, target_path)`` tuples, where ``target_path`` is
        ``dst + '/' + date_to_path(exif_date) + '/' + <file name>``.
        Photos without an EXIF date, or whose date cannot be parsed, are
        left out.
    """
    import os
    for pic in get_jpegs(src):
        date = get_exif_date(pic)
        if date is None:
            continue
        try:
            subdir = date_to_path(date)
        except (ValueError, TypeError):
            # A malformed stamp (e.g. '0000:00:00 00:00:00') must not abort
            # the whole scan; treat the photo like one without a date.
            continue
        yield (pic, dst + '/' + subdir + '/' + os.path.basename(pic))

In [5]:
def sha256sum(filename):
    """Return the hexadecimal SHA-256 digest of the file at ``filename``.

    The file is opened unbuffered in binary mode and fed to the hash in
    128 KiB reads, so it is never loaded into memory as a whole.
    """
    import hashlib
    chunk_size = 128 * 1024
    digest = hashlib.sha256()
    with open(filename, 'rb', buffering=0) as stream:
        chunk = stream.read(chunk_size)
        # An empty bytes object marks end of file.
        while chunk != b'':
            digest.update(chunk)
            chunk = stream.read(chunk_size)
    return digest.hexdigest()

In [6]:
# Moves files from an SD card to the storage
# The storage will have hierarchical folders like:
# "2010/09 September"
# If file exists already, it is skipped
def move_files(src_path, dst_path):
    """Move every planned photo from ``src_path`` into ``dst_path``.

    The complete list of ``(source, target)`` pairs is collected from
    ``get_move_commands`` before any file is touched. A pair whose target is
    already an existing file is reported and left alone; otherwise the
    target's directory is created if needed and the file is moved.
    """
    import os
    import shutil
    planned = list(get_move_commands(src_path, dst_path))
    for source, target in planned:
        if not os.path.isfile(target):
            os.makedirs(os.path.dirname(target), exist_ok=True)
            shutil.move(source, target)
            continue
        print ('Skip ' + source + ', ' + target + ' exists')

In [7]:
# Checks if directory has duplicate files
# Files count as duplicates only when both the EXIF date and the SHA-256 match;
# image content is not compared (i.e. similar shots from a photoshoot series are ignored)
def check_duplicates(path):
    """Print byte-identical photos under ``path`` that share an EXIF date.

    Photos are grouped by the value returned by ``get_exif_date`` (photos
    for which it returns ``None`` form one group). Within a group, a photo
    is reported when its SHA-256 digest equals that of a photo seen earlier.
    A progress line is printed every 100 photos. Nothing is returned.

    Args:
        path: Directory scanned for JPEG files via ``get_jpegs``.
    """
    groups = dict()   # EXIF date -> paths already seen with that date
    digests = dict()  # path -> SHA-256, computed lazily, once per file

    def digest_of(photo):
        # Hash only files that actually collide on the date, and never twice.
        if photo not in digests:
            digests[photo] = sha256sum(photo)
        return digests[photo]

    for n, pic in enumerate(get_jpegs(path)):
        if (n % 100) == 0:
            print ('photos loaded: ' + str(n))
        # Keep every photo per date: remembering only the latest one would
        # miss a duplicate of an older photo taken in the same second.
        same_date = groups.setdefault(get_exif_date(pic), [])
        for earlier in same_date:
            if digest_of(pic) == digest_of(earlier):
                print (pic + ' and ' + earlier + ' are same files')
                break  # one report per duplicate is enough
        same_date.append(pic)

In [8]:

def check_blurry_image(imagePath):
    """Compute the focus measure of the image at ``imagePath``.

    The image is read with OpenCV, converted to grayscale, and the variance
    of its Laplacian is returned (the "Variance of Laplacian" method).
    """
    import cv2
    bgr_pixels = cv2.imread(imagePath)
    grayscale = cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2GRAY)
    # 64-bit float output depth, as requested by the CV_64F argument.
    laplacian = cv2.Laplacian(grayscale, cv2.CV_64F)
    return laplacian.var()

In [9]:
def check_blurry(path):
    """Print one "<file> is <focus measure>" line per JPEG under ``path``.

    Files come from ``get_jpegs`` and each is scored with
    ``check_blurry_image``.
    """
    for photo in get_jpegs(path):
        score = check_blurry_image(photo)
        print (photo + ' is ' + str(score))

In [14]:
# Returns imagehash (see https://pypi.org/project/ImageHash/)
def get_hash(pic):
    """Compute the difference hash (``imagehash.dhash``) of an image file.

    Args:
        pic: Path of the image file to hash.

    Returns:
        The value returned by ``imagehash.dhash``, or ``None`` when the file
        cannot be opened as an image.
    """
    import PIL.Image
    import imagehash
    try:
        img = PIL.Image.open(pic)
    except Exception:
        # Best effort: anything that cannot be opened yields no hash.
        # Unlike a bare ``except``, this lets KeyboardInterrupt through.
        return None

    # Close the file once the hash has been computed, so hashing a large
    # directory does not accumulate open files.
    with img:
        return imagehash.dhash(img)
