In [3]:
import hashlib
from pathlib import Path

# Lower-case file suffixes (including the leading dot) treated as images by default.
DEFAULT_EXTS = {".jpg", ".jpeg", ".png", ".tif", ".tiff", ".bmp"}


def iter_images(root, exts=DEFAULT_EXTS):
    """Yield the path of every image file found recursively under *root*.

    Args:
        root: Directory to walk; anything accepted by ``pathlib.Path``.
        exts: Iterable of file suffixes (with leading dot) to accept. The
            comparison is case-insensitive on both sides, so ``{".JPG"}``
            and ``{".jpg"}`` select the same files. A single string is
            treated as one extension.

    Yields:
        ``pathlib.Path`` objects for regular files whose suffix is in *exts*.
    """
    if isinstance(exts, str):
        exts = (exts,)
    # File suffixes are lower-cased before the lookup, so the accepted set
    # must be lower-cased too; otherwise upper-case entries never match.
    wanted = frozenset(ext.lower() for ext in exts)
    for p in Path(root).rglob("*"):
        # Cheap string test first; is_file() has to touch the filesystem.
        if p.suffix.lower() in wanted and p.is_file():
            yield p


def file_hash(path, chunk_size=1 << 20):
    """Return the hexadecimal MD5 digest of the contents of the file at *path*.

    The file is read in binary mode in pieces of *chunk_size* bytes, so the
    whole file never has to be held in memory at once.

    Args:
        path: Path of the file to hash; anything accepted by ``open``.
        chunk_size: Number of bytes requested per read (default 1 MiB).
            Must not be zero.

    Returns:
        The digest as a 32-character lower-case hex string.

    Raises:
        ValueError: If *chunk_size* is zero.
        OSError: If the file cannot be opened or read.

    Note:
        MD5 is not collision-resistant; it is adequate for spotting
        identical content but must not be relied on for security.
    """
    # read(0) always returns b"", which would end the loop immediately and
    # give every file the digest of empty input.
    if chunk_size == 0:
        raise ValueError("chunk_size must be non-zero")
    h = hashlib.md5()
    with open(path, "rb") as f:
        # The b"" sentinel stops the iteration at end of file.
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()


def group_by_hash(root, exts=DEFAULT_EXTS):
    """Bucket the image files under *root* by the digest of their contents.

    Args:
        root: Directory to scan; forwarded to ``iter_images``.
        exts: Accepted file suffixes; forwarded to ``iter_images``.

    Returns:
        A dict mapping each ``file_hash`` digest to the list of paths (as
        strings) whose contents produced it, in the order they were visited.
    """
    by_digest = {}
    for image_path in iter_images(root, exts):
        digest = file_hash(image_path)
        if digest not in by_digest:
            by_digest[digest] = []
        by_digest[digest].append(str(image_path))
    return by_digest


def split_unique_and_duplicates(root, exts=DEFAULT_EXTS):
    """Separate images under *root* into one-of-a-kind files and duplicate sets.

    Args:
        root: Directory to scan; forwarded to ``group_by_hash``.
        exts: Accepted file suffixes; forwarded to ``group_by_hash``.

    Returns:
        A ``(uniques, duplicates)`` tuple. ``uniques`` is a flat list of the
        paths whose digest occurs exactly once; ``duplicates`` is a list of
        path lists, one per digest shared by more than one file.
    """
    buckets = list(group_by_hash(root, exts).values())
    # Every bucket holds at least one path, so "not exactly one" means
    # "two or more".
    uniques = [paths[0] for paths in buckets if len(paths) == 1]
    duplicates = [paths for paths in buckets if len(paths) != 1]
    return uniques, duplicates


if __name__ == "__main__":
    import sys  # not referenced below; kept so the name stays bound as before

    # Scan the current working directory and report the findings.
    root = "."
    uniques, dups = split_unique_and_duplicates(root)
    print(f"Unique images: {len(uniques)}")
    print(f"Duplicate groups: {len(dups)}")
    for group in dups:
        # A blank separator line, then one path per line. Duplicate groups
        # always contain at least two paths.
        print()
        print(*group, sep="\n")


Unique images: 4528
Duplicate groups: 1860

data/single_chromosomes_object/images/1052172.jpg
data/24_chromosomes_object/images/1052172.jpg

data/single_chromosomes_object/images/105112.jpg
data/24_chromosomes_object/images/105112.jpg

data/single_chromosomes_object/images/1055863.jpg
data/24_chromosomes_object/images/1055863.jpg

data/single_chromosomes_object/images/1053252.jpg
data/24_chromosomes_object/images/1053252.jpg

data/single_chromosomes_object/images/1051312.jpg
data/24_chromosomes_object/images/1051312.jpg

data/single_chromosomes_object/images/1053753.jpg
data/24_chromosomes_object/images/1053753.jpg

data/single_chromosomes_object/images/1053592.jpg
data/24_chromosomes_object/images/1053592.jpg

data/single_chromosomes_object/images/1050901.jpg
data/24_chromosomes_object/images/1050901.jpg

data/single_chromosomes_object/images/103082.jpg
data/24_chromosomes_object/images/103082.jpg

data/single_chromosomes_object/images/1050373.jpg
data/24_chromosomes_object/images/105