In [31]:
from glob import glob
from PIL import Image
import imagehash

def frame(fname):
    """Return the integer frame index encoded in an image file name.

    The index is the part of the file's base name before the first ".",
    e.g. 'cropped/001.png' -> 1. Only the final path component is inspected,
    so bare file names ('001.png'), nested directories ('a/b/001.png') and
    backslash-separated paths ('cropped\\001.png') are all accepted.

    Args:
        fname: path to a frame image whose base name starts with an integer.

    Returns:
        The frame index as an int (leading zeros are dropped).

    Raises:
        ValueError: if the text before the first "." is not an integer.
    """
    # Normalise backslash separators, then keep only the last path component
    # so the result does not depend on how deep the directory tree is.
    base = fname.replace("\\", "/").rsplit("/", 1)[-1]
    stem = base.split(".")[0]
    return int(stem)


def remove_outliers(files):
    """Keep only images whose average hash equals that of an adjacent image.

    Each file is compared with the file that follows it in ``files``. When the
    two average hashes are equal, both file names are appended to the result.
    Images that match neither neighbour are dropped. The goal is to find long
    contiguous runs of similar images, which would suggest static faces.

    Note that a file matching both of its neighbours appears twice in the
    result (once per matching pair); that output format is kept as-is.

    Args:
        files: ordered sequence of image paths (assumed sorted by frame).

    Returns:
        A list of file names, two entries per matching adjacent pair, in the
        order the pairs were encountered.
    """
    # With fewer than two files there is no pair to compare, so nothing is
    # opened at all.
    if len(files) < 2:
        return []

    def _hash(path):
        # The context manager releases the file handle as soon as the hash
        # has been computed, instead of leaving it to the garbage collector.
        with Image.open(path) as img:
            return imagehash.average_hash(img)

    stack = []

    # Carry the previous file's hash forward so every image is opened and
    # hashed exactly once rather than once per pair it participates in.
    prev_f = files[0]
    prev_h = _hash(prev_f)

    for next_f in files[1:]:
        next_h = _hash(next_f)

        # keep both members of the pair if their hashes are identical
        if next_h == prev_h:
            stack.append(prev_f)
            stack.append(next_f)

        prev_f, prev_h = next_f, next_h

    return stack


def create_blocks(stack, max_frame_diff=2):
    """Group file names into contiguous blocks of nearby frame indices.

    Adjacent entries of ``stack`` are compared by frame index (see ``frame``).
    While the index difference stays below ``max_frame_diff`` both indices are
    appended to the current block; a larger jump closes the current block and
    starts a new one.

    Args:
        stack: ordered list of frame file names (assumed sorted by frame
            index; repeated names are fine).
        max_frame_diff: two adjacent entries belong to the same block when
            ``next_index - prev_index < max_frame_diff``. Defaults to 2, i.e.
            identical or directly consecutive frames.

    Returns:
        A list of non-empty blocks. Each block is a list of int frame indices
        and may contain repeated values, because both indices of every
        adjacent pair are appended; wrap a block in ``set`` for the distinct
        frames.
    """
    blocks = []
    block = []

    for prev_f, next_f in zip(stack, stack[1:]):
        p_idx = frame(prev_f)
        n_idx = frame(next_f)

        if (n_idx - p_idx) < max_frame_diff:
            block.append(p_idx)
            block.append(n_idx)
        elif block:
            # A gap ends the current run. Only flush non-empty runs so that
            # back-to-back gaps do not produce empty blocks.
            blocks.append(block)
            block = []

    # Flush the run that was still open when the input ended; without this
    # the final block would be silently dropped.
    if block:
        blocks.append(block)

    return blocks

In [32]:
# Collect every cropped frame. sorted() orders the paths lexicographically,
# which the adjacent-pair comparisons below rely on.
# NOTE(review): this matches frame order only if the file names are
# zero-padded to a common width - confirm for the full data set.
files = sorted(glob('cropped/*.png'))
# Preview the first few paths.
files[:5]

['cropped/0000.png',
 'cropped/0001.png',
 'cropped/0002.png',
 'cropped/0003.png',
 'cropped/0004.png']

In [33]:
# Keep only frames whose average hash equals that of an adjacent frame.
stack = remove_outliers(files)

In [34]:
# Preview the result. A file name can appear twice because both members of
# every matching adjacent pair are appended.
stack[:5]

['cropped/0002.png',
 'cropped/0003.png',
 'cropped/0003.png',
 'cropped/0004.png',
 'cropped/0005.png']

In [36]:
# Group the surviving frames into runs of nearby frame indices.
blocks = create_blocks(stack)

In [37]:
# Report only the long runs: blocks with more than 5 distinct frame indices.
for block in blocks:
    # set() collapses the repeated indices that create_blocks appends for
    # each adjacent pair, so the length check counts distinct frames.
    block = set(block)
    if len(block) > 5:
        print(block)

{113, 114, 115, 116, 117, 118, 119}
{135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168}
{173, 174, 175, 176, 177, 178, 179, 180, 181}
{196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210}
{306, 307, 308, 309, 310, 311, 312}
