In [16]:
from glob import glob
from PIL import Image
import imagehash
from typing import List

def frame(fname: str) -> int:
    """for a given file name, return the frame index.

    the frame index is the text before the first "." of the last path
    component, so any number of leading directories (including none) is fine.

    for example, 'cropped/001.png' returns int 1 and
    'cropped_static/00084.00000.jpg' returns int 84.

    Args:
        fname: path of the image file, using "/" as the separator.

    Returns:
        the frame index as an int (leading zeros are dropped).

    Raises:
        ValueError: if the file name does not start with an integer.
    """
    # use the last path component instead of a fixed position: a fixed index
    # breaks on bare file names and on paths nested more than one level deep.
    basename = fname.rsplit("/", 1)[-1]
    return int(basename.split(".", 1)[0])


def remove_outliers(files: List[str], max_distance: int = 0) -> List[str]:
    """remove images that are not similar to either of their neighbors.

    files is a list of image file names sorted by frame index number, so that
    adjacent entries are adjacent frames.

    an image is kept when its average hash is within max_distance bits of the
    hash of the image right before or right after it in files. this is because
    we want to find long contiguous blocks of similar images, which would
    suggest static faces.

    Args:
        files: image paths, sorted by frame index.
        max_distance: largest Hamming distance between two average hashes that
            still counts as "similar". the default 0 requires identical hashes.

    Returns:
        the kept paths, in their original order, each listed once.
    """
    # with fewer than two images there is nothing to compare against.
    if len(files) < 2:
        return []

    # hash every image exactly once. the context manager closes each file
    # handle as soon as its hash has been computed.
    hashes = []
    for path in files:
        with Image.open(path) as img:
            hashes.append(imagehash.average_hash(img))

    # mark both members of every similar adjacent pair. using flags (instead of
    # appending both names per pair) keeps a file that matches both of its
    # neighbors from being listed twice.
    keep = [False] * len(files)
    for i in range(len(files) - 1):
        # subtracting two ImageHash objects gives their Hamming distance.
        if hashes[i] - hashes[i + 1] <= max_distance:
            keep[i] = True
            keep[i + 1] = True

    return [path for path, kept in zip(files, keep) if kept]


def create_blocks(stack: List[str], max_frame_diff: int = 5) -> List[List[int]]:
    """create contiguous blocks of similar frames.

    walks the file names in order and groups their frame indices: a frame joins
    the current block while it is fewer than max_frame_diff frames after the
    previous one, otherwise it starts a new block.

    Args:
        stack: image file names sorted by frame index (repeated names are
            tolerated and collapsed).
        max_frame_diff: a gap of this many frames or more between consecutive
            entries ends the current block.

    Returns:
        a list of blocks, each a list of distinct frame indices in input order.
        frames with no close neighbor are left out, so every block holds at
        least two frames.
    """
    blocks = []
    block = []

    for idx in (frame(fname) for fname in stack):
        if block and idx - block[-1] >= max_frame_diff:
            # gap reached: close the current run. a lone frame is not a block.
            if len(block) > 1:
                blocks.append(block)
            block = []
        # skip an index equal to the previous one so blocks hold each frame once.
        if not block or idx != block[-1]:
            block.append(idx)

    # the last run has no gap after it, so it must be flushed explicitly;
    # without this the final block would be silently dropped.
    if len(block) > 1:
        blocks.append(block)

    return blocks

In [28]:
# collect every path under cropped_static/. the file names start with a
# zero-padded frame index, so a plain string sort puts them in frame order.
files = sorted(glob('cropped_static/*'))
# preview the first few paths.
files[:5]

['cropped_static/00084.00000.jpg',
 'cropped_static/00088.00001.jpg',
 'cropped_static/00092.00001.jpg',
 'cropped_static/00096.00001.jpg',
 'cropped_static/00100.00001.jpg']

In [29]:
# keep only the images whose average hash matches that of an adjacent image.
stack = remove_outliers(files)

In [30]:
# preview the first few file names that survived the outlier filter.
stack[:5]

['cropped_static/00084.00000.jpg',
 'cropped_static/00088.00001.jpg',
 'cropped_static/00100.00001.jpg',
 'cropped_static/00104.00001.jpg',
 'cropped_static/00108.00001.jpg']

In [31]:
# group the surviving frames into runs of nearby frame indices.
blocks = create_blocks(stack)

In [32]:
# report only the long runs: blocks holding more than 10 distinct frames.
for block in map(set, blocks):
    if len(block) > 10:
        print(block)

{128, 100, 132, 104, 136, 108, 140, 112, 144, 156, 116, 148, 120, 124, 152}
{256, 384, 260, 388, 264, 392, 268, 396, 272, 400, 276, 404, 280, 408, 284, 412, 288, 416, 164, 292, 420, 168, 296, 424, 172, 300, 428, 176, 304, 432, 180, 308, 184, 312, 188, 316, 192, 320, 196, 324, 200, 328, 204, 332, 208, 336, 212, 340, 216, 344, 380, 220, 348, 224, 352, 228, 356, 376, 232, 360, 236, 364, 240, 368, 244, 372, 248, 252}
{640, 644, 648, 652, 656, 660, 664, 668, 672, 676, 680, 684, 688, 692, 696, 700, 576, 704, 580, 708, 584, 712, 588, 716, 592, 596, 600, 604, 608, 612, 616, 620, 624, 628, 632, 636}
{792, 796, 800, 804, 808, 812, 816, 820, 824, 828, 832, 836, 840, 844, 848, 852, 856, 860, 864, 868, 872}
{1024, 1028, 1032, 1036, 1040, 1044, 1048, 1052, 1056, 1060, 1064, 1068, 1072, 1076, 1080, 972, 976, 980, 984, 988, 992, 996, 1000, 1004, 1008, 1012, 1016, 1020}
{1152, 1156, 1160, 1164, 1168, 1172, 1176, 1180, 1184, 1188, 1192, 1196, 1200, 1204, 1208, 1212, 1216, 1220, 1224, 1228, 1232, 1236, 1