# Extract and crop the interpreter

In [None]:
# Interpreter box: fixed pixel rectangle, sliced out of frames as frame[Y : Y + H, X : X + W]
X, Y = 571, 208  # top-left corner of the box: X is the column offset, Y the row offset
W, H = 180, 193  # width (columns) and height (rows) of the box
ROOT_DIR = 'scraped'  # directory tree that is walked for .mp4 videos and their .json files

## Get frame indexes between which the interpreter is present

In [None]:
import json
import os
from cv2 import VideoCapture
import face_recognition
import numpy as np

from utils.video import get_vid_metadata, get_frame


# Reference set: one face encoding per image found in 'known_people', labelled
# with the image's file name minus the '.png' suffix. The two lists are kept
# index-aligned so a match position in known_faces maps to a name in known_people.
known_faces = []
known_people = []
for file_name in os.listdir('known_people'):
    img = face_recognition.load_image_file(os.path.join('known_people', file_name))
    encodings = face_recognition.face_encodings(img)
    if not encodings:
        # Name the offending file instead of failing with a bare IndexError on [0].
        raise ValueError(f'No face detected in reference image: {file_name}')
    enc = encodings[0]
    known_faces.append(enc)
    known_people.append(file_name.replace('.png', ''))


def is_known(idx: int, cap: VideoCapture):
    """Tell whether a known person is recognised in the interpreter box of a frame.

    The frame at index ``idx`` is fetched with ``get_frame`` and cropped to the
    interpreter box; every face encoding found in the crop is compared against
    ``known_faces``.

    Returns:
        The ``known_people`` entry of the first matching reference face for the
        first detected face that matches anything, or ``None`` if no detected
        face matches.
    """
    box = get_frame(idx, cap)[Y : Y + H, X : X + W]
    for candidate in face_recognition.face_encodings(box):
        hits = face_recognition.compare_faces(known_faces, candidate)
        if not any(hits):
            continue
        # argmax over booleans yields the position of the first True.
        return known_people[np.argmax(hits)]
    return None


# Interpreter presence over the frames of a video looks like:
# false, ..., false, true, ..., true, false, ..., false
def bsearch(l: int, r: int, predicate):  # noqa: E741
    """Binary-search the inclusive range [l, r] for the first index where predicate holds.

    The predicate is expected to look like [false, ..., false, true, ..., true]
    over the range. Used to find the start and end indexes where the
    interpreter is present without probing every frame.

    Returns:
        The smallest probed index for which ``predicate`` was true; ``r`` if the
        predicate was never true (or the range is empty).
    """
    lo, hi = l, r
    first_true = r
    while lo <= hi:
        mid = (lo + hi) // 2
        if not predicate(mid):
            # Everything up to mid is false: continue in the upper half.
            lo = mid + 1
            continue
        # mid is a candidate; look for an earlier true in the lower half.
        first_true = mid
        hi = mid - 1
    return first_true


def do_segment(path: str):
    """Locate the contiguous frame range in which a known interpreter is visible.

    The video is sampled at a fixed stride until one frame shows a known
    interpreter; two binary searches around that frame then find the first and
    the last frame of the segment.

    Args:
        path: Video path, handed to ``get_vid_metadata``.

    Returns:
        ``(start, end, interpreter)`` where ``start`` and ``end`` are inclusive
        frame indexes and ``interpreter`` is the name ``is_known`` returned for
        the sampled frame; ``None`` if no sampled frame shows a known
        interpreter.
    """
    cap, fps, frame_count = get_vid_metadata(path)
    try:
        # 30 min of SL at least (required by law), but search in chunks of
        # 15 min so we don't jump over a segment that is a tiny bit shorter.
        # Clamped to 1 so a zero fps cannot give range() a step of 0.
        stride = max(1, int(fps * 60 * 15))

        true_idx = -1
        interpreter = None
        for i in range(0, frame_count, stride):
            interpreter = is_known(i, cap)
            if interpreter:
                true_idx = i
                break
        if true_idx == -1:
            return None

        start = bsearch(0, true_idx, lambda x: is_known(x, cap))
        first_false = bsearch(
            true_idx, frame_count - 1, lambda x: not is_known(x, cap)
        )

        # bsearch falls back to its upper bound when the predicate never holds,
        # so re-check that frame: if the interpreter is still there, the
        # segment runs to the end of the video.
        if is_known(first_false, cap):
            end = frame_count - 1
        else:
            end = first_false - 1

        return start, end, interpreter
    finally:
        # Free the decoder/file handle whether or not a segment was found.
        cap.release()


# Segment every .mp4 under ROOT_DIR and store the result next to it as .json.
# Videos that already have a .json file are skipped; a video in which no
# interpreter is found aborts the run with a ValueError carrying its path.
for root, _, files in os.walk(ROOT_DIR):
    for file in files:
        if file.endswith('.mp4'):
            vid = os.path.join(root, file)
            seg = vid.replace('.mp4', '.json')
            if os.path.exists(seg):
                print(f'Skipping {seg}')
                continue
            segment = do_segment(vid)
            if not segment:
                raise ValueError(vid)
            payload = dict(zip(('start', 'end', 'interpreter'), segment))
            with open(seg, 'w') as f:
                json.dump(payload, f)
            print(seg)

## Some stats (no. hours and fps)

In [None]:
import cv2  # noqa: F811

# Distinct frame rates seen; stays empty while the fps collection below is disabled.
fps = set()

# Not named `sum`: rebinding the builtin would break sum() in every later cell.
total_frames = 0

for root, _, files in os.walk(ROOT_DIR):
    for file in files:
        if not file.endswith('.json'):
            continue
        with open(os.path.join(root, file)) as f:
            j = json.load(f)
        # 'start' and 'end' are inclusive frame indexes, hence the + 1.
        total_frames += j['end'] - j['start'] + 1
        # cap = cv2.VideoCapture(os.path.join(root, file.replace('.json', '.mp4')))
        # fps.add(cap.get(cv2.CAP_PROP_FPS))
        # cap.release()

# Hours of interpreter footage, assuming every video runs at 25 fps.
print(total_frames / 25 / 60 / 60)
print(fps)

283.1301
set()


## Random viz frames

In [None]:
import os
import json
import random
import cv2  # noqa: F811

# Interpreter box in pixels, passed to crop_with_bbox as (x, y, w, h).
X, Y = 571, 208  # top-left corner of the box
W, H = 180, 193  # width and height of the box
WINDOW_NAME = 'Viz'  # name of the OpenCV window the crops are shown in


def collect_segments(root=ROOT_DIR):
    """Gather (video path, metadata) pairs for videos that have a usable .json file.

    Walks ``root`` for ``.mp4`` files and loads the ``.json`` file with the same
    base path. A pair is kept only when the metadata contains the keys
    'start', 'end' and 'interpreter'. Unreadable or incomplete files are
    reported on stdout and skipped.

    Returns:
        List of ``(vid_path, meta)`` tuples.
    """
    required_keys = {'start', 'end', 'interpreter'}
    found = []
    for dirpath, _, names in os.walk(root):
        for name in names:
            if not name.endswith('.mp4'):
                continue
            vid_path = os.path.join(dirpath, name)
            json_path = vid_path.replace('.mp4', '.json')
            if os.path.exists(json_path):
                try:
                    with open(json_path) as jf:
                        meta = json.load(jf)
                    if not required_keys <= meta.keys():
                        print(f'Failed to load {json_path}')
                    else:
                        found.append((vid_path, meta))
                except Exception as e:
                    # Best effort: report the broken file and keep scanning.
                    print(f'Failed to load {json_path}: {e}')
    return found


def pick_frame_within_segment(cap, meta):
    """Read one uniformly random frame from the segment described by ``meta``.

    A frame index is drawn from the inclusive range
    ``[meta['start'], meta['end']]``, the capture is positioned on it and one
    frame is read.

    Returns:
        ``(idx, frame)`` on success, ``(None, None)`` if the read failed.
    """
    idx = random.randint(meta['start'], meta['end'])
    cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
    ok, frame = cap.read()
    return (idx, frame) if ok else (None, None)


def crop_with_bbox(frame, x, y, w, h):
    """Cut the rectangle (x, y, w, h) out of ``frame``, clipped to the image bounds.

    Returns:
        A copy of the clipped region, or ``None`` when the rectangle does not
        overlap the image at all (or has no area).
    """
    img_h, img_w = frame.shape[:2]
    left, top = max(0, x), max(0, y)
    right, bottom = min(img_w, x + w), min(img_h, y + h)
    if right <= left or bottom <= top:
        return None
    return frame[top:bottom, left:right].copy()


def draw_3x3_grid(img):
    """Overlay a 3x3 grid (like a camera viewfinder) on ``img``, modifying it in place.

    Two vertical and two horizontal 1-pixel green lines are drawn at one and
    two thirds (integer division) of the width and height.
    """
    height, width = img.shape[:2]
    col_step = width // 3
    row_step = height // 3
    green = (0, 255, 0)

    # vertical lines first, then horizontal ones
    for col in (col_step, 2 * col_step):
        cv2.line(img, (col, 0), (col, height - 1), green, 1)
    for row in (row_step, 2 * row_step):
        cv2.line(img, (0, row), (width - 1, row), green, 1)


# Interactive viewer: on every key press show the interpreter crop of a random
# frame taken from a random segmented video, until 'q' or ESC is pressed.
segments = collect_segments(ROOT_DIR)
if segments:
    print('Press any key to show a random cropped frame.')
    print("Press 'q' or ESC to quit.")

    cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)

    while True:
        vid_path, meta = random.choice(segments)

        cap = cv2.VideoCapture(vid_path)
        if cap.isOpened():
            frame_idx, frame = pick_frame_within_segment(cap, meta)
            cap.release()
        else:
            print(f'Could not open video: {vid_path}')
            cap.release()
            continue

        if frame is None:
            print(f'Could not grab frame for {vid_path}')
            continue

        crop = crop_with_bbox(frame, X, Y, W, H)
        if crop is None:
            print(f'BBox out of range for video: {vid_path}')
            continue

        draw_3x3_grid(crop)

        # describe the displayed frame on the console
        print('=' * 60)
        print(f'Video       : {vid_path}')
        print(f'Interpreter : {meta.get("interpreter")}')
        print(f'Frame index : {frame_idx}')

        cv2.imshow(WINDOW_NAME, crop)

        # block until a key is pressed; 27 is the ESC key code
        key = cv2.waitKey(0) & 0xFF
        if key == ord('q') or key == 27:
            break

    cv2.destroyAllWindows()
else:
    print('No (mp4, json) pairs found with valid metadata.')

## Crop segments with ffmpeg -- lossless quality

In [None]:
import os
import json
import subprocess

FPS = 25  # frame rate used by crop_segment to convert frame indexes into seconds
x, y = 571, 208  # top-left corner of the ffmpeg crop rectangle
width, height = 180, 193  # size of the ffmpeg crop rectangle


def crop_segment(vid_path: str, start_frame: int, end_frame: int):
    """Cut and crop the interpreter segment of a video with ffmpeg.

    The inclusive frame range ``[start_frame, end_frame]`` is converted to a
    start time and a duration using ``FPS``. ffmpeg then crops the module-level
    rectangle (``width``, ``height``, ``x``, ``y``), encodes the video with
    ``h264_nvenc`` at constant QP 0, copies the audio stream and writes the
    result next to the input as ``*.seg.mp4``, overwriting an existing file.

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
    """
    output_path = vid_path.replace('.mp4', '.seg.mp4')
    seek = f'{start_frame / FPS:.6f}'
    length = f'{(end_frame - start_frame + 1) / FPS:.6f}'

    command = ['ffmpeg', '-hide_banner', '-loglevel', 'error']
    command += ['-y']  # overwrite
    command += ['-hwaccel', 'cuda']
    command += ['-ss', seek, '-i', vid_path, '-t', length]
    command += ['-vf', f'crop={width}:{height}:{x}:{y}']
    command += ['-c:v', 'h264_nvenc', '-rc', 'constqp', '-qp', '0', '-preset', 'p1']
    command += ['-c:a', 'copy']
    command += [output_path]

    subprocess.run(command, check=True, stdout=subprocess.DEVNULL)


# Crop every segmented video under ROOT_DIR; videos whose .seg.mp4 output
# already exists are skipped.
for root, _, files in os.walk(ROOT_DIR):
    for file in files:
        if file.endswith('.json'):
            json_path = os.path.join(root, file)
            vid_path = json_path.replace('.json', '.mp4')

            if os.path.exists(json_path.replace('.json', '.seg.mp4')):
                print(f'Skipping {vid_path}')
            else:
                with open(json_path) as f:
                    segment = json.load(f)

                crop_segment(vid_path, segment['start'], segment['end'])
                print(vid_path)

Skipping ./2025/05/05/62540739-2.mp4
Skipping ./2025/05/29/62547811-2.mp4
Skipping ./2025/05/10/62542520-2.mp4
Skipping ./2025/05/15/62543845-2.mp4
Skipping ./2025/05/31/62548235-2.mp4
Skipping ./2025/05/23/62546036-2.mp4
Skipping ./2025/05/13/62543055-2.mp4
Skipping ./2025/05/18/62544587-2.mp4
Skipping ./2025/05/12/62542841-2.mp4
Skipping ./2025/05/24/62546375-2.mp4
Skipping ./2025/05/20/62545135-2.mp4
Skipping ./2025/05/09/62542153-2.mp4
Skipping ./2025/05/16/62544131-2.mp4
Skipping ./2025/05/14/62543477-2.mp4
Skipping ./2025/05/27/62547169-2.mp4
Skipping ./2025/05/06/62540996-2.mp4
Skipping ./2025/05/02/62540043-2.mp4
Skipping ./2025/05/08/62541789-2.mp4
Skipping ./2025/05/04/62540479-2.mp4
Skipping ./2025/05/04/62540490-2.mp4
Skipping ./2025/05/28/62547446-2.mp4
Skipping ./2025/05/30/62548066-2.mp4
Skipping ./2025/05/17/62544360-2.mp4
Skipping ./2025/05/07/62541413-2.mp4
Skipping ./2025/05/19/62544826-2.mp4
Skipping ./2025/10/05/62580344-2.mp4
Skipping ./2025/10/22/62584862-2.mp4
S