In [1]:
from scenedetect import detect, AdaptiveDetector, split_video_ffmpeg, ContentDetector

# Example clip used to try out scenedetect's `detect`.
ex_vid_fp = "/mnt/arc/levlevi/nba-positions-videos-dataset/nba-plus-statvu-dataset/clips/707/period1/707_period1_2+_76423285.mp4"
# Run scene detection on the example clip with an AdaptiveDetector.
scene_list = detect(ex_vid_fp, AdaptiveDetector())

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import json

# Example annotation file; its parsed JSON is kept in `data` for the cells below.
annotation_ex = "/mnt/arc/levlevi/nba-positions-videos-dataset/nba-plus-statvu-dataset/clip-annotations/17600/period1/17600_period1_1-_77131343_annotation.json"
with open(annotation_ex, 'r') as f:
    data = json.load(f)

In [3]:
# Collect the distinct frame ids that appear in the example annotation.
unique_keys = set()
for item in data['frames']:
    frame_id = item["frame_id"]
    # set.add is a no-op for ids already present, so no membership test is needed
    unique_keys.add(frame_id)

In [4]:
# Length of the "bbox" entry of the first annotated frame.
len(data['frames'][0]['bbox'])

5

In [5]:
# for each clip
    # 1. do we have an annotation file?
    # 2. parse clip into [segmented_clip]
    # for each segmented_clip:
        # a. longer than 2s?
        # b. are bbxs present?
        # c. avg # bbxs >= 3 ?
        # if all criteria are met:
            # create new segmented_clip and annotation file using simple subset of og files

In [6]:
import os
from glob import glob

# Every .mp4 found two directory levels below the clips root.
all_clip_file_paths = glob(
    "/mnt/arc/levlevi/nba-positions-videos-dataset/nba-plus-statvu-dataset/clips" + '/*/*/*.mp4'
)
# Every .json found two directory levels below the clip-annotations root.
all_annotations_paths = glob(
    "/mnt/arc/levlevi/nba-positions-videos-dataset/nba-plus-statvu-dataset/clip-annotations"
    + "/*/*/*.json"
)
# Map each annotation file name to the clip file name it corresponds to
# ('.json' -> '.mp4', '_annotation' removed) and keep the distinct names.
all_annotation_basenames = {
    os.path.basename(annotation_fp).replace('.json', '.mp4').replace('_annotation', '')
    for annotation_fp in all_annotations_paths
}

In [7]:
# all clips to process: clips whose file name matches an annotation file name
clips_w_ann_file_paths = [fp for fp in all_clip_file_paths if os.path.basename(fp) in all_annotation_basenames]

In [35]:
import cv2
from scenedetect import detect
from statistics import mean
from typing import List, Tuple
from scenedetect.frame_timecode import FrameTimecode


# Minimum scene length in frames; filter_scenes documents this as 2+ sec.,
# so the factor 30 is presumably the clips' frame rate — TODO confirm.
MIN_SCENE_LEN = 2 * 30
# Minimum average number of bbxs per frame for a scene to be kept.
MIN_NUM_BBXS = 3
# Value used as the `threshold` argument of ContentDetector.
THRESHOLD = 30

# Module-level ContentDetector configured with THRESHOLD.
detector = ContentDetector(threshold=THRESHOLD)

def parse_scene(video_fp: str) -> List[Tuple]:
    """
    Detect the scenes of a video and attach each scene's length in frames.

    Args:
        video_fp: path of the video file to analyse.

    Returns:
        One `(start, end, length_in_frames)` tuple per scene returned by
        `detect`, where `start` and `end` are the values `detect` produced
        and `length_in_frames` is `int(end) - int(start)`. The list is empty
        when `detect` returns no scenes.
    """
    # Detectors keep per-video state while processing frames, so a fresh one
    # is built for every call instead of sharing a single instance between
    # different videos.
    scene_list = detect(video_fp, ContentDetector(threshold=THRESHOLD))
    return [(start, end, int(end) - int(start)) for start, end in scene_list]


def filter_scenes(video_fp: str, scenes: List[Tuple]):
    """
    Return scenes that are:
        1. 2+ sec. in length (at least MIN_SCENE_LEN frames)
        2. contain an avg. of 3+ bbxs per frame (at least MIN_NUM_BBXS)

    The annotation file is located by rewriting `video_fp`
    ("clips" -> "clip-annotations", ".mp4" -> "_annotation.json").

    Args:
        video_fp: path of the clip the scenes were detected in.
        scenes: `(start, end, length_in_frames)` tuples whose `start` and
            `end` expose a `frame_num` attribute (as produced by
            `parse_scene`). May be empty.

    Returns:
        The subset of `scenes` meeting both criteria, in their original
        order. When `scenes` is empty the whole clip is treated as a single
        scene and returned as a one-element list if it meets both criteria;
        otherwise an empty list is returned.

    Raises:
        OSError: if the annotation file cannot be opened.
        KeyError: if the annotation lacks "frames" or a frame lacks "frame_id".
    """

    # look up annotation fp
    annotation_fp = video_fp.replace("clips", "clip-annotations").replace(
        ".mp4", "_annotation.json"
    )
    # load data
    with open(annotation_fp, 'r') as f:
        data = json.load(f)

    # frame_id -> # bbxs. Keyed by id rather than by position in the list so
    # that sparse or unordered annotations are still counted correctly.
    bbxs_by_frame_id = {}
    for item in data["frames"]:
        bbxs_by_frame_id[int(item["frame_id"])] = len(item.get("bbox") or ())

    # num frames to parse (and the clip's fps); always release the capture
    capture = cv2.VideoCapture(video_fp)
    try:
        final_frame = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = capture.get(cv2.CAP_PROP_FPS)
    finally:
        capture.release()
    if not fps > 0:
        # fps unavailable: fall back to the 30 fps MIN_SCENE_LEN is based on
        fps = 30

    # count bbxs
    # [# bbxs], one entry per frame; frames without annotations count as 0
    num_bbxs = [bbxs_by_frame_id.get(frame_idx, 0) for frame_idx in range(final_frame)]

    # edge case: no scenes detected -> treat the whole clip as one scene
    if len(scenes) == 0:
        if final_frame < MIN_SCENE_LEN:
            return []
        # `not num_bbxs` guards mean() against an empty list
        if not num_bbxs or mean(num_bbxs) < MIN_NUM_BBXS:
            return []
        return [(FrameTimecode(0, fps=fps), FrameTimecode(final_frame, fps=fps), final_frame)]

    filtered_scenes = []
    # filter scenes
    for scene in scenes:
        # 1. at least 2s long?
        scene_start = scene[0].frame_num
        scene_end = scene[1].frame_num
        if scene_end - scene_start < MIN_SCENE_LEN:
            continue
        # 2. avg # bbxs >= 3?
        scene_bbxs = num_bbxs[scene_start:scene_end]
        # an empty slice means the scene lies outside the counted frames
        if not scene_bbxs or mean(scene_bbxs) < MIN_NUM_BBXS:
            continue
        filtered_scenes.append(scene)

    return filtered_scenes

In [37]:
# Try the pipeline on the second clip: detect scenes, filter them, then
# display the scenes that were kept.
clip_ex = clips_w_ann_file_paths[1]
scenes = parse_scene(clip_ex)
filtered_scenes = filter_scenes(clip_ex, scenes)
filtered_scenes

[(00:00:00.000 [frame=0, fps=30.000],
  00:00:11.067 [frame=332, fps=30.000],
  332)]

In [10]:
# Display the scenes parse_scene finds in the first clip.
clip_ex = clips_w_ann_file_paths[0]
scenes = parse_scene(clip_ex)
scenes

[]