## About
This notebook explains how I extracted frames from the training videos (see: https://www.kaggle.com/c/nfl-impact-detection/discussion/201502).

Actually, I extracted the frames in my local environment and uploaded them, because of the output size limit of Kaggle Notebooks (20GB).

In [None]:
import os
import sys
from pathlib import Path

import cv2
import numpy as np
import pandas as pd
from joblib import Parallel, delayed

In [None]:
# Directory layout, resolved relative to the parent of the current working directory.
# NOTE(review): this presumably mirrors Kaggle's /kaggle/{input,working} layout - confirm when running elsewhere.
ROOT = Path.cwd().parent
INPUT = ROOT / "input"
DATA = INPUT / "nfl-impact-detection"
TRAIN_VIDEOS = DATA / "train"

WORK = ROOT / "working"

# Save frames outside of /kaggle/working/ because of the HDD limitation.
TMP = ROOT / "tmp"
TRAIN_EXTRACTED_FRAMES = TMP / "nfl-impact-detection-train-frames"
# exist_ok=True keeps this cell re-runnable; without it a second run raises FileExistsError.
TRAIN_EXTRACTED_FRAMES.mkdir(parents=True, exist_ok=True)

In [None]:
# Display the resolved directory that the extracted frames are written to.
TRAIN_EXTRACTED_FRAMES

In [None]:
# Print the name of every entry in the training-video directory, in sorted order.
train_video_names = [entry.name for entry in sorted(TRAIN_VIDEOS.iterdir())]
for video_name in train_video_names:
    print(video_name)

## Extract frames

In [None]:
def extract_frames_from_video(video_path: Path, out_root: Path):
    """Extract every frame of one video and save each one as a PNG image.

    Frames are written to ``out_root/<video stem>/<video stem>_<frame id>.png``.
    Frame ids start from 1 and are zero-padded to the number of digits of the
    frame count reported by OpenCV for the video.

    Parameters
    ----------
    video_path : Path
        Path of the video file to read.
    out_root : Path
        Directory under which the per-video frame directory is created
        (missing parent directories are created as well).

    Returns
    -------
    None
        A video that cannot be opened is reported and skipped; frames that
        could not be written are counted and reported once at the end.
    """
    base_name = video_path.stem
    cap = cv2.VideoCapture(str(video_path))
    try:
        if not cap.isOpened():
            # Report the skip instead of returning silently, so a missing video is noticed.
            print(f"[video id: {base_name}] could not be opened, skipped: {video_path}")
            return
        n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        digit = len(str(n_frames))

        out_dir_path = out_root / base_name
        out_dir_path.mkdir(parents=True, exist_ok=True)  # make directory for video
        print(f"[video id: {base_name}] n_frames {n_frames}")

        n_failed = 0
        frame_id = 1  # frame index starts from 1.
        ret, frame = cap.read()
        while ret:
            frame_path = out_dir_path / "{}_{}.png".format(base_name, str(frame_id).zfill(digit))
            # imwrite signals failure through its return value; keep going but remember it.
            if not cv2.imwrite(str(frame_path), frame):
                n_failed += 1
            ret, frame = cap.read()
            frame_id += 1

        if n_failed:
            print(f"[video id: {base_name}] failed to write {n_failed} frame(s)")
    finally:
        # Always free the decoder handle, including on early return or error.
        cap.release()

In [None]:
# Extract the frames of all training videos, one task per video.
video_paths = sorted(TRAIN_VIDEOS.iterdir())
extraction_tasks = [
    delayed(extract_frames_from_video)(v_path, TRAIN_EXTRACTED_FRAMES)
    for v_path in video_paths
]
# We can use 4 CPU cores in CPU Notebooks.
parallel_runner = Parallel(n_jobs=4, verbose=10)
_ = parallel_runner(extraction_tasks)

In [None]:
# Sanity check: report how many files were written for each video.
# Only directories are visited: on a re-run this folder also holds the
# train_frames.csv saved later in the notebook, and iterdir() on a file
# raises NotADirectoryError.
for video_dir in sorted(TRAIN_EXTRACTED_FRAMES.iterdir()):
    if not video_dir.is_dir():
        continue
    n_extracted = sum(1 for _ in video_dir.iterdir())
    print("video: {}.mp4,\tn_frames: {}".format(video_dir.name, n_extracted))

## Create metadata CSV

In [None]:
# Build one metadata row per extracted frame:
#   video (file name), frame (1-based id parsed from the file name), video_id, frame_name.
meta_info_list = []
for v_dir in sorted(TRAIN_EXTRACTED_FRAMES.iterdir()):
    # Skip plain files: on a re-run this folder also contains the previously
    # saved train_frames.csv, which is not a video directory.
    if not v_dir.is_dir():
        continue
    v_id = v_dir.name
    # Only PNG frames follow the "<video_id>_<frame id>.png" naming parsed below;
    # any other stray file would make the int() conversion fail.
    for f_path in sorted(v_dir.glob("*.png")):
        frame_id = int(f_path.stem.split("_")[-1])
        meta_info_list.append([v_id + ".mp4", frame_id, v_id, f_path.name])

meta_info_df = pd.DataFrame(
    meta_info_list,
    columns=["video", "frame", "video_id", "frame_name"])
del meta_info_list

In [None]:
# Preview the first rows of the frame metadata table.
meta_info_df.head()

In [None]:
# Store the metadata table alongside the extracted frames (row index omitted).
train_frames_csv_path = TRAIN_EXTRACTED_FRAMES / "train_frames.csv"
meta_info_df.to_csv(train_frames_csv_path, index=False)

In [None]:
# Keep a second copy of the metadata in the working directory for checking.
# WORK is not created anywhere in the visible cells, so make sure it exists
# before writing; otherwise to_csv fails when the directory is missing.
WORK.mkdir(parents=True, exist_ok=True)
meta_info_df.to_csv(WORK / "train_frames.csv", index=False)  # for checking