The dataset contains images as well as videos recorded from different camera angles.

In this part we extract frames at a fixed rate from every video found in the dataset folders.

In [3]:
import ffmpeg
import os
import subprocess
from multiprocessing import Pool, cpu_count
from tqdm import tqdm

# Sampling rate handed to ffmpeg's fps filter in extract_frames.
FRAMES_PER_SECOND = 1
# Bounds how much of each video is sampled; extract_frames derives ffmpeg's
# -frames:v output-frame limit from it.
MAX_SECONDS = 25

In [4]:
def extract_frames(video_path):
    """Sample JPEG frames from one video into a ``frames`` folder next to it.

    ffmpeg is invoked with CUDA hardware acceleration, resamples the video to
    ``FRAMES_PER_SECOND``, scales every frame to 1280x720 and writes at most
    ``MAX_SECONDS`` seconds' worth of frames. Output files are named
    ``<dish_id>_<camera_label>_<NNNN>.jpg`` where ``dish_id`` is the name of
    the folder that contains the video and ``camera_label`` is the second
    ``_``-separated token of the file name (``camera_A.h264`` -> ``A``).

    Args:
        video_path: Path of the video file to process.

    Returns:
        True when ffmpeg exited with status 0, False otherwise. A failure is
        also reported on stdout so it is not silently lost.
    """
    print("Extracting frames from video", video_path)
    video_folder = os.path.dirname(video_path)
    dish_id = os.path.basename(video_folder)

    # A file name without "_" has no camera token; fall back to an empty
    # label instead of raising IndexError.
    name_parts = os.path.splitext(os.path.basename(video_path))[0].split('_')
    camera_label = name_parts[1] if len(name_parts) > 1 else ""

    output_folder = os.path.join(video_folder, 'frames')
    os.makedirs(output_folder, exist_ok=True)

    # -frames:v counts *output frames*, not seconds, so the cap has to scale
    # with the sampling rate to really cover MAX_SECONDS of video.
    max_frames = int(MAX_SECONDS * FRAMES_PER_SECOND)

    cmd = [
        "ffmpeg",
        "-hwaccel", "cuda",

        "-i", video_path,
        "-vf", f"fps={FRAMES_PER_SECOND},scale=1280:720",
        "-frames:v", str(max_frames),
        "-threads", "4",

        "-q:v", "2",
        os.path.join(output_folder, f"{dish_id}_{camera_label}_%04d.jpg")
    ]

    # stderr is captured (not discarded) so a failing ffmpeg run can be
    # diagnosed; undecodable bytes are replaced rather than raising.
    result = subprocess.run(
        cmd,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
        encoding="utf-8",
        errors="replace",
    )

    if result.returncode != 0:
        stderr_lines = [line for line in (result.stderr or "").splitlines() if line.strip()]
        detail = stderr_lines[-1] if stderr_lines else "no error output"
        print(f"ffmpeg failed for {video_path} (exit code {result.returncode}): {detail}")
        return False

    return True

In [5]:
def get_all_video_paths(root_dir):
    """Recursively collect the path of every ``.h264`` file below ``root_dir``.

    Paths are returned in the order ``os.walk`` yields them, each one built by
    joining the directory being visited with the matching file name.
    """
    return [
        os.path.join(current_dir, filename)
        for current_dir, _subdirs, filenames in os.walk(root_dir)
        for filename in filenames
        if filename.endswith('.h264')
    ]

In [6]:

# Root folder that is searched recursively for .h264 videos; the path is
# relative to the notebook's working directory.
root_dir = '../nutrition5k_dataset/imagery/side_angles'

# Collect every video path once; later cells reuse this list.
video_paths = get_all_video_paths(root_dir)

# Show only the first five entries as a quick sanity check.
print(video_paths[:5])

['../nutrition5k_dataset/imagery/side_angles/dish_1551373647/camera_B.h264', '../nutrition5k_dataset/imagery/side_angles/dish_1551373647/camera_A.h264', '../nutrition5k_dataset/imagery/side_angles/dish_1551373647/camera_C.h264', '../nutrition5k_dataset/imagery/side_angles/dish_1551373647/camera_D.h264', '../nutrition5k_dataset/imagery/side_angles/dish_1551374189/camera_B.h264']


In [7]:
def extract_then_delete_videos(video_paths):
    """Extract frames from every video in parallel, showing a progress bar.

    Despite its name, this function only runs ``extract_frames`` on each path;
    it never removes the source videos. Deleting them has to be done as a
    separate step after the extraction has been checked.

    Args:
        video_paths: Sized collection (e.g. a list) of video file paths.
    """
    if len(video_paths) == 0:
        # Nothing to do; avoid spawning worker processes for an empty batch.
        return

    # Never start more workers than there are videos to process.
    worker_count = min(cpu_count(), len(video_paths))

    with Pool(worker_count) as pool:
        # Results are discarded, so completion order is irrelevant; the
        # unordered iterator lets the progress bar advance as soon as any
        # video finishes instead of waiting on the slowest earlier one.
        for _ in tqdm(
            pool.imap_unordered(extract_frames, video_paths),
            total=len(video_paths),
            desc="Extracting frames",
            unit="video"
        ):
            pass

In [13]:
# Remove the source videos listed in video_paths.
# NOTE(review): nothing here verifies that frames were actually extracted for
# a video before it is deleted -- confirm the extraction output first.
for video_path in video_paths:
    print(video_path)
    try:
        os.remove(video_path)
    except OSError as e:
        # Best effort: a missing or undeletable file is reported and skipped.
        # Only filesystem errors are caught so programming mistakes (e.g. a
        # non-path value in the list) still surface.
        print(e)

../nutrition5k_dataset/imagery/side_angles/dish_1551373647/camera_B.h264
[Errno 2] No such file or directory: '../nutrition5k_dataset/imagery/side_angles/dish_1551373647/camera_B.h264'
../nutrition5k_dataset/imagery/side_angles/dish_1551373647/camera_A.h264
../nutrition5k_dataset/imagery/side_angles/dish_1551373647/camera_C.h264
../nutrition5k_dataset/imagery/side_angles/dish_1551373647/camera_D.h264
../nutrition5k_dataset/imagery/side_angles/dish_1551374189/camera_B.h264
../nutrition5k_dataset/imagery/side_angles/dish_1551374189/camera_A.h264
../nutrition5k_dataset/imagery/side_angles/dish_1551374189/camera_C.h264
../nutrition5k_dataset/imagery/side_angles/dish_1551374189/camera_D.h264
../nutrition5k_dataset/imagery/side_angles/dish_1560442450/camera_B.h264
../nutrition5k_dataset/imagery/side_angles/dish_1560442450/camera_A.h264
../nutrition5k_dataset/imagery/side_angles/dish_1560442450/camera_C.h264
../nutrition5k_dataset/imagery/side_angles/dish_1560442450/camera_D.h264
../nutrition