In [3]:
import os
import random
import shutil

from glob import glob


def _unique_name(name, taken):
    """Return ``name``, or ``<stem>_<k><ext>`` with the smallest free k, if ``name`` is in ``taken``."""
    if name not in taken:
        return name
    stem, ext = os.path.splitext(name)
    counter = 1
    while f"{stem}_{counter}{ext}" in taken:
        counter += 1
    return f"{stem}_{counter}{ext}"


def sample_videos(input_dir, output_dir, sample_size=100, seed=None):
    """Copy a random sample of ``.mp4`` clips into a flat output directory.

    Clips are discovered with the pattern ``<input_dir>/*/*/*.mp4``, i.e. only
    ``.mp4`` files located exactly two directory levels below ``input_dir``.

    Args:
        input_dir: Root directory that is searched for clips.
        output_dir: Directory that receives the copies; created if missing.
        sample_size: Number of clips to copy.
        seed: Optional seed for a private random generator so the same clips
            are picked on every run. With ``None`` the module-level ``random``
            state is used.

    Raises:
        FileNotFoundError: ``input_dir`` does not exist.
        NotADirectoryError: ``input_dir`` exists but is not a directory.
        ValueError: Fewer than ``sample_size`` clips were found (also raised by
            ``random.sample`` for a negative ``sample_size``).
    """
    # Function-scope import: the cell only imports the `glob` function itself.
    from glob import escape as glob_escape

    if not os.path.exists(input_dir):
        raise FileNotFoundError(f"The input directory '{input_dir}' does not exist.")
    if not os.path.isdir(input_dir):
        raise NotADirectoryError(f"The input path '{input_dir}' is not a directory.")

    os.makedirs(output_dir, exist_ok=True)

    # Escape the root so glob metacharacters in the directory name ("[", "*",
    # "?") are matched literally instead of silently matching nothing.
    pattern = os.path.join(glob_escape(os.fspath(input_dir)), "*", "*", "*.mp4")
    # Sorted so that a given seed always selects the same clips; glob order is
    # not guaranteed. Directories that happen to end in ".mp4" are skipped.
    video_files = sorted(path for path in glob(pattern) if os.path.isfile(path))

    if len(video_files) < sample_size:
        raise ValueError(
            f"Not enough videos to sample. Found {len(video_files)} videos."
        )

    rng = random.Random(seed) if seed is not None else random
    sampled_videos = rng.sample(video_files, sample_size)

    # The sources live in different sub-directories but are copied into one
    # flat directory, so equal basenames must be disambiguated; otherwise later
    # copies overwrite earlier ones and fewer than sample_size files remain.
    used_names = set()
    for src in sampled_videos:
        dst_name = _unique_name(os.path.basename(src), used_names)
        used_names.add(dst_name)
        shutil.copy2(src, os.path.join(output_dir, dst_name))

    print(f"Successfully copied {len(sampled_videos)} videos to '{output_dir}'.")


# Example usage: copy a random sample of clips (sample_size is left at the
# function default) from the dataset's clip tree into the YOLO fine-tuning folder.
# NOTE(review): both paths are absolute and machine-specific (/mnt/arc/...);
# adjust them before running this cell on another host.
input_directory = (
    "/mnt/arc/levlevi/nba-positions-videos-dataset/nba-plus-statvu-dataset/clips"
)
output_directory = "/mnt/arc/levlevi/nba-positions-videos-dataset/scene-filtering/yolo_ft_dataset/clips"
sample_videos(input_directory, output_directory)

Successfully copied 100 videos to '/mnt/arc/levlevi/nba-positions-videos-dataset/scene-filtering/yolo_ft_dataset/clips'.


In [2]:
import os
import subprocess


def get_video_files(directory):
    """Return the video files directly inside ``directory`` in sorted order.

    A directory entry counts as a video when it is a regular file whose
    extension (compared case-insensitively) is one of ``.mp4``, ``.mkv``,
    ``.flv``, ``.avi``, ``.mov`` or ``.wmv``. Sub-directories are not searched.

    Args:
        directory: Directory to scan.

    Returns:
        A list of paths of the form ``os.path.join(directory, name)``, sorted
        lexicographically so that callers (e.g. concatenation) see a
        deterministic order; ``os.listdir`` itself returns entries in
        arbitrary order.
    """
    video_extensions = {".mp4", ".mkv", ".flv", ".avi", ".mov", ".wmv"}
    candidates = (
        os.path.join(directory, entry)
        for entry in os.listdir(directory)
        if os.path.splitext(entry)[1].lower() in video_extensions
    )
    # Keep regular files only: a directory named e.g. "old.mp4" is not a video.
    return sorted(path for path in candidates if os.path.isfile(path))


def create_file_list(video_files, list_file):
    """Write an ffmpeg concat-demuxer list with one ``file '<path>'`` line per video.

    Args:
        video_files: Iterable of video paths, written in iteration order.
        list_file: Path of the text file to create (overwritten if it exists).
    """
    # UTF-8 is pinned explicitly so non-ASCII paths are written the same way
    # regardless of the platform's default encoding.
    with open(list_file, "w", encoding="utf-8") as f:
        for video in video_files:
            # Inside a single-quoted ffmpeg string a literal quote has to be
            # written as '\'' (close quote, escaped quote, reopen quote);
            # an unescaped quote in a file name would corrupt the entry.
            quoted = str(video).replace("'", "'\\''")
            f.write(f"file '{quoted}'\n")


def concatenate_videos(directory, output_file):
    """Concatenate every video in ``directory`` into ``output_file`` using ffmpeg.

    The clips are joined with ffmpeg's concat demuxer and ``-c copy`` (streams
    are copied, not re-encoded). Progress and errors are reported with
    ``print``; a failing ffmpeg run does not raise.

    Args:
        directory: Directory whose video files (as returned by
            ``get_video_files``) are concatenated.
        output_file: Path of the resulting video. No ``-y``/``-n`` flag is
            passed, so ffmpeg's default handling of an existing output applies.
    """
    # Function-scope import: this cell's import block does not include tempfile.
    import tempfile

    # Absolute paths are required because the list file now lives in the temp
    # directory and the concat demuxer resolves relative entries against the
    # list file's own location.
    video_files = [os.path.abspath(path) for path in get_video_files(directory)]
    if not video_files:
        print("No video files found in the directory.")
        return

    # A unique temporary list file avoids overwriting (and afterwards deleting)
    # an unrelated "file_list.txt" in the current working directory.
    fd, list_file = tempfile.mkstemp(prefix="ffmpeg_concat_", suffix=".txt")
    os.close(fd)

    ffmpeg_command = [
        "ffmpeg",
        "-f",
        "concat",
        "-safe",
        "0",  # allow absolute paths in the list file
        "-i",
        list_file,
        "-c",
        "copy",
        output_file,
    ]

    try:
        create_file_list(video_files, list_file)
        subprocess.run(ffmpeg_command, check=True)
        print(f"Videos have been concatenated into {output_file}.")
    except subprocess.CalledProcessError as e:
        print(f"An error occurred while concatenating videos: {e}")
    finally:
        # Always remove the temporary list, whether ffmpeg succeeded or not.
        if os.path.exists(list_file):
            os.remove(list_file)

# Concatenate the sampled clips into a single video. `directory` is the same
# absolute path that the sampling cell uses as its output directory.
# `output_file` is a relative name, so it is resolved against the kernel's
# current working directory.
directory = "/mnt/arc/levlevi/nba-positions-videos-dataset/scene-filtering/yolo_ft_dataset/clips"
output_file = "scene-parse-data.mp4"
concatenate_videos(directory, output_file)

ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enab

Videos have been concatenated into scene-parse-data.mp4.
