In [7]:
import cv2
import os
from os import listdir
import random
import re
from pytube import YouTube
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
import shutil
import moviepy.editor as mp
import subprocess
import datetime
from yt_dlp import YoutubeDL


def download_video(url, save_path, resolution=None):
    yt = YouTube(url)
    if resolution:
        video = yt.streams.filter(res = resolution).first()
    else:
        video = yt.streams.filter().order_by("resolution").desc().first()

    # Reformat the video name
    video_name = video.default_filename.replace(" ", "").replace("/", "_").replace("-", "_")
    # add current date to the video_name in the format of "YYYY_MM_DD_video_name"
    video_name = datetime.datetime.now().strftime("%Y_%m_%d_") + url.split("v=")[1] + '_' + video_name 
    
    # Split the name and the extension
    name_part, ext_part = os.path.splitext(video_name)

    # Remove non-alphanumeric and non-underscore characters from the name part
    name_part = re.sub(r'\W+', '', name_part)

    # Join the name part and the extension part
    video_name = name_part + ext_part
    video_file_path = os.path.join(save_path, video_name)
    
    # if video does not exist, download it
    if not os.path.isfile(os.path.join(save_path, video_name)):
        print(f'Downloading video {video_name}...')
        video.download(output_path=save_path, filename=video_name)
    else:
        print(f'Video {video_name} already exists.')

    # # If the downloaded video is in WebM format, convert it to MP4 using FFmpeg
    # if ext_part.lower() == '.webm' and not os.path.isfile(os.path.splitext(video_file_path)[0] + '.mp4'):
    #     mp4_output_path = os.path.splitext(video_file_path)[0] + '.mp4'
    #     print("converting")
    #     subprocess.run(['ffmpeg', '-i', video_file_path, '-c:v', 'libx264', '-c:a', 'aac', mp4_output_path], check=True)
    #     os.remove(video_file_path)  # Remove the original WebM file

    #     return mp4_output_path

    return video_file_path

# def download_video(url, save_path, resolution=None):
#     # Define the format string based on the desired resolution
#     format_str = 'bestvideo[ext=mp4]' if resolution is None else f'bestvideo[ext=mp4][height<={resolution}]'

#     ydl_opts = {
#         'format': format_str,
#         'outtmpl': save_path + '/%(title)s.%(ext)s',
#         'postprocessors': [{
#             'key': 'FFmpegVideoConvertor',
#             'preferedformat': 'mp4',  # Ensure the final format is MP4
#         }],
#         # Add any other options you might need here
#     }

#     with YoutubeDL(ydl_opts) as ydl:
#         info_dict = ydl.extract_info(url, download=False)
#         video_title = info_dict.get('title', None)
#         video_id = info_dict.get('id', None)

#         # Clean up the video title to make it filesystem friendly
#         video_title = re.sub(r'[^\w\-_\. ]', '_', video_title)
#         # Add current date to the video_title in the format of "YYYY_MM_DD_video_title"
#         video_title_formatted = datetime.datetime.now().strftime("%Y_%m_%d_") + video_title
#         video_filename = f"{video_title_formatted}_{video_id}.mp4"
#         video_file_path = os.path.join(save_path, video_filename)

#         # Check if file already exists
#         if not os.path.isfile(video_file_path):
#             print(f'Downloading video {video_filename}...')
#             ydl.download([url])
#         else:
#             print(f'Video {video_filename} already exists.')

#     return video_file_path

#### cut downloaded video to specified interval to test model performance ####
def cut_video(video_path, output_path,start_time, end_time, fps):
    # Get the file extension of the input video
    _, file_extension = os.path.splitext(video_path)

    # Load the video clip
    clip = mp.VideoFileClip(video_path).subclip(start_time, end_time)

    # Choose the appropriate codec based on the file extension
    codec = 'libx264' if file_extension == '.mp4' else 'libvpx-vp9'  # For .mp4 use H.264, for others use VP9

    # Write the trimmed video to the output file with the selected codec
    clip.write_videofile(output_path, codec=codec, fps = fps)
    
def extract_frames(video_path, frames_dir, num_frames, total_seconds, start_time, end_time):
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)

    # Calculate the frame indices for the specified start and end times
    start_frame = int(start_time * fps)
    end_frame = int(end_time * fps)

    # Generate a list of all frame indices
    all_frames = list(range(int(total_seconds * fps)))

    # Remove the frame indices that fall into the specified interval
    available_frames = [f for f in all_frames if f < start_frame or f >= end_frame]

    # Randomly select frame indices from the available frames
    frame_indices = random.sample(available_frames, num_frames)

    # Create a separate folder for each video
    video_name = os.path.splitext(os.path.basename(video_path))[0]
    video_frames_dir = os.path.join(frames_dir, video_name)
    os.makedirs(video_frames_dir, exist_ok=True)

    for i, frame_idx in enumerate(frame_indices):
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
        ret, frame = cap.read()

        if not ret:
            print(f"Frame at position {frame_idx} could not be read.")
            continue

        # Save the frame within the video-specific folder
        frame_name = f"frame_{video_name}_{i}.jpg"
        cv2.imwrite(os.path.join(video_frames_dir, frame_name), frame)

    
def process_video(video_url = None, 
                  video_local_path = None, # input a local video
                  clip_video=False,     # whether to save a clipped version (for model testing, et.)
                  save_full_video=False,# whether to save the full video
                  resolution = None,    # format: '1440p', '1080p', '720p' ...
                  video_save_path='./videos_full',
                  clip_save_path='./videos_clipped', 
                  frames_dir='./yt_frames',  # directory to store video frames (for model training)
                  num_frames=20,        # number of frames to extract
                  interval_length=8):

    # Download video
    print("downloading video")
    video_path = video_local_path if video_local_path else download_video(video_url, video_save_path, resolution) 
    

    # Get video properties
    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    total_seconds = int(total_frames / cap.get(cv2.CAP_PROP_FPS))
    fps = round(cap.get(cv2.CAP_PROP_FPS))
    
    # Calculate the available duration for the interval
    available_duration = total_seconds - interval_length

    if available_duration <= 0:
        print("Video duration is shorter than the specified interval.")
        return

    # Generate the random start time for the interval
    start_time = random.randint(0, available_duration)

    # Calculate the end time based on the start time and interval length
    end_time = start_time + interval_length
    
    # Extract frames before trimming the video
    print("extracting frames from {} to {}".format(start_time, end_time))
    
    # skip if frames already exist
    if os.path.isdir(os.path.join(frames_dir, os.path.splitext(os.path.basename(video_path))[0])):
        print("frames already exist")
    else:
        extract_frames(video_path, frames_dir, num_frames, total_seconds, start_time, end_time)

    # Clip the video if requested
    if clip_video:
        print("clipping video")
        trimmed_video_path = os.path.join(clip_save_path, f"{os.path.splitext(os.path.basename(video_path))[0]}_trimmed.mp4")
        cut_video(video_path, trimmed_video_path, start_time, end_time, fps)

    # Save the whole video if requested
    if not save_full_video and not video_local_path:
        os.remove(video_path)

def process_video_list(video_list, 
                       processed_history,
                       clip_video=False,     # whether to save a clipped version (for model testing, et.)
                        save_full_video=False,# whether to save the full video
                        resolution = None,    # format: '1440p', '1080p', '720p' ...
                        video_save_path='videos/videos_full',
                        frames_dir='training_images/yt_frames',  # directory to store video frames (for model training)
                        num_frames=30,        # number of frames to extract
                        ):
    # processed_history: a txt file that stores the video links that have been processed
    with open(processed_history, 'r') as f:
        processed = f.readlines()
    processed = [x.strip() for x in processed]
    for video in video_list:
        if video not in processed:
            print("processing video {}".format(video))
            process_video(video_url=video, 
                          clip_video=clip_video, 
                          save_full_video=save_full_video,
                          resolution=resolution,
                          video_save_path=video_save_path,
                          frames_dir=frames_dir, 
                          num_frames=num_frames)
            with open(processed_history, 'a') as f:
                f.write(video + '\n')
        else:
            print("video {} already processed".format(video))

## Process YouTube videos

In [8]:
urls = [
    'https://www.youtube.com/watch?v=Lx0HRpNbdeo',
    'https://www.youtube.com/watch?v=tPesRuo9YxA&t=53s',
    #'https://www.youtube.com/watch?v=jM9CfUbroAg',
    'https://www.youtube.com/watch?v=OR9-LPl8QDc'
]

len(urls)

3

In [9]:
process_video_list(urls,
                   processed_history="processed_yt_links.txt",
                    clip_video=False,     # whether to save a clipped version (for model testing, et.)
                    save_full_video=True,# whether to save the full video
                    resolution = None,    # format: '1440p', '1080p', '720p' ...
                    video_save_path='videos',
                    frames_dir='training_images/yt_frames',  # directory to store video frames (for model training)
                    num_frames=30,  )
                   
                   

processing video https://www.youtube.com/watch?v=Lx0HRpNbdeo
downloading video
Downloading video 2023_11_15_id_Lx0HRpNbdeo.mp4...
extracting frames from 243 to 251
processing video https://www.youtube.com/watch?v=tPesRuo9YxA&t=53s
downloading video
Downloading video 2023_11_15_id_tPesRuo9YxAt53s.mp4...
extracting frames from 237 to 245
processing video https://www.youtube.com/watch?v=OR9-LPl8QDc
downloading video
Downloading video 2023_11_15_id_OR9LPl8QDc.mp4...
extracting frames from 326 to 334


## Process a local video

In [None]:
video_path = "/Users/oscarwan/bballDetection/videos/videos_full/GX010004_1080p.mp4"
process_video(video_local_path = video_path, 
              frames_dir = "/Users/oscarwan/bballDetection/training_images/yt_frames",
              )

downloading video
fps  60
extracting frames from 469 to 477


In [None]:
download_video("https://www.youtube.com/watch?v=-mhfin6NvIM&ab_channel=JonathanYuen", resolution="1080p48", save_path = ".")

AttributeError: 'NoneType' object has no attribute 'default_filename'

In [None]:
url = "https://www.youtube.com/watch?v=-mhfin6NvIM&ab_channel=JonathanYuen"
yt = YouTube(url)
video = yt.streams.filter(mime_type="video/mp4").order_by("resolution").desc().first()

In [None]:
video.download()

'/Users/oscarwan/GitHub/court-vision/20230724 - Game 5.mp4'

In [8]:
urls = [
        #"https://www.youtube.com/watch?v=Lx0HRpNbdeo",
        # "https://www.youtube.com/watch?v=zYCbImvhdEg", # very long
        # "https://www.youtube.com/watch?v=sLci5Ft6WC0&t=38s",
        # "https://www.youtube.com/watch?v=rxwJHq34BJk&list=PLqHiuEtARPJiQkMFP9vnAW05eSspQniSi&index=11",
       # "https://www.youtube.com/watch?v=3Jb9LnTs_bQ",
        "https://www.youtube.com/watch?v=bk2bhXLx3RU"
        ]


for url in urls:
        download_video(url, save_path = "videos_full")

Downloading video 2023_12_16_bk2bhXLx3RU_Episode2LongfellowLightPickupBasketball2v2FeaturingKyleBenPatNed.webm...


In [9]:
def get_video_duration(video_path):
    """Get the duration of the video in seconds."""
    try:
        result = subprocess.run(
            ['ffprobe', '-v', 'error', '-show_entries', 'format=duration', 
             '-of', 'default=noprint_wrappers=1:nokey=1', video_path],
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        return float(result.stdout)
    except Exception as e:
        print(f"Error getting duration of {video_path}: {e}")
        return 0

def trim_video(video_path, interval=300):  # 300 seconds = 5 minutes
    duration = get_video_duration(video_path)
    if duration == 0:
        return

    # Calculate start time
    start_time = 0 if duration <= interval else random.randint(0, int(duration) - interval)

    # Temporary output file name
    temp_output = "temp_" + os.path.basename(video_path)

    # ffmpeg command to trim the video
    command = [
        'ffmpeg',
        '-ss', str(start_time),
        '-i', video_path,
        '-t', str(interval),
        '-c', 'copy',
        temp_output
    ]

    # Execute the command
    subprocess.run(command)

    # Replace the original file with the trimmed file
    os.replace(temp_output, video_path)



In [11]:
for videos in os.listdir("videos_full"):
    trim_video(os.path.join("videos_full", videos), interval=180)

ffmpeg version 6.0 Copyright (c) 2000-2023 the FFmpeg developers
  built with Apple clang version 14.0.3 (clang-1403.0.22.14.1)
  configuration: --prefix=/opt/homebrew/Cellar/ffmpeg/6.0 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags= --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspeex --enable-libsoxr --enable-libzmq --enable-libzimg --disable-libjack --di