In [1]:
import logging
from pathlib import Path
import subprocess

from pytube import YouTube

from lioness.nodes.project_logging import default_logging

# Install the project's logging configuration before any logger is used.
default_logging()

# Set the level of pytube's logger to INFO; this notebook logs under its own name.
logging.getLogger('pytube').setLevel(logging.INFO)
logger = logging.getLogger(__name__)

# On-disk layout: raw video, raw audio, and the merged result each get their
# own folder under the shared data directory.
BASE_DIRECTORY = Path('../data')
VIDEO_DIRECTORY = BASE_DIRECTORY.joinpath('videos')
AUDIO_DIRECTORY = BASE_DIRECTORY.joinpath('audios')
COMBINED_DIRECTORY = BASE_DIRECTORY.joinpath('combined')

# Make sure every output folder exists (a no-op when it is already there).
for _directory in (VIDEO_DIRECTORY, AUDIO_DIRECTORY, COMBINED_DIRECTORY):
    _directory.mkdir(parents=True, exist_ok=True)

# Video processed by this notebook.
video_url = 'https://youtu.be/zP1fojwJZIc?si=9mnpmFewtJvi4T0n'

def download_and_merge(video_url):
    """Download the best adaptive video and audio streams and merge them with ffmpeg.

    The highest-resolution non-progressive mp4 video stream and the
    highest-bitrate non-progressive mp4 audio stream are downloaded into
    VIDEO_DIRECTORY and AUDIO_DIRECTORY, then combined into a single file in
    COMBINED_DIRECTORY. An existing merged file with the same name is
    overwritten.

    Args:
        video_url: URL of the YouTube video to fetch.

    Returns:
        Path of the merged output file.

    Raises:
        ValueError: If the video exposes no matching mp4 video or audio stream.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    video = YouTube(video_url)

    # Select the highest resolution video stream with progressive=False
    video_stream = video.streams.filter(progressive=False, file_extension='mp4', type='video').order_by('resolution').desc().first()
    # Select the highest quality audio stream
    audio_stream = video.streams.filter(progressive=False, file_extension='mp4', type='audio').order_by('abr').desc().first()

    # `.first()` yields None when the filter matched nothing; fail with a clear
    # message instead of an AttributeError further down.
    if video_stream is None:
        raise ValueError(f'No adaptive mp4 video stream found for: {video_url}')
    if audio_stream is None:
        raise ValueError(f'No adaptive mp4 audio stream found for: {video_url}')

    # Both files share the video stream's base name so they are easy to pair up.
    # NOTE(review): the audio stream is filtered to mp4, so the '.mp3' suffix does
    # not describe the real container. It is kept so existing files/paths stay
    # valid; consider switching to '.m4a' once nothing depends on the old name.
    video_filename = video_stream.default_filename
    audio_filename = video_stream.default_filename.replace('.mp4', '.mp3')

    video_file = VIDEO_DIRECTORY / video_filename
    audio_file = AUDIO_DIRECTORY / audio_filename

    # Download video and audio streams
    video_stream.download(output_path=str(VIDEO_DIRECTORY), filename=video_filename)
    audio_stream.download(output_path=str(AUDIO_DIRECTORY), filename=audio_filename)

    logger.info(f'Downloaded video: {video_file}')
    logger.info(f'Downloaded audio: {audio_file}')

    # Define the output file path
    output_file = COMBINED_DIRECTORY / video_filename

    # Merge video and audio using ffmpeg. The argument list is passed without a
    # shell, so spaces in the file name need no quoting.
    ffmpeg_command = [
        'ffmpeg',
        '-y',  # overwrite an existing output instead of prompting, so re-runs work
        '-i', str(video_file),
        '-i', str(audio_file),
        '-c:v', 'copy',
        '-c:a', 'aac',
        '-strict', 'experimental',
        str(output_file),
    ]
    # check=True: a failed merge must raise rather than be reported as success.
    subprocess.run(ffmpeg_command, check=True)

    logger.info(f'Final video saved to: {output_file}')
    return output_file

In [2]:
# Run the pipeline for the configured URL: downloads both streams, then invokes ffmpeg.
download_and_merge(video_url=video_url)

2024-06-17 15:31:39,733 pytube.helpers - DEBUG:matched regex search: (?:v=|\/)([0-9A-Za-z_-]{11}).*
2024-06-17 15:31:40,917 pytube.extract - DEBUG:applying descrambler
2024-06-17 15:31:40,918 pytube.extract - DEBUG:finding initial function name
2024-06-17 15:31:40,920 pytube.extract - DEBUG:Pattern failed: ytplayer\.config\s*=\s*
2024-06-17 15:31:40,920 pytube.extract - DEBUG:Could not parse object.
2024-06-17 15:31:40,945 pytube.extract - DEBUG:finished regex search, matched: (/s/player/[\w\d]+/[\w\d_/.]+/base\.js)
2024-06-17 15:31:42,347 pytube.cipher - DEBUG:finding initial function name
2024-06-17 15:31:42,542 pytube.cipher - DEBUG:finished regex search, matched: (?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)
2024-06-17 15:31:42,542 pytube.cipher - DEBUG:getting transform plan
2024-06-17 15:31:42,543 pytube.helpers - DEBUG:matched regex search: zPa=function\(\w\){[a-z=\.\(\"\)]*;(.*);(?:.+)}
2024-06-17 15:31:42,544 pytube.cipher - DEBUG:getting 

ffmpeg version 5.1.5-0+deb12u1 Copyright (c) 2000-2024 the FFmpeg developers
  built with gcc 12 (Debian 12.2.0-14)
  configuration: --prefix=/usr --extra-version=0+deb12u1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libglslang --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librist --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtheora --enable-libtwolame --enable-libvidstab --enab

2024-06-17 15:47:57,969 __main__ - INFO:Final video saved to: ../data/combined/FULL FIGHT  Dmitry Bivol vs Joe Smith Jr.mp4


frame=78210 fps=1369 q=-1.0 Lsize= 1324007kB time=00:52:08.44 bitrate=3467.0kbits/s speed=54.8x    
video:1271841kB audio:49649kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.190410%
[aac @ 0x56468fa96440] Qavg: 719.872
