In [None]:
import os
import csv
import subprocess
import librosa
import matplotlib.pyplot as plt
import numpy as np
import cv2
from moviepy import VideoFileClip
from yt_dlp import YoutubeDL

# --- Configuration ---------------------------------------------------------
# Input CSV. The processing loop reads column 0 as the YouTube id, column 1 as
# the clip start time in seconds and, when present, column 2 as the label.
csv_file = "./vggsound.csv"

# Output layout: every artifact is written below `output_dir`.
output_dir = "./dataset"
video_dir = os.path.join(output_dir, "videos")  # downloaded / trimmed clips
frame_dir = os.path.join(output_dir, "images")  # middle-frame JPEGs
spec_dir = os.path.join(output_dir, "specs")    # mel-spectrogram PNGs

duration = 10         # intended clip length in seconds
sample_count = 10000  # processing stops once the CSV row index reaches this value
sr = 22050            # sample rate handed to librosa.load
img_size = 512        # extracted frames are resized to img_size x img_size

# Create the output directories up front so the notebook works on a fresh
# checkout; exist_ok=True makes re-running this cell a no-op.
os.makedirs(video_dir, exist_ok=True)
os.makedirs(frame_dir, exist_ok=True)
os.makedirs(spec_dir, exist_ok=True)

def download_clip(youtube_id, start_time, duration=10):
    """Download a YouTube video and cut a fixed-length clip out of it.

    The whole video is fetched as MP4 into ``video_dir`` under the temporary
    name ``<youtube_id>_full.mp4``; the requested window is then written to
    ``<youtube_id>.mp4`` (libx264 video, AAC audio). The full-length download
    is removed afterwards, whether or not trimming succeeded.

    Args:
        youtube_id: YouTube video id (the ``v=`` URL parameter).
        start_time: Clip start, in seconds from the beginning of the video.
        duration: Requested clip length in seconds.

    Returns:
        Path of the trimmed clip inside ``video_dir``.

    Raises:
        Exceptions raised by yt-dlp or MoviePy are not caught here and
        propagate to the caller.
    """
    url = f"https://www.youtube.com/watch?v={youtube_id}"
    temp_path = os.path.join(video_dir, f"{youtube_id}_full.mp4")
    clip_path = os.path.join(video_dir, f"{youtube_id}.mp4")

    ydl_opts = {'format': 'mp4', 'outtmpl': temp_path, 'quiet': True}
    try:
        with YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])

        with VideoFileClip(temp_path) as video:
            end_time = start_time + duration
            # Do not ask for a window that runs past the end of the source.
            if video.duration is not None:
                end_time = min(end_time, video.duration)
            subclip = video.subclipped(start_time, end_time)
            subclip.write_videofile(clip_path, codec="libx264", audio_codec="aac")
    finally:
        # Always drop the full-length download; without this a failed trim
        # leaves a potentially large file behind for every bad row.
        if os.path.exists(temp_path):
            os.remove(temp_path)

    return clip_path

def extract_middle_frame(video_path, out_path):
    """Save the middle frame of a video as a square image.

    The frame at index ``frame_count // 2`` is read, resized to
    ``img_size`` x ``img_size`` (aspect ratio is not preserved) and written
    to ``out_path``.

    Args:
        video_path: Path of the video file to read.
        out_path: Destination image path; the format follows the extension.

    Returns:
        True only if a frame was read *and* the image was written,
        False otherwise.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Guard against a non-positive frame count so the seek target is
        # never negative.
        cap.set(cv2.CAP_PROP_POS_FRAMES, max(total // 2, 0))
        success, frame = cap.read()
        if not success:
            return False

        frame = cv2.resize(frame, (img_size, img_size))
        # imwrite reports failure through its return value rather than by
        # raising, so fold it into the result.
        return bool(cv2.imwrite(out_path, frame))
    finally:
        # Release the capture even if resize/imwrite raises.
        cap.release()

def save_mel_spectrogram(video_path, out_path):
    """Render the audio track of a clip as a log-mel spectrogram image.

    The audio is loaded with librosa at the module-level sample rate ``sr``,
    turned into a 128-band mel spectrogram, converted to dB relative to its
    maximum, and drawn without axes using the ``magma`` colormap.

    Args:
        video_path: Path of the media file whose audio is loaded.
        out_path: Destination image path passed to ``savefig``.

    Raises:
        Exceptions from librosa or matplotlib propagate to the caller.
    """
    y, _ = librosa.load(video_path, sr=sr)
    mel = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
    log_mel = librosa.power_to_db(mel, ref=np.max)

    # 5.12 in at 100 dpi gives a 512 px canvas; bbox_inches='tight' may crop
    # it, so the saved image is not guaranteed to be exactly 512 x 512.
    fig = plt.figure(figsize=(5.12, 5.12), dpi=100)
    try:
        ax = fig.add_subplot(111)
        ax.axis('off')
        ax.imshow(log_mel, cmap='magma', aspect='auto')
        fig.savefig(out_path, bbox_inches='tight', pad_inches=0)
    finally:
        # Close the figure even when drawing/saving fails; otherwise figures
        # pile up in memory across the processing loop.
        plt.close(fig)

# Driver loop: for each CSV row in [start_index, sample_count) download the
# clip, save its middle frame and its mel spectrogram, then delete the clip.
# `start_index` is edited by hand to resume an interrupted run.
# NOTE(review): outputs are keyed by YouTube id only, so CSV rows that share an
# id write to the same frame/spec paths - confirm one sample per id is intended.
start_index = 4200
with open(csv_file, newline="") as f:  # newline="" as the csv module recommends
    reader = csv.reader(f)
    for i, row in enumerate(reader):
        if i < start_index:
            continue
        if i >= sample_count:
            break

        video_path = None
        try:
            ytid = row[0]
            start = int(float(row[1]))
            # Parsed for completeness; not used further inside this loop.
            label = row[2] if len(row) > 2 else "unknown"

            print(f"Processing {ytid} at row {i}...")

            # Pass the configured clip length instead of relying on the
            # function's own default.
            video_path = download_clip(ytid, start, duration)
            frame_path = os.path.join(frame_dir, f"{ytid}.jpg")
            spec_path = os.path.join(spec_dir, f"{ytid}.png")

            success = extract_middle_frame(video_path, frame_path)
            if success:
                save_mel_spectrogram(video_path, spec_path)
                print(f"Done: {ytid}")
            else:
                print(f"Frame extraction failed: {ytid}")
        except Exception as e:
            # Best-effort batch job: report the failure and move on to the
            # next row (many source videos are private or unavailable).
            print(f"Error processing row {i}: {e}")
        finally:
            # Remove the trimmed clip on every path, not only on success, so
            # failed rows do not leave video files behind.
            if video_path is not None and os.path.exists(video_path):
                os.remove(video_path)

Processing 00qdDqodX9U at row 4200...


ERROR: [youtube] 00qdDqodX9U: Sign in to confirm you’re not a bot. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies


Error processing row 4200: ERROR: [youtube] 00qdDqodX9U: Sign in to confirm you’re not a bot. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies
Processing 00tm1g6HAg0 at row 4201...
{'video_found': True, 'audio_found': True, 'metadata': {'major_brand': 'mp42', 'minor_version': '0', 'compatible_brands': 'isommp42', 'creation_time': '2024-03-09T02:56:10.000000Z'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [640, 360], 'bitrate': 554, 'fps': 29.97002997002997, 'codec_name': 'h264', 'profile': '(Main)', 'metadata': {'Metadata': '', 'creation_time': '2024-03-09T02:56:10.000000Z', 'handler_name': 'ISO Media file produced by Google Inc. Created on

                                                                   

MoviePy - Done.
MoviePy - Writing video ./dataset/videos/00tm1g6HAg0.mp4



                                                                        

MoviePy - Done !
MoviePy - video ready ./dataset/videos/00tm1g6HAg0.mp4


  y, _ = librosa.load(video_path, sr=sr)


Done: 00tm1g6HAg0
Processing 00tm1g6HAg0 at row 4202...


ERROR: [youtube] 00tm1g6HAg0: Sign in to confirm you’re not a bot. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies


Error processing row 4202: ERROR: [youtube] 00tm1g6HAg0: Sign in to confirm you’re not a bot. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies
Processing 00upPhvd6s4 at row 4203...


ERROR: [youtube] 00upPhvd6s4: Sign in to confirm you’re not a bot. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies


Error processing row 4203: ERROR: [youtube] 00upPhvd6s4: Sign in to confirm you’re not a bot. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies
Processing 00wORCOKNHw at row 4204...


ERROR: [youtube] 00wORCOKNHw: Sign in to confirm you’re not a bot. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies


Error processing row 4204: ERROR: [youtube] 00wORCOKNHw: Sign in to confirm you’re not a bot. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies
Processing 00woB5JzpZs at row 4205...
{'video_found': True, 'audio_found': True, 'metadata': {'major_brand': 'mp42', 'minor_version': '0', 'compatible_brands': 'isommp42', 'creation_time': '2024-03-04T07:21:28.000000Z'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [640, 360], 'bitrate': 144, 'fps': 29.97002997002997, 'codec_name': 'h264', 'profile': '(Main)', 'metadata': {'Metadata': '', 'creation_time': '2024-03-04T07:21:28.000000Z', 'handler_name': 'ISO Media file produced by Google Inc. Created on

                                                                   

MoviePy - Done.
MoviePy - Writing video ./dataset/videos/00woB5JzpZs.mp4



                                                                         

MoviePy - Done !
MoviePy - video ready ./dataset/videos/00woB5JzpZs.mp4
Done: 00woB5JzpZs
Processing 00yZOgawwAo at row 4206...
{'video_found': True, 'audio_found': True, 'metadata': {'major_brand': 'mp42', 'minor_version': '0', 'compatible_brands': 'isommp42', 'creation_time': '2018-11-26T00:04:47.000000Z'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [640, 360], 'bitrate': 273, 'fps': 30.0, 'codec_name': 'h264', 'profile': '(Main)', 'metadata': {'Metadata': '', 'creation_time': '2018-11-26T00:04:47.000000Z', 'handler_name': 'ISO Media file produced by Google Inc. Created on: 11/25/2018.', 'vendor_id': '[0][0][0][0]'}}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': 'eng', 'default': True, 'fps': 44100, 'bitrate': 127, 'metadata': {'Metadata': '', 'creation_time': '2018-11-26T00:04:47.000000Z', 'handler_name': 'ISO Media file produced by Google Inc. Created on: 11/25/201

                                                                   

MoviePy - Done.
MoviePy - Writing video ./dataset/videos/00yZOgawwAo.mp4



                                                                        

MoviePy - Done !
MoviePy - video ready ./dataset/videos/00yZOgawwAo.mp4
Done: 00yZOgawwAo
Processing 00yZOgawwAo at row 4207...


ERROR: [youtube] 00yZOgawwAo: Sign in to confirm you’re not a bot. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies


Error processing row 4207: ERROR: [youtube] 00yZOgawwAo: Sign in to confirm you’re not a bot. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies
Processing 00ypc1aPiO4 at row 4208...


ERROR: [youtube] 00ypc1aPiO4: Sign in to confirm you’re not a bot. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies


Error processing row 4208: ERROR: [youtube] 00ypc1aPiO4: Sign in to confirm you’re not a bot. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies
Processing 00ypc1aPiO4 at row 4209...


ERROR: [youtube] 00ypc1aPiO4: Sign in to confirm you’re not a bot. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies


Error processing row 4209: ERROR: [youtube] 00ypc1aPiO4: Sign in to confirm you’re not a bot. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies
Processing 00ypkSbNDhM at row 4210...
{'video_found': True, 'audio_found': True, 'metadata': {'major_brand': 'mp42', 'minor_version': '0', 'compatible_brands': 'isommp42', 'creation_time': '2024-03-10T00:37:56.000000Z'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [640, 360], 'bitrate': 365, 'fps': 30.0, 'codec_name': 'h264', 'profile': '(Main)', 'metadata': {'Metadata': '', 'creation_time': '2024-03-10T00:37:56.000000Z', 'handler_name': 'ISO Media file produced by Google Inc. Created on: 03/09/2024.

                                                                   

MoviePy - Done.
MoviePy - Writing video ./dataset/videos/00ypkSbNDhM.mp4



                                                                         

MoviePy - Done !
MoviePy - video ready ./dataset/videos/00ypkSbNDhM.mp4
Done: 00ypkSbNDhM
Processing 00ypkSbNDhM at row 4211...


ERROR: [youtube] 00ypkSbNDhM: Sign in to confirm you’re not a bot. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies


Error processing row 4211: ERROR: [youtube] 00ypkSbNDhM: Sign in to confirm you’re not a bot. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies
Processing 00z0M1DJyAw at row 4212...
{'video_found': True, 'audio_found': True, 'metadata': {'major_brand': 'mp42', 'minor_version': '0', 'compatible_brands': 'isommp42', 'encoder': 'Google'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [640, 360], 'bitrate': 638, 'fps': 29.97002997002997, 'codec_name': 'h264', 'profile': '(Main)', 'metadata': {'Metadata': '', 'handler_name': 'ISO Media file produced by Google Inc.', 'vendor_id': '[0][0][0][0]'}}, {'input_number': 0, 'stream_number': 1, 'stream_type

                                                                   

MoviePy - Done.
MoviePy - Writing video ./dataset/videos/00z0M1DJyAw.mp4



                                                                        

MoviePy - Done !
MoviePy - video ready ./dataset/videos/00z0M1DJyAw.mp4
Done: 00z0M1DJyAw
Processing 0102_FBc_70 at row 4213...


ERROR: [youtube] 0102_FBc_70: Video unavailable


Error processing row 4213: ERROR: [youtube] 0102_FBc_70: Video unavailable
Processing 011Z5yjq6wo at row 4214...
{'video_found': True, 'audio_found': True, 'metadata': {'major_brand': 'mp42', 'minor_version': '0', 'compatible_brands': 'isommp42', 'creation_time': '2023-05-17T21:38:54.000000Z'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [640, 360], 'bitrate': 509, 'fps': 30.0, 'codec_name': 'h264', 'profile': '(Main)', 'metadata': {'Metadata': '', 'creation_time': '2023-05-17T21:38:54.000000Z', 'handler_name': 'ISO Media file produced by Google Inc. Created on: 05/17/2023.', 'vendor_id': '[0][0][0][0]'}}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': None, 'default': True, 'fps': 44100, 'bitrate': 127, 'metadata': {'Metadata': '', 'creation_time': '2023-05-17T21:38:54.000000Z', 'handler_name': 'ISO Media file produced by Google Inc. Created on: 05/17/2023.', 'vendor_id'

                                                                   

MoviePy - Done.
MoviePy - Writing video ./dataset/videos/011Z5yjq6wo.mp4



                                                                         

MoviePy - Done !
MoviePy - video ready ./dataset/videos/011Z5yjq6wo.mp4
Done: 011Z5yjq6wo
Processing 011Z5yjq6wo at row 4215...
{'video_found': True, 'audio_found': True, 'metadata': {'major_brand': 'mp42', 'minor_version': '0', 'compatible_brands': 'isommp42', 'creation_time': '2023-05-17T21:38:54.000000Z'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [640, 360], 'bitrate': 509, 'fps': 30.0, 'codec_name': 'h264', 'profile': '(Main)', 'metadata': {'Metadata': '', 'creation_time': '2023-05-17T21:38:54.000000Z', 'handler_name': 'ISO Media file produced by Google Inc. Created on: 05/17/2023.', 'vendor_id': '[0][0][0][0]'}}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': None, 'default': True, 'fps': 44100, 'bitrate': 127, 'metadata': {'Metadata': '', 'creation_time': '2023-05-17T21:38:54.000000Z', 'handler_name': 'ISO Media file produced by Google Inc. Created on: 05/17/2023

                                                                   

MoviePy - Done.
MoviePy - Writing video ./dataset/videos/011Z5yjq6wo.mp4



                                                                         

MoviePy - Done !
MoviePy - video ready ./dataset/videos/011Z5yjq6wo.mp4
Done: 011Z5yjq6wo
Processing 014EDZD8CHQ at row 4216...
{'video_found': True, 'audio_found': True, 'metadata': {'major_brand': 'mp42', 'minor_version': '0', 'compatible_brands': 'isommp42', 'encoder': 'Google'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [640, 360], 'bitrate': 639, 'fps': 25.0, 'codec_name': 'h264', 'profile': '(Main)', 'metadata': {'Metadata': '', 'handler_name': 'ISO Media file produced by Google Inc.', 'vendor_id': '[0][0][0][0]'}}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': 'eng', 'default': True, 'fps': 44100, 'bitrate': 95, 'metadata': {'Metadata': '', 'handler_name': 'ISO Media file produced by Google Inc.', 'vendor_id': '[0][0][0][0]'}}], 'input_number': 0}], 'duration': 54.33, 'bitrate': 737, 'start': 0.0, 'default_video_input_number': 0, 'default_video_stream_number': 0

                                                                   

MoviePy - Done.
MoviePy - Writing video ./dataset/videos/014EDZD8CHQ.mp4



                                                                         

MoviePy - Done !
MoviePy - video ready ./dataset/videos/014EDZD8CHQ.mp4
Done: 014EDZD8CHQ
Processing 014Rnl2xIZg at row 4217...


ERROR: [youtube] 014Rnl2xIZg: Video unavailable


Error processing row 4217: ERROR: [youtube] 014Rnl2xIZg: Video unavailable
Processing 014wEN3qV1E at row 4218...


ERROR: [youtube] 014wEN3qV1E: Sign in to confirm you’re not a bot. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies


Error processing row 4218: ERROR: [youtube] 014wEN3qV1E: Sign in to confirm you’re not a bot. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies
Processing 0156oeNSmdY at row 4219...


ERROR: [youtube] 0156oeNSmdY: Private video. Sign in if you've been granted access to this video. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies


Error processing row 4219: ERROR: [youtube] 0156oeNSmdY: Private video. Sign in if you've been granted access to this video. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies
Processing 015T7UQR0zQ at row 4220...


ERROR: [youtube] 015T7UQR0zQ: Video unavailable


Error processing row 4220: ERROR: [youtube] 015T7UQR0zQ: Video unavailable
Processing 016LCSDhwyQ at row 4221...


ERROR: [youtube] 016LCSDhwyQ: Video unavailable


Error processing row 4221: ERROR: [youtube] 016LCSDhwyQ: Video unavailable
Processing 01Bl9ZlR7KI at row 4222...
{'video_found': True, 'audio_found': True, 'metadata': {'major_brand': 'mp42', 'minor_version': '0', 'compatible_brands': 'isommp42', 'creation_time': '2019-09-16T08:50:45.000000Z'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [640, 360], 'bitrate': 243, 'fps': 25.0, 'codec_name': 'h264', 'profile': '(Main)', 'metadata': {'Metadata': '', 'creation_time': '2019-09-16T08:50:45.000000Z', 'handler_name': 'ISO Media file produced by Google Inc. Created on: 09/16/2019.', 'vendor_id': '[0][0][0][0]'}}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': None, 'default': True, 'fps': 44100, 'bitrate': 128, 'metadata': {'Metadata': '', 'creation_time': '2019-09-16T08:50:45.000000Z', 'handler_name': 'ISO Media file produced by Google Inc. Created on: 09/16/2019.', 'vendor_id'

                                                                   

MoviePy - Done.
MoviePy - Writing video ./dataset/videos/01Bl9ZlR7KI.mp4



                                                                         

MoviePy - Done !
MoviePy - video ready ./dataset/videos/01Bl9ZlR7KI.mp4
Done: 01Bl9ZlR7KI
Processing 01DJUnoCwk0 at row 4223...
{'video_found': True, 'audio_found': True, 'metadata': {'major_brand': 'mp42', 'minor_version': '0', 'compatible_brands': 'isommp42', 'creation_time': '2024-03-04T04:23:39.000000Z'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [640, 360], 'bitrate': 173, 'fps': 29.97002997002997, 'codec_name': 'h264', 'profile': '(Main)', 'metadata': {'Metadata': '', 'creation_time': '2024-03-04T04:23:39.000000Z', 'handler_name': 'ISO Media file produced by Google Inc. Created on: 03/03/2024.', 'vendor_id': '[0][0][0][0]'}}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': None, 'default': True, 'fps': 44100, 'bitrate': 128, 'metadata': {'Metadata': '', 'creation_time': '2024-03-04T04:23:39.000000Z', 'handler_name': 'ISO Media file produced by Google Inc. Created o

                                                                   

MoviePy - Done.
MoviePy - Writing video ./dataset/videos/01DJUnoCwk0.mp4



                                                                        

MoviePy - Done !
MoviePy - video ready ./dataset/videos/01DJUnoCwk0.mp4
Done: 01DJUnoCwk0
Processing 01EtjUk6KFw at row 4224...
{'video_found': True, 'audio_found': True, 'metadata': {'major_brand': 'mp42', 'minor_version': '0', 'compatible_brands': 'isommp42', 'encoder': 'Google'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [640, 360], 'bitrate': 323, 'fps': 30.0, 'codec_name': 'h264', 'profile': '(Main)', 'metadata': {'Metadata': '', 'handler_name': 'ISO Media file produced by Google Inc.', 'vendor_id': '[0][0][0][0]'}}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': None, 'default': True, 'fps': 44100, 'bitrate': 95, 'metadata': {'Metadata': '', 'handler_name': 'ISO Media file produced by Google Inc.', 'vendor_id': '[0][0][0][0]'}}], 'input_number': 0}], 'duration': 400.43, 'bitrate': 421, 'start': 0.0, 'default_video_input_number': 0, 'default_video_stream_number': 0

                                                                   

MoviePy - Done.
MoviePy - Writing video ./dataset/videos/01EtjUk6KFw.mp4



                                                                         

MoviePy - Done !
MoviePy - video ready ./dataset/videos/01EtjUk6KFw.mp4
Done: 01EtjUk6KFw
Processing 01GR5c7dSNo at row 4225...
{'video_found': True, 'audio_found': True, 'metadata': {'major_brand': 'mp42', 'minor_version': '0', 'compatible_brands': 'isommp42', 'creation_time': '2024-08-22T06:27:51.000000Z', 'encoder': 'Google'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [202, 360], 'bitrate': 197, 'fps': 29.54, 'codec_name': 'h264', 'profile': '(Main)', 'metadata': {'Metadata': '', 'creation_time': '2024-08-22T06:27:51.000000Z', 'handler_name': 'ISO Media file produced by Google Inc. Created on: 08/21/2024.', 'vendor_id': '[0][0][0][0]'}}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': 'eng', 'default': True, 'fps': 44100, 'bitrate': 128, 'metadata': {'Metadata': '', 'creation_time': '2024-08-22T06:27:51.000000Z', 'handler_name': 'ISO Media file produced by Google Inc.

                                                                   

MoviePy - Done.
MoviePy - Writing video ./dataset/videos/01GR5c7dSNo.mp4



                                                                         

MoviePy - Done !
MoviePy - video ready ./dataset/videos/01GR5c7dSNo.mp4
Done: 01GR5c7dSNo
Processing 01GR5c7dSNo at row 4226...
{'video_found': True, 'audio_found': True, 'metadata': {'major_brand': 'mp42', 'minor_version': '0', 'compatible_brands': 'isommp42', 'creation_time': '2024-08-22T06:27:51.000000Z', 'encoder': 'Google'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [202, 360], 'bitrate': 197, 'fps': 29.54, 'codec_name': 'h264', 'profile': '(Main)', 'metadata': {'Metadata': '', 'creation_time': '2024-08-22T06:27:51.000000Z', 'handler_name': 'ISO Media file produced by Google Inc. Created on: 08/21/2024.', 'vendor_id': '[0][0][0][0]'}}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': 'eng', 'default': True, 'fps': 44100, 'bitrate': 128, 'metadata': {'Metadata': '', 'creation_time': '2024-08-22T06:27:51.000000Z', 'handler_name': 'ISO Media file produced by Google Inc.

                                                                   

MoviePy - Done.
MoviePy - Writing video ./dataset/videos/01GR5c7dSNo.mp4



                                                                         

MoviePy - Done !
MoviePy - video ready ./dataset/videos/01GR5c7dSNo.mp4
Done: 01GR5c7dSNo
Processing 01IRLV4c-4c at row 4227...


ERROR: [youtube] 01IRLV4c-4c: Sign in to confirm you’re not a bot. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies


Error processing row 4227: ERROR: [youtube] 01IRLV4c-4c: Sign in to confirm you’re not a bot. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies
Processing 01Iho7loxL0 at row 4228...
{'video_found': True, 'audio_found': True, 'metadata': {'major_brand': 'mp42', 'minor_version': '0', 'compatible_brands': 'isommp42', 'encoder': 'Google'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [640, 360], 'bitrate': 340, 'fps': 30.0, 'codec_name': 'h264', 'profile': '(Main)', 'metadata': {'Metadata': '', 'handler_name': 'ISO Media file produced by Google Inc.', 'vendor_id': '[0][0][0][0]'}}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', '

                                                                   

MoviePy - Done.
MoviePy - Writing video ./dataset/videos/01Iho7loxL0.mp4



                                                                         

MoviePy - Done !
MoviePy - video ready ./dataset/videos/01Iho7loxL0.mp4
Done: 01Iho7loxL0
Processing 01Jxsfxi1r4 at row 4229...
{'video_found': True, 'audio_found': True, 'metadata': {'major_brand': 'mp42', 'minor_version': '0', 'compatible_brands': 'isommp42', 'creation_time': '2024-03-04T05:03:55.000000Z'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [640, 360], 'bitrate': 205, 'fps': 29.97002997002997, 'codec_name': 'h264', 'profile': '(Main)', 'metadata': {'Metadata': '', 'creation_time': '2024-03-04T05:03:55.000000Z', 'handler_name': 'ISO Media file produced by Google Inc. Created on: 03/03/2024.', 'vendor_id': '[0][0][0][0]'}}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': 'eng', 'default': True, 'fps': 44100, 'bitrate': 127, 'metadata': {'Metadata': '', 'creation_time': '2024-03-04T05:03:55.000000Z', 'handler_name': 'ISO Media file produced by Google Inc. Created 

                                                                   

MoviePy - Done.
MoviePy - Writing video ./dataset/videos/01Jxsfxi1r4.mp4



                                                                         

MoviePy - Done !
MoviePy - video ready ./dataset/videos/01Jxsfxi1r4.mp4
Done: 01Jxsfxi1r4
Processing 01LPFe-13Aw at row 4230...


ERROR: [youtube] 01LPFe-13Aw: Private video. Sign in if you've been granted access to this video. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies


Error processing row 4230: ERROR: [youtube] 01LPFe-13Aw: Private video. Sign in if you've been granted access to this video. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies
Processing 01MWUU_A6bk at row 4231...
[download]  45.5% of   65.69MiB at    1.73MiB/s ETA 00:20  

In [5]:
# Environment setup helpers. Only the ffmpeg line is active; the pip lines are
# kept commented out for reference.
# NOTE(review): the apt-get line needs an `apt-get` binary on PATH; the output
# recorded for this cell shows `apt-get: command not found`.
# !pip install -U yt-dlp
!apt-get install -y ffmpeg
# !pip install soundfile
# !pip install librosa
# !pip install opencv-python

/bin/bash: line 1: apt-get: command not found


In [None]:
# import subprocess
# print(subprocess.run(["which", "ffmpeg"], capture_output=True, text=True).stdout)




In [2]:
pip install yt-dlp moviepy imageio-ffmpeg librosa opencv-python matplotlib

Defaulting to user installation because normal site-packages is not writeable
Collecting moviepy
  Downloading moviepy-2.1.2-py3-none-any.whl (126 kB)
     |████████████████████████████████| 126 kB 2.4 MB/s            
Collecting imageio-ffmpeg
  Downloading imageio_ffmpeg-0.6.0-py3-none-manylinux2014_x86_64.whl (29.5 MB)
     |████████████████████████████████| 29.5 MB 21.7 MB/s            
Collecting python-dotenv>=0.10
  Downloading python_dotenv-1.1.0-py3-none-any.whl (20 kB)
Collecting proglog<=1.0.0
  Downloading proglog-0.1.11-py3-none-any.whl (7.8 kB)
Collecting imageio<3.0,>=2.5
  Downloading imageio-2.37.0-py3-none-any.whl (315 kB)
     |████████████████████████████████| 315 kB 102.3 MB/s            
Installing collected packages: python-dotenv, proglog, imageio-ffmpeg, imageio, moviepy
Successfully installed imageio-2.37.0 imageio-ffmpeg-0.6.0 moviepy-2.1.2 proglog-0.1.11 python-dotenv-1.1.0
Note: you may need to restart t

In [3]:
# Sanity check: confirm that VideoFileClip can be imported from its full module
# path, and print the class object it resolves to.
from moviepy.video.io.VideoFileClip import VideoFileClip

print(VideoFileClip)


<class 'moviepy.video.io.VideoFileClip.VideoFileClip'>
