In [1]:
!pip install moviepy pytube
import concurrent.futures
import datetime
import os

import pandas as pd
import torch
from moviepy.editor import VideoFileClip
from pytube import YouTube

Defaulting to user installation because normal site-packages is not writeable


In [2]:
# Path of the CSV manifest that is loaded with pandas further down.
CSV_FILE = "./vggsound.csv"

# Length, in seconds, of the clip cut from every downloaded video.
TIME_DURATION_IN_SEC = 10

In [3]:
# Mapping from the column labels found in the CSV (left tuple) to the
# descriptive names used throughout the notebook (right tuple).
new_column_names = dict(
    zip(
        ('---g-f_I2yQ', '1', 'people marching', 'test'),
        ('youtube_video_id', 'start_seconds', 'label', 'split'),
    )
)

In [4]:
# Load the CSV and apply the descriptive column names in a single chained expression.
df = pd.read_csv(CSV_FILE).rename(columns=new_column_names)

In [5]:
# Preview the first five rows to check the renamed columns.
df.head(n=5)

Unnamed: 0,youtube_video_id,start_seconds,label,split
0,--0PQM4-hqg,30,waterfall burbling,train
1,--56QUhyDQM,185,playing tennis,train
2,--5OkAjCI7g,40,people belly laughing,train
3,--8puiAGLhs,30,car engine starting,train
4,--96EN9NUQM,242,alarm clock ringing,train


In [6]:
# Use the GPU when CUDA is available; fall back to the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(device)

cuda


In [7]:
# `device` is a torch.device object, not a string, so compare its `.type`
# attribute; comparing the object itself against "cpu" is not a reliable test.
os.environ["TOKENIZERS_PARALLELISM"] = "false" if device.type == "cpu" else "true"

In [8]:
def extract_audio_and_video_within_interval(youtube_video_id, input_video_path, start_time, end_time):
    """Cut the interval [start_time, end_time] out of a video and save audio and video.

    Two files are written, and their parent directories are created if missing:
      * ./data/audio/audio_<youtube_video_id>.wav - audio resampled to 16 kHz,
        encoded with the 'pcm_s16le' codec
      * ./data/video/video_<youtube_video_id>.mp4 - the clip encoded with 'libx264'

    Args:
        youtube_video_id: Identifier used to build the output file names.
        input_video_path: Path of the source video file.
        start_time: Start of the interval, in seconds.
        end_time: End of the interval, in seconds.

    Raises:
        ValueError: If the source video has no audio track.
    """
    audio_file_path = f"./data/audio/audio_{youtube_video_id}.wav"
    video_file_path = f"./data/video/video_{youtube_video_id}.mp4"

    # Make sure the output folders exist before anything is written to them.
    for output_path in (audio_file_path, video_file_path):
        os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # Load the video clip
    video_clip = VideoFileClip(input_video_path)
    audio_clip = None
    try:
        # Define the subclip with the specified start and end times
        subclip = video_clip.subclip(start_time, end_time)

        # Extract audio from the subclip; it is None when the file has no audio track
        audio_clip = subclip.audio
        if audio_clip is None:
            raise ValueError(f"Video {youtube_video_id} has no audio track")

        # Resample the audio to 16 kHz and write it to a WAV file
        resampled_audio_clip = audio_clip.set_fps(16000)
        resampled_audio_clip.write_audiofile(audio_file_path, codec='pcm_s16le')

        # Write the video to an MP4 file
        subclip.write_videofile(video_file_path, codec='libx264')
    finally:
        # Close the clips even when one of the steps above fails
        video_clip.close()
        if audio_clip is not None:
            audio_clip.close()

In [9]:
def download_files(youtube_video_id, start_seconds):
    """Download one YouTube video and save a clip of TIME_DURATION_IN_SEC seconds from it.

    The video is downloaded to ``./<youtube_video_id>.mp4``, the interval
    [start_seconds, start_seconds + TIME_DURATION_IN_SEC] is handed to
    extract_audio_and_video_within_interval(), and the downloaded file is
    deleted afterwards. Failures are reported with print() rather than raised.

    Args:
        youtube_video_id: ID of the YouTube video.
        start_seconds: Offset, in seconds, where the clip starts.
    """
    print(f"Starting to download file: {youtube_video_id}")

    # Construct the YouTube video URL
    video_url = f"https://www.youtube.com/watch?v={youtube_video_id}"

    downloaded_file = f"{youtube_video_id}.mp4"

    try:
        # Created inside the try block so a failure here is reported like any other
        yt = YouTube(video_url)

        # Get the highest-resolution stream that includes both video and audio
        stream = (
            yt.streams
            .filter(progressive=True, file_extension='mp4')
            .order_by('resolution')
            .desc()
            .first()
        )
        if stream is None:
            # first() yields None when no stream matches the filter
            raise RuntimeError("no progressive mp4 stream available")

        # Download the video
        stream.download(output_path=".", filename=downloaded_file)

        print(f"Downloaded video: {youtube_video_id}")

        # Clip the requested interval and save its audio and video
        extract_audio_and_video_within_interval(youtube_video_id, downloaded_file, start_seconds,
                                                start_seconds + TIME_DURATION_IN_SEC)

        print(f"Saved audio and video for: {youtube_video_id}")

    except Exception as e:
        print(f"Error downloading video with YouTube ID {youtube_video_id}: {e}")

    finally:
        # Always remove the temporary full-length download
        if os.path.exists(f"./{downloaded_file}"):
            os.remove(f"./{downloaded_file}")

In [10]:
def extract_audio_video(limit=2):
    """Download and clip the first `limit` videos listed in the module-level `df`.

    Each selected row is passed to download_files() on a pool of 3 worker
    threads. The elapsed wall-clock time is printed in minutes when all tasks
    have finished.

    Args:
        limit: Maximum number of rows (taken from the top of `df`) to process.
            Values below 1 process nothing.
    """
    start_time = datetime.datetime.now()

    # Select by position so exactly `limit` rows are used and the rest of the
    # frame is never iterated.
    selected_rows = df.head(max(limit, 0))

    # Create a ThreadPoolExecutor with 3 worker threads
    with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
        # Submit each video downloading task for execution asynchronously
        futures = [
            executor.submit(download_files, row['youtube_video_id'], row['start_seconds'])
            for _, row in selected_rows.iterrows()
        ]

        # Wait for all tasks to complete
        concurrent.futures.wait(futures)

    # Report anything a task raised instead of leaving it hidden inside the future
    for future in futures:
        error = future.exception()
        if error is not None:
            print(f"Download task failed: {error}")

    # Convert the elapsed timedelta to a number of minutes
    total_time = (datetime.datetime.now() - start_time).total_seconds() / 60

    print(f"Time taken for download: {total_time:.2f} minutes")

In [11]:
# Value passed as the `limit` argument of extract_audio_video() in the next cell.
count = 5

In [None]:
# Start the threaded download-and-clip run, bounded by `count`.
extract_audio_video(limit=count)

Starting to download file: --0PQM4-hqg
Starting to download file: --56QUhyDQM
Starting to download file: --5OkAjCI7g
Error downloading video with YouTube ID --5OkAjCI7g: --5OkAjCI7g is a private video
Starting to download file: --8puiAGLhs
Downloaded video: --0PQM4-hqg
Downloaded video: --56QUhyDQM
Downloaded video: --8puiAGLhs
MoviePy - Writing audio in ./data/audio/audio_--0PQM4-hqg.wav


chunk:   1%|          | 1/81 [00:00<00:00, 263.81it/s, now=None]

MoviePy - Writing audio in ./data/audio/audio_--56QUhyDQM.wav



                                                                
chunk:   1%|          | 1/81 [00:00<00:16,  4.78it/s, now=None]
chunk:   1%|          | 1/81 [00:00<00:15,  5.29it/s, now=None][A

MoviePy - Writing audio in ./data/audio/audio_--8puiAGLhs.wav




chunk:   2%|▏         | 2/81 [00:00<00:12,  6.28it/s, now=None]
chunk:   2%|▏         | 2/81 [00:00<00:12,  6.12it/s, now=None][A

chunk:  33%|███▎      | 27/81 [00:00<00:00, 54.23it/s, now=None][A[A
                                                                [A

[A[A                                                         
[A                                                             

chunk:  32%|███▏      | 26/81 [00:00<00:07,  7.17it/s, now=None][A[A
chunk:  52%|█████▏    | 42/81 [00:00<00:00, 53.45it/s, now=None][A

[A[A                                                          
[A                                                             

chunk:  32%|███▏      | 26/81 [00:00<00:07,  7.17it/s, now=None][A[A
chunk:  56%|█████▌    | 45/81 [00:00<00:00, 53.45it/s, now=None][A

[A[A                                                          
[A                                                             

chunk:  32%|███▏      | 26/81 [00:00<00:07,  7.17it/s, 

MoviePy - Done.
Moviepy - Building video ./data/video/video_--0PQM4-hqg.mp4.
MoviePy - Writing audio in video_--0PQM4-hqgTEMP_MPY_wvf_snd.mp3


chunk:   0%|          | 0/221 [00:00<?, ?it/s, now=None]
                                                                [A

chunk:   0%|          | 1/221 [00:00<00:06, 34.70it/s, now=None]

                                                                [A[A

chunk:   0%|          | 1/221 [00:00<00:06, 31.45it/s, now=None]

                                                                [A[A

chunk:   0%|          | 1/221 [00:00<00:08, 27.33it/s, now=None]

chunk:  32%|███▏      | 26/81 [00:00<00:07,  7.17it/s, now=None][A[A

MoviePy - Done.
Moviepy - Building video ./data/video/video_--56QUhyDQM.mp4.
MoviePy - Writing audio in video_--56QUhyDQMTEMP_MPY_wvf_snd.mp3



chunk:   0%|          | 0/221 [00:00<?, ?it/s, now=None][A

chunk:  33%|███▎      | 27/81 [00:00<00:01, 47.71it/s, now=None][A[A

                                                                [A[A
chunk:   0%|          | 1/221 [00:00<00:51,  4.24it/s, now=None]
                                                                [A
chunk:   0%|          | 1/221 [00:00<00:53,  4.12it/s, now=None]
                                                                [A
chunk:   0%|          | 1/221 [00:00<00:54,  4.04it/s, now=None]
chunk:   0%|          | 1/221 [00:00<00:41,  5.28it/s, now=None][A

MoviePy - Done.
Moviepy - Building video ./data/video/video_--8puiAGLhs.mp4.
MoviePy - Writing audio in video_--8puiAGLhsTEMP_MPY_wvf_snd.mp3




chunk:   1%|          | 2/221 [00:00<00:38,  5.75it/s, now=None]
chunk:   1%|          | 2/221 [00:00<00:35,  6.21it/s, now=None][A

chunk:  14%|█▍        | 32/221 [00:00<00:03, 61.95it/s, now=None][A[A
chunk:  41%|████      | 90/221 [00:00<00:00, 174.72it/s, now=None][A
chunk:  66%|██████▌   | 146/221 [00:00<00:00, 263.70it/s, now=None][A
                                                                   [A
chunk:  74%|███████▍  | 164/221 [00:00<00:00, 271.53it/s, now=None][A

[A[A                                                          
[A                                                                

chunk:  14%|█▍        | 31/221 [00:00<00:25,  7.35it/s, now=None][A[A
chunk:  77%|███████▋  | 170/221 [00:00<00:00, 271.53it/s, now=None][A

[A[A                                                           
[A                                                                

chunk:  14%|█▍        | 31/221 [00:00<00:25,  7.35it/s, now=None][A[A
chunk:  77%|███████▋  | 17

MoviePy - Done.
Moviepy - Writing video ./data/video/video_--0PQM4-hqg.mp4



t:   0%|          | 0/300 [00:00<?, ?it/s, now=None]
chunk:  95%|█████████▌| 210/221 [00:01<00:00, 314.97it/s, now=None][A
                                                                   [A

t:   4%|▎         | 11/300 [00:00<00:03, 95.37it/s, now=None]    

                                                             one][A[A

t:   4%|▎         | 11/300 [00:00<00:03, 89.96it/s, now=None]    

chunk:  14%|█▍        | 31/221 [00:00<00:25,  7.35it/s, now=None][A[A

MoviePy - Done.
Moviepy - Writing video ./data/video/video_--56QUhyDQM.mp4




t:   0%|          | 0/300 [00:00<?, ?it/s, now=None][A

chunk:  14%|█▍        | 32/221 [00:00<00:05, 34.75it/s, now=None][A[A

chunk:  40%|███▉      | 88/221 [00:01<00:01, 105.17it/s, now=None][A[A

chunk:  70%|██████▉   | 154/221 [00:01<00:00, 193.79it/s, now=None][A[A

chunk:  96%|█████████▌| 212/221 [00:01<00:00, 265.30it/s, now=None][A[A

                                                                   [A[A
t:   4%|▍         | 12/300 [00:00<00:14, 20.24it/s, now=None]
                                                             [A
t:   4%|▍         | 12/300 [00:00<00:14, 20.24it/s, now=None]
t:   4%|▍         | 12/300 [00:00<00:14, 20.24it/s, now=None][A

MoviePy - Done.
Moviepy - Writing video ./data/video/video_--8puiAGLhs.mp4





t:  12%|█▏        | 35/300 [00:00<00:04, 60.95it/s, now=None]
t:   4%|▍         | 12/300 [00:00<00:15, 18.52it/s, now=None][A
t:  18%|█▊        | 53/300 [00:01<00:04, 55.54it/s, now=None][A
t:  18%|█▊        | 53/300 [00:01<00:04, 59.35it/s, now=None][A

t:  21%|██▏       | 64/300 [00:01<00:05, 41.52it/s, now=None][A[A

t:  11%|█         | 33/300 [00:00<00:05, 44.92it/s, now=None][A[A
t:  21%|██▏       | 64/300 [00:01<00:05, 40.31it/s, now=None][A

t:  24%|██▍       | 72/300 [00:01<00:07, 32.32it/s, now=None][A[A
t:  26%|██▌       | 78/300 [00:02<00:07, 29.71it/s, now=None][A