# YouTube Video to 10-Second Audio Clips

- This notebook downloads the audio track of a YouTube video, converts it to WAV, and splits it into 10-second segments (the final segment may be shorter).
- We used this notebook to get audio clips from YouTube and test our model with real-life examples.

## Requirements
- **FFmpeg** must be installed on your system (required by both yt-dlp and pydub)
  - Windows: Download from https://ffmpeg.org/download.html and add to PATH
  - Linux: `sudo apt install ffmpeg`
  - macOS: `brew install ffmpeg`


In [1]:
# Install the Python packages this notebook imports (yt-dlp and pydub).
# %pip (rather than !pip) targets the environment of the running kernel.
# FFmpeg is a system dependency and is NOT installed by this cell.
%pip install yt-dlp pydub


Collecting yt-dlp
  Downloading yt_dlp-2025.12.8-py3-none-any.whl.metadata (180 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m180.3/180.3 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
Downloading yt_dlp-2025.12.8-py3-none-any.whl (3.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m17.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: yt-dlp
Successfully installed yt-dlp-2025.12.8


In [2]:
import os
import yt_dlp
from pydub import AudioSegment
from pathlib import Path


  m = re.match('([su]([0-9]{1,2})p?) \(([0-9]{1,2}) bit\)$', token)
  m2 = re.match('([su]([0-9]{1,2})p?)( \(default\))?$', token)
  elif re.match('(flt)p?( \(default\))?$', token):
  elif re.match('(dbl)p?( \(default\))?$', token):


In [3]:
# Configuration: the values a reader is expected to tune before running the notebook
YOUTUBE_URL = "https://www.youtube.com/watch?v=WSSIIC58Fsc"  # video whose audio is downloaded
AUDIO_FILE = "downloaded_audio.wav"  # filename the downloaded audio is saved under
OUTPUT_DIR = "audio_segments"  # folder that receives the individual clips
SEGMENT_DURATION_MS = 10_000  # length of each clip: 10 seconds, expressed in milliseconds


In [4]:
def download_youtube_audio(url: str, output_file: str) -> str:
    """
    Download the audio of a single YouTube video and save it as a WAV file.

    yt-dlp fetches the best available audio stream and its FFmpegExtractAudio
    post-processor converts it to WAV, so FFmpeg must be available on PATH.

    Args:
        url: YouTube video URL
        output_file: Desired output filename; its extension is dropped and
            replaced by ``.wav``

    Returns:
        Path to the downloaded WAV file (``output_file`` with a ``.wav`` extension)

    Raises:
        FileNotFoundError: If yt-dlp returned without producing the expected WAV file
    """
    # Strip the extension: the post-processor decides the final one (.wav)
    output_base = os.path.splitext(output_file)[0]
    final_path = f"{output_base}.wav"

    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
            # NOTE(review): a bitrate hint presumably has no effect on lossless WAV — confirm
            'preferredquality': '192',
        }],
        # outtmpl is a %-style template: escape literal '%' in the base name and
        # add an explicit %(ext)s placeholder so the intermediate download keeps
        # its real extension and the converted file ends up at <output_base>.wav
        'outtmpl': output_base.replace('%', '%%') + '.%(ext)s',
        # A URL carrying a playlist parameter must still yield exactly one file,
        # because this function returns a single path
        'noplaylist': True,
        'quiet': False,
        'no_warnings': False,
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        print(f"Downloading audio from: {url}")
        ydl.download([url])

    # Fail here with a clear message instead of letting the caller hit a
    # confusing error when it later tries to open a missing file
    if not os.path.isfile(final_path):
        raise FileNotFoundError(
            f"Expected audio file was not created: {final_path}"
        )

    print(f"Audio downloaded successfully: {final_path}")
    return final_path


In [5]:
def split_audio_into_segments(audio_path: str, segment_duration_ms: int, output_dir: str) -> list:
    """
    Split a WAV file into consecutive segments of a fixed duration.

    Segments are written to ``output_dir`` as
    ``segment_<index>_<start>s-<end>s.wav`` with a 1-based, zero-padded index.
    The last segment is shorter than ``segment_duration_ms`` when the total
    duration is not an exact multiple of it.

    Args:
        audio_path: Path to the input WAV file
        segment_duration_ms: Duration of each segment in milliseconds (must be > 0)
        output_dir: Directory to save the segments (created if missing)

    Returns:
        List of paths to the created segment files, in playback order

    Raises:
        ValueError: If ``segment_duration_ms`` is not positive
    """
    # Validate before touching the filesystem: zero would otherwise raise a
    # ZeroDivisionError deep in the function and a negative value would
    # silently produce no segments at all
    if segment_duration_ms <= 0:
        raise ValueError(
            f"segment_duration_ms must be a positive number of milliseconds, "
            f"got {segment_duration_ms!r}"
        )

    # Create output directory if it doesn't exist
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    # Load the audio file
    print(f"Loading audio file: {audio_path}")
    audio = AudioSegment.from_wav(audio_path)

    # The total duration is taken from len(audio) and treated as milliseconds
    total_duration_ms = len(audio)
    print(f"Total audio duration: {total_duration_ms / 1000:.2f} seconds")

    # One start offset per segment; the range length is the ceiling of
    # total_duration_ms / segment_duration_ms
    segment_starts = range(0, total_duration_ms, segment_duration_ms)
    print(f"Splitting into {len(segment_starts)} segments of {segment_duration_ms / 1000} seconds each")

    segment_paths = []

    for index, start_ms in enumerate(segment_starts, start=1):
        # Clamp the end so the final segment stops at the end of the audio
        end_ms = min(start_ms + segment_duration_ms, total_duration_ms)
        segment = audio[start_ms:end_ms]

        # Filename carries the segment number and its time range in seconds
        start_sec = start_ms / 1000
        end_sec = end_ms / 1000
        segment_filename = f"segment_{index:04d}_{start_sec:.1f}s-{end_sec:.1f}s.wav"
        segment_path = os.path.join(output_dir, segment_filename)

        segment.export(segment_path, format="wav")
        segment_paths.append(segment_path)

        print(f"Created: {segment_filename} ({end_sec - start_sec:.1f}s)")

    print(f"\nTotal segments created: {len(segment_paths)}")
    return segment_paths


In [6]:
# Main execution: fetch the audio once, then cut it into fixed-length clips

# Step 1: download the video's audio and convert it to WAV
audio_file = download_youtube_audio(url=YOUTUBE_URL, output_file=AUDIO_FILE)

# Step 2: slice the WAV into clips of SEGMENT_DURATION_MS each
segments = split_audio_into_segments(
    audio_path=audio_file,
    segment_duration_ms=SEGMENT_DURATION_MS,
    output_dir=OUTPUT_DIR,
)

print(f"\n Done! All {len(segments)} audio segments saved to '{OUTPUT_DIR}/' folder")


Downloading audio from: https://www.youtube.com/watch?v=WSSIIC58Fsc
[youtube] Extracting URL: https://www.youtube.com/watch?v=WSSIIC58Fsc
[youtube] WSSIIC58Fsc: Downloading webpage




[youtube] WSSIIC58Fsc: Downloading android sdkless player API JSON
[youtube] WSSIIC58Fsc: Downloading web safari player API JSON




[youtube] WSSIIC58Fsc: Downloading m3u8 information




[info] WSSIIC58Fsc: Downloading 1 format(s): 251
[download] Destination: downloaded_audio
[download] 100% of    3.99MiB in 00:00:00 at 25.97MiB/s  
[ExtractAudio] Destination: downloaded_audio.wav
Deleting original file downloaded_audio (pass -k to keep)
Audio downloaded successfully: downloaded_audio.wav
Loading audio file: downloaded_audio.wav
Total audio duration: 231.11 seconds
Splitting into 24 segments of 10.0 seconds each
Created: segment_0001_0.0s-10.0s.wav (10.0s)
Created: segment_0002_10.0s-20.0s.wav (10.0s)
Created: segment_0003_20.0s-30.0s.wav (10.0s)
Created: segment_0004_30.0s-40.0s.wav (10.0s)
Created: segment_0005_40.0s-50.0s.wav (10.0s)
Created: segment_0006_50.0s-60.0s.wav (10.0s)
Created: segment_0007_60.0s-70.0s.wav (10.0s)
Created: segment_0008_70.0s-80.0s.wav (10.0s)
Created: segment_0009_80.0s-90.0s.wav (10.0s)
Created: segment_0010_90.0s-100.0s.wav (10.0s)
Created: segment_0011_100.0s-110.0s.wav (10.0s)
Created: segment_0012_110.0s-120.0s.wav (10.0s)
Created: se

In [7]:
# Optional: report every clip that was written, together with its size on disk
print("Created audio segments:")
for i, segment_path in enumerate(segments, start=1):
    # Byte count reported by the filesystem, converted to kilobytes
    file_size = os.stat(segment_path).st_size / 1024
    print(f"  {i}. {os.path.basename(segment_path)} ({file_size:.1f} KB)")


Created audio segments:
  1. segment_0001_0.0s-10.0s.wav (1875.0 KB)
  2. segment_0002_10.0s-20.0s.wav (1875.0 KB)
  3. segment_0003_20.0s-30.0s.wav (1875.0 KB)
  4. segment_0004_30.0s-40.0s.wav (1875.0 KB)
  5. segment_0005_40.0s-50.0s.wav (1875.0 KB)
  6. segment_0006_50.0s-60.0s.wav (1875.0 KB)
  7. segment_0007_60.0s-70.0s.wav (1875.0 KB)
  8. segment_0008_70.0s-80.0s.wav (1875.0 KB)
  9. segment_0009_80.0s-90.0s.wav (1875.0 KB)
  10. segment_0010_90.0s-100.0s.wav (1875.0 KB)
  11. segment_0011_100.0s-110.0s.wav (1875.0 KB)
  12. segment_0012_110.0s-120.0s.wav (1875.0 KB)
  13. segment_0013_120.0s-130.0s.wav (1875.0 KB)
  14. segment_0014_130.0s-140.0s.wav (1875.0 KB)
  15. segment_0015_140.0s-150.0s.wav (1875.0 KB)
  16. segment_0016_150.0s-160.0s.wav (1875.0 KB)
  17. segment_0017_160.0s-170.0s.wav (1875.0 KB)
  18. segment_0018_170.0s-180.0s.wav (1875.0 KB)
  19. segment_0019_180.0s-190.0s.wav (1875.0 KB)
  20. segment_0020_190.0s-200.0s.wav (1875.0 KB)
  21. segment_0021_200.0s