In [2]:
import re
import os
import subprocess
from pathlib import Path
from yt_dlp import YoutubeDL
from moviepy.editor import AudioFileClip

import ssl
import urllib.request

# NOTE(review): this swaps the module's default HTTPS context factory for the
# "unverified" one, so code that relies on that default (e.g. urllib) will no
# longer validate server certificates for the rest of the process. Presumably
# added to work around a local certificate problem -- confirm it is still
# needed, since it removes protection against man-in-the-middle attacks.
ssl._create_default_https_context = ssl._create_unverified_context

from tkinter import filedialog, Tk

# Prompt user to choose directory.
# A Tk root must exist for the dialog; keep a reference so it can be destroyed
# afterwards instead of leaving a hidden window (and Tcl interpreter) alive.
_dialog_root = Tk()
_dialog_root.withdraw()  # Hide main tkinter window
try:
    output_base = filedialog.askdirectory(title="Select folder to save MP3 splits")
finally:
    _dialog_root.destroy()

if not output_base:
    print("❌ No folder selected. Exiting.")
    # `exit()` is a convenience injected by the `site` module for interactive
    # use; raising SystemExit works everywhere and reports a failure status.
    raise SystemExit(1)


def sanitize_filename(name):
    """Return *name* without filename-reserved characters.

    Every occurrence of ``\\ / : " * ? < > |`` is deleted (not replaced), and
    leading/trailing whitespace is trimmed from what remains.
    """
    forbidden = '\\/:"*?<>|'
    deletion_table = str.maketrans('', '', forbidden)
    cleaned = name.translate(deletion_table)
    return cleaned.strip()

def timestamp_to_seconds(ts):
    """Convert an ``M:SS`` or ``H:MM:SS`` string to a number of seconds.

    Any other number of colon-separated fields yields 0. Every field is parsed
    with ``int`` first, so a non-numeric field raises ``ValueError``.
    """
    fields = [int(piece) for piece in ts.split(":")]
    if len(fields) not in (2, 3):
        return 0
    # Fold left-to-right in base 60: ((h * 60) + m) * 60 + s.
    total = 0
    for value in fields:
        total = total * 60 + value
    return total

# Track-list style: optional track number, title, timestamp at the END of the
# line, e.g. "1. Song Title 3:45". This is the original pattern, unchanged.
_TRAILING_TIME_RE = re.compile(
    r'(?P<num>\d{1,2}[.)]?)?\s*(?P<title>.+?)\s+(?P<time>\d{1,2}:\d{2}(?::\d{2})?)$'
)

# Chapter style: timestamp at the START of the line, optionally bracketed and
# optionally followed by a dash/pipe separator, e.g. "0:00 Intro",
# "[1:02:03] - Outro". The lookahead stops a longer digit/colon run such as
# "12:345" or a bare "1:02:03" from being split into a time plus a fake title.
_LEADING_TIME_RE = re.compile(
    r'[\[(]?(?P<time>\d{1,2}:\d{2}(?::\d{2})?)(?![:\d])[\])]?'
    r'(?:\s*[-–—|]+\s*|\s+)(?P<title>\S.*)$'
)


def parse_description(description):
    """Extract ``(seconds, title)`` pairs from a video description.

    Each line is stripped and matched against two layouts:

    1. title followed by a trailing timestamp ("Title 3:45"), and
    2. a leading timestamp followed by the title ("0:00 Title").

    The trailing layout is tried first so lines it already accepted keep
    producing the same result; the leading layout is only a fallback.

    Args:
        description: Full description text; ``None`` is treated as empty.

    Returns:
        A list of ``(seconds, title)`` tuples in the order the lines appear.
        Lines that match neither layout are ignored.
    """
    results = []
    for raw_line in (description or "").splitlines():
        line = raw_line.strip()
        match = _TRAILING_TIME_RE.match(line) or _LEADING_TIME_RE.match(line)
        if not match:
            continue
        title = match.group('title').strip()
        seconds = timestamp_to_seconds(match.group('time'))
        results.append((seconds, title))
    return results

# === Main ===
# Downloads the audio track of a YouTube video as MP3, reads chapter timestamps
# from the video description and writes one MP3 per chapter into
# "<output_base>/<video title>_splits". The intermediate download is always
# removed, including when the run aborts early or fails.
url = input("🎵 Enter the YouTube URL: ").strip()

# Step 1: Download audio and get metadata
print("⬇️ Downloading audio...")
ydl_opts = {
    'format': 'bestaudio/best',
    'outtmpl': 'temp_audio.%(ext)s',
    'quiet': True,
    # NOTE(review): skips TLS certificate validation for the download;
    # confirm this workaround is still required.
    'nocheckcertificate': True,
    'postprocessors': [{
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
        'preferredquality': '192',  # 👈 better than 128
    }],
    'writesubtitles': False,
    'writeautomaticsub': False,
    # Metadata is taken from the dict returned by extract_info(), so no
    # temp_audio.info.json side file is requested (it was never cleaned up).
    'writeinfojson': False,
}
# Name produced by 'outtmpl' once the postprocessor has converted to mp3.
file_path = "temp_audio.mp3"

try:
    with YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=True)

    # The keys may be present with a None value, so fall back explicitly.
    description = info.get("description") or ""
    title = sanitize_filename(info.get("title") or "YouTube_Audio") or "YouTube_Audio"

    # Step 2: Parse timestamps from description
    print("🕒 Parsing timestamps...")
    timestamps = parse_description(description)
    if len(timestamps) < 2:
        print("❌ Not enough timestamps found.")
        raise SystemExit(1)

    # Step 3: Split the MP3
    output_folder = Path(output_base) / f"{title}_splits"
    output_folder.mkdir(parents=True, exist_ok=True)

    print("✂️ Splitting audio...")
    audio = AudioFileClip(file_path)
    try:
        duration = audio.duration
        # Order chapters by start time and drop any that begin at or after
        # the end of the audio, so every (start, end) pair is a valid range.
        boundaries = sorted(
            (entry for entry in timestamps if entry[0] < duration),
            key=lambda entry: entry[0],
        )
        boundaries.append((duration, "End"))  # Add last boundary

        segments = zip(boundaries, boundaries[1:])
        for index, ((start, label), (end, _)) in enumerate(segments, start=1):
            if end <= start:
                # Duplicate timestamps would yield an empty clip.
                continue
            safe_title = sanitize_filename(f"{index:02d}. {label}.mp3")
            out_path = output_folder / safe_title
            audio.subclip(start, end).write_audiofile(
                str(out_path), codec='libmp3lame', verbose=False, logger=None
            )
    finally:
        # Close before deleting: an open reader can block removal of the file.
        audio.close()

    print(f"✅ Done! MP3s saved to: {output_folder}")
finally:
    if os.path.exists(file_path):
        os.remove(file_path)


⬇️ Downloading audio...
🕒 Parsing timestamps...                                   
✂️ Splitting audio...
✅ Done! MP3s saved to: D:\shared\Purification - Dwell In The House Of The Lord Forever (Full Album 2020)_splits
