<a href="https://colab.research.google.com/github/ras0k/auto-lyrics/blob/main/list2mp3-v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# @title List to mp3
# Install or upgrade yt-dlp, which the code below invokes to fetch audio and video metadata.
!pip install -U yt-dlp
# Install ffmpeg, which the code below invokes to crop cover art, merge tracks and embed metadata.
!apt install ffmpeg -y

import glob
import json
import os
import re
import shlex
import subprocess
import tempfile
from urllib.parse import urlparse, parse_qs

import ipywidgets as widgets
import requests
from bs4 import BeautifulSoup
from IPython.display import display

# Clear temporary files from previous runs
def clear_folder():
    """Remove leftover artifacts of earlier runs from the current directory.

    Every file matching one of the glob patterns below (MP3 files, the
    concat list, cover images) is deleted. A file that cannot be deleted is
    reported on stdout and skipped, so the cleanup is best-effort.
    """
    stale_globs = ("*.mp3", "file_list.txt", "cover*", "cropped_cover*")
    # The generator is lazy on purpose: a pattern is only globbed once all
    # matches of the previous pattern have been processed.
    leftovers = (
        match
        for stale_glob in stale_globs
        for match in glob.glob(stale_glob)
    )
    for leftover in leftovers:
        try:
            os.remove(leftover)
        except Exception as e:
            print(f"Could not remove {leftover}: {e}")

# Run the cleanup right away, when this cell is executed.
clear_folder()

# Function to transform a URL like:
# https://www.youtube.com/watch?v=...&list=...
# into a proper playlist URL:
# https://www.youtube.com/playlist?list=...
def transform_playlist_url(url):
    """Return the canonical playlist URL for *url* when it carries a list id.

    A URL that has a ``list`` query parameter but is not already a
    ``playlist?`` URL is rewritten to
    ``https://www.youtube.com/playlist?list=<id>`` using the first ``list``
    value. Any other URL is returned unchanged.
    """
    # Cheap substring screen first; URLs that cannot qualify are passed through.
    if "list=" not in url or "playlist?" in url:
        return url
    list_ids = parse_qs(urlparse(url).query).get("list")
    if not list_ids:
        # "list=" matched something else (e.g. "playlist=") or had no value.
        return url
    return "https://www.youtube.com/playlist?list=" + list_ids[0]

# Function to get playlist name using BeautifulSoup
def get_playlist_title_bs(url, timeout=15):
    """Fetch the page at *url* and return its title as a safe file-name stem.

    The trailing " - YouTube" suffix is dropped, every "//" becomes "-", and
    the characters ``\\ / * ? : " < > |`` are removed.

    Args:
        url: Address of the playlist page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The sanitized title, or "merged_audio" when the page cannot be
        fetched, has no <title> tag, or the title is empty after sanitizing.
    """
    fallback = "merged_audio"
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as e:
        # Network failures get the same fallback as a non-200 response
        # instead of aborting the whole run.
        print("Failed to retrieve the playlist page:", e)
        return fallback
    if response.status_code != 200:
        print("Failed to retrieve the playlist page.")
        return fallback
    soup = BeautifulSoup(response.text, "html.parser")
    title_tag = soup.find("title")
    if not title_tag:
        return fallback
    title = title_tag.text
    title = re.sub(r"\s*-\s*YouTube\s*$", "", title)
    # Must happen before the character filter below, which deletes every "/"
    # and would otherwise leave nothing for this replacement to match.
    title = title.replace("//", "-")
    # "\\\\" inside the class is a literal backslash, so both path separators
    # are stripped along with the other characters.
    title = re.sub(r'[\\/*?:"<>|]', "", title)
    # An empty stem would produce a file literally named ".mp3".
    return title.strip() or fallback

def get_playlist_name(url):
    """Return the sanitized playlist title for *url*, echoing it to stdout."""
    sanitized_title = get_playlist_title_bs(url)
    print("Playlist title (sanitized):", sanitized_title)
    return sanitized_title

# Extract metadata from the first video.
# If the URL is a playlist, use --playlist-items 1 to get only the first video's metadata.
def get_video_metadata(url):
    """Return channel, thumbnail and title of the (first) video behind *url*.

    The URL is normalized with ``transform_playlist_url``; for a playlist URL
    only the first entry is queried.

    Args:
        url: Video or playlist URL.

    Returns:
        A dict with the keys "channel", "thumbnail" and "title", or an empty
        dict when yt-dlp fails or its output cannot be parsed.
    """
    try:
        url = transform_playlist_url(url)
        cmd = ["yt-dlp", "-j"]
        if "playlist?list=" in url:
            cmd += ["--playlist-items", "1"]
        # Passing an argument list (no shell) keeps "&", "?" and ";" in the
        # URL from being interpreted by a shell; "--" stops option parsing so
        # a URL or id starting with "-" is not mistaken for a flag.
        cmd += ["--", url]
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            print("Error fetching video metadata.")
            return {}
        info = json.loads(result.stdout)
        # "or" also covers keys that are present but null in the JSON.
        return {
            "channel": info.get("uploader") or "Unknown Artist",
            "thumbnail": info.get("thumbnail") or "",
            "title": info.get("title") or "",
        }
    except Exception as e:
        # Metadata is optional for the caller, so any failure (including a
        # missing yt-dlp executable) degrades to "no metadata".
        print("Exception while fetching video metadata:", e)
        return {}

# Download thumbnail image from URL
def download_thumbnail(url, filename="cover.jpg", timeout=30):
    """Download the image at *url* and store it as *filename*.

    Args:
        url: Address of the thumbnail image.
        filename: Path the image bytes are written to.
        timeout: Seconds to wait for the server; without it a stalled
            connection would block the run indefinitely.

    Returns:
        True when the image was saved, False on a non-200 response, a
        request error, or a failure to write the file.
    """
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            print("Failed to download thumbnail.")
            return False
        with open(filename, "wb") as f:
            f.write(response.content)
        return True
    except (requests.RequestException, OSError) as e:
        print("Exception while downloading thumbnail:", e)
        return False

# Crop the thumbnail to a centered square using ffmpeg.
# The crop filter calculates the smaller dimension (min(iw,ih)) as both width and height,
# and centers the crop by computing the proper x and y offsets.
def crop_thumbnail(input_file, output_file):
    """Crop *input_file* to a centered square and write it to *output_file*.

    Args:
        input_file: Path of the image to crop.
        output_file: Path of the cropped image; overwritten if it exists.

    Returns:
        True when ffmpeg exits successfully, False when it fails or cannot
        be started.
    """
    # The commas inside min() stay backslash-escaped so ffmpeg's filter
    # parser does not read them as filter separators.
    square_crop = (
        "crop=min(iw\\,ih):min(iw\\,ih)"
        ":(iw-min(iw\\,ih))/2:(ih-min(iw\\,ih))/2"
    )
    # An argument list (no shell) lets paths contain spaces or shell
    # metacharacters without any quoting.
    crop_cmd = ["ffmpeg", "-y", "-i", input_file, "-vf", square_crop, output_file]
    try:
        result = subprocess.run(crop_cmd, capture_output=True, text=True)
    except OSError as e:
        # Without a shell a missing ffmpeg raises instead of returning a
        # non-zero status; report it the same way as any other failure.
        print("Thumbnail cropping failed:", e)
        return False
    if result.returncode == 0:
        print("Thumbnail cropped to square successfully.")
        return True
    print("Thumbnail cropping failed:", result.stderr)
    return False

# Run an external command without a shell and raise if it does not succeed.
def _run_tool(cmd, error_intro, error_message):
    """Run *cmd* and return its completed process, raising on failure.

    Args:
        cmd: Argument list handed to ``subprocess.run`` as-is (no shell).
        error_intro: Line printed ahead of the captured output on failure.
        error_message: Message of the ``RuntimeError`` raised on failure.

    Raises:
        RuntimeError: If the executable cannot be started or exits non-zero.
    """
    try:
        result = subprocess.run(cmd, capture_output=True, text=True)
    except OSError as e:
        # Without a shell, a missing executable raises instead of returning
        # a non-zero status.
        print(f"{error_intro}:\n{e}")
        raise RuntimeError(error_message) from e
    if result.returncode != 0:
        print(f"{error_intro}:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}")
        raise RuntimeError(error_message)
    return result


# Render an argument list as a shell-quoted string, for logging only.
def _format_cmd(cmd):
    """Return *cmd* joined into one shell-quoted, copy-pasteable line."""
    return " ".join(shlex.quote(part) for part in cmd)


# Download audio, merge into a single MP3, and integrate metadata (with cropped cover art)
def list_to_mp3(urls):
    """Download the audio of every URL, merge it into one MP3 and tag it.

    Args:
        urls: Text with one video or playlist URL per line; blank lines are
            ignored.

    Returns:
        A status message naming the merged MP3 file.

    Raises:
        ValueError: If *urls* contains no URL.
        RuntimeError: If a yt-dlp or ffmpeg step fails, or no audio file was
            produced.
    """
    print("Splitting input URLs...")
    url_list = [transform_playlist_url(url.strip()) for url in urls.splitlines() if url.strip()]
    if not url_list:
        raise ValueError("No URLs provided.")

    # Determine output name: if a single playlist URL is provided, use its title
    if len(url_list) == 1 and "playlist?list=" in url_list[0]:
        output_name = get_playlist_name(url_list[0])
    else:
        output_name = "merged_audio"
    print("Output file will be:", output_name + ".mp3")
    # Absolute paths can never start with "-", so ffmpeg cannot mistake the
    # file name for an option.
    output_file = os.path.abspath(f"{output_name}.mp3")

    # Tracks go into a private directory so that MP3 files left in the
    # working directory by earlier runs are never merged in again.
    with tempfile.TemporaryDirectory(prefix="list2mp3_") as work_dir:
        # "%" is the template marker of yt-dlp's -o option, so a literal "%"
        # in the directory path has to be doubled.
        template_dir = work_dir.replace("%", "%%")
        for idx, url in enumerate(url_list, start=1):
            print(f"Downloading audio for URL {idx}/{len(url_list)}: {url}")
            # The leading URL index makes the sorted file order follow the
            # order in which the URLs were entered.
            out_template = os.path.join(
                template_dir, f"{idx:04d}_%(playlist_index)s_%(title)s.%(ext)s"
            )
            _run_tool(
                [
                    "yt-dlp", "--restrict-filenames", "-x",
                    "--audio-format", "mp3",
                    "-o", out_template,
                    "--", url,
                ],
                f"Error downloading {url}",
                "yt-dlp command failed",
            )
            print(f"Successfully downloaded audio for URL {idx}/{len(url_list)}")

        # Create file list for merging with ffmpeg using absolute paths
        print("Creating file list for merging...")
        tracks = sorted(name for name in os.listdir(work_dir) if name.endswith(".mp3"))
        if not tracks:
            raise RuntimeError("No audio files were downloaded")
        file_list_path = os.path.join(work_dir, "file_list.txt")
        with open(file_list_path, "w", encoding="utf-8") as f:
            for track in tracks:
                track_path = os.path.abspath(os.path.join(work_dir, track))
                # Inside the concat list's single quotes a literal quote has
                # to be written as '\''.
                escaped_path = track_path.replace("'", "'\\''")
                f.write(f"file '{escaped_path}'\n")

        # Merge audio files into a single MP3 file using ffmpeg
        # (-y overwrites the result of an earlier run instead of prompting).
        ffmpeg_cmd = [
            "ffmpeg", "-y", "-f", "concat", "-safe", "0",
            "-i", file_list_path, "-c", "copy", output_file,
        ]
        print("The ffmpeg merge command to be executed is:")
        print(_format_cmd(ffmpeg_cmd))
        print("Merging downloaded audio files into a single MP3...")
        _run_tool(ffmpeg_cmd, "Error merging files", "ffmpeg merge command failed")
        print("Merge completed successfully.")

    # Integrate metadata using ffmpeg
    print("Integrating metadata into the merged MP3...")
    video_metadata = get_video_metadata(url_list[0])
    channel_name = video_metadata.get("channel") or "Unknown Artist"
    thumbnail_url = video_metadata.get("thumbnail", "")
    cover_filename = None
    if not thumbnail_url:
        print("No thumbnail URL found. Proceeding without cover art.")
    elif not download_thumbnail(thumbnail_url, "cover.jpg"):
        print("Thumbnail download failed. Proceeding without cover art.")
    else:
        print("Thumbnail downloaded successfully.")
        cover_filename = "cover.jpg"
        cropped_cover = "cropped_" + cover_filename
        if crop_thumbnail(cover_filename, cropped_cover):
            cover_filename = cropped_cover
        else:
            print("Proceeding with the original thumbnail.")

    # ffmpeg cannot tag a file in place: write a tagged copy, then swap it in.
    tagged_file = os.path.abspath(f"temp_{output_name}.mp3")
    ffmpeg_meta_cmd = ["ffmpeg", "-y", "-i", output_file]
    if cover_filename:
        ffmpeg_meta_cmd += ["-i", cover_filename, "-map", "0", "-map", "1"]
    ffmpeg_meta_cmd += [
        "-c", "copy", "-id3v2_version", "3",
        "-metadata", f"title={output_name}",
        "-metadata", f"artist={channel_name}",
    ]
    if cover_filename:
        ffmpeg_meta_cmd += [
            "-metadata:s:v", "title=Album cover",
            "-metadata:s:v", "comment=Cover (front)",
        ]
    ffmpeg_meta_cmd.append(tagged_file)
    print("The metadata embedding ffmpeg command to be executed is:")
    print(_format_cmd(ffmpeg_meta_cmd))
    _run_tool(
        ffmpeg_meta_cmd,
        "Error embedding metadata",
        "ffmpeg metadata embedding command failed",
    )
    os.replace(tagged_file, output_file)
    print("Metadata integrated successfully into the MP3 file.")

    return f"✅ Merged audio saved as: {output_name}.mp3"

# Create ipywidgets for input
# Multi-line box into which the user pastes one URL per line.
text_area = widgets.Textarea(
    description='URLs:',
    placeholder='Enter YouTube URLs separated by new lines',
    value='',
    layout=dict(width='100%', height='200px'),
)
# Button that starts the download-and-merge run.
process_button = widgets.Button(
    button_style='success',
    description='Download & Merge',
)
# Output area that shows what is printed while a run is in progress.
output_display = widgets.Output()

def on_button_click(b):
    """Handle a button press: process the entered URLs and show the outcome.

    Everything printed during the run is routed into ``output_display``,
    which is cleared first. Any exception raised while processing is
    reported there instead of propagating. The argument ``b`` is not used.
    """
    with output_display:
        output_display.clear_output()
        print("Starting processing. Please wait...\n")
        try:
            summary = list_to_mp3(text_area.value)
            print("\n" + summary)
        except Exception as e:
            print("\nAn error occurred:", e)

# Run on_button_click whenever the button is pressed.
process_button.on_click(on_button_click)
# Render the input box, the button and the output area in the notebook.
display(text_area, process_button, output_display)
