In [None]:
# --- SYSTEM & PYTHON PACKAGE INSTALLS ---
!apt-get update
!apt-get install -y ffmpeg

# Install Python packages
!pip install openai-whisper pydub yt-dlp demucs ffmpeg-python


0% [Working]            Hit:1 https://cli.github.com/packages stable InRelease
0% [Connecting to archive.ubuntu.com (185.125.190.81)] [Connecting to security.                                                                               Hit:2 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:4 http://security.ubuntu.com/ubuntu jammy-security InRelease
Hit:5 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:6 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:7 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:8 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:9 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:10 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:11 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Reading package lists... Done
W: Skipping acq

In [None]:
import os
import shutil
import subprocess
import sys
from urllib.parse import urlparse, parse_qs

import ffmpeg
import whisper
import yt_dlp
from google.colab import files
from pydub import AudioSegment, silence
from pytube import YouTube


In [None]:
# --- CONFIG ---
# Directory that receives the files the pipeline writes (downloaded audio,
# converted WAV, Demucs output, chunk files and lyrics.txt). The path is
# relative, so it is resolved against the kernel's current working directory.
OUTPUT_DIR = "output"
# Create it up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# --- STEP 1: Download YouTube Audio (robust for short links) ---
def download_youtube_audio(url, filename="input.mp4"):
    """Download the best available audio stream of a video with yt-dlp.

    Args:
        url: Video URL to download (playlists are not expanded).
        filename: File name to write inside ``OUTPUT_DIR``. The name is used
            verbatim, whatever container yt-dlp actually fetched.

    Returns:
        Path of the downloaded file (``OUTPUT_DIR/filename``).
    """
    output_path = os.path.join(OUTPUT_DIR, filename)
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': output_path,
        'quiet': False,
        'noplaylist': True,
        # The output name is fixed, so without this yt-dlp reports
        # "has already been downloaded" and keeps the file from a previous
        # run -- the rest of the pipeline would then process the wrong audio.
        'overwrites': True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
    print(f"Downloaded YouTube audio to {output_path}")
    return output_path

# --- STEP 2: Convert to WAV (16kHz mono) ---
def convert_to_wav(input_file, output_file="processed.wav"):
    """Convert ``input_file`` to a 16 kHz mono WAV file inside ``OUTPUT_DIR``.

    Args:
        input_file: Path of the audio/video file to convert.
        output_file: File name of the WAV to write inside ``OUTPUT_DIR``;
            an existing file of that name is overwritten.

    Returns:
        Path of the written WAV file.

    Raises:
        ffmpeg.Error: If the ffmpeg process fails. The captured stderr is
            printed before the exception is re-raised.
    """
    out_path = os.path.join(OUTPUT_DIR, output_file)
    try:
        (
            ffmpeg
            .input(input_file)
            .output(out_path, ar=16000, ac=1)
            .overwrite_output()
            .run(quiet=True)
        )
    except ffmpeg.Error as err:
        # quiet=True captures ffmpeg's output instead of showing it, so the
        # actual failure reason would otherwise be invisible in the notebook.
        details = err.stderr.decode("utf-8", errors="replace") if err.stderr else ""
        print(f"ffmpeg failed while converting {input_file}:\n{details}")
        raise
    print(f"Converted {input_file} to WAV at {out_path}")
    return out_path

# --- STEP 3: Extract Vocals with Demucs ---
def extract_vocals(input_file):
    """Separate the vocal stem of ``input_file`` by running Demucs.

    Demucs is launched as a subprocess with ``--two-stems=vocals`` and
    ``OUTPUT_DIR/demucs_output`` as its output directory. The result is looked
    up as ``<model folder>/<input file name without extension>/vocals.wav``.

    Args:
        input_file: Path of the audio file to separate.

    Returns:
        Path of the ``vocals.wav`` found for ``input_file``.

    Raises:
        subprocess.CalledProcessError: If Demucs exits with a non-zero status.
        FileNotFoundError: If the output directory contains no model folder,
            or no model folder holds a ``vocals.wav`` for this input.
    """
    result_dir = os.path.join(OUTPUT_DIR, "demucs_output")
    os.makedirs(result_dir, exist_ok=True)

    # Resolve the launcher from PATH rather than assuming one install
    # location; if no script is found, run the package as a module with the
    # interpreter that is executing this kernel.
    demucs_exe = shutil.which("demucs")
    launcher = [demucs_exe] if demucs_exe else [sys.executable, "-m", "demucs"]
    command = launcher + [
        "--two-stems=vocals",
        "-o", result_dir,
        input_file
    ]
    print("Running Demucs to separate vocals...")
    subprocess.run(command, check=True)
    print("Demucs finished!")

    # os.listdir() order is arbitrary and the directory can hold folders left
    # over from earlier runs, so every model folder is checked instead of
    # blindly taking the first entry.
    model_folders = sorted(
        entry for entry in os.listdir(result_dir)
        if os.path.isdir(os.path.join(result_dir, entry))
    )
    if not model_folders:
        raise FileNotFoundError("Demucs did not create any output folder.")

    song_name = os.path.splitext(os.path.basename(input_file))[0]
    candidates = [
        os.path.join(result_dir, model_folder, song_name, "vocals.wav")
        for model_folder in model_folders
    ]
    found = [path for path in candidates if os.path.isfile(path)]
    if not found:
        raise FileNotFoundError(f"Vocals not found at {', '.join(candidates)}")

    # If several model folders contain this track, the most recently written
    # file is the one produced by the run that just finished.
    vocals_path = max(found, key=os.path.getmtime)
    print(f"Vocals located at: {vocals_path}")
    return vocals_path

# --- STEP 4: Silence-Aware Splitting ---
def split_on_silence(input_file, min_silence_len=700, silence_thresh_offset=-40):
    """Cut a WAV file at silent stretches and export each piece as its own WAV.

    Args:
        input_file: Path of the WAV file to split.
        min_silence_len: Forwarded to pydub as ``min_silence_len``.
        silence_thresh_offset: Added to the clip's own ``dBFS`` value to form
            the silence threshold handed to pydub.

    Returns:
        List of exported file paths, ``OUTPUT_DIR/chunk_<index>.wav``, in the
        order pydub returned the pieces.
    """
    recording = AudioSegment.from_wav(input_file)
    # The threshold is relative to this recording's level, not an absolute value.
    threshold = recording.dBFS + silence_thresh_offset
    chunks = silence.split_on_silence(
        recording,
        min_silence_len=min_silence_len,
        silence_thresh=threshold,
        keep_silence=250,  # small buffer
    )

    def _export(i, piece):
        # Write one piece to disk and hand back where it went.
        chunk_path = os.path.join(OUTPUT_DIR, f"chunk_{i}.wav")
        piece.export(chunk_path, format="wav")
        return chunk_path

    exported = [_export(i, piece) for i, piece in enumerate(chunks)]
    print(f"Split into {len(chunks)} chunks")
    return exported

# --- STEP 5: Transcribe with Whisper ---
# Loaded Whisper models keyed by size name. Without this cache the model was
# loaded again for every chunk handed to transcribe_audio().
_WHISPER_MODELS = {}


def transcribe_audio(input_file, model_size="small"):
    """Transcribe one audio file with Whisper and return the recognised text.

    Args:
        input_file: Path of the audio file to transcribe.
        model_size: Model name passed to ``whisper.load_model``. Each name is
            loaded once and reused by later calls.

    Returns:
        The ``"text"`` entry of Whisper's transcription result.
    """
    model = _WHISPER_MODELS.get(model_size)
    if model is None:
        model = whisper.load_model(model_size)
        _WHISPER_MODELS[model_size] = model
    result = model.transcribe(input_file)
    return result["text"]

# --- MAIN PIPELINE ---
def main(youtube_url=None, uploaded_file=None, model_size="small"):
    """Run the pipeline: fetch audio, isolate vocals, split, transcribe, save.

    Args:
        youtube_url: URL to download the audio from. Takes precedence when
            both sources are given.
        uploaded_file: Path of a local audio file, used when ``youtube_url``
            is empty.
        model_size: Whisper model name forwarded to ``transcribe_audio``.

    Returns:
        The transcription, one line per non-empty chunk. The same text is
        written to ``OUTPUT_DIR/lyrics.txt`` (UTF-8).

    Raises:
        ValueError: If neither ``youtube_url`` nor ``uploaded_file`` is given.
    """
    if youtube_url:
        input_file = download_youtube_audio(youtube_url)
    elif uploaded_file:
        input_file = uploaded_file
    else:
        raise ValueError("Provide either youtube_url or uploaded_file")

    wav_file = convert_to_wav(input_file)
    vocals = extract_vocals(wav_file)
    chunks = split_on_silence(vocals)

    lyrics = []
    for i, chunk in enumerate(chunks):
        print(f"Transcribing chunk {i+1}/{len(chunks)}...")
        # The transcribed text arrives with surrounding whitespace (each line
        # used to start with a blank); trim it and skip chunks that yielded
        # no text so the result has no stray spaces or empty lines.
        text = transcribe_audio(chunk, model_size=model_size).strip()
        if text:
            lyrics.append(text)

    final_text = "\n".join(lyrics)
    lyrics_path = os.path.join(OUTPUT_DIR, "lyrics.txt")
    with open(lyrics_path, "w", encoding="utf-8") as f:
        f.write(final_text)

    # Report the real location; the file is not in the working directory.
    print(f"Transcription complete! Lyrics saved to {lyrics_path}")
    return final_text


In [None]:
# --- Interactive selection ---
# Ask for the audio source, run the pipeline on it and print the lyrics.
mode = input("Type 'youtube' for a YouTube link or 'upload' to upload a file: ").strip().lower()

if mode == "youtube":
    youtube_url = input("Paste the YouTube link here and press Enter: ").strip()
    if not youtube_url:
        # An empty link would otherwise surface as main()'s generic
        # "Provide either youtube_url or uploaded_file" error.
        print("No link entered. Re-run this cell and paste a YouTube link.")
    else:
        lyrics = main(youtube_url=youtube_url, model_size="small")
        print("\n--- TRANSCRIBED LYRICS ---\n")
        print(lyrics)

elif mode == "upload":
    uploaded = files.upload()  # opens file picker
    if not uploaded:
        print("No file uploaded.")
    # NOTE: main() writes fixed file names inside OUTPUT_DIR, so with several
    # uploads only the last file's lyrics.txt remains on disk.
    for filename in uploaded.keys():
        print(f"Uploaded file: {filename}")
        # Resolve against the current working directory rather than a
        # hard-coded /content -- presumably that is where files.upload()
        # stores the file; confirm if the upload target is customised.
        file_path = os.path.abspath(filename)
        lyrics = main(uploaded_file=file_path, model_size="small")
        print("\n--- TRANSCRIBED LYRICS ---\n")
        print(lyrics)

else:
    print("Invalid input. Type 'youtube' or 'upload'.")


[youtube] Extracting URL: https://youtu.be/-J0SpPMLB9w?si=KzWkRcSM6pKGdDnb
[youtube] -J0SpPMLB9w: Downloading webpage
[youtube] -J0SpPMLB9w: Downloading tv simply player API JSON
[youtube] -J0SpPMLB9w: Downloading tv client config
[youtube] -J0SpPMLB9w: Downloading tv player API JSON
[info] -J0SpPMLB9w: Downloading 1 format(s): 251
[download] output/input.mp4 has already been downloaded
[download] 100% of    3.12MiB
Downloaded YouTube audio to output/input.mp4
Converted output/input.mp4 to WAV at output/processed.wav
Running Demucs to separate vocals...
Demucs finished!
Vocals located at: output/demucs_output/htdemucs/processed/vocals.wav
Split into 2 chunks
Transcribing chunk 1/2...


100%|███████████████████████████████████████| 461M/461M [00:10<00:00, 46.2MiB/s]


Transcribing chunk 2/2...




Transcription complete! Lyrics saved to lyrics.txt

--- TRANSCRIBED LYRICS ---

 Maybe I regret it when my heart goes into phases like I'm not in control I know how to hold, solely losing focus can't get myself to call I don't know where to go when I take things slow They just go so fast, I don't know
 When to hold on to feelings that I don't know what lasts I don't know what you did but you make me feel so tight But wait just let me know you're interested or not Are you alone? Should we just move on? Why would you have to show up? It's not like you can know nothing, things that you did Why would you have to show up? It's not like you can know nothing, things that you did One thing you know, two we don't Feelings there, you shut the door, lady I don't know why you make my mind spin like I overdosed Like an addict to the court When I'm losing sleep, I'm rolling in the deep I don't know what you did but you make me feel so tight But wait just let me know you're interested or not Are you 