In [5]:
# ===============================================
# 🎵 AI Lyrics Extractor — Neatly Formatted Lines
# Works 100% in Google Colab — No API required
# ===============================================

!pip install -q git+https://github.com/openai/whisper.git
!pip install -q ffmpeg-python pydub

import whisper
from pydub import AudioSegment
from google.colab import files
import re

# Load Whisper model (try "medium" for better quality)
model = whisper.load_model("small")

def smart_format_lyrics(result, max_words=12, gap_threshold=5.0):
    """
    Formats lyrics beautifully using timestamps, punctuation, and word count.
    """
    segments = result.get("segments", [])
    lines, buffer = [], ""
    prev_end = 0.0

    for seg in segments:
        text = seg["text"].strip()
        start, end = seg["start"], seg["end"]

        # Add space before appending new text
        if buffer and not buffer.endswith(" "):
            buffer += " "
        buffer += text

        # Check conditions to create new line
        if (
            len(buffer.split()) >= max_words
            or (start - prev_end) > gap_threshold
            or re.search(r"[.!?,;]", text)
        ):
            clean_line = re.sub(r"\s+", " ", buffer).strip().capitalize()
            if clean_line:
                lines.append(clean_line)
            buffer = ""

        prev_end = end

    # Add any remaining text
    if buffer.strip():
        lines.append(buffer.strip().capitalize())

    # Format numbered lyrics
    formatted = "\n".join([f"{i+1:02d}. {line}" for i, line in enumerate(lines)])
    return formatted


def extract_lyrics(audio_path):
    """Extract and neatly format lyrics from an audio file"""
    try:
        # Convert audio if needed
        audio = AudioSegment.from_file(audio_path)
        temp_path = "temp.wav"
        audio.export(temp_path, format="wav")

        print("🎧 Transcribing (please wait)...")
        result = model.transcribe(temp_path, fp16=False)

        formatted_lyrics = smart_format_lyrics(result)
        print("\n🎶 Cleanly Formatted Lyrics:\n")
        print(formatted_lyrics)

    except Exception as e:
        print("⚠️ Error:", e)


# Step 1: Upload your song
print("📤 Upload your song (MP3/WAV/M4A):")
uploaded = files.upload()

# Step 2: Process file
for filename in uploaded.keys():
    print(f"\n🎵 Now processing: {filename}")
    extract_lyrics(filename)


  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
📤 Upload your song (MP3/WAV/M4A):


Saving Shape of you.mp3 to Shape of you.mp3

🎵 Now processing: Shape of you.mp3
🎧 Transcribing (please wait)...

🎶 Cleanly Formatted Lyrics:

01. The club is in the best place to find a lover so the bar is where i go
02. Me and my friends at the table doing shots drinking fast and then we talk slow
03. You can go and start up a conversation with just me and trust me i'll give it a chance
04. I'll take my hand, stop with the man in the jukebox and then we start to dance
05. And i'm a single like girl you know i want your love
06. Your love was handmade for somebody like me come and now follow my lead i may be crazy don't mind me
07. Say boy let's not talk too much grab on my waist and put that body on me
08. Come and now follow my lead come, come and now follow my lead
09. I'm in love with the shape of you you're pushing blue like a magnet dude
10. Although my heart is falling too i'm in love with your body
11. Last night you were in my room and i'm a bad tree smell like you
12. Every d