## YT Video to TXT file conversion

In [None]:
#  Install Required Dependencies

# yt-dlp is a video downloader and subtitle extractor for YouTube and other sites.
# Pyphen is a library for syllable segmentation, used to split text into syllables (needed for txt karaoke files).

!pip install yt_dlp pyphen numpy 

# Import them

import os      # helps interact with the operating system, like file manipulation (e.g., creating directories, checking file paths).
import re      #  used for matching patterns in text (e.g., extracting timestamps from subtitle files).
import numpy as np  # used to generate random values (e.g., for "tone" in the UltraStar file).
import yt_dlp  
import pyphen  


**Before running this script, check that the video has subtitles.**
Run the following command in your conda environment: `yt-dlp --list-subs "https://www.youtube.com/example"`
This confirms that the video you intend to process actually has subtitles.

In [None]:


def download_youtube_subtitles(youtube_url, lang='en', output_directory='./', ffmpeg_location=None):
    """
    Downloads subtitles from a YouTube video and saves them as a WebVTT file.

    The file is written to ``<output_directory>/song_temp.<lang>.vtt``.

    Parameters:
    - youtube_url (str): URL of the YouTube video.
    - lang (str): Language code for subtitles (default is 'en').
    - output_directory (str): Directory where subtitles will be saved.
    - ffmpeg_location (str): Location of the FFmpeg executable for merging streams (optional).

    Returns:
    - subtitle_file (str): Path to the downloaded subtitle file.

    Raises:
    - Exception: If yt-dlp fails; the original error is chained as the cause.
    - FileNotFoundError: If no subtitle file was produced for the requested language.
    """
    # Ensure the output directory exists before yt-dlp writes into it.
    os.makedirs(output_directory, exist_ok=True)

    subtitle_file = f"song_temp.{lang}.vtt"
    subtitle_file_path = os.path.join(output_directory, subtitle_file)

    # Remove a leftover file from a previous run; otherwise a video without
    # subtitles would silently return the previous video's subtitles.
    if os.path.exists(subtitle_file_path):
        os.remove(subtitle_file_path)

    ydl_opts = {
        'writeautomaticsub': True,   # Download automatic subtitles.
        'writesubtitles': True,      # Download manual subtitles if available.
        'subtitleslangs': [lang],    # Preferred subtitle language.
        'subtitlesformat': 'vtt',    # The file looked up below is expected to be .vtt.
        # Write into output_directory so the lookup below finds the file.
        'outtmpl': os.path.join(output_directory, 'song_temp.%(ext)s'),
        'quiet': False,              # Keep yt-dlp's progress output visible.
        'ffmpeg_location': ffmpeg_location,  # FFmpeg location if needed.
    }
    # NOTE(review): 'skip_download' is not set, so the media itself is downloaded
    # as well — confirm whether that is intended or only subtitles are wanted.

    # Download subtitles using yt-dlp.
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        try:
            ydl.download([youtube_url])
        except Exception as e:
            # Chain the cause so the original traceback is not lost.
            raise Exception(f"Error downloading subtitles: {str(e)}") from e

    if os.path.exists(subtitle_file_path):
        return subtitle_file_path
    raise FileNotFoundError(f"Subtitles not found for {youtube_url}.")

def parse_vtt(file_path):
    """
    Parses the VTT subtitle file and extracts timestamps and lyrics.

    For each cue, only the first non-empty text line after the timing line is
    kept. Timing lines are normalised to ``"<start> --> <end>"`` (cue settings
    such as ``align:start`` are dropped), and inline markup such as
    ``<c>`` or ``<00:00:01.000>`` is removed from the text.

    Parameters:
    - file_path (str): Path to the subtitle file.

    Returns:
    - lyrics (list): List of tuples containing timestamps and corresponding lyrics.
    """
    cue_timing = re.compile(r"(\S+)\s*-->\s*(\S+)")
    inline_tag = re.compile(r"<[^>]*>")

    lyrics = []
    timestamp = None
    with open(file_path, "r", encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.strip()
            if "-->" in line:
                match = cue_timing.search(line)
                # Fall back to the raw line if the timing cannot be recognised.
                timestamp = f"{match.group(1)} --> {match.group(2)}" if match else line
            elif line and timestamp:
                text = inline_tag.sub("", line).strip()
                if text:  # A line holding only markup is not a lyric.
                    lyrics.append((timestamp, text))
                    timestamp = None  # Reset timestamp after appending

    return lyrics

def time_to_beats(time_str, bpm):
    """
    Converts a timestamp to a beat position based on BPM.

    Accepts ``HH:MM:SS.mmm``, ``MM:SS.mmm`` (WebVTT allows the hours to be
    omitted) or plain seconds. A comma is accepted as the decimal separator.

    Parameters:
    - time_str (str): Timestamp such as "00:01:02.500" or "01:02.500".
    - bpm (int): Beats per minute.

    Returns:
    - beats (int): Corresponding beat position, truncated towards zero.

    Raises:
    - ValueError: If the timestamp has more than three fields or a field is not numeric.
    """
    fields = time_str.strip().replace(',', '.').split(":")
    if len(fields) > 3:
        raise ValueError(f"Unsupported timestamp format: {time_str!r}")

    # Fold the fields left to right: each step promotes the running total
    # by a factor of 60 (hours -> minutes -> seconds).
    total_seconds = 0.0
    for field in fields:
        total_seconds = total_seconds * 60 + float(field)

    beats = total_seconds * (bpm / 60)
    return int(beats)

def syllabify(text):
    """
    Breaks a piece of text into syllable chunks with Pyphen.

    The text is passed to an English ('en') Pyphen dictionary, and the
    hyphenated result is cut at every "-" character. Hyphens already
    present in the input are therefore treated as split points too.

    Parameters:
    - text (str): Input text to split into syllables.

    Returns:
    - list: List of syllables.
    """
    hyphenator = pyphen.Pyphen(lang='en')
    hyphenated = hyphenator.inserted(text)
    return hyphenated.split("-")

def extract_ultrastar_format(title, artist, youtube_url, lyrics, bpm=120):
    """
    Converts parsed lyrics into UltraStar format with configurable metadata.

    Each cue's beat span is divided evenly between its syllables so that the
    notes follow one another instead of overlapping. Tones are random
    placeholders, so the output differs between runs.

    Parameters:
    - title (str): Song title.
    - artist (str): Artist name.
    - youtube_url (str): YouTube video URL.
    - lyrics (list): List of tuples containing timestamps and lyrics.
    - bpm (int): Beats per minute (default is 120).

    Returns:
    - str: UltraStar formatted lyrics.

    Raises:
    - ValueError: If a timestamp does not have the form "<start> --> <end>".
    """
    # NOTE(review): UltraStar players are commonly described as treating one
    # file beat as a quarter of a #BPM beat — confirm whether the beat
    # conversion should account for that.
    output = [
        f"#ARTIST:{artist}",
        f"#TITLE:{title}",
        f"#BPM:{bpm}",
        f"#VIDEO:{youtube_url}",
        "#GAP:0",
    ]

    prev_beat = 0

    for timestamp, text in lyrics:
        start_time, arrow, remainder = timestamp.partition("-->")
        # Only the first token after the arrow is the end time; anything
        # after it (e.g. "align:start position:0%") is a WebVTT cue setting.
        end_fields = remainder.split()
        if not arrow or not end_fields:
            raise ValueError(f"Malformed cue timing: {timestamp!r}")

        start_beat = time_to_beats(start_time.strip(), bpm)
        end_beat = time_to_beats(end_fields[0], bpm)
        duration = max(end_beat - start_beat, 1)

        # Drop empty fragments (e.g. produced by "--" in the text).
        syllables = [s for s in syllabify(text) if s.strip()]
        if not syllables:
            continue

        # Share the cue's duration between syllables; at least one beat each.
        note_length = max(duration // len(syllables), 1)
        for i, syllable in enumerate(syllables):
            tone = np.random.randint(5, 20)
            note_start = start_beat + i * note_length
            output.append(f": {note_start} {note_length} {tone} {syllable}")

        if (start_beat - prev_beat) > 30:
            # The line break is placed at the beat where this line's last note ends.
            line_end = start_beat + len(syllables) * note_length
            output.append(f"- {line_end}")
            prev_beat = start_beat

    output.append("E")
    return "\n".join(output)

# Main function to get user input, download subtitles, and save UltraStar file
def main():
    """
    Interactive entry point: prompts for the video URL, song metadata and
    paths, downloads the subtitles, converts them to UltraStar text and
    writes the result to ``<artist>_<title>_ultrastar.txt`` in the output
    directory. Any failure is reported on stdout rather than raised.
    """
    # User-configurable parameters
    youtube_url = input("Enter the YouTube video URL: ").strip()
    title = input("Enter the song title: ").strip()
    artist = input("Enter the artist name: ").strip()
    output_directory = input("Enter the output directory (default is './'): ").strip() or './'
    ffmpeg_location = input("Enter the full path to ffmpeg (leave blank if not required): ").strip() or None

    # Generate filename based on artist and title.
    # Besides spaces, replace characters that are path separators or are not
    # allowed in file names on common platforms (e.g. "AC/DC", "Why?").
    unsafe_chars = re.compile(r'[\\/:*?"<>|]')
    safe_title = unsafe_chars.sub("_", title.replace(" ", "_").lower())
    safe_artist = unsafe_chars.sub("_", artist.replace(" ", "_").lower())
    output_filename = f"{safe_artist}_{safe_title}_ultrastar.txt"
    output_filepath = os.path.join(output_directory, output_filename)

    try:
        # Download subtitles
        subtitle_file = download_youtube_subtitles(youtube_url, lang='en', output_directory=output_directory, ffmpeg_location=ffmpeg_location)
        lyrics = parse_vtt(subtitle_file)

        if not lyrics:
            raise ValueError("No lyrics found in the subtitle file.")

        # Generate UltraStar formatted lyrics
        ultrastar_txt = extract_ultrastar_format(title, artist, youtube_url, lyrics)

        # Save UltraStar formatted lyrics to file
        with open(output_filepath, "w", encoding="utf-8") as f:
            f.write(ultrastar_txt)

        print(f"UltraStar file saved to: {output_filepath}")

    except Exception as e:
        # Top-level boundary of an interactive tool: report the error instead of a traceback.
        print(f"An error occurred: {e}")

# Run the interactive workflow only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


In [None]:
 ## Example Params
    
    # Provide the YouTube video URL.
    #video_url = "https://www.youtube.com/watch?v=F3aXpa1rQEY"
    
    # Provide the song title and artist name.
    #video_title = "the_man"
    #artist_name = "taylor_swift"
    
    # Set your desired output directory.
    # output_dir = r"C:\Users\usuario\Desktop\MÉS\Karaoke"
    
    # Provide the full path to your FFmpeg executable (required for merging streams).
    # ffmpeg_path = r'C:\Users\usuario\Anaconda3\envs\songtotxt\Library\bin\ffmpeg.exe'
    