# In [18]:
import json
import datetime
from googletrans import Translator

# Single shared Translator instance; convert_transcript_to_lrc uses it for
# every title/sentence translation.
translator = Translator()

def ms_to_timestamp(ms):
    """Format a millisecond offset as an LRC ``mm:ss.xx`` time tag body.

    LRC time tags carry hundredths of a second after the dot, so the
    sub-second remainder is reduced to centiseconds and always rendered with
    exactly two digits. Zero-padding raw milliseconds to width two would turn
    5 ms into ".05" (read back as 50 ms) and produce three digits from 100 ms
    upward.

    Args:
        ms: Offset in milliseconds. Any value accepted by ``int()`` works;
            a fractional part is truncated.

    Returns:
        The timestamp without surrounding brackets, e.g. ``"01:01.23"``.
        Minutes are not wrapped at 60, so one hour is ``"60:00.00"``.
    """
    total_ms = int(ms)
    seconds, milliseconds = divmod(total_ms, 1000)
    minutes, seconds = divmod(seconds, 60)
    # Truncate (rather than round) so the tag never points past the real start.
    centiseconds = milliseconds // 10
    return f"{minutes:02}:{seconds:02}.{centiseconds:02}"

def convert_transcript_to_lrc(transcript_file, output_file):
    """Convert a JSON transcript into an LRC file with translated lines.

    The transcript is expected to hold a list under the top-level ``"section"``
    key. For each section that has a ``"startMs"`` value, the section title
    (``section["title"]["title"]``) and sentence
    (``section["text"]["sentence"]["text"]``) are written, when non-empty, as
    ``[timestamp] text`` followed by ``[timestamp][tr] translation``. The
    translation is requested from the module-level ``translator`` with
    ``src="de"`` and ``dest="zh-cn"``.

    Missing *or null* intermediate objects are treated as empty, so a section
    with ``"title": null`` is skipped quietly instead of raising
    ``AttributeError``.

    Args:
        transcript_file: Path of the JSON transcript to read.
        output_file: Path of the LRC file to write (UTF-8, overwritten).

    Raises:
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If the transcript is not valid JSON.
    """
    # Prefer UTF-8; fall back to latin-1 only when the bytes are not valid
    # UTF-8 (latin-1 can decode any byte sequence, so this never fails to
    # decode, though non-ASCII text may then be wrong).
    try:
        with open(transcript_file, 'r', encoding='utf-8') as file:
            data = json.load(file)
    except UnicodeDecodeError:
        with open(transcript_file, 'r', encoding='latin-1') as file:
            data = json.load(file)

    lrc_lines = []

    # `or` fallbacks cover keys that are present but null, which
    # dict.get(key, default) does not.
    for section in data.get("section") or []:
        start_ms = section.get("startMs")
        if start_ms is None:
            # Without a start time the line cannot be placed in an LRC file.
            continue

        timestamp = ms_to_timestamp(start_ms)
        title = (section.get("title") or {}).get("title") or ""
        text_node = section.get("text") or {}
        sentence = (text_node.get("sentence") or {}).get("text") or ""

        # Title first, then sentence, each followed by its translation.
        for text in (title, sentence):
            if not text:
                continue
            lrc_lines.append(f"[{timestamp}] {text}")
            tr = translator.translate(text, src="de", dest="zh-cn")
            lrc_lines.append(f"[{timestamp}][tr] {tr.text}")

    with open(output_file, 'w', encoding='utf-8') as file:
        file.write("\n".join(lrc_lines))

# Example run: convert one transcript and report where the LRC file was written.
transcript_file, output_file = (
    '4rjNo0BaMnkVykloVAx8Zs.json',
    '4rjNo0BaMnkVykloVAx8Zs.lrc',
)
convert_transcript_to_lrc(transcript_file, output_file)
print(f"Lyrics file saved as {output_file}")

# Output: Lyrics file saved as lyrics.lrc
