In [1]:
import whisper
from whisper.utils import WriteJSON,WriteSRT,WriteTSV,WriteTXT,WriteVTT, get_writer
import os
import pandas as pd

In [2]:
# Path of the audio file that is transcribed below and later handed to the subtitle writers.
archivo_audio = "Audios/Track-47.wav"

In [3]:
# Load the Whisper "medium" model and transcribe the audio file; `result` feeds every cell below.
# NOTE(review): the saved output of this cell shows a KeyboardInterrupt; re-run it to
# completion on a fresh kernel so that `result` is actually defined.
modelo = whisper.load_model("medium")
result = modelo.transcribe(archivo_audio)



KeyboardInterrupt: 

: 

In [4]:
# Show the transcription segments as a table, keeping only the id, start, end and text columns.
pd.DataFrame(result["segments"])[["id","start","end", "text"]]

Unnamed: 0,id,start,end,text
0,0,0.0,7.68,Muy grave lo que ha acontecido esta semana en...
1,1,7.68,15.64,"pensional que lesiona de manera muy grave, pe..."
2,2,15.64,24.04,porque los más los obliga a todos aquellos qu...
3,3,24.04,32.08,que los 6.20 a partir de ahí van a los fondos...
4,4,32.08,38.32,significa entonces que la libertad de escoger...
5,5,38.32,44.08,"Y uno de los grandes triunfos que consiguen, ..."
6,6,44.08,51.44,era llevar obligatoriamente a un sector de la...
7,7,51.44,57.4,fondos privados. Todos sabemos lo que signifi...
8,8,57.4,63.68,"que se obtienen, son pensiones públicas. Por ..."
9,9,63.68,69.8,"menores de 40 años, esta reforma que se está ..."


In [5]:
# Directory for the generated subtitle files, and the base file name used to build
# the output paths in the export cells.
output_dir = "subtitulos/"
audio_basename = "salida"

In [6]:
from typing import TextIO, Optional
def format_timestamp(seconds: float, always_include_hours: bool = False, decimal_marker: str = ","):
    """Render a duration in seconds as ``[HH:]MM:SS<marker>mmm``.

    Args:
        seconds: Non-negative time offset in seconds; rounded to the nearest
            millisecond before formatting.
        always_include_hours: When true, the ``HH:`` prefix is emitted even if
            the value is below one hour. Otherwise it only appears when the
            hour component is greater than zero.
        decimal_marker: Separator placed between the seconds and the
            three-digit millisecond field.

    Returns:
        The formatted timestamp string.

    Raises:
        AssertionError: If ``seconds`` is negative.
    """
    assert seconds >= 0, "non-negative timestamp expected"

    # Work in integer milliseconds and peel off one unit at a time.
    total_ms = round(seconds * 1000.0)
    whole_hours, remainder = divmod(total_ms, 3_600_000)
    whole_minutes, remainder = divmod(remainder, 60_000)
    whole_seconds, leftover_ms = divmod(remainder, 1_000)

    if always_include_hours or whole_hours > 0:
        prefix = f"{whole_hours:02d}:"
    else:
        prefix = ""

    return f"{prefix}{whole_minutes:02d}:{whole_seconds:02d}{decimal_marker}{leftover_ms:03d}"


class ResultWriter:
    """Base class for writers that save a transcription result to a file.

    Subclasses set ``extension`` and implement ``write_result``; calling the
    instance opens ``<output_dir>/<audio name>.<extension>`` and delegates the
    actual serialisation to ``write_result``.
    """

    # File extension (without the leading dot) appended to the output file name.
    extension: str

    def __init__(self, output_dir: str):
        """Remember the directory in which output files are created."""
        self.output_dir = output_dir

    def __call__(
        self, result: dict, audio_path: str, options: Optional[dict] = None, **kwargs
    ):
        """Write ``result`` to a file named after ``audio_path``.

        Args:
            result: Transcription result forwarded unchanged to ``write_result``.
            audio_path: Path of the source audio; only its base name without
                extension is used to name the output file.
            options: Optional writer options forwarded to ``write_result``.
            **kwargs: Extra keyword arguments forwarded to ``write_result``.
        """
        audio_basename = os.path.basename(audio_path)
        audio_basename = os.path.splitext(audio_basename)[0]

        # Create the target directory on demand so a fresh run does not fail with
        # FileNotFoundError. An empty output_dir means "current directory" and
        # must be skipped because os.makedirs("") raises.
        if self.output_dir:
            os.makedirs(self.output_dir, exist_ok=True)

        output_path = os.path.join(
            self.output_dir, audio_basename + "." + self.extension
        )

        with open(output_path, "w", encoding="utf-8") as f:
            self.write_result(result, file=f, options=options, **kwargs)

    def write_result(
        self, result: dict, file: TextIO, options: Optional[dict] = None, **kwargs
    ):
        """Serialise ``result`` into the open text ``file``; subclasses must override.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

class WriteASS(ResultWriter):
    """Writer that saves transcription segments as an Advanced SubStation Alpha file.

    The output has a fixed ``[Script Info]`` / ``[V4+ Styles]`` header with a
    single ``Default`` style, followed by one ``Dialogue`` event per segment.
    """

    extension: str = "ass"

    @staticmethod
    def _ass_timestamp(seconds: float) -> str:
        """Format ``seconds`` as an ASS event time, ``H:MM:SS.cc``.

        ASS event times use centiseconds and a dot as the decimal separator.
        A comma must not be used here: Dialogue fields are comma-separated, so
        a comma inside the time would shift every following field.

        Raises:
            AssertionError: If ``seconds`` is negative.
        """
        assert seconds >= 0, "non-negative timestamp expected"
        centiseconds = round(seconds * 100.0)
        hours, centiseconds = divmod(centiseconds, 360_000)
        minutes, centiseconds = divmod(centiseconds, 6_000)
        secs, centiseconds = divmod(centiseconds, 100)
        return f"{hours:d}:{minutes:02d}:{secs:02d}.{centiseconds:02d}"

    def write_result(
        self, result: dict, file: TextIO, options: Optional[dict] = None, **kwargs
    ):
        """Write the ASS header and one Dialogue line per segment to ``file``.

        Args:
            result: Mapping with a ``"segments"`` list; each segment provides
                ``"start"`` and ``"end"`` (seconds) and ``"text"``.
            file: Open text file that receives the subtitle script.
            options: Accepted for interface compatibility; not used here.
            **kwargs: Accepted for interface compatibility; not used here.
        """
        # ASS file header
        file.write("[Script Info]\n")
        file.write("Title: Subtítulos personalizados\n")
        file.write("ScriptType: v4.00+\n")
        file.write("WrapStyle: 0\n")
        file.write("\n")
        file.write("[V4+ Styles]\n")
        file.write("Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding\n")
        file.write("Style: Default,Arial,20,&H00FFFFFF,&H000000FF,&H00020713,&H00000000,-1,0,0,0,100,100,0,0,1,2,0,2,10,10,10,1\n")
        file.write("\n")
        file.write("[Events]\n")
        file.write("Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n")

        # Write the subtitles; custom styling can be applied to `text` here,
        # e.g. text = "{\\c&HFF0000&}This subtitle is red"
        for segment in result["segments"]:
            start_time = self._ass_timestamp(segment["start"])
            end_time = self._ass_timestamp(segment["end"])
            # An event must stay on one physical line; ASS spells a hard line
            # break inside the text as \N.
            text = "\\N".join(segment["text"].strip().splitlines())
            file.write(f"Dialogue: 0,{start_time},{end_time},Default,,0,0,0,,{text}\n")


In [7]:
# Save the results in several formats as <output_dir>/<audio_basename>.<formato>.
# Calling writer(result, archivo_audio) makes the writer open its own file, named after
# the audio file, so the handle opened here was never used and only left an empty
# (or truncated) file behind. Write straight into the handle we open instead.
os.makedirs(output_dir, exist_ok=True)  # open() below fails if the directory is missing
for formato in ["txt", "srt", "tsv", "json"]:
    writer = get_writer(formato, output_dir)
    output_path = os.path.join(output_dir, f"{audio_basename}.{formato}")
    with open(output_path, "w", encoding="utf-8") as f:
        writer.write_result(result, file=f)


In [8]:
# Export every format, including the custom ASS writer, as
# <output_dir>/<audio_basename>.<formato>.
# Calling writer(result, archivo_audio) makes the writer open its own file, named after
# the audio file, so the handle opened here was never used and only left an empty
# (or truncated) file behind. Write straight into the handle we open instead.
os.makedirs(output_dir, exist_ok=True)  # open() below fails if the directory is missing
for formato in ["txt", "srt", "tsv", "json", "ass", "vtt"]:
    if formato == "ass":
        writer = WriteASS(output_dir)
    else:
        writer = get_writer(formato, output_dir)
    output_path = os.path.join(output_dir, f"{audio_basename}.{formato}")
    with open(output_path, "w", encoding="utf-8") as f:
        writer.write_result(result, file=f)