<a href="https://colab.research.google.com/github/oops-internet/AED/blob/main/APP_Transcripci%C3%B3n_WX.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# @title Instalar Whisper X {"vertical-output":true}
import os
import time
from tqdm import tqdm
import warnings

# Install a global "ignore" filter so Python warnings are not displayed from
# this point on (presumably to keep the notebook output uncluttered).
warnings.filterwarnings("ignore")

def install_packages(commands):
    """Run every shell command in *commands*, one after another.

    The stdout and stderr of each command are redirected to /dev/null, and a
    tqdm bar advances once per command. Exit statuses are not inspected, so a
    failing command does not interrupt the loop.

    Args:
        commands: Iterable of shell command strings.
    """
    progress = tqdm(commands, desc="Instalando paquetes", unit="comando")
    for shell_command in progress:
        os.system("{} > /dev/null 2>&1".format(shell_command))
        # One-second pause after each command.
        time.sleep(1)

def setup_cuda_library_path():
    """Register /usr/lib64-nvidia/ with the dynamic linker.

    Writes the directory to /etc/ld.so.conf.d/libcuda.conf and then runs
    ``ldconfig``. The exit status of both commands is ignored.
    """
    shell_steps = (
        "echo /usr/lib64-nvidia/ > /etc/ld.so.conf.d/libcuda.conf",
        "ldconfig",
    )
    for step in shell_steps:
        os.system(step)

if __name__ == "__main__":
    # Step 1: make the NVIDIA libraries visible to the dynamic linker.
    print("Configurando el entorno CUDA...")
    setup_cuda_library_path()
    print("Entorno CUDA configurado correctamente.")

    # Step 2: install Whisper from GitHub and the PyTorch wheels served from
    # the cu118 index.
    commands = [
        "pip install git+https://github.com/openai/whisper.git -q",
        "pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 -q"
    ]
    install_packages(commands)

    # Step 3: importing the package is the only verification performed,
    # because install_packages does not look at the pip exit statuses.
    try:
        import whisper
    except ImportError as import_error:
        print(f"\nError durante la instalación de Whisper: {import_error}")
    else:
        print("\nWhisper instalado correctamente y listo para usarse.")


In [None]:
# @title Transcribir {"vertical-output":true}
import os
import whisper
from tqdm import tqdm
import contextlib
from google.colab import files
import ipywidgets as widgets
from IPython.display import display

@contextlib.contextmanager
def suppress_stdout_stderr():
    """Silence ``sys.stdout`` and ``sys.stderr`` inside the ``with`` body.

    Both streams are redirected to ``os.devnull`` while the body runs; the
    redirections are undone and the sink is closed when the body finishes,
    whether normally or with an exception.
    """
    with contextlib.ExitStack() as stack:
        # Entered in this order so they unwind in reverse: stderr, stdout,
        # then the devnull handle.
        sink = stack.enter_context(open(os.devnull, 'w'))
        stack.enter_context(contextlib.redirect_stdout(sink))
        stack.enter_context(contextlib.redirect_stderr(sink))
        yield

# Languages offered in the selector. Keys are the language codes handed to
# the transcription call; values are the Spanish display names shown in the
# dropdown and in the progress message.
LANGUAGE_MAP = {
    "es": "Español",
    "en": "Inglés",
    "fr": "Francés",
    "de": "Alemán",
    "it": "Italiano",
    "pt": "Portugués",
    "zh": "Chino",
    "ja": "Japonés",
    "ru": "Ruso"
}

def setup_environment():
    """Install the runtime dependencies and verify that a CUDA GPU is usable.

    Runs two silent ``pip install`` commands (Whisper from GitHub, then
    torch/torchvision/torchaudio from the cu118 wheel index) and imports
    torch to check for a GPU. The pip exit statuses are not checked.

    Raises:
        RuntimeError: If ``torch.cuda.is_available()`` returns False.
    """
    print("Configurando el entorno de ejecución...")
    # The VCS prefix must be a single "git+". A doubled "git+git+https://"
    # is not a URL pip can install from, and because the output is sent to
    # /dev/null the failure would go unnoticed.
    os.system("pip install git+https://github.com/openai/whisper.git -q > /dev/null 2>&1")
    os.system("pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 -q > /dev/null 2>&1")
    # Imported only after the install step has run.
    import torch
    if not torch.cuda.is_available():
        raise RuntimeError("La GPU no está disponible o los controladores de CUDA no son compatibles. Verifica la configuración del entorno.")
    print("Entorno configurado correctamente. GPU disponible.")

def load_whisper_model_with_progress(model_name="large", device="cuda"):
    """Load a Whisper model, printing status messages around the load.

    A cosmetic tqdm bar is run to 100% over roughly one second before the
    model is loaded; it does not track the real loading work. Anything that
    ``whisper.load_model`` writes to stdout/stderr is suppressed.

    Args:
        model_name: Model name passed to ``whisper.load_model``.
        device: Device passed to ``whisper.load_model``.

    Returns:
        The object returned by ``whisper.load_model``.
    """
    # Imported here because this cell's own import list does not include
    # ``time``; without it the function raises NameError unless an earlier
    # cell has already imported the module.
    import time

    print("Inicializando carga del modelo...")
    with tqdm(total=100, desc="Cargando modelo de Whisper", unit="%", unit_scale=True, dynamic_ncols=True) as progress_bar:
        # Ten steps of 10% each, 0.1 s apart.
        for _ in range(10):
            time.sleep(0.1)
            progress_bar.update(10)

    with suppress_stdout_stderr():
        model = whisper.load_model(model_name, device=device)

    print("Modelo cargado correctamente.")
    return model

def upload_audio_file():
    """Ask for an upload and move the first uploaded file into its own folder.

    The folder is named ``<file name without extension>_output`` and is
    created when it does not exist yet.

    Returns:
        A ``(moved_file_path, output_folder, base_name)`` tuple.

    Raises:
        FileNotFoundError: If the upload returns no files.
    """
    print("Por favor, sube un archivo de audio:")
    uploaded_files = files.upload()
    if not uploaded_files:
        raise FileNotFoundError("No se subió ningún archivo.")

    # Only the first uploaded entry is used; this function does not touch
    # any other entries.
    file_name = next(iter(uploaded_files))
    base_name = os.path.splitext(file_name)[0]
    output_folder = base_name + "_output"
    os.makedirs(output_folder, exist_ok=True)

    destination = os.path.join(output_folder, file_name)
    os.rename(file_name, destination)
    print(f"Archivo '{file_name}' cargado exitosamente y guardado en la carpeta '{output_folder}'.")
    return destination, output_folder, base_name

def transcribe_audio_with_progress(model, audio_file, output_folder, base_name, language="es"):
    """Transcribe *audio_file*, save the text to disk and trigger its download.

    The transcription itself runs with stdout/stderr suppressed. The progress
    bar is filled only after ``model.transcribe`` returns, so it signals
    completion rather than live progress.

    Args:
        model: Object exposing ``transcribe(audio, language=..., verbose=...)``
            that returns a mapping with a ``"text"`` entry and, optionally, a
            ``"segments"`` list.
        audio_file: Path of the audio file to transcribe.
        output_folder: Directory where the transcript file is written.
        base_name: Prefix of the transcript file name.
        language: Language code passed to ``model.transcribe``; also looked up
            in ``LANGUAGE_MAP`` for the status message.
    """
    language_name = LANGUAGE_MAP.get(language, language)
    print(f"Transcribiendo en idioma: {language_name}...")

    # The bar is created before output is suppressed: tqdm binds to the
    # current sys.stderr when it is constructed, so building it inside the
    # suppression block would send the bar to os.devnull and hide it.
    with tqdm(total=100, desc="Progreso de transcripción", unit="%", unit_scale=True, dynamic_ncols=True) as progress_bar:
        with suppress_stdout_stderr():
            transcription = model.transcribe(audio_file, language=language, verbose=False)

        # Progress bar driven by the number of segments.
        num_segments = len(transcription.get("segments", []))
        for i in range(1, num_segments + 1):
            progress_percentage = int((i / num_segments) * 100)
            progress_bar.update(progress_percentage - progress_bar.n)
        # With no segments the loop never runs; still finish the bar at 100%.
        progress_bar.update(progress_bar.total - progress_bar.n)

    # Save the transcription to a text file.
    transcript_path = os.path.join(output_folder, f"{base_name}_transcription.txt")
    with open(transcript_path, "w", encoding="utf-8") as f:
        f.write(transcription["text"])

    print("Transcripción completada.")
    print(f"Transcripción guardada en: {transcript_path}")

    # Automatically download the transcript file.
    files.download(transcript_path)

# Widgets that drive the transcription flow
def create_transcription_interface():
    """Prepare the session and show the language picker and start button.

    Sets up the environment, loads the model and asks for an audio upload,
    then displays a dropdown (default ``'es'``) and a button. Clicking the
    button transcribes the uploaded file in the selected language. Any
    exception raised while building the interface is caught and printed
    instead of propagating.
    """
    try:
        setup_environment()
        model = load_whisper_model_with_progress()
        audio_file, output_folder, base_name = upload_audio_file()

        # Each option is a (label shown, value returned) pair.
        language_options = [(LANGUAGE_MAP[code], code) for code in LANGUAGE_MAP]
        language_picker = widgets.Dropdown(
            options=language_options,
            value='es',
            description='Selecciona el idioma:',
            style={'description_width': 'initial'},
        )
        start_button = widgets.Button(description="Iniciar Transcripción")

        def _run_transcription(_button):
            # The dropdown is read at click time, so the current selection
            # is the one used.
            transcribe_audio_with_progress(
                model,
                audio_file,
                output_folder,
                base_name,
                language=language_picker.value,
            )

        start_button.on_click(_run_transcription)
        display(language_picker, start_button)
    except Exception as error:
        print(f"Error: {error}")

# Build and display the interface when this cell runs
create_transcription_interface()
