# Digital People - Whisper Diarization

In [None]:
# Install the Python dependencies listed in requirements.txt.
# The leading "!" is the IPython/Colab shell escape, so this runs pip as a shell command.
!pip install -r requirements.txt

In [None]:
# User-editable notebook parameters. The trailing "# @param" annotations are
# Colab form markers and must stay on the same line as the value they describe.
# These values are copied into environment variables by the following cell.

# OpenAI credentials and Whisper transcription options.
OPENAI_API_KEY = ""  # @param {type:"string"}
OPENAI_WHISPER_MODEL = "whisper-1"  # @param ["whisper-1"]
OPENAI_WHISPER_PROMPT = ""  # @param {type:"string"}

# NOTE(review): presumably a Hugging Face access token used to fetch the
# pyannote model below -- confirm against app.settings / app.diarization.
HF_API_KEY = ""  # @param {type:"string"}

# Identifier of the pyannote speaker-diarization model to use.
PYANNOTE_MODEL = (
    "pyannote/speaker-diarization-3.1"  # @param ["pyannote/speaker-diarization-3.1"]
)

In [None]:
import os


# Publish the notebook parameters as environment variables in one ordered
# update (same keys, same values, same order as individual assignments).
os.environ.update(
    {
        "OPENAI_API_KEY": OPENAI_API_KEY,
        "OPENAI_WHISPER_MODEL": OPENAI_WHISPER_MODEL,
        "OPENAI_WHISPER_PROMPT": OPENAI_WHISPER_PROMPT,
        "HF_API_KEY": HF_API_KEY,
        "PYANNOTE_MODEL": PYANNOTE_MODEL,
    }
)

In [None]:
import argparse
import logging
import shutil
from pathlib import Path

from app.diarization import get_pipeline
from app.settings import settings
from app.vtt import create_vtt
from app.whisper import get_transcripts

# Module-level logger named after the current module (the notebook's __name__).
logger = logging.getLogger(__name__)
# Create the configured temporary folder up front; exist_ok=True makes
# re-running this cell harmless when the folder is already there.
settings.tmp_folder.mkdir(exist_ok=True)

In [None]:
from google.colab import files

# Ask the user for the audio file through the Colab upload widget.
# The result is iterated as a mapping whose keys are the uploaded file names.
uploaded = files.upload()

# An empty result means the upload was cancelled or nothing was selected.
# Fail with a clear message instead of letting Path(None) raise a TypeError.
if not uploaded:
    raise RuntimeError("🛑 No file was uploaded")

# Only one file is processed. As before, the last uploaded name wins, but the
# user is now told when the other uploads are being ignored.
uploaded_names = list(uploaded)
if len(uploaded_names) > 1:
    logger.warning(
        "Several files were uploaded; only %s will be processed",
        uploaded_names[-1],
    )

filename = Path(uploaded_names[-1]).absolute()

# Raise instead of calling exit(1): the failure then surfaces as an ordinary
# cell error rather than acting on the interpreter itself.
if not filename.exists():
    raise FileNotFoundError(f"🛑 File {filename} does not exist")

In [None]:
# Diarize the uploaded audio, transcribe it, and export the result as a .vtt
# file next to the input (same name, ".vtt" suffix).

# The temporary folder is removed at the end of this cell, so recreate it here:
# otherwise re-running the cell for another file would start without it.
# NOTE(review): presumably the app.* helpers write intermediate files there --
# confirm against app.diarization / app.whisper.
settings.tmp_folder.mkdir(exist_ok=True)

try:
    (audio_segment, diarization) = get_pipeline(filename)

    transcripts = get_transcripts(diarization, audio_segment)

    vtt = create_vtt(transcripts)

    export_filename = filename.with_suffix(".vtt")
    # WebVTT files are UTF-8 by specification; do not depend on the locale default.
    with open(export_filename, "w", encoding="utf-8") as f:
        vtt.write(f)
finally:
    # Always drop the temporary folder, even when a step above failed.
    shutil.rmtree(settings.tmp_folder.absolute())

In [None]:
# Send the generated .vtt file (written by the previous cell) to the user's
# browser through the Colab download helper.
files.download(export_filename)