In [1]:
import os
from dotenv import load_dotenv
from speechless.speaker_diarization import diarize_audio, merge_transcript
from speechless.transcribe_audio import transcribe_audio

In [2]:
# Notebook configuration: all paths are relative to the notebook's working directory.
# Recording that is diarized and transcribed in the cells below.
AUDIO_PATH = "../data/audio/sales_call_example_1.mp3"
# Folder that receives the merged transcript text file.
# NOTE(review): "diaized" looks like a typo for "diarized"; left unchanged because
# it is the on-disk location other tooling may already rely on — confirm before renaming.
FOLDER_TO_STORE = "../data/diaized_and_transcribed"
# Passed as the third argument to merge_transcript; presumably an overlap
# threshold for matching transcript segments to speakers — TODO confirm.
OVERLAP = 0.1

In [3]:
# Read the project's .env file, then pull the API keys from the process environment.
# Either key is None when the corresponding variable is not set.
load_dotenv("../.env")
picovoice_api_key = os.environ.get("PICOVOICE_API_KEY")
api_key = os.environ.get("OPENAI_API_KEY")

In [4]:
# Diarize the configured recording using the Picovoice access key.
diarized_audio = diarize_audio(AUDIO_PATH, picovoice_api_key)

In [None]:
# Transcribe the same recording that was diarized above.
transcript = transcribe_audio(AUDIO_PATH)

In [6]:
# Combine the transcript with the diarization result, using the configured OVERLAP.
transcribed_and_diarized = merge_transcript(transcript, diarized_audio, OVERLAP)

In [7]:
def write_list_to_disk(data_list: list, folder_path: str, file_name: str) -> None:
    """
    Write a list to disk as a text file, one item per line.

    Each item is rendered through an f-string (i.e. ``format(item)``) and
    followed by a newline. An existing file with the same name is overwritten.

    Parameters
    ----------
    data_list : list
        The list to write to disk.
    folder_path : str
        The folder path where the file should be saved. It is created
        (including missing parents) if it doesn't exist. An empty string
        means the current working directory.
    file_name : str
        Name of the file to create inside ``folder_path``.
    """
    # os.makedirs("") raises FileNotFoundError, so only create a folder when
    # one was actually named; "" then resolves to the current directory.
    if folder_path:
        os.makedirs(folder_path, exist_ok=True)

    # Define the file path
    file_path = os.path.join(folder_path, file_name)

    # Write the list to the file, one entry per line
    with open(file_path, "w", encoding="utf-8") as file:
        file.writelines(f"{item}\n" for item in data_list)

    print(f"List written to {file_path}")


In [None]:
# Name the output after the audio file, swapping whatever extension it has for
# ".txt" (e.g. "sales_call_example_1.mp3" -> "sales_call_example_1.txt").
write_list_to_disk(
    transcribed_and_diarized,
    FOLDER_TO_STORE,
    os.path.splitext(os.path.basename(AUDIO_PATH))[0] + ".txt",
)