In [None]:
import whisper
import openai
from dotenv import dotenv_values
import time

In [None]:
# Whisper checkpoint to load. Other sizes: tiny, base, small, medium.
# NOTE(review): the original note says "large" defaults to large-v2 — verify
# against the installed whisper version.
WHISPER_MODEL_NAME = "large"
model = whisper.load_model(WHISPER_MODEL_NAME)

In [None]:
# Read key/value pairs from the local .env file and hand the OpenAI key to the
# client library.
config = dotenv_values(".env")

# Fail fast with an actionable message instead of a bare KeyError (missing key)
# or a confusing authentication failure on the first API call (empty key).
if not config.get("OPENAI_API_KEY"):
    raise KeyError("OPENAI_API_KEY is missing or empty in .env")

openai.api_key = config["OPENAI_API_KEY"]

In [None]:
def transcribe_and_append(audio_path, output_path, separator="\n--------------------------------\n"):
    """Transcribe an audio file and append the transcript to a text file.

    Uses the module-level ``model`` object to run the transcription.

    Args:
        audio_path: Path of the audio file passed to ``model.transcribe``.
        output_path: Text file to append to (opened in append mode, so
            earlier content is kept).
        separator: Line written before the transcript to delimit entries.

    Returns:
        The transcribed text (the ``"text"`` entry of the transcription result).
    """
    result = model.transcribe(audio_path)
    transcription = result["text"]

    # Explicit UTF-8: transcripts can contain characters the platform's default
    # encoding cannot represent, which would raise UnicodeEncodeError on write.
    with open(output_path, "a", encoding="utf-8") as output_file:
        output_file.write(f"{separator}\n{transcription}\n")

    return transcription

In [None]:
def split_text_into_chunks(text, max_tokens):
    """Split text on whitespace into chunks of at most ``max_tokens`` characters.

    Words are never broken: they are packed greedily and re-joined with single
    spaces. Note that, despite the parameter name, the limit is compared
    against the character length of each chunk, not a model token count.

    Args:
        text: Text to split.
        max_tokens: Maximum length, in characters, of each chunk.

    Returns:
        A list of non-empty chunk strings; an empty list for empty or
        whitespace-only text. A single word longer than ``max_tokens`` is
        emitted as its own (oversized) chunk.
    """
    chunks = []
    current_chunk = []
    current_length = 0  # length of " ".join(current_chunk), tracked incrementally

    for word in text.split():
        if not current_chunk:
            # The first word of a chunk is always accepted; this avoids
            # flushing an empty chunk when the word alone exceeds the limit.
            current_chunk.append(word)
            current_length = len(word)
        elif current_length + 1 + len(word) <= max_tokens:
            # +1 accounts for the joining space.
            current_chunk.append(word)
            current_length += 1 + len(word)
        else:
            chunks.append(" ".join(current_chunk))
            current_chunk = [word]
            current_length = len(word)

    if current_chunk:
        chunks.append(" ".join(current_chunk))

    return chunks

In [None]:
def process_text_with_gpt35_turbo(text, max_retries=3, retry_delay=5):
    """Copy-edit a transcript with gpt-3.5-turbo, one chunk at a time.

    The text is split with ``split_text_into_chunks`` and each chunk is sent in
    its own chat-completion request asking for spelling, grammar, punctuation
    and paragraph fixes. Edited chunks are joined with single spaces.

    Args:
        text: Raw transcript to edit.
        max_retries: Maximum attempts per chunk when the request raises
            ``openai.APIError``.
        retry_delay: Seconds to sleep between attempts.

    Returns:
        The edited text. A chunk that still fails after ``max_retries``
        attempts is reported on stdout and omitted from the result; each
        failed attempt is also appended to ``failed_requests.log``.
    """
    # The same budget is used as the chunk size for split_text_into_chunks and
    # as the completion's max_tokens.
    # NOTE(review): the splitter compares this limit against character counts
    # while the API treats max_tokens as tokens — confirm the combined request
    # still fits the model's context window.
    max_tokens = 3272 - 200  # Reserve tokens for the prompt and other overheads
    chunks = split_text_into_chunks(text, max_tokens)

    edited_parts = []

    for chunk in chunks:
        # The chunk is wrapped in literal triple quotes so the delimiter the
        # instructions refer to is actually present in the prompt.
        prompt = (
            "Act as a copy editor. Go through the transcribed text in triple quotes below. "
            "Edit it for spelling mistakes, grammar issues, punctuation. "
            "Format the text into appropriately sized paragraphs. Make your best effort.\n\n"
            f'"""{chunk}"""'
            "\n\nReturn only the edited text. If the text involves multiple people, start each new speaker on a new line, along with their name. Do not alter the actual content of the text."
        )
        message = [{"role": "system", "content": prompt}]

        for attempt in range(1, max_retries + 1):
            try:
                response = openai.ChatCompletion.create(
                    model="gpt-3.5-turbo",
                    messages=message,
                    max_tokens=max_tokens,
                    temperature=0.7,
                    top_p=1,
                    frequency_penalty=0,
                    presence_penalty=0,
                )
                edited_parts.append(response.choices[0].message.content.strip())
                break
            except openai.APIError as e:
                print(f"Error occurred: {e}. Retrying {attempt}/{max_retries}.")
                # UTF-8 so a request containing non-ASCII transcript text can
                # always be logged.
                with open("failed_requests.log", "a", encoding="utf-8") as f:
                    f.write(f"{time.strftime('%Y-%m-%d %H:%M:%S')} - {e}\nRequest: {message}\n\n")
                # No point sleeping after the final attempt.
                if attempt < max_retries:
                    time.sleep(retry_delay)
        else:
            # Reached only when no attempt succeeded (loop ended without break).
            print("Failed to process the following chunk after maximum retries:")
            print(chunk)
            print("Check 'failed_requests.log' for more details.")

    return " ".join(edited_parts).strip()

In [None]:
# Input audio, raw-transcript output, and edited-transcript output, in that order.
audio_file_path, original_output_file_path, revised_output_file_path = (
    "./audio.mp3",
    "./audio_original.txt",
    "./audio_revised.txt",
)

In [None]:
# Transcribe the audio and append the raw transcript to the original-output file.
transcription = transcribe_and_append(
    audio_path=audio_file_path,
    output_path=original_output_file_path,
)

In [None]:
# Send the raw transcript through the GPT copy-editing pass.
edited_text = process_text_with_gpt35_turbo(text=transcription)

In [None]:
# Overwrite the revised-output file with the edited transcript. UTF-8 is set
# explicitly so non-ASCII text cannot fail on platforms whose default encoding
# is narrower.
with open(revised_output_file_path, "w", encoding="utf-8") as f:
    f.write(edited_text)