In [13]:
from openai import OpenAI
from pydub import AudioSegment
import os
from dotenv import load_dotenv



# Keep the sensitive OpenAI key out of the notebook: pull it in from a local .env file instead of hardcoding it
load_dotenv(r"OpenAI.env")

# Read the API key from the environment (replace the placeholder with the variable name used in your .env file)
OPENAI_API_KEY = os.getenv("name of your api key in the env file")
client = OpenAI(api_key=OPENAI_API_KEY)

In [14]:
def split_audio(file_path, target_size_mb=10, format='mp3', bitrate_kbps=128):
    """Split an audio file into consecutive chunk files of about ``target_size_mb`` each.

    The chunk duration is derived from the target size and an assumed constant
    bitrate; every chunk is then exported at that same bitrate so the size
    estimate actually holds for the files written to disk.

    Args:
        file_path: Path of the audio file to split.
        target_size_mb: Upper bound (in MiB) aimed for per chunk. The default
            of 10 is deliberately conservative because uploads of 20-24 MB
            repeatedly led to API disconnects.
        format: Audio format used both to decode the input and to encode the
            chunks.
        bitrate_kbps: Bitrate (kilobits per second, integer) used for the
            duration estimate and passed to the exporter.
            NOTE(review): presumably ignored by pydub for uncompressed
            formats such as wav -- confirm before relying on the size bound
            for anything but compressed output.

    Returns:
        List of chunk file paths in playback order, named
        ``<file_path>_part<index>.<format>``. Empty if the audio has zero
        length.

    Raises:
        ValueError: If ``target_size_mb`` or ``bitrate_kbps`` is not positive,
            or the combination yields a chunk shorter than one millisecond.
    """
    if target_size_mb <= 0:
        raise ValueError("target_size_mb must be a positive number")
    if bitrate_kbps <= 0:
        raise ValueError("bitrate_kbps must be a positive number")

    # bytes * 8 -> bits; bits / (kbps * 1000) -> seconds; seconds * 1000 -> ms.
    # The two factors of 1000 cancel, leaving a single division (128 kbps is
    # 16000 bytes per second, so 10 MiB comes out at 655360 ms).
    max_segment_duration_ms = int(target_size_mb * 1024 * 1024 * 8 / bitrate_kbps)
    if max_segment_duration_ms < 1:
        raise ValueError("target_size_mb is too small for the given bitrate")

    # Load the file using AudioSegment from pydub; len(audio) is its duration in ms
    audio = AudioSegment.from_file(file_path, format=format)

    # Names of the chunk files written so far, in order
    chunks = []

    # Slice the audio into fixed-duration windows and write each one to its own indexed file
    for index, start_ms in enumerate(range(0, len(audio), max_segment_duration_ms)):
        chunk = audio[start_ms:start_ms + max_segment_duration_ms]
        chunk_name = f"{file_path}_part{index}.{format}"
        # Pin the export bitrate to the one the duration estimate assumed
        chunk.export(chunk_name, format=format, bitrate=f"{bitrate_kbps}k")
        chunks.append(chunk_name)

    return chunks

def transcribe_audio(file_path):
    """Transcribe a single audio file with the Whisper API and return its text.

    The file is read in binary mode (audio is binary data) and handed to the
    module-level ``client``; the handle is closed before the text is returned.
    """
    with open(file_path, mode="rb") as stream:
        # upload the open file to the "whisper-1" transcription endpoint
        result = client.audio.transcriptions.create(file=stream, model="whisper-1")
    transcript = result.text
    return transcript


def process_directory(directory_path, max_size_mb=10):
    """Transcribe every mp3 file in a directory into ``<directory>/transcriptions``.

    For each file whose extension is ``.mp3`` (matched case-insensitively) one
    text file with the same base name is written to the ``transcriptions``
    subfolder, which is created if it does not exist yet. Files larger than
    ``max_size_mb`` are first cut into chunks with ``split_audio``; the chunks
    are transcribed in order, joined with single spaces, and the temporary
    chunk files are deleted again -- also when a transcription call fails.

    Args:
        directory_path: Folder that contains the audio files.
        max_size_mb: Size threshold in MiB above which a file is chunked; the
            same value is passed to ``split_audio`` as the target chunk size.

    Raises:
        FileNotFoundError: If ``directory_path`` is not an existing directory.
            Checking up front prevents a mistyped path from being silently
            created as an empty folder tree.
    """
    if not os.path.isdir(directory_path):
        raise FileNotFoundError(f"audio directory not found: {directory_path!r}")

    # the transcriptions live in a subfolder next to the audio files
    output_dir = os.path.join(directory_path, "transcriptions")
    os.makedirs(output_dir, exist_ok=True)

    max_size_bytes = max_size_mb * 1024 * 1024

    # os.listdir returns the full listing up front, so chunk files created
    # below are not picked up by this loop
    for file_name in os.listdir(directory_path):
        stem, extension = os.path.splitext(file_name)
        if extension.lower() != '.mp3':
            continue

        file_path = os.path.join(directory_path, file_name)
        if not os.path.isfile(file_path):
            # e.g. a directory whose name happens to end in .mp3
            continue

        if os.path.getsize(file_path) > max_size_bytes:
            # too large for a single upload: split, transcribe the parts in order, stitch the text
            parts = split_audio(file_path, target_size_mb=max_size_mb)
            try:
                complete_transcription = " ".join(
                    transcribe_audio(part) for part in parts
                )
            finally:
                # always clean up the temporary chunk files, even if the API call raised
                for part in parts:
                    if os.path.exists(part):
                        os.remove(part)
        else:
            # small enough to be transcribed directly without any chunking
            complete_transcription = transcribe_audio(file_path)

        # only the real extension is swapped, so names like "a.mp3.take.mp3" stay intact;
        # UTF-8 keeps non-ASCII transcriptions writable regardless of the OS locale
        transcription_file_path = os.path.join(output_dir, stem + '.txt')
        with open(transcription_file_path, 'w', encoding='utf-8') as f:
            f.write(complete_transcription)


# Set this to the local folder that holds the audio files to transcribe (replace the placeholder string with the real path)
directory_path = "your directory path here"

# Start the batch run: process_directory transcribes each .mp3 in that folder and writes the text files to its "transcriptions" subfolder
process_directory(directory_path)