In [None]:
!pip install openai-whisper

In [1]:
import os
import whisper
import pandas as pd

## Creating subfolders for the transcription

In [8]:
def transcribe_episode_fragments(episode_number):
    # Load the Whisper model
    model = whisper.load_model("base")

    # Paths for the fragments and transcriptions directories
    fragments_dir = f'client_data/fragments_per_episode/ep_{episode_number}'
    transcriptions_dir = f'client_data/transcriptions_per_episode/ep_{episode_number}'

    # Create the transcriptions directory if it doesn't exist
    os.makedirs(transcriptions_dir, exist_ok=True)

    # List all fragment files in the episode's fragment directory
    fragment_files = [f for f in os.listdir(fragments_dir) if f.endswith('.mp3')]

    # Get the total number of fragments for progress tracking
    total_fragments = len(fragment_files)

    print(f"Starting transcription of {total_fragments} fragments for episode {episode_number}.")

    # Process each fragment
    for i, fragment_file in enumerate(fragment_files, start=1):
        print(f"\nProcessing fragment {i}/{total_fragments}...")
        
        # Construct the full path to the fragment
        fragment_path = os.path.join(fragments_dir, fragment_file)
        
        # Transcribe the fragment
        audio = model.transcribe(fragment_path, language="en", fp16=False)

        # Construct the path for the transcription text file
        transcription_file_name = os.path.splitext(fragment_file)[0] + '.txt'
        transcription_path = os.path.join(transcriptions_dir, transcription_file_name)

        # Save the transcription text
        with open(transcription_path, 'w') as f:
            f.write(audio['text'])
        
        print(f"Finished processing {fragment_file}. Transcription saved.")

    print(f"\nAll fragments for episode {episode_number} have been processed and transcribed.")

In [24]:
# Example usage
episode_number = 10  # Replace with the episode number you want to process
transcribe_episode_fragments(episode_number)

Starting transcription of 30 fragments for episode 10.

Processing fragment 1/30...
Finished processing ER22_ep10_fra_1.mp3. Transcription saved.

Processing fragment 2/30...
Finished processing ER22_ep10_fra_10.mp3. Transcription saved.

Processing fragment 3/30...
Finished processing ER22_ep10_fra_11.mp3. Transcription saved.

Processing fragment 4/30...
Finished processing ER22_ep10_fra_12.mp3. Transcription saved.

Processing fragment 5/30...
Finished processing ER22_ep10_fra_13.mp3. Transcription saved.

Processing fragment 6/30...
Finished processing ER22_ep10_fra_14.mp3. Transcription saved.

Processing fragment 7/30...
Finished processing ER22_ep10_fra_15.mp3. Transcription saved.

Processing fragment 8/30...
Finished processing ER22_ep10_fra_16.mp3. Transcription saved.

Processing fragment 9/30...
Finished processing ER22_ep10_fra_17.mp3. Transcription saved.

Processing fragment 10/30...
Finished processing ER22_ep10_fra_18.mp3. Transcription saved.

Processing fragment 11/3