<a href="https://colab.research.google.com/github/limyewjin/podcast-transcript/blob/main/podcast_transcript.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Basic script for using the OpenAI Whisper model to transcribe an audio file.

The output can be passed on to the other scripts in the [GitHub repository](https://github.com/limyewjin/podcast-transcript) to clean up the transcript and generate summaries.

# Parameters

In [1]:
# Choose which Whisper model to use: leave exactly one `model_name` line uncommented.
# The value is passed to whisper.load_model() in the installation cell, so that cell
# must be re-run after changing it.
#model_name = "tiny.en"
#model_name = "base.en"
#model_name = "small.en"
#model_name = "medium.en"
model_name = "large-v2"

# Folder the transcript (.txt) and segment (.json) files are written to;
# the run cell creates it if it does not exist.
output_folder = "transcriptions"
# Spoken language of the audio, passed to model.transcribe().
language = "english"
# If True, the run cell also dumps result["segments"] (Whisper's per-segment data) to a
# .json file. Word-level timestamps are not requested from transcribe().
export_timestamp_data = True

# Installation (run once or after changing `model_name`)

In [3]:
# Required third party packages: whisper
!pip install -U openai-whisper

import whisper
import io
import time
import os
import json
import pathlib

model = whisper.load_model(model_name)



100%|█████████████████████████████████████| 2.87G/2.87G [00:54<00:00, 56.7MiB/s]


# Run

Tip: Upload the audio file to Colab, right-click it in the file browser to copy its path, then run the cell below and paste the path when prompted.

In [7]:
# Transcribe one audio file with the already-loaded Whisper `model`.
# Reads from earlier cells: model, model_name, output_folder, language, export_timestamp_data.
# Writes <stem>.txt (full transcript) and, optionally, <stem>.json (segment data)
# into `output_folder`, where <stem> is the input file name without its extension.
print(f"Using model: {model_name}")

# Tolerate paths pasted with surrounding whitespace or quotes.
file_path = input("Path to file being transcribed: ").strip().strip("\"'")
if not os.path.isfile(file_path):
	# Raise instead of calling exit(): in a notebook exit() shuts the kernel down,
	# which would throw away the model that took a long time to download and load.
	raise FileNotFoundError(f"Error getting file: {file_path!r}")

if not os.path.exists(output_folder):
	os.makedirs(output_folder)
	print(f"Created output folder {output_folder}.\n")

# Optional text used as a prompt for the first window of audio. It can supply context
# for the transcription, e.g. custom vocabulary or proper nouns, to make those words
# more likely to be predicted correctly. An empty answer means "no prompt".
prompt = input("Optional text prompt: ").strip() or None

# Get filename stem using pathlib (filename without extension)
filename_stem = pathlib.Path(file_path).stem

result_fileName = f"{filename_stem}.txt"
json_fileName = f"{filename_stem}.json"

start = time.time()
# The user prompt must go in as `initial_prompt`: transcribe() overwrites the
# `prompt` decode option internally, so passing `prompt=` is silently ignored.
result = model.transcribe(audio=file_path, language=language, initial_prompt=prompt)
end = time.time()
elapsed = float(end - start)

# Save transcription text to file
print("\nWriting transcription to file...")
with open(os.path.join(output_folder, result_fileName), "w", encoding="utf-8") as f:
	f.write(result["text"])

# Save the segments data to json file
if export_timestamp_data:
	print("\nWriting segment data to file...")
	with open(os.path.join(output_folder, json_fileName), "w", encoding="utf-8") as f:
		segments_data = result["segments"]
		json.dump(segments_data, f, indent=4)

elapsed_minutes = str(round(elapsed/60, 2))
print(f"\nElapsed time With {model_name} Model: {elapsed_minutes} minutes")

Using model: large-v2
Path to file being transcribed: /content/output_podcast.mp3
Optional text prompt: 

Writing transcription to file...

Writing segment data to file...


NameError: ignored