In [1]:
!pip install openai-whisper ffmpeg-python torch

Collecting openai-whisper
  Downloading openai-whisper-20240930.tar.gz (800 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/800.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m102.4/800.5 kB[0m [31m2.8 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m798.7/800.5 kB[0m [31m12.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m800.5/800.5 kB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting ffmpeg-python
  Downloading ffmpeg_python-0.2.0-py3-none-any.whl.metadata (1.7 kB)
Collecting tiktoken (from openai-whisper)
  Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata 

In [3]:
import os
import json
import whisper
from pathlib import Path

def find_media_files(directory, extensions=frozenset({'.mp3', '.wav', '.mp4', '.mkv', '.flac', '.aac'})):
    """Recursively collect the media files found under ``directory``.

    Args:
        directory: Root folder to walk.
        extensions: File-name suffixes to accept (e.g. ``'.mp3'``). May be any
            iterable of strings or a single string. Matching ignores case on
            both the file name and the supplied suffixes.

    Returns:
        A sorted list of paths (walked root joined with the file name) whose
        name ends with one of the suffixes. The list is empty when nothing
        matches; a directory that does not exist also yields an empty list
        because ``os.walk`` ignores listing errors by default.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(extensions, str):
        extensions = (extensions,)

    # Lower-case the suffixes too, so '.MP3' supplied by a caller still matches.
    suffixes = tuple(ext.lower() for ext in extensions)

    media_files = [
        os.path.join(root, name)
        for root, _, files in os.walk(directory)
        for name in files
        if name.lower().endswith(suffixes)
    ]
    # os.walk order depends on the filesystem; sort for reproducible runs.
    media_files.sort()
    return media_files

def transcribe_media(file_path, model):
    """Transcribe one media file and return the recognised text.

    Args:
        file_path: Path of the media file, passed unchanged to the model.
        model: Object exposing ``transcribe(path)`` that returns a mapping
            with a ``"text"`` entry (here, a loaded Whisper model).

    Returns:
        The value stored under ``"text"`` in the model's result.
    """
    # Announce the file first so progress is visible while the model runs.
    print(f"Transcribing: {file_path}")
    return model.transcribe(file_path)["text"]

def save_transcription(output_folder, file_path, transcription):
    """Write one transcription to a JSON file inside ``output_folder``.

    The file is named ``<stem>.json`` after the media file. If that name is
    already taken by the transcription of a *different* source file (for
    example ``talk.mp3`` and ``talk.wav``, or same-named files in different
    subfolders), a numeric suffix is appended (``<stem>_1.json``, ...) instead
    of silently overwriting it. Re-saving the same source reuses its file.

    Args:
        output_folder: Destination folder; created (with parents) if missing.
        file_path: Path of the transcribed media file (str or path-like).
        transcription: Text to store.

    Returns:
        ``pathlib.Path`` of the JSON file that was written.
    """
    def recorded_source(path):
        # Return the "file" entry of an existing transcript, or None when the
        # file cannot be read or parsed as a JSON object.
        try:
            with open(path, encoding='utf-8') as existing:
                payload = json.load(existing)
        except (OSError, ValueError):
            return None
        return payload.get("file") if isinstance(payload, dict) else None

    output_folder = Path(output_folder)
    output_folder.mkdir(parents=True, exist_ok=True)

    # json cannot serialise Path objects, so store the path as plain text.
    source = str(file_path)
    stem = Path(source).stem

    output_file = output_folder / f"{stem}.json"
    counter = 0
    while output_file.exists():
        owner = recorded_source(output_file)
        # Unreadable files and our own earlier output are simply overwritten.
        if owner is None or owner == source:
            break
        counter += 1
        output_file = output_folder / f"{stem}_{counter}.json"

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump({"file": source, "transcription": transcription}, f, ensure_ascii=False, indent=4)

    print(f"Saved transcription: {output_file}")
    return output_file

def main(input_folder, output_folder, model_name="tiny"):
    """Transcribe every media file under ``input_folder`` into ``output_folder``.

    Args:
        input_folder: Folder searched (recursively) for media files.
        output_folder: Folder that receives one JSON transcript per media file.
        model_name: Name passed to ``whisper.load_model``. Defaults to
            ``"tiny"``, the smallest model.

    Returns:
        None. Progress is reported with ``print``.
    """
    media_files = find_media_files(input_folder)

    if not media_files:
        print("No media files found in the directory.")
        return

    # Load the model only once there is something to transcribe, so an empty
    # input folder does not pay for loading it.
    model = whisper.load_model(model_name)

    for file_path in media_files:
        transcription = transcribe_media(file_path, model)
        save_transcription(output_folder, file_path, transcription)

# Entry point: runs the batch transcription when this code executes as the
# main module (which is also the case when the cell is run in a notebook).
if __name__ == "__main__":
    input_directory = "/content/media"  # Change this to the folder containing media files
    output_directory = "/content/transcript"  # Folder that receives the JSON transcripts
    main(input_directory, output_directory)


  checkpoint = torch.load(fp, map_location=device)


Transcribing: /content/media/Your_First_Lesson.mp3




Saved transcription: /content/transcript/Your_First_Lesson.json
Transcribing: /content/media/file_example_WAV_1MG.wav




Saved transcription: /content/transcript/file_example_WAV_1MG.json
Transcribing: /content/media/Power_English_Update.mp3




Saved transcription: /content/transcript/Power_English_Update.json
Transcribing: /content/media/New_Year_Resolution.mp3




Saved transcription: /content/transcript/New_Year_Resolution.json
