In [8]:
import os
import torch
import whisper
import numpy as np
from tqdm.notebook import tqdm
from pathlib import Path
from whisper.utils import write_vtt

In [2]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

In [3]:
model = whisper.load_model("base.en").to(DEVICE)

In [9]:
def transcribe_files(model, files):
    print(f"Model is {'multilingual' if model.is_multilingual else 'English-only'} "
          f"and has {sum(np.prod(p.shape) for p in model.parameters()):,} parameters."
         )
    for input_file in tqdm(files):
        print(f"Transcribing file: {input_file}")
        result = model.transcribe(str(input_file), verbose=False, language="en", )

        # save TXT
        with open(input_file.with_suffix(".txt"), "w", encoding="utf-8") as txt:
            print(result["text"], file=txt)

        # save VTT
        with open(input_file.with_suffix(".vtt"), "w", encoding="utf-8") as vtt:
            write_vtt(result["segments"], file=vtt)

In [10]:
files = list(Path("fastai_transcripts").glob("*.mp4"))

In [11]:
transcribe_files(model, files)

Model is English-only and has 71,825,408 parameters.


  0%|          | 0/8 [00:00<?, ?it/s]

Transcribing file: fastai_transcripts/lesson_2_practical_deep_learning_for_coders_2022.mp4
Transcribing file: fastai_transcripts/lesson_3_practical_deep_learning_for_coders_2022.mp4
Transcribing file: fastai_transcripts/lesson_7_practical_deep_learning_for_coders_2022.mp4
Transcribing file: fastai_transcripts/lesson_6_practical_deep_learning_for_coders_2022.mp4
Transcribing file: fastai_transcripts/lesson_4_practical_deep_learning_for_coders_2022.mp4
Transcribing file: fastai_transcripts/lesson_8_-_practical_deep_learning_for_coders_2022.mp4
Transcribing file: fastai_transcripts/lesson_1_practical_deep_learning_for_coders_2022.mp4
Transcribing file: fastai_transcripts/lesson_5_practical_deep_learning_for_coders_2022.mp4
