# whisper_functions.py
# Download audio with yt-dlp and transcribe it with Whisper in overlapping windows.
import whisper
from yt_dlp import YoutubeDL
from tqdm import tqdm
# Load the Whisper "base.en" checkpoint once, when the module is imported;
# transcribe() below reads this module-level object for every window it decodes.
model = whisper.load_model("base.en")
def get_letters(text):
    """Return the alphabetic characters of ``text``, lowercased, in order.

    Everything for which ``str.isalpha()`` is false (digits, whitespace,
    punctuation) is dropped, so two strings that differ only in case,
    spacing or punctuation produce the same list.

    Args:
        text: The string to filter.

    Returns:
        A list with one lowercased entry per alphabetic character of
        ``text``; empty when ``text`` contains no letters.
    """
    return [char.lower() for char in text if char.isalpha()]
def _overlap_letter_count(prev_letters, new_letters):
    """Return the size of the longest suffix of prev_letters that is a prefix of new_letters."""
    # Search from the longest candidate down so the first hit is the best one.
    # The upper bound includes len(new_letters) itself, so a window whose
    # letters are entirely contained in the transcript tail is detected.
    for size in range(min(len(prev_letters), len(new_letters)), 0, -1):
        if prev_letters[-size:] == new_letters[:size]:
            return size
    return 0


def _drop_leading_letters(segment, count):
    """Return segment starting at its count-th (0-based) alphabetic character, or "" if it has none."""
    if count == 0:
        # No overlap: keep the segment untouched, leading punctuation included.
        return segment
    seen = 0
    for index, char in enumerate(segment):
        if char.isalpha():
            if seen == count:
                return segment[index:]
            seen += 1
    # Every letter of the segment was part of the overlap: nothing new remains.
    return ""


def transcribe(audio):
    """Transcribe an audio file with Whisper using overlapping windows.

    The audio is decoded window by window. Consecutive windows overlap, so
    the text decoded for a window usually begins with words already present
    at the end of the transcript. The overlap is found by comparing letters
    only (see ``get_letters``), which ignores case, spacing and punctuation,
    and the overlapping prefix is removed before the text is appended.

    Args:
        audio: Path of the audio file, passed to ``whisper.load_audio``.

    Returns:
        The merged transcript, with the new text of each window joined by a
        single space. Returns ``""`` when the loaded audio has no samples.
    """
    samples = whisper.load_audio(audio)
    if len(samples) == 0:
        return ""

    options = whisper.DecodingOptions(language="en")

    window_size = 480000  # 30 seconds (presumably at load_audio's 16 kHz output)
    step_size = window_size // 10  # 3 seconds of overlap between windows
    skip_size = window_size - step_size

    # Always schedule at least one window: without the max(), audio no longer
    # than step_size samples produced an empty range and was never decoded.
    stop = max(len(samples) + skip_size, window_size + 1)

    pieces = []
    # Letters of everything appended so far, maintained incrementally instead
    # of re-scanning the whole transcript for every window.
    letters = []
    for end in tqdm(range(window_size, stop, skip_size)):
        # The last window may run past the end of the audio; the slice is
        # then short and pad_or_trim pads it to the window length.
        chunk = whisper.pad_or_trim(samples[end - window_size:end])
        mel = whisper.log_mel_spectrogram(chunk).to(model.device)
        result_text = whisper.decode(model, mel, options).text

        # NOTE(review): even a single matching letter counts as an overlap, so
        # a coincidental match can clip the first letter(s) of new text; a
        # minimum overlap length may be worth adding.
        overlap = _overlap_letter_count(letters, get_letters(result_text))
        fresh = _drop_leading_letters(result_text, overlap)
        if not fresh:
            # Empty decode or fully overlapping window: nothing to add, and
            # no stray separator either.
            continue
        pieces.append(fresh)
        letters.extend(get_letters(fresh))
    return " ".join(pieces)
def download(url, path='tmp/tmp.wav'):
    """Download the audio of ``url`` with yt-dlp and extract it as WAV.

    Args:
        url: Address of the media to download.
        path: Value handed to yt-dlp as its ``outtmpl`` output template.
    """
    # Post-processing step that asks FFmpeg to extract the audio track as WAV.
    extract_audio = {
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'wav',
        'preferredquality': '192',
    }
    settings = {
        'format': 'bestaudio/best',
        'outtmpl': path,
        'postprocessors': [extract_audio],
    }
    with YoutubeDL(settings) as downloader:
        downloader.download([url])
if __name__ == '__main__':
    # Demo run: fetch one video's audio, then print its transcript.
    wav_path = "tmp/tmp.wav"
    download("https://www.youtube.com/watch?v=XxCha4Kez9c", path=wav_path)
    transcript = transcribe(wav_path)
    print(transcript)