-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtranscript.py
48 lines (41 loc) · 1.52 KB
/
transcript.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import argparse
import os
import sounddevice as sd
import soundfile as sf
from google.cloud import speech_v1 as speech
def speech_to_text(credentials_path, audio_file_path, language_code):
    """Send an audio file to Google Cloud Speech-to-Text and print the result.

    The whole file is read into memory and submitted in a single
    ``recognize`` call; the response is handed to ``print_sentences``.

    Args:
        credentials_path: Path to the service-account JSON file used to
            build the speech client.
        audio_file_path: Path to the audio file whose bytes are uploaded.
        language_code: Language tag passed through to the recognizer
            config (e.g. "en-US").
    """
    stt_client = speech.SpeechClient.from_service_account_json(credentials_path)

    # Raw bytes of the recording; the file is closed before the API call.
    with open(audio_file_path, "rb") as recording:
        raw_bytes = recording.read()

    recognition_settings = {
        "language_code": language_code,
        # The request declares two-channel audio.
        "audio_channel_count": 2,
        "enable_automatic_punctuation": True,
    }
    api_response = stt_client.recognize(
        config=recognition_settings,
        audio=speech.RecognitionAudio(content=raw_bytes),
    )
    print_sentences(api_response)
def print_sentences(response):
    """Print the top transcript of every recognition result, one per line.

    Args:
        response: Object exposing a ``results`` iterable. Each result has
            an ``alternatives`` sequence whose first item carries the
            ``transcript`` string that gets printed.

    Results with no alternatives are skipped instead of raising
    ``IndexError``.
    """
    for result in response.results:
        # Guard against a result that carries no hypotheses at all.
        if not result.alternatives:
            continue
        # Only the first alternative of each result is printed.
        print(result.alternatives[0].transcript)
# Recording parameters.
CHUNK = 1024  # not referenced by the code below
FORMAT = 'wav'  # not referenced by the code below
CHANNELS = 2  # matches the audio_channel_count hard-coded in speech_to_text
RATE = 48000  # sample rate in Hz, used for both capture and the saved file
RECORD_SECONDS = 6
WAVE_OUTPUT_FILENAME = "output.wav"

# Record RECORD_SECONDS of audio; blocking=True makes sd.rec wait until the
# capture has finished before returning the sample array.
print("say something...")
frames = sd.rec(int(RATE * RECORD_SECONDS), samplerate=RATE, channels=CHANNELS, blocking=True)

# Settings for the Google Cloud Speech-to-Text call.
credentials_path = "/home/pi/MagicMirror/modules/MMM-Chat/credentials.json" #path to credentials.json
language_code = "en-US"

try:
    # Save the recording to disk, then transcribe the saved file.
    sf.write(WAVE_OUTPUT_FILENAME, frames, RATE)
    speech_to_text(credentials_path, WAVE_OUTPUT_FILENAME, language_code)
finally:
    # Always clean up the temporary recording, even when writing or
    # transcription fails; the file may not exist if sf.write itself failed.
    if os.path.exists(WAVE_OUTPUT_FILENAME):
        os.remove(WAVE_OUTPUT_FILENAME)