-
Notifications
You must be signed in to change notification settings - Fork 0
/
audio_grabber.py
131 lines (113 loc) · 5.09 KB
/
audio_grabber.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import pyaudio
import base64
import json
import requests
import time
import threading
import wave
from urllib3.util.retry import Retry
from urllib3.exceptions import MaxRetryError
# The AudioGrabber class opens a PyAudio input stream that captures audio from the microphone.
# The start method starts the stream; PyAudio then calls audio_callback once per captured block
# (CHUNK_SIZE frames, i.e. one second of audio).
# The audio_callback method appends each non-silent block to a buffer and sends the whole buffer
# to the server after every block.
# When a silent block arrives, or the buffer reaches BUFFER_SIZE bytes (10 seconds of audio),
# the buffer is reset and a new chunk ID is generated.
# The send_chunk method encodes the buffered audio with base64, creates a JSON payload with the chunk ID,
# and sends it to the server using requests.
# If the server responds with a 200 status, the chunk is reported as sent; any other status or a
# request error is printed.
# Sampling rate in Hz (samples per second).
RATE = 16000
# Frames delivered to the stream callback per invocation; equal to RATE,
# so each callback carries one second of audio.
CHUNK_SIZE = RATE
# Sample format passed to PyAudio: 16-bit signed integers (2 bytes per sample).
AUDIO_FORMAT = pyaudio.paInt16
# Number of input channels (mono).
CHANNELS = 1
# Buffer limit in bytes: 2 bytes per sample * 10 seconds * RATE samples per second.
BUFFER_SIZE = 2 * 10 * RATE # 10 seconds of audio
# A block whose peak sample value is below this threshold is treated as silence.
SILENCE_THRESHOLD = 500
# Index of the input device handed to PyAudio.
# NOTE(review): device indices are presumably machine-specific; the __main__
# block prints the available devices - confirm the index before running.
INPUT_DEVICE_INDEX = 1
class AudioGrabber:
    """Capture microphone audio with PyAudio and post it to a transcription server.

    PyAudio calls :meth:`audio_callback` with every captured block. Non-silent
    blocks are appended to ``self.buffer`` and the whole buffer is posted after
    every block under the current ``chunk_id``, so the server receives a
    growing recording for the same ID. A silent block, or a buffer that has
    reached ``BUFFER_SIZE`` bytes, starts a new buffer with a new ``chunk_id``.

    NOTE(review): the HTTP request is performed synchronously inside the audio
    callback. A slow server therefore delays the callback; consider moving the
    upload to a worker thread if blocks get dropped.
    """

    # Endpoint that receives the JSON payload built by send_chunk.
    ENDPOINT_URL = 'http://localhost:5040/transcribe'
    # Seconds to wait for the server before a request is abandoned, so that a
    # hung server cannot block the audio callback forever.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Prepare the buffer, the HTTP session and the (not yet started) stream."""
        # Initialise all state the callback touches BEFORE the stream exists,
        # so the callback can never observe a half-constructed object.
        self.buffer = bytearray()
        self.chunk_id = self._new_chunk_id()
        self.send_thread = None  # kept for backward compatibility; no thread is started
        self.recording = True
        # One session for the lifetime of the grabber instead of one per chunk.
        self.session = self._build_session()
        self.audio = pyaudio.PyAudio()
        try:
            self.stream = self.audio.open(format=AUDIO_FORMAT,
                                          channels=CHANNELS,
                                          rate=RATE,
                                          input=True,
                                          input_device_index=INPUT_DEVICE_INDEX,
                                          frames_per_buffer=CHUNK_SIZE,
                                          # Capturing begins in start(), not on construction.
                                          start=False,
                                          stream_callback=self.audio_callback)
        except Exception:
            # Do not leak PortAudio or the connection pool if the device cannot be opened.
            self.audio.terminate()
            self.session.close()
            raise

    @staticmethod
    def _new_chunk_id():
        """Return a new chunk ID: the current time in milliseconds, as a string."""
        return str(int(time.time() * 1000))

    @staticmethod
    def _build_session():
        """Return a requests session that retries on connection errors and 5xx codes."""
        retry_policy = Retry(total=5,  # Total number of retries
                             backoff_factor=1,  # Factor for the growing pause between retries
                             status_forcelist=[500, 502, 503, 504])  # Retry on these status codes
        adapter = requests.adapters.HTTPAdapter(max_retries=retry_policy)
        session = requests.Session()
        session.mount('http://', adapter)
        session.mount('https://', adapter)
        return session

    def _reset_buffer(self):
        """Start a fresh buffer under a new chunk ID."""
        self.buffer = bytearray()
        self.chunk_id = self._new_chunk_id()

    def audio_callback(self, audio_data, frame_count, time_info, status):
        """Handle one captured block of audio (PyAudio stream callback).

        Args:
            audio_data: Raw bytes of the captured block (16-bit samples).
            frame_count: Number of frames in the block (unused).
            time_info: Timing information supplied by PyAudio (unused).
            status: Status flags supplied by PyAudio (unused).

        Returns:
            A ``(audio_data, pyaudio.paContinue)`` tuple so that the stream
            keeps running.
        """
        # View the bytes as native 16-bit signed samples without copying.
        samples = memoryview(audio_data).cast('h')
        if self.is_silent(samples):
            print("silence..")
            # The buffer was already sent after the previous block, so that
            # transmission was the final one for this chunk ID.
            self._reset_buffer()
        else:
            print("audio extend")
            self.buffer.extend(audio_data)
            print("buffer audio size " + str(len(self.buffer)))
        # Always send the buffer unless it is empty.
        if self.buffer:
            print("send chunk")
            self.send_chunk()
        # If the buffer has grown too large, start a new one; the buffer just
        # sent was then the final one for its chunk ID.
        if len(self.buffer) >= BUFFER_SIZE:
            self._reset_buffer()
        # Return the status code to continue the stream
        return audio_data, pyaudio.paContinue

    def is_silent(self, data):
        """Return True when the peak absolute sample in *data* is below SILENCE_THRESHOLD.

        Args:
            data: Iterable of integer samples. An empty iterable counts as silence.
        """
        # Use the absolute value so that negative excursions count as signal too.
        peak = max(map(abs, data), default=0)
        print(str(peak))
        return peak < SILENCE_THRESHOLD

    def send_chunk(self):
        """POST the current buffer, base64-encoded, together with the chunk ID.

        Failures are printed and otherwise ignored so that recording continues.
        """
        audio_b64 = base64.b64encode(self.buffer).decode('utf-8')
        data = {'chunk_id': self.chunk_id, 'audio_b64': audio_b64}
        try:
            # json= serialises the payload and sets the Content-Type header.
            response = self.session.post(self.ENDPOINT_URL, json=data,
                                         timeout=self.REQUEST_TIMEOUT)
        except MaxRetryError:
            print('Error: Maximum retries exceeded. Could not connect to the endpoint.')
            return
        except requests.exceptions.RequestException as e:
            print(f'Error sending chunk: {e}')
            return
        if response.status_code == 200:
            print(f'Sent chunk {self.chunk_id} with {len(self.buffer)} bytes')
        else:
            print(f'Error sending chunk: {response.status_code}:{response.text}')

    def start(self):
        """Start capturing audio; PyAudio begins invoking audio_callback."""
        self.stream.start_stream()

    def stop(self):
        """Stop capturing and release the stream, PortAudio and the HTTP session."""
        self.recording = False
        # No sender thread is started by this class; join only if one was attached.
        if self.send_thread is not None:
            self.send_thread.join()
        try:
            self.stream.stop_stream()
            self.stream.close()
        finally:
            # Release the remaining resources even if closing the stream failed.
            self.audio.terminate()
            self.session.close()
if __name__ == '__main__':
    # Print every audio device PyAudio reports, with its index, to help
    # choose a value for INPUT_DEVICE_INDEX.
    p = pyaudio.PyAudio()
    try:
        device_count = p.get_device_count()
        for i in range(device_count):
            device_info = p.get_device_info_by_index(i)
            print(f"Device {i}: {device_info['name']}")
    finally:
        # This instance is only needed for the listing; release it again.
        p.terminate()
    grabber = AudioGrabber()
    grabber.start()
    try:
        # Capturing happens in the stream callback; just keep the process alive.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        grabber.stop()
        print("Recording stopped by user")