Whisper for 100 concurrent users #2192

saboorniazi · 2024-05-27T14:45:07Z

saboorniazi
May 27, 2024

`import asyncio
import websockets
import numpy as np
import whisper
import torch
import threading
import time
from urllib.parse import parse_qs

Load Whisper model instances globally

model_pool = [whisper.load_model("tiny.en") for _ in range(5)]
print('5 instances of whisper model tiny.en loaded')

Global variables and locks

frames_locks = [threading.Lock() for _ in range(len(model_pool))] # one lock per model instance
threshold = 20 # voice pitch threshold
ms = 500 # milliseconds
connected_clients = set()
user_data = {}
model_counter = 0 # Counter to distribute requests

async def audio_data(websocket, path):
global connected_clients
connected_clients.add(websocket)
query_string = path.split('?')[1]
query_params = dict(parse_qs(query_string))
VAD = query_params.get('vad')
VAD = int(VAD[0]) if VAD else ms

connection_id = id(websocket)
user_data[connection_id] = {
    'frames_in': [],
    'silence_duration': 0,
    'last_speech_time': time.time(),
    'start': False,
    'VAD': VAD,
    'plainResponse': False
}

print(f'New connection: {connection_id} | Total Connected Clients: {len(connected_clients)}')

try:
    await handle_audio_stream(websocket, connection_id)
finally:
    connected_clients.remove(websocket)
    del user_data[connection_id]
    print(f'Removed connection: {connection_id} | Total Connected Clients: {len(connected_clients)}')

async def handle_audio_stream(websocket, connection_id):
try:
async for message in websocket:
user = user_data.get(connection_id)
if not user:
return
audio_buffer = np.frombuffer(message, dtype=np.int16)
b = ((np.abs(np.fft.rfft(audio_buffer))) * 16000 / len(message) // 2).mean()

        if b > threshold * 1000:
            user['silence_duration'] = 0
            user['start'] = True
            user['last_speech_time'] = time.time()
            user['frames_in'].append(audio_buffer)
        else:
            if user['start']:
                user['frames_in'].append(audio_buffer)
            user['silence_duration'] = time.time() - user['last_speech_time']

        if user['silence_duration'] * 1000 >= user['VAD'] and user['start']:
            user['start'] = False
            if user['frames_in']:
                frames_copy = user['frames_in'].copy()
                user['frames_in'].clear()
                audio_data_np = np.concatenate(frames_copy).astype(np.float32) / 32768.0
                audio_tensor = torch.from_numpy(audio_data_np)

                # Transcribe concurrently using multiple locks
                asyncio.create_task(process_transcription(audio_tensor, websocket, connection_id))
except websockets.exceptions.ConnectionClosedError:
    print(f'Connection {connection_id} closed unexpectedly.')

async def process_transcription(audio_tensor, websocket, connection_id):
global model_counter
lock_index = model_counter % len(frames_locks)
model_instance = model_pool[model_counter % len(model_pool)]
model_counter += 1

start_time = time.time()
with frames_locks[lock_index]:
    result = await asyncio.to_thread(model_instance.transcribe, audio_tensor)
transcription = result['text']
end_time = time.time()
elapsed = end_time - start_time

print(f"Sockets Connected: {len(connected_clients)} | Time: {elapsed} | {len(audio_tensor)}| {connection_id} --> {transcription}")

try:
    await websocket.send(transcription)
except websockets.exceptions.ConnectionClosedOK:
    print(f'Connection {connection_id} closed gracefully.')

start_server = websockets.serve(audio_data, "localhost", 8001)

asyncio.get_event_loop().run_until_complete(start_server)
asyncio.get_event_loop().run_forever()`
this is my code i want to run this for 100 users conccurrently and i am running this on 2 rtx 3090 gpus .
can someone guide me to correctly use asyncio and threads to make this for 100 users?

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Whisper for 100 concurrent users #2192

{{title}}

Replies: 0 comments

Select a reply

Whisper for 100 concurrent users #2192

saboorniazi May 27, 2024

Load Whisper model instances globally

Global variables and locks

Replies: 0 comments

saboorniazi
May 27, 2024