## Introduction 

This documentation is based on https://www.assemblyai.com/blog/real-time-speech-recognition-with-python/, adapted for my own personal use.

*In terminal:*

brew install websocat

Wrap the URL in quotes so the shell does not interpret the '?' character:

websocat wss:"//api.assemblyai.com/v2/realtime/ws?sample_rate=16000" -H Authorization:<Your_API_Token>

Once the websocket connection is established, the output looks like this:

(.venv) stevenoneill@Stevens-MacBook-Pro python % websocat wss:"//api.assemblyai.com/v2/realtime/ws?sample_rate=16000" -H Authorization:<Your_API_Token>
{"message_type": "SessionBegins", "session_id": "1a525566-dd5f-4b4f-8113-78cfccfe7cba", "expires_at": "2022-02-04T23:39:22.389242"}
{"audio_start": 0, "audio_end": 30, "confidence": 0.0, "text": "", "words": [], "created": "2022-02-04T15:39:33.016907", "message_type": "FinalTranscript", "punctuated": true, "text_formatted": true}
{"audio_start": 30, "audio_end": 60, "confidence": 0.0, "text": "", "words": [], "created": "2022-02-04T15:39:42.468178", "message_type": "FinalTranscript", "punctuated": true, "text_formatted": true}
{"audio_start": 60, "audio_end": 90, "confidence": 0.0, "text": "", "words": [], "created": "2022-02-04T15:39:52.483167", "message_type": "FinalTranscript", "punctuated": true, "text_formatted": true}


Install portaudio and pyaudio so that Python can capture audio from your Mac's microphone:

brew install portaudio
pip install pyaudio

Now bring it all together:

In [None]:
import pyaudio
import websockets
import asyncio
import base64
import json
from configure import auth_key

# Audio capture settings shared by the recording stream and the API request.
FRAMES_PER_BUFFER = 3200  # frames per read: 3200 / 16000 Hz = 0.2 s of audio
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000  # sampling rate in Hz; also sent to the API as `sample_rate`
p = pyaudio.PyAudio()

# starts recording
stream = p.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    frames_per_buffer=FRAMES_PER_BUFFER,
)

# the AssemblyAI endpoint we're going to hit; the sample rate is derived from
# RATE so the value announced to the API always matches what is recorded
URL = f"wss://api.assemblyai.com/v2/realtime/ws?sample_rate={RATE}"
 
async def send_receive():
    """Stream microphone audio to the websocket at ``URL`` and print transcripts.

    Connects to ``URL`` using ``auth_key`` as the ``Authorization`` header,
    prints the first message received, then runs two loops concurrently:

    * ``send`` reads ``FRAMES_PER_BUFFER`` frames at a time from the
      module-level ``stream``, base64-encodes them and sends them as
      ``{"audio_data": ...}`` JSON messages;
    * ``receive`` prints the ``text`` field of every message received.

    Each loop stops quietly when the connection is closed with code 4008.
    NOTE(review): 4008 is presumably the service's "session expired" close
    code -- confirm against the AssemblyAI API documentation.

    Raises:
        AssertionError: if the connection closes with any other code, or if
            any other error occurs while sending or receiving. The
            underlying exception is attached as ``__cause__``. Explicit
            ``raise`` is used instead of ``assert`` so the checks are not
            stripped when Python runs with ``-O``.
    """
    print(f'Connecting websocket to url {URL}')
    async with websockets.connect(
        URL,
        extra_headers=(("Authorization", auth_key),),
        ping_interval=5,
        ping_timeout=20,
    ) as _ws:
        await asyncio.sleep(0.1)
        print("Receiving SessionBegins ...")
        session_begins = await _ws.recv()
        print(session_begins)
        print("Sending messages ...")

        loop = asyncio.get_running_loop()

        async def send():
            while True:
                try:
                    # stream.read blocks until a full buffer has been
                    # captured; run it in a worker thread so the event loop
                    # can keep servicing receive() and websocket pings.
                    data = await loop.run_in_executor(
                        None, stream.read, FRAMES_PER_BUFFER
                    )
                    data = base64.b64encode(data).decode("utf-8")
                    json_data = json.dumps({"audio_data": data})
                    await _ws.send(json_data)
                except websockets.exceptions.ConnectionClosedError as e:
                    print(e)
                    if e.code != 4008:
                        raise AssertionError(
                            f"Unexpected websocket close code {e.code}"
                        ) from e
                    break
                except Exception as e:
                    raise AssertionError("Not a websocket 4008 error") from e
                # Brief pause between chunks so other tasks get scheduled.
                await asyncio.sleep(0.01)

            return True

        async def receive():
            while True:
                try:
                    result_str = await _ws.recv()
                    print(json.loads(result_str)['text'])
                except websockets.exceptions.ConnectionClosedError as e:
                    print(e)
                    if e.code != 4008:
                        raise AssertionError(
                            f"Unexpected websocket close code {e.code}"
                        ) from e
                    break
                except Exception as e:
                    raise AssertionError("Not a websocket 4008 error") from e

        # Run both directions concurrently; the first failure propagates.
        await asyncio.gather(send(), receive())

# Run the streaming session until it ends (or is interrupted), then always
# release the audio input stream and the PyAudio instance so the microphone
# is not left open.
try:
    asyncio.run(send_receive())
finally:
    stream.stop_stream()
    stream.close()
    p.terminate()