# EMISSOR chat bot with audio backend

@CLTL


## 1. Installing the platform


requirements.txt:

In [None]:
# CLTL packages with their extras; left unpinned here and located through
# pip's -f (find-links) option pointing at the cltl-requirements checkout.
cltl.backend[impl,host]
cltl.asr[impl]
cltl.vad[impl]

# Pinned third-party dependencies.
numpy==1.21.2

transformers==4.10.0
torch==1.9.0
cffi==1.14.6
jiwer==2.2.0
sounddevice==0.4.2
soundfile==0.10.3.post1
# The environment marker restricts PyAudio to macOS (sys_platform == 'darwin').
PyAudio==0.2.11;sys_platform=='darwin'
webrtcvad==2.0.10


The cltl packages are provided by the cltl-requirements repository: clone it and install from it using pip's `-f` (`--find-links`) option.

In [None]:
# Create a virtual environment, install the requirements into it and register
# it as the Jupyter kernel named "cltl".
#
# Each `!` line runs in its own subshell, so `source venv/bin/activate` would
# not carry over to the following lines. The venv's executables are therefore
# called directly by path (POSIX layout: venv/bin).
! git clone https://github.com/leolani/cltl-requirements.git
! python -m venv venv
! venv/bin/pip install -f cltl-requirements/leolani -r requirements.txt
# ipykernel is not listed in requirements.txt, but it must be importable by the
# venv's interpreter for the kernel registration below to work.
! venv/bin/pip install ipykernel
! venv/bin/python -m ipykernel install --user --name=cltl

Optional (this should not normally be necessary): instead of pinning versions in `requirements.txt`, we can use the versions defined in cltl-requirements. To do so, mirror the external dependencies and install them from `cltl-requirements/mirror/`.

In [None]:
# Alternative installation: build a local mirror of the external dependencies
# and install everything from local find-links directories (--no-index).
#
# Each `!` line runs in its own subshell, so `source venv/bin/activate` would
# not carry over to the following lines. The venv's executables are therefore
# called directly by path (POSIX layout: venv/bin).
! git clone --recurse-submodules -j8 https://github.com/leolani/cltl-requirements.git
# `&&` (not `&`, which would background the commands) makes `make build` run
# inside cltl-requirements, and only if the `cd` succeeded. No `cd ..` is
# needed because the directory change ends with this line's subshell.
! cd cltl-requirements && make build
! python -m venv venv
! venv/bin/pip install --no-index -f cltl-requirements/mirror -f cltl-requirements/leolani -r requirements.txt
# ipykernel is not listed in requirements.txt, but it must be importable by the
# venv's interpreter for the kernel registration below to work.
# NOTE(review): installed from the default index here; confirm whether it
# should come from the mirror instead.
! venv/bin/pip install ipykernel
! venv/bin/python -m ipykernel install --user --name=cltl

## 2. Running with ASR

In [None]:
import numpy as np
from cltl.asr.wav2vec_asr import Wav2Vec2ASR
from cltl.backend.source.pyaudio_source import PyAudioSource
from cltl.vad.webrtc_vad import WebRtcVAD

from cltl.backend.api.util import raw_frames_to_np

In [None]:
# Continuously record from the microphone, cut out the speech with voice
# activity detection and print the transcription of each detected utterance.
# Interrupt the kernel to stop the loop.

# Audio settings. The same sampling rate is passed to the audio source and to
# the ASR model, so it is defined once to keep the two from drifting apart.
SAMPLING_RATE = 16000  # Hz
CHANNELS = 1
FRAME_SIZE = 480  # samples per frame, i.e. 30 ms at 16 kHz

# NOTE(review): the positional arguments are assumed to be
# (rate, channels, frame_size), matching the attributes read from `source`
# below - confirm against PyAudioSource.
source = PyAudioSource(SAMPLING_RATE, CHANNELS, FRAME_SIZE)
vad = WebRtcVAD()
asr = Wav2Vec2ASR(model_id="facebook/wav2vec2-large-960h", sampling_rate=SAMPLING_RATE)

while True:
    try:
        # The source is re-entered on every iteration and released again by
        # the `with` block, also when an error occurs.
        with source as audio:
            frames = raw_frames_to_np(audio, source.frame_size, source.channels, source.depth)
            # detect_vad produces three values; only the speech frames are used here.
            speech, offset, consumed = tuple(vad.detect_vad(frames, source.rate))
            # Join the individual speech frames into one array for transcription.
            text = asr.speech_to_text(np.concatenate(tuple(speech)), source.rate)
            print("Detected:", text)
    except KeyboardInterrupt:
        # Kernel interrupt: leave the loop cleanly instead of ending the cell
        # with a traceback.
        break
    except Exception as e:
        # Best effort: report the failure and keep listening.
        print("Failed", e)