# Ondewo s2s client tutorial

First, import the clients with these lines:

In [1]:
from ondewo.csi.client.client_config import ClientConfig
from ondewo.nlu.client_config import ClientConfig as NluClientConfig

from ondewo.csi.client.client import Client as CsiClient
from ondewo.s2t.client.client import Client as S2tClient
from ondewo.t2s.client.client import Client as T2sClient
from ondewo.nlu.client import Client as NluClient

Second, import the gRPC auto-generated code. It is generated automatically from the protobuf definitions.

In [2]:
import ondewo.s2t.speech_to_text_pb2 as s2t
import ondewo.t2s.text_to_speech_pb2 as t2s
import ondewo.nlu.agent_pb2 as agent

In [3]:
# TODO: in Colab, provide the CSI JSON configuration file ('csi.json') before running this cell.

# Parse the CSI client configuration from the JSON file.
with open('csi.json') as csi_file:
    csi_json = csi_file.read()
config = ClientConfig.from_json(csi_json)

# Parse the NLU client configuration.
# NOTE(review): this also reads 'csi.json' -- confirm that a separate NLU file is not intended.
with open('csi.json') as nlu_file:
    nlu_json = nlu_file.read()
nlu_config = NluClientConfig.from_json(nlu_json)

## CSI client 

In [4]:
# Create the CSI client from the configuration parsed from 'csi.json'.
csi_client = CsiClient(config=config)

## s2t client

Now create the s2t client:

In [5]:
# Speech-to-text client; it is built from the same `config` object as the CSI client.
s2t_client = S2tClient(config=config)

#### Example 1 s2t 

In [157]:
def find_pipeline_by_language_s2t(language):
    """Return the speech-to-text pipeline configs the server lists for ``language``.

    Args:
        language: Language code used as the only entry of the request's
            ``languages`` filter (for example ``'en'``).

    Returns:
        The ``pipeline_configs`` field of the list response. Despite the
        function name, the entries are pipeline config messages rather than
        bare ids; callers read ``.id`` from the entry they pick.
    """
    request = ListS2tPipelinesRequest(languages=[language])
    response = s2t_client.services.speech_to_text.list_s2t_pipelines(request=request)
    return response.pipeline_configs

In [169]:
import wave

from google.protobuf.empty_pb2 import Empty

from ondewo.s2t import speech_to_text_pb2
from ondewo.s2t.speech_to_text_pb2 import ListS2tPipelinesRequest

AUDIO_FILE: str = "s2t_examples/audiofiles/sample_1.wav"

# Pick the first speech-2-text pipeline (model setup) the server lists for English.
# Fail with a readable message instead of an IndexError when there is none.
english_pipelines = find_pipeline_by_language_s2t('en')
if len(english_pipelines) == 0:
    raise RuntimeError("No English speech-to-text pipeline is available on the server")
pipeline = english_pipelines[0]

# Read all audio frames of the file which we want to transcribe
with wave.open(AUDIO_FILE) as w:
    audio: bytes = w.readframes(w.getnframes())

# Create transcription request
request = speech_to_text_pb2.TranscribeFileRequest(
    s2t_pipeline_id=pipeline.id,
    audio_file=audio,
    ctc_decoding=speech_to_text_pb2.CTCDecoding.BEAM_SEARCH_WITH_LM
)
# Send transcription request and get response
transcribe_response = s2t_client.services.speech_to_text.transcribe_file(request=request)


In [170]:
# Show the transcription returned for the audio file
print(transcribe_response.transcription)

# Fetch the unfiltered pipeline list once and summarise ids, domains and languages
s2t_pipelines = s2t_client.services.speech_to_text.list_s2t_pipelines(request=s2t.ListS2tPipelinesRequest())
s2t_configs = s2t_pipelines.pipeline_configs

s2t_ids = [cfg.id for cfg in s2t_configs]
print(f"Speech to text pipelines: {s2t_ids}")

s2t_domains = {cfg.description.domain for cfg in s2t_configs}
print(f"Speech to text domains: {s2t_domains}")

s2t_languages = {cfg.description.language for cfg in s2t_configs}
print(f"Speech to text languages: {s2t_languages}")


hello i would like to order a large piece of whamphoas 
Speech to text pipelines: ['ato001', 'default_german', 'keb001', 'german_general', 'tir002', 'general_english']
Speech to text domains: {'general'}
Speech to text languages: {'en', 'de'}


#### Example 2 s2t

In [165]:
import wave
from typing import Iterator
from google.protobuf.empty_pb2 import Empty

from ondewo.s2t import speech_to_text_pb2

AUDIO_FILE: str = "s2t_examples/audiofiles/sample_2.wav"
CHUNK_SIZE: int = 8000


# The file is sent chunk-by-chunk to simulate a stream
def get_streaming_audio(audio_path: str) -> Iterator[bytes]:
    """Yield the audio frames of a WAV file in consecutive chunks.

    Args:
        audio_path: Path of the WAV file to read.

    Yields:
        Byte strings holding up to ``CHUNK_SIZE`` frames each; the last chunk
        may be shorter. Nothing is yielded for a file without frames.
    """
    with wave.open(audio_path) as wav_reader:
        # iter(callable, sentinel) keeps calling readframes until it returns b""
        yield from iter(lambda: wav_reader.readframes(CHUNK_SIZE), b"")


def create_streaming_request(
        audio_stream: Iterator[bytes],
        pipeline_id: str,
) -> Iterator[speech_to_text_pb2.TranscribeStreamRequest]:
    """Wrap audio chunks in streaming transcription requests and terminate the stream.

    Args:
        audio_stream: Iterator of raw audio chunks.
        pipeline_id: Id of the speech-to-text pipeline every request is addressed to.

    Yields:
        One ``TranscribeStreamRequest`` per audio chunk with ``end_of_stream=False``,
        followed by a final request with an empty chunk and ``end_of_stream=True``.
        The final request mirrors ``PyAudioStreamerIn.create_s2t_request``.
    """
    for chunk in audio_stream:
        yield speech_to_text_pb2.TranscribeStreamRequest(
            audio_chunk=chunk,
            s2t_pipeline_id=pipeline_id,
            spelling_correction=False,
            ctc_decoding=speech_to_text_pb2.CTCDecoding.GREEDY,
            end_of_stream=False,
        )
    # End the stream
    yield speech_to_text_pb2.TranscribeStreamRequest(
        audio_chunk=b'',
        s2t_pipeline_id=pipeline_id,
        spelling_correction=False,
        ctc_decoding=speech_to_text_pb2.CTCDecoding.GREEDY,
        end_of_stream=True,
    )


# Pick the first speech-2-text pipeline (model setup) the server lists for English.
# The lookup no longer indexes a fixed position in the unfiltered list, which
# raised IndexError on servers with fewer than six pipelines.
english_pipelines = find_pipeline_by_language_s2t('en')
if len(english_pipelines) == 0:
    raise RuntimeError("No English speech-to-text pipeline is available on the server")
pipeline_id = english_pipelines[0].id

# Get audio stream (iterator of audio chunks)
audio_stream: Iterator[bytes] = get_streaming_audio(AUDIO_FILE)

# Create streaming request
streaming_request: Iterator[speech_to_text_pb2.TranscribeStreamRequest] = \
    create_streaming_request(audio_stream=audio_stream, pipeline_id=pipeline_id)

# Transcribe the stream and get back responses
response_gen: Iterator[speech_to_text_pb2.TranscribeStreamResponse] = \
    s2t_client.services.speech_to_text.transcribe_stream(streaming_request)


In [166]:
# Print each transcribed utterance as the server streams it back.
# The enumerate index was never used, so the responses are iterated directly.
for response_chunk in response_gen:
    print(response_chunk.transcription)

hello i would like to order one large bitza 


In [167]:
# Fetch the unfiltered pipeline list once and summarise ids, domains and languages
s2t_pipelines = s2t_client.services.speech_to_text.list_s2t_pipelines(request=s2t.ListS2tPipelinesRequest())
s2t_configs = s2t_pipelines.pipeline_configs

s2t_ids = [cfg.id for cfg in s2t_configs]
print(f"Speech to text pipelines: {s2t_ids}")

s2t_domains = {cfg.description.domain for cfg in s2t_configs}
print(f"Speech to text domains: {s2t_domains}")

s2t_languages = {cfg.description.language for cfg in s2t_configs}
print(f"Speech to text languages: {s2t_languages}")


Speech to text pipelines: ['ato001', 'default_german', 'keb001', 'german_general', 'tir002', 'general_english']
Speech to text domains: {'general'}
Speech to text languages: {'en', 'de'}


In [59]:
import logging                                                           
import queue                                                             
import time                                                              
import uuid                                                              
from abc import ABCMeta, abstractmethod                                  
from typing import Iterator, Optional                                                                       
from ondewo.logging.logger import logger_console                         
from ondewo.nlu.session_pb2 import (                                     
    InputAudioConfig,                                                    
    QueryInput,                                                          
    StreamingDetectIntentRequest,                                        
)                                                                        
from ondewo.s2t import speech_to_text_pb2                                
                                                                 
                                                                         
# Module-level audio settings for the live-streaming helpers.
CHUNK: int = 8000  # frames per read and frames_per_buffer of the PyAudio input stream
MONO: int = 1  # presumably the channel count (mono) -- confirm
RATE: int = 16000  # presumably the sample rate in Hz -- confirm
PLAYING: bool = False  # module-wide flag read and written through PyAudioStreamerIn.mute
WAV_HEADER_LENGTH: int = 46  # NOTE(review): presumably a WAV header size in bytes -- confirm
SAMPLEWIDTH: int = 2  # presumably bytes per sample (16-bit audio) -- confirm

        
class StreamerInInterface(metaclass=ABCMeta):
    """Abstract interface for audio input streamers that produce speech-to-text requests."""

    @property
    @abstractmethod
    def mute(self) -> bool:
        """Return the mute flag of the streamer."""

    @mute.setter
    def mute(self, value: bool) -> None:
        """Set the mute flag; the base implementation does nothing."""

    @abstractmethod
    def create_s2t_request(
        self,
        audio_stream: Iterator[bytes],
        pipeline_id: str,
    ) -> Iterator[speech_to_text_pb2.TranscribeStreamRequest]:
        """Turn an iterator of audio chunks into streaming requests for ``pipeline_id``."""

    @abstractmethod
    def close(self) -> None:
        """Release whatever the streamer holds; the base implementation does nothing."""

class PyAudioStreamerIn(StreamerInInterface):
    """Streamer that reads audio from a PyAudio input stream and wraps it in S2T stream requests."""

    def __init__(self) -> None:
        """Open a 16-bit PyAudio input stream configured from the module-level audio constants."""
        # pyaudio is imported here, so it is only required once a streamer is instantiated
        import pyaudio

        self.CHUNK: int = CHUNK
        self.pyaudio_object: pyaudio.PyAudio = pyaudio.PyAudio()
        # MONO and RATE carry the same values (1 and 16000) that were previously
        # repeated here as literals.
        self.stream: pyaudio.Stream = self.pyaudio_object.open(
            channels=MONO,
            format=pyaudio.paInt16,
            rate=RATE,
            input=True,
            frames_per_buffer=self.CHUNK,
        )

    @property
    def mute(self) -> bool:
        """Return the module-level ``PLAYING`` flag."""
        return PLAYING

    @mute.setter
    def mute(self, value: bool) -> None:
        """Store ``value`` in the module-level ``PLAYING`` flag (shared by all instances)."""
        global PLAYING
        PLAYING = value

    def close(self) -> None:
        """Close the input stream and terminate the PyAudio instance."""
        self.stream.close()
        self.pyaudio_object.terminate()

    def create_s2t_request(
            self,
            audio_stream: Iterator[bytes],
            pipeline_id: str,
    ) -> Iterator[speech_to_text_pb2.TranscribeStreamRequest]:
        """Wrap the chunks of ``audio_stream`` in stream requests and terminate the stream.

        Args:
            audio_stream: Iterator of raw audio chunks.
            pipeline_id: Id of the speech-to-text pipeline every request is addressed to.

        Yields:
            One request per chunk with ``end_of_stream=False``, then a final
            request with an empty chunk and ``end_of_stream=True``.
        """
        for chunk in audio_stream:
            yield speech_to_text_pb2.TranscribeStreamRequest(
                audio_chunk=chunk,
                s2t_pipeline_id=pipeline_id,
                spelling_correction=False,
                ctc_decoding=speech_to_text_pb2.CTCDecoding.GREEDY,
                end_of_stream=False,
            )
        # End the stream
        yield speech_to_text_pb2.TranscribeStreamRequest(
            audio_chunk=b'',
            s2t_pipeline_id=pipeline_id,
            spelling_correction=False,
            ctc_decoding=speech_to_text_pb2.CTCDecoding.GREEDY,
            end_of_stream=True,
        )

    def create_transciption_requests_from_stream(self, pipeline_id: str) -> Iterator[speech_to_text_pb2.TranscribeStreamRequest]:
        """Endlessly read the input stream and yield one stream request per chunk read.

        The generator never finishes on its own and never sends a request with
        ``end_of_stream=True``; the consumer has to stop iterating.

        Args:
            pipeline_id: Id of the speech-to-text pipeline every request is addressed to.
        """
        while True:
            # Read with the instance's chunk size, the same value that sized frames_per_buffer
            chunk: bytes = self.stream.read(self.CHUNK)
            logging.info(f"Sending {len(chunk)} bytes")
            yield speech_to_text_pb2.TranscribeStreamRequest(
                audio_chunk=chunk,
                s2t_pipeline_id=pipeline_id,
                spelling_correction=False,
                ctc_decoding=speech_to_text_pb2.CTCDecoding.BEAM_SEARCH_WITH_LM,
                end_of_stream=False,
            )
            # Pause 0.1 s before the next read
            time.sleep(0.1)
            
        
                                      
def live_speech_helper(
    pipeline_id: str,
    session_id: str,
    save_to_disk: bool,
    streamer_name: str,
    initial_intent_display_name: Optional[str] = None,
) -> None:
    """Stream audio from the selected streamer to speech-to-text and print each transcription.

    Args:
        pipeline_id: Id of the speech-to-text pipeline to transcribe with.
        session_id: Session id; a random UUID is generated when it is empty or None.
            It is not used further by this function.
        save_to_disk: Accepted for interface compatibility; not used by this function.
        streamer_name: Streaming only starts when this contains ``"pyaudio"``;
            for any other value the function returns without streaming.
        initial_intent_display_name: Accepted for interface compatibility; not used.
    """
    session_id = session_id if session_id else str(uuid.uuid4())
    # The CSI client and its conversations service are created but not used by the
    # transcription loop below. The former `Conversations` annotation was dropped
    # because no such name is imported in the notebook.
    client: CsiClient = CsiClient(config=config, use_secure_channel=config.grpc_cert is not None)
    conversations_service = client.services.conversations

    if "pyaudio" in streamer_name:
        # Get audio stream (iterator of audio chunks):
        streamer: StreamerInInterface = PyAudioStreamerIn()
        try:
            transcription_requests = streamer.create_transciption_requests_from_stream(pipeline_id)

            # Transcribe the stream and get back responses
            response_gen: Iterator[speech_to_text_pb2.TranscribeStreamResponse] = \
                s2t_client.services.speech_to_text.transcribe_stream(transcription_requests)

            # Print transcribed utterances
            for response_chunk in response_gen:
                print(response_chunk.transcription)
        finally:
            # Always release the audio device, also when the loop is interrupted
            # or the stream raises.
            streamer.close()
            
def live_speech(pipeline_id, session_id=None, save_to_disk=False, streamer_name="pyaudio"):
    """Transcribe live audio with the given pipeline and print the transcriptions.

    Args:
        pipeline_id: Id of the speech-to-text pipeline to use.
        session_id: Session id to use. When omitted, a fresh random UUID is
            generated on every call. The previous default was computed once at
            definition time and therefore shared by all calls.
        save_to_disk: Forwarded unchanged to ``live_speech_helper``.
        streamer_name: Forwarded unchanged to ``live_speech_helper``.
    """
    if session_id is None:
        session_id = str(uuid.uuid4())
    live_speech_helper(pipeline_id, session_id, save_to_disk, streamer_name)

In [77]:
# live_speech('default_german')

## t2s client

In [61]:
import io
from typing import Any

import IPython.display as ipd
import soundfile as sf


from ondewo.t2s.client.services.text_to_speech import Text2Speech
from ondewo.t2s.text_to_speech_pb2 import ListT2sPipelinesRequest, Text2SpeechConfig

In [62]:
# Text-to-speech client; it is built from the same `config` object as the CSI and s2t clients.
t2s_client = T2sClient(config=config)

#### Example t2s

In [63]:
def find_pipeline_by_language_t2s(language):
    """Return the ids of the text-to-speech pipelines the server lists for ``language``.

    Args:
        language: Language code used as the only entry of the request's ``languages`` filter.

    Returns:
        A list with the ``id`` of every pipeline in the response, in response order.
    """
    request = ListT2sPipelinesRequest(languages=[language])
    response = t2s_client.services.text_to_speech.list_t2s_pipelines(request=request)
    return [t2s_pipeline.id for t2s_pipeline in response.pipelines]

In [64]:
# Shortcut to the text-to-speech service of the client; say() passes it to synthesis_request().
t2s_service: Text2Speech = t2s_client.services.text_to_speech
def synthesis_request(t2s_service: Text2Speech, **req_kwargs: Any):
    """Synthesize speech through ``t2s_service`` and decode the returned audio.

    Args:
        t2s_service: Text-to-speech service whose ``synthesize`` method is called.
        **req_kwargs: Keyword arguments forwarded unchanged to ``t2s.SynthesizeRequest``.

    Returns:
        The result of ``soundfile.read`` on the response's audio bytes. Callers
        index it as ``[0]`` for the audio data and ``[1]`` for the sample rate.
    """
    synth_response = t2s_service.synthesize(request=t2s.SynthesizeRequest(**req_kwargs))

    # Report the audio length and generation time on a single output line
    length_msg = f"Length of the generated audio is {synth_response.audio_length} sec."
    timing_msg = f"Generation time is {synth_response.generation_time} sec."
    print(length_msg, timing_msg)

    # Decode the in-memory audio bytes with soundfile
    return sf.read(io.BytesIO(synth_response.audio))


In [65]:
import IPython.display as ipd


def say(text, length_scale = 0.9, t2s_pipeline_id = None):
    """Synthesize ``text`` and return it as a playable IPython audio object.

    Args:
        text: Text to synthesize.
        length_scale: Forwarded to the synthesis request as ``length_scale``.
        t2s_pipeline_id: Id of the text-to-speech pipeline to use. When omitted,
            the first pipeline the server lists for ``'de'`` is looked up at
            call time. The previous default ran this lookup once, while the
            function was being defined.

    Returns:
        An ``IPython.display.Audio`` object with ``autoplay=False``.

    Raises:
        RuntimeError: If no pipeline id is given and the server lists no German pipeline.
    """
    if t2s_pipeline_id is None:
        german_pipeline_ids = find_pipeline_by_language_t2s('de')
        if not german_pipeline_ids:
            raise RuntimeError("No German text-to-speech pipeline is available on the server")
        t2s_pipeline_id = german_pipeline_ids[0]

    audio = synthesis_request(
        t2s_service, text=text, t2s_pipeline_id=t2s_pipeline_id, length_scale=length_scale
    )

    sr = audio[1]  # sample rate
    return ipd.Audio(audio[0], rate=sr, autoplay=False)  # load a NumPy array

In [66]:
# Fetch the unfiltered text-to-speech pipeline list once and summarise ids, domains and languages
t2s_pipelines = t2s_client.services.text_to_speech.list_t2s_pipelines(request=t2s.ListT2sPipelinesRequest())
t2s_entries = t2s_pipelines.pipelines

t2s_ids = [entry.id for entry in t2s_entries]
print(f"Text to speech pipelines: {t2s_ids}")

t2s_domains = {entry.description.domain for entry in t2s_entries}
print(f"Text to speech domains: {t2s_domains}")
t2s_languages = {entry.description.language for entry in t2s_entries}
print(f"Text to speech languages: {t2s_languages}")

Text to speech pipelines: ['thorsten', 'default_german', 'thomas', 'linda', 'sandra', 'kerstin', 'thomas_2']
Text to speech domains: {'general'}
Text to speech languages: {'de'}


In [67]:
voice_num = 3  # position in the list of pipeline ids returned for `language`
language = "de"
length_scale = 1
# say() returns an IPython Audio object; as the cell's last expression it is displayed by the notebook.
say("Gruess dich Ich bin Gabriel", length_scale, find_pipeline_by_language_t2s(language)[voice_num])

Length of the generated audio is 2.1014058589935303 sec. Generation time is 0.09093021601438522 sec.


In [68]:
from ondewo.nlu.session_pb2 import (
    DetectIntentRequest,
    DetectIntentResponse,
    QueryInput,
    QueryParameters,
    TextInput,
)


In [69]:
# NLU client built from the separately parsed NluClientConfig (`nlu_config`).
nlu_client = NluClient(config=nlu_config)

In [70]:
def create_session_nlu(project_id = "924e70ca-c786-494c-bc48-4d0999da74db"):
    """Build a session path with a fresh random session id for ``project_id``.

    Args:
        project_id: Id of the project the session belongs to.

    Returns:
        A string of the form ``projects/<project_id>/agent/sessions/<uuid4>``.
    """
    return f'projects/{project_id}/agent/sessions/{uuid.uuid4()}'

In [71]:
# Id of the NLU project to talk to; the session path below is created for it.
project_id =  "924e70ca-c786-494c-bc48-4d0999da74db"
session = create_session_nlu(project_id)

In [72]:
# Build a German text query for the session created above
text_query = TextInput(text='Hallo, Ich habe hunger', language_code='de')
nlu_request: DetectIntentRequest = DetectIntentRequest(
    session=session,
    query_input=QueryInput(text=text_query),
)

# detect intent (= get the NLU response)
sessions_service = nlu_client.services.sessions
nlu_response: DetectIntentResponse = sessions_service.detect_intent(request=nlu_request)

In [73]:
# Show the first text of the first fulfillment message in the NLU response.
nlu_response.query_result.fulfillment_messages[0].text.text[0]

'Ich habe dich leider nicht verstanden. Kannst du das bitte wiederholen.'

In [146]:
import re

def get_agent_by_id(search_id):
    """Print the listed agents whose parent path matches ``search_id`` (search category 0)."""
    agent_exists(search_agent=search_id, search_category=0)
    
def get_agent_by_name(search_name):
    """Print the listed agents whose display name matches ``search_name`` (search category 1)."""
    agent_exists(search_agent=search_name, search_category=1)
    
def agent_exists(search_agent, search_category):
    """Print every listed agent whose parent path or display name contains ``search_agent``.

    For each match the running match index, the display name and the second
    ``/``-separated segment of the parent path are printed on separate lines.
    ``"Nothing found"`` is printed when there is no match.

    Args:
        search_agent: Text to look for. It is matched literally and
            case-insensitively, so regex metacharacters in it are safe.
        search_category: ``0`` to search the agent's parent path, ``1`` to
            search its display name.
    """
    agents_collect = []
    # Only the ten page tokens 'current_index-0' ... 'current_index-90' are requested.
    for i in range(0, 100, 10):
        agents = nlu_client.services.agents.list_agents(
            request=agent.ListAgentsRequest(page_token=f'current_index-{i}'))
        if agents.agents_with_owners:
            agents_collect += [
                (entry.agent.parent, entry.agent.display_name)
                for entry in agents.agents_with_owners
            ]

    # Escape the search text: it was previously interpolated into the pattern
    # unescaped, so input such as "(" raised re.error.
    pattern = re.compile(re.escape(str(search_agent)), re.IGNORECASE)

    found = 0
    for a in agents_collect:
        if pattern.search(a[search_category]):
            print(found)
            print(a[1])
            print(a[0].split('/')[1])
            found += 1

    if not found:
        print("Nothing found")
    

In [147]:
search_agent_name = "KEb"
get_agent_by_name(search_agent_name)

0
KEB001 Sortierroboter
3440ff23-b612-4231-a29b-e2e692c06eda
1
KEB001 Sortierroboter
cbb4b540-2f5e-4b56-a137-64d9261d35e2
2
KEB001 Sortierroboter ChristophEtzlinger
35a31183-e8b7-4760-85c6-f41067e8463d
3
KEB001 Sortierroboter Josef Steiner
31a15059-46a2-4fb1-a86e-7f73bb752db0
4
KEB001 Sortierroboter Martin Baresch
0e1eafd1-3d93-4c10-8940-575cae01352d
5
KEB001 Sortierroboter MatthiasReiter
a5b35557-5b95-497c-8ab6-fdfbc66742b9


In [150]:
search_agent_by_id = "924e70ca-c786-494c-bc48-4d0999da74db"
get_agent_by_id(search_agent_by_id)

0
SHOWCASE Pizza Bot
924e70ca-c786-494c-bc48-4d0999da74db
