## S2T

In [None]:
import wave
from google.protobuf.empty_pb2 import Empty
from ondewo.s2t import speech_to_text_pb2
from ondewo.s2t.speech_to_text_pb2 import ListS2tPipelinesRequest

from typing import Iterator
from ondewo.s2t import speech_to_text_pb2
from typing import Iterator
                             

In [None]:
def find_pipeline_by_language_s2t(language):
    """Return the speech-to-text pipeline configs the server lists for ``language``.

    The request goes through the module-level ``s2t_client``. The
    ``pipeline_configs`` field of the response is returned unchanged, so
    callers can index it and read ``.id`` from its entries.

    Args:
        language: Language code placed in ``ListS2tPipelinesRequest.languages``.

    Returns:
        The ``pipeline_configs`` collection of the list response (may be empty).
    """
    response = s2t_client.services.speech_to_text.list_s2t_pipelines(
        request=ListS2tPipelinesRequest(languages=[language]),
    )
    return response.pipeline_configs

#### Example 1 

In [None]:
def s2t_ex_1(AUDIO_FILE, language):
    """Transcribe a complete WAV file with the first S2T pipeline for ``language``.

    Args:
        AUDIO_FILE: File name or file object accepted by ``wave.open``.
        language: Language code used to select the pipeline.

    Returns:
        The response returned by the ``transcribe_file`` call.

    Raises:
        IndexError: If the server lists no pipeline for ``language``.
    """
    # Pick the first pipeline (model setup) the server offers for this language.
    pipelines = find_pipeline_by_language_s2t(language)
    if not pipelines:
        raise IndexError(f"No speech-to-text pipeline found for language {language!r}")
    pipeline = pipelines[0]

    # Read all audio frames of the file which we want to transcribe
    with wave.open(AUDIO_FILE) as w:
        audio: bytes = w.readframes(w.getnframes())

    # Create transcription request
    request = speech_to_text_pb2.TranscribeFileRequest(
        s2t_pipeline_id=pipeline.id,
        audio_file=audio,
        ctc_decoding=speech_to_text_pb2.CTCDecoding.BEAM_SEARCH_WITH_LM,
    )

    # Send transcription request and hand the response back to the caller
    return s2t_client.services.speech_to_text.transcribe_file(request=request)

#### Example 2

In [None]:
# Send the file chunk-by-chunk to simulate a stream
def get_streaming_audio(audio_path: str, CHUNK_SIZE) -> Iterator[bytes]:
    """Yield the audio frames of a WAV file in consecutive chunks.

    Args:
        audio_path: File name (or file object) accepted by ``wave.open``.
        CHUNK_SIZE: Maximum number of frames read per chunk.

    Yields:
        The bytes returned by each ``readframes(CHUNK_SIZE)`` call, stopping
        at the first empty read.
    """
    with wave.open(audio_path) as wav_reader:
        # iter() with a sentinel keeps calling readframes() until it returns b"".
        yield from iter(lambda: wav_reader.readframes(CHUNK_SIZE), b"")


def create_streaming_request(
        audio_stream: Iterator[bytes],
        pipeline_id: str,
) -> Iterator[speech_to_text_pb2.TranscribeStreamRequest]:
    """Wrap audio chunks into streaming transcription requests.

    Args:
        audio_stream: Iterator of raw audio chunks.
        pipeline_id: Id of the speech-to-text pipeline to transcribe with.

    Yields:
        One ``TranscribeStreamRequest`` per audio chunk with
        ``end_of_stream=False``, followed by a final request with an empty
        chunk and ``end_of_stream=True`` that closes the stream.
    """
    for chunk in audio_stream:
        yield speech_to_text_pb2.TranscribeStreamRequest(
            audio_chunk=chunk,
            s2t_pipeline_id=pipeline_id,
            spelling_correction=False,
            ctc_decoding=speech_to_text_pb2.CTCDecoding.GREEDY,
            end_of_stream=False,
        )
    # End the stream: without this closing message the request stream is
    # never explicitly terminated.
    yield speech_to_text_pb2.TranscribeStreamRequest(
        audio_chunk=b'',
        s2t_pipeline_id=pipeline_id,
        spelling_correction=False,
        ctc_decoding=speech_to_text_pb2.CTCDecoding.GREEDY,
        end_of_stream=True,
    )
    
    
def s2t_ex_2(AUDIO_FILE, language, CHUNK_SIZE = 8000):
    """Transcribe a WAV file by streaming it chunk-by-chunk to the S2T server.

    Args:
        AUDIO_FILE: File name or file object accepted by ``wave.open``.
        language: Language code used to select the pipeline.
        CHUNK_SIZE: Number of audio frames sent per streaming request.

    Returns:
        The iterator of responses returned by ``transcribe_stream``.

    Raises:
        IndexError: If the server lists no pipeline for ``language``.
    """
    # Pick the first pipeline (model setup) the server offers for this language.
    pipelines = find_pipeline_by_language_s2t(language)
    if not pipelines:
        raise IndexError(f"No speech-to-text pipeline found for language {language!r}")
    pipeline_id = pipelines[0].id

    # Get audio stream (iterator of audio chunks)
    audio_stream: Iterator[bytes] = get_streaming_audio(AUDIO_FILE, CHUNK_SIZE)

    # Create streaming request
    streaming_request: Iterator[speech_to_text_pb2.TranscribeStreamRequest] = \
        create_streaming_request(audio_stream=audio_stream, pipeline_id=pipeline_id)

    # Transcribe the stream and get back responses
    response_gen: Iterator[speech_to_text_pb2.TranscribeStreamResponse] = \
        s2t_client.services.speech_to_text.transcribe_stream(streaming_request)

    return response_gen

In [None]:
import logging                                                           
import queue                                                             
import time                                                              
import uuid                                                              
from abc import ABCMeta, abstractmethod                                  
from typing import Iterator, Optional                                                                       
from ondewo.logging.logger import logger_console                         
from ondewo.nlu.session_pb2 import (                                     
    InputAudioConfig,                                                    
    QueryInput,                                                          
    StreamingDetectIntentRequest,                                        
)       

#### Example 3

In [None]:
# Audio settings for the live-streaming example in this cell.
CHUNK: int = 8000  # frames per PyAudio buffer and per read in PyAudioStreamerIn
MONO: int = 1  # presumably the channel count for mono audio - TODO confirm
RATE: int = 16000  # presumably the sample rate in Hz - TODO confirm
PLAYING: bool = False  # module-wide flag read and written through PyAudioStreamerIn.mute
WAV_HEADER_LENGTH: int = 46  # NOTE(review): looks like a WAV header size in bytes - confirm
SAMPLEWIDTH: int = 2  # presumably bytes per sample (16-bit audio) - TODO confirm

class StreamerInInterface(metaclass=ABCMeta):
    """Abstract interface for audio input streamers.

    Concrete streamers must provide the ``mute`` property, ``create_s2t_request``
    and ``close``; the bodies declared here do nothing.
    """

    @property
    @abstractmethod
    def mute(self) -> bool:
        """Mute flag of the streamer; implementations supply the value."""

    @mute.setter
    def mute(self, value: bool) -> None:
        """Set the mute flag; this base implementation ignores ``value``."""

    @abstractmethod
    def create_s2t_request(
        self,
        audio_stream: Iterator[bytes],
        pipeline_id: str,
    ) -> Iterator[speech_to_text_pb2.TranscribeStreamRequest]:
        """Turn ``audio_stream`` chunks into streaming requests for ``pipeline_id``."""

    @abstractmethod
    def close(self) -> None:
        """Release whatever the streamer holds open."""

class PyAudioStreamerIn(StreamerInInterface):
    """Audio input streamer that reads from a PyAudio input stream.

    The stream is opened in ``__init__`` with the module-level ``MONO``,
    ``RATE`` and ``CHUNK`` settings and 16-bit samples (``pyaudio.paInt16``).
    """

    def __init__(self) -> None:
        # pyaudio is imported here rather than at module level (presumably so
        # the rest of the notebook works without PyAudio installed).
        import pyaudio

        self.CHUNK: int = CHUNK
        self.pyaudio_object: pyaudio.PyAudio = pyaudio.PyAudio()
        self.stream: pyaudio.Stream = self.pyaudio_object.open(
            channels=MONO,
            format=pyaudio.paInt16,
            rate=RATE,
            input=True,
            frames_per_buffer=self.CHUNK,
        )

    @property
    def mute(self) -> bool:
        """Return the module-wide ``PLAYING`` flag."""
        return PLAYING

    @mute.setter
    def mute(self, value: bool) -> None:
        """Store ``value`` in the module-wide ``PLAYING`` flag."""
        global PLAYING
        PLAYING = value

    def close(self) -> None:
        """Close the input stream and terminate the PyAudio instance."""
        self.stream.close()
        self.pyaudio_object.terminate()

    def create_s2t_request(
            self,
            audio_stream: Iterator[bytes],
            pipeline_id: str,
    ) -> Iterator[speech_to_text_pb2.TranscribeStreamRequest]:
        """Wrap the chunks of ``audio_stream`` into streaming transcription requests.

        Yields one request per chunk with ``end_of_stream=False`` and then a
        final request with an empty chunk and ``end_of_stream=True``.
        """
        for chunk in audio_stream:
            yield speech_to_text_pb2.TranscribeStreamRequest(
                audio_chunk=chunk,
                s2t_pipeline_id=pipeline_id,
                spelling_correction=False,
                ctc_decoding=speech_to_text_pb2.CTCDecoding.GREEDY,
                end_of_stream=False,
            )
        # End the stream
        yield speech_to_text_pb2.TranscribeStreamRequest(
            audio_chunk=b'',
            s2t_pipeline_id=pipeline_id,
            spelling_correction=False,
            ctc_decoding=speech_to_text_pb2.CTCDecoding.GREEDY,
            end_of_stream=True,
        )

    def create_transciption_requests_from_stream(self, pipeline_id: str) -> Iterator[speech_to_text_pb2.TranscribeStreamRequest]:
        """Yield transcription requests built from the input stream, without end.

        Each iteration reads ``self.CHUNK`` frames from the PyAudio stream and
        wraps them in a request with ``end_of_stream=False``. The generator
        never finishes on its own; the consumer decides when to stop.
        """
        while True:
            # Read with the instance's chunk size, the same value the stream
            # was opened with in __init__.
            chunk: bytes = self.stream.read(self.CHUNK)
            logging.info("Sending %d bytes", len(chunk))
            yield speech_to_text_pb2.TranscribeStreamRequest(
                audio_chunk=chunk,
                s2t_pipeline_id=pipeline_id,
                spelling_correction=False,
                ctc_decoding=speech_to_text_pb2.CTCDecoding.BEAM_SEARCH_WITH_LM,
                end_of_stream=False,
            )
            # NOTE(review): short pause kept from the original; presumably
            # meant to throttle the request rate - confirm it is still needed.
            time.sleep(0.1)
            
        
                                      
def live_speech_helper(
    pipeline_id: str,
    session_id: str,
    save_to_disk: bool,
    streamer_name: str,
    initial_intent_display_name: Optional[str] = None,
) -> None:
    """Transcribe live audio from a PyAudio stream and print each transcription.

    Args:
        pipeline_id: Id of the speech-to-text pipeline to transcribe with.
        session_id: Accepted for interface compatibility; not used here.
        save_to_disk: Accepted for interface compatibility; not used here.
        streamer_name: Name of the input streamer; only names containing
            ``"pyaudio"`` are handled.
        initial_intent_display_name: Accepted for interface compatibility;
            not used here.
    """
    if "pyaudio" not in streamer_name:
        # The original silently did nothing here; keep the no-op but say why.
        logging.warning("Unsupported streamer %r; nothing to transcribe", streamer_name)
        return

    # Get audio stream (iterator of audio chunks)
    streamer: PyAudioStreamerIn = PyAudioStreamerIn()
    try:
        transcription_requests = streamer.create_transciption_requests_from_stream(pipeline_id)

        # Transcribe the stream and get back responses
        response_gen: Iterator[speech_to_text_pb2.TranscribeStreamResponse] = \
            s2t_client.services.speech_to_text.transcribe_stream(transcription_requests)

        # Print transcribed utterances
        for response_chunk in response_gen:
            print(response_chunk.transcription)
    finally:
        # Release the audio device even when the loop is interrupted or the
        # transcription call fails.
        streamer.close()

## T2S

In [None]:
import io
from typing import Any

import IPython.display as ipd
import soundfile as sf


from ondewo.t2s.client.services.text_to_speech import Text2Speech
from ondewo.t2s.text_to_speech_pb2 import ListT2sPipelinesRequest, Text2SpeechConfig
import IPython.display as ipd

In [None]:
def find_pipeline_by_language_t2s(language,t2s_client):
    """Return the ids of the text-to-speech pipelines listed for ``language``.

    Args:
        language: Language code placed in ``ListT2sPipelinesRequest.languages``.
        t2s_client: Client whose ``services.text_to_speech`` service is queried.

    Returns:
        A list with the ``id`` of every pipeline in the response.
    """
    listing_request = ListT2sPipelinesRequest(languages=[language])
    listing = t2s_client.services.text_to_speech.list_t2s_pipelines(request=listing_request)
    return [t2s_pipeline.id for t2s_pipeline in listing.pipelines]

def synthesis_request(t2s_service: Text2Speech, **req_kwargs: Any):
    """Synthesize speech and return the decoded audio.

    Args:
        t2s_service: Text-to-speech service used to send the request.
        **req_kwargs: Keyword arguments forwarded unchanged to ``SynthesizeRequest``.

    Returns:
        The result of ``soundfile.read`` on the response audio bytes; ``say``
        uses element 0 as the samples and element 1 as the sample rate.
    """
    # Bind SynthesizeRequest explicitly: none of this file's visible imports
    # defines the ``t2s`` alias the request type was previously reached through.
    from ondewo.t2s.text_to_speech_pb2 import SynthesizeRequest

    request = SynthesizeRequest(**req_kwargs)
    response = t2s_service.synthesize(request=request)

    print(
        f"Length of the generated audio is {response.audio_length} sec.",
        f"Generation time is {response.generation_time} sec.",
    )

    # Decode the returned audio bytes from an in-memory file.
    bio = io.BytesIO(response.audio)
    audio = sf.read(bio)
    return audio

def say(t2s_client,text, length_scale = 0.9, language = 'de', voice_num = 0):
    """Synthesize ``text`` and return it as a playable notebook audio widget.

    Args:
        t2s_client: Client providing the ``text_to_speech`` service.
        text: Text to synthesize.
        length_scale: Forwarded to the synthesis request as ``length_scale``.
        language: Language code used to look up the available pipelines.
        voice_num: Index into the list of pipeline ids found for ``language``.

    Returns:
        An ``IPython.display.Audio`` object with autoplay disabled.

    Raises:
        IndexError: If ``voice_num`` does not select one of the pipelines
            found for ``language``.
    """
    t2s_service: Text2Speech = t2s_client.services.text_to_speech

    pipeline_ids = find_pipeline_by_language_t2s(language, t2s_client)
    try:
        t2s_pipeline_id = pipeline_ids[voice_num]
    except IndexError as err:
        raise IndexError(
            f"voice_num {voice_num} is out of range: {len(pipeline_ids)} "
            f"text-to-speech pipeline(s) found for language {language!r}"
        ) from err

    # synthesis_request returns (samples, sample rate).
    samples, sample_rate = synthesis_request(
        t2s_service, text=text, t2s_pipeline_id=t2s_pipeline_id, length_scale=length_scale
    )

    return ipd.Audio(samples, rate=sample_rate, autoplay=False)

## NLU

In [None]:
from ondewo.nlu.session_pb2 import (
    DetectIntentRequest,
    DetectIntentResponse,
    QueryInput,
    QueryParameters,
    TextInput,
)

In [1]:
def create_session_nlu(project_id, session_id = None):
    """Build the NLU session path for a project.

    Args:
        project_id: Project id inserted into the path.
        session_id: Session id to use; a random UUID4 string is generated
            when this is ``None`` or otherwise falsy.

    Returns:
        The string ``projects/<project_id>/agent/sessions/<session_id>``.
    """
    # Fall back to a fresh UUID when no usable session id was supplied.
    effective_session_id = session_id or str(uuid.uuid4())
    return f'projects/{project_id}/agent/sessions/{effective_session_id}'


In [7]:
def test_nlu(session, text="Hallo, Ich habe hunger"):
    """Send ``text`` to the NLU agent and print the bot's text replies.

    Args:
        session: Session path passed on to ``test_nlu_helper``.
        text: User utterance to send.
    """
    nlu_response = test_nlu_helper(session, text)
    for message in nlu_response.query_result.fulfillment_messages:
        texts = message.text.text
        if not texts:
            # A fulfillment message without text entries has nothing to print;
            # indexing [0] on it would raise IndexError.
            continue
        print(f'bot: {texts[0]}')


In [None]:
def test_nlu_helper(session, text, language_code='de'):
    """Run intent detection for ``text`` and return the raw NLU response.

    Args:
        session: Session path the request is sent for.
        text: User utterance to detect the intent of.
        language_code: Language code of ``text``; defaults to ``'de'``, the
            value that was previously hard-coded.

    Returns:
        The ``DetectIntentResponse`` returned by the sessions service of the
        module-level ``nlu_client``.
    """
    nlu_request: DetectIntentRequest = DetectIntentRequest(
        session=session,
        query_input=QueryInput(
            text=TextInput(
                text=text,
                language_code=language_code,
            )
        ),
    )
    # detect intent (= get the NLU response)
    nlu_response: DetectIntentResponse = nlu_client.services.sessions.detect_intent(
        request=nlu_request,
    )
    return nlu_response

In [None]:
import re

def get_agent_by_id(search_id):
    """Print the agents whose parent path matches ``search_id``.

    Delegates to ``agent_exists`` with category 0, the parent-path element of
    each collected agent tuple.
    """
    agent_exists(search_agent=search_id, search_category=0)
    
def get_agent_by_name(search_name):
    """Print the agents whose display name matches ``search_name``.

    Delegates to ``agent_exists`` with category 1, the display-name element of
    each collected agent tuple.
    """
    agent_exists(search_agent=search_name, search_category=1)
    
def agent_exists(search_agent, search_category):
    """Print every listed agent that matches ``search_agent``.

    Agents are fetched through the module-level ``nlu_client``. For each match
    the function prints a running index, the display name, and the second
    ``/``-separated segment of the agent's parent path. It prints
    ``"Nothing found"`` when there is no match.

    Args:
        search_agent: Case-insensitive regular expression to search for. A
            term that is not a valid regular expression is matched literally.
        search_category: 0 to search the parent path, 1 to search the
            display name.
    """
    agents_collect = []
    # NOTE(review): only the page tokens current_index-0 .. current_index-90
    # are requested; presumably this covers 100 agents at 10 per page - confirm.
    for i in range(0, 100, 10):
        listing = nlu_client.services.agents.list_agents(
            request=agent.ListAgentsRequest(page_token=f'current_index-{i}')
        )
        # ``entry`` rather than ``agent`` so the ``agent`` module name used
        # above is not shadowed.
        agents_collect.extend(
            (entry.agent.parent, entry.agent.display_name)
            for entry in listing.agents_with_owners
        )

    try:
        pattern = re.compile(str(search_agent), re.IGNORECASE)
    except re.error:
        # Terms with stray regex metacharacters are searched for literally
        # instead of aborting the lookup.
        pattern = re.compile(re.escape(str(search_agent)), re.IGNORECASE)

    found = 0
    for a in agents_collect:
        if pattern.search(a[search_category]):
            print(found)
            print(a[1])
            print(a[0].split('/')[1])
            found += 1

    if not found:
        print("Nothing found")
    

In [8]:
def nlu_convo():
    """Run an interactive console conversation with the NLU agent.

    Prompts with ``you: `` and passes each line to ``test_nlu`` together with
    the module-level ``session``, until the user types ``end convo``.
    """
    # iter() with a sentinel keeps prompting until the exit phrase is entered.
    for user_text in iter(lambda: input('you: '), 'end convo'):
        test_nlu(session, user_text)