# Step 2: Speech to Text with LUIS

**Content**

* Speech to Text with LUIS
    * Summarizing the classification results
* Saving transcriptions to JSON files


In [50]:
! pip install azure-cognitiveservices-speech



You should consider upgrading via the 'c:\users\blueshift\appdata\local\programs\python\python38\python.exe -m pip install --upgrade pip' command.


In [52]:
from collections import Counter
import time
import ast
import json, re
from os import listdir
import sys
import os.path 


In [53]:
import azure.cognitiveservices.speech as speechsdk
import os, yaml, time

Load Configs

In [55]:
# Read the notebook settings from config/config.yaml.
config_file = os.path.join("config", "config.yaml")
with open(config_file, mode='r') as ymlfile:
    config = yaml.load(ymlfile, Loader=yaml.FullLoader)

# Speech service settings, taken from the "speech" section of the config.
speech_key, speech_service_region, language = (
    config['speech'][field]
    for field in ('speech_key', 'speech_region', 'speech_language')
)

# LUIS settings, taken from the "luis_app" section of the config.
luis_app_id = config['luis_app']['app_id']


## Speech to Text with LUIS

References:
* https://github.com/Azure-Samples/cognitive-services-speech-sdk/blob/master/samples/python/console/intent_sample.py

In [56]:
def recognize_intent_from_speech_continuous(speech_key, speech_service_region,luis_app_id, language, file_name):
    """Run Speech to Text integrated with LUIS over one audio file.

    Every recognized utterance is printed and its LUIS intent JSON is parsed
    and collected. The call blocks until the recognizer reports that the
    session stopped or that recognition was canceled.

    Args:
        speech_key: subscription key passed to ``speechsdk.SpeechConfig``.
        speech_service_region: region passed to ``speechsdk.SpeechConfig``.
        luis_app_id: app id passed to ``speechsdk.intent.LanguageUnderstandingModel``.
        language: value passed as ``speech_recognition_language``.
        file_name: path of the audio file passed to ``speechsdk.audio.AudioConfig``.

    Returns:
        dict: ``{'result': [...]}`` where the list holds one parsed intent JSON
        document per recognized utterance, in the order the events arrived.
        Utterances whose intent JSON is empty or not valid JSON are skipped.
    """
    result_json = {'result': []}

    # <IntentContinuousRecognitionWithFile>
    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=speech_service_region, speech_recognition_language=language)
    audio_config = speechsdk.audio.AudioConfig(filename=file_name)

    # Set up the intent recognizer
    intent_recognizer = speechsdk.intent.IntentRecognizer(speech_config=speech_config, audio_config=audio_config)

    # set up the intents that are to be recognized. These can be a mix of simple phrases and
    # intents specified through a LanguageUnderstanding Model.
    model = speechsdk.intent.LanguageUnderstandingModel(app_id=luis_app_id)
    intent_recognizer.add_all_intents(model)

    # Flag flipped by the stop callback; polled by the wait loop below.
    done = False

    def stop_cb(evt):
        """Signal the wait loop to finish (session stopped or recognition canceled)."""
        nonlocal done
        print('CLOSING on {}'.format(evt))
        print ("ok")
        done = True

    def speech_end_cb(evt):
        """Log an end-of-speech event without stopping.

        This event can fire after every utterance, so it must not end the
        recognition of a multi-utterance file.
        """
        print('CLOSING on {}'.format(evt))

    def recognized_cb(evt):
        """Print a recognized utterance and store its parsed intent JSON."""
        result = evt.result
        print(
            "RECOGNIZED: {}\n\tText: {} (Reason: {})\n\tIntent Id: {}\n\tIntent JSON: {}".format(
                evt, result.text, result.reason, result.intent_id, result.intent_json))
        if not result.intent_json:
            # Nothing to parse; json.loads("") would raise inside the callback.
            print("SKIPPED: empty intent JSON for text: {}".format(result.text))
            return
        try:
            intent_payload = json.loads(result.intent_json)
        except ValueError as err:
            print("SKIPPED: invalid intent JSON ({})".format(err))
            return
        result_json['result'].append(intent_payload)

    # Connect callback functions to the signals the intent recognizer fires.
    intent_recognizer.recognized.connect(recognized_cb)

    # cancellation event
    intent_recognizer.canceled.connect(lambda evt: print("CANCELED: {} ({})".format(evt.cancellation_details, evt.reason)))

    # stop continuous recognition on session stopped or canceled events;
    # end-of-speech events are only logged.
    intent_recognizer.session_stopped.connect(stop_cb)
    intent_recognizer.speech_end_detected.connect(speech_end_cb)
    intent_recognizer.canceled.connect(stop_cb)

    # And finally run the intent recognizer. The output of the callbacks should be printed to the console.
    intent_recognizer.start_continuous_recognition()
    try:
        while not done:
            time.sleep(.5)
    finally:
        # Always release the recognizer, even if the wait loop is interrupted.
        intent_recognizer.stop_continuous_recognition()

    # </IntentContinuousRecognitionWithFile>
    return result_json

In [57]:
def list_files(dir):
    '''Return the names of the entries in `dir` that are files (sub-directories are left out).'''
    file_names = []
    for entry in os.listdir(dir):
        full_path = os.path.join(dir, entry)
        if os.path.isfile(full_path):
            file_names.append(entry)
    return file_names

In [58]:
def realizar_transcricao_plus_intencao( container_name,files_name=None):
    ''' Transcribe and classify the intents of the audio files in `container_name`.

        When `files_name` is None every file listed in `container_name` is
        processed; otherwise only the given file names are. Returns a dict
        mapping each file name to the result of the recognition call.'''
    selected = list_files(container_name) if files_name is None else files_name
    results = {}
    for audio_name in selected:
        print("Processando arquivo: {}.".format(audio_name))
        audio_path = os.path.join(container_name, audio_name)
        results[audio_name] = recognize_intent_from_speech_continuous(
            speech_key, speech_service_region, luis_app_id, language, audio_path)
    return results

In [42]:
######## Test with a single file ######## 
#results_json = realizar_transcricao_plus_intencao("audios", files_name=['id_5.wav'])

In [61]:
######## Test with all files ######## 
results_json = realizar_transcricao_plus_intencao("audios")

Processando arquivo: id_1.wav.
CLOSING on RecognitionEventArgs(session_id=17ecdb7341c44ae194afe060113c3a97)
RECOGNIZED: IntentRecognitionEventArgs(session_id=17ecdb7341c44ae194afe060113c3a97, result=IntentRecognitionResult(result_id=2f5cf3d37331444daa0270d0f75e8451, text="São Bernardo do Campo.", intent_id=None, reason=ResultReason.RecognizedIntent))
	Text: São Bernardo do Campo. (Reason: ResultReason.RecognizedIntent)
	Intent Id: None
	Intent JSON: {
  "query": "são bernardo do campo",
  "topScoringIntent": {
    "intent": "None",
    "score": 0.7145854
  },
  "entities": []
}
CLOSING on RecognitionEventArgs(session_id=a7c37df11fdc4cc7bf5d967b63075caa)
CLOSING on RecognitionEventArgs(session_id=17ecdb7341c44ae194afe060113c3a97)
RECOGNIZED: IntentRecognitionEventArgs(session_id=17ecdb7341c44ae194afe060113c3a97, result=IntentRecognitionResult(result_id=0c629abbad2b4d6ba0dd467ac5d193ff, text="Muito obrigada pelas informações. Você poderia anotar agora? O número do protocolo?", intent_id=

### Summarizing the classification results
* Roubo: id_5
* Furto: id_1
* Colisão: OK id_2
* Quebra de retrovisores: OK id_3
* Indefinido (enchente): OK id_4

In [62]:
# For each audio file, count how many utterances got each top-scoring intent.
for audio_name, transcription in results_json.items():
    print(audio_name)
    intent_counts = Counter(
        utterance['topScoringIntent']['intent'] for utterance in transcription['result']
    )
    print (dict(intent_counts))

id_1.wav
{'None': 23, 'classe_furto': 1, 'dizer_protocolo': 1}
id_2.wav
{'None': 22, 'classe_colisao': 1, 'dizer_protocolo': 1}
id_3.wav
{'None': 22, 'classe_quebra_retrovisores': 1, 'dizer_protocolo': 1}
id_4.wav
{'None': 23, 'dizer_protocolo': 1}
id_5.wav
{'None': 22, 'classe_roubo': 1, 'dizer_protocolo': 1}


## Save transcriptions locally

In [63]:
def save_transcriptions(dir, results_json):
    """Write every transcription in `results_json` to its own JSON file in `dir`.

    Args:
        dir: output directory; created (including parents) when it does not exist.
        results_json: dict mapping an audio file name to a JSON-serializable
            transcription. Each entry is written to
            ``<dir>/<file name without its last extension>.json``.

    The files are written as UTF-8 with non-ASCII characters kept as-is.
    """
    try:
        os.makedirs(dir)
    except FileExistsError:
        print ('Directory already existis: ' + dir)
    else:
        print ('Directory successfully created: ' + dir)
    for f, transcription in results_json.items():
        # splitext drops only the final extension and never fails, so names
        # that do not end in ".wav" are handled instead of raising IndexError.
        file_name = os.path.splitext(f)[0]
        with open(os.path.join(dir, file_name + ".json"), 'w', encoding='utf-8') as j:
            json.dump(transcription, j, indent=4, ensure_ascii=False)
            print("Saving file {}.json".format(file_name))

In [64]:
# Write one JSON file per transcribed audio into the "transcricoes" directory.
save_transcriptions("transcricoes", results_json)

Directory successfully created: transcricoes
Saving file id_1.json
Saving file id_2.json
Saving file id_3.json
Saving file id_4.json
Saving file id_5.json
