diff --git a/core/asr/ASRManager.py b/core/asr/ASRManager.py index 32d837ddc..62bd5d07f 100644 --- a/core/asr/ASRManager.py +++ b/core/asr/ASRManager.py @@ -100,8 +100,8 @@ def onStartListening(self, session: DialogSession): self.ThreadManager.newThread(name=f'streamdecode_{session.siteId}', target=self.decodeStream, args=[session]) - def onAsrIntermediateResult(self, result: str): - self.logDebug(result) + def onPartialTextCaptured(self, session: DialogSession, text: str, likelihood: float, seconds: float): + self.logDebug(f'Captured {text} with a likelihood of {likelihood}') def decodeStream(self, session: DialogSession): diff --git a/core/asr/model/ASR.py b/core/asr/model/ASR.py index 07e03f063..ceb13244f 100644 --- a/core/asr/model/ASR.py +++ b/core/asr/model/ASR.py @@ -49,12 +49,14 @@ def install(self) -> bool: try: for dep in self.DEPENDENCIES['system']: + self.logInfo(f'Installing "{dep}"') self.Commons.runRootSystemCommand(['apt-get', 'install', '-y', dep]) - self.logInfo(f'Installed "{dep}"') + self.logInfo(f'Installed!') for dep in self.DEPENDENCIES['pip']: + self.logInfo(f'Installing "{dep}"') self.Commons.runSystemCommand(['./venv/bin/pip', 'install', dep]) - self.logInfo(f'Installed "{dep}"') + self.logInfo(f'Installed!') return True except Exception as e: @@ -102,3 +104,7 @@ def checkLanguage(self) -> bool: def downloadLanguage(self) -> bool: return False + + + def partialTextCaptured(self, session: DialogSession, text: str, likelihood: float, seconds: float): + self.MqttManager.partialTextCaptured(session, text, likelihood, seconds) diff --git a/core/asr/model/GoogleASR.py b/core/asr/model/GoogleASR.py index c955638fd..b5217221d 100644 --- a/core/asr/model/GoogleASR.py +++ b/core/asr/model/GoogleASR.py @@ -1,12 +1,10 @@ -from pathlib import Path -from typing import Optional - import os +from pathlib import Path +from typing import Generator, Optional from core.asr.model.ASR import ASR from core.asr.model.ASRResult import ASRResult from core.asr.model.Recorder import Recorder -from core.commons import constants from core.dialog.model.DialogSession import DialogSession try: @@ -57,7 +55,7 @@ def decodeStream(self, session: DialogSession) -> Optional[ASRResult]: audioStream = stream.audioStream() requests = (types.StreamingRecognizeRequest(audio_content=content) for content in audioStream) responses = self._client.streaming_recognize(self._streamingConfig, requests) - result = self._checkResponses(responses) + result = self._checkResponses(session, responses) self.end(recorder, session) @@ -69,7 +67,7 @@ def decodeStream(self, session: DialogSession) -> Optional[ASRResult]: ) if result else None - def _checkResponses(self, responses) -> Optional[tuple]: + def _checkResponses(self, session: DialogSession, responses: Generator) -> Optional[tuple]: if responses is None: return None @@ -84,6 +82,6 @@ def _checkResponses(self, responses) -> Optional[tuple]: if result.is_final: return result.alternatives[0].transcript, result.alternatives[0].confidence else: - self.broadcast(method=constants.EVENT_ASR_INTERMEDIATE_RESULT, exceptions=[constants.DUMMY], propagateToSkills=True, result=result.alternatives[0].transcript) + self.partialTextCaptured(session=session, text=result.alternatives[0].transcript, likelihood=result.alternatives[0].confidence, seconds=0) return None diff --git a/core/asr/model/PocketSphinxASR.py b/core/asr/model/PocketSphinxASR.py index 5794c600e..27e14a90b 100644 --- a/core/asr/model/PocketSphinxASR.py +++ b/core/asr/model/PocketSphinxASR.py @@ -7,6 +7,7 @@ from core.asr.model.ASR import ASR from core.asr.model.ASRResult import ASRResult from core.asr.model.Recorder import Recorder +from core.commons import constants from core.dialog.model.DialogSession import DialogSession from core.util.Stopwatch import Stopwatch @@ -29,16 +30,21 @@ class PocketSphinxASR(ASR): } LANGUAGE_PACKS = { - 'fr': { - 'cmusphinx-fr-ptm-8khz-5.2.tar.gz': 'https://sourceforge.net/projects/cmusphinx/files/Acoustic%20and%20Language%20Models/French/cmusphinx-fr-ptm-8khz-5.2.tar.gz/download', - 'cmudict-fr-fr.dict' : 'https://sourceforge.net/projects/cmusphinx/files/Acoustic%20and%20Language%20Models/French/fr.dict/download', - 'fr-fr.lm.bin' : 'https://sourceforge.net/projects/cmusphinx/files/Acoustic%20and%20Language%20Models/French/fr-small.lm.bin/download' - }, - 'de': { - 'cmusphinx-de-voxforge-5.2.tar.gz': 'https://sourceforge.net/projects/cmusphinx/files/Acoustic%20and%20Language%20Models/German/cmusphinx-de-voxforge-5.2.tar.gz/download', - 'cmudict-de-de.dict' : 'https://sourceforge.net/projects/cmusphinx/files/Acoustic%20and%20Language%20Models/German/cmusphinx-voxforge-de.dic/download', - 'de-de.lm.bin' : 'https://sourceforge.net/projects/cmusphinx/files/Acoustic%20and%20Language%20Models/German/cmusphinx-voxforge-de.lm.bin/download', - } + 'en': [ + f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/en-us/en-us.tar', + f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/en-us/en-us.lm.bin', + f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/en-us/cmudict-en-us.dict' + ], + 'fr': [ + f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/fr-fr/fr-fr.tar', + f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/fr-fr/fr-fr.lm.bin', + f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/fr-fr/cmudict-fr-fr.dict' + ], + 'de': [ + f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/de-de/de-de.tar', + f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/de-de/de-de.lm.bin', + f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/de-de/cmudict-de-de.dict' + ] } @@ -71,25 +77,33 @@ def checkLanguage(self) -> bool: return True + def timeout(self): + super().timeout() + try: + self._decoder.end_utt() + except: + # If this fails we don't care, at least we tried to close the utterance + pass + + def downloadLanguage(self) -> bool: self.logInfo(f'Downloading language model for "{self.LanguageManager.activeLanguage}"') venv = Path(self.Commons.rootDir(), 'venv/lib/python3.7/site-packages/pocketsphinx/') - for filename, url in self.LANGUAGE_PACKS[self.LanguageManager.activeLanguage].items(): + for url in self.LANGUAGE_PACKS[self.LanguageManager.activeLanguage]: + filename = Path(url).name download = Path(venv, 'model', filename) - self.Commons.downloadFile(url=url, dest=str(download)) + self.Commons.downloadFile(url=f'{url}?raw=true', dest=str(download)) - if filename.endswith('.tar.gz'): + if download.suffix == '.tar': dest = Path(venv, 'model', self.LanguageManager.activeLanguageAndCountryCode.lower()) if dest.exists(): shutil.rmtree(dest) - dirName = filename.replace('.tar.gz', '') tar = tarfile.open(str(download)) - tar.extractall(str(download).replace('.tar.gz', '')) - Path(venv, 'model', dirName, dirName).rename(str(Path(venv, 'model', self.LanguageManager.activeLanguageAndCountryCode.lower()))) - shutil.rmtree(Path(venv, 'model', dirName)) + tar.extractall(str(dest)) + download.unlink() self.logInfo('Downloaded and installed') diff --git a/core/base/model/ProjectAliceObject.py b/core/base/model/ProjectAliceObject.py index a7816b751..2be9a8a1a 100644 --- a/core/base/model/ProjectAliceObject.py +++ b/core/base/model/ProjectAliceObject.py @@ -363,7 +363,7 @@ def onNluTrained(self, **kwargs): pass - def onAsrIntermediateResult(self, result: str): + def onPartialTextCaptured(self, session, text: str, likelihood: float, seconds: float): pass diff --git a/core/commons/constants.py b/core/commons/constants.py index 9fd711353..d90358460 100644 --- a/core/commons/constants.py +++ b/core/commons/constants.py @@ -25,6 +25,7 @@ TOPIC_SESSION_QUEUED = 'hermes/dialogueManager/sessionQueued' TOPIC_SESSION_ENDED = 'hermes/dialogueManager/sessionEnded' TOPIC_TEXT_CAPTURED = 'hermes/asr/textCaptured' +TOPIC_PARTIAL_TEXT_CAPTURED = 'hermes/asr/partialTextCaptured' TOPIC_INTENT_NOT_RECOGNIZED = 'hermes/dialogueManager/intentNotRecognized' TOPIC_INTENT_PARSED = 'hermes/nlu/intentParsed' TOPIC_TTS_SAY = 'hermes/tts/say' @@ -62,6 +63,7 @@ EVENT_START_LISTENING = 'startListening' EVENT_STOP_LISTENING = 'stopListening' EVENT_CAPTURED = 'captured' +EVENT_PARTIAL_TEXT_CAPTURED = 'partialTextCaptured' EVENT_NLU_QUERY = 'nluQuery' EVENT_INTENT_PARSED = 'intentParsed' EVENT_SESSION_ENDED = 'sessionEnded' @@ -81,4 +83,3 @@ EVENT_WAKEUP = 'wakeup' EVENT_SLEEP = 'sleep' EVENT_NLU_TRAINED = 'nluTrained' -EVENT_ASR_INTERMEDIATE_RESULT = 'asrIntermediateResult' diff --git a/core/server/MqttManager.py b/core/server/MqttManager.py index d309b320f..3b95fb5fc 100644 --- a/core/server/MqttManager.py +++ b/core/server/MqttManager.py @@ -1,11 +1,11 @@ import json +import random +import re import uuid from pathlib import Path import paho.mqtt.client as mqtt import paho.mqtt.publish as publish -import random -import re from core.ProjectAliceExceptions import AccessLevelTooLow from core.base.model.Intent import Intent @@ -70,6 +70,8 @@ def onStart(self): self._mqttClient.message_callback_add(constants.TOPIC_NLU_QUERY, self.onTopicNluQuery) + self._mqttClient.message_callback_add(constants.TOPIC_PARTIAL_TEXT_CAPTURED, self.onNluPartialCapture) + self.connect() @@ -442,6 +444,15 @@ def onSnipsIntentNotRecognized(self, client, data, msg: mqtt.MQTTMessage): self.broadcast(method=constants.EVENT_INTENT_NOT_RECOGNIZED, exceptions=[self.name], propagateToSkills=True, session=session) + def onNluPartialCapture(self, client, data, msg: mqtt.MQTTMessage): + sessionId = self.Commons.parseSessionId(msg) + session = self.DialogSessionManager.getSession(sessionId) + + if session: + payload = self.Commons.payload(msg) + self.broadcast(method=constants.EVENT_PARTIAL_TEXT_CAPTURED, exceptions=[self.name], propagateToSkills=True, session=session, text=payload['text'], likelihood=payload['likelihood'], seconds=payload['seconds']) + + def reviveSession(self, session: DialogSession, text: str): self.endSession(session.sessionId) self.DialogSessionManager.planSessionRevival(session) @@ -682,6 +693,16 @@ def endSession(self, sessionId): })) + def partialTextCaptured(self, session: DialogSession, text: str, likelihood: float, seconds: float): + self._mqttClient.publish(constants.TOPIC_PARTIAL_TEXT_CAPTURED, json.dumps({ + 'text': text, + 'likelihood': likelihood, + 'seconds': seconds, + 'siteId': session.siteId, + 'sessionId': session.sessionId + })) + + def playSound(self, soundFilename: str, location: Path = None, sessionId: str = '', siteId: str = constants.DEFAULT_SITE_ID, uid: str = '', suffix: str = '.wav'): if not sessionId: diff --git a/core/snips/SnipsAssistantManager.py b/core/snips/SnipsAssistantManager.py index 30fe21e94..d977fa48b 100644 --- a/core/snips/SnipsAssistantManager.py +++ b/core/snips/SnipsAssistantManager.py @@ -47,7 +47,7 @@ def checkConsistency(self) -> bool: for intent in data['intents']: existingIntents.add(intent['name']) - if not intent['slots']: + if 'slots' not in intent or not intent['slots']: continue for slot in intent['slots']: @@ -130,7 +130,7 @@ def train(self): entityVSType[f'{slot["type"]}_{slot["name"]}'] = f'{rand9}' else: - # Check if a slot with same type and name already exist and use it's id, else use the new random + # Check if a slot with same type and name already exists and use its id else use the new random slotId = entityVSType.get(f'{slot["type"]}_{slot["name"]}', rand9) intentSlot = { @@ -161,7 +161,7 @@ def train(self): except Exception as e: self.broadcast(method='snipsAssistantFailedTraining', exceptions=[self.name], propagateToSkills=True) if not self._assistantPath.exists(): - self.logFatal('Assistant failed training and no assistant existing, stopping here, sorry....') + self.logFatal(f'Assistant failed training and no assistant existing, stopping here, sorry.... What happened? {e}') def generateAssistant(self) -> dict: