Skip to content

Commit

Permalink
Add VAD platform capability through webrtcvad
Browse files Browse the repository at this point in the history
  • Loading branch information
ad31c0 committed May 10, 2021
1 parent 6e83261 commit c79cf9f
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 5 deletions.
4 changes: 1 addition & 3 deletions config.js
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,4 @@ module.exports.ENABLE_DB_ENCRYPTION = false;
module.exports.SEMPRE_URL = process.env.THINGENGINE_NLP_URL || 'https://nlp-staging.almond.stanford.edu';
module.exports.THINGPEDIA_URL = process.env.THINGPEDIA_URL || 'https://dev.almond.stanford.edu/thingpedia';
module.exports.CLOUD_SYNC_URL = process.env.THINGENGINE_CLOUD_SYNC_URL || 'https://dev.almond.stanford.edu';
module.exports.MS_SPEECH_RECOGNITION_PRIMARY_KEY = 'de1f02817356494483ba502b2ce95f6f';
module.exports.MS_SPEECH_RECOGNITION_SECONDARY_KEY = '3dc6ce0b832940f0b0c984a1517c457e';

module.exports.NL_URL = process.env.THINGENGINE_NL_URL || 'https://nlp-staging.almond.stanford.edu';
2 changes: 1 addition & 1 deletion main.js
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ async function init(platform) {

if (platform.hasCapability('sound')) {
const speech = new Genie.SpeechHandler(conversation, platform, {
subscriptionKey: Config.MS_SPEECH_RECOGNITION_PRIMARY_KEY
nlUrl: Config.NL_URL
});

let play;
Expand Down
18 changes: 18 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@
"optionalDependencies": {
"canberra": "^0.1.2",
"pulseaudio2": "^0.5.0",
"snowboy": "^1.3.1"
"snowboy": "^1.3.1",
"webrtcvad": "^1.0.1"
},
"devDependencies": {
"coveralls": "^3.1.0",
Expand Down
40 changes: 40 additions & 0 deletions service/platform/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,38 @@ class SoundEffectsApi {
}
}

// Optional dependency: resolve the webrtcvad constructor once at load time.
// If the module cannot be loaded for any reason, a notice is logged and the
// binding is left as null so the rest of the file can test for availability.
let webrtcvad = (() => {
    try {
        return require('webrtcvad').default;
    } catch(e) {
        console.log("VAD not available");
        return null;
    }
})();

/**
 * Small wrapper around the optional `webrtcvad` module.
 *
 * An instance starts without a detector; `setup()` creates one when the
 * module was loaded, and `process()` forwards chunks to it.
 */
class VAD {
    constructor() {
        // No detector until setup() succeeds.
        this._instance = null;
    }

    /**
     * (Re)creates the underlying detector.
     *
     * Both arguments are passed straight to the `webrtcvad` constructor.
     * NOTE(review): presumably the audio sample rate and the detection
     * aggressiveness level; confirm against the webrtcvad API.
     *
     * @param {*} bitrate - first constructor argument for webrtcvad
     * @param {*} level - second constructor argument for webrtcvad
     * @returns {boolean} true if a detector was created, false if the
     *                    webrtcvad module is unavailable
     */
    setup(bitrate, level) {
        // Discard any detector left over from a previous setup() call.
        this._instance = null;

        if (!webrtcvad)
            return false;

        this._instance = new webrtcvad(bitrate, level);
        return true;
    }

    /**
     * Forwards a chunk to the detector.
     *
     * @param {*} chunk - data handed unchanged to the detector's process()
     * @returns {*} the detector's result, or false when no detector is set up
     */
    process(chunk) {
        return this._instance ? this._instance.process(chunk) : false;
    }
}

class ServerPlatform extends Tp.BasePlatform {
constructor() {
Expand All @@ -260,6 +292,7 @@ class ServerPlatform extends Tp.BasePlatform {
safeMkdirSync(this._cacheDir);

this._wakeWordDetector = null;
this._voiceDetector = null;
this._soundEffects = null;

this._sqliteKey = null;
Expand Down Expand Up @@ -317,6 +350,9 @@ class ServerPlatform extends Tp.BasePlatform {

if (canberra)
this._soundEffects = new SoundEffectsApi();

if (webrtcvad && VAD)
this._voiceDetector = new VAD();
} else {
this._pulse = null;
}
Expand Down Expand Up @@ -373,6 +409,8 @@ class ServerPlatform extends Tp.BasePlatform {
case 'wakeword-detector':
this._ensurePulseAudio();
return this._wakeWordDetector !== null;
case 'voice-detector':
return this._voiceDetector !== null;

case 'sound-effects':
this._ensurePulseAudio();
Expand Down Expand Up @@ -400,6 +438,8 @@ class ServerPlatform extends Tp.BasePlatform {
case 'wakeword-detector':
this._ensurePulseAudio();
return this._wakeWordDetector;
case 'voice-detector':
return this._voiceDetector;
case 'sound-effects':
this._ensurePulseAudio();
return this._soundEffects;
Expand Down

0 comments on commit c79cf9f

Please sign in to comment.