Skip to content
This repository has been archived by the owner on May 6, 2022. It is now read-only.

Commit

Permalink
feat: wakeword-only profile and empty ASR
Browse files Browse the repository at this point in the history
This adds a no-op ASR and new pipeline profile for a
wakeword-only use case. Upon successful wakeword
recognition, the pipeline remains active for a single
frame and is then deactivated.
  • Loading branch information
space-pope committed Jul 22, 2021
1 parent f9f7c13 commit 8afb3c6
Show file tree
Hide file tree
Showing 4 changed files with 162 additions and 0 deletions.
67 changes: 67 additions & 0 deletions src/main/java/io/spokestack/spokestack/asr/EmptyRecognizer.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package io.spokestack.spokestack.asr;

import io.spokestack.spokestack.SpeechConfig;
import io.spokestack.spokestack.SpeechContext;
import io.spokestack.spokestack.SpeechProcessor;

import java.nio.ByteBuffer;

/**
 * Empty speech recognizer.
 *
 * <p>
 * This recognizer is designed for use in profiles that want to skip ASR
 * entirely, dispatching only activate and deactivate events from a wakeword
 * recognizer.
 * </p>
 *
 * <p>
 * Once the wakeword is recognized, this stage allows the pipeline to remain
 * active for a single frame then deactivates it.
 * </p>
 */
public class EmptyRecognizer implements SpeechProcessor {

    /**
     * whether the speech context was still active at the end of the most
     * recently processed frame.
     */
    private boolean active = false;

    /**
     * initializes a new recognizer instance.
     *
     * @param speechConfig Spokestack speech configuration (unused)
     */
    public EmptyRecognizer(SpeechConfig speechConfig) {
        // no configuration necessary
    }

    /**
     * deactivates the context one frame after it was first seen active.
     *
     * @param context the current speech context; its active flag is read
     *                and, on the frame following activation, cleared
     * @param frame   the audio frame; not inspected by this stage
     * @throws Exception declared by the interface; not thrown directly here
     */
    @Override
    public void process(SpeechContext context, ByteBuffer frame)
          throws Exception {
        // all we want to do is return control to the wakeword component, so
        // simply deactivate the context. this allows multiple wakeword
        // utterances to be recognized in quick succession.
        // we want to leave the context active for one frame, though, so the
        // wakeword trigger has a chance to recognize the activity and reset
        // itself when we deactivate on the following frame; otherwise, we'll
        // get repeated activations as the wakeword trigger fires for multiple
        // frames in a row.
        if (this.active) {
            context.setActive(false);
        }
        // remember the context's state for the next frame; this is false
        // again right after the deactivation above
        this.active = context.isActive();
    }

    /**
     * clears the internal activity flag.
     *
     * <p>
     * without this, a reset that happens between the activation frame and
     * the deactivation frame would leave a stale flag behind, and the next
     * activation would be deactivated immediately instead of being left
     * active for one frame.
     * </p>
     *
     * @throws Exception declared by the interface; not thrown directly here
     */
    @Override
    public void reset() throws Exception {
        this.active = false;
    }

    /**
     * does nothing; this stage holds no resources.
     *
     * @throws Exception declared by the interface; not thrown directly here
     */
    @Override
    public void close() throws Exception {
    }

    /**
     * determines whether the recognizer is currently active. used for testing.
     *
     * @return {@code true} if the context was active at the end of the last
     *         processed frame and no reset has occurred since
     */
    boolean isActive() {
        return this.active;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package io.spokestack.spokestack.profile;

import io.spokestack.spokestack.PipelineProfile;
import io.spokestack.spokestack.SpeechPipeline;

import java.util.ArrayList;
import java.util.List;

/**
 * A speech pipeline profile that combines TensorFlow Lite wakeword detection
 * with the no-op {@code EmptyRecognizer} in place of a real ASR stage.
 *
 * <p>
 * The wakeword stage needs to know where its classification models live.
 * This profile does not set those locations, so the following properties
 * must be supplied elsewhere:
 * </p>
 *
 * <ul>
 *   <li>
 *      <b>wake-filter-path</b> (string, required): file system path to the
 *      "filter" Tensorflow-Lite model, which is used to calculate a mel
 *      spectrogram frame from the linear STFT; its inputs should be shaped
 *      [fft-width], and its outputs [mel-width]
 *   </li>
 *   <li>
 *      <b>wake-encode-path</b> (string, required): file system path to the
 *      "encode" Tensorflow-Lite model, which is used to perform each
 *      autoregressive step over the mel frames; its inputs should be shaped
 *      [mel-length, mel-width], and its outputs [encode-width], with an
 *      additional state input/output shaped [state-width]
 *   </li>
 *   <li>
 *      <b>wake-detect-path</b> (string, required): file system path to the
 *      "detect" Tensorflow-Lite model; its inputs should be shaped
 *      [encode-length, encode-width], and its outputs [1]
 *   </li>
 * </ul>
 *
 * @see io.spokestack.spokestack.asr.EmptyRecognizer
 * @see io.spokestack.spokestack.wakeword.WakewordTrigger
 */
public class TFWakewordEmptyASR implements PipelineProfile {

    /**
     * Configures the given builder with this profile's input class,
     * properties, and ordered list of stage classes.
     *
     * @param builder the pipeline builder to configure
     * @return the builder returned by the final configuration call
     */
    @Override
    public SpeechPipeline.Builder apply(SpeechPipeline.Builder builder) {
        // stage class names, in the order they are handed to the builder
        String[] stageNames = {
            "io.spokestack.spokestack.webrtc.AutomaticGainControl",
            "io.spokestack.spokestack.webrtc.AcousticNoiseSuppressor",
            "io.spokestack.spokestack.webrtc.VoiceActivityDetector",
            "io.spokestack.spokestack.wakeword.WakewordTrigger",
            "io.spokestack.spokestack.asr.EmptyRecognizer",
        };
        List<String> stages = new ArrayList<>();
        for (String stageName : stageNames) {
            stages.add(stageName);
        }

        SpeechPipeline.Builder configured = builder
              .setInputClass(
                    "io.spokestack.spokestack.android.PreASRMicrophoneInput")
              .setProperty("ans-policy", "aggressive")
              .setProperty("vad-mode", "very-aggressive")
              .setProperty("vad-fall-delay", 800)
              .setProperty("wake-threshold", 0.9)
              .setProperty("pre-emphasis", 0.97);

        return configured.setStageClasses(stages);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ public class SpeechPipelineTest implements OnSpeechEventListener {
io.spokestack.spokestack.profile.PushToTalkSpokestackASR.class,
io.spokestack.spokestack.profile.TFWakewordAndroidASR.class,
io.spokestack.spokestack.profile.TFWakewordAzureASR.class,
io.spokestack.spokestack.profile.TFWakewordEmptyASR.class,
io.spokestack.spokestack.profile.TFWakewordGoogleASR.class,
io.spokestack.spokestack.profile.TFWakewordKeywordASR.class,
io.spokestack.spokestack.profile.TFWakewordSpokestackASR.class,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package io.spokestack.spokestack.asr;

import io.spokestack.spokestack.SpeechConfig;
import io.spokestack.spokestack.SpeechContext;
import org.junit.Test;

import static org.junit.Assert.*;

public class EmptyRecognizerTest {

    /**
     * Drives the recognizer through three phases: an inactive frame, the
     * frame on which the context is first seen active, and the following
     * frame on which the recognizer deactivates the context.
     */
    @Test
    public void testProcess() throws Exception {
        SpeechConfig speechConfig = new SpeechConfig();
        EmptyRecognizer stage = new EmptyRecognizer(speechConfig);
        assertFalse(stage.isActive());
        SpeechContext speechContext = new SpeechContext(speechConfig);

        // phase 1: nothing happens while the context is inactive
        stage.process(speechContext, null);
        assertFalse(stage.isActive());

        // phase 2: on the first frame after activation, the stage records
        // the activity internally but leaves the context active
        speechContext.setActive(true);
        stage.process(speechContext, null);
        assertTrue(stage.isActive());
        assertTrue(speechContext.isActive());

        // phase 3: the next frame deactivates the context, and the
        // stage's internal flag follows it
        stage.process(speechContext, null);
        assertFalse(speechContext.isActive());
        assertFalse(stage.isActive());
    }
}

0 comments on commit 8afb3c6

Please sign in to comment.