This repository has been archived by the owner on May 6, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #36 from spokestack/jz-profiles
Feature: SpeechPipeline.Builder profiles
- Loading branch information
Showing
11 changed files
with
447 additions
and
41 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
26 changes: 26 additions & 0 deletions
26
src/main/java/io/spokestack/spokestack/PipelineProfile.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
package io.spokestack.spokestack; | ||
|
||
/** | ||
* A pipeline profile encapsulates a series of configuration values tuned for | ||
* a specific task to make building a {@link SpeechPipeline} more convenient. | ||
* | ||
* <p> | ||
* Profiles are not authoritative; they act just like calling a series of | ||
* methods on a {@link SpeechPipeline.Builder}, and any configuration | ||
* properties they set can be overridden by subsequent calls. | ||
* </p> | ||
* | ||
* <p> | ||
* Pipeline profiles must not require arguments in their constructors. | ||
* </p> | ||
*/ | ||
public interface PipelineProfile { | ||
|
||
/** | ||
* Apply this profile to the pipeline builder. | ||
* | ||
* @param builder The builder to which the profile should be applied. | ||
* @return The modified pipeline builder. | ||
*/ | ||
SpeechPipeline.Builder apply(SpeechPipeline.Builder builder); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
36 changes: 36 additions & 0 deletions
36
src/main/java/io/spokestack/spokestack/profile/PushToTalkAndroidASR.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
package io.spokestack.spokestack.profile; | ||
|
||
import io.spokestack.spokestack.PipelineProfile; | ||
import io.spokestack.spokestack.SpeechPipeline; | ||
|
||
/** | ||
* A speech pipeline profile that relies on manual pipeline activation, | ||
* using Android's {@code SpeechRecognizer} API for ASR. | ||
* | ||
* <p> | ||
* Using Android's built-in ASR requires that an Android {@code Context} object | ||
* be attached to the speech pipeline using it. This must be done separately | ||
* from profile application, using | ||
* {@link SpeechPipeline.Builder#setAndroidContext(android.content.Context)}. | ||
* </p> | ||
* | ||
* @see io.spokestack.spokestack.android.AndroidSpeechRecognizer | ||
*/ | ||
public class PushToTalkAndroidASR implements PipelineProfile { | ||
@Override | ||
public SpeechPipeline.Builder apply(SpeechPipeline.Builder builder) { | ||
return builder | ||
.setInputClass( | ||
"io.spokestack.spokestack.android.MicrophoneInput") | ||
.addStageClass( | ||
"io.spokestack.spokestack.webrtc.AcousticNoiseSuppressor") | ||
.addStageClass( | ||
"io.spokestack.spokestack.webrtc.AutomaticGainControl") | ||
.setProperty("agc-compression-gain-db", 15) | ||
.addStageClass( | ||
"io.spokestack.spokestack.webrtc.VoiceActivityDetector") | ||
.addStageClass("io.spokestack.spokestack.ActivationTimeout") | ||
.addStageClass( | ||
"io.spokestack.spokestack.android.AndroidSpeechRecognizer"); | ||
} | ||
} |
44 changes: 44 additions & 0 deletions
44
src/main/java/io/spokestack/spokestack/profile/PushToTalkGoogleASR.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
package io.spokestack.spokestack.profile; | ||
|
||
import io.spokestack.spokestack.PipelineProfile; | ||
import io.spokestack.spokestack.SpeechPipeline; | ||
|
||
/** | ||
* A speech pipeline profile that relies on manual pipeline activation, | ||
* using Google Speech for ASR. | ||
* | ||
* <p> | ||
* Google Speech requires extra configuration, which must be added to the | ||
* pipeline build process separately from this profile: | ||
* </p> | ||
* | ||
* <ul> | ||
* <li> | ||
* <b>google-credentials</b> (string): json-stringified google service | ||
* account credentials, used to authenticate with the speech API | ||
* </li> | ||
* <li> | ||
* <b>locale</b> (string): language code for speech recognition | ||
* </li> | ||
* </ul> | ||
* | ||
* @see io.spokestack.spokestack.google.GoogleSpeechRecognizer | ||
*/ | ||
public class PushToTalkGoogleASR implements PipelineProfile { | ||
@Override | ||
public SpeechPipeline.Builder apply(SpeechPipeline.Builder builder) { | ||
return builder | ||
.setInputClass( | ||
"io.spokestack.spokestack.android.MicrophoneInput") | ||
.addStageClass( | ||
"io.spokestack.spokestack.webrtc.AcousticNoiseSuppressor") | ||
.addStageClass( | ||
"io.spokestack.spokestack.webrtc.AutomaticGainControl") | ||
.setProperty("agc-compression-gain-db", 15) | ||
.addStageClass( | ||
"io.spokestack.spokestack.webrtc.VoiceActivityDetector") | ||
.addStageClass("io.spokestack.spokestack.ActivationTimeout") | ||
.addStageClass( | ||
"io.spokestack.spokestack.google.GoogleSpeechRecognizer"); | ||
} | ||
} |
73 changes: 73 additions & 0 deletions
73
src/main/java/io/spokestack/spokestack/profile/TFWakewordAndroidASR.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
package io.spokestack.spokestack.profile; | ||
|
||
import io.spokestack.spokestack.PipelineProfile; | ||
import io.spokestack.spokestack.SpeechPipeline; | ||
|
||
/** | ||
* A speech pipeline profile that uses TensorFlow Lite for wakeword detection | ||
* and Android's {@code SpeechRecognizer} API for ASR. Properties related to | ||
* signal processing are tuned for the "Spokestack" wakeword. | ||
* | ||
* <p> | ||
* Wakeword detection requires configuration to locate the models used for | ||
* classification; these properties must be set elsewhere: | ||
* </p> | ||
* | ||
* <ul> | ||
* <li> | ||
* <b>wake-filter-path</b> (string, required): file system path to the | ||
* "filter" Tensorflow-Lite model, which is used to calculate a mel | ||
* spectrogram frame from the linear STFT; its inputs should be shaped | ||
* [fft-width], and its outputs [mel-width] | ||
* </li> | ||
* <li> | ||
* <b>wake-encode-path</b> (string, required): file system path to the | ||
* "encode" Tensorflow-Lite model, which is used to perform each | ||
* autoregressive step over the mel frames; its inputs should be shaped | ||
* [mel-length, mel-width], and its outputs [encode-width], with an | ||
* additional state input/output shaped [state-width] | ||
* </li> | ||
* <li> | ||
* <b>wake-detect-path</b> (string, required): file system path to the | ||
* "detect" Tensorflow-Lite model; its inputs should be shaped | ||
* [encode-length, encode-width], and its outputs [1] | ||
* </li> | ||
* </ul> | ||
* | ||
* <p> | ||
* Using Android's built-in ASR requires that an Android {@code Context} object | ||
* be attached to the speech pipeline using it. This must be done separately | ||
* from profile application, using | ||
* {@link SpeechPipeline.Builder#setAndroidContext(android.content.Context)}. | ||
* </p> | ||
* | ||
* @see io.spokestack.spokestack.android.AndroidSpeechRecognizer | ||
* @see io.spokestack.spokestack.wakeword.WakewordTrigger | ||
*/ | ||
public class TFWakewordAndroidASR implements PipelineProfile { | ||
@Override | ||
public SpeechPipeline.Builder apply(SpeechPipeline.Builder builder) { | ||
return builder | ||
.setInputClass( | ||
"io.spokestack.spokestack.android.MicrophoneInput") | ||
.addStageClass( | ||
"io.spokestack.spokestack.webrtc.AcousticNoiseSuppressor") | ||
.setProperty("ans-policy", "aggressive") | ||
.addStageClass( | ||
"io.spokestack.spokestack.webrtc.AutomaticGainControl") | ||
.setProperty("agc-target-level-dbfs", 3) | ||
.setProperty("agc-compression-gain-db", 15) | ||
.addStageClass( | ||
"io.spokestack.spokestack.webrtc.VoiceActivityDetector") | ||
.setProperty("vad-mode", "very-aggressive") | ||
.setProperty("vad-fall-delay", 800) | ||
.addStageClass( | ||
"io.spokestack.spokestack.wakeword.WakewordTrigger") | ||
.setProperty("wake-threshold", 0.9) | ||
.setProperty("pre-emphasis", 0.97) | ||
.addStageClass("io.spokestack.spokestack.ActivationTimeout") | ||
.setProperty("active-min", 2000) | ||
.addStageClass( | ||
"io.spokestack.spokestack.android.AndroidSpeechRecognizer"); | ||
} | ||
} |
80 changes: 80 additions & 0 deletions
80
src/main/java/io/spokestack/spokestack/profile/TFWakewordGoogleASR.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
package io.spokestack.spokestack.profile; | ||
|
||
import io.spokestack.spokestack.PipelineProfile; | ||
import io.spokestack.spokestack.SpeechPipeline; | ||
|
||
/** | ||
* A speech pipeline profile that uses TensorFlow Lite for wakeword detection | ||
* and Google Speech for ASR. Properties related to signal processing are tuned | ||
* for the "Spokestack" wakeword. | ||
* | ||
* <p> | ||
* Wakeword detection requires configuration to locate the models used for | ||
* classification; these properties must be set separately from this profile: | ||
* </p> | ||
* | ||
* <ul> | ||
* <li> | ||
* <b>wake-filter-path</b> (string, required): file system path to the | ||
* "filter" Tensorflow-Lite model, which is used to calculate a mel | ||
* spectrogram frame from the linear STFT; its inputs should be shaped | ||
* [fft-width], and its outputs [mel-width] | ||
* </li> | ||
* <li> | ||
* <b>wake-encode-path</b> (string, required): file system path to the | ||
* "encode" Tensorflow-Lite model, which is used to perform each | ||
* autoregressive step over the mel frames; its inputs should be shaped | ||
* [mel-length, mel-width], and its outputs [encode-width], with an | ||
* additional state input/output shaped [state-width] | ||
* </li> | ||
* <li> | ||
* <b>wake-detect-path</b> (string, required): file system path to the | ||
* "detect" Tensorflow-Lite model; its inputs should be shaped | ||
* [encode-length, encode-width], and its outputs [1] | ||
* </li> | ||
* </ul> | ||
* | ||
* <p> | ||
* Google Speech also requires configuration: | ||
* </p> | ||
* | ||
* <ul> | ||
* <li> | ||
* <b>google-credentials</b> (string): json-stringified google service | ||
* account credentials, used to authenticate with the speech API | ||
* </li> | ||
* <li> | ||
* <b>locale</b> (string): language code for speech recognition | ||
* </li> | ||
* </ul> | ||
* | ||
* @see io.spokestack.spokestack.wakeword.WakewordTrigger | ||
* @see io.spokestack.spokestack.google.GoogleSpeechRecognizer | ||
*/ | ||
public class TFWakewordGoogleASR implements PipelineProfile { | ||
@Override | ||
public SpeechPipeline.Builder apply(SpeechPipeline.Builder builder) { | ||
return builder | ||
.setInputClass( | ||
"io.spokestack.spokestack.android.MicrophoneInput") | ||
.addStageClass( | ||
"io.spokestack.spokestack.webrtc.AcousticNoiseSuppressor") | ||
.setProperty("ans-policy", "aggressive") | ||
.addStageClass( | ||
"io.spokestack.spokestack.webrtc.AutomaticGainControl") | ||
.setProperty("agc-target-level-dbfs", 3) | ||
.setProperty("agc-compression-gain-db", 15) | ||
.addStageClass( | ||
"io.spokestack.spokestack.webrtc.VoiceActivityDetector") | ||
.setProperty("vad-mode", "very-aggressive") | ||
.setProperty("vad-fall-delay", 800) | ||
.addStageClass( | ||
"io.spokestack.spokestack.wakeword.WakewordTrigger") | ||
.setProperty("wake-threshold", 0.9) | ||
.setProperty("pre-emphasis", 0.97) | ||
.addStageClass("io.spokestack.spokestack.ActivationTimeout") | ||
.setProperty("active-min", 2000) | ||
.addStageClass( | ||
"io.spokestack.spokestack.google.GoogleSpeechRecognizer"); | ||
} | ||
} |
Oops, something went wrong.