This repository has been archived by the owner on May 6, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #30 from spokestack/jz-platform-asr
Support ASR via Android's built-in SpeechRecognizer
- Loading branch information
Showing
11 changed files
with
538 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
199 changes: 199 additions & 0 deletions
199
src/main/java/io/spokestack/spokestack/android/AndroidSpeechRecognizer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,199 @@ | ||
package io.spokestack.spokestack.android; | ||
|
||
import android.content.Context; | ||
import android.content.Intent; | ||
import android.os.Bundle; | ||
import android.speech.RecognitionListener; | ||
import android.speech.RecognizerIntent; | ||
import android.speech.SpeechRecognizer; | ||
import io.spokestack.spokestack.SpeechConfig; | ||
import io.spokestack.spokestack.SpeechContext; | ||
import io.spokestack.spokestack.SpeechProcessor; | ||
import io.spokestack.spokestack.util.TaskHandler; | ||
|
||
import java.nio.ByteBuffer; | ||
import java.util.ArrayList; | ||
|
||
|
||
/** | ||
* Speech recognition using built-in Android APIs. | ||
* | ||
* <p> | ||
* This component uses the built-in Android {@code SpeechRecognizer} to process | ||
* user speech. | ||
* </p> | ||
* | ||
* <p> | ||
* As part of normal operation, {@code SpeechRecognizer} plays system sounds | ||
* both when it starts and stops actively listening to the user, just like the | ||
* built-in Google Assistant. This behavior is not optional; it can be | ||
* suppressed by having the {@code AudioManager} mute the music stream, but | ||
* muting and restoring the volume of that stream at exactly the right times is | ||
* error-prone, so such behavior has been omitted from this component. | ||
* </p> | ||
* | ||
* <p> | ||
* Note that this component requires an Android {@code Context} to be attached | ||
* to the pipeline that has created it. If the pipeline is meant to persist | ||
* across different {@code Activity}s, the {@code Context} used must either be | ||
* the <em>application</em> context, or it must be re-set on the pipeline's | ||
* {@code SpeechContext} object when the Activity context changes. | ||
* </p> | ||
* | ||
* <p> | ||
* Implementation of {@code SpeechRecognizer} is left up to devices, and even | ||
* though the API exists, an actual recognizer may not be present on all | ||
* devices. If using this component, it's a good idea to call {@code | ||
* SpeechRecognizer.isRecognitionAvailable()} before adding it to the pipeline | ||
* to determine whether it will be viable on the current device. | ||
* </p> | ||
* | ||
* <p> | ||
* In addition, testing has shown that some older devices may return {@code | ||
* true} for the preceding call but have outdated implementations that | ||
* consistently throw errors. For this reason, it's a good idea to have an | ||
* {@link io.spokestack.spokestack.OnSpeechEventListener} set up to detect | ||
* {@link SpeechRecognizerError}s and have an appropriate fallback strategy in | ||
* place. | ||
* </p> | ||
*/ | ||
public final class AndroidSpeechRecognizer implements SpeechProcessor { | ||
private boolean streaming; | ||
private SpeechRecognizer speechRecognizer; | ||
private TaskHandler taskHandler; | ||
|
||
/** | ||
* Initializes a new recognizer. | ||
* | ||
* @param speechConfig Spokestack pipeline configuration | ||
*/ | ||
@SuppressWarnings("unused") | ||
public AndroidSpeechRecognizer(SpeechConfig speechConfig) { | ||
this.streaming = false; | ||
this.taskHandler = new TaskHandler(true); | ||
} | ||
|
||
/** | ||
* Create an instance of the recognizer with an injected {@link | ||
* TaskHandler}. Used for testing. | ||
* | ||
* @param speechConfig Spokestack pipeline configuration | ||
* @param handler The task handler used to interact with the speech | ||
* recognizer. | ||
*/ | ||
AndroidSpeechRecognizer(SpeechConfig speechConfig, | ||
TaskHandler handler) { | ||
this(speechConfig); | ||
this.taskHandler = handler; | ||
} | ||
|
||
@Override | ||
public void process(SpeechContext context, ByteBuffer frame) { | ||
if (this.speechRecognizer == null) { | ||
createRecognizer(context); | ||
} | ||
|
||
if (context.isActive()) { | ||
if (!this.streaming) { | ||
begin(); | ||
this.streaming = true; | ||
} | ||
} else { | ||
this.streaming = false; | ||
} | ||
} | ||
|
||
private void createRecognizer(SpeechContext context) { | ||
this.taskHandler.run(() -> { | ||
Context androidContext = context.getAndroidContext(); | ||
this.speechRecognizer = | ||
SpeechRecognizer.createSpeechRecognizer(androidContext); | ||
this.speechRecognizer.setRecognitionListener( | ||
new SpokestackListener(context)); | ||
}); | ||
} | ||
|
||
private void begin() { | ||
this.taskHandler.run(() -> { | ||
Intent recognitionIntent = createRecognitionIntent(); | ||
this.speechRecognizer.startListening(recognitionIntent); | ||
}); | ||
} | ||
|
||
private Intent createRecognitionIntent() { | ||
Intent intent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH); | ||
intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, | ||
RecognizerIntent.LANGUAGE_MODEL_FREE_FORM); | ||
// added in API level 23 | ||
intent.putExtra("android.speech.extra.PREFER_OFFLINE", true); | ||
return intent; | ||
} | ||
|
||
@Override | ||
public void close() { | ||
this.taskHandler.run(() -> this.speechRecognizer.destroy()); | ||
} | ||
|
||
/** | ||
* An internal listener used to dispatch events from the Android speech | ||
* recognizer to the Spokestack {@link SpeechContext}. | ||
*/ | ||
private static class SpokestackListener implements RecognitionListener { | ||
private final SpeechContext context; | ||
|
||
SpokestackListener(SpeechContext speechContext) { | ||
this.context = speechContext; | ||
} | ||
|
||
@Override | ||
public void onError(int error) { | ||
this.context.setError(new SpeechRecognizerError(error)); | ||
this.context.dispatch(SpeechContext.Event.ERROR); | ||
} | ||
|
||
@Override | ||
public void onResults(Bundle results) { | ||
String transcript = extractTranscript(results); | ||
float confidence = extractConfidence(results); | ||
this.context.setTranscript(transcript); | ||
this.context.setConfidence(confidence); | ||
this.context.dispatch(SpeechContext.Event.RECOGNIZE); | ||
} | ||
|
||
private String extractTranscript(Bundle results) { | ||
ArrayList<String> nBest = results.getStringArrayList( | ||
SpeechRecognizer.RESULTS_RECOGNITION); | ||
return nBest.get(0); | ||
} | ||
|
||
private float extractConfidence(Bundle results) { | ||
float[] confidences = results.getFloatArray( | ||
SpeechRecognizer.CONFIDENCE_SCORES); | ||
return confidences.length > 0 ? confidences[0] : 0.0f; | ||
} | ||
|
||
// other methods required by RecognitionListener but useless for our | ||
// current purposes | ||
|
||
@Override | ||
public void onReadyForSpeech(Bundle params) { } | ||
|
||
@Override | ||
public void onBeginningOfSpeech() { } | ||
|
||
@Override | ||
public void onRmsChanged(float rmsdB) { } | ||
|
||
@Override | ||
public void onBufferReceived(byte[] buffer) { } | ||
|
||
@Override | ||
public void onEndOfSpeech() { } | ||
|
||
@Override | ||
public void onPartialResults(Bundle partialResults) { } | ||
|
||
@Override | ||
public void onEvent(int eventType, Bundle params) { } | ||
} | ||
} |
50 changes: 50 additions & 0 deletions
50
src/main/java/io/spokestack/spokestack/android/SpeechRecognizerError.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
package io.spokestack.spokestack.android; | ||
|
||
/**
 * A simple exception class that wraps error codes from {@code
 * android.speech.SpeechRecognizer}.
 */
public class SpeechRecognizerError extends Exception {

    /**
     * Create a new SpeechRecognizerError from an error code provided by the
     * Android system.
     *
     * @param errorCode The Android system error code.
     */
    public SpeechRecognizerError(int errorCode) {
        super("SpeechRecognizer error code " + errorCode + ": "
              + SpeechRecognizerError.errorDescription(errorCode));
    }

    /**
     * Maps an error code to the name of its {@link Description}.
     *
     * @param errorCode The Android system error code.
     * @return The name of the description at index {@code errorCode}, or
     * {@code UNKNOWN_ERROR} for any code outside the known range, including
     * negative codes.
     */
    private static String errorDescription(int errorCode) {
        // both bounds are checked: an unexpected code must never make the
        // construction of the error itself throw
        if (errorCode >= 0 && errorCode < Description.VALUES.length) {
            return Description.VALUES[errorCode].toString();
        }
        return Description.UNKNOWN_ERROR.toString();
    }

    /**
     * An enumeration of the SpeechRecognizer error descriptions aligned with
     * their integer constant values. The declaration order is significant:
     * each constant's position is used as the lookup index for its error
     * code.
     */
    @SuppressWarnings("checkstyle:javadocvariable")
    public enum Description {
        UNKNOWN_ERROR,
        NETWORK_TIMEOUT,
        NETWORK_ERROR,
        AUDIO_RECORDING_ERROR,
        SERVER_ERROR,
        CLIENT_ERROR,
        SPEECH_TIMEOUT,
        NO_RECOGNITION_MATCH,
        RECOGNIZER_BUSY,
        INSUFFICIENT_PERMISSIONS;

        /**
         * A cache of the error descriptions to reduce overhead accessing them.
         */
        public static final Description[] VALUES = values();
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.