
Commit 1446c54

Merge pull request #30 from spokestack/jz-platform-asr

Support ASR via Android's built-in SpeechRecognizer

space-pope committed Jan 13, 2020
2 parents: c75da2e + 24113a3

Showing 11 changed files with 538 additions and 16 deletions.
2 changes: 1 addition & 1 deletion src/main/java/io/spokestack/spokestack/SpeechConfig.java
@@ -109,7 +109,7 @@ public int getInteger(String key) {
}

/**
* fetches an string value, coercing if needed.
* fetches a double value, coercing if needed.
* @param key key to look up
* @param defaultValue value to return if not found
* @return the double configuration value if found, defaultValue otherwise
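The corrected accessor reads a double, coercing the stored value if needed and falling back to the supplied default when the key is missing. A minimal usage sketch; the put() setter and the key name used here are assumptions for illustration, not part of this diff:

SpeechConfig config = new SpeechConfig();
// assumed setter; value stored as a String for the sake of the example
config.put("vad-fall-delay", "500");
// getDouble coerces the stored String to a double; the second argument
// is returned when the key is absent
double fallDelay = config.getDouble("vad-fall-delay", 500.0);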
20 changes: 20 additions & 0 deletions src/main/java/io/spokestack/spokestack/SpeechContext.java
@@ -1,5 +1,8 @@
package io.spokestack.spokestack;

import android.content.Context;
import androidx.annotation.Nullable;

import java.util.Deque;
import java.util.List;
import java.util.ArrayList;
@@ -68,6 +71,7 @@ public int value() {

private final List<OnSpeechEventListener> listeners = new ArrayList<>();
private final int traceLevel;
private Context appContext;
private Deque<ByteBuffer> buffer;
private boolean speech;
private boolean active;
@@ -86,6 +90,22 @@ public SpeechContext(SpeechConfig config) {
TraceLevel.NONE.value());
}

/**
* @return the Android context if set
*/
@Nullable
public Context getAndroidContext() {
return appContext;
}

/**
* sets the Android context.
* @param androidContext The Android context
*/
public void setAndroidContext(@Nullable Context androidContext) {
this.appContext = androidContext;
}

/** @return speech frame buffer */
public Deque<ByteBuffer> getBuffer() {
return this.buffer;
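These accessors let components retrieve an Android Context at runtime and let applications replace it, for example when the pipeline outlives a single Activity. A brief sketch; it assumes an existing pipeline object that exposes its SpeechContext via getContext(), which is not shown in this diff:

// re-point the SpeechContext at a long-lived context so components such as
// AndroidSpeechRecognizer keep working after the original Activity is gone
// (assumes this code runs inside an Activity)
pipeline.getContext().setAndroidContext(getApplicationContext());

// consumers should null-check, since the context may never have been set
Context androidContext = pipeline.getContext().getAndroidContext();
if (androidContext == null) {
    // skip Context-dependent setup or report a configuration error
}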
18 changes: 18 additions & 0 deletions src/main/java/io/spokestack/spokestack/SpeechPipeline.java
@@ -1,5 +1,7 @@
package io.spokestack.spokestack;

import android.content.Context;

import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
@@ -83,6 +85,7 @@ private SpeechPipeline(Builder builder) {
this.stageClasses = builder.stageClasses;
this.config = builder.config;
this.context = new SpeechContext(this.config);
this.context.setAndroidContext(builder.appContext);
this.stages = new ArrayList<>();

for (OnSpeechEventListener l : builder.listeners) {
@@ -255,6 +258,7 @@ public static final class Builder {
private String inputClass;
private List<String> stageClasses = new ArrayList<>();
private SpeechConfig config = new SpeechConfig();
private Context appContext;
private List<OnSpeechEventListener> listeners = new ArrayList<>();

/**
@@ -310,6 +314,20 @@ public Builder setConfig(SpeechConfig value) {
return this;
}

/**
* Sets the android context for the pipeline. Some components may
* require an application context instead of an activity context;
* see individual component documentation for details.
*
* @param androidContext the android context for the pipeline.
* @return this
* @see io.spokestack.spokestack.android.AndroidSpeechRecognizer
*/
public Builder setAndroidContext(Context androidContext) {
this.appContext = androidContext;
return this;
}

/**
* sets a pipeline configuration value.
*
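Combined with the recognizer added below, the new builder method supports a pipeline configuration like the following sketch. The input and VAD stage classes named here, and the appContext variable, are assumptions about a typical setup rather than code from this commit:

SpeechPipeline pipeline = new SpeechPipeline.Builder()
        .setInputClass("io.spokestack.spokestack.android.MicrophoneInput")
        .addStageClass("io.spokestack.spokestack.webrtc.VoiceActivityDetector")
        .addStageClass(
            "io.spokestack.spokestack.android.AndroidSpeechRecognizer")
        // prefer the application context so the pipeline can outlive
        // any single Activity
        .setAndroidContext(appContext)
        .build();
// pipeline.start() would then begin processing audio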
199 changes: 199 additions & 0 deletions src/main/java/io/spokestack/spokestack/android/AndroidSpeechRecognizer.java
@@ -0,0 +1,199 @@
package io.spokestack.spokestack.android;

import android.content.Context;
import android.content.Intent;
import android.os.Bundle;
import android.speech.RecognitionListener;
import android.speech.RecognizerIntent;
import android.speech.SpeechRecognizer;
import io.spokestack.spokestack.SpeechConfig;
import io.spokestack.spokestack.SpeechContext;
import io.spokestack.spokestack.SpeechProcessor;
import io.spokestack.spokestack.util.TaskHandler;

import java.nio.ByteBuffer;
import java.util.ArrayList;


/**
* Speech recognition using built-in Android APIs.
*
* <p>
* This component uses the built-in Android {@code SpeechRecognizer} to process
* user speech.
* </p>
*
* <p>
* As part of normal operation, {@code SpeechRecognizer} plays system sounds
* both when it starts and stops actively listening to the user, just like the
* built-in Google Assistant. This behavior is not optional; it can be
 * suppressed by having the {@code AudioManager} mute the music stream, but
* muting and restoring the volume of that stream at exactly the right times is
* error-prone, so such behavior has been omitted from this component.
* </p>
*
* <p>
* Note that this component requires an Android {@code Context} to be attached
* to the pipeline that has created it. If the pipeline is meant to persist
* across different {@code Activity}s, the {@code Context} used must either be
* the <em>application</em> context, or it must be re-set on the pipeline's
* {@code SpeechContext} object when the Activity context changes.
* </p>
*
* <p>
* Implementation of {@code SpeechRecognizer} is left up to devices, and even
* though the API exists, an actual recognizer may not be present on all
* devices. If using this component, it's a good idea to call {@code
* SpeechRecognizer.isRecognitionAvailable()} before adding it to the pipeline
* to determine whether it will be viable on the current device.
* </p>
*
* <p>
* In addition, testing has shown that some older devices may return {@code
* true} for the preceding call but have outdated implementations that
* consistently throw errors. For this reason, it's a good idea to have an
* {@link io.spokestack.spokestack.OnSpeechEventListener} set up to detect
* {@link SpeechRecognizerError}s and have an appropriate fallback strategy in
* place.
* </p>
*/
public final class AndroidSpeechRecognizer implements SpeechProcessor {
private boolean streaming;
private SpeechRecognizer speechRecognizer;
private TaskHandler taskHandler;

/**
* Initializes a new recognizer.
*
* @param speechConfig Spokestack pipeline configuration
*/
@SuppressWarnings("unused")
public AndroidSpeechRecognizer(SpeechConfig speechConfig) {
this.streaming = false;
this.taskHandler = new TaskHandler(true);
}

/**
* Create an instance of the recognizer with an injected {@link
* TaskHandler}. Used for testing.
*
* @param speechConfig Spokestack pipeline configuration
* @param handler The task handler used to interact with the speech
* recognizer.
*/
AndroidSpeechRecognizer(SpeechConfig speechConfig,
TaskHandler handler) {
this(speechConfig);
this.taskHandler = handler;
}

@Override
public void process(SpeechContext context, ByteBuffer frame) {
if (this.speechRecognizer == null) {
createRecognizer(context);
}

if (context.isActive()) {
if (!this.streaming) {
begin();
this.streaming = true;
}
} else {
this.streaming = false;
}
}

private void createRecognizer(SpeechContext context) {
this.taskHandler.run(() -> {
Context androidContext = context.getAndroidContext();
this.speechRecognizer =
SpeechRecognizer.createSpeechRecognizer(androidContext);
this.speechRecognizer.setRecognitionListener(
new SpokestackListener(context));
});
}

private void begin() {
this.taskHandler.run(() -> {
Intent recognitionIntent = createRecognitionIntent();
this.speechRecognizer.startListening(recognitionIntent);
});
}

private Intent createRecognitionIntent() {
Intent intent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);
// added in API level 23
intent.putExtra("android.speech.extra.PREFER_OFFLINE", true);
return intent;
}

@Override
public void close() {
this.taskHandler.run(() -> this.speechRecognizer.destroy());
}

/**
* An internal listener used to dispatch events from the Android speech
* recognizer to the Spokestack {@link SpeechContext}.
*/
private static class SpokestackListener implements RecognitionListener {
private final SpeechContext context;

SpokestackListener(SpeechContext speechContext) {
this.context = speechContext;
}

@Override
public void onError(int error) {
this.context.setError(new SpeechRecognizerError(error));
this.context.dispatch(SpeechContext.Event.ERROR);
}

@Override
public void onResults(Bundle results) {
String transcript = extractTranscript(results);
float confidence = extractConfidence(results);
this.context.setTranscript(transcript);
this.context.setConfidence(confidence);
this.context.dispatch(SpeechContext.Event.RECOGNIZE);
}

private String extractTranscript(Bundle results) {
ArrayList<String> nBest = results.getStringArrayList(
SpeechRecognizer.RESULTS_RECOGNITION);
return nBest.get(0);
}

private float extractConfidence(Bundle results) {
float[] confidences = results.getFloatArray(
SpeechRecognizer.CONFIDENCE_SCORES);
return confidences.length > 0 ? confidences[0] : 0.0f;
}

// other methods required by RecognitionListener but useless for our
// current purposes

@Override
public void onReadyForSpeech(Bundle params) { }

@Override
public void onBeginningOfSpeech() { }

@Override
public void onRmsChanged(float rmsdB) { }

@Override
public void onBufferReceived(byte[] buffer) { }

@Override
public void onEndOfSpeech() { }

@Override
public void onPartialResults(Bundle partialResults) { }

@Override
public void onEvent(int eventType, Bundle params) { }
}
}
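Following the guidance in the class Javadoc, an app can gate this stage on recognizer availability and watch for recognizer errors at runtime. A sketch under stated assumptions: the builder and appContext variables, the addStageClass/addOnSpeechEventListener builder methods, and SpeechContext.getError() are taken from the surrounding API as understood here, not from this commit:

// only add the platform ASR stage when the device reports a recognizer
if (SpeechRecognizer.isRecognitionAvailable(appContext)) {
    builder.addStageClass(
        "io.spokestack.spokestack.android.AndroidSpeechRecognizer");
}

// watch for failures from outdated implementations and fall back
builder.addOnSpeechEventListener((event, speechContext) -> {
    if (event == SpeechContext.Event.ERROR
            && speechContext.getError() instanceof SpeechRecognizerError) {
        // e.g. rebuild the pipeline with a cloud recognizer stage
    }
});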
50 changes: 50 additions & 0 deletions src/main/java/io/spokestack/spokestack/android/SpeechRecognizerError.java
@@ -0,0 +1,50 @@
package io.spokestack.spokestack.android;

/**
* A simple exception class that wraps error codes from {@code
* android.speech.SpeechRecognizer}.
*/
public class SpeechRecognizerError extends Exception {

/**
* Create a new SpeechRecognizerError from an error code provided by the
* Android system.
*
* @param errorCode The Android system error code.
*/
public SpeechRecognizerError(int errorCode) {
super("SpeechRecognizer error code " + errorCode + ": "
+ SpeechRecognizerError.errorDescription(errorCode));
}

private static String errorDescription(int errorCode) {
if (errorCode < Description.VALUES.length) {
return Description.VALUES[errorCode].toString();
} else {
return Description.UNKNOWN_ERROR.toString();
}
}

/**
* An enumeration of the SpeechRecognizer error descriptions aligned with
* their integer constant values.
*/
@SuppressWarnings("checkstyle:javadocvariable")
public enum Description {
UNKNOWN_ERROR,
NETWORK_TIMEOUT,
NETWORK_ERROR,
AUDIO_RECORDING_ERROR,
SERVER_ERROR,
CLIENT_ERROR,
SPEECH_TIMEOUT,
NO_RECOGNITION_MATCH,
RECOGNIZER_BUSY,
INSUFFICIENT_PERMISSIONS;

/**
* A cache of the error descriptions to reduce overhead accessing them.
*/
public static final Description[] VALUES = values();
}
}
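The Description ordinals are padded with UNKNOWN_ERROR at index 0 so that they line up with the integer constants defined by android.speech.SpeechRecognizer (ERROR_NETWORK_TIMEOUT = 1 through ERROR_INSUFFICIENT_PERMISSIONS = 9). A quick illustration of the resulting messages:

// SpeechRecognizer.ERROR_SPEECH_TIMEOUT is 6, matching Description ordinal 6
new SpeechRecognizerError(6).getMessage();
// -> "SpeechRecognizer error code 6: SPEECH_TIMEOUT"

// codes outside the known range fall back to UNKNOWN_ERROR
new SpeechRecognizerError(42).getMessage();
// -> "SpeechRecognizer error code 42: UNKNOWN_ERROR"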
@@ -204,7 +204,7 @@ public void onError(Throwable e) {
public void onCompleted() {
this.context.setTranscript(this.transcript);
this.context.setConfidence(this.confidence);
if (this.transcript != "")
if (!this.transcript.equals(""))
this.context.dispatch(SpeechContext.Event.RECOGNIZE);
else
this.context.dispatch(SpeechContext.Event.TIMEOUT);
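The change above replaces a reference comparison with a value comparison: in Java, != on Strings checks object identity rather than character content, so a transcript built up at runtime would rarely compare equal to the literal "". A standalone illustration, not code from this commit:

String transcript = new String("");          // same content, different object
boolean byReference = (transcript != "");    // true: distinct references
boolean byValue = !transcript.equals("");    // false: contents are equal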
@@ -30,15 +30,15 @@ public String getUrl() {
/**
* Wrapper class used for deserializing synthesis responses.
*/
private class ResponseData {
private static class ResponseData {
private ResponseMethod synthesizeText;
private ResponseMethod synthesizeSsml;
}

/**
* Wrapper class used for deserializing synthesis responses.
*/
private class ResponseMethod {
private static class ResponseMethod {
private String url;
}
}