diff --git a/src/main/java/io/spokestack/spokestack/SpeechConfig.java b/src/main/java/io/spokestack/spokestack/SpeechConfig.java index 2ea132b..395cf3f 100644 --- a/src/main/java/io/spokestack/spokestack/SpeechConfig.java +++ b/src/main/java/io/spokestack/spokestack/SpeechConfig.java @@ -109,7 +109,7 @@ public int getInteger(String key) { } /** - * fetches an string value, coercing if needed. + * fetches a double value, coercing if needed. * @param key key to look up * @param defaultValue value to return if not found * @return the double configuration value if found, defaultValue otherwise diff --git a/src/main/java/io/spokestack/spokestack/SpeechContext.java b/src/main/java/io/spokestack/spokestack/SpeechContext.java index 307d4fa..a7fabf6 100644 --- a/src/main/java/io/spokestack/spokestack/SpeechContext.java +++ b/src/main/java/io/spokestack/spokestack/SpeechContext.java @@ -1,5 +1,8 @@ package io.spokestack.spokestack; +import android.content.Context; +import androidx.annotation.Nullable; + import java.util.Deque; import java.util.List; import java.util.ArrayList; @@ -68,6 +71,7 @@ public int value() { private final List listeners = new ArrayList<>(); private final int traceLevel; + private Context appContext; private Deque buffer; private boolean speech; private boolean active; @@ -86,6 +90,22 @@ public SpeechContext(SpeechConfig config) { TraceLevel.NONE.value()); } + /** + * @return the Android context if set + */ + @Nullable + public Context getAndroidContext() { + return appContext; + } + + /** + * sets the Android context. 
+ * @param androidContext The Android context + */ + public void setAndroidContext(@Nullable Context androidContext) { + this.appContext = androidContext; + } + /** @return speech frame buffer */ public Deque getBuffer() { return this.buffer; diff --git a/src/main/java/io/spokestack/spokestack/SpeechPipeline.java b/src/main/java/io/spokestack/spokestack/SpeechPipeline.java index 4a92c92..a42d88a 100644 --- a/src/main/java/io/spokestack/spokestack/SpeechPipeline.java +++ b/src/main/java/io/spokestack/spokestack/SpeechPipeline.java @@ -1,5 +1,7 @@ package io.spokestack.spokestack; +import android.content.Context; + import java.util.ArrayList; import java.util.LinkedList; import java.util.List; @@ -83,6 +85,7 @@ private SpeechPipeline(Builder builder) { this.stageClasses = builder.stageClasses; this.config = builder.config; this.context = new SpeechContext(this.config); + this.context.setAndroidContext(builder.appContext); this.stages = new ArrayList<>(); for (OnSpeechEventListener l : builder.listeners) { @@ -255,6 +258,7 @@ public static final class Builder { private String inputClass; private List stageClasses = new ArrayList<>(); private SpeechConfig config = new SpeechConfig(); + private Context appContext; private List listeners = new ArrayList<>(); /** @@ -310,6 +314,20 @@ public Builder setConfig(SpeechConfig value) { return this; } + /** + * Sets the android context for the pipeline. Some components may + * require an application context instead of an activity context; + * see individual component documentation for details. + * + * @param androidContext the android context for the pipeline. + * @return this + * @see io.spokestack.spokestack.android.AndroidSpeechRecognizer + */ + public Builder setAndroidContext(Context androidContext) { + this.appContext = androidContext; + return this; + } + /** * sets a pipeline configuration value. 
* diff --git a/src/main/java/io/spokestack/spokestack/android/AndroidSpeechRecognizer.java b/src/main/java/io/spokestack/spokestack/android/AndroidSpeechRecognizer.java new file mode 100644 index 0000000..22c778d --- /dev/null +++ b/src/main/java/io/spokestack/spokestack/android/AndroidSpeechRecognizer.java @@ -0,0 +1,199 @@ +package io.spokestack.spokestack.android; + +import android.content.Context; +import android.content.Intent; +import android.os.Bundle; +import android.speech.RecognitionListener; +import android.speech.RecognizerIntent; +import android.speech.SpeechRecognizer; +import io.spokestack.spokestack.SpeechConfig; +import io.spokestack.spokestack.SpeechContext; +import io.spokestack.spokestack.SpeechProcessor; +import io.spokestack.spokestack.util.TaskHandler; + +import java.nio.ByteBuffer; +import java.util.ArrayList; + + +/** + * Speech recognition using built-in Android APIs. + * + *

+ * This component uses the built-in Android {@code SpeechRecognizer} to process + * user speech. + *

+ * + *

+ * As part of normal operation, {@code SpeechRecognizer} plays system sounds + * both when it starts and stops actively listening to the user, just like the + * built-in Google Assistant. This behavior is not optional; it can be + * suppressed by having the {@code AudioManager} mute the music stream, but + * muting and restoring the volume of that stream at exactly the right times is + * error-prone, so such behavior has been omitted from this component. + *

+ * + *

+ * Note that this component requires an Android {@code Context} to be attached + * to the pipeline that has created it. If the pipeline is meant to persist + * across different {@code Activity}s, the {@code Context} used must either be + * the application context, or it must be re-set on the pipeline's + * {@code SpeechContext} object when the Activity context changes. + *

+ * + *

+ * Implementation of {@code SpeechRecognizer} is left up to devices, and even + * though the API exists, an actual recognizer may not be present on all + * devices. If using this component, it's a good idea to call {@code + * SpeechRecognizer.isRecognitionAvailable()} before adding it to the pipeline + * to determine whether it will be viable on the current device. + *

+ * + *

+ * In addition, testing has shown that some older devices may return {@code + * true} for the preceding call but have outdated implementations that + * consistently throw errors. For this reason, it's a good idea to have an + * {@link io.spokestack.spokestack.OnSpeechEventListener} set up to detect + * {@link SpeechRecognizerError}s and have an appropriate fallback strategy in + * place. + *

+ */ +public final class AndroidSpeechRecognizer implements SpeechProcessor { + private boolean streaming; + private SpeechRecognizer speechRecognizer; + private TaskHandler taskHandler; + + /** + * Initializes a new recognizer. + * + * @param speechConfig Spokestack pipeline configuration + */ + @SuppressWarnings("unused") + public AndroidSpeechRecognizer(SpeechConfig speechConfig) { + this.streaming = false; + this.taskHandler = new TaskHandler(true); + } + + /** + * Create an instance of the recognizer with an injected {@link + * TaskHandler}. Used for testing. + * + * @param speechConfig Spokestack pipeline configuration + * @param handler The task handler used to interact with the speech + * recognizer. + */ + AndroidSpeechRecognizer(SpeechConfig speechConfig, + TaskHandler handler) { + this(speechConfig); + this.taskHandler = handler; + } + + @Override + public void process(SpeechContext context, ByteBuffer frame) { + if (this.speechRecognizer == null) { + createRecognizer(context); + } + + if (context.isActive()) { + if (!this.streaming) { + begin(); + this.streaming = true; + } + } else { + this.streaming = false; + } + } + + private void createRecognizer(SpeechContext context) { + this.taskHandler.run(() -> { + Context androidContext = context.getAndroidContext(); + this.speechRecognizer = + SpeechRecognizer.createSpeechRecognizer(androidContext); + this.speechRecognizer.setRecognitionListener( + new SpokestackListener(context)); + }); + } + + private void begin() { + this.taskHandler.run(() -> { + Intent recognitionIntent = createRecognitionIntent(); + this.speechRecognizer.startListening(recognitionIntent); + }); + } + + private Intent createRecognitionIntent() { + Intent intent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH); + intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, + RecognizerIntent.LANGUAGE_MODEL_FREE_FORM); + // added in API level 23 + intent.putExtra("android.speech.extra.PREFER_OFFLINE", true); + return intent; + } + + 
@Override + public void close() { + this.taskHandler.run(() -> this.speechRecognizer.destroy()); + } + + /** + * An internal listener used to dispatch events from the Android speech + * recognizer to the Spokestack {@link SpeechContext}. + */ + private static class SpokestackListener implements RecognitionListener { + private final SpeechContext context; + + SpokestackListener(SpeechContext speechContext) { + this.context = speechContext; + } + + @Override + public void onError(int error) { + this.context.setError(new SpeechRecognizerError(error)); + this.context.dispatch(SpeechContext.Event.ERROR); + } + + @Override + public void onResults(Bundle results) { + String transcript = extractTranscript(results); + float confidence = extractConfidence(results); + this.context.setTranscript(transcript); + this.context.setConfidence(confidence); + this.context.dispatch(SpeechContext.Event.RECOGNIZE); + } + + private String extractTranscript(Bundle results) { + ArrayList nBest = results.getStringArrayList( + SpeechRecognizer.RESULTS_RECOGNITION); + return nBest.get(0); + } + + private float extractConfidence(Bundle results) { + float[] confidences = results.getFloatArray( + SpeechRecognizer.CONFIDENCE_SCORES); + return confidences.length > 0 ? 
confidences[0] : 0.0f; + } + + // other methods required by RecognitionListener but useless for our + // current purposes + + @Override + public void onReadyForSpeech(Bundle params) { } + + @Override + public void onBeginningOfSpeech() { } + + @Override + public void onRmsChanged(float rmsdB) { } + + @Override + public void onBufferReceived(byte[] buffer) { } + + @Override + public void onEndOfSpeech() { } + + @Override + public void onPartialResults(Bundle partialResults) { } + + @Override + public void onEvent(int eventType, Bundle params) { } + } +} diff --git a/src/main/java/io/spokestack/spokestack/android/SpeechRecognizerError.java b/src/main/java/io/spokestack/spokestack/android/SpeechRecognizerError.java new file mode 100644 index 0000000..33e1793 --- /dev/null +++ b/src/main/java/io/spokestack/spokestack/android/SpeechRecognizerError.java @@ -0,0 +1,50 @@ +package io.spokestack.spokestack.android; + +/** + * A simple exception class that wraps error codes from {@code + * android.speech.SpeechRecognizer}. + */ +public class SpeechRecognizerError extends Exception { + + /** + * Create a new SpeechRecognizerError from an error code provided by the + * Android system. + * + * @param errorCode The Android system error code. + */ + public SpeechRecognizerError(int errorCode) { + super("SpeechRecognizer error code " + errorCode + ": " + + SpeechRecognizerError.errorDescription(errorCode)); + } + + private static String errorDescription(int errorCode) { + if (errorCode < Description.VALUES.length) { + return Description.VALUES[errorCode].toString(); + } else { + return Description.UNKNOWN_ERROR.toString(); + } + } + + /** + * An enumeration of the SpeechRecognizer error descriptions aligned with + * their integer constant values. 
+ */ + @SuppressWarnings("checkstyle:javadocvariable") + public enum Description { + UNKNOWN_ERROR, + NETWORK_TIMEOUT, + NETWORK_ERROR, + AUDIO_RECORDING_ERROR, + SERVER_ERROR, + CLIENT_ERROR, + SPEECH_TIMEOUT, + NO_RECOGNITION_MATCH, + RECOGNIZER_BUSY, + INSUFFICIENT_PERMISSIONS; + + /** + * A cache of the error descriptions to reduce overhead accessing them. + */ + public static final Description[] VALUES = values(); + } +} diff --git a/src/main/java/io/spokestack/spokestack/google/GoogleSpeechRecognizer.java b/src/main/java/io/spokestack/spokestack/google/GoogleSpeechRecognizer.java index 90fb108..97a3c9b 100644 --- a/src/main/java/io/spokestack/spokestack/google/GoogleSpeechRecognizer.java +++ b/src/main/java/io/spokestack/spokestack/google/GoogleSpeechRecognizer.java @@ -204,7 +204,7 @@ public void onError(Throwable e) { public void onCompleted() { this.context.setTranscript(this.transcript); this.context.setConfidence(this.confidence); - if (this.transcript != "") + if (!this.transcript.equals("")) this.context.dispatch(SpeechContext.Event.RECOGNIZE); else this.context.dispatch(SpeechContext.Event.TIMEOUT); diff --git a/src/main/java/io/spokestack/spokestack/tts/SpokestackSynthesisResponse.java b/src/main/java/io/spokestack/spokestack/tts/SpokestackSynthesisResponse.java index 458727e..3412a5f 100644 --- a/src/main/java/io/spokestack/spokestack/tts/SpokestackSynthesisResponse.java +++ b/src/main/java/io/spokestack/spokestack/tts/SpokestackSynthesisResponse.java @@ -30,7 +30,7 @@ public String getUrl() { /** * Wrapper class used for deserializing synthesis responses. */ - private class ResponseData { + private static class ResponseData { private ResponseMethod synthesizeText; private ResponseMethod synthesizeSsml; } @@ -38,7 +38,7 @@ private class ResponseData { /** * Wrapper class used for deserializing synthesis responses. 
*/ - private class ResponseMethod { + private static class ResponseMethod { private String url; } } diff --git a/src/test/java/io/spokestack/spokestack/SpeechConfigTest.java b/src/test/java/io/spokestack/spokestack/SpeechConfigTest.java index 1496bb0..a628e69 100644 --- a/src/test/java/io/spokestack/spokestack/SpeechConfigTest.java +++ b/src/test/java/io/spokestack/spokestack/SpeechConfigTest.java @@ -1,9 +1,10 @@ package io.spokestack.spokestack; -import java.util.*; - import org.junit.Test; -import org.junit.jupiter.api.function.Executable; + +import java.util.HashMap; +import java.util.Map; + import static org.junit.jupiter.api.Assertions.*; public class SpeechConfigTest { @@ -28,9 +29,8 @@ public void testString() { // default value assertEquals("default", config.getString("string", "default")); - assertThrows(IllegalArgumentException.class, new Executable() { - public void execute() { config.getString("string"); } - }); + assertThrows(IllegalArgumentException.class, + () -> config.getString("string")); // null value config.put("string", null); @@ -54,9 +54,8 @@ public void testInteger() { // default value assertEquals(42, config.getInteger("integer", 42)); - assertThrows(IllegalArgumentException.class, new Executable() { - public void execute() { config.getInteger("double"); } - }); + assertThrows(IllegalArgumentException.class, + () -> config.getInteger("double")); // integer value config.put("integer", 1); @@ -80,9 +79,8 @@ public void testDouble() { // default value assertEquals(42.0, config.getDouble("double", 42.0)); - assertThrows(IllegalArgumentException.class, new Executable() { - public void execute() { config.getDouble("double"); } - }); + assertThrows(IllegalArgumentException.class, + () -> config.getDouble("double")); // double value config.put("double", 3.14); diff --git a/src/test/java/io/spokestack/spokestack/android/AndroidSpeechRecognizerTest.java b/src/test/java/io/spokestack/spokestack/android/AndroidSpeechRecognizerTest.java new file mode 
100644 index 0000000..4729264 --- /dev/null +++ b/src/test/java/io/spokestack/spokestack/android/AndroidSpeechRecognizerTest.java @@ -0,0 +1,140 @@ +package io.spokestack.spokestack.android; + +import android.content.Context; +import android.content.ContextWrapper; +import android.content.Intent; +import android.os.Bundle; +import android.speech.RecognitionListener; +import android.speech.SpeechRecognizer; +import io.spokestack.spokestack.OnSpeechEventListener; +import io.spokestack.spokestack.SpeechConfig; +import io.spokestack.spokestack.SpeechContext; +import io.spokestack.spokestack.util.TaskHandler; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.powermock.core.classloader.annotations.PrepareForTest; +import org.powermock.modules.junit4.PowerMockRunner; + +import java.nio.ByteBuffer; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.ArgumentMatchers.any; +import static org.powermock.api.mockito.PowerMockito.*; + +@RunWith(PowerMockRunner.class) +@PrepareForTest({SpeechRecognizer.class, AndroidSpeechRecognizer.class, Bundle.class}) +public class AndroidSpeechRecognizerTest { + + private ContextWrapper emptyAppContext = mock(ContextWrapper.class); + private ContextWrapper mockContext = mock(ContextWrapper.class); + private SpeechRecognizer successfulRecognizer = mock(SpeechRecognizer.class); + private SpeechRecognizer unsuccessfulRecognizer = mock(SpeechRecognizer.class); + + @Before + public void before() throws Exception { + mockStatic(SpeechRecognizer.class); + + // NOTE: this mocking strategy (establishing results in order) + // creates a coupling between setup and test methods, but argument + // matchers have so far been ineffective at returning the intended + // objects + when(SpeechRecognizer.createSpeechRecognizer(any())) + .thenReturn(successfulRecognizer, unsuccessfulRecognizer); + whenNew(Intent.class).withAnyArguments().thenReturn(mock(Intent.class)); + 
configureRecognizer(successfulRecognizer, new MockRecognizer(true)); + configureRecognizer(unsuccessfulRecognizer, new MockRecognizer(false)); + when(emptyAppContext.getApplicationContext()).thenReturn(null); + when(mockContext.getApplicationContext()) + .thenReturn(mock(Context.class)); + } + + private void configureRecognizer(SpeechRecognizer target, + MockRecognizer mockRecognizer) { + // we can't subclass SpeechRecognizer because the stubbed constructor + // throws, so we have to proxy its relevant methods to our mock instead + doAnswer(invocation -> { + mockRecognizer.startListening(null); + return null; + } + ).when(target).startListening(any()); + doAnswer(invocation -> { + RecognitionListener listener = invocation.getArgument(0); + mockRecognizer.setRecognitionListener(listener); + return null; + } + ).when(target).setRecognitionListener(any()); + } + + @Test + public void testProcess() { + SpeechConfig config = new SpeechConfig(); + AndroidSpeechRecognizer speechRecognizer = + new AndroidSpeechRecognizer(config, new TaskHandler(false)); + SpeechContext context = new SpeechContext(config); + context.setAndroidContext(mockContext); + EventListener listener = new EventListener(); + context.addOnSpeechEventListener(listener); + ByteBuffer frame = ByteBuffer.allocateDirect(32); + + // ASR inactive + speechRecognizer.process(context, frame); + assertNull(listener.transcript); + assertNull(listener.error); + + // ASR active + listener.clear(); + context.setActive(true); + speechRecognizer.process(context, frame); + assertEquals(MockRecognizer.TRANSCRIPT, listener.transcript); + assertNull(listener.error); + + // ASR received an error + listener.clear(); + context.setActive(true); + speechRecognizer = + new AndroidSpeechRecognizer(config, new TaskHandler(false)); + speechRecognizer.process(context, frame); + assertNull(listener.transcript); + assertEquals(SpeechRecognizerError.class, listener.error.getClass()); + String expectedError = + 
SpeechRecognizerError.Description.SERVER_ERROR.toString(); + assertTrue(listener.error.getMessage().contains(expectedError)); + + // closing the component has no effect (doubly so because its internal + // system speech recognizer is mocked here) + speechRecognizer.close(); + } + + + private static class EventListener implements OnSpeechEventListener { + String transcript; + double confidence; + Throwable error; + + EventListener() { + } + + public void clear() { + this.transcript = null; + this.confidence = 0.0; + this.error = null; + } + + @Override + public void onEvent(SpeechContext.Event event, SpeechContext context) { + switch (event) { + case RECOGNIZE: + this.transcript = context.getTranscript(); + this.confidence = context.getConfidence(); + break; + case ERROR: + this.error = context.getError(); + break; + default: + break; + } + + } + } +} \ No newline at end of file diff --git a/src/test/java/io/spokestack/spokestack/android/MockRecognizer.java b/src/test/java/io/spokestack/spokestack/android/MockRecognizer.java new file mode 100644 index 0000000..53fe91a --- /dev/null +++ b/src/test/java/io/spokestack/spokestack/android/MockRecognizer.java @@ -0,0 +1,76 @@ +package io.spokestack.spokestack.android; + +import android.content.Intent; +import android.os.Bundle; +import android.speech.RecognitionListener; +import android.speech.SpeechRecognizer; + +import java.util.ArrayList; +import java.util.Arrays; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +/** + * A fake {@link SpeechRecognizer} used for testing. + */ +public class MockRecognizer { + public static final String TRANSCRIPT = "test"; + private boolean isSuccessful; + private RecognitionListener recognitionListener; + + MockRecognizer(boolean successful) { + this.isSuccessful = successful; + } + + /** + * Set a recognition listener to receive fake results/errors. + * + * @param listener The listener that should receive recognition results. 
+ */ + public void setRecognitionListener(RecognitionListener listener) { + this.recognitionListener = listener; + } + + /** + * Immediately return either results or an error from a pretend speech + * recognition session. Note that since the listener expects a proper + * {@code Bundle} (which is a final class stubbed by Android for testing), + * this method will not work unless the test class it's used in includes + * PowerMock's {@link org.junit.runner.RunWith} and + * {@link org.powermock.core.classloader.annotations.PrepareForTest} + * annotations. + * + * @param recognitionIntent the intent used to start recognition. Unused by + * this mock. + */ + @SuppressWarnings("unused") + public void startListening(Intent recognitionIntent) { + if (this.isSuccessful) { + Bundle results = mock(Bundle.class); + ArrayList nBest = + new ArrayList<>( Arrays.asList(TRANSCRIPT, "testy")); + when(results + .getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION)) + .thenReturn(nBest); + float[] confidences = new float[]{.85f, .15f}; + when(results + .getFloatArray(SpeechRecognizer.CONFIDENCE_SCORES)) + .thenReturn(confidences); + recognitionListener.onResults(results); + } else { + recognitionListener.onError(4); + } + + // this is a terrible thing to do to just improve test coverage for + // unimplemented methods, but it will trigger a revisiting of this + // test if the methods are implemented in the future + recognitionListener.onReadyForSpeech(null); + recognitionListener.onBeginningOfSpeech(); + recognitionListener.onRmsChanged(0); + recognitionListener.onBufferReceived(new byte[]{}); + recognitionListener.onEndOfSpeech(); + recognitionListener.onPartialResults(null); + recognitionListener.onEvent(0, null); + } +} diff --git a/src/test/java/io/spokestack/spokestack/android/SpeechRecognizerErrorTest.java b/src/test/java/io/spokestack/spokestack/android/SpeechRecognizerErrorTest.java new file mode 100644 index 0000000..c1fcdbe --- /dev/null +++ 
b/src/test/java/io/spokestack/spokestack/android/SpeechRecognizerErrorTest.java @@ -0,0 +1,21 @@ +package io.spokestack.spokestack.android; + +import org.junit.Test; + +import static org.junit.jupiter.api.Assertions.*; + +public class SpeechRecognizerErrorTest { + + @Test + public void testErrorCodes() { + SpeechRecognizerError error = new SpeechRecognizerError(0); + assertTrue(error.getMessage().contains( + SpeechRecognizerError.Description.UNKNOWN_ERROR.toString())); + error = new SpeechRecognizerError(1); + assertTrue(error.getMessage().contains( + SpeechRecognizerError.Description.NETWORK_TIMEOUT.toString())); + error = new SpeechRecognizerError(13); + assertTrue(error.getMessage().contains( + SpeechRecognizerError.Description.UNKNOWN_ERROR.toString())); + } +} \ No newline at end of file