This repository has been archived by the owner on May 6, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 5
/
AndroidSpeechRecognizer.java
297 lines (266 loc) · 10.2 KB
/
AndroidSpeechRecognizer.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
package io.spokestack.spokestack.android;
import android.content.Context;
import android.content.Intent;
import android.os.Bundle;
import android.speech.RecognitionListener;
import android.speech.RecognizerIntent;
import android.speech.SpeechRecognizer;
import io.spokestack.spokestack.SpeechConfig;
import io.spokestack.spokestack.SpeechContext;
import io.spokestack.spokestack.SpeechProcessor;
import io.spokestack.spokestack.util.TaskHandler;
import java.nio.ByteBuffer;
import java.util.ArrayList;
/**
* Speech recognition using built-in Android APIs.
*
* <p>
* This component uses the built-in Android {@code SpeechRecognizer} to process
* user speech.
* </p>
*
* <p>
* As part of normal operation, {@code SpeechRecognizer} plays system sounds
* both when it starts and stops actively listening to the user, just like the
* built-in Google Assistant. This behavior is not optional; it can be
suppressed by having the {@code AudioManager} mute the music stream, but
muting and restoring the volume of that stream at exactly the right times is
error-prone, so such behavior has been omitted from this component.
* </p>
*
* <p>
* Note that this component requires an Android {@code Context} to be attached
* to the pipeline that has created it. If the pipeline is meant to persist
* across different {@code Activity}s, the {@code Context} used must either be
* the <em>application</em> context, or it must be re-set on the pipeline's
* {@code SpeechContext} object when the Activity context changes.
* </p>
*
* <p>
* Implementation of {@code SpeechRecognizer} is left up to devices, and even
* though the API exists, an actual recognizer may not be present on all
* devices. If using this component, it's a good idea to call {@code
* SpeechRecognizer.isRecognitionAvailable()} before adding it to the pipeline
* to determine whether it will be viable on the current device.
* </p>
*
* <p>
* In addition, testing has shown that some older devices may return {@code
* true} for the preceding call but have outdated implementations that
* consistently throw errors. For this reason, it's a good idea to have an
* {@link io.spokestack.spokestack.OnSpeechEventListener} set up to detect
* {@link SpeechRecognizerError}s and have an appropriate fallback strategy in
* place.
* </p>
*
* <p>
* This pipeline component supports the following configuration property, though
* it should be left at its default setting in most circumstances:
* </p>
* <ul>
* <li>
* <b>wake-active-min</b> (integer): the minimum length of time, in
* milliseconds, that the recognizer should wait for speech before timing
* out.
* </li>
* </ul>
*
* <p>
* The {@code wake-active-min} parameter merely sets a hint for the
* {@code Intent} used to start recognition, and Google
* <a href="https://developer.android.com/reference/android/speech/RecognizerIntent#EXTRA_SPEECH_INPUT_MINIMUM_LENGTH_MILLIS">
* does not guarantee</a> that this hint will be honored on all devices.
* </p>
*/
public class AndroidSpeechRecognizer implements SpeechProcessor {
    /** Minimum listen time hint (ms) passed to the recognition intent; 0 disables the hint. */
    private final int minActive;
    /** True while a recognition session started by this component is in flight. */
    private boolean streaming;
    private SpeechRecognizer speechRecognizer;
    private SpokestackListener listener;
    /** Runs recognizer interactions on the proper (main) thread; injectable for tests. */
    private TaskHandler taskHandler;

    /**
     * Initializes a new recognizer.
     *
     * @param speechConfig Spokestack pipeline configuration
     */
    public AndroidSpeechRecognizer(SpeechConfig speechConfig) {
        this.streaming = false;
        this.minActive = speechConfig.getInteger("wake-active-min", 0);
        this.taskHandler = new TaskHandler(true);
    }

    /**
     * Create an instance of the recognizer with an injected {@link
     * TaskHandler}. Used for testing.
     *
     * @param speechConfig Spokestack pipeline configuration
     * @param handler      The task handler used to interact with the speech
     *                     recognizer.
     */
    AndroidSpeechRecognizer(SpeechConfig speechConfig,
                            TaskHandler handler) {
        this(speechConfig);
        this.taskHandler = handler;
    }

    /**
     * @return The internal {@code RecognitionListener}. Used for testing.
     */
    SpokestackListener getListener() {
        return this.listener;
    }

    /**
     * Processes one frame of pipeline audio. The audio itself is ignored;
     * this component only watches the context's active flag and starts a
     * native recognition session (which captures its own audio) on the
     * inactive-to-active transition.
     *
     * @param context the current speech context
     * @param frame   pipeline audio frame (unused by this component)
     */
    @Override
    public void process(SpeechContext context, ByteBuffer frame) {
        // lazy creation: the Android context may not be attached until the
        // pipeline is actually running
        if (this.speechRecognizer == null) {
            createRecognizer(context);
        }

        if (context.isActive()) {
            if (!this.streaming) {
                // mark the context managed so other components don't also
                // act on this activation
                context.setManaged(true);
                begin();
                this.streaming = true;
            }
        } else {
            this.streaming = false;
        }
    }

    /** Creates the native recognizer and wires it to the Spokestack listener. */
    private void createRecognizer(SpeechContext context) {
        this.taskHandler.run(() -> {
            Context androidContext = context.getAndroidContext();
            this.speechRecognizer =
                  SpeechRecognizer.createSpeechRecognizer(androidContext);
            this.listener = new SpokestackListener(context);
            this.speechRecognizer.setRecognitionListener(this.listener);
        });
    }

    /** Starts a recognition session on the task handler's thread. */
    private void begin() {
        this.taskHandler.run(() -> {
            Intent recognitionIntent = createRecognitionIntent();
            this.speechRecognizer.startListening(recognitionIntent);
        });
    }

    /**
     * Builds the intent used to start recognition: free-form language model,
     * partial results enabled, and (optionally) a minimum-speech-length hint.
     * Package-private for testing.
     *
     * @return the configured recognition intent
     */
    Intent createRecognitionIntent() {
        Intent intent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
        intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
              RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);
        intent.putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, true);
        if (this.minActive > 0) {
            // a hint only; Google does not guarantee devices honor it
            intent.putExtra(
                  RecognizerIntent.EXTRA_SPEECH_INPUT_MINIMUM_LENGTH_MILLIS,
                  this.minActive);
        }
        return intent;
    }

    @Override
    public void reset() {
        close();
    }

    @Override
    public void close() {
        this.taskHandler.run(() -> {
            if (this.speechRecognizer != null) {
                this.speechRecognizer.destroy();
                this.speechRecognizer = null;
            }
        });
    }

    /**
     * An internal listener used to dispatch events from the Android speech
     * recognizer to the Spokestack {@link SpeechContext}.
     */
    static class SpokestackListener implements RecognitionListener {
        private final SpeechContext context;

        SpokestackListener(SpeechContext speechContext) {
            this.context = speechContext;
        }

        @Override
        public void onError(int error) {
            SpeechRecognizerError speechErr = new SpeechRecognizerError(error);
            this.context.traceDebug("AndroidSpeechRecognizer error " + error);
            if (isTimeout(speechErr.description)) {
                this.context.dispatch(SpeechContext.Event.TIMEOUT);
            } else {
                this.context.setError(speechErr);
                this.context.dispatch(SpeechContext.Event.ERROR);
            }
            relinquishContext();
        }

        /** Returns true for error conditions treated as timeouts, not errors. */
        private boolean isTimeout(
              SpeechRecognizerError.Description description) {
            // the NO_RECOGNITION_MATCH condition appears to be a bug on
            // Google's part that cropped up since this class was written,
            // but we'll leave the workaround in place unless/until they fix it
            return description
                  == SpeechRecognizerError.Description.SPEECH_TIMEOUT
                  || description
                  == SpeechRecognizerError.Description.NO_RECOGNITION_MATCH;
        }

        @Override
        public void onPartialResults(Bundle partialResults) {
            dispatchRecognition(partialResults, false);
        }

        @Override
        public void onResults(Bundle results) {
            dispatchRecognition(results, true);
            relinquishContext();
        }

        /**
         * Publishes a transcript from a recognizer result bundle. An empty
         * final result is dispatched as a timeout; an empty partial result is
         * ignored.
         *
         * @param results the recognizer's result bundle
         * @param isFinal whether this is the session's final result
         */
        private void dispatchRecognition(Bundle results, boolean isFinal) {
            SpeechContext.Event event = (isFinal)
                  ? SpeechContext.Event.RECOGNIZE
                  : SpeechContext.Event.PARTIAL_RECOGNIZE;
            String transcript = extractTranscript(results);
            if (!transcript.equals("")) {
                float confidence = extractConfidence(results);
                this.context.setTranscript(transcript);
                this.context.setConfidence(confidence);
                this.context.dispatch(event);
            } else if (isFinal) {
                this.context.dispatch(SpeechContext.Event.TIMEOUT);
            }
        }

        /**
         * Extracts the top-ranked transcript from a result bundle.
         *
         * @param results the recognizer's result bundle
         * @return the best transcript, or the empty string if none is present
         */
        private String extractTranscript(Bundle results) {
            ArrayList<String> nBest = results.getStringArrayList(
                  SpeechRecognizer.RESULTS_RECOGNITION);
            // Bundle.getStringArrayList returns null when the key is absent,
            // and some device recognizers deliver empty result lists; treat
            // both as "no transcript" instead of crashing the listener
            if (nBest == null || nBest.isEmpty()) {
                return "";
            }
            return nBest.get(0);
        }

        /**
         * Extracts the confidence score for the top-ranked transcript.
         *
         * @param results the recognizer's result bundle
         * @return the top confidence score, or 0.0 if none was supplied
         */
        private float extractConfidence(Bundle results) {
            float[] confidences = results.getFloatArray(
                  SpeechRecognizer.CONFIDENCE_SCORES);
            if (confidences == null || confidences.length == 0) {
                return 0.0f;
            }
            return confidences[0];
        }

        /** Returns control of the pipeline when a session ends. */
        private void relinquishContext() {
            this.context.setSpeech(false);
            this.context.setActive(false);
            this.context.setManaged(false);
        }

        @Override
        public void onReadyForSpeech(Bundle params) {
            this.context.traceDebug(
                  "AndroidSpeechRecognizer ready for speech");
        }

        @Override
        public void onBeginningOfSpeech() {
            this.context.setSpeech(true);
            this.context.traceDebug("AndroidSpeechRecognizer begin speech");
        }

        @Override
        public void onRmsChanged(float rmsdB) {
            this.context.traceDebug("AndroidSpeechRecognizer RMS %f", rmsdB);
        }

        @Override
        public void onBufferReceived(byte[] buffer) {
            this.context.traceDebug("AndroidSpeechRecognizer buffer received");
        }

        @Override
        public void onEndOfSpeech() {
            this.context.traceDebug("AndroidSpeechRecognizer end speech");
            this.context.setSpeech(false);
        }

        @Override
        public void onEvent(int eventType, Bundle params) {
            this.context.traceDebug(
                  "AndroidSpeechRecognizer event: %d", eventType);
        }
    }
}