Skip to content
This repository has been archived by the owner on May 6, 2022. It is now read-only.

Commit

Permalink
Fix: reset pipeline stages when context unmanaged
Browse files Browse the repository at this point in the history
This addresses an issue where internal speech processor state
was available after the speech context left the "externally
managed" state, sometimes leading to a false reactivation if
a wakeword activation was pending before the stages' processing
of the audio buffer was interrupted.

Due to the new interface method required by this fix, it is a
breaking change.
  • Loading branch information
space-pope committed Aug 10, 2020
1 parent 38cdb55 commit 6fa5c80
Show file tree
Hide file tree
Showing 14 changed files with 115 additions and 31 deletions.
10 changes: 4 additions & 6 deletions src/main/java/io/spokestack/spokestack/ActivationTimeout.java
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,12 @@ private void deactivate(SpeechContext context) {
}

@Override
public void close() {
reset();
public void reset() {
close();
}

/**
* Reset the trigger's activity timer.
*/
public void reset() {
@Override
public void close() {
this.activeLength = 0;
}
}
14 changes: 12 additions & 2 deletions src/main/java/io/spokestack/spokestack/SpeechPipeline.java
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ public final class SpeechPipeline implements AutoCloseable {
private List<SpeechProcessor> stages;
private Thread thread;
private boolean running;
private boolean managed;

/**
* initializes a new speech pipeline instance.
Expand Down Expand Up @@ -228,9 +229,18 @@ private void dispatch() {
// fill the frame from the input
this.input.read(this.context, frame);

// dispatch the frame to the stages
if (!this.context.isManaged()) {
// when leaving the managed state, reset all stages internally
boolean isManaged = this.context.isManaged();
if (this.managed && !isManaged) {
for (SpeechProcessor stage : this.stages) {
stage.reset();
}
}
this.managed = isManaged;

// dispatch the frame to the stages
for (SpeechProcessor stage : this.stages) {
if (!this.managed) {
frame.rewind();
stage.process(this.context, frame);
}
Expand Down
6 changes: 6 additions & 0 deletions src/main/java/io/spokestack/spokestack/SpeechProcessor.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,10 @@ public interface SpeechProcessor extends AutoCloseable {
* @throws Exception on error
*/
void process(SpeechContext context, ByteBuffer frame) throws Exception;

/**
* resets all state internal to the stage. the speech pipeline calls this
* on every stage when the speech context leaves the managed state, before
* it dispatches the next frame to the stages.
* @throws Exception on error
*/
void reset() throws Exception;
}
5 changes: 5 additions & 0 deletions src/main/java/io/spokestack/spokestack/SpeechSampler.java
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,11 @@ public SpeechSampler(SpeechConfig config) throws Exception {
this.header.putInt(Integer.MAX_VALUE); // size of data chunk
}

/**
* resets the sampler by delegating to {@link #close()}.
* @throws Exception if closing the sampler fails
*/
@Override
public void reset() throws Exception {
close();
}

/**
* destroys the resources attached to the component.
* @throws Exception on error
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,17 @@ Intent createRecognitionIntent() {
return intent;
}

/**
* resets the recognizer by delegating to {@link #close()}.
*/
@Override
public void reset() {
close();
}

@Override
public void close() {
this.taskHandler.run(() -> this.speechRecognizer.destroy());
this.taskHandler.run(() -> {
this.speechRecognizer.destroy();
this.speechRecognizer = null;
});
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,19 @@ public SpokestackCloudRecognizer(
.build();
}

/**
* resets the recognizer's internal state: disconnects the client if it
* reports itself as connected, then clears the idle counter and the
* active flag.
*/
@Override
public void reset() {
// only disconnect a client that is currently connected
if (this.client.isConnected()) {
this.client.disconnect();
}
// return the bookkeeping fields to their inactive values
this.idleCount = 0;
this.active = false;
}

/**
* releases the resources associated with the recognizer
* by closing its client.
*/
@Override
public void close() {
this.client.close();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,21 @@ private void configure(SpeechConfig speechConfig) throws Exception {
.build();
}

/**
* resets the recognizer by completing the current request, if one exists.
*/
@Override
public void reset() {
if (this.request != null) {
// NOTE(review): the request reference is left set after it is
// completed here -- confirm that a later close() or process()
// call cannot complete the same request a second time.
this.request.onCompleted();
}
}

/**
* releases the resources associated with the recognizer.
* @throws Exception on error
*/
public void close() throws Exception {
if (this.request != null)
@Override
public void close() {
if (this.request != null) {
this.request.onCompleted();
}
this.client.close();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,15 @@ com.microsoft.cognitiveservices.speech.SpeechConfig createMsConfig(
return config;
}

/**
* resets the recognizer by delegating to {@link #close()}.
*/
@Override
public void reset() {
close();
}

/**
* releases the resources associated with the recognizer.
*/
@Override
public void close() {
if (this.audioStream != null) {
this.audioStream.close();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,25 @@ public void close() throws Exception {
this.detectModel.close();
}

/**
* resets the trigger's internal detection state: empties the sample
* window, resets and zero-fills the frame and encode windows, writes
* zeros over the remaining encoder state values, and clears the
* maximum posterior.
*/
@Override
public void reset() {
// empty the sample buffer, so that only contiguous
// speech samples are written to it
this.sampleWindow.reset();

// reset and fill the other buffers,
// which prevents them from lagging the detection
this.frameWindow.reset().fill(0);
this.encodeWindow.reset().fill(0);

// reset the encoder states
while (this.encodeModel.states().hasRemaining())
this.encodeModel.states().putFloat(0);

// reset the maximum posterior
this.posteriorMax = 0;
}

/**
* processes a frame of audio.
* @param context the current speech context
Expand All @@ -337,7 +356,7 @@ public void process(SpeechContext context, ByteBuffer buffer)
if (vadFall) {
if (!context.isActive())
trace(context);
reset(context);
reset();
}
}

Expand Down Expand Up @@ -460,24 +479,6 @@ private void activate(SpeechContext context) {
context.setActive(true);
}

/**
* resets the trigger's internal detection state: empties the sample
* window, resets and zero-fills the frame and encode windows, writes
* zeros over the remaining encoder state values, and clears the
* maximum posterior.
* @param context the current speech context (not read by this method)
*/
private void reset(SpeechContext context) {
// empty the sample buffer, so that only contiguous
// speech samples are written to it
this.sampleWindow.reset();

// reset and fill the other buffers,
// which prevents them from lagging the detection
this.frameWindow.reset().fill(0);
this.encodeWindow.reset().fill(0);

// reset the encoder states
while (this.encodeModel.states().hasRemaining())
this.encodeModel.states().putFloat(0);

// reset the maximum posterior
this.posteriorMax = 0;
}

/**
* writes the current maximum posterior to the context's info trace.
* @param context the speech context that receives the trace message
*/
private void trace(SpeechContext context) {
    // build the message first, then hand it to the context
    String message = String.format("wake: %f", this.posteriorMax);
    context.traceInfo(message);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,14 @@ else if (policyString.equals("very-aggressive"))
throw new OutOfMemoryError();
}

/**
* no-op: this stage performs no work on reset.
*/
@Override
public void reset() {
}

/**
* destroys the unmanaged ans instance by passing
* the stored handle to destroy.
*/
@Override
public void close() {
destroy(this.ansHandle);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ public void close() {
destroy(this.agcHandle);
}

/**
* no-op: this stage performs no work on reset.
*/
@Override
public void reset() {
}

/**
* processes a frame of audio.
* @param context the current speech context
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,12 @@ public void process(SpeechContext context, ByteBuffer frame) {
}
}

/**
* clears the run tracking state: sets the run value to false
* and the run length to 0.
*/
@Override
public void reset() {
this.runValue = false;
this.runLength = 0;
}

//-----------------------------------------------------------------------
// native interface
//-----------------------------------------------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,9 @@ public void process(SpeechContext context, ByteBuffer frame) {
this.isSpeech = context.isSpeech();
}
}

/**
* clears the speech flag that process() copies from the context.
*/
@Override
public void reset() {
this.isSpeech = false;
}
}
12 changes: 12 additions & 0 deletions src/test/java/io/spokestack/spokestack/SpeechPipelineTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,10 @@ public Stage(SpeechConfig config) {
open = true;
}

/**
* resets the test stage by delegating to close().
*/
public void reset() {
close();
}

/**
* marks the test stage as closed by clearing the open flag.
*/
public void close() {
open = false;
}
Expand Down Expand Up @@ -322,6 +326,10 @@ public static class FailStage implements SpeechProcessor {
public FailStage(SpeechConfig config) {
}

/**
* delegates to close(), propagating the exception it throws.
* @throws Exception whenever close() throws
*/
public void reset() throws Exception {
close();
}

/**
* always fails.
* @throws Exception on every call, with the message "fail"
*/
public void close() throws Exception {
throw new Exception("fail");
}
Expand All @@ -337,6 +345,10 @@ public ConfigRequiredStage(SpeechConfig config) {
config.getString("required-property");
}

/**
* delegates to close(), propagating the exception it throws.
* @throws Exception whenever close() throws
*/
public void reset() throws Exception {
close();
}

/**
* always fails.
* @throws Exception on every call, with the message "fail"
*/
public void close() throws Exception {
throw new Exception("fail");
}
Expand Down

0 comments on commit 6fa5c80

Please sign in to comment.