feat: allow speech pipeline to be paused

This adds `pause` and `resume` methods to the speech pipeline to allow listening to be temporarily suspended without fully releasing the pipeline's resources. In turn, `prepare` and `release` in the `Spokestack` wrapper have been removed, with `start` and `stop` controlling the resources of all modules instead of just the speech pipeline. `pause` and `resume` have been threaded through the wrapper and are automatically called by TTS events.
spokestack · Dec 16, 2020 · cc28147 · cc28147
1 parent 14123ce
commit cc28147
Show file tree

Hide file tree

Showing 5 changed files with 257 additions and 81 deletions.
diff --git a/src/main/java/io/spokestack/spokestack/SpeechPipeline.java b/src/main/java/io/spokestack/spokestack/SpeechPipeline.java
@@ -67,6 +67,7 @@ public final class SpeechPipeline implements AutoCloseable {
      */
     public static final int DEFAULT_BUFFER_WIDTH = 20;
 
+    private final Object lock = new Object();
     private final String inputClass;
     private final List<String> stageClasses;
     private final SpeechConfig config;
@@ -75,6 +76,7 @@ public final class SpeechPipeline implements AutoCloseable {
     private List<SpeechProcessor> stages;
     private Thread thread;
     private boolean running;
+    private boolean paused;
     private boolean managed;
 
     /**
@@ -117,10 +119,18 @@ public SpeechContext getContext() {
     }
 
     /**
-     * @return true if the pipeline has been started, false otherwise.
+     * @return true if the pipeline has been started and is not paused,
+     * false otherwise.
      */
     public boolean isRunning() {
-        return this.running;
+        return this.running && !isPaused();
+    }
+
+    /**
+     * @return true if the pipeline has been paused, false otherwise.
+     */
+    public boolean isPaused() {
+        return this.paused;
     }
 
     /** manually activate the speech pipeline. */
@@ -222,6 +232,40 @@ private void startThread() throws Exception {
         this.thread.start();
     }
 
+    /**
+     * Pauses the speech pipeline, temporarily stopping passive listening.
+     *
+     * <p>
+     * Note that active listening (an active ASR stage) cannot be paused, so
+     * the pipeline is deactivated before it is paused. This may prevent the
+     * delivery of a
+     * {@link io.spokestack.spokestack.SpeechContext.Event#RECOGNIZE} event if
+     * an ASR request is currently in progress.
+     * </p>
+     *
+     * <p>
+     * While paused, the pipeline will not respond to the wakeword, but
+     * in order to support a quick {@link #resume()}, it will retain control
+     * of the microphone. No audio is explicitly read or analyzed. To fully
+     * release the pipeline's resources, see {@link #stop()}.
+     * </p>
+     */
+    public void pause() {
+        deactivate();
+        this.paused = true;
+    }
+
+    /**
+     * Resumes a paused speech pipeline, returning the pipeline to a passive
+     * listening state.
+     */
+    public void resume() {
+        this.paused = false;
+        synchronized (lock) {
+            lock.notify();
+        }
+    }
+
     /**
      * stops the speech pipeline and releases all resources.
      */
@@ -238,9 +282,24 @@ public void stop() {
     }
 
     private void run() {
-        while (this.running)
+        synchronized (lock) {
+            while (this.running) {
+                step();
+            }
+            cleanup();
+        }
+    }
+
+    private void step() {
+        if (this.paused) {
+            try {
+                lock.wait();
+            } catch (InterruptedException e) {
+                this.running = false;
+            }
+        } else {
             dispatch();
-        cleanup();
+        }
     }
 
     private void dispatch() {

diff --git a/src/main/java/io/spokestack/spokestack/Spokestack.java b/src/main/java/io/spokestack/spokestack/Spokestack.java
@@ -9,6 +9,7 @@
 import io.spokestack.spokestack.nlu.tensorflow.parsers.IntegerParser;
 import io.spokestack.spokestack.nlu.tensorflow.parsers.SelsetParser;
 import io.spokestack.spokestack.tts.SynthesisRequest;
+import io.spokestack.spokestack.tts.TTSEvent;
 import io.spokestack.spokestack.tts.TTSManager;
 import io.spokestack.spokestack.util.AsyncResult;
 import io.spokestack.spokestack.util.EventTracer;
@@ -171,30 +172,90 @@ public SpeechPipeline getSpeechPipeline() {
     }
 
     /**
-     * Starts the speech pipeline in order to process user input via the
-     * microphone (or chosen input class).
+     * Prepares all registered Spokestack modules for use and starts the
+     * speech pipeline in passive listening mode.
      *
-     * @throws Exception if there is an error configuring or starting the speech
-     *                   pipeline.
+     * @throws Exception if there is an error configuring or starting a module.
      */
     public void start() throws Exception {
         if (this.speechPipeline != null) {
             this.speechPipeline.start();
         }
+        if (this.nlu != null) {
+            this.nlu.prepare();
+        }
+        if (this.tts != null) {
+            this.tts.prepare();
+        }
     }
 
     /**
-     * Stops the speech pipeline and releases all its internal resources.
+     * Pauses the speech pipeline, suspending passive listening. This can be
+     * useful for scenarios where you expect false positives for the wakeword
+     * to be possible.
+     *
+     * <p>
+     * This method will implicitly deactivate the pipeline, canceling any
+     * in-flight ASR requests.
+     * </p>
      *
      * <p>
-     * This is useful for stopping passive listening (listening for wakeword
-     * activation); for fully releasing <em>all</em> internal resources held by
-     * Spokestack, see {@link #release()}.
+     * This method is called automatically when Spokestack is playing a TTS
+     * prompt if Spokestack is managing audio playback.
      * </p>
+     *
+     * @see SpeechPipeline#pause()
      */
-    public void stop() {
+    public void pause() {
         if (this.speechPipeline != null) {
-            this.speechPipeline.stop();
+            this.speechPipeline.pause();
+        }
+    }
+
+    /**
+     * Resumes a paused speech pipeline, returning it to a passive listening
+     * state.
+     *
+     * <p>
+     * This method is called automatically when Spokestack finishes playing a
+     * TTS prompt if Spokestack is managing audio playback.
+     * </p>
+     *
+     * @see SpeechPipeline#resume()
+     */
+    public void resume() {
+        if (this.speechPipeline != null) {
+            this.speechPipeline.resume();
+        }
+    }
+
+    /**
+     * Stops the speech pipeline and releases internal resources held by all
+     * registered Spokestack modules.
+     *
+     * <p>
+     * In order to support restarting Spokestack (calling {@link #start()} after
+     * this method), this method does not clear registered
+     * listeners. To do this, close then destroy the current Spokestack
+     * instance and build a new one.
+     * </p>
+     */
+    public void stop() {
+        closeSafely(this.speechPipeline);
+        closeSafely(this.nlu);
+        closeSafely(this.tts);
+    }
+
+    private void closeSafely(AutoCloseable module) {
+        if (module == null) {
+            return;
+        }
+        try {
+            module.close();
+        } catch (Exception e) {
+            for (SpokestackAdapter listener : this.listeners) {
+                listener.onError(e);
+            }
         }
     }
 
@@ -365,82 +426,34 @@ private AsyncResult<NLUResult> classifyInternal(String text) {
         return result;
     }
 
-    /**
-     * Prepares all registered Spokestack modules for use.
-     *
-     * <p>
-     * Calling this method is only necessary if internal resources have been
-     * released via {@link #close()} or {@link #release()}.
-     * </p>
-     *
-     * <p>
-     * The speech pipeline is not modified by this method since it
-     * manages its own resources via {@link #start()} and {@link #stop()},
-     * and some of its components are designed to be used immediately after
-     * construction.
-     * </p>
-     *
-     * @throws Exception if there is an error configuring or starting a module.
-     */
-    public void prepare() throws Exception {
-        if (this.nlu != null) {
-            this.nlu.prepare();
-        }
-        if (this.tts != null) {
-            this.tts.prepare();
+    @Override
+    public void eventReceived(@NotNull TTSEvent event) {
+        switch (event.type) {
+            case PLAYBACK_STARTED:
+                pause();
+                break;
+            case PLAYBACK_STOPPED:
+                resume();
+                break;
+            default:
+                // do nothing
+                break;
         }
     }
 
     /**
      * Release internal resources held by all registered Spokestack modules.
      *
      * <p>
-     * If Spokestack is needed again after this method is called,
-     * {@link #prepare()} <em>must</em> be called to reconstruct the modules.
-     * </p>
-     *
-     * <p>
-     * In order to support such restarts, this method does not clear registered
+     * In order to support restarting Spokestack (calling {@link #start()} after
+     * this method), this method does not clear registered
      * listeners. To do this, close then destroy the current Spokestack
      * instance and build a new one.
      * </p>
      */
     @Override
     public void close() {
-        release();
-    }
-
-    /**
-     * Release internal resources held by all registered Spokestack modules.
-     *
-     * <p>
-     * If Spokestack is needed again after this method is called,
-     * {@link #prepare()} <em>must</em> be called to reconstruct the modules.
-     * </p>
-     *
-     * <p>
-     * In order to support such restarts, this method does not clear registered
-     * listeners. To do this, close then destroy the current Spokestack
-     * instance and build a new one.
-     * </p>
-     */
-    public void release() {
-        closeSafely(this.speechPipeline);
-        closeSafely(this.nlu);
-        closeSafely(this.tts);
-    }
-
-    private void closeSafely(AutoCloseable module) {
-        if (module == null) {
-            return;
-        }
-        try {
-            module.close();
-        } catch (Exception e) {
-            for (SpokestackAdapter listener : this.listeners) {
-                listener.onError(e);
-            }
-        }
+        stop();
     }
 
     /**

diff --git a/src/test/java/io/spokestack/spokestack/SpeechPipelineTest.java b/src/test/java/io/spokestack/spokestack/SpeechPipelineTest.java
@@ -12,6 +12,7 @@
 import org.junit.Test;
 import org.junit.jupiter.api.function.Executable;
 import static org.junit.jupiter.api.Assertions.*;
+import static io.spokestack.spokestack.SpeechTestUtils.FreeInput;
 
 public class SpeechPipelineTest implements OnSpeechEventListener {
     private static final List<Class<?>> PROFILES = Arrays.asList(
@@ -162,6 +163,41 @@ public void testStartStop() throws Exception {
         assertFalse(Stage.open);
     }
 
+    @Test
+    public void testPause() throws Exception {
+        final SpeechPipeline pipeline = new SpeechPipeline.Builder()
+              .setInputClass("io.spokestack.spokestack.SpeechTestUtils$FreeInput")
+              .setProperty("sample-rate", 16000)
+              .setProperty("frame-width", 20)
+              .setProperty("buffer-width", 300)
+              .setProperty("trace-level", EventTracer.Level.DEBUG.value())
+              .build();
+
+        // startup
+        int frames = FreeInput.counter;
+        assertEquals(frames, 0);
+        pipeline.start();
+        assertTrue(pipeline.isRunning());
+        Thread.sleep(5);
+        assertTrue(FreeInput.counter > frames);
+
+        // we won't get any more frames if we're paused
+        pipeline.pause();
+
+        // wait for the pause to take effect
+        Thread.sleep(10);
+        frames = FreeInput.counter;
+
+        // wait some more to make sure we don't get any more frames
+        Thread.sleep(15);
+        assertEquals(FreeInput.counter, frames);
+
+        // after resuming, frames should start increasing almost immediately
+        pipeline.resume();
+        Thread.sleep(5);
+        assertTrue(FreeInput.counter > frames);
+    }
+
     @Test
     public void testInputFailure() throws Exception {
         SpeechPipeline pipeline = new SpeechPipeline.Builder()

diff --git a/src/test/java/io/spokestack/spokestack/SpeechTestUtils.java b/src/test/java/io/spokestack/spokestack/SpeechTestUtils.java
@@ -0,0 +1,26 @@
+package io.spokestack.spokestack;
+
+import java.nio.ByteBuffer;
+
+/**
+ * Test classes related to the speech pipeline used in more than one test
+ * suite.
+ */
+public class SpeechTestUtils {
+
+    public static class FreeInput implements SpeechInput {
+        public static int counter;
+
+        public FreeInput(SpeechConfig config) {
+            counter = 0;
+        }
+
+        public void close() {
+            counter = -1;
+        }
+
+        public void read(SpeechContext context, ByteBuffer frame) {
+            frame.putInt(0, ++counter);
+        }
+    }
+}