In [8]:
!pip install gradio numpy vosk sounddevice
import gradio as gr
from vosk import Model, KaldiRecognizer
import json
import numpy as np
import queue
import sys
import vosk
import sounddevice as sd

# --- Speech-recognition configuration and shared state ---

# Audio capture rate in Hz; the recognizer is constructed with the same value.
samplerate: int = 16000

# Hand-off buffer between the sounddevice callback (producer) and the
# transcription generator (consumer).
q: queue.Queue = queue.Queue()

# Filesystem location of the unpacked Vosk model; edit to match your machine.
MODEL_PATH: str = "/Users/anitaonnuvel/Downloads/vosk-model-small-en-us-0.15"

# Loaded once, up front, so every recognizer created later can reuse it.
model = vosk.Model(MODEL_PATH)

# Callback function for sounddevice to put audio data into the queue
def audio_callback(indata, frames, time, status):
    """Forward one captured audio block to the shared queue ``q``.

    Written with the four-argument signature that ``sd.RawInputStream`` is
    given as its ``callback``. ``frames`` and ``time`` are accepted but
    not used.

    Args:
        indata: Buffer holding the captured block; converted with ``bytes()``
            before being queued.
        frames: Unused.
        time: Unused.
        status: Printed to stderr when truthy.
    """
    if status:
        print(status, file=sys.stderr)
    chunk = bytes(indata)
    q.put(chunk)

# The generator function for Gradio streaming
def transcribe_stream():
    """Yield a running transcript of live microphone audio.

    Opens a ``sounddevice`` raw input stream whose callback feeds the
    module-level queue ``q``, passes every queued chunk to a fresh Vosk
    recognizer, and yields the text to display whenever a chunk produced
    something: the accumulated final results, followed by the current
    partial hypothesis when there is one.

    The loop has no exit condition of its own. It runs until the consumer
    stops iterating and the generator is closed, at which point the ``with``
    block closes the input stream.

    Yields:
        str: The transcript so far, without leading or trailing whitespace.
    """
    # One recognizer per call so state from an earlier run is not reused.
    recognizer = vosk.KaldiRecognizer(model, samplerate)
    transcript = ""

    # The queue is shared at module level, so chunks captured during an
    # earlier run may still be sitting in it. Discard them before recording
    # starts; otherwise old audio would be transcribed into this session.
    while True:
        try:
            q.get_nowait()
        except queue.Empty:
            break

    with sd.RawInputStream(
        samplerate=samplerate,
        blocksize=8000,  # 8000 frames at 16 kHz: one chunk every half second
        dtype="int16",
        channels=1,
        callback=audio_callback,
    ):
        while True:
            # Blocks until the callback delivers the next chunk.
            data = q.get()

            if recognizer.AcceptWaveform(data):
                # A final result is available: fold it into the transcript.
                text = json.loads(recognizer.Result()).get("text", "")
                if not text:
                    continue
                transcript = f"{transcript} {text}".strip()
                yield transcript
            else:
                # Still mid-utterance: show the partial hypothesis after the
                # finalized text without committing it.
                partial = json.loads(recognizer.PartialResult()).get("partial", "")
                if partial:
                    # strip() keeps the output free of a leading space while
                    # the finalized transcript is still empty.
                    yield f"{transcript} {partial}".strip()

# Build the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Live Vosk Speech-to-Text")
    gr.Markdown("Click 'Start Streaming' and speak into your microphone.")

    # Read-only box that displays each value yielded by transcribe_stream.
    output_textbox = gr.Textbox(label="Transcription", interactive=False)

    # Hidden browser-microphone component. It is not wired to any event:
    # audio is captured through sounddevice inside transcribe_stream.
    # NOTE(review): kept only so the name `audio_input` stays defined.
    # Attaching it with `audio_input.stream(...)` would need a handler that
    # accepts the audio chunk; transcribe_stream takes no parameters.
    audio_input = gr.Audio(sources=["microphone"], streaming=True, visible=False)

    start_btn = gr.Button("Start Streaming")
    stop_btn = gr.Button("Stop Streaming")

    # transcribe_stream is a generator, so the textbox is updated on every
    # yield. Keep the event handle so the Stop button can refer to it.
    stream_event = start_btn.click(
        fn=transcribe_stream,
        inputs=None,
        outputs=output_textbox,
    )

    # transcribe_stream loops forever; without this there is no way to end
    # a session from the UI. Cancelling the event asks Gradio to stop
    # iterating the generator.
    stop_btn.click(fn=None, inputs=None, outputs=None, cancels=[stream_event])

demo.launch()



LOG (VoskAPI:ReadDataFiles():model.cc:213) Decoding params beam=10 max-active=3000 lattice-beam=2
LOG (VoskAPI:ReadDataFiles():model.cc:216) Silence phones 1:2:3:4:5:6:7:8:9:10
LOG (VoskAPI:RemoveOrphanNodes():nnet-nnet.cc:948) Removed 0 orphan nodes.
LOG (VoskAPI:RemoveOrphanComponents():nnet-nnet.cc:847) Removing 0 orphan components.
LOG (VoskAPI:ReadDataFiles():model.cc:248) Loading i-vector extractor from /Users/anitaonnuvel/Downloads/vosk-model-small-en-us-0.15/ivector/final.ie
LOG (VoskAPI:ComputeDerivedVars():ivector-extractor.cc:183) Computing derived variables for iVector extractor
LOG (VoskAPI:ComputeDerivedVars():ivector-extractor.cc:204) Done.
LOG (VoskAPI:ReadDataFiles():model.cc:282) Loading HCL and G from /Users/anitaonnuvel/Downloads/vosk-model-small-en-us-0.15/graph/HCLr.fst /Users/anitaonnuvel/Downloads/vosk-model-small-en-us-0.15/graph/Gr.fst
LOG (VoskAPI:ReadDataFiles():model.cc:303) Loading winfo /Users/anitaonnuvel/Downloads/vosk-model-small-en-us-0.15/graph/phone

* Running on local URL:  http://127.0.0.1:7862
* To create a public link, set `share=True` in `launch()`.


