In [12]:
pip install sounddevice numpy scipy google-cloud-speech ipywidgets

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
import os
import time
import numpy as np
import sounddevice as sd
from scipy.io.wavfile import write
from google.cloud import speech
import io
import ipywidgets as widgets
from IPython.display import display, clear_output
import threading

# Point the Google Cloud client library at the credentials JSON file.
# The path is relative, so it is resolved against the kernel's working directory.
# NOTE(review): this assignment overwrites any GOOGLE_APPLICATION_CREDENTIALS
# value that is already present in the environment.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "peak-argon-465313-n8-23d40d862fe5.json"

fs = 44100  # sample rate in Hz: used for capture, for the WAV files and for the recognition request
recording = False  # True while a capture is in progress; every button handler checks it
audio_data = []  # audio chunks appended by append_audio during a manual recording

# Shared Output widget that the handlers print their status messages into
output_area = widgets.Output()

# Record for fixed duration
def record_fixed_duration(b):
    """Record for the slider's duration, save a WAV file and transcribe it.

    Click handler for the fixed-duration button. The call blocks until the
    capture has finished (``sd.wait()``), writes the samples to
    ``recording_<unix time>.wav`` and prints the transcript, or the error
    that prevented it, into ``output_area``.

    The module-level ``recording`` flag is always reset on the way out, so a
    failing audio device or an unwritable file cannot leave the notebook
    stuck in the "recording" state.

    Args:
        b: The button that fired the click (unused).
    """
    global recording
    if recording:
        # Another capture is already running; ignore the extra click.
        return
    recording = True
    try:
        duration = duration_input.value
        filename = f"recording_{int(time.time())}.wav"
        with output_area:
            clear_output()
            print(f"Recording for {duration} seconds...")
        try:
            recording_data = sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype='int16')
            sd.wait()
            write(filename, fs, recording_data)
        except Exception as e:
            # Report capture/save failures where the user is looking instead
            # of letting them vanish inside the widget callback machinery.
            with output_area:
                print("Error during recording:", e)
            return
        with output_area:
            print(f"Recording saved as {filename}")
            print("Transcribing...")
        try:
            transcript = transcribe_google_speech(filename)
            with output_area:
                print("Transcription:", transcript)
        except Exception as e:
            with output_area:
                print("Error during transcription:", e)
    finally:
        # Runs on every exit path, including the early return above.
        recording = False

# Manual recording
def start_manual_recording(b):
    """Begin an open-ended recording on a background thread.

    Click handler for the manual-recording button. Clears any previously
    collected chunks, raises the module-level ``recording`` flag and starts
    ``manual_record`` in a new thread so the handler returns immediately.

    Args:
        b: The button that fired the click (unused).
    """
    global recording, audio_data
    if recording:
        # Another capture is already running; ignore the extra click.
        return
    audio_data = []
    recording = True
    # The Stop button is created with disabled=True; unless it is enabled
    # here it can never be clicked and the recording could not be ended.
    stop_btn.disabled = False
    with output_area:
        clear_output()
        print("Recording... (Press Stop to finish)")
    thread = threading.Thread(target=manual_record)
    thread.start()

def manual_record():
    """Capture audio until ``recording`` is cleared, then save and transcribe.

    Thread target started by ``start_manual_recording``. Keeps an input
    stream open (chunks are collected by ``append_audio``) and polls the
    module-level ``recording`` flag every 100 ms. Once the flag is cleared,
    the chunks are joined, written to ``recording_manual_<unix time>.wav``
    and sent for transcription. Progress and errors are printed into
    ``output_area``.

    Because this runs on a background thread, any exception would otherwise
    be lost, so capture/save failures are reported explicitly and the
    ``recording`` flag is always reset.
    """
    global recording
    try:
        with sd.InputStream(samplerate=fs, channels=1, dtype='int16', callback=append_audio):
            while recording:
                sd.sleep(100)
        # Snapshot the chunk list so a quickly restarted recording (which
        # rebinds ``audio_data``) cannot swap it out underneath us.
        chunks = list(audio_data)
        if not chunks:
            # np.concatenate raises ValueError on an empty list.
            with output_area:
                print("No audio captured; nothing to save.")
            return
        audio_np = np.concatenate(chunks, axis=0)
        filename = f"recording_manual_{int(time.time())}.wav"
        write(filename, fs, audio_np)
    except Exception as e:
        with output_area:
            print("Error during recording:", e)
        return
    finally:
        # If the stream failed to open, the flag is still True here; clear it
        # so the buttons keep working.
        recording = False
    with output_area:
        print(f"Recording saved as {filename}")
        print("Transcribing...")
    try:
        transcript = transcribe_google_speech(filename)
        with output_area:
            print("Transcription:", transcript)
    except Exception as e:
        with output_area:
            print("Error during transcription:", e)

def append_audio(indata, frames, time_, status):
    """Input-stream callback: store a copy of the latest audio block.

    A copy of ``indata`` (not the array itself) is appended to the
    module-level ``audio_data`` list. ``frames``, ``time_`` and ``status``
    are accepted to match the callback signature but are not used.
    """
    chunk = indata.copy()
    audio_data.append(chunk)

def stop_manual_recording(b):
    """Click handler that ends a running recording.

    Clears the module-level ``recording`` flag and prints a confirmation
    into ``output_area``. Does nothing when no recording is in progress.

    Args:
        b: The button that fired the click (unused).
    """
    global recording
    if not recording:
        return
    recording = False
    with output_area:
        print("Stopped recording.")

# Google Speech-to-Text transcription function
def transcribe_google_speech(filename, language_code="en-US"):
    """Transcribe an audio file with Google Cloud Speech-to-Text.

    Reads the whole file into memory and sends it in a single synchronous
    ``recognize`` request, declared as LINEAR16 at the module-level sample
    rate ``fs``, with automatic punctuation enabled.

    NOTE(review): the synchronous API is documented as accepting only short
    audio; a long manual recording may be rejected by the service. Confirm
    against the Speech-to-Text quota documentation.

    Args:
        filename: Path of the audio file to send.
        language_code: Language tag passed to the recognizer. Defaults to
            ``"en-US"``, the value that was previously hard-coded.

    Returns:
        The top alternative of every result joined by single spaces, with
        surrounding whitespace stripped. An empty string when the service
        returns no results.
    """
    client = speech.SpeechClient()
    with open(filename, "rb") as audio_file:
        content = audio_file.read()
    audio = speech.RecognitionAudio(content=content)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=fs,
        language_code=language_code,
        enable_automatic_punctuation=True,
    )
    response = client.recognize(config=config, audio=audio)
    # Skip results that carry no alternatives instead of failing with
    # IndexError on ``alternatives[0]``.
    pieces = [
        result.alternatives[0].transcript
        for result in response.results
        if result.alternatives
    ]
    return " ".join(pieces).strip()

# Widgets setup
# Controls: a 1-30 second duration slider (default 5) and three buttons.
duration_input = widgets.IntSlider(
    description="Duration (sec):",
    min=1,
    max=30,
    value=5,
)
record_btn = widgets.Button(description="Record Fixed Duration")
manual_btn = widgets.Button(description="Record with Stop Button")
# The Stop button starts out disabled.
stop_btn = widgets.Button(description="Stop Recording", disabled=True)

# Hook each button up to its click handler.
record_btn.on_click(record_fixed_duration)
manual_btn.on_click(start_manual_recording)
stop_btn.on_click(stop_manual_recording)

# Render the controls, followed by the shared output area.
display(duration_input, record_btn, manual_btn, stop_btn, output_area)

IntSlider(value=5, description='Duration (sec):', max=30, min=1)

Button(description='Record Fixed Duration', style=ButtonStyle())

Button(description='Record with Stop Button', style=ButtonStyle())

Button(description='Stop Recording', disabled=True, style=ButtonStyle())

Output()