In [1]:
import sounddevice as sd
import numpy as np
import scipy.io.wavfile as wav

In [2]:
# --- Settings ---
FS = 44100               # Sampling frequency
THRESHOLD = 50          # Volume threshold for silence (adjust this)
SILENCE_DURATION = 1.5   # Seconds of silence before stopping (adjust this)
CHUNK_SIZE = 1024        # Process audio in chunks for efficiency
MIN_SPEECH_DURATION = 1  # Minimum duration of speech in seconds

In [3]:
# --- Functions ---
def is_silent(data):
    rms = np.sqrt(np.mean(data**2))
    print("RMS: ", rms)
    return rms < THRESHOLD

In [4]:
def record_speech():
    print("Recording... Speak now!")
    audio_data = np.array([], dtype=np.int16)  # Initialize empty array
    silent_chunks = 0
    speech_started = 0  # Flag to track if speech has started

    with sd.InputStream(samplerate=FS, channels=1, dtype='int16') as stream:
        while True:
            chunk, overflowed = stream.read(CHUNK_SIZE)
            if overflowed:
                print("Warning: Input overflowed!")

            # Start counting silent chunks only after speech has started
            if speech_started == 0:
                if not is_silent(chunk):
                    speech_started = True
                    print("Speech detected, starting silence detection.")
            else:  # Speech has started
                audio_data = np.append(audio_data, chunk)
                if is_silent(chunk):
                    silent_chunks += 1
                else:
                    silent_chunks = 0  # Reset silent chunk counter if speech detected

            if silent_chunks > int(SILENCE_DURATION * FS / CHUNK_SIZE):
                print("Silence detected, stopping recording.")
                break
    
    wav.write("TEST.wav", FS, audio_data)

    return "TEST.wav"

In [6]:
# --- Save Recording ---
e = record_speech()
print("Recording saved to 'recording.wav'")

Recording... Speak now!
RMS:  15.71769380149327
RMS:  11.797940763116248
RMS:  8.877860454523939
RMS:  14.145173646866269
RMS:  10.257761923904258
RMS:  8.144927485865052
RMS:  7.242722640692518
RMS:  7.75541645400168
RMS:  7.365128456958507
RMS:  7.72791712720834
RMS:  8.538801602830459
RMS:  10.99826868477489
RMS:  7.746029728996655
RMS:  6.1141485506977995
RMS:  5.925481256826318
RMS:  7.4401912147807066
RMS:  7.78219120813669
RMS:  8.478085260688289
RMS:  9.195415382814417
RMS:  7.434085350599628
RMS:  9.649977736373282
RMS:  8.395962571379174
RMS:  6.582039363867403
RMS:  6.548347835523095
RMS:  5.894747397047646
RMS:  8.250414288840288
RMS:  12.927645703588105
RMS:  10.854776454400154
RMS:  8.078835196053946
RMS:  6.955284074895863
RMS:  8.221244868479468
RMS:  9.441276148514035
RMS:  10.550011107814058
RMS:  11.796202381487019
RMS:  13.030800351954595
RMS:  6.901228377977358
RMS:  12.681186852972399
RMS:  5.87308811976289
RMS:  6.9761731764270305
RMS:  6.970781654520532
RMS:  9.

  rms = np.sqrt(np.mean(data**2))


RMS:  26.435154299010627
RMS:  51.51258948063085
RMS:  48.70504738345914
RMS:  34.64377875246868
RMS:  31.051305830029115
RMS:  31.22669443460995
RMS:  33.29978943720966
RMS:  55.998944953789085
RMS:  44.21850706647048
RMS:  45.553614960148884
RMS:  39.3323057223639
RMS:  32.72009309564232
RMS:  21.364123694876884
RMS:  nan
RMS:  27.183943732928082
RMS:  nan
RMS:  nan
RMS:  12.110010063992515
RMS:  54.043051053650736
RMS:  59.22669801544317
RMS:  43.41295394594222
RMS:  44.376694509900126
RMS:  nan
RMS:  30.417248139419186
RMS:  27.597589625273436
RMS:  52.56533323220257
RMS:  69.0348997450746
RMS:  65.61078715138997
RMS:  66.93158383052429
RMS:  57.81672281874856
RMS:  30.41703945262425
RMS:  28.177347118296996
RMS:  40.34770749373501
RMS:  63.67155866485836
RMS:  61.94390891413376
RMS:  58.96904809145642
RMS:  19.738253033450047
RMS:  nan
RMS:  24.771337077860775
RMS:  32.87537131444297
RMS:  47.06606892178271
RMS:  53.763833612266716
RMS:  51.28173560099151
RMS:  65.73118641244429
R