In [1]:
import sounddevice as sd
import numpy as np
import scipy.io.wavfile as wav

In [2]:
# --- Settings ---
FS = 44100               # Sampling frequency
THRESHOLD = 50          # Volume threshold for silence (adjust this)
SILENCE_DURATION = 1.5   # Seconds of silence before stopping (adjust this)
CHUNK_SIZE = 1024        # Process audio in chunks for efficiency
MIN_SPEECH_DURATION = 1  # Minimum duration of speech in seconds

In [3]:
# --- Functions ---
def is_silent(data):
    rms = np.sqrt(np.mean(data**2))
    print("RMS: ", rms)
    return rms < THRESHOLD

In [4]:
def record_speech():
    print("Recording... Speak now!")
    audio_data = np.array([], dtype=np.int16)  # Initialize empty array
    silent_chunks = 0
    speech_started = 0  # Flag to track if speech has started

    with sd.InputStream(samplerate=FS, channels=1, dtype='int16') as stream:
        while True:
            chunk, overflowed = stream.read(CHUNK_SIZE)
            if overflowed:
                print("Warning: Input overflowed!")

            # Start counting silent chunks only after speech has started
            if speech_started == 0:
                if not is_silent(chunk):
                    speech_started = True
                    print("Speech detected, starting silence detection.")
            else:  # Speech has started
                audio_data = np.append(audio_data, chunk)
                if is_silent(chunk):
                    silent_chunks += 1
                else:
                    silent_chunks = 0  # Reset silent chunk counter if speech detected

            if silent_chunks > int(SILENCE_DURATION * FS / CHUNK_SIZE):
                print("Silence detected, stopping recording.")
                break
    
    wav.write("TEST.wav", FS, audio_data)

    return "TEST.wav"

In [5]:
# --- Save Recording ---
e = record_speech()
print("Recording saved to 'recording.wav'")

Recording... Speak now!
RMS:  25.314371072821462
RMS:  10.131219154795735
RMS:  12.708726086827113
RMS:  18.4261617679863
RMS:  11.373755082975016
RMS:  10.858239598917496
RMS:  18.9082385317882
RMS:  10.876257111249256
RMS:  10.119983325085077
RMS:  14.019691787357523
RMS:  15.308385926919271
RMS:  16.81729137116022
RMS:  20.961669818087014
RMS:  23.444124063931245
RMS:  21.182959506110095
RMS:  19.384271974980127
RMS:  15.972663414800301
RMS:  15.56940108032419
RMS:  15.71669966747154
RMS:  24.941259115168986
RMS:  18.04951955731786
RMS:  25.016576535619336
RMS:  21.345511457329852
RMS:  17.972010573736597
RMS:  21.39847030163605
RMS:  18.83594884124503
RMS:  17.95194866685787
RMS:  18.678640249292773
RMS:  15.195117688339897
RMS:  15.113858241279756
RMS:  14.292835836530132
RMS:  16.57682163820918
RMS:  14.019482816334559
RMS:  16.182914408412348
RMS:  13.859225266947645
RMS:  12.469650656694437
RMS:  13.703365519553946
RMS:  17.70364214822758
RMS:  14.16393968851887
RMS:  7.4438654

  rms = np.sqrt(np.mean(data**2))


RMS:  37.62414347031704
RMS:  23.87688147036794
RMS:  7.183014904968526
RMS:  6.864480588507772
RMS:  45.2999120446166
RMS:  21.89224766469857
RMS:  28.36808148033631
RMS:  36.538386813562255
RMS:  27.576951993467297
RMS:  35.75738833181892
RMS:  18.974129179622448
RMS:  36.33908772761088
RMS:  29.962981718071717
RMS:  30.975781240147924
RMS:  43.88325687975996
RMS:  54.118829036205135
RMS:  62.11157425351575
RMS:  62.77868337212481
RMS:  38.61444283312269
RMS:  18.8050493967046
RMS:  16.636861370162343
RMS:  9.957870238534944
RMS:  10.091040658054053
RMS:  11.850838907541524
RMS:  16.272820514588123
RMS:  15.22548618887095
RMS:  8.171501545003832
RMS:  12.030397697083833
RMS:  6.721293123350595
RMS:  9.213928824068482
RMS:  9.236370871451623
RMS:  9.512430190282608
RMS:  11.773911839316616
RMS:  12.395146164426622
RMS:  11.997517646892627
RMS:  7.52398508521249
RMS:  10.14412738053402
RMS:  10.111246445789956
RMS:  7.268363059348371
RMS:  10.155721140076661
RMS:  8.243368546898774
RMS