In [2]:
import sounddevice as sd
import soundfile as sf
import io
from pydub import AudioSegment
from pydub.silence import detect_nonsilent
import time
import numpy as np

# Constants
RATE = 16000
CHANNELS = 1
CHUNK = 480
SILENCE_THRESHOLD = -50
REQUIRED_NONSILENT = 100
RECORD_SECONDS = 1.0  # Record in longer chunks
SILENCE_PADDING_TIME = 0.5  # Time to keep recording after silence is detected

def is_speech(audio_data):
    audio_segment = AudioSegment(audio_data, sample_width=2, channels=CHANNELS, frame_rate=RATE)
    nonsilent_ranges = detect_nonsilent(audio_segment, min_silence_len=100, silence_thresh=SILENCE_THRESHOLD)

    if nonsilent_ranges and nonsilent_ranges[-1][1] - nonsilent_ranges[0][0] >= REQUIRED_NONSILENT:
        return True
    return False

def record_audio():
    audio_data = sd.rec(frames=int(RECORD_SECONDS * RATE), samplerate=RATE, channels=CHANNELS, dtype='int16')
    sd.wait()
    return audio_data

def save_to_byte_stream(audio_data):
    with io.BytesIO() as byte_stream:
        with sf.SoundFile(byte_stream, 'w', samplerate=RATE, channels=CHANNELS, format='WAV', subtype='PCM_16') as wav_file:
            wav_file.write(audio_data)
        byte_stream.seek(0)
        audio_segment = AudioSegment.from_file(byte_stream, format="wav")
    return audio_segment

def play_audio(audio_segment):
    print("Playing audio...")
    audio_data = np.array(audio_segment.get_array_of_samples(), dtype=np.int16)
    sd.play(audio_data, samplerate=RATE)
    sd.wait()

def main():
    while True:
        recorded_audio = AudioSegment.empty()
        recording = False
        silence_start_time = None

        while True:
            audio_data = record_audio()
            audio_segment = save_to_byte_stream(audio_data)

            if is_speech(audio_segment.raw_data):
                if not recording:
                    print("Voice detected")
                    recording = True
                recorded_audio += audio_segment
                silence_start_time = None
            else:
                if recording:
                    if silence_start_time is None:
                        silence_start_time = time.time()
                    elif time.time() - silence_start_time >= SILENCE_PADDING_TIME:
                        print("Silence detected")
                        break

        if len(recorded_audio) > 0:
            play_audio(recorded_audio)
        else:
            print("No voice detected")

        #time.sleep(0.5)  # Optional: Add a short delay between iterations

if __name__ == "__main__":
    main()

Voice detected
Silence detected
Playing audio...
Voice detected
Silence detected
Playing audio...
Voice detected
Silence detected
Playing audio...
Voice detected
Silence detected
Playing audio...
Voice detected
Silence detected
Playing audio...
Voice detected
Silence detected
Playing audio...
Voice detected
Silence detected
Playing audio...
Voice detected
Silence detected
Playing audio...
Voice detected
Silence detected
Playing audio...
Voice detected
Silence detected
Playing audio...


KeyboardInterrupt: 