In [1]:
import pyaudio
import os
import struct
import matplotlib.pyplot as plt
import time
import soundfile as sf
import numpy as np
from collections import deque


# Capture settings shared by every stream opened in this notebook.

# Number of channels requested from the audio stream (1 = mono).
CHANNELS = 1
# Sample rate passed to the audio stream.
RATE = 16000
# Frames requested per buffer and per stream.read() call.
CHUNK = 1024 
# Level in dB that a chunk must exceed before a recording starts.
THRESHOLD = -50
# Approximate length, in seconds, of the sliding window of chunk levels used
# by listen_for_speech; a recording ends once no chunk in that window exceeds
# the threshold.
SILENCE_LIMIT = 2
# Approximate amount of audio, in seconds, kept from before the sound was
# detected and prepended to the recording.
PREV_AUDIO = 0.5
# Sample format of the stream; the raw buffers are decoded as np.float32.
FORMAT = pyaudio.paFloat32

In [2]:
def save_speech(data):
    """Write captured samples to a timestamped WAV file.

    The file is written with a relative path (so it lands in the current
    working directory) at RATE samples per second and is named
    ``output_<unix-seconds>output.wav``.

    Args:
        data: Samples to store; handed unchanged to scipy.io.wavfile.write.

    Returns:
        The name of the file that was written.
    """
    from scipy.io import wavfile

    # Build the name once so the written path and the returned path always agree.
    wav_name = 'output_{}output.wav'.format(int(time.time()))
    wavfile.write(wav_name, RATE, data)
    return wav_name

In [3]:
def db_np(x):
    """Return the RMS level of a block of samples in decibels.

    Computes ``20 * log10(sqrt(mean(x ** 2)))``, so an RMS of 1.0 maps to
    0 dB.  The mean is taken over the samples actually supplied, which keeps
    the result correct for buffers of any length (for a buffer of exactly
    CHUNK samples this is the same value as dividing the sum by CHUNK).

    Args:
        x: Array-like of audio samples.

    Returns:
        The level in dB.  ``-inf`` is returned for an empty block or a block
        of all zeros (digital silence).
    """
    samples = np.asarray(x, dtype=np.float64)
    if samples.size == 0:
        # No samples means no energy; avoid the NaN that mean([]) would give.
        return -np.inf

    rms = np.sqrt(np.mean(np.square(samples)))
    # log10(0) == -inf is the intended answer for silence, so only the
    # divide-by-zero warning is suppressed, not the result.
    with np.errstate(divide='ignore'):
        return 20.0 * np.log10(rms)

In [4]:
def audio_int(num_samples=100):
    """Measure the average level of the microphone input in dB.

    Opens an input stream, reads ``num_samples`` chunks of CHUNK frames and
    averages the per-chunk level reported by ``db_np``.  Chunks whose level
    is not finite (e.g. ``-inf`` for digital silence) are left out of both
    the sum and the divisor so they cannot skew the average.

    Args:
        num_samples: Number of chunks to read from the microphone.

    Returns:
        The mean chunk level in dB, or ``-inf`` when no chunk produced a
        finite level (which includes ``num_samples <= 0``).
    """
    print ("Getting intensity values from mic.")
    p = pyaudio.PyAudio()
    stream = None
    db_total = 0.
    db_count = 0
    try:
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
        for _ in range(num_samples):
            raw = stream.read(CHUNK, exception_on_overflow=False)
            # np.frombuffer replaces the deprecated binary mode of
            # np.fromstring; its result is read-only, but np.clip returns a
            # new array so nothing writes into the buffer.
            chunk = np.clip(np.frombuffer(raw, dtype=np.float32), -1, 1)

            level = db_np(chunk)
            if np.isfinite(level):
                db_total += level
                db_count += 1
    finally:
        # Release the audio device even when opening or reading fails.
        if stream is not None:
            stream.close()
        p.terminate()

    # Divide by the number of chunks that actually contributed to the sum.
    average = db_total / db_count if db_count else -np.inf

    print (" Finished ")
    print (" Average audio db is ", average)
    return average

In [5]:
def listen_for_speech(threshold=THRESHOLD, num_phrases=-1):
    """Record phrases from the microphone and save each one as a WAV file.

    Audio is read in chunks of CHUNK frames and the level of every chunk
    (from ``db_np``) is pushed into a sliding window covering roughly
    SILENCE_LIMIT seconds.  A phrase starts as soon as any level in the
    window exceeds ``threshold`` and ends when the window no longer holds
    such a level.  A finished phrase is prefixed with up to roughly
    PREV_AUDIO seconds of audio captured before it started, written to disk
    with ``save_speech`` and plotted.

    Args:
        threshold: Level in dB above which a chunk counts as sound.
        num_phrases: Number of phrases to record; -1 keeps listening until
            the loop is interrupted.

    Returns:
        A list with the filename returned by ``save_speech`` for every
        recorded phrase, in recording order.
    """
    p = pyaudio.PyAudio()
    stream = None

    # Whole chunks per second (16000 / 1024 truncates to 15), used to turn
    # the second-based limits into chunk counts.
    rel = int(RATE / CHUNK)
    window_len = int(SILENCE_LIMIT * rel)
    pre_roll_len = int(PREV_AUDIO * rel)

    slid_win = deque(maxlen=window_len)      # recent chunk levels in dB
    prev_audio = deque(maxlen=pre_roll_len)  # chunks heard before a phrase
    audio2send = []                          # chunks of the current phrase

    started = False
    remaining = num_phrases
    saved_files = []

    try:
        # Nothing is ever written to the stream, so it is opened input-only,
        # with the same FORMAT constant that audio_int uses.
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)

        print ("* Listening mic. ")

        while (num_phrases == -1 or remaining > 0):
            raw = stream.read(CHUNK, exception_on_overflow=False)
            # np.frombuffer replaces the deprecated binary mode of
            # np.fromstring; np.clip returns a new, writable array.
            cur_data = np.clip(np.frombuffer(raw, dtype=np.float32), -1, 1)

            slid_win.append(db_np(cur_data))

            # Compare against the caller-supplied threshold, not the global.
            if any(level > threshold for level in slid_win):
                if not started:
                    print ("Starting record of phrase")
                    started = True

                audio2send.append(cur_data)

            elif started:
                print ("Finished")

                # The window holds no loud chunk any more: assemble the
                # pre-roll plus the phrase into one flat float32 signal.
                print ("audio prepared and saved")
                final_audio = np.concatenate(list(prev_audio) + audio2send)

                filename = save_speech(final_audio)
                saved_files.append(filename)
                print (filename)
                print (final_audio.shape)
                plt.plot (final_audio)
                plt.show()

                # Reset all state for the next phrase.
                started = False
                slid_win.clear()
                prev_audio.clear()
                audio2send = []

                remaining -= 1
                print ("Listening ...")

            else:
                # Still quiet: remember the chunk as potential pre-roll.
                prev_audio.append(cur_data)

        print ("* Done recording")
    finally:
        # Release the audio device on normal exit, on errors and when the
        # endless (num_phrases == -1) loop is interrupted.
        if stream is not None:
            stream.close()
        p.terminate()

    return saved_files
            

In [6]:
# Measure the average microphone level in dB over the default 100 chunks;
# the printed value can be compared with THRESHOLD.
audio_int()

Getting intensity values from mic.




 Finished 
 Average audio db is  -84.39243405972223


-84.39243405972223

In [None]:
# Start recording phrases. With the default num_phrases=-1 the loop never
# ends on its own, so this cell runs until the kernel is interrupted.
listen_for_speech()