In [4]:
# Function for processing data chunks
# 1) Normalize between -1 and 1
# 2) Bandpass 50-5000 Hz 
# 3) Extract MFCCs (40 of them)

from scipy import signal
import librosa
import numpy as np

RATE = 22050
sos = signal.butter(5, [50, 5000], 'bandpass', fs=RATE, output='sos')

def get_mfccs(audio): 
   
    try:
        audio = 2*((audio-min(audio))/(max(audio)-min(audio)))-1

        #Filter
        audio = signal.sosfilt(sos, audio) 

        mfccs = librosa.feature.mfcc(y=audio, sr=RATE, n_mfcc=40)

    except Exception as e:
            print("Error extracting features")
            return None
     
    return mfccs

In [5]:
# Initialize pyaudio settings

import pyaudio
#import struct
#import time
#import librosa.display

RATE = 22050
CHUNK = RATE*3
FORMAT = pyaudio.paInt16
CHANNELS = 1

# Identify which input is the mic - use to change input_device_index below
p = pyaudio.PyAudio()

chosen_device_index = -1
for x in range(0,p.get_device_count()):
    info = p.get_device_info_by_index(x)
    print(p.get_device_info_by_index(x))
  #  if info["name"] == "Microphone (Realtek High Defini":
  #      chosen_device_index = info["index"]
  #      print("Chosen index: ", chosen_device_index)

{'index': 0, 'structVersion': 2, 'name': 'HDA Intel PCH: ALC255 Analog (hw:0,0)', 'hostApi': 0, 'maxInputChannels': 2, 'maxOutputChannels': 2, 'defaultLowInputLatency': 0.005804988662131519, 'defaultLowOutputLatency': 0.005804988662131519, 'defaultHighInputLatency': 0.034829931972789115, 'defaultHighOutputLatency': 0.034829931972789115, 'defaultSampleRate': 44100.0}
{'index': 1, 'structVersion': 2, 'name': 'HDA NVidia: HDMI 0 (hw:1,3)', 'hostApi': 0, 'maxInputChannels': 0, 'maxOutputChannels': 8, 'defaultLowInputLatency': -1.0, 'defaultLowOutputLatency': 0.005804988662131519, 'defaultHighInputLatency': -1.0, 'defaultHighOutputLatency': 0.034829931972789115, 'defaultSampleRate': 44100.0}
{'index': 2, 'structVersion': 2, 'name': 'HDA NVidia: HDMI 1 (hw:1,7)', 'hostApi': 0, 'maxInputChannels': 0, 'maxOutputChannels': 8, 'defaultLowInputLatency': -1.0, 'defaultLowOutputLatency': 0.005804988662131519, 'defaultHighInputLatency': -1.0, 'defaultHighOutputLatency': 0.034829931972789115, 'defaul

In [7]:
# load model

from keras.models import load_model

modelSave = '/home/praveen/Desktop/siren/siren_detector_V2.h5' # location of the trained model
model = load_model(modelSave)

2022-10-06 20:38:51.043778: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-06 20:38:51.044751: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-10-06 20:38:51.044929: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory
2022-10-06 20:38:51.045093: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot open shared object file: No such file or directory
2022-10-06 20:38:51.045260: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Co

In [9]:
# Real time detection
# 1) reads in 3-second audio chunk (defined by CHUNK)
# 2) gets MFCCs
# 3) runs MFCCs through keras model to predict probabilities 
# 4) Determines whether siren was present based on threshold

stream = p.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input_device_index=16, # CHANGE
    input=True,
    output=True,
    frames_per_buffer=CHUNK
 )


import sounddevice as sd

num_rows = 40
num_columns = 130
num_channels = 1

#alldata = [] # Used to play back audio for testing 

prob_thresh = 0.98 # probability threshold for detecting the siren

while True:
#for i in range(0,5):
    data = stream.read(CHUNK, exception_on_overflow=False)
    data_int = np.frombuffer(data, dtype=np.int16)
    
    #alldata = np.append(alldata, data_int) 
    
    data_int = get_mfccs(data_int)

    prediction_feature = data_int.reshape(1, num_rows, num_columns, num_channels)

    predicted_vector = model.predict(prediction_feature)
    # predicted_proba_vector = model.predict_proba(prediction_feature)

    #print(predicted_vector)
    
    print("not_siren: ", predicted_vector[0][0], "siren: ", predicted_vector[0][1])
    
    if predicted_vector[0][1] > prob_thresh:
        print('SIREN!!!')
    else:
        print('No siren. Carry on.')


not_siren:  1.0 siren:  5.14726e-25
No siren. Carry on.
not_siren:  1.0 siren:  3.9633612e-11
No siren. Carry on.


  audio = 2*((audio-min(audio))/(max(audio)-min(audio)))-1


not_siren:  0.035083972 siren:  0.964916
No siren. Carry on.
not_siren:  0.7322984 siren:  0.2677017
No siren. Carry on.
not_siren:  0.012261801 siren:  0.98773825
SIREN!!!
not_siren:  3.5570192e-05 siren:  0.9999645
SIREN!!!
not_siren:  0.019715784 siren:  0.9802843
SIREN!!!
not_siren:  0.09576182 siren:  0.9042382
No siren. Carry on.
not_siren:  0.9972052 siren:  0.0027947698
No siren. Carry on.
not_siren:  1.0 siren:  5.907053e-09
No siren. Carry on.
not_siren:  0.9999869 siren:  1.3063466e-05
No siren. Carry on.
not_siren:  1.0 siren:  3.7241244e-13
No siren. Carry on.
not_siren:  1.0 siren:  2.5668764e-19
No siren. Carry on.
not_siren:  1.0 siren:  8.9518636e-12
No siren. Carry on.
not_siren:  1.0 siren:  5.2659174e-14
No siren. Carry on.
not_siren:  0.82952917 siren:  0.17047085
No siren. Carry on.
not_siren:  0.99999666 siren:  3.3883914e-06
No siren. Carry on.


KeyboardInterrupt: 

In [10]:
# play back recorded audio for testing purposes

import sounddevice as sd
sd.play(alldata, RATE)



NameError: name 'alldata' is not defined