In [16]:
from keras.models import load_model
import tensorflow as tf
import numpy as np
from vggish_input import waveform_to_examples, wavfile_to_examples
import ubicoustics
import pyaudio
from pathlib import Path
import time
import argparse
import wget
from scipy.io import wavfile

# Variables
# Audio capture settings. The code that opens the input stream is not in the
# visible cells, so how FORMAT / CHANNELS / CHUNK are consumed is assumed below.
FORMAT = pyaudio.paInt16  # 16-bit integer samples; the audio callback decodes buffers as np.int16
CHANNELS = 1  # presumably mono capture -- TODO confirm where the stream is opened
RATE = 16000  # sample rate (Hz) passed to waveform_to_examples
CHUNK = RATE  # same value as RATE; presumably one second of frames per buffer -- TODO confirm
MICROPHONES_DESCRIPTION = []  # NOTE(review): not referenced in the visible cells
FPS = 60.0  # NOTE(review): not referenced in the visible cells

###########################
# Check Microphone
###########################
print("=====")
print("1 / 2: Checking Microphones... ")
print("=====")

import microphones
# list_microphones() returns three values; `mics` and `indices` are indexed in
# parallel further down (mics[k] is the description of device indices[k]).
desc, mics, indices = microphones.list_microphones()
if len(mics) == 0:
    print("Error: No microphone found.")
    # Raise SystemExit instead of calling the interactive `exit()` helper:
    # this stops execution right here (in a script and in a notebook cell),
    # so the code below never indexes into an empty device list, and a script
    # run reports a non-zero exit status for the error.
    raise SystemExit(1)

#############
# Select Microphone
#############
# The first detected device is used. Command-line / interactive selection is
# disabled; the original code is kept below for reference.
MICROPHONE_INDEX = indices[0]
# parser = argparse.ArgumentParser()
# parser.add_argument("-m", "--mic", help="Select which microphone / input device to use")
# args = parser.parse_args()
# try:
#     if args.mic:
#         MICROPHONE_INDEX = int(args.mic)
#         print("User selected mic: %d" % MICROPHONE_INDEX)
#     else:
#         mic_in = input("Select microphone [%d]: " % MICROPHONE_INDEX).strip()
#         if (mic_in!=''):
#             MICROPHONE_INDEX = int(mic_in)
# except:
#     print("Invalid microphone")
#     exit()

# Look up the description of the selected device. `indices` and `mics` are
# parallel sequences; if the index occurs more than once the last match wins.
mic_desc = ""
for k, i in enumerate(indices):
    if i == MICROPHONE_INDEX:
        mic_desc = mics[k]
print("Using mic: %s" % mic_desc)



=====
1 / 2: Checking Microphones... 
=====
Using mic: # 0 - Microsoft Sound Mapper - Input


In [2]:
###########################
# Download model, if it doesn't exist
###########################
MODEL_URL = "https://www.dropbox.com/s/cq1d7uqg0l28211/example_model.hdf5?dl=1"
MODEL_PATH = "models/example_model.hdf5"
print("=====")
print("2 / 2: Checking model... ")
print("=====")
# Reuse MODEL_PATH so the download target and the file that gets loaded
# cannot drift apart.
model_filename = MODEL_PATH
ubicoustics_model = Path(model_filename)
if not ubicoustics_model.is_file():
    # Make sure the target directory exists before downloading into it.
    ubicoustics_model.parent.mkdir(parents=True, exist_ok=True)
    print("Downloading example_model.hdf5 [867MB]: ")
    wget.download(MODEL_URL, MODEL_PATH)

##############################
# Load Deep Learning Model
##############################
print("Using deep learning model: %s" % (model_filename))
model = load_model(model_filename)
# The prediction functions run model.predict inside `graph.as_default()`.
graph = tf.get_default_graph()
context = ubicoustics.everything

# Map a class index (position in `context`) to its label name.
label = dict(enumerate(context))


=====
2 / 2: Checking model... 
=====
Using deep learning model: models/example_model.hdf5
--label
{0: 'dog-bark', 1: 'drill', 2: 'hazard-alarm', 3: 'phone-ring', 4: 'speech', 5: 'vacuum', 6: 'baby-cry', 7: 'chopping', 8: 'cough', 9: 'door', 10: 'water-running', 11: 'knock', 12: 'microwave', 13: 'shaver', 14: 'toothbrush', 15: 'blender', 16: 'dishwasher', 17: 'doorbell', 18: 'flush', 19: 'hair-dryer', 20: 'laugh', 21: 'snore', 22: 'typing', 23: 'hammer', 24: 'car-horn', 25: 'engine', 26: 'saw', 27: 'cat-meow', 28: 'alarm-clock', 29: 'cooking'}




In [24]:
##############################
# Setup Audio Callback
##############################
def audio_samples(in_data, frame_count, time_info, status_flags):
    """Classify one buffer of raw audio and print the top prediction.

    The signature and the ``(data, pyaudio.paContinue)`` return value suggest
    this is registered as a PyAudio stream callback; the registration itself
    is not in the visible cells.

    Args:
        in_data: Raw bytes holding 16-bit integer samples.
        frame_count: Unused.
        time_info: Unused.
        status_flags: Unused.

    Returns:
        ``(in_data, pyaudio.paContinue)``: the buffer is passed through
        unchanged together with the "continue" flag.
    """
    # np.frombuffer replaces the deprecated binary mode of np.fromstring. The
    # division produces a new float array, so the read-only view is never
    # written to.
    np_wav = np.frombuffer(in_data, dtype=np.int16) / 32768.0  # Convert to [-1.0, +1.0]
    x = waveform_to_examples(np_wav, RATE)

    # Nothing to classify when no example could be extracted from the buffer.
    if x.shape[0] != 0:
        with graph.as_default():
            x = x.reshape(len(x), 96, 64, 1)
            prediction = model.predict(x)

        # Only the first example of the buffer is reported.
        m = np.argmax(prediction[0])
        if m < len(label):
            print("Prediction: %s (%0.2f)" % (ubicoustics.to_human_labels[label[m]], prediction[0, m]))
        else:
            print("KeyError: %s" % m)

    return (in_data, pyaudio.paContinue)

##############################
# Prediction Function
##############################
def predict_wav(in_data, sample_rate=RATE):
    """Classify a recorded waveform, one prediction per extracted example.

    Args:
        in_data: NumPy array of int16 samples (for instance the data array
            returned by ``scipy.io.wavfile.read``).
        sample_rate: Sample rate of ``in_data``; defaults to ``RATE``.

    Returns:
        A list with one ``[human_readable_label, score]`` pair per example,
        in the order the examples were extracted. The list is empty when no
        example could be extracted from the input.

    Raises:
        AssertionError: If ``in_data`` is not of dtype int16.
    """
    assert in_data.dtype == np.int16, 'Bad sample type: %r' % in_data.dtype
    np_wav = in_data / 32768.0  # Convert to [-1.0, +1.0]
    x = waveform_to_examples(np_wav, sample_rate)

    # waveform_to_examples can yield zero examples (the live audio callback
    # guards against the same case); skip the model call instead of feeding
    # it an empty batch.
    if x.shape[0] == 0:
        return []

    with graph.as_default():
        x = x.reshape(len(x), 96, 64, 1)
        predictions = model.predict(x)

    output_list = []
    for prediction in predictions:
        m = np.argmax(prediction)
        output_list.append([ubicoustics.to_human_labels[label[m]], prediction[m]])

    return output_list

In [26]:
##############################
# Prediction Function Usage
##############################
# wavfile.read returns the sample rate and the sample array.
# predict_wav asserts that the samples are int16.
sr, wav_data = wavfile.read('example.wav')
# `out` is a list of [human-readable label, score] pairs, one per extracted example.
out = predict_wav(wav_data, sr)
print(out)



  after removing the cwd from sys.path.


[['Coughing', 1.0], ['Coughing', 0.9999722], ['Coughing', 0.9982481], ['Coughing', 0.9999887], ['Coughing', 0.99977094], ['Toilet Flushing', 1.0], ['Toilet Flushing', 1.0], ['Toilet Flushing', 1.0], ['Water Running', 0.99865097], ['Water Running', 0.9999087], ['Water Running', 0.99999833], ['Water Running', 0.9994715], ['Water Running', 0.99999034], ['Water Running', 0.9999994], ['Water Running', 0.9999937], ['Water Running', 0.9999968], ['Knocking', 0.99953187], ['Knocking', 0.98939604], ['Knocking', 0.9062936]]
