In [1]:
import wave
import os
import io
import pathlib
from pathlib import Path
from scipy.io import wavfile
import sounddevice as sd
import time
import librosa as lb
import librosa.display as dsp
import numpy as np
import matplotlib.pyplot as plt
from os import path, listdir
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, layers, models

In [2]:
model = keras.models.load_model('model')

In [3]:
model.summary()

Model: "sequential_17"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_57 (Conv2D)           (None, 30, 30, 32)        320       
_________________________________________________________________
max_pooling2d_41 (MaxPooling (None, 15, 15, 32)        0         
_________________________________________________________________
conv2d_58 (Conv2D)           (None, 13, 13, 64)        18496     
_________________________________________________________________
max_pooling2d_42 (MaxPooling (None, 6, 6, 64)          0         
_________________________________________________________________
conv2d_59 (Conv2D)           (None, 4, 4, 64)          36928     
_________________________________________________________________
max_pooling2d_43 (MaxPooling (None, 2, 2, 64)          0         
_________________________________________________________________
conv2d_60 (Conv2D)           (None, 1, 1, 64)        

In [4]:
CHUNK_SIZE = 1000
SAMPLING_FREQUENCY = 16000
REAL_CATEGORIES = ["bed", "bird", "cat", "dog", "down", "eight", "five", "four","go","happy","house","left","marvin","nine","no","off","on","one","right","seven","sheila","six","stop","three","tree","two","up","wow","yes","zero"]
revDict = dict()
for i in range(len(REAL_CATEGORIES)):
    revDict[i] = REAL_CATEGORIES[i]

In [5]:
def densest(array, size):
    density = np.convolve(array, np.ones([size]), mode='valid')
    return np.argmax(density)

In [8]:
def run_model(audio,start_idx):
    print("started guessing")
    data = np.roll(audio, - start_idx * CHUNK_SIZE)
    
    dense_start_index = densest(np.abs(data), SAMPLING_FREQUENCY)
    data = data[dense_start_index:dense_start_index + SAMPLING_FREQUENCY]
    
    sd.play(data, 16000)
    time.sleep(1.0)
    sd.stop()
    
    mfcc_sample = lb.feature.mfcc(y = data, sr = SAMPLING_FREQUENCY, n_mfcc = 32)
    
    pred = model.predict(mfcc_sample.reshape(1,32,32,1))
        
    print('prediction:', revDict[np.argmax(pred)])
    #print("softmax: ", pred)
    #print(pred.shape)
    
    pred = pred.ravel()
    print("stopped guessing")
    return revDict[np.argmax(pred)]

In [9]:
start_idx = 0
wait = False
wait_count = 0
energy_count = 0

r = np.zeros((CHUNK_SIZE * 32))

print(r[start_idx * CHUNK_SIZE: (start_idx + 1) * CHUNK_SIZE].shape)

audio_in = sd.InputStream(samplerate = SAMPLING_FREQUENCY, channels = 1)
audio_in.start()
time_diff = 0
time_last = 0

while(True):
    
    data_chunk, _ = audio_in.read(CHUNK_SIZE)
    data_chunk = data_chunk.ravel()
        
    r[start_idx * CHUNK_SIZE: (start_idx + 1) * CHUNK_SIZE] = data_chunk
    
    start_idx = (start_idx + 1) % 32

    if wait and wait_count <= 60:
        wait_count += 1
        
    if wait and wait_count > 60:
        wait = False
        
    if not wait:
        print("...")
        energy = np.sqrt(np.sum(np.square(r)))
        if energy > 5:
            energy_count += 1

    if energy_count > 8 and not wait:
        wait = True
        wait_count = 0
        energy_count = 0
        #que = queue.Queue()
        #print("****************")
        #print("start: ", start_idx)
        #print(energy)
        #print(r)
        #print("****************")
        
        predicted = run_model(r, start_idx)


(1000,)
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
started guessing
prediction: on
stopped guessing
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
started guessing
prediction: up
stopped guessing
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
started guessing
prediction: eight
stopped guessing
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
..

KeyboardInterrupt: 