In [6]:
import pyaudio
import wave
import pandas as pd
import librosa
import glob 
import librosa.display
import os
import numpy as np
import tensorflow as tf
from threading import Thread
import time
from collections import deque
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
import pickle 

class Mic(Thread):
    """Background thread that keeps the most recent microphone audio in a bounded buffer.

    The thread continuously reads fixed-size chunks from a PyAudio input stream
    and appends them to a ``deque`` with a maximum length, so only the newest
    audio is retained. ``getAudioFile`` dumps the current buffer to disk and
    returns the feature vector computed from that file.

    Parameters
    ----------
    Format : PyAudio sample format constant (e.g. ``pyaudio.paInt16``).
    Chunk : number of frames read from the stream per call.
    Channels : number of input channels.
    Rate : sampling rate passed to the stream and written to the wave header.
    Sec : length of the rolling buffer, in the units described below.
    FileName : path the buffered audio is written to by ``getAudioFile``.
    """

    # Per-second sizing constant for the rolling buffer (kept from the original
    # code). The buffer holds round(20480 * Sec / Chunk) chunks.
    # NOTE(review): this is not derived from Rate, so at 22050 Hz the buffer
    # covers slightly less than Sec seconds - confirm whether that is intended.
    _BUFFER_UNITS_PER_SECOND = 20480

    def __init__(self, Format, Chunk, Channels, Rate, Sec, FileName):
        ''' Constructor. '''

        Thread.__init__(self)

        self.p = pyaudio.PyAudio()
        self.Channels = Channels
        self.Format = Format
        self.Rate = Rate
        self.Sec = Sec
        self.Running = True
        self.FileName = FileName
        self.Chunk = Chunk
        # Cached so getAudioFile() keeps working after the PortAudio session
        # has been terminated at the end of run().
        self.SampleWidth = self.p.get_sample_size(Format)
        self.stream = self.p.open(format=Format,
                        channels=Channels,
                        rate=Rate,
                        input=True,
                        frames_per_buffer=Chunk)
        # Use the Sec argument instead of a hard-coded 15; max(1, ...) avoids a
        # zero-length deque that would silently drop every chunk.
        self.queue = deque(maxlen=max(1, round(self._BUFFER_UNITS_PER_SECOND * Sec / Chunk)))

    def run(self):
        """Read chunks from the stream until ``stop()`` is called, then release the device."""
        print("Start acquisition")
        try:
            while self.Running:
                # self.Chunk, not the notebook-level global: the class must not
                # depend on a variable that happens to exist in the caller's namespace.
                data = self.stream.read(self.Chunk, exception_on_overflow=False)
                self.queue.append(data)
        except Exception as exc:
            # Report the actual failure instead of hiding it.
            print("run() Error: " + repr(exc))
        finally:
            self._close_audio()

    def _close_audio(self):
        """Stop/close the input stream and terminate PyAudio; failures are only reported."""
        try:
            self.stream.stop_stream()
            self.stream.close()
        except Exception as exc:
            print("Mic cleanup warning: " + repr(exc))
        try:
            self.p.terminate()
        except Exception as exc:
            print("Mic cleanup warning: " + repr(exc))

    def getAudioFile(self):
        """Write the buffered audio to ``self.FileName`` and return its feature vector.

        Returns
        -------
        The five arrays returned by ``extract_features`` concatenated along
        axis 0, or the empty string ``""`` if anything failed (legacy error
        sentinel kept for existing callers).
        """
        try:
            # Snapshot first so the writer works on a fixed set of chunks while
            # the acquisition thread keeps appending.
            frames = b''.join(list(self.queue))
            # The context manager closes the file even if a write fails.
            with wave.open(self.FileName, 'wb') as wf:
                wf.setnchannels(self.Channels)
                wf.setsampwidth(self.SampleWidth)
                wf.setframerate(self.Rate)
                wf.writeframes(frames)
            # Analyse the file that was just written rather than a hard-coded name.
            features = extract_features(self.FileName)
            return np.concatenate(features[:5], axis=0)
        except Exception as exc:
            # "except Exception" lets KeyboardInterrupt/SystemExit propagate.
            print("getAudioFile() Error: " + repr(exc))
            return ""

    def stop(self):
        """Ask the acquisition loop to finish after the current read."""
        self.Running = False

In [7]:
def extract_features(files):
    """Compute time-averaged spectral features for one audio file.

    Parameters
    ----------
    files : path of the audio file to analyse. A falsy value falls back to
        the legacy default ``'test.flac'`` that this function used to
        hard-code.

    Returns
    -------
    tuple ``(mfccs, chroma, mel, contrast, tonnetz)``; each element is the
    mean over time frames (axis 0 after transposing) of the corresponding
    librosa feature matrix.
    """
    # Honour the argument; previously it was ignored and 'test.flac' was always loaded.
    file_name = files or 'test.flac'

    # Loads the audio file as a floating point time series (librosa resamples
    # to its default rate unless told otherwise).
    X, sample_rate = librosa.load(file_name, res_type='kaiser_fast')

    # Mel-frequency cepstral coefficients, 40 per frame, averaged over frames.
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)

    # Magnitude STFT, shared by the chroma and spectral-contrast features below.
    stft = np.abs(librosa.stft(X))

    # Chromagram computed from the magnitude spectrogram.
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)

    # Mel-scaled spectrogram. The signal must be passed as y=...; recent
    # librosa releases reject it as a positional argument.
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)

    # Spectral contrast.
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)

    # Tonal centroid features (tonnetz) of the harmonic component.
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)

    return mfccs, chroma, mel, contrast, tonnetz

In [11]:
# Audio capture configuration handed to Mic.
Chunk = 1024
Format = pyaudio.paInt16
Channels = 1
Rate = 22050
Sec = 15
FileName = "test.flac"

# NOTE(security): pickle.load can execute arbitrary code; only load encoder /
# scaler files that come from a trusted source.
# "with" closes the files as soon as the objects are loaded.
with open('LabelEncoder.pkl', 'rb') as f_encoder:
    lb = pickle.load(f_encoder)

with open('StandardScaler.pkl', 'rb') as f_scaler:
    ss = pickle.load(f_scaler)

model = tf.keras.models.load_model('saved_model/my_model')

mic = Mic(Format, Chunk, Channels, Rate, Sec,FileName)
mic.start()

try:
    for i in range(0,10):
        time.sleep(5)
        features = mic.getAudioFile()
        # getAudioFile() returns "" when capture or feature extraction failed;
        # skip this round instead of feeding the sentinel to the scaler.
        if isinstance(features, str):
            continue
        # Scale once and run the model once per iteration.
        scaled = ss.transform([features])
        # predict_proba / predict_classes were removed from Keras; predict()
        # returns the same probabilities and argmax over the last axis gives
        # the class index for a multi-class output.
        prob = model.predict(scaled, verbose=0)
        pred = np.argmax(prob, axis=-1)
        print(">>>>>"+str(lb.inverse_transform(pred)))
        print(">>>>>"+str(max(prob[0])))
finally:
    # Always stop the acquisition thread, including on KeyboardInterrupt.
    mic.stop()

Start acquisition
>>>>>[' Silence']
>>>>>0.73088515
>>>>>[' Silence']
>>>>>0.60204417
>>>>>[' Tommaso Elia']
>>>>>0.9949864
>>>>>[' Tommaso Elia']
>>>>>0.9995647
>>>>>[' Tommaso Elia']
>>>>>0.99953926
>>>>>[' Silence']
>>>>>0.92409307
>>>>>[' Tommaso Elia']
>>>>>0.6959503
>>>>>[' Tommaso Elia']
>>>>>0.65927166
>>>>>[' Silence']
>>>>>0.4344425



KeyboardInterrupt



array([' Albatross', ' Ancient mariner', ' Andy', ' Aringguth',
       ' C F de Rosset', ' CaprishaPage', ' Carolin Ksr',
       ' Charlene V. Smith', ' Chelsea Baker', ' Chip', ' Chris Hughes',
       ' Chuck Williamson', ' Drew Johnson', ' Ernst Pattynama',
       ' Gilles Lehoux', ' Hannah Dowell', ' HarryInk', ' JamesMcAndrew',
       ' Jim Mowatt', ' Kendall Ashyby', ' Kenneth Sergeant Gaghan',
       ' KevS', ' Kristen Zaza', ' Kristin Gjerlw', ' Lanerd',
       ' Laura M.D.', ' Mariah Lyons', ' MaryModern', ' Matthew Walker',
       ' Matthew Walton', ' Mike Vendetti', ' Millbeach', ' Mindy H',
       ' Patrick Wells', ' Pete Milan', ' Rachel Craig', ' Raybrite',
       ' Richard Beck', ' Robert Keiper', ' Sharon C.', ' Silence',
       ' Steph', ' Steve Belleguelle', ' Tim Makarios', ' Tom Causby',
       ' Troy Bond', ' Uday Sagar', ' brrrrrr6', ' mb', ' mjbrichant',
       ' penboy7000', ' rachaelg', ' redabrus', ' shana', ' shihping',
       ' sparks0314', ' tabithat', ' web

[' Anka']
