In [25]:
import pyaudio
import numpy as np
import matplotlib.pyplot as plt
import time
import wave
import IPython.display as ipd
import librosa.display
import sklearn
from scipy.stats import kurtosis, skew
import csv
import os
import pandas as pd

class Audio():
    """Record audio from a named input device with PyAudio and save it as WAV.

    Typical use is ``initiate_mic()`` followed by
    ``record_audio(filename, runtime)``; ``record_audio`` closes the stream
    and terminates PyAudio when it finishes, so ``initiate_mic()`` must be
    called again before another recording.
    """

    def __init__(self):
        super(Audio, self).__init__()

        # presumably maps int16 samples to [-1, 1); not used inside this class
        self.SHORT_NORMALIZE = (1.0/32768.0)
        self.RATE = 44100                  # sample rate passed to the stream and the WAV header
        self.INPUT_BLOCK_TIME = 0.05       # seconds of audio per read
        self.CHUNK = int(self.RATE*self.INPUT_BLOCK_TIME)  # frames per read
        self.CHANNEL = 2

        # Substring looked for in the PyAudio device names.
        self.microphone_name = "3- Andrea PureAudio"

    def initiate_mic(self):
        """Create ``self.pa`` and open a 16-bit input stream as ``self.mic``.

        If no device name contains ``self.microphone_name``, the device index
        is ``None`` and PyAudio falls back to its default input device.

        Raises:
            Whatever PyAudio raises while enumerating devices or opening the
            stream; PyAudio is terminated before the error propagates.
        """
        self.pa = pyaudio.PyAudio()
        try:
            device_index = self.discover_mic()
            self.mic = self.pa.open(
                format=pyaudio.paInt16,
                channels=self.CHANNEL,
                rate=self.RATE,
                input=True,
                input_device_index=device_index,
                frames_per_buffer=self.CHUNK,
            )
        except Exception:
            # Do not leave a half-initialised PyAudio instance behind.
            self.pa.terminate()
            raise

    def discover_mic(self):
        """Return the index of the first device whose name contains
        ``self.microphone_name``, or ``None`` when there is no match.

        Requires ``self.pa`` to be set (see ``initiate_mic``).
        """
        for i in range(self.pa.get_device_count()):
            devinfo = self.pa.get_device_info_by_index(i)
            if self.microphone_name in devinfo["name"]:
                return i
        return None

    def record_audio(self, filename, runtime):
        """Record from ``self.mic`` for about ``runtime`` seconds and save to ``filename``.

        At least one chunk is always read; recording stops after the first
        read that finishes later than ``runtime`` seconds from the start.
        The microphone and PyAudio are released even if reading or saving
        fails.
        """
        blocks = []

        print("Recording starts..")
        # Monotonic clock: the duration is unaffected by system clock changes.
        start = time.monotonic()
        try:
            while True:
                blocks.append(self.mic.read(self.CHUNK))
                if time.monotonic() - start > runtime:
                    break
            # Save before closing: save_audio still needs self.pa.
            self.save_audio(filename, blocks)
        finally:
            self.close_mic()

        print("Recording done! Saved as ", filename)

    def save_audio(self, filename, blocks):
        """Write the raw 16-bit ``blocks`` (a list of bytes) to ``filename`` as WAV."""
        # The context manager closes the file even if a write fails.
        with wave.open(filename, 'wb') as wf:
            wf.setnchannels(self.CHANNEL)
            wf.setsampwidth(self.pa.get_sample_size(pyaudio.paInt16))
            wf.setframerate(self.RATE)
            wf.writeframes(b''.join(blocks))

    def close_mic(self):
        """Stop and close the input stream, then terminate PyAudio."""
        try:
            self.mic.stop_stream()
            self.mic.close()
        finally:
            # Always release PortAudio, even if the stream is already broken.
            self.pa.terminate()
        
class Features():
    """Load an audio file with librosa and summarise it as a flat feature dict.

    ``load_audio_file`` must be called before ``get_features`` or
    ``tempogram``; both read the cached ``self.x`` / ``self.sr``.
    """

    def __init__(self):
        super(Features, self).__init__()

    def load_audio_file(self, filename):
        """Load ``filename`` with ``librosa.load`` (default arguments), cache
        the result as ``self.x`` / ``self.sr`` and return ``(x, sr)``."""
        self.x, self.sr = librosa.load(filename)
        return self.x, self.sr

    def load_data_file(self, path):
        """Return the feature table stored at ``path`` as a DataFrame.

        If ``path`` does not exist, a CSV containing only the header row is
        created first (including any missing parent directories), so the
        result is then an empty DataFrame with the expected columns.
        """
        if not os.path.exists(path):
            header = [
                'filename', 'spec_centroid', 'spec_rolloff', 'spec_bandwidth',
                'rmse', 'chroma', 'tempogram', 'max_amplitude',
                'min_amplitude', 'mean_amplitude', 'standard_deviation',
                'variance', 'skew', 'kurtosis', 'zero_crossing_rate',
                'quantiles_10', 'quantiles_25', 'quantiles_50',
                'quantiles_75',
            ]
            header += [f'mfcc{i}' for i in range(1, 20)]
            header.append('label')

            # open() does not create directories, e.g. "data/" for
            # "data/audio_feature.csv".
            parent = os.path.dirname(path)
            if parent:
                os.makedirs(parent, exist_ok=True)

            with open(path, 'w', newline='') as file:
                csv.writer(file).writerow(header)

        return pd.read_csv(path, index_col=False)

    def tempogram(self):
        """Return librosa's tempogram of the loaded signal, computed from its
        onset-strength envelope."""
        oenv = librosa.onset.onset_strength(y=self.x, sr=self.sr)
        return librosa.feature.tempogram(onset_envelope=oenv, sr=self.sr)

    def get_features(self):
        """Compute summary features of the loaded signal.

        Returns:
            dict mapping feature name to a scalar, in the same order as the
            feature columns written by ``load_data_file`` (without
            ``filename`` and ``label``).
        """
        x, sr = self.x, self.sr
        feature = {}

        # Spectral / rhythm descriptors, each averaged over all of its values.
        feature['spec_centroid'] = np.mean(librosa.feature.spectral_centroid(y=x, sr=sr))
        feature['spec_rolloff'] = np.mean(librosa.feature.spectral_rolloff(y=x, sr=sr))
        feature['spec_bandwidth'] = np.mean(librosa.feature.spectral_bandwidth(y=x, sr=sr))
        feature['rmse'] = np.mean(librosa.feature.rms(y=x))
        feature['chroma'] = np.mean(librosa.feature.chroma_stft(y=x, sr=sr))
        feature['tempogram'] = np.mean(self.tempogram())

        # Statistics of the raw samples.
        feature['max_amplitude'] = np.max(x)
        feature['min_amplitude'] = np.min(x)
        feature['mean_amplitude'] = np.mean(x)
        feature['standard_deviation'] = np.std(x)
        feature['variance'] = np.var(x)
        feature['skew'] = skew(x)
        feature['kurtosis'] = kurtosis(x)
        # Despite the key name this is the total number of zero crossings,
        # not a rate.
        feature['zero_crossing_rate'] = np.sum(librosa.zero_crossings(x, pad=False))
        feature['quantiles_10'] = np.quantile(x, 0.10)
        feature['quantiles_25'] = np.quantile(x, 0.25)
        feature['quantiles_50'] = np.quantile(x, 0.50)
        feature['quantiles_75'] = np.quantile(x, 0.75)

        # Use the instance's sample rate; the previous code read a global
        # ``sr`` that only existed when a notebook cell happened to define it.
        mfcc = librosa.feature.mfcc(y=x, sr=sr)
        # NOTE(review): rows 1..19 are used and row 0 is skipped -- confirm
        # this is intentional.
        for i in range(1, 20):
            feature[f'mfcc{i}'] = np.mean(mfcc[i])

        return feature

## Record audio

In [None]:
# Output WAV path and capture length for this recording.
filename = "test.wav"
runtime = 5  # seconds

# Open the configured microphone, record for `runtime` seconds and save the
# result to `filename`.
audio = Audio()
audio.initiate_mic()
audio.record_audio(filename=filename, runtime=runtime)

# Last expression of the cell: in a notebook this displays an audio player
# for the saved file.
ipd.Audio(filename)

## Create features from audio and append them to the feature file

In [20]:
# Extract features for each audio file and append them to the feature CSV.
f = Features()

path = 'data/audio_feature.csv'
df = f.load_data_file(path)

files = ["test.wav", "test.wav","test.wav"]
rows = []
for file in files:
    x, sr = f.load_audio_file(file)

    feature = f.get_features()

    # One-row frame: 'filename' first, then the features in dict order.
    row = pd.DataFrame([list(feature.values())], columns = list(feature.keys()))
    row.insert(loc=0, column='filename', value=[file])
    rows.append(row)

# Concatenate once with a fresh index: the per-row frames all carry index
# label 0, which would otherwise leave duplicate labels in `df`. The file is
# written once after all files are processed instead of being rewritten on
# every iteration. Columns missing from the rows (e.g. 'label') are NaN.
df = pd.concat([df, *rows], ignore_index=True)
df.to_csv(path, index=False)

## Model