In [4]:
# Import libraries
import sys
import os
import numpy as np
import pandas as pd
import sklearn as sk
from os import listdir
from os.path import isfile, join
from timeit import default_timer as timer

from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from six.moves import cPickle as pickle
from six.moves import range

import librosa
import soundfile as sf

In [5]:
def extract_feature(file_name: str) -> tuple:
    X, sample_rate = librosa.load(file_name)
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T,axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
    mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T,axis=0)
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T,axis=0)
    return mfccs,chroma,mel,contrast,tonnetz

In [7]:
#Extract features for one file
start_time = timer()
mfccs,chroma,mel,contrast,tonnetz = extract_feature('0_dB_fan/id_00/normal/00000000.wav')
end_time = timer()
print('time to extract features from one file: {:.3f}sec'.format((end_time-start_time)/60))

time to extract features from one file: 0.036sec


In [17]:
#Automated extraction of features for one folder
start_time = timer()
features = []
path = '0_dB_fan/id_00/normal/'
for root, dirs, files in os.walk(path, topdown = True):
    for name in files:
        file = path + name
        mfccs,chroma,mel,contrast,tonnetz = extract_feature(file)
        features.append([file,mfccs,chroma,mel,contrast,tonnetz])
end_time = timer()
print('time to extract features: {:.3f}sec'.format((end_time-start_time)/60))

time to extract features: 23.273sec


In [18]:
df = pd.DataFrame(features, columns=["file","mfccs","chroma","mel","contrast","tonnetz"])
df.to_csv('Features_0_dB_fan_id00_normal.csv', index=False)
s = start_time
e = end_time
e-s

1396.3924257030012

In [19]:
#Read and add target column
df_normal = pd.read_csv('Features_0_dB_fan_id00_normal.csv')
df_abnormal = pd.read_csv('Features_0_dB_fan_id00_abnormal.csv')
df_normal = df_normal.head(407)
df_normal.shape
target_abnormal = ['abnormal']*407
df_abnormal['target'] = target_abnormal
df_abnormal
target_normal = ['normal']*407
df_normal['target'] = target_normal
#concatenate
df_0dBfan_id00 = pd.concat([df_normal,df_abnormal])
df_0dBfan_id00.to_csv('df_0dBfan_id00.csv', index=False)