### Experimental notebook to extract feature from sound file in order to use simpler anomaly detection machine learning models (e.g. isolation forest) on them

The ide is to use librosa to extract sound related features anf tsfresh to extract time-series features

### 1) Install and import packages

In [1]:
!pip install tsfresh

import os
import glob
import tqdm
import numpy as np
import pandas as pd
import librosa
import tsfresh




  import pandas.util.testing as tm


### 2) Mount Google Drive

In [2]:
from google.colab import drive
drive.mount('/gdrive')

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


### 3) Extract features of the input audio files and write them on a csv file

#### 3.1) Some global variables (ToDo: read them from conf files)

In [3]:
# Paths to directories 
BASE_DIR = os.path.join('/gdrive', 'MyDrive', 'DSR_SOF')
RAW_DATA_DIR = os.path.join(BASE_DIR, 'dataset')
PROJ_DIR = os.path.join(BASE_DIR, 'sound-of-failure')

os.chdir(PROJ_DIR)

# Parameters
DB = '6dB'
MACHINE_TYPE = 'valve'
MACHINE_ID = 'id_00'

#### 3.2) Directories of input and output files

In [4]:
# Make the subdirectories for output files 

def make_feature_dirs(base_dir, db, machine_type, machine_id, machine_status):
    """
    Generates a "data" directory and "extracted_sound_features" subdirectory in the base directory.
    In "data/extracted_sound_features" subdirectories like in the MIMII dataset are created, e.g.
    - "data/extracted_sound_features/6dB/valve/id_00/normal"
    - "data/extracted_sound_features/6dB/valve/id_00/abnormal"
    If directories exist they are not overwritten.

    :param base_dir (str): path to directory where a "data" directory shall be created
    :param db (str): noise level, takes values '6dB', '0dB' or 'min6dB'
    :param machine_type (str): type of machine, takes values 'fan', 'pump', 'slider', 'valve'
    :param machine_id (str): id of machine, takes values 'id_00', 'id_02' etc.
    :param machine_status (str): status of the machine, i.e. 'normal' or 'abnormal'
    """
    data_dir = base_dir
    for dir in ['data', 'extracted_sound_features', db, machine_type, machine_id, machine_status]:
        data_dir = os.path.join(data_dir, dir)
        if not os.path.exists(data_dir):
            os.mkdir(data_dir)
    print(f'Directory created: {data_dir}')

In [5]:
# Get the subdirectories for output files
def get_feature_dirs(base_dir, db, machine_type, machine_id, machine_status):
    """
    Retrieves the directories where csv files of extracted sound features are stored
    :param base_dir (str): path to directory where the "data" directory was or shall be created
    :param db (str): noise level, takes values '6dB', '0dB' or 'min6dB'
    :param machine_type (str): type of machine, takes values 'fan', 'pump', 'slider', 'valve'
    :param machine_id (str): id of machine, takes values 'id_00', 'id_02' etc.
    :param machine_status (str): status of the machine, i.e. 'normal' or 'abnormal'

    :return: path to directory containing csv files of extracted sound features from wav files
    """
    data_dir = base_dir
    for dir in ['data', 'extracted_sound_features', db, machine_type, machine_id, machine_status]:
        data_dir = os.path.join(data_dir, dir)

    if not os.path.exists(data_dir):
        print(f'Directory {data_dir} does not exist.\n' + \
              f'Please first run make_feature_dirs({base_dir}, {db}, {machine_type}, {machine_id}, {machine_status}) to create it.')
        data_dir = None

    return data_dir

In [6]:
# Get the input wav files
def get_wav_files(raw_data_dir, db, machine_type, machine_id, machine_status):
    """
    Returns list of sound wav files for given signal/noise ratio, machine type and id and status
    :param raw_data_dir (str): path to directory containing MIMII dataset
    :param db (str): noise level, takes values '6dB', '0dB' or 'min6dB'
    :param machine_type (str): type of machine, takes values 'fan', 'pump', 'slider', 'valve'
    :param machine_id (str): id of machine, takes values 'id_00', 'id_02' etc.
    :param machine_status (str): status of the machine, i.e. 'normal' or 'abnormal'

    :return: sorted list of wav files
    """
    dir = os.path.join(raw_data_dir, db, machine_type, machine_id, machine_status)
    if not os.path.exists(dir):
        print(f'Directory {dir} does not exist.')
        return []
    
    return sorted(glob.glob(os.path.join(dir, '*.wav')))

#### 3.3) Features extracted via librosa and some additional via tsfresh library

Code for this part is mostle taken from: https://github.com/sergeyvoronin/multi_resolution_classification

In [7]:
def extract_sound_features(raw_data_dir, base_dir, 
                           db, machine_type, machine_id, machine_status):

  # Get the input wav files
  inputfiles = get_wav_files(raw_data_dir, db, machine_type, machine_id, machine_status)
  print(f'Input files are: {len(inputfiles)} wav files')
  
  # Open an output file in the proper output directory
  make_feature_dirs(base_dir, db, machine_type, machine_id, machine_status)
  out_dir = get_feature_dirs(base_dir, db, machine_type, machine_id, machine_status)
  outfile_name = os.path.join(out_dir, 'extracted_features.cvs')
  print(f'Output file is: {outfile_name}')
  
  with open(outfile_name, 'w') as outfile:

    # Loop over input files
    for infile in tqdm.tqdm(inputfiles):

      outfile.write(str(infile))

      ###### Features extraction by librosa (i.e. sound features) ######

      # Load the input sound 
      y, sr = librosa.load(infile, sr=None, mono=True)

      # Mel-frequency cepstral coefficients
      mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc = 14)
      for num in range(2,14):
        mfcci = mfcc[num]
        mfcci = mfcci.reshape(-1,1)
        #mfcci = prep.normalize(mfcci) # Perhaps normalization is not good for anomaly detection
        mfcci_mean = np.mean(mfcci)
        mfcci_std = np.std(mfcci)
        outfile.write("%5.4e,  %5.4e,  " % (mfcci_mean, mfcci_std))
    
      # Spectral centroid
      cent = librosa.feature.spectral_centroid(y=y, sr=sr)
      #cent = prep.normalize(cent)
      cent_mean = np.mean(cent)
      cent_std = np.std(cent)
      outfile.write("%5.4e,  %5.4e,  " % (cent_mean, cent_std))

      # Spectral rolloff
      rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
      #rolloff = prep.normalize(rolloff)
      rolloff_mean = np.mean(rolloff)
      rolloff_std = np.std(rolloff)
      outfile.write("%5.4e,  %5.4e,  " % (rolloff_mean, rolloff_std))

      # Tonal centroid features
      tonnetz = librosa.feature.tonnetz(y=y, sr=sr)
      #tonnetz = prep.normalize(tonnetz)
      tonnetz_mean = np.mean(tonnetz)
      tonnetz_std = np.std(tonnetz)
      outfile.write("%5.4e,  %5.4e,  " % (tonnetz_mean, tonnetz_std))

      # Zero crossing rate
      zcr = librosa.feature.zero_crossing_rate(y)
      #zcr = prep.normalize(zcr)
      zcr_mean = np.mean(zcr)
      zcr_std = np.std(zcr)
      outfile.write("%5.4e,  %5.4e,  " % (zcr_mean, zcr_std))

      # Root-mean-square energy for each frame
      rms = librosa.feature.rms(y=y)
      #rmse = prep.normalize(rms)
      rms_mean = np.mean(rms)
      rms_std = np.std(rms)
      outfile.write("%5.4e,  %5.4e,  " % (rms_mean, rms_std))

      # tempo
      tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
      outfile.write("%5.4e,  " % (tempo))

      ###### Features extraction by tsfresh (i.e. time series features) ######

      msd = tsfresh.feature_extraction.feature_calculators.mean_second_derivative_central(y)
      outfile.write("%5.4e,  " % (msd)) 

      mc = tsfresh.feature_extraction.feature_calculators.mean_change(y)
      outfile.write("%5.4e,  " % (mc)) 

      sam = tsfresh.feature_extraction.feature_calculators.longest_strike_above_mean(y)
      outfile.write("%5.4e,  " % (sam)) 

      sbm = tsfresh.feature_extraction.feature_calculators.longest_strike_below_mean(y)
      outfile.write("%5.4e,  " % (sbm)) 

      kurtosis = tsfresh.feature_extraction.feature_calculators.kurtosis(y)
      outfile.write("%5.4e,  " % (kurtosis)) 

      skewness = tsfresh.feature_extraction.feature_calculators.skewness(y)
      outfile.write("%5.4e,  " % (skewness)) 

      timerev = tsfresh.feature_extraction.feature_calculators.time_reversal_asymmetry_statistic(y, 10)
      outfile.write("%5.4e\n" % (timerev)) 

      #import pdb
      #pdb.set_trace()
      #break


In [8]:
extract_sound_features(RAW_DATA_DIR, PROJ_DIR, DB, MACHINE_TYPE, MACHINE_ID, 'abnormal')

  0%|          | 0/119 [00:00<?, ?it/s]

Input files are: 119 wav files
Directory created: /gdrive/MyDrive/DSR_SOF/sound-of-failure/data/extracted_sound_features/6dB/valve/id_00/abnormal
Output file is: /gdrive/MyDrive/DSR_SOF/sound-of-failure/data/extracted_sound_features/6dB/valve/id_00/abnormal/extracted_features.cvs


  "Empty filters detected in mel frequency basis. "
100%|██████████| 119/119 [01:10<00:00,  1.70it/s]


In [None]:
extract_sound_features(RAW_DATA_DIR, PROJ_DIR, DB, MACHINE_TYPE, MACHINE_ID, 'normal')

  0%|          | 0/991 [00:00<?, ?it/s]

Input files are: 991 wav files
Directory created: /gdrive/MyDrive/DSR_SOF/sound-of-failure/data/extracted_sound_features/6dB/valve/id_00/normal
Output file is: /gdrive/MyDrive/DSR_SOF/sound-of-failure/data/extracted_sound_features/6dB/valve/id_00/normal/extracted_features.cvs


  "Empty filters detected in mel frequency basis. "
100%|██████████| 991/991 [09:52<00:00,  1.67it/s]


### 4) Make data frames for train and test data

In [None]:
def train_test_data_for_baseline(base_dir, db, machine_type, machine_id):

  # Read the csv files containing extracted sound features into data frames 
  abnormal_dir = get_feature_dirs(base_dir, db, machine_type, machine_id, 'abnormal')
  abnormal_file = os.path.join(abnormal_dir, 'extracted_features.cvs')
  df_abnormal = pd.read_csv(abnormal_file, header=None)
  n_abnormal_samples = df_abnormal.shape[0]
  print('No. of abnormal samples:', n_abnormal_samples)

  normal_dir = get_feature_dirs(base_dir, db, machine_type, machine_id, 'normal')
  normal_file = os.path.join(normal_dir, 'extracted_features.cvs')
  df_normal = pd.read_csv(normal_file, header=None)
  n_normal_samples = df_normal.shape[0]
  print('No. of normal samples:', n_normal_samples)

  # Drop the first column, which is the path and name of the wav files
  df_abnormal = df_abnormal.drop(df_abnormal.columns[0], axis=1)
  df_normal = df_normal.drop(df_normal.columns[0], axis=1)

  # Split normal sound data into train and test parts
  # Test data must have equal number of normal and abnormal sounds,
  # while train data only consist of normal sounds.
  df_test_normal = df_normal.iloc[(-1*n_abnormal_samples):]
  df_train = df_normal.iloc[:(n_normal_samples-n_abnormal_samples)]
  print('No. of train samples:', df_train.shape[0])
  print('No. of train features:', df_train.shape[1])
  
  # Add target column (abnormal = 1, normal = 0) to test normal and abnormal data
  df_abnormal['Target'] = 1
  df_test_normal['Target'] = 0

  # Join test normal and abnormal data frames into the test data frame
  df_test = pd.concat([df_abnormal, df_test_normal])
  print('No. of test samples:', df_test.shape[0])
  print('No. of test features + target:', df_test.shape[1])

  #ToDo: fix the index of df_test

  return df_train, df_test


In [None]:
df_train, df_test = train_test_data_for_baseline(PROJ_DIR, DB, MACHINE_TYPE, MACHINE_ID)

No. of abnormal samples: 119
No. of normal samples: 991
No. of train samples: 872
No. of train features: 41
No. of test samples: 238
No. of test features + target: 42


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [None]:
df_train.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41
0,12.942,17.341,9.6462,11.976,5.1144,14.452,4.1876,1.0285,5.2099,4.3619,5.9473,1.8753,3.7642,4.5796,5.4305,-5.6629,3.6321,-0.53049,4.5031,-2.2628,3.2572,0.91587,4.5487,1513.3,875.16,3046.8,1544.9,-0.012172,0.059682,0.079894,0.054647,0.003024,0.001335,110.29,2.108e-07,-2.7836e-07,175.0,169.0,54.229,0.23311,-7.0452e-09
1,10.549,17.779,11.156,-1.6284,7.0889,12.16,3.5512,-5.1801,4.159,3.7172,4.7458,2.8731,3.6564,1.5518,5.6896,-4.1128,3.598,2.5074,3.5293,1.6651,2.814,-1.663,3.1446,1856.5,698.17,3912.3,1055.6,-0.015945,0.046091,0.12481,0.049083,0.002972,0.001329,110.29,-1.6451e-09,-1.545e-08,129.0,115.0,55.04,0.69532,-1.9693e-09
2,10.45,-1.7067,5.3703,4.9252,3.8584,4.7624,2.9637,-1.825,6.2764,0.98198,5.2889,-3.4453,3.7713,5.5442,5.3499,7.9421,5.8164,8.6159,3.3559,1.4852,2.9121,-5.6317,2.7188,1831.5,755.55,3634.0,1272.8,-0.030438,0.04892,0.12613,0.053025,0.002751,0.001374,110.29,1.3471e-09,-1.0061e-08,83.0,74.0,71.944,0.65858,-6.6695e-09
3,12.439,20.568,11.927,3.5935,6.69,12.268,3.3583,-3.4669,4.1657,1.5012,4.4617,-2.2614,3.5413,4.1812,4.6686,-3.6765,3.4955,0.39619,3.7377,0.23326,2.8228,-1.7302,3.3399,1553.8,814.07,3177.9,1382.4,-0.011005,0.030633,0.09462,0.052677,0.003008,0.001345,110.29,2.1816e-09,-8.5593e-09,161.0,159.0,52.947,0.38925,3.934e-09
4,14.939,14.01,8.2507,12.565,5.2948,13.901,3.8673,3.1873,6.4371,3.7317,5.646,1.66,4.3149,4.0996,6.5864,-4.3408,3.8291,-0.2765,4.8714,-0.2527,3.1921,-0.21705,4.1805,1378.6,929.86,2659.1,1757.9,-0.010216,0.05039,0.0706,0.054329,0.002981,0.001396,110.29,-4.6492e-10,-4.9615e-08,181.0,176.0,54.442,0.46768,3.5133e-09


In [None]:
df_train.tail()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41
867,6.9456,21.379,7.5501,12.246,3.6749,11.793,2.9708,1.9313,4.0453,5.0396,3.73,2.3757,3.2493,6.1539,3.9302,0.050163,3.239,-0.12122,3.6497,-2.6477,3.296,2.7482,3.9251,1897.0,465.68,4219.9,668.96,-0.009725,0.059559,0.11224,0.035559,0.002538,0.000911,55.147,3.2187e-09,5.3883e-09,174.0,182.0,54.018,0.19509,5.931e-09
868,9.8071,14.525,8.526,6.5914,4.5749,13.624,3.1581,-1.6306,4.7205,-4.1243,3.7853,5.2049,4.5467,8.5733,4.1952,1.2557,3.8509,3.4033,3.9611,1.492,3.3423,-0.84854,4.3203,1353.1,713.35,2674.0,1311.4,-0.012345,0.057199,0.076829,0.053504,0.002525,0.001163,110.29,3.779e-09,-2.5726e-08,176.0,187.0,53.926,0.46788,3.1843e-09
869,8.5826,15.237,9.6041,-0.43048,5.3924,3.2878,3.6329,3.9397,7.2157,-2.843,3.6621,-2.5531,3.6523,16.764,3.7685,0.82307,4.2053,6.0496,3.7496,6.9701,3.2959,-2.9577,3.5186,1864.4,548.91,4115.2,796.32,-0.017011,0.065987,0.10291,0.037551,0.002732,0.001113,110.29,6.3182e-10,1.2803e-08,91.0,95.0,43.516,0.7222,2.5256e-09
870,9.9783,24.501,9.4309,3.6107,5.3345,12.672,2.6666,-1.3883,3.9003,1.8143,3.478,-1.0599,3.3793,3.5691,3.9896,-1.1438,3.3452,-1.217,3.4173,1.1791,2.7928,-1.6297,3.1496,1232.3,615.59,2472.9,1116.8,-0.005219,0.034526,0.072302,0.038499,0.002648,0.000871,110.29,4.4108e-10,-1.0395e-08,146.0,155.0,43.296,0.083161,8.2232e-09
871,11.366,24.375,10.307,3.6601,5.596,12.859,2.8467,-1.6816,3.8572,1.6239,3.5886,-0.27158,3.6652,4.0424,4.6452,-1.3408,3.5252,-0.79253,3.3017,1.3407,2.8316,-1.3838,3.395,1296.0,678.81,2607.2,1224.3,-0.006243,0.035143,0.075549,0.045876,0.002718,0.000974,110.29,-3.6359e-09,5.126e-09,139.0,149.0,46.953,0.54944,1.4109e-09


In [None]:
df_test.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,Target
0,5.8959,15.713,6.8103,-7.22,4.8844,7.9639,4.1579,-0.76707,3.2024,3.6377,3.5471,-2.8844,4.2703,-2.9135,3.706,-1.7438,3.3718,5.1961,3.1324,2.3824,3.1129,0.45209,3.2167,2442.6,263.78,4666.0,443.29,0.007549,0.034049,0.23087,0.020863,0.001864,0.000257,55.147,2.0623e-09,-4.1938e-08,40.0,40.0,3.636,0.068261,2.4876e-10,1
1,7.9467,18.019,8.7594,10.888,6.3992,20.011,5.2844,6.333,4.2707,3.1756,3.3421,-0.9928,3.2774,4.1128,3.0395,4.5014,3.6238,2.6491,3.2765,-2.7476,3.957,0.07683,3.116,1413.8,505.59,3086.5,967.51,0.014631,0.040486,0.070559,0.022225,0.002389,0.000272,55.147,6.199e-10,-7.8679e-10,144.0,175.0,1.9389,0.093553,2.5756e-10,1
2,5.893,-0.34355,8.1468,3.1755,6.2995,3.3828,4.5439,3.4727,3.8448,10.188,3.6195,6.0546,3.5311,1.2684,3.8983,-1.8508,3.6465,-0.18315,3.9126,3.7522,3.5619,1.8341,3.6627,1628.5,512.31,3469.1,951.23,-0.009859,0.056578,0.10171,0.02042,0.002354,0.000383,55.147,3.5406e-09,5.3859e-08,75.0,80.0,2.2726,0.026237,6.2431e-12,1
3,6.1594,19.162,6.3216,4.0565,5.8114,7.9598,4.2023,-4.0562,3.6572,1.0988,3.2723,1.1471,2.9737,2.1548,3.2136,-3.6156,3.254,1.2542,3.1671,0.37355,2.8889,3.1344,2.957,2348.7,292.53,4823.4,414.88,-0.00548,0.029385,0.1956,0.021476,0.001987,0.000249,55.147,8.905e-09,-4.0055e-09,75.0,72.0,3.3521,0.037634,1.5979e-10,1
4,9.8286,28.746,9.941,0.65383,7.3385,14.454,5.7842,-0.75962,4.0082,3.7907,3.573,-1.1744,3.4369,3.5356,3.3645,-2.6932,3.8883,-0.63623,3.6871,-0.36112,3.5165,-1.0324,3.0044,1424.4,501.62,3119.3,925.56,-0.006442,0.041417,0.081498,0.02546,0.00244,0.000255,55.147,2.5034e-10,8.9408e-09,131.0,157.0,1.987,0.15589,-1.6161e-10,1


In [None]:
df_test.tail()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,Target
986,11.204,24.857,10.425,5.4769,4.6556,13.247,2.8405,-1.304,3.6454,2.3954,3.7758,1.2855,3.2776,3.7075,4.55,-1.1744,3.5257,0.91047,3.471,3.3235,2.7511,-0.037532,3.3372,1283.8,679.97,2606.3,1279.9,0.001361,0.043245,0.072191,0.041948,0.002738,0.000893,110.29,-4.7684e-10,-2.5392e-08,186.0,222.0,33.546,0.40799,1.8245e-09,0
987,9.2449,25.76,9.9763,3.5571,5.2194,14.555,2.6763,-1.1027,3.6455,4.6108,4.3483,0.36927,3.1064,3.5988,4.1057,-3.2244,3.3482,-1.392,3.5164,-0.31565,2.6216,-2.2176,2.9234,1397.2,623.39,2939.3,1085.7,-0.004462,0.041762,0.080584,0.040406,0.002736,0.000932,110.29,-1.3352e-09,-8.6308e-09,164.0,158.0,43.952,0.25332,3.8017e-09,0
988,7.9239,15.854,7.0197,7.973,3.885,22.332,3.136,6.4171,5.132,0.88662,3.503,-3.0124,3.5011,1.7827,4.8069,5.5676,4.372,3.5793,3.1483,-4.0436,3.495,-2.8699,3.1699,1257.5,598.03,2488.3,1135.0,0.00838,0.037006,0.077024,0.039142,0.002584,0.000815,110.29,2.873e-09,-5.4837e-09,136.0,144.0,38.912,0.55598,9.8634e-10,0
989,8.8524,17.21,8.2064,11.589,4.0755,19.229,3.4617,4.7482,5.1241,6.4311,4.912,4.0832,3.4085,3.5207,4.8309,-3.3853,3.7575,-3.9553,4.3272,-0.060884,3.2196,-0.93744,3.2559,1373.2,679.41,2705.2,1261.0,-0.015478,0.046282,0.08232,0.047556,0.002482,0.000956,110.29,-3.3856e-09,2.5821e-08,141.0,138.0,51.02,0.74466,-3.2581e-09,0
990,9.7634,13.575,8.2839,-3.8608,4.9086,9.1006,2.8944,-0.27255,4.422,2.1427,4.3273,-1.94,3.1327,1.0056,4.1566,-1.9676,4.1272,0.68075,3.1666,-0.61065,3.1067,-1.2055,2.8835,1671.5,615.13,3486.3,1168.9,-0.001859,0.039466,0.11578,0.044455,0.002644,0.000937,110.29,7.0811e-09,-5.889e-09,137.0,145.0,44.479,0.35183,4.0722e-09,0
