In [1]:
#Concise feature extraction
import librosa
from librosa import feature
import numpy as np
from sklearn.preprocessing import normalize
from functools import reduce

def get_percussion(y):
    har, perc =   librosa.decompose.hpss(np.abs(librosa.stft(y)), margin=16)
    return perc


fn_list_i = [
   # feature.chroma_stft,
    feature.spectral_centroid,
    feature.spectral_bandwidth,
    feature.spectral_rolloff, 
    librosa.onset.onset_strength,
    librosa.feature.mfcc
]

fn_list_ii = [
#    feature.rmse,
    feature.zero_crossing_rate,
    get_percussion
]


def get_feature_vector(y, sr):
   feat_vect_i = [np.mean(funct(y, sr)) for funct in fn_list_i]
   feat_vect_ii = [np.mean(funct(y)) for funct in fn_list_ii]
   feature_vector = feat_vect_i + feat_vect_ii
   return feature_vector


In [2]:
#Classify features
# The naming convention for files:
#Anything with *D.wav is a a water leak - ht, medium, slow, fast drips
#Anything with *N.wav is white  noise / ambient nouse
#Anything with *R?.wav or HW is running water or running tap or falling from a height

run = ['RW', 'RT', 'HW']
leak = ['FD', 'MD', 'SD', 'SP', 'HD']
noise = ['WN', 'AN']

def classify_datafile(datafile): 
    file, ext = os.path.splitext(datafile)
    [rest, cl_str] = file.rsplit("_", 1)
    #print(datafile)
    if cl_str in leak: 
        classify =  ['leak']
    # elif cl_str in run :
    #     classify =  ['running water']
    else:
        classify = ['running water/noise']
    #endif
    #print(classify)
    return classify


In [3]:
#main cell 
#reads each file, extracts features
#and saves in a ''/Users/ns/development/iisc/WLDS/data/out/dataset_1.csv'
#this dataset should be used for training  
###---------------------------------------------
from tqdm import tqdm
import os
import pandas as pd

# read each file
# extract features in a dict
# put them in a list
# convert to data frame
import warnings
warnings.filterwarnings('ignore')

data_file_path = '/Users/ns/development/iisc/WLDS2/data/WAV/'
out_file_path = '/Users/ns/development/iisc/WLDS2/data/out/'
header = ['Sample#', 'spectral_centroid', 'spectral_bandwidth',
          'spectral_rolloff', 'onset_strength',
          'mfcc', 'zero_crossing_rate', 'percussion', 'label']
f_header = header[1:7] #extract only features
samples = os.listdir(data_file_path)
#print(samples)
feature_list = []
for datafile in tqdm(samples):
    data_f = data_file_path + datafile
    print('Extracting Features for  ',data_f)
    y, sr = librosa.load(data_f)
    feature_vector = get_feature_vector(y, sr) 
    label = classify_datafile(datafile)
    Sno = samples.index(datafile) 
    feature_list.append([Sno] + feature_vector + label)
#end for

#normalize and save the dataset. 

#print(feature_list) 
feature_df = pd.DataFrame(feature_list, columns=header)
#feature_df[f_header] = normalize(feature_df[f_header].to_numpy())
feature_df.to_csv(out_file_path + 'dataset_1.csv', index=False)

##end


  0%|          | 0/59 [00:00<?, ?it/s]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_47_AN.wav


  2%|▏         | 1/59 [00:03<03:46,  3.91s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_53_HW.wav


  3%|▎         | 2/59 [00:06<03:03,  3.22s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_59_AN.wav


  5%|▌         | 3/59 [00:09<02:48,  3.01s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_24_AN.wav


  7%|▋         | 4/59 [00:11<02:31,  2.75s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_27_RT.wav


  8%|▊         | 5/59 [00:14<02:25,  2.69s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_1_RT.wav


 10%|█         | 6/59 [00:15<01:47,  2.02s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_11_RW.wav


 12%|█▏        | 7/59 [00:17<01:55,  2.21s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_39_FD.wav


 14%|█▎        | 8/59 [00:20<01:56,  2.28s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_44_RW.wav


 15%|█▌        | 9/59 [00:22<01:54,  2.29s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_48_RW.wav


 17%|█▋        | 10/59 [00:25<02:08,  2.62s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_60_FD.wav


 19%|█▊        | 11/59 [00:28<02:02,  2.54s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_5_WN.wav


 20%|██        | 12/59 [00:29<01:48,  2.30s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_13_FD.wav


 22%|██▏       | 13/59 [00:32<01:43,  2.25s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_26_AN.wav


 24%|██▎       | 14/59 [00:34<01:50,  2.45s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_38_AN.wav


 25%|██▌       | 15/59 [00:37<01:52,  2.55s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_34_AN.wav


 27%|██▋       | 16/59 [00:42<02:18,  3.21s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_3_SD.wav


 29%|██▉       | 17/59 [00:44<01:59,  2.85s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_29_RT.wav


 31%|███       | 18/59 [00:47<01:55,  2.83s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_46_RT.wav


 32%|███▏      | 19/59 [00:49<01:45,  2.63s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_66_FD.wav


 34%|███▍      | 20/59 [00:52<01:51,  2.87s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_17_RW.wav


 36%|███▌      | 21/59 [00:55<01:42,  2.70s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_21_FD.wav


 37%|███▋      | 22/59 [00:58<01:44,  2.83s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_7_FD.wav


 39%|███▉      | 23/59 [01:01<01:42,  2.85s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_33_SP.wav


 41%|████      | 24/59 [01:03<01:36,  2.74s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_9_FD.wav


 42%|████▏     | 25/59 [01:06<01:39,  2.91s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_43_AN.wav


 44%|████▍     | 26/59 [01:09<01:32,  2.80s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_63_HD.wav


 46%|████▌     | 27/59 [01:12<01:27,  2.72s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_51_AN.wav


 47%|████▋     | 28/59 [01:14<01:20,  2.59s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_23_FD.wav


 49%|████▉     | 29/59 [01:16<01:14,  2.47s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_31_FD.wav


 51%|█████     | 30/59 [01:18<01:10,  2.42s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_19_RW.wav


 53%|█████▎    | 31/59 [01:21<01:08,  2.43s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_64_RW.wav


 54%|█████▍    | 32/59 [01:23<01:01,  2.28s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_10_FD.wav


 56%|█████▌    | 33/59 [01:25<00:59,  2.30s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_61_FD.wav


 58%|█████▊    | 34/59 [01:28<01:01,  2.47s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_45_FD.wav


 59%|█████▉    | 35/59 [01:30<00:58,  2.45s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_49_RT.wav


 61%|██████    | 36/59 [01:33<00:55,  2.43s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_37_AN.wav


 63%|██████▎   | 37/59 [01:35<00:53,  2.44s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_25_AN.wav


 64%|██████▍   | 38/59 [01:39<00:56,  2.71s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_58_AN.wav


 66%|██████▌   | 39/59 [01:43<01:05,  3.28s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_28_RT.wav


 68%|██████▊   | 40/59 [01:46<00:58,  3.08s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_ 20_FD.wav


 69%|██████▉   | 41/59 [01:49<00:54,  3.02s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_35_AN.wav


 71%|███████   | 42/59 [01:51<00:48,  2.87s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_6_MD.wav


 73%|███████▎  | 43/59 [01:56<00:53,  3.35s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_Y_32_FD.wav


 75%|███████▍  | 44/59 [01:58<00:47,  3.16s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_52_AN.wav


 76%|███████▋  | 45/59 [02:01<00:40,  2.91s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_67_RT.wav


 78%|███████▊  | 46/59 [02:04<00:40,  3.12s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_36_MD.wav


 80%|███████▉  | 47/59 [02:07<00:35,  2.99s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_54_HW.wav


 81%|████████▏ | 48/59 [02:10<00:31,  2.90s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_12_MD.wav


 83%|████████▎ | 49/59 [02:14<00:33,  3.35s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_2_WN.wav


 85%|████████▍ | 50/59 [02:17<00:29,  3.31s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_22_FD.wav


 86%|████████▋ | 51/59 [02:20<00:24,  3.00s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_65_FD.wav


 88%|████████▊ | 52/59 [02:22<00:19,  2.81s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_18_FD.wav


 90%|████████▉ | 53/59 [02:24<00:16,  2.69s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_14_FD.wav


 92%|█████████▏| 54/59 [02:27<00:13,  2.66s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_8_RW.wav


 93%|█████████▎| 55/59 [02:29<00:10,  2.57s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_4_RW.wav


 95%|█████████▍| 56/59 [02:31<00:06,  2.30s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_62_HD.wav


 97%|█████████▋| 57/59 [02:33<00:04,  2.28s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_50_AN.wav


 98%|█████████▊| 58/59 [02:35<00:02,  2.18s/it]

Extracting Features for   /Users/ns/development/iisc/WLDS2/data/WAV/data_30_RT.wav


100%|██████████| 59/59 [02:38<00:00,  2.68s/it]


<!--
 Copyright 2022 ns
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
 
     http://www.apache.org/licenses/LICENSE-2.0
 
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->

