In [1]:
import os
from tqdm import tqdm
import librosa, librosa.display
import pandas as pd
import sklearn
import numpy as np
import glob2
from collections import defaultdict

In [2]:
SR = 44100           # Sampling rate handed to librosa.load as `sr` for every file
N_MELS = 40          # Despite the name, this is passed as `n_mfcc`: MFCC coefficients kept per frame

def extract_features(file_name):
    """Return the time-averaged MFCC vector of one audio file.

    The file is loaded as mono at ``SR``, MFCCs are computed over 20 ms
    windows with 50 % overlap, and the coefficients are averaged over all
    frames, giving a 1-D array of length ``N_MELS``.

    Parameters
    ----------
    file_name : str
        Path to the audio file. Backslashes are converted to forward
        slashes before loading.

    Returns
    -------
    numpy.ndarray or None
        The averaged MFCC vector, or ``None`` if anything went wrong while
        loading or processing the file. Failures are reported on stdout
        together with the exception that caused them; they are not raised,
        so a single bad file does not abort a batch run.
    """
    try:
        file_name = file_name.replace('\\', '/')
        audio, sample_rate = librosa.load(file_name, mono=True, sr=SR, res_type='kaiser_fast')
        n_fft = int(sample_rate * 0.02)   # 20 ms analysis window, in samples
        hop_length = n_fft // 2           # step of half a window -> 50 % overlap
        mfccs = librosa.feature.mfcc(
            y=audio,
            sr=sample_rate,
            n_mfcc=N_MELS,
            hop_length=hop_length,
            n_fft=n_fft,
        )
        # mfccs.T has one row per frame; averaging over rows collapses time.
        mfccs_scaled = np.mean(mfccs.T, axis=0)
    except Exception as e:
        # Best-effort by design, but say *why* the file failed so that a
        # missing file, a codec problem and a typo can be told apart.
        print(f'Error encountered while parsing file: {file_name} ({type(e).__name__}: {e})')
        return None
    return mfccs_scaled

In [3]:
def save_features(file_path):
    """Extract features for every file matching a glob pattern.

    Parameters
    ----------
    file_path : str
        Glob pattern such as ``'./data/*/*.wav'``. The name of the
        directory that directly contains each matched file is used as
        that file's class label.

    Returns
    -------
    list of [feature, class_label]
        One entry per matched file, in sorted path order. ``feature`` is
        whatever ``extract_features`` returned for the file, which is
        ``None`` when the file could not be parsed.
    """
    feature = []
    # Sort so the row order does not depend on the filesystem's listing order.
    for path in tqdm(sorted(glob2.glob(pathname=file_path))):
        # Splitting on a literal backslash only works for Windows-style
        # paths; normalise the separators and take the parent directory
        # name so the label is found on every platform.
        parent_dir = os.path.dirname(path.replace('\\', '/'))
        class_label = os.path.basename(parent_dir)
        data = extract_features(path)
        feature.append([data, class_label])
    return feature

In [4]:
# Every WAV file exactly one directory below ./data is processed.
file_path = './data/*/*.wav'
features = save_features(file_path)

# One row per recording: its feature value and the label taken from its path.
features_df = pd.DataFrame(data=features, columns=['feature', 'class_label'])
features_df

100%|██████████████████████████████████████████████████████████████████████████████| 1000/1000 [04:39<00:00,  3.58it/s]


Unnamed: 0,feature,class_label
0,"[-415.42465, 118.90836, -104.75949, 24.294622,...",cliswa
1,"[-392.9839, 108.54773, -108.75539, 7.0109396, ...",cliswa
2,"[-383.60422, 102.93317, -117.18468, 23.236774,...",cliswa
3,"[-434.53595, 132.37169, -105.35982, 30.331165,...",cliswa
4,"[-421.94064, 119.54328, -112.59305, 40.792915,...",cliswa
...,...,...
995,"[-543.2739, 97.48041, -79.73211, -31.352812, -...",foxspa
996,"[-684.93677, 72.11933, -48.00181, 28.039211, -...",foxspa
997,"[-494.66534, 5.5761166, -223.52647, -50.89052,...",foxspa
998,"[-536.7199, 76.12634, -16.417595, 61.409256, 2...",foxspa


In [5]:
# Sanity check: shape of the feature stored in the first row.
features_df.loc[0, 'feature'].shape

(40,)

In [6]:
# Distinct labels, in order of first appearance in the frame.
class_label = pd.unique(features_df['class_label'])
class_label

array(['cliswa', 'doccor', 'dowoo', 'dusfly', 'easblu', 'easmea',
       'eucdov', 'eursta', 'evegro', 'foxspa'], dtype=object)

In [7]:
# Integer class id for every row, numbered by first appearance of each label
# (the same order that `features_df['class_label'].unique()` reports).
# factorize yields one code per row *in row order*, so the ids stay aligned
# with their rows even when files of one class are not stored contiguously,
# and it avoids rescanning the whole frame once per label.
class_codes, _ = pd.factorize(features_df['class_label'])
class_number = pd.Series(class_codes, index=features_df.index)

In [8]:
# Attach the integer class ids as a new column; pandas aligns the Series to the frame by index.
features_df['class_number'] = class_number

In [9]:
# Display the frame to confirm the new class_number column sits next to each label.
features_df

Unnamed: 0,feature,class_label,class_number
0,"[-415.42465, 118.90836, -104.75949, 24.294622,...",cliswa,0
1,"[-392.9839, 108.54773, -108.75539, 7.0109396, ...",cliswa,0
2,"[-383.60422, 102.93317, -117.18468, 23.236774,...",cliswa,0
3,"[-434.53595, 132.37169, -105.35982, 30.331165,...",cliswa,0
4,"[-421.94064, 119.54328, -112.59305, 40.792915,...",cliswa,0
...,...,...,...
995,"[-543.2739, 97.48041, -79.73211, -31.352812, -...",foxspa,9
996,"[-684.93677, 72.11933, -48.00181, 28.039211, -...",foxspa,9
997,"[-494.66534, 5.5761166, -223.52647, -50.89052,...",foxspa,9
998,"[-536.7199, 76.12634, -16.417595, 61.409256, 2...",foxspa,9


In [10]:
# Persist the feature table without the row index.
# NOTE(review): the 'feature' cells hold NumPy arrays, so they are presumably
# written as their printed string form rather than as numeric columns --
# confirm that whatever reads this CSV can parse them back.
features_df.to_csv(path_or_buf='bird_audio.csv', index=False)

In [11]:
# Inspect the full feature vector stored in the first row.
features_df.loc[0, 'feature']

array([-4.1542465e+02,  1.1890836e+02, -1.0475949e+02,  2.4294622e+01,
       -1.9978064e+01, -4.8570576e-01, -1.1980801e+01,  1.1485501e+01,
       -2.6256177e-01, -7.2262869e+00,  6.8554492e+00, -4.2383418e+00,
        1.1867721e+01, -2.7769375e+00,  2.1217377e+00, -2.6373386e+00,
       -3.6719444e-01,  1.6173469e-01, -1.0012254e+00,  1.1583397e+00,
       -4.1946769e+00,  5.7366657e+00, -2.8120406e+00,  3.1989953e-01,
        1.9093671e+00,  6.3231957e-01, -1.1854887e+00, -2.4036403e+00,
        1.4426509e+00, -5.7172656e-01,  2.2449334e+00, -8.5855013e-01,
        7.0267028e-01, -3.1985015e-01,  4.1044122e-01, -2.5187333e+00,
       -2.7885652e+00,  2.6166978e+00, -1.4519862e+00, -2.2733076e-01],
      dtype=float32)