In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import soundfile as sf
import seaborn as sns

import os
from tensorflow import keras
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from scipy.fft import fft
from functools import partial
import multiprocessing
from sklearn.preprocessing import normalize
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import shap
import glob
from pathlib import Path

In [2]:
# Notebook-wide configuration.
# Root directory that get_audio_files() searches recursively for recordings.
TRAIN_FILES_DIR='./train_short_audio/'
# Worker-process count setting read by get_dataset() for the parallel FFT pass.
NJOBS=10
# NOTE(review): presumably the audio sample rate in Hz; it is not referenced by
# any of the visible cells — confirm it is still needed.
FS=32000

In [3]:
def get_audio_files(root=None, pattern='*.ogg'):
    """Return the paths of all audio files found recursively under a directory.

    Parameters
    ----------
    root : str or path-like, optional
        Directory to search. When omitted, the module-level
        ``TRAIN_FILES_DIR`` is used.
    pattern : str, optional
        Glob pattern matched against file names (default ``'*.ogg'``).

    Returns
    -------
    list of str
        Matching paths as strings, in sorted order.
    """
    search_root = Path(TRAIN_FILES_DIR if root is None else root)
    # rglob makes no ordering promise; sorting keeps the file chunks handed to
    # the worker pool (and therefore the saved row order) reproducible.
    return sorted(str(path) for path in search_root.rglob(pattern))


In [15]:
def run_fft_onesig(N, signal, label):
    """Split a signal into consecutive N-sample windows and FFT each window.

    Windows do not overlap. Trailing samples that do not fill a complete
    window are ignored. A window that ends exactly on the last sample IS
    included.

    Parameters
    ----------
    N : int
        Window length in samples; must be positive.
    signal : sequence or 1-D array
        Samples to analyse (expected to be one-dimensional).
    label : str
        Value written to the ``bird`` column of every row.

    Returns
    -------
    pandas.DataFrame
        One row per window, indexed 0..k-1, with columns
        ``bin_0`` .. ``bin_{N//2 - 1}`` (magnitudes of the first ``N // 2``
        FFT bins), ``partition`` (``'<first sample>_<last sample>'`` of the
        window) and ``bird``. The frame has these columns but no rows when
        the signal is shorter than ``N``.

    Raises
    ------
    ValueError
        If ``N`` is not positive.
    """
    if N <= 0:
        raise ValueError('N must be a positive integer, got {!r}'.format(N))

    n_bins = N // 2
    bins_columns = ['bin_{}'.format(k) for k in range(n_bins)]

    # Start offset of every complete window. The "+ 1" keeps the window that
    # ends exactly at len(signal), which a strict "<" comparison would drop.
    starts = range(0, len(signal) - N + 1, N)

    spectra = [
        np.abs(fft(signal[start:start + N]))[:n_bins]
        for start in starts
    ]

    # Build the frame once instead of appending row by row (quadratic, and
    # DataFrame.append no longer exists in pandas >= 2.0).
    df_sig = pd.DataFrame(
        np.array(spectra).reshape(len(spectra), n_bins),
        columns=bins_columns,
    )
    df_sig['partition'] = [
        '{}_{}'.format(start, start + N - 1) for start in starts
    ]
    df_sig['bird'] = label

    return df_sig


def run_fft(N, filepaths):
    """Compute windowed FFT features for a collection of audio files.

    Each file is read with ``sf.read`` and passed to ``run_fft_onesig``. The
    label is the name of the file's parent directory, and the file's own name
    is stored in a ``filename`` column.

    Parameters
    ----------
    N : int
        FFT window length in samples.
    filepaths : iterable of str
        Paths of the audio files to process.

    Returns
    -------
    pandas.DataFrame
        Rows of all processed files concatenated in input order with a fresh
        0..n-1 index. An empty frame is returned when no file yields a row.
    """
    frames = []
    for audio_f in filepaths:
        # Path handles both '/' and the platform separator, unlike split('/').
        audio_path = Path(str(audio_f))
        audio_sig, _ = sf.read(audio_f)

        if len(audio_sig) < N:
            # Too short for a single window: skip this file only. Stopping the
            # loop here would silently drop every remaining file as well.
            continue

        df_sig = run_fft_onesig(
            N=N,
            signal=audio_sig,
            label=audio_path.parent.name
        )
        if df_sig.empty:
            continue
        df_sig['filename'] = audio_path.name
        frames.append(df_sig)

    if not frames:
        # pd.concat raises on an empty list.
        return pd.DataFrame()

    # One concat at the end instead of growing a frame inside the loop.
    return pd.concat(frames, ignore_index=True)

def get_dataset(N, train_files):
    """Build the FFT feature table for many files using a process pool.

    The file list is split into contiguous chunks, each chunk is handed to
    ``run_fft`` in a worker process, and the per-chunk frames are concatenated
    in chunk order.

    Parameters
    ----------
    N : int
        FFT window length in samples, forwarded to ``run_fft``.
    train_files : iterable of str
        Paths of the audio files to process.

    Returns
    -------
    pandas.DataFrame
        All feature rows with a fresh 0..n-1 index; an empty frame when there
        are no files or no file produced any row.
    """
    train_files = list(train_files)
    if not train_files:
        # Nothing to do: avoid starting worker processes for empty input.
        return pd.DataFrame()

    # No point in starting more workers than there are files.
    n_workers = min(NJOBS, len(train_files))
    files_splits = np.array_split(ary=train_files, indices_or_sections=n_workers)
    exec_fn = partial(run_fft, N)

    # The context manager releases the worker processes even when a worker
    # raises; pool.map blocks until every chunk has been processed.
    with multiprocessing.Pool(n_workers) as pool:
        results = pool.map(exec_fn, files_splits)

    frames = [df_bird for df_bird in results if not df_bird.empty]
    if not frames:
        return pd.DataFrame()

    return pd.concat(frames, ignore_index=True)

In [14]:
%%time
CALCULATE=True
FFT_SIZE=2048
SAVE_PATH='fft_features_{}.csv'.format(FFT_SIZE)

if CALCULATE:
    df = get_dataset(
        N=FFT_SIZE,
        train_files=get_audio_files()
    )
    df.to_csv(SAVE_PATH, index=False)
else:
    df = pd.read_csv(SAVE_PATH)


CPU times: user 6.7 s, sys: 380 ms, total: 7.08 s
Wall time: 48.7 s


In [11]:
# Display the feature table (the recorded output shows pandas eliding the
# middle rows and columns).
# NOTE(review): the recorded output has an 'Unnamed: 0' column, which the
# to_csv(..., index=False) call in this notebook would not produce; the output
# presumably predates the current code — re-run from a fresh kernel to refresh.
df

Unnamed: 0,bin_0,bin_1,bin_2,bin_3,bin_4,bin_5,bin_6,bin_7,bin_8,bin_9,...,bin_1017,bin_1018,bin_1019,bin_1020,bin_1021,bin_1022,bin_1023,partition,bird,filename
0,0.109109,0.104525,0.090810,0.067982,0.043354,0.036528,0.059847,0.083071,0.090390,0.073011,...,0.010870,0.012636,0.005364,0.003192,0.001908,0.001179,0.000577,0_2047,grnjay,XC130154.ogg
1,0.032139,0.120178,0.163079,0.065829,0.286840,0.099731,0.070909,0.057269,0.104555,0.059639,...,0.001202,0.015968,0.019710,0.019694,0.013142,0.011388,0.021091,2048_4095,grnjay,XC130154.ogg
2,0.011692,0.031680,0.058241,0.010519,0.318981,0.031242,0.081802,0.128852,0.053376,0.064341,...,0.018336,0.020024,0.008126,0.026105,0.024679,0.044281,0.008388,4096_6143,grnjay,XC130154.ogg
3,0.084990,0.124008,0.148006,0.070163,0.167342,0.048375,0.079276,0.091263,0.054477,0.084815,...,0.002002,0.005793,0.002173,0.010228,0.017924,0.024366,0.026924,6144_8191,grnjay,XC130154.ogg
4,0.031638,0.060009,0.198577,0.089389,0.312165,0.061294,0.062317,0.020651,0.124128,0.061435,...,0.022278,0.039680,0.008317,0.009546,0.022766,0.010771,0.006605,8192_10239,grnjay,XC130154.ogg
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13911,0.009594,0.009572,0.009554,0.009555,0.009493,0.009414,0.009550,0.009250,0.008539,0.008980,...,0.001476,0.000754,0.001108,0.001678,0.003120,0.001137,0.001968,1042432_1044479,grnjay,XC442569.ogg
13912,0.008446,0.008454,0.008464,0.008479,0.008514,0.008465,0.009185,0.010342,0.009518,0.008650,...,0.002326,0.000963,0.001815,0.003255,0.000207,0.000745,0.002223,1044480_1046527,grnjay,XC442569.ogg
13913,0.000873,0.000869,0.000866,0.000870,0.000867,0.000839,0.000741,0.001582,0.002086,0.001198,...,0.001241,0.001416,0.000398,0.001793,0.002299,0.001175,0.002281,1046528_1048575,grnjay,XC442569.ogg
13914,0.001317,0.001320,0.001327,0.001332,0.001341,0.001399,0.001379,0.002095,0.000515,0.002210,...,0.001462,0.002288,0.001505,0.001784,0.001004,0.001296,0.002625,1048576_1050623,grnjay,XC442569.ogg


In [21]:
# NOTE(review): `a` is not assigned in any visible cell; presumably a leftover
# scratch variable (the recorded output is a single audio file path). This cell
# raises NameError on Restart & Run All unless `a` is defined elsewhere —
# confirm, or remove the cell.
a

'train_short_audio/grnjay/XC130154.ogg'