In [1]:
from scipy.io import wavfile
import numpy as np
import yaafelib
from matplotlib import pyplot as plt
from scipy.signal import filtfilt, argrelmax, find_peaks, correlate
import pandas as pd
import librosa as lr
from tqdm import tqdm
from scipy.stats import skew, kurtosis
from essentia.essentia import standard
from gammatone import filters

In [2]:
# Feature-set selector; a later cell reassigns it before it is first read.
version = 1
# 48 frequencies produced by gammatone's `erb_space` for the 100 .. 11025.0 range.
freqs = filters.erb_space(low_freq=100, high_freq=11025.0, num=48)
# Filter coefficients for those frequencies, consumed by extract_features_from_file_v5.
# NOTE(review): 44100 is presumably the sample rate in Hz and 1 the filter
# width -- confirm against the gammatone API.
gmtf = filters.make_erb_filters(44100, freqs, 1)

In [3]:
# Hook tqdm into pandas so the extraction cells can call `progress_apply`
# (the description string labels the progress bars).
tqdm.pandas(desc="Feature extraction")

In [4]:
# Root directory that holds train.csv / test.csv and the two audio folders.
# NOTE(review): '/dev/null/' looks like a placeholder -- point this at the real dataset.
prefix = '/dev/null/'
prefix_train = prefix + 'audio_train/'
prefix_test = prefix + 'audio_test/'

# Prepended to the HDF keys written by the training-extraction cell.
output_prefix = ''

In [5]:
# Class names. The version-1 training branch stores each label as its position
# in this list (`labels_list.index`), so the order must stay fixed.
labels_list = ["Acoustic_guitar", "Applause", "Bark", "Bass_drum",
               "Burping_or_eructation", "Bus", "Cello", "Chime", "Clarinet",
               "Computer_keyboard", "Cough", "Cowbell", "Double_bass", "Drawer_open_or_close",
               "Electric_piano", "Fart", "Finger_snapping", "Fireworks", "Flute",
               "Glockenspiel", "Gong", "Gunshot_or_gunfire", "Harmonica",
               "Hi-hat", "Keys_jangling", "Knock", "Laughter", "Meow",
               "Microwave_oven", "Oboe", "Saxophone", "Scissors", "Shatter",
               "Snare_drum", "Squeak", "Tambourine", "Tearing", "Telephone", "Trumpet",
               "Violin_or_fiddle", "Writing"]

In [6]:
# Overrides the earlier selector: the next cell builds the version-2 yaafe plan.
version = 2

In [7]:
# N_MFCC is passed to yaafe as CepsNbCoeffs; N_BASIC is the number of summary
# statistics that get_basic_features returns per series.
N_MFCC = 15
N_BASIC = 13

# yaafe feature definitions for each supported feature-set version.
# Only the plan matching the current `version` is loaded, so the v1 extractor
# (which reads e.g. 'energy') needs version == 1 here and the v2 extractor
# (which reads e.g. 'spectralshape') needs version == 2.
_YAAFE_FEATURE_PLANS = {
    1: (
        'autocorr: AutoCorrelation ACNbCoeffs=500',
        'mfcc: MFCC CepsNbCoeffs=' + str(N_MFCC),
        'am: AmplitudeModulation',
        'energy: Energy',
        'barkloudness: Loudness',
        'percsharp: PerceptualSharpness',
        'percspread: PerceptualSpread',
        'spectralflux: SpectralFlux',
        'spectralflatness: SpectralFlatness',
        'spectralrolloff: SpectralRolloff',
        'spectralvariation: SpectralVariation',
    ),
    2: (
        'spectralshape: SpectralShapeStatistics blockSize=512  stepSize=256',
        'obsir: OBSIR blockSize=512  stepSize=256',
        'spectralrolloff: SpectralRolloff blockSize=512  stepSize=256',
    ),
}

fp = yaafelib.FeaturePlan(sample_rate=44100)
# Any other version leaves the plan empty, exactly like the if/elif it replaces.
for _feature_definition in _YAAFE_FEATURE_PLANS.get(version, ()):
    fp.addFeature(_feature_definition)

engine = yaafelib.Engine()
engine.load(fp.getDataFlow())

True

In [8]:
def load_file_librosa(fname):
    """Load ``fname`` through librosa, requesting a 44100 Hz sample rate.

    Only the first element of librosa's return value (the samples) is
    returned; the rest is discarded.
    """
    loaded = lr.core.load(fname, sr=44100)
    return loaded[0]

In [9]:
def load_file(fname):
    """Read a WAV file and return its samples as a float64 array of shape (1, n).

    The sample values are converted to float64 without rescaling, and the
    file's own sample rate is ignored.

    scipy returns multi-channel data as ``(n_samples, n_channels)``; such
    files are averaged down to a single channel so that the single output row
    holds one value per time step instead of interleaved channel samples.
    Mono files are returned exactly as before.
    """
    samples = wavfile.read(fname)[1].astype('float64')
    if samples.ndim > 1:
        # Down-mix: one value per frame, the mean over channels.
        samples = samples.mean(axis=1)
    return samples.reshape(1, -1)

In [10]:
def extract_yaafe_raw(audio):
    """Run the module-level yaafe ``engine`` on ``audio`` and return its result unchanged."""
    raw_features = engine.processAudio(audio)
    return raw_features

In [11]:
def length(array, sample_rate=44100):
    """Return the size of ``array``'s second axis divided by ``sample_rate``.

    With one sample per column this is the duration in seconds.
    """
    n_samples = array.shape[1]
    return n_samples / sample_rate

In [12]:
def wav_features(array):
    """Summarise a 1-D signal as a list of 16 statistics.

    Order of the returned values: relative argmin, relative argmax, min, max,
    mean, std, 10th/25th/50th/75th/90th percentile, skew, kurtosis, RMS,
    RMS / std, max / min.  An empty input yields sixteen zeros.

    No guard is applied to the two ratios, so a zero std or a zero minimum
    propagates whatever numpy produces for that division.
    """
    if len(array) == 0:
        return [0.] * 16

    n_samples = array.shape[0]
    lowest = np.argmin(array)
    highest = np.argmax(array)
    spread = np.std(array)
    root_mean_square = np.sqrt(np.mean(array**2))

    stats = [lowest / n_samples, highest / n_samples,
             array[lowest], array[highest],
             np.mean(array), spread]
    stats.extend(np.percentile(array, q) for q in (10, 25, 50, 75, 90))
    stats.extend([skew(array), kurtosis(array),
                  root_mean_square, root_mean_square / spread,
                  array[highest] / array[lowest]])
    return stats

In [13]:
def get_basic_features(array):
    """Summarise a 1-D series as a list of N_BASIC (13) statistics.

    Order of the returned values: relative argmin, relative argmax, min, max,
    mean, std, 10th/25th/50th/75th/90th percentile, skew, kurtosis.
    An empty input yields N_BASIC zeros.
    """
    if len(array) == 0:
        return [0.] * N_BASIC

    n_points = array.shape[0]
    lowest = np.argmin(array)
    highest = np.argmax(array)

    stats = [lowest / n_points, highest / n_points,
             array[lowest], array[highest],
             np.mean(array), np.std(array)]
    stats.extend(np.percentile(array, q) for q in (10, 25, 50, 75, 90))
    stats.append(skew(array))
    stats.append(kurtosis(array))
    return stats

In [14]:
def process_autocorrelation(array):
    """Reduce a 2-D autocorrelation array to three scalar descriptors.

    ``array`` is summed along axis 0 and the first local peak of that sum is
    located with ``find_peaks``.  Returns a dict with

    * ``autocorr_peak_position`` -- index of the first peak (1000 if none),
    * ``autocorr_peak_value_normalized`` -- peak value divided by the value
      at index 0 (1 if no peak was found),
    * ``autocorr_ZCR`` -- number of zero crossings of the summed curve
      divided by its length.
    """
    collapsed = array.sum(axis=0)
    peak_indices = find_peaks(collapsed)[0]

    if len(peak_indices) > 0:
        first_peak = peak_indices[0]
        position = first_peak
        height = collapsed[first_peak] / collapsed[0]
    else:
        # Sentinel values used when the curve has no local maximum.
        position = 1000
        height = 1

    crossing_rate = np.sum(lr.core.zero_crossings(collapsed)) / collapsed.shape[0]
    return {'autocorr_peak_position': position,
            'autocorr_peak_value_normalized': height,
            'autocorr_ZCR': crossing_rate}

In [15]:
def wav_autocorrelation(array):
    """Describe the autocorrelation of a 1-D signal with three scalars.

    The signal is correlated with itself, the second half of the result is
    kept, and only its first 800 entries are searched for a peak.  Returns a
    dict with

    * ``wav_autocorr_peak_position`` -- index of the first peak divided by
      44100 (index 1000 is used if there is no peak),
    * ``wav_autocorr_peak_value_normalized`` -- peak value divided by the
      first retained entry (1 if there is no peak),
    * ``wav_autocorr_ZCR`` -- zero crossings within the 800-entry window,
      divided by 800 regardless of the window's actual length.

    Any ``ValueError`` raised along the way yields the no-peak defaults with
    a ZCR of 0.
    """
    try:
        full = correlate(array, array)
        one_sided = full[full.shape[0] // 2:]
        window = one_sided[:800]

        peak_indices = find_peaks(window)[0]
        if len(peak_indices) > 0:
            first_peak = peak_indices[0]
            position = first_peak
            height = one_sided[first_peak] / one_sided[0]
        else:
            position = 1000
            height = 1

        crossing_rate = np.sum(lr.core.zero_crossings(window)) / 800
        return {'wav_autocorr_peak_position': position / 44100,
                'wav_autocorr_peak_value_normalized': height,
                'wav_autocorr_ZCR': crossing_rate}
    except ValueError:
        return {'wav_autocorr_peak_position': 1000 / 44100,
                'wav_autocorr_peak_value_normalized': 1,
                'wav_autocorr_ZCR': 0}

In [16]:
def get_zcr(array, sample_rate=44100):
    """Return the zero-crossing count of ``array``'s first row divided by its column count.

    ``sample_rate`` is accepted but not used.
    """
    first_row = array[0, :]
    crossings = lr.core.zero_crossings(first_row)
    return np.sum(crossings) / array.shape[1]

In [17]:
def extract_features_from_file_v1(fname):
    """Build the version-1 feature dict for one WAV file.

    The file is read with ``load_file`` and passed through the yaafe engine;
    the function reads the 'energy', 'am', 'percsharp', 'percspread',
    'spectralrolloff', 'spectralflux', 'spectralflatness',
    'spectralvariation', 'autocorr', 'mfcc' and 'barkloudness' outputs, so
    the engine must have been loaded with the version-1 plan.

    Returns a dict whose insertion order matters (``process_names_v1``
    derives column order from it).  Scalar entries: 'length, s', 'ZCR',
    'stft length'.  'wav features' is a 16-value list, 'autocorr features' a
    dict, and every other entry is an N_BASIC-long list of statistics.
    """
    def gradient_stats(series):
        # Statistics of the first difference; zeros when a ValueError is
        # raised (e.g. by np.gradient on a series that is too short).
        try:
            return get_basic_features(np.gradient(series))
        except ValueError:
            return [0.0] * N_BASIC

    audio = load_file(fname)
    output = {'length, s': length(audio),
              'ZCR': get_zcr(audio)}

    yaafe_feats = extract_yaafe_raw(audio)

    # Number of rows in the yaafe energy output.
    output['stft length'] = yaafe_feats['energy'].shape[0]

    for column in range(8):
        output['amplitudemod basic ' + str(column)] = get_basic_features(yaafe_feats['am'][:, column])

    output['wav features'] = wav_features(audio[0, :])

    # Single-column yaafe outputs, summarised directly.
    for name in ('energy', 'percsharp', 'percspread', 'spectralrolloff',
                 'spectralflux', 'spectralflatness', 'spectralvariation'):
        output[name + ' basic'] = get_basic_features(yaafe_feats[name][:, 0])

    # ... and the frame-to-frame derivative for three of them.
    for name in ('spectralflux', 'spectralflatness', 'spectralvariation'):
        output['d' + name + '/dt basic'] = gradient_stats(yaafe_feats[name][:, 0])

    output['autocorr features'] = process_autocorrelation(yaafe_feats['autocorr'])

    for coeff in range(N_MFCC):
        tag = str(coeff)
        track = yaafe_feats['mfcc'][:, coeff]
        output['MFCC basic ' + tag] = get_basic_features(track)

        # First and second derivative share one try block: a failure in
        # either resets both entries to zeros.
        try:
            first_diff = np.gradient(track)
            output['dMFCC/dt basic ' + tag] = get_basic_features(first_diff)

            second_diff = np.gradient(first_diff)
            output['d2MFCC/dt2 basic ' + tag] = get_basic_features(second_diff)
        except ValueError:
            output['dMFCC/dt basic ' + tag] = [0.0] * N_BASIC
            output['d2MFCC/dt2 basic ' + tag] = [0.0] * N_BASIC

    for band in range(24):
        tag = str(band)
        loudness = yaafe_feats['barkloudness'][:, band]
        output['bark basic ' + tag] = get_basic_features(loudness)
        output['dbark/dt basic ' + tag] = gradient_stats(loudness)

    return output

In [18]:
def process_names_v1(df):
    """Expand the version-1 feature dicts in ``df['feats']`` into flat columns, in place.

    Columns are added in this order:

    * the scalar entries 'length, s', 'ZCR', 'stft length';
    * the three fields of the 'autocorr features' dict;
    * 'wav <stat>' for the 16 values of 'wav features';
    * '<feature> <stat>' for every remaining entry, each a 13-value list,
      in the key order of the first row's dict.

    The 'feats' column is dropped afterwards.  Returns None.

    The feature names are read from the first row by position, so the frame
    does not need to carry an index label 0.
    """
    basic_stats = ('argmin_rel', 'argmax_rel', 'min', 'max',
                   'mean', 'std', 'perc10', 'perc25', 'perc50',
                   'perc75', 'perc90', 'skew', 'kurtosis')
    wav_stats = basic_stats + ('rms', 'rms_div_std', 'max_div_min')
    scalar_names = ('length, s', 'ZCR', 'stft length')
    autocorr_fields = ('autocorr_peak_position',
                       'autocorr_peak_value_normalized',
                       'autocorr_ZCR')

    # .iloc, not [0]: a label lookup fails on frames whose index lacks 0.
    all_names = list(df['feats'].iloc[0].keys())

    for featname in scalar_names:
        df[featname] = df['feats'].apply(lambda x: x[featname])

    for featname in autocorr_fields:
        df[featname] = df['feats'].apply(lambda x: x['autocorr features'][featname])

    for i, funcname in enumerate(wav_stats):
        df[' '.join(('wav', funcname))] = df['feats'].apply(lambda x: x['wav features'][i])

    # Everything not handled above is an N_BASIC-long statistics list.
    handled = set(scalar_names) | {'autocorr features', 'wav features'}
    for featname in all_names:
        if featname in handled:
            continue
        for i, funcname in enumerate(basic_stats):
            df[' '.join((featname, funcname))] = df['feats'].apply(lambda x: x[featname][i])

    df.drop(['feats'], axis=1, inplace=True)

In [19]:
def extract_features_from_file_v2(fname):
    """Build the version-2 feature dict for one WAV file.

    Reads the 'spectralshape' (4 columns), 'obsir' (9 columns) and
    'spectralrolloff' yaafe outputs, so the engine must have been loaded with
    the version-2 plan.  Every value in the returned dict is an N_BASIC-long
    list of statistics, either of a column or of its frame-to-frame gradient.
    """
    def gradient_stats(series):
        # Statistics of the first difference; zeros when a ValueError is
        # raised (e.g. by np.gradient on a series that is too short).
        try:
            return get_basic_features(np.gradient(series))
        except ValueError:
            return [0.0] * N_BASIC

    audio = load_file(fname)
    output = {}
    yaafe_feats = extract_yaafe_raw(audio)

    for band in range(4):
        column = yaafe_feats['spectralshape'][:, band]
        output['spectral shape basic ' + str(band)] = get_basic_features(column)
        output['dspectral shape/dt basic ' + str(band)] = gradient_stats(column)

    for band in range(9):
        column = yaafe_feats['obsir'][:, band]
        output['obsir basic ' + str(band)] = get_basic_features(column)
        output['dobsir/dt basic ' + str(band)] = gradient_stats(column)

    # NOTE(review): the key below reuses the index left over from the OBSIR
    # loop, so the entry is stored as 'dspectralrolloff/dt basic 8'.  Kept
    # as-is because stored column names depend on it.  Only the gradient of
    # the roll-off is summarised; the roll-off itself is not.
    output['dspectralrolloff/dt basic ' + str(band)] = gradient_stats(
        yaafe_feats['spectralrolloff'][:, 0])

    return output

In [20]:
def process_names_v2(df):
    """Expand the version-2 feature dicts in ``df['feats']`` into flat columns, in place.

    Every entry of the dict is a 13-value statistics list; each one becomes
    thirteen '<feature> <stat>' columns, in the key order of the first row's
    dict.  The 'feats' column is dropped afterwards.  Returns None.

    The feature names are read from the first row by position, so the frame
    does not need to carry an index label 0.
    """
    basic_stats = ('argmin_rel', 'argmax_rel', 'min', 'max',
                   'mean', 'std', 'perc10', 'perc25', 'perc50',
                   'perc75', 'perc90', 'skew', 'kurtosis')

    # .iloc, not [0]: a label lookup fails on frames whose index lacks 0.
    for featname in list(df['feats'].iloc[0].keys()):
        for i, funcname in enumerate(basic_stats):
            df[' '.join((featname, funcname))] = df['feats'].apply(lambda x: x[featname][i])

    df.drop(['feats'], axis=1, inplace=True)

In [21]:
def extract_features_from_file_v3(fname):
    """Build the version-3 feature dict (essentia descriptors) for one file.

    The audio is loaded with ``load_file_librosa``.  The dict always starts
    with 'wav autocorr' (the dict returned by ``wav_autocorrelation``),
    followed by twelve scalar entries.  For empty audio all twelve are 0;
    otherwise they are computed with essentia's ``standard`` algorithms from
    the waveform, its envelope, its spectrum and the spectral peaks.
    """
    audio = load_file_librosa(fname)
    output = {'wav autocorr': wav_autocorrelation(audio)}

    if audio.shape[0] == 0:
        # Nothing to analyse: emit the same keys, in the same order, as zeros.
        for key in ('derivative SFX 0', 'derivative SFX 1', 'flatness SFX',
                    'logattack 0', 'logattack 1', 'logattack 2',
                    'strongdecay', 'TCToTotal', 'HFC', 'salience',
                    'inharmonicity', 'dissonance'):
            output[key] = 0
        return output

    envelope = standard.Envelope()
    deriv_SFX = standard.DerivativeSFX()
    flatn_SFX = standard.FlatnessSFX()
    logattack = standard.LogAttackTime()
    strongdecay = standard.StrongDecay()
    tctototal = standard.TCToTotal()
    hfc = standard.HFC()
    spectrum = standard.Spectrum()
    specpeaks = standard.SpectralPeaks()
    inharmonicity = standard.Inharmonicity()
    dissonance = standard.Dissonance()
    salience = standard.PitchSalience()

    audio_spectrum = spectrum(audio)
    audio_env = envelope(audio)

    # Envelope-based descriptors.
    audio_dSFX = deriv_SFX(audio_env)
    for position in range(2):
        output['derivative SFX ' + str(position)] = audio_dSFX[position]

    output['flatness SFX'] = flatn_SFX(audio_env)

    audio_logattack = logattack(audio_env)  # three values are read
    for position in range(3):
        output['logattack ' + str(position)] = audio_logattack[position]

    output['strongdecay'] = strongdecay(audio)
    output['TCToTotal'] = tctototal(audio_env)
    # NOTE(review): HFC receives the waveform here, whereas essentia appears
    # to document its input as a spectrum -- confirm before changing, since
    # stored feature values depend on the current behaviour.
    output['HFC'] = hfc(audio)

    # Spectrum-based descriptors.
    audio_specpeaks = specpeaks(audio_spectrum)
    peak_freqs = audio_specpeaks[0]
    peak_mags = audio_specpeaks[1]
    output['salience'] = salience(audio_spectrum)

    # A leading peak at frequency 0 is dropped before the peak-based measures.
    if peak_freqs[0] == 0:
        peak_freqs = peak_freqs[1:]
        peak_mags = peak_mags[1:]
    output['inharmonicity'] = inharmonicity(peak_freqs, peak_mags)
    output['dissonance'] = dissonance(peak_freqs, peak_mags)

    return output

In [22]:
def process_names_v3(df):
    """Expand the version-3 feature dicts in ``df['feats']`` into flat columns, in place.

    The three fields of the 'wav autocorr' dict become columns first; every
    other entry is a scalar and becomes a column of the same name, in the key
    order of the first row's dict.  The 'feats' column is dropped afterwards.
    Returns None.

    The feature names are read from the first row by position, so the frame
    does not need to carry an index label 0.
    """
    # .iloc, not [0]: a label lookup fails on frames whose index lacks 0.
    all_names = list(df['feats'].iloc[0].keys())

    for featname in ('wav_autocorr_peak_position',
                     'wav_autocorr_peak_value_normalized',
                     'wav_autocorr_ZCR'):
        df[featname] = df['feats'].apply(lambda x: x['wav autocorr'][featname])

    for featname in all_names:
        if featname == 'wav autocorr':
            continue
        df[featname] = df['feats'].apply(lambda x: x[featname])

    df.drop(['feats'], axis=1, inplace=True)

In [23]:
def extract_features_from_file_v4(fname):
    """Build the version-4 feature dict (pitch track and spectral peak measures).

    The audio is loaded with ``load_file_librosa``.  The returned dict holds
    three N_BASIC-long statistics lists ('fundamental freq basic',
    'dfundamental freq/dt basic', 'fundamental confidence basic') and two
    scalars ('maxmag', 'strong peak').  Empty audio yields zeros throughout.

    Only pitch-track entries with a frequency above 0 are summarised.
    """
    audio = load_file_librosa(fname)
    output = {}

    if audio.shape[0] == 0:
        output['fundamental freq basic'] = [0.0] * N_BASIC
        output['dfundamental freq/dt basic'] = [0.0] * N_BASIC
        output['fundamental confidence basic'] = [0.0] * N_BASIC
        output['maxmag'] = 0.0
        output['strong peak'] = 0.0
        return output

    spectrum = standard.Spectrum()
    pitchmelodia = standard.PitchMelodia()
    equalloudness = standard.EqualLoudness()
    maxmag = standard.MaxMagFreq()
    strongpeak = standard.StrongPeak()

    # The pitch tracker runs on the equal-loudness-filtered signal, the
    # spectrum on the unfiltered one.
    pitch_track = pitchmelodia(equalloudness(audio))
    spec = spectrum(audio)

    has_pitch = pitch_track[0] > 0
    pmfreq = pitch_track[0][has_pitch]
    pmconf = pitch_track[1][has_pitch]

    output['fundamental freq basic'] = get_basic_features(pmfreq)
    try:
        output['dfundamental freq/dt basic'] = get_basic_features(np.gradient(pmfreq))
    except ValueError:
        # e.g. np.gradient on a track that is too short
        output['dfundamental freq/dt basic'] = [0.0] * N_BASIC

    output['fundamental confidence basic'] = get_basic_features(pmconf)
    output['maxmag'] = maxmag(spec)
    output['strong peak'] = strongpeak(spec)

    return output

In [24]:
def process_names_v4(df):
    """Expand the version-4 feature dicts in ``df['feats']`` into flat columns, in place.

    'maxmag' and 'strong peak' are copied as scalar columns; each of the three
    statistics lists becomes thirteen '<feature> <stat>' columns.  The 'feats'
    column is dropped afterwards.  Returns None.
    """
    stat_names = ('argmin_rel', 'argmax_rel', 'min', 'max',
                  'mean', 'std', 'perc10', 'perc25', 'perc50',
                  'perc75', 'perc90', 'skew', 'kurtosis')
    scalar_names = ('maxmag', 'strong peak')
    list_names = ('fundamental freq basic',
                  'dfundamental freq/dt basic',
                  'fundamental confidence basic')

    for name in scalar_names:
        df[name] = df['feats'].apply(lambda feats: feats[name])

    for name in list_names:
        for position, stat in enumerate(stat_names):
            column = '{} {}'.format(name, stat)
            df[column] = df['feats'].apply(lambda feats: feats[name][position])

    df.drop('feats', axis=1, inplace=True)

In [25]:
def extract_features_from_file_v5(fname):
    """Build the version-5 feature dict: autocorrelation descriptors per gammatone band.

    The first row of the ``load_file`` output is passed through the
    module-level filterbank ``gmtf``; rows 0..47 of the result are each
    summarised with ``wav_autocorrelation`` under the key 'gammatone ACR <i>'.
    """
    audio = load_file(fname)
    filtered = filters.erb_filterbank(audio[0, :], gmtf)
    return {'gammatone ACR ' + str(band): wav_autocorrelation(filtered[band, :])
            for band in range(48)}

In [26]:
def process_names_v5(df):
    """Expand the version-5 feature dicts in ``df['feats']`` into flat columns, in place.

    For each of the 48 'gammatone ACR <i>' entries, the three autocorrelation
    fields become columns named by concatenating the entry key and the field
    name with no separator (e.g. 'gammatone ACR 0wav_autocorr_ZCR').  The
    'feats' column is dropped afterwards.  Returns None.
    """
    fields = ('wav_autocorr_peak_position',
              'wav_autocorr_peak_value_normalized',
              'wav_autocorr_ZCR')

    for band in range(48):
        band_key = 'gammatone ACR ' + str(band)
        for field in fields:
            df[band_key + field] = df['feats'].apply(lambda feats: feats[band_key][field])

    df.drop('feats', axis=1, inplace=True)

In [27]:
# File listings for both splits; the extraction cells read their 'fname' column.
df_train = pd.read_csv(prefix + 'train.csv')
df_test = pd.read_csv(prefix + 'test.csv')
# Selector for the extraction cells; the next cell reassigns it.
version = 1

In [28]:
# First feature-set version to extract; the training cell below advances it
# stage by stage.
version = 2

In [29]:
# One entry per feature-set version:
# (extractor, column expander, columns dropped before saving, HDF file, HDF key).
# `None` for the dropped columns marks version 1, which keeps 'label' and
# stores it as an index into labels_list instead.
_train_stages = {
    1: (extract_features_from_file_v1, process_names_v1, None,
        'data/features.h5', 'train_basic'),
    2: (extract_features_from_file_v2, process_names_v2, ['label'],
        'data/features.h5', 'train_v2'),
    3: (extract_features_from_file_v3, process_names_v3, ['label'],
        'data/features_new.h5', 'train_v3'),
    4: (extract_features_from_file_v4, process_names_v4, ['label', 'manually_verified'],
        'data/features_new.h5', 'train_v4'),
    5: (extract_features_from_file_v5, process_names_v5, ['label', 'manually_verified'],
        'data/features_new.h5', 'train_v5'),
}

# Cascade: start at the current `version` and run every later stage as well.
# `version` is advanced only after a stage has been written, and stays at 5
# once the last stage is done.
while version in _train_stages:
    _extract, _expand, _dropped, _h5_path, _h5_key = _train_stages[version]

    # Each stage starts from a fresh copy of the listing.
    df_train = pd.read_csv(prefix + 'train.csv')
    df_train['feats'] = df_train['fname'].progress_apply(lambda x: _extract(prefix_train + x))
    _expand(df_train)

    if _dropped is None:
        df_train['label'] = df_train['label'].apply(lambda x: labels_list.index(x))
    else:
        df_train.drop(_dropped, axis=1, inplace=True)

    # NOTE(review): `table=True` is a legacy keyword; check how the installed
    # pandas treats it before replacing it with `format=...`, since that may
    # change the on-disk format.
    df_train.to_hdf(_h5_path, output_prefix + _h5_key, table=True, mode='a')

    if version == 5:
        break
    version += 1

Feature extraction: 100%|██████████| 9473/9473 [07:24<00:00, 23.42it/s]
Feature extraction: 100%|██████████| 9473/9473 [12:56<00:00, 12.21it/s]
Feature extraction: 100%|██████████| 9473/9473 [55:02<00:00,  3.05it/s]
Feature extraction: 100%|██████████| 9473/9473 [3:29:00<00:00,  1.92s/it]


In [30]:
# Preview the frame left behind by the last extraction stage that ran.
df_train.head()

Unnamed: 0,fname,gammatone ACR 0wav_autocorr_peak_position,gammatone ACR 0wav_autocorr_peak_value_normalized,gammatone ACR 0wav_autocorr_ZCR,gammatone ACR 1wav_autocorr_peak_position,gammatone ACR 1wav_autocorr_peak_value_normalized,gammatone ACR 1wav_autocorr_ZCR,gammatone ACR 2wav_autocorr_peak_position,gammatone ACR 2wav_autocorr_peak_value_normalized,gammatone ACR 2wav_autocorr_ZCR,...,gammatone ACR 44wav_autocorr_ZCR,gammatone ACR 45wav_autocorr_peak_position,gammatone ACR 45wav_autocorr_peak_value_normalized,gammatone ACR 45wav_autocorr_ZCR,gammatone ACR 46wav_autocorr_peak_position,gammatone ACR 46wav_autocorr_peak_value_normalized,gammatone ACR 46wav_autocorr_ZCR,gammatone ACR 47wav_autocorr_peak_position,gammatone ACR 47wav_autocorr_peak_value_normalized,gammatone ACR 47wav_autocorr_ZCR
0,00044347.wav,9.1e-05,0.84368,0.46375,0.000113,0.87433,0.4125,0.000113,0.959487,0.4075,...,0.01,0.006757,0.819804,0.0075,0.007982,0.546218,0.0075,0.010567,0.777358,0.005
1,001ca53d.wav,9.1e-05,0.841002,0.33375,0.000113,0.864383,0.325,0.000113,0.906748,0.34875,...,0.01,0.004943,0.924345,0.01,0.004875,0.899121,0.01,0.004807,0.85441,0.01
2,002d256b.wav,9.1e-05,0.841193,0.45875,0.000113,0.812151,0.44,0.000113,0.938686,0.39875,...,0.01,0.008367,0.576707,0.00625,0.008934,0.883678,0.00625,0.009546,0.898321,0.00625
3,0033e230.wav,0.000113,0.930468,0.4175,0.000113,0.965085,0.4175,0.000113,0.9682,0.4175,...,0.01,0.006576,0.721263,0.0075,0.008118,0.667516,0.00625,0.010249,0.624977,0.005
4,00353774.wav,9.1e-05,0.76696,0.375,0.000113,0.896295,0.4175,0.000113,0.938314,0.395,...,0.0125,0.005215,0.392228,0.00875,0.007937,0.431575,0.00875,0.009909,0.723917,0.00625


In [47]:
# Remove the training frame from the namespace; it is not used below.
del df_train

In [29]:
# One entry per feature-set version:
# (extractor, column expander, HDF file, HDF key).
_test_stages = {
    1: (extract_features_from_file_v1, process_names_v1, 'data/features.h5', 'test_basic'),
    2: (extract_features_from_file_v2, process_names_v2, 'data/features.h5', 'test_v2'),
    3: (extract_features_from_file_v3, process_names_v3, 'data/features_new.h5', 'test_v3'),
    4: (extract_features_from_file_v4, process_names_v4, 'data/features_new.h5', 'test_v4'),
    5: (extract_features_from_file_v5, process_names_v5, 'data/features_new.h5', 'test_v5'),
}

# Unlike the training cell this handles only the current `version` (no
# cascade); any other value does nothing.
if version in _test_stages:
    _extract, _expand, _h5_path, _h5_key = _test_stages[version]

    # Start from a fresh listing, as the training cell does.  The expander
    # mutates the frame in place, so reusing a frame from an earlier run would
    # carry that run's feature columns into this version's output.
    df_test = pd.read_csv(prefix + 'test.csv')
    df_test['feats'] = df_test['fname'].progress_apply(lambda x: _extract(prefix_test + x))
    _expand(df_test)

    # The key is prefixed with `output_prefix`, matching the training keys.
    # NOTE(review): `table=True` is a legacy keyword; check how the installed
    # pandas treats it before replacing it with `format=...`.
    df_test.to_hdf(_h5_path, output_prefix + _h5_key, table=True, mode='a')

Feature extraction: 100%|██████████| 9400/9400 [2:48:04<00:00,  1.07s/it]


In [34]:
# keep = ['fname', 'feats']
# elsecols = [col for col in df_train.columns if col not in keep]
# df_train.drop(elsecols, axis=1, inplace=True)

In [39]:
# keep = ['fname', 'feats']
# elsecols = [col for col in df_test.columns if col not in keep]
# df_test.drop(elsecols, axis=1, inplace=True)

In [30]:
# Preview the expanded test-set features.
df_test.head()

Unnamed: 0,fname,gammatone ACR 0wav_autocorr_peak_position,gammatone ACR 0wav_autocorr_peak_value_normalized,gammatone ACR 0wav_autocorr_ZCR,gammatone ACR 1wav_autocorr_peak_position,gammatone ACR 1wav_autocorr_peak_value_normalized,gammatone ACR 1wav_autocorr_ZCR,gammatone ACR 2wav_autocorr_peak_position,gammatone ACR 2wav_autocorr_peak_value_normalized,gammatone ACR 2wav_autocorr_ZCR,...,gammatone ACR 44wav_autocorr_ZCR,gammatone ACR 45wav_autocorr_peak_position,gammatone ACR 45wav_autocorr_peak_value_normalized,gammatone ACR 45wav_autocorr_ZCR,gammatone ACR 46wav_autocorr_peak_position,gammatone ACR 46wav_autocorr_peak_value_normalized,gammatone ACR 46wav_autocorr_ZCR,gammatone ACR 47wav_autocorr_peak_position,gammatone ACR 47wav_autocorr_peak_value_normalized,gammatone ACR 47wav_autocorr_ZCR
0,00063640.wav,9.1e-05,0.868697,0.4725,0.000113,0.809791,0.44125,0.000113,0.956177,0.39125,...,0.01,0.00576,0.780981,0.00875,0.008027,-0.005908,0.005,0.013492,0.741351,0.005
1,0013a1db.wav,9.1e-05,0.891996,0.46625,0.000113,0.822131,0.42125,0.000113,0.931122,0.37375,...,0.025,0.001678,0.170845,0.0125,0.00381,-0.583405,0.0075,0.009615,0.618578,0.005
2,002bb878.wav,9.1e-05,0.85438,0.46625,0.000113,0.845908,0.4375,0.000113,0.936798,0.4,...,0.01,0.006735,0.722568,0.0075,0.008073,0.628051,0.00625,0.010159,0.427464,0.00625
3,002d392d.wav,9.1e-05,0.855488,0.0625,0.000113,0.857635,0.0725,0.000113,0.950509,0.1,...,0.01,0.012268,0.55175,0.005,0.01161,0.673486,0.005,0.01195,0.806451,0.005
4,00326aa9.wav,9.1e-05,0.794682,0.11375,9.1e-05,0.693704,0.12125,0.000113,0.728096,0.12125,...,0.01,0.007302,0.544374,0.0075,0.008889,0.646964,0.00625,0.010045,0.713379,0.00625
