In [1]:
import pandas as pd
import numpy as np


In [2]:
data = pd.read_csv('greattit.csv').drop(['Unnamed: 0', 'Genus', 'Specific_epithet', 'Subspecies', 'English_name', 'Recordist', 
                                        'Locality', 'Audio_file', 'License', 'Url', 'Date'], axis=1)
data.head()

Unnamed: 0,Recording_ID,Country,Latitude,Longitude,Vocalization_type,Quality,Time
0,441880,Netherlands,52.1468,5.3788,alarm call,no score,09:00
1,440223,France,42.608,8.861,call,no score,09:30
2,440205,Netherlands,52.1331,5.0489,begging call,no score,14:00
3,439558,France,49.6682,-1.4283,song,no score,18:00
4,439316,France,42.608,8.861,call,no score,11:00


In [3]:
data = data.dropna(subset=['Vocalization_type'])

In [4]:
binary_data = data.loc[data.Vocalization_type.str.contains('song') != data.Vocalization_type.str.contains('call')]
binary_data['labels'] = binary_data.Vocalization_type.str.contains('call').astype('int')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


In [5]:
binary_data['labels'].sum() / len(binary_data)

0.6265389876880985

In [6]:
from scipy.stats import skew
import os
import librosa

def get_mfcc(name, path):
    try:
        b, _ = librosa.core.load(path + str(name) + '.wav')
    except:
        try:
            b, _ = librosa.core.load(path + '-' + str(name) + '.wav')
        except:
            return pd.Series([0] * 425)
        
    ft1 = librosa.feature.mfcc(b, n_mfcc=80)
    ft2 = librosa.feature.zero_crossing_rate(b)[0]
    ft3 = librosa.feature.spectral_rolloff(b)[0]
    ft4 = librosa.feature.spectral_centroid(b)[0]
    ft5 = librosa.feature.spectral_contrast(b)[0]
    ft6 = librosa.feature.spectral_bandwidth(b)[0]
    ft1_trunc = np.hstack((np.mean(ft1, axis=1), np.std(ft1, axis=1), skew(ft1, axis = 1), np.max(ft1, axis = 1), np.min(ft1, axis = 1)))
    ft2_trunc = np.hstack((np.mean(ft2), np.std(ft2), skew(ft2), np.max(ft2), np.min(ft2)))
    ft3_trunc = np.hstack((np.mean(ft3), np.std(ft3), skew(ft3), np.max(ft3), np.min(ft3)))
    ft4_trunc = np.hstack((np.mean(ft4), np.std(ft4), skew(ft4), np.max(ft4), np.min(ft4)))
    ft5_trunc = np.hstack((np.mean(ft5), np.std(ft5), skew(ft5), np.max(ft5), np.min(ft5)))
    ft6_trunc = np.hstack((np.mean(ft6), np.std(ft6), skew(ft6), np.max(ft6), np.min(ft6)))
    return pd.Series(np.hstack((ft1_trunc, ft2_trunc, ft3_trunc, ft4_trunc, ft5_trunc, ft6_trunc)))

In [7]:
train_data = pd.DataFrame()
train_data['Recording_ID'] = binary_data['Recording_ID']

In [8]:
from tqdm import tqdm, tqdm_pandas

tqdm.pandas()
train_data = train_data['Recording_ID'].progress_apply(get_mfcc, path='./sounds/')

100%|██████████| 3655/3655 [5:23:18<00:00,  5.01s/it]


In [12]:
train_data['id'] = binary_data['Recording_ID']
train_data['labels'] = binary_data['labels']
train_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,417,418,419,420,421,422,423,424,id,labels
0,-436.514074,127.301726,18.283442,-6.818973,-0.517332,17.952541,10.912547,12.669348,6.394881,10.34183,...,0.72695,23.723413,7.81972,2475.112147,213.669915,2.583432,3428.771732,2161.574903,441880,1
1,-239.07462,19.287065,-68.560588,-18.418223,-21.925773,-7.657923,-15.589265,-4.075395,-15.764188,-4.112783,...,1.208634,50.550359,4.978101,2434.314219,269.386657,-1.763712,3057.348932,1179.227306,440223,1
2,-367.70483,-31.474539,-108.760128,-90.779177,-75.426763,-23.981639,-25.06712,11.283314,3.798569,27.655528,...,0.626702,23.814826,1.4027,2309.327586,215.807624,-0.914716,2801.966575,1618.665418,440205,1
3,-508.650269,-38.546771,-95.776263,4.488206,-46.348091,-5.754554,-7.404113,-8.611265,-5.02268,3.62996,...,0.300264,38.912805,0.82972,1789.83292,508.838089,-0.666589,2693.892107,660.6096,439558,0
4,-321.26915,-76.923236,-119.413991,-13.216269,-21.198838,-4.078821,-5.427807,-1.305652,1.698714,5.166934,...,-0.470492,39.343142,3.233353,2138.560642,441.305899,-1.505895,2716.800178,872.997235,439316,1


In [13]:
train_data.to_csv('train_data.csv')

In [22]:
import scipy
def extract_features(files, path):
    features = {}

    cnt = 0
    for f in tqdm(files):
        features[f] = {}

        fs, data = scipy.io.wavfile.read(os.path.join(path, f +  '.wav'))

        abs_data = np.abs(data)
        diff_data = np.diff(data)

        def calc_part_features(data, n=2, prefix=''):
            f_i = 1
            for i in range(0, len(data), len(data)//n):
                features[f]['{}mean_{}_{}'.format(prefix, f_i, n)] = np.mean(data[i:i + len(data)//n])
                features[f]['{}std_{}_{}'.format(prefix, f_i, n)] = np.std(data[i:i + len(data)//n])
                features[f]['{}min_{}_{}'.format(prefix, f_i, n)] = np.min(data[i:i + len(data)//n])
                features[f]['{}max_{}_{}'.format(prefix, f_i, n)] = np.max(data[i:i + len(data)//n])

        features[f]['len'] = len(data)
        if features[f]['len'] > 0:
            n = 1
            calc_part_features(data, n=n)
            calc_part_features(abs_data, n=n, prefix='abs_')
            calc_part_features(diff_data, n=n, prefix='diff_')

            n = 2
            calc_part_features(data, n=n)
            calc_part_features(abs_data, n=n, prefix='abs_')
            calc_part_features(diff_data, n=n, prefix='diff_')

            n = 3
            calc_part_features(data, n=n)
            calc_part_features(abs_data, n=n, prefix='abs_')
            calc_part_features(diff_data, n=n, prefix='diff_')


        cnt += 1

        # if cnt >= 1000:
        #     break

    features = pd.DataFrame(features).T.reset_index()
    features.rename(columns={'index': 'fname'}, inplace=True)
    
    return features

In [18]:
train_data['id'] = train_data['id'].astype('str')
train_data['id'].loc[train_data['id'].str.len() == 5] = '-' + train_data['id'].loc[train_data['id'].str.len() == 5]
train_data.tail()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,417,418,419,420,421,422,423,424,id,labels
3968,-644.304467,43.769088,20.600399,25.566073,0.615168,17.255366,-0.293559,13.501394,-2.383148,12.806499,...,0.478269,32.385765,17.71179,3139.011269,97.391632,-0.177115,3343.471119,2935.36003,201673,0
3969,-414.45672,-43.620065,-93.900036,-65.899176,-49.250565,2.627686,22.137639,21.548692,7.090813,14.051358,...,0.555006,34.085298,1.354522,2739.413691,378.584259,-1.746263,3271.838006,950.374428,189124,1
3970,-254.885282,145.669051,-2.75553,-26.635543,-3.858492,12.156044,2.41056,12.728343,-3.671137,7.911865,...,1.364373,36.918631,4.514786,2006.76114,525.498462,0.245232,3608.330539,0.0,179531,1
3971,-318.532869,126.835176,11.064388,9.777978,-1.420109,12.56321,22.929329,14.415879,12.750367,9.554238,...,0.641927,40.863982,8.253314,1872.316921,366.100559,0.699289,3317.922538,698.237823,176129,1
3972,-344.450333,206.764661,-105.561713,11.7096,22.89461,-55.331799,9.614875,-1.490481,-39.378572,7.075187,...,-1.248771,59.929025,2.572144,1074.68772,67.689587,-5.417591,1263.382798,0.0,163123,1


In [37]:
path = './sounds/'
train_files = train_data.loc[train_data[train_data.columns[:-2]].any(axis=1)]['id'].values
train_features = extract_features(train_files, path)


  0%|          | 0/3652 [00:00<?, ?it/s][A
  0%|          | 1/3652 [00:00<13:25,  4.53it/s][A
  0%|          | 2/3652 [00:00<12:13,  4.98it/s][A
  0%|          | 4/3652 [00:00<10:06,  6.02it/s][A
  0%|          | 5/3652 [00:00<10:21,  5.87it/s][A
  0%|          | 6/3652 [00:01<10:24,  5.84it/s][A
  0%|          | 7/3652 [00:01<10:44,  5.65it/s][A
  0%|          | 8/3652 [00:01<10:17,  5.90it/s][A
  0%|          | 9/3652 [00:01<12:25,  4.89it/s][A
  0%|          | 10/3652 [00:01<12:07,  5.01it/s][A
  0%|          | 11/3652 [00:02<13:17,  4.57it/s][A
  0%|          | 12/3652 [00:02<14:04,  4.31it/s][A
  0%|          | 13/3652 [00:03<14:44,  4.12it/s][A
  0%|          | 14/3652 [00:03<14:34,  4.16it/s][A
  0%|          | 15/3652 [00:03<14:37,  4.14it/s][A
  0%|          | 17/3652 [00:03<13:43,  4.42it/s][A
  1%|          | 19/3652 [00:04<13:02,  4.64it/s][A
  1%|          | 20/3652 [00:05<17:01,  3.56it/s][A
  1%|          | 21/3652 [00:05<16:34,  3.65it/s][A
Exception

In [41]:
train_features.rename(columns={'fname':'id'})

Unnamed: 0,id,abs_max_1_1,abs_max_1_2,abs_max_1_3,abs_mean_1_1,abs_mean_1_2,abs_mean_1_3,abs_min_1_1,abs_min_1_2,abs_min_1_3,...,max_1_3,mean_1_1,mean_1_2,mean_1_3,min_1_1,min_1_2,min_1_3,std_1_1,std_1_2,std_1_3
0,-22726,31225.0,31225.0,31225.0,2184.843712,2101.954765,2155.916481,0.0,0.0,0.0,...,31153.0,14.300062,16.446491,16.785581,-31225.0,-31225.0,-31225.0,3088.945387,3149.604158,3400.659814
1,-25526,13275.0,12515.0,9223.0,727.386662,577.205133,594.468735,0.0,0.0,0.0,...,9223.0,-23.518439,-23.522498,-23.679666,-13035.0,-12515.0,-8619.0,1194.934001,861.104487,880.122898
2,-25593,9716.0,8043.0,7346.0,533.091225,492.555384,427.238398,0.0,0.0,0.0,...,7346.0,-0.021408,-0.036117,-0.017971,-9716.0,-7886.0,-7192.0,909.533247,852.738770,699.490879
3,-25667,11963.0,8010.0,7477.0,609.831819,535.684602,538.699965,0.0,0.0,0.0,...,7477.0,-4.772540,-4.715030,-4.850790,-11963.0,-7507.0,-7265.0,1201.083616,943.939403,950.354954
4,-25668,17386.0,14333.0,13363.0,1230.035881,1031.585906,990.376222,0.0,0.0,0.0,...,12334.0,-11.465824,-11.749057,-11.652404,-15754.0,-14262.0,-13363.0,2111.684991,1832.422712,1741.118352
5,-25669,10535.0,10535.0,10535.0,528.321772,534.280544,559.109201,0.0,0.0,0.0,...,10201.0,-4.615731,-4.639975,-4.579812,-10535.0,-10535.0,-10535.0,1103.250689,1124.915093,1146.488898
6,-25670,11920.0,8423.0,8191.0,656.989405,638.302133,626.249327,0.0,0.0,0.0,...,8191.0,-4.553197,-4.629821,-4.638208,-11568.0,-8101.0,-7873.0,1272.755382,1207.841813,1193.474710
7,-25672,15238.0,15238.0,15238.0,571.825354,652.703107,664.231119,0.0,0.0,0.0,...,15042.0,-4.743626,-4.719785,-4.731732,-15238.0,-15238.0,-15238.0,1233.160406,1437.577709,1565.828076
8,-25738,23928.0,23928.0,23928.0,1426.638896,1206.204529,1129.241841,0.0,0.0,0.0,...,21986.0,-21.222794,-20.831692,-20.816292,-23928.0,-23928.0,-23928.0,2075.714843,1742.753237,1709.959877
9,-25739,13580.0,10163.0,10163.0,730.396994,667.256337,740.956894,0.0,0.0,0.0,...,9686.0,-14.157132,-14.024228,-14.140639,-13009.0,-10163.0,-10163.0,1116.969023,948.776859,1064.016143
