> This experiment uses LightGBM to fit statistical features extracted from the raw audio files.
>
> As you may know, Deep Learning/CNN is the right way to deal with this kind of challenge.
>
> But the challenge in this kernel is to build a good solution using only statistical features and gradient-boosted decision trees (GBDT).
>
> Because feature extraction takes a long time, I preprocessed the train and test sets locally and uploaded only the resulting files.
>
> Giba


In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import librosa
import matplotlib.pyplot as plt
import gc

from tqdm import tqdm, tqdm_notebook
from sklearn.metrics import label_ranking_average_precision_score
from sklearn.metrics import roc_auc_score

from joblib import Parallel, delayed
import lightgbm as lgb
from scipy import stats

from sklearn.model_selection import KFold

import warnings
warnings.filterwarnings('ignore')

def calculate_overall_lwlrap_sklearn(truth, scores):
    """Compute the overall label-weighted label-ranking average precision.

    Thin wrapper around sklearn's ``label_ranking_average_precision_score``
    that weights every sample by its number of positive labels.

    Parameters
    ----------
    truth : 2-D array indexed as [sample, label]
        Ground-truth matrix; any entry greater than 0 counts as a positive.
    scores : 2-D array indexed as [sample, label]
        Predicted scores, row-aligned with ``truth``.

    Returns
    -------
    The weighted score returned by sklearn.
    """
    # Weight of a sample = how many positive labels it carries.
    labels_per_sample = np.sum(truth > 0, axis=1)
    # sklearn mishandles the weighting of samples that have no labels at all,
    # so those rows are dropped before scoring.
    labelled_rows = np.flatnonzero(labels_per_sample > 0)
    binary_truth = truth[labelled_rows, :] > 0
    kept_scores = scores[labelled_rows, :]
    kept_weights = labels_per_sample[labelled_rows]
    return label_ranking_average_precision_score(
        binary_truth, kept_scores, sample_weight=kept_weights)

# Enable tqdm's pandas integration (progress-bar variants of apply and friends).
# None of the cells visible here call those variants.
tqdm.pandas()

In [3]:
# The sample submission doubles as the test-set template: the first column is
# skipped and every remaining column is treated as one label to predict.
test = pd.read_csv(
    '../input/freesound-audio-tagging-2019/sample_submission.csv'
)

label_columns = test.columns[1:].tolist()
# Label name -> column position within the label block.
label_mapping = {label: index for index, label in enumerate(label_columns)}

print(test.shape)

(4970, 2) (1120, 81)


Load preprocessed train and test

In [7]:
# Load the precomputed arrays in a fixed order:
# train features, test features, train targets.
X, Xtest, Y = map(np.load, (
    '../input/freesoundpreproc1/LGB-train-1.npy',
    '../input/freesoundpreproc1/LGB-test-1.npy',
    '../input/freesoundpreproc1/LGB-target.npy',
))

# Display the three shapes as the cell output.
X.shape, Xtest.shape, Y.shape

((17866, 149), (1120, 149), (17866, 80))

In [9]:
n_fold = 10
folds = KFold(n_splits=n_fold, shuffle=True, random_state=69)

# LightGBM settings shared by every per-label classifier.
params = {'num_leaves': 15,
         'min_data_in_leaf': 200,
         'objective':'binary',
         "metric": 'auc',
         'max_depth': -1,
         'learning_rate': 0.05,
         "boosting": "gbdt",
         "bagging_fraction": 0.85,
         "bagging_freq": 1,
         "feature_fraction": 0.20,
         "bagging_seed": 42,
         "verbosity": -1,
         "nthread": -1,
         "random_state": 69}

# One model is trained per target column, so the prediction matrices are sized
# from the target matrix rather than a hard-coded 80. The label names are only
# used for reporting, but they must line up with the columns of Y.
n_labels = Y.shape[1]
assert n_labels == len(label_columns), (
    'Y has {} label columns but label_columns has {} entries'.format(
        n_labels, len(label_columns)))

# KFold ignores the target and, with a fixed integer random_state, yields the
# same partition on every call, so the splits are computed once and reused
# for all labels instead of being regenerated inside the label loop.
fold_splits = list(folds.split(X))

PREDTRAIN = np.zeros((X.shape[0], n_labels))      # out-of-fold predictions
PREDTEST  = np.zeros((Xtest.shape[0], n_labels))  # fold-averaged test predictions
for f in range(n_labels):
    y = Y[:, f]  # target column for label f
    oof      = np.zeros(X.shape[0])
    oof_test = np.zeros(Xtest.shape[0])
    for trn_idx, val_idx in fold_splits:
        model = lgb.LGBMClassifier(**params, n_estimators=20000)
        # NOTE(review): the validation fold also drives early stopping, so the
        # out-of-fold scores reported below can be optimistic.
        # NOTE(review): `verbose` / `early_stopping_rounds` as fit() arguments
        # are not accepted by newer LightGBM releases (callbacks replace them);
        # kept here to match the environment this notebook was run in.
        model.fit(X[trn_idx, :],
                  y[trn_idx],
                  eval_set=[(X[val_idx, :], y[val_idx])],
                  eval_metric='auc',
                  verbose=0,
                  early_stopping_rounds=25)
        # Predict with the early-stopped iteration; test predictions are
        # averaged over the folds.
        oof[val_idx] = model.predict_proba(X[val_idx, :], num_iteration=model.best_iteration_)[:, 1]
        oof_test    += model.predict_proba(Xtest        , num_iteration=model.best_iteration_)[:, 1] / n_fold

    PREDTRAIN[:, f] = oof
    PREDTEST [:, f] = oof_test

    # Per-label out-of-fold AUC (string-truncated to 6 characters).
    print( f, str(roc_auc_score( y, oof ))[:6], label_columns[f] )

print( 'Competition Metric Lwlrap cv:', calculate_overall_lwlrap_sklearn( Y, PREDTRAIN ) )

0 0.9078 Accelerating_and_revving_and_vroom
1 0.9265 Accordion
2 0.9402 Acoustic_guitar
3 0.9051 Applause
4 0.9118 Bark
5 0.9431 Bass_drum
6 0.9441 Bass_guitar
7 0.8304 Bathtub_(filling_or_washing)
8 0.8434 Bicycle_bell
9 0.8043 Burping_and_eructation
10 0.9236 Bus
11 0.7229 Buzz
12 0.8949 Car_passing_by
13 0.9415 Cheering
14 0.8745 Chewing_and_mastication
15 0.9283 Child_speech_and_kid_speaking
16 0.7691 Chink_and_clink
17 0.8936 Chirp_and_tweet
18 0.8934 Church_bell
19 0.8770 Clapping
20 0.8535 Computer_keyboard
21 0.8632 Crackle
22 0.8853 Cricket
23 0.9357 Crowd
24 0.7947 Cupboard_open_or_close
25 0.8580 Cutlery_and_silverware
26 0.8498 Dishes_and_pots_and_pans
27 0.8794 Drawer_open_or_close
28 0.8867 Drip
29 0.8681 Electric_guitar
30 0.8514 Fart
31 0.8679 Female_singing
32 0.8861 Female_speech_and_woman_speaking
33 0.8518 Fill_(with_liquid)
34 0.8982 Finger_snapping
35 0.9083 Frying_(food)
36 0.7916 Gasp
37 0.8559 Glockenspiel
38 0.9073 Gong
39 0.9626 Gurgling
40 0.8837 Harmonica
4

In [11]:
# Overwrite the label columns of the submission template with the
# fold-averaged test predictions (PREDTEST columns follow label_columns order).
test[label_columns] = PREDTEST
# Write the submission file without the DataFrame index.
test.to_csv('submission.csv', index=False)
# Show the first rows as a quick sanity check.
test.head()

Unnamed: 0,fname,Accelerating_and_revving_and_vroom,Accordion,Acoustic_guitar,Applause,Bark,Bass_drum,Bass_guitar,Bathtub_(filling_or_washing),Bicycle_bell,Burping_and_eructation,Bus,Buzz,Car_passing_by,Cheering,Chewing_and_mastication,Child_speech_and_kid_speaking,Chink_and_clink,Chirp_and_tweet,Church_bell,Clapping,Computer_keyboard,Crackle,Cricket,Crowd,Cupboard_open_or_close,Cutlery_and_silverware,Dishes_and_pots_and_pans,Drawer_open_or_close,Drip,Electric_guitar,Fart,Female_singing,Female_speech_and_woman_speaking,Fill_(with_liquid),Finger_snapping,Frying_(food),Gasp,Glockenspiel,Gong,...,Harmonica,Hi-hat,Hiss,Keys_jangling,Knock,Male_singing,Male_speech_and_man_speaking,Marimba_and_xylophone,Mechanical_fan,Meow,Microwave_oven,Motorcycle,Printer,Purr,Race_car_and_auto_racing,Raindrop,Run,Scissors,Screaming,Shatter,Sigh,Sink_(filling_or_washing),Skateboard,Slam,Sneeze,Squeak,Stream,Strum,Tap,Tick-tock,Toilet_flush,Traffic_noise_and_roadway_noise,Trickle_and_dribble,Walk_and_footsteps,Water_tap_and_faucet,Waves_and_surf,Whispering,Writing,Yell,Zipper_(clothing)
0,000ccb97.wav,0.001701,0.003465,0.005784,0.002173,0.012073,0.006242,0.002575,0.01475,0.047343,0.018991,0.005114,0.010724,0.005353,0.002466,0.011606,0.007054,0.03743,0.123227,0.005719,0.031604,0.012731,0.010826,0.022032,0.002971,0.026874,0.065533,0.085551,0.004328,0.009503,0.00845,0.01098,0.004067,0.003292,0.010778,0.102108,0.003154,0.036711,0.006091,0.007821,...,0.006402,0.186183,0.050988,0.171281,0.010754,0.002038,0.00467,0.032471,0.010836,0.011894,0.013356,0.000827,0.013836,0.006198,0.002601,0.019029,0.007605,0.011166,0.033381,0.113313,0.007103,0.008203,0.014486,0.006221,0.008932,0.044437,0.007579,0.008924,0.029098,0.006852,0.007989,0.001585,0.015085,0.008546,0.01183,0.0026,0.014909,0.014685,0.018834,0.021447
1,0012633b.wav,0.043288,0.004765,0.0069,0.005487,0.015202,0.004686,0.006452,0.022611,0.010462,0.02239,0.013816,0.015363,0.041319,0.002985,0.040197,0.007133,0.008474,0.015052,0.028771,0.009913,0.070193,0.044052,0.031364,0.004928,0.021136,0.011766,0.010441,0.019389,0.013524,0.007891,0.010772,0.016518,0.010283,0.013182,0.008777,0.018699,0.016299,0.007019,0.007234,...,0.008714,0.006993,0.014951,0.014322,0.012754,0.006738,0.00702,0.008221,0.021438,0.029574,0.020073,0.046411,0.024924,0.073246,0.011561,0.008836,0.044392,0.016318,0.007874,0.007907,0.01806,0.036442,0.013169,0.010956,0.00882,0.040564,0.019688,0.012835,0.006325,0.021711,0.027569,0.028849,0.01036,0.08829,0.036637,0.052185,0.047691,0.087686,0.009724,0.070977
2,001ed5f1.wav,0.007671,0.002849,0.006193,0.005636,0.035427,0.003122,0.002564,0.030687,0.009929,0.015878,0.005172,0.013116,0.025639,0.00236,0.032685,0.013589,0.020729,0.010208,0.006298,0.085823,0.070527,0.013212,0.010005,0.004673,0.034943,0.044784,0.044952,0.149141,0.01495,0.006144,0.018467,0.005525,0.018998,0.025845,0.035531,0.006336,0.026798,0.004962,0.004621,...,0.009383,0.006396,0.014552,0.021231,0.060768,0.0036,0.006575,0.00781,0.010977,0.02769,0.052321,0.008458,0.013906,0.01231,0.003723,0.011183,0.025731,0.031852,0.015984,0.079118,0.030774,0.019499,0.052701,0.074969,0.179519,0.042464,0.008699,0.009911,0.014626,0.036715,0.013758,0.00607,0.015382,0.025617,0.015145,0.002915,0.020527,0.042038,0.031089,0.045184
3,00294be0.wav,0.01022,0.00305,0.006721,0.003786,0.01286,0.002891,0.002695,0.030088,0.010429,0.013716,0.007062,0.01981,0.038327,0.001689,0.088642,0.013676,0.009903,0.019943,0.011506,0.01039,0.090408,0.023384,0.06868,0.003337,0.023706,0.011499,0.01005,0.035739,0.023063,0.006036,0.00968,0.008147,0.004111,0.016224,0.011778,0.010391,0.018617,0.005704,0.009979,...,0.007332,0.005147,0.017645,0.016208,0.016605,0.003061,0.006693,0.006005,0.01622,0.038075,0.017743,0.013133,0.023955,0.187232,0.002944,0.011556,0.038248,0.021309,0.007643,0.006742,0.021274,0.017979,0.017679,0.009952,0.010671,0.016195,0.016862,0.008817,0.010971,0.143709,0.017017,0.002932,0.011373,0.086058,0.026009,0.020457,0.098102,0.132359,0.009708,0.099416
4,003fde7a.wav,0.003534,0.005038,0.012481,0.004631,0.012968,0.00373,0.003668,0.037293,0.614078,0.015593,0.005991,0.014871,0.011136,0.003979,0.010602,0.006838,0.313897,0.098104,0.108955,0.011213,0.028588,0.012039,0.024335,0.004009,0.016403,0.035801,0.024263,0.007551,0.006813,0.011213,0.016197,0.016119,0.002914,0.014758,0.01871,0.007284,0.018883,0.086364,0.049771,...,0.073677,0.015447,0.072272,0.141474,0.01221,0.004299,0.002554,0.086359,0.01221,0.023487,0.016602,0.000991,0.018055,0.009611,0.004544,0.012035,0.012272,0.103961,0.044982,0.077004,0.00864,0.035273,0.008729,0.005741,0.02281,0.038267,0.012573,0.043493,0.012674,0.035545,0.049425,0.004789,0.009789,0.009015,0.060554,0.002568,0.016323,0.018379,0.022514,0.031863
