This notebook uses MFCC features with a soft-voting ensemble that includes XGBoost.

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import glob
import os
import gc
import time
from scipy.interpolate import interp1d
import lightgbm as lgb
import xgboost as xgb
from joblib import Parallel, delayed
from tqdm.notebook import tqdm
from scipy.stats import rankdata

from sklearn.model_selection import GroupKFold
from sklearn.metrics import roc_auc_score, label_ranking_average_precision_score

from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier,GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn import svm
from sklearn.ensemble import VotingClassifier

import soundfile as sf
# Librosa Libraries
import librosa
import librosa.display
import IPython.display as ipd
import matplotlib.pyplot as plt

import cuml as cm
import cupy as cp

In [None]:
# Collect the audio file paths. glob returns matches in a filesystem-dependent
# order, so sort them to keep the row order of everything derived from these
# lists (the TEST matrix and the submission table) reproducible between runs.
trainfiles = sorted(glob.glob('../input/rfcx-species-audio-detection/train/*.flac'))
testfiles = sorted(glob.glob('../input/rfcx-species-audio-detection/test/*.flac'))
len(trainfiles), len(testfiles), trainfiles[0]

In [None]:
# Load the two annotation tables shipped with the dataset:
# train_tp.csv -> traint, train_fp.csv -> trainf.
traint = pd.read_csv( '../input/rfcx-species-audio-detection/train_tp.csv' )
trainf = pd.read_csv( '../input/rfcx-species-audio-detection/train_fp.csv' )
# Display the (rows, columns) shape of both tables.
traint.shape, trainf.shape

In [None]:
# Preview the first rows of the table read from train_tp.csv.
traint.head()

In [None]:
# Preview the first rows of the table read from train_fp.csv.
trainf.head()

In [None]:
# Recording ids of the rows in traint annotated with species 0, re-indexed
# from 0 so that .get(k) addresses the k-th such row.
zero_mask = traint.species_id == 0
df_for_zero = traint.loc[zero_mask, 'recording_id'].reset_index(drop=True)
print(type(df_for_zero), len(df_for_zero), df_for_zero.get(0))

In [None]:
# Load one example recording (index 1 of df_for_zero), report its size,
# then draw its waveform and embed an audio player for it.
# NOTE(review): librosa.display.waveplot was removed in newer librosa releases
# in favour of waveshow — confirm against the installed version.
fn = df_for_zero.get(1)
flac_path = '../input/rfcx-species-audio-detection/train/' + fn + '.flac'
data, samplerate = sf.read(flac_path)
print(data.shape, samplerate)
print(data.shape[0])
librosa.display.waveplot(y=data, sr=samplerate, color="#B14D")
ipd.Audio(flac_path)

In [None]:
# Show the container type and shape of the decoded example recording.
print(f"{type(data)} {data.shape}")


In [None]:
# FFT exploration on the example recording, computed with cupy.
data1 = cp.array(data)
print(type(data1), data1.shape)

# Magnitude of the first half of the FFT output.
half_len = len(data1) // 2
varfft = cp.abs(cp.fft.fft(data1)[:half_len])
print(type(varfft), varfft.shape)

# Average into 1000 bins of 1440 values each, then copy back to numpy.
# The reshape requires varfft to hold exactly 1000 * 1440 values.
binned = varfft.reshape((1000, 1440)).mean(axis=1)
varfft1 = cp.asnumpy(binned)
print(type(varfft1), varfft1.shape)


In [None]:
# MFCC exploration on the example recording: request 40 coefficients, then
# average along axis 0 of the transposed matrix to get one vector.
mfccs = librosa.feature.mfcc(y=data, sr=samplerate, n_mfcc=40)
print(type(mfccs), mfccs.shape)
mfccs_scaled = mfccs.T.mean(axis=0)
print(type(mfccs_scaled), mfccs_scaled.shape)


In [None]:
# NOTE: `plt` is already bound by the import block at the top of the notebook;
# this statement only additionally binds the name `matplotlib`.
import matplotlib.pyplot
# Line plot of the averaged MFCC vector of the example recording.
plt.plot(mfccs_scaled)
plt.show()

In [None]:
# Line plot of the 1000-bin averaged FFT magnitude of the example recording.
plt.plot(varfft1)
plt.show()

In [None]:
def extract_fft(fn, n_bins=1000):
    """Return a binned FFT magnitude spectrum for one audio file.

    The file is read with soundfile, the FFT is computed with cupy, the
    magnitude of the first half of the FFT output is taken, and that half
    spectrum is averaged into ``n_bins`` equal-width bins.

    Args:
        fn: Path of the audio file to read.
        n_bins: Number of output bins. With the default of 1000 a
            2,880,000-sample file is binned as (1000, 1440), exactly as the
            previous fixed-shape implementation did.

    Returns:
        A numpy array of length ``n_bins`` with the mean magnitude per bin.

    Raises:
        ValueError: If the half spectrum holds fewer than ``n_bins`` values.
    """
    # NOTE(review): assumes a single-channel file (1-D sample array) — confirm.
    data, _ = sf.read(fn)
    data = cp.array(data)

    half = len(data) // 2
    varfft = cp.abs(cp.fft.fft(data)[:half])

    width = half // n_bins
    if width == 0:
        raise ValueError(
            f"{fn}: half spectrum has {half} values, fewer than n_bins={n_bins}"
        )

    # Drop the trailing remainder so files whose half spectrum is not an exact
    # multiple of n_bins can still be binned instead of failing in reshape.
    binned = varfft[:n_bins * width].reshape((n_bins, width)).mean(axis=1)
    return cp.asnumpy(binned)

In [None]:
def extract_mfccs(fn, n_mfcc=40):
    """Return the averaged MFCC feature vector of one audio file.

    Args:
        fn: Path of the audio file to read with soundfile.
        n_mfcc: Number of MFCC coefficients requested from librosa.
            Defaults to 40, the value previously hard-coded here.

    Returns:
        A numpy array obtained by averaging the MFCC matrix along axis 0 of
        its transpose (presumably one value per coefficient — the exploration
        cell earlier in the notebook prints the resulting shape).
    """
    data, samplerate = sf.read(fn)
    mfccs = librosa.feature.mfcc(y=data, sr=samplerate, n_mfcc=n_mfcc)

    return np.mean(mfccs.T, axis=0)

In [None]:
# MFCC feature vector for every row of traint, in table order.
tp_paths = ['../input/rfcx-species-audio-detection/train/' + rid + '.flac'
            for rid in traint.recording_id.values]
FT = np.stack([extract_mfccs(path) for path in tqdm(tp_paths)])
gc.collect()

FT.shape

In [None]:
# MFCC feature vector for every row of trainf, in table order.
# NOTE(review): this cell previously carried the timing remark "runs in 7min
# using cupy(GPU) and 40min on numpy(CPU). ~7x Faster in GPU". That appears to
# describe the cupy-based extract_fft; this loop calls extract_mfccs, which
# does not use cupy — re-measure before relying on those numbers.
fp_paths = ['../input/rfcx-species-audio-detection/train/' + rid + '.flac'
            for rid in trainf.recording_id.values]
FF = np.stack([extract_mfccs(path) for path in tqdm(fp_paths)])
gc.collect()

FF.shape

In [None]:
# Combine true positives and false positives into one training matrix:
# the FT rows come first, followed by the FF rows.
TRAIN = np.vstack([FT, FF])

# The per-table feature arrays are not needed once they are merged.
del FT
del FF
gc.collect()
TRAIN.shape

In [None]:
# MFCC feature vector for every test file, in the order of `testfiles`.
TEST = np.stack([extract_mfccs(path) for path in tqdm(testfiles)])
gc.collect()

TEST.shape

In [None]:
# Build the label table for the training matrix: rows from traint first, then
# rows from trainf — the same order in which FT and FF are stacked into TRAIN.
tt = traint[['recording_id', 'species_id']].copy()
tf = trainf[['recording_id', 'species_id']].copy()
# Rows from trainf get the sentinel -1, which matches none of the species ids
# 0..23 below, so they are labelled 0 in every target column.
tf['species_id'] = -1

# ignore_index=True yields a unique 0..n-1 index. Without it both tables keep
# their own row labels and the concatenated index contains duplicates, which
# makes any later label-based access ambiguous.
TRAIN_TAB = pd.concat((tt, tf), ignore_index=True)

# One binary indicator column s0..s23 per species id.
for i in range(24):
    TRAIN_TAB['s' + str(i)] = (TRAIN_TAB.species_id == i).astype('int64')

TRAIN_TAB.head()

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the train and test rows stacked together, then apply the
# same transform to each matrix.
std = StandardScaler().fit(np.vstack([TRAIN, TEST]))

TRAIN = std.transform(TRAIN)
TEST = std.transform(TEST)
gc.collect()

In [None]:
N_FOLDS = 5      # number of GroupKFold splits
N_SPECIES = 24   # one binary target column s0..s23 per species


def _build_ensemble():
    """Return a fresh, unfitted soft-voting ensemble of the five base models."""
    estimators = [
        ('lr', LogisticRegression(random_state=49)),
        ('xgb', XGBClassifier(reg_alpha=3, reg_lambda=1, gamma=5, n_estimators=2,
                              objective='binary:logistic', learning_rate=1,
                              max_delta_step=0, max_depth=2)),
        ('svm', svm.SVC(C=0.05, degree=2, gamma='scale', shrinking=True,
                        kernel='poly', probability=True)),
        ('rf2', RandomForestClassifier(n_estimators=3, random_state=2)),
        ('svm2', svm.SVC(C=0.01, degree=1, gamma='scale', shrinking=True,
                         kernel='linear', probability=True)),
    ]
    return VotingClassifier(estimators=estimators, voting='soft')


# Submission frame: one row per test file, keyed by the file name up to its
# first dot.
sub = pd.DataFrame({'recording_id': [f.split('/')[-1].split('.')[0] for f in testfiles]})
gkf = GroupKFold(n_splits=N_FOLDS)

SCORE = []
# Group by recording so rows of the same recording never straddle
# the train/validation boundary of a fold.
groups = TRAIN_TAB['recording_id'].values

# GroupKFold partitions by group only (the target is not used), so the folds
# are computed once and reused for all targets.
folds = list(gkf.split(TRAIN, groups=groups))
n_folds = len(folds)

for tgt in range(N_SPECIES):
    starttime = time.time()
    target = TRAIN_TAB['s' + str(tgt)].values

    ytrain = np.zeros(TRAIN.shape[0])  # out-of-fold predictions for this target
    ytest = np.zeros(TEST.shape[0])    # test predictions averaged over folds
    for ind_train, ind_valid in folds:
        eclf = _build_ensemble()
        eclf.fit(X=TRAIN[ind_train], y=target[ind_train])

        # Probability of the positive class on the held-out fold and on test.
        ytrain[ind_valid] = eclf.predict_proba(TRAIN[ind_valid])[:, 1]
        # Divide by the real fold count so the average stays correct if
        # N_FOLDS is changed.
        ytest += eclf.predict_proba(TEST)[:, 1] / n_folds

    score = roc_auc_score(target, ytrain)
    print('Target AUC', tgt, score, time.time() - starttime)
    SCORE.append(score)

    TRAIN_TAB['y' + str(tgt)] = ytrain
    sub['s' + str(tgt)] = ytest

print('Overall Score:', np.mean(SCORE))

In [None]:
# Write the per-species test predictions to submission.csv in the working
# directory, without the DataFrame index column.
sub.to_csv('submission.csv', index=False)

In [None]:
# Notebook shell escape: list the working directory (presumably to confirm
# that submission.csv was written).
!ls