Extraction of soundnet feature vectors for ESC-50

In [2]:
#--mesure time
import time 
#soundnet in pytorch
import soundnet.extract_features as ex
import pandas as pd 
import librosa
from tqdm import tqdm
import numpy as np

In [3]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('bmh')

Extracting all feature vectors for ESC-10

In [4]:
import librosa

def extract_vectors_resampl(filepath):
    audio,sr = librosa.load(path=filepath,sr=22050)
    features = ex.extract_pytorch_feature_nooutput(audio,'./soundnet/sound8.pth')
   
    #print([x.shape for x in features])
    
    ##extract vector
    vectors = []
    for idlayer in range(len(features)):
        vectors.append(ex.extract_vector(features,idlayer)) #features vector 
    return vectors, len(audio)/sr

In [4]:
vec,siz= extract_vectors_resampl('/home/nfarrugi/git/ESC-50/audio/4-195497-A-11.wav')

In [5]:
len(vec)

7

Fetching the file list for ESC10

In [5]:
import pandas as pd

Df = pd.read_csv('/home/nfarrugi/git/ESC-50/meta/esc50.csv')

In [9]:
import os 
from tqdm import tqdm
pathesc = '/home/nfarrugi/git/ESC-50/audio/'
listfiles = os.listdir(pathesc)

all_fv_esc10 = dict()

#for curfile in tqdm(listfiles[:10]):

for curfile in tqdm(Df[Df['esc10']]['filename']):
    
    curid = curfile[:-4]
        
    vec,siz= extract_vectors_resampl(os.path.join(pathesc,curfile))
    
    all_fv_esc10[curid] = vec
    

100%|██████████| 400/400 [02:01<00:00,  3.24it/s]


In [10]:
np.savez_compressed('/home/nfarrugi/esc10-allfv.npz',all_fv_esc10 = all_fv_esc10)

In [6]:
all_fv_esc10 = np.load('/home/nfarrugi/esc10-allfv.npz')['all_fv_esc10']

Let's build the X and y for classification. We will use the predefined folds

In [11]:
Df_esc10 = Df[Df['esc10']]

y = Df_esc10['category']

group = Df_esc10['fold']

In [26]:
layer = 6 # 6 for conv7 layer

X = []

for curstim in Df_esc10['filename']:
    curid = curstim[:-4]
    curfv = all_fv_esc10[curid][layer].mean(axis=0)
    #curfv = all_fv_esc10[curid][layer]
    X.append(curfv)
X = np.stack(X)

In [27]:
X= X.reshape(400,-1)

In [28]:
X.shape

(400, 1024)

In [33]:
from sklearn.model_selection import LeaveOneGroupOut,cross_validate

from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC,LinearSVC


from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

est = make_pipeline(StandardScaler(),SelectKBest(k=100),MLPClassifier(max_iter=5000))

scores = cross_validate(estimator=est,X=X,y=y,groups=group,cv = LeaveOneGroupOut(),return_train_score=False)

print(scores['test_score'])

   57   59   61   66   67   71   72   74   84   89   95  102  108  109
  114  129  132  136  140  154  155  158  174  182  188  189  191  193
  195  199  200  201  206  211  214  223  225  226  228  229  246  247
  248  250  253  258  263  273  274  276  281  287  293  300  301  303
  306  310  314  317  319  321  323  324  326  331  332  335  339  341
  342  345  367  368  389  393  398  404  406  408  412  419  420  423
  426  429  431  436  440  444  451  459  460  461  464  465  477  488
  490  492  494  498  506  513  516  520  522  529  535  536  541  547
  553  556  557  559  560  572  573  578  579  588  601  609  620  626
  628  629  632  637  640  646  647  649  652  667  669  672  673  682
  684  687  689  705  707  709  714  715  721  734  736  737  739  743
  751  761  763  766  776  778  781  787  791  801  803  805  810  816
  820  823  828  829  830  834  835  848  849  858  865  873  877  882
  885  888  892  901  904  906  909  911  914  924  926  929  930  935
  939 

[0.6375 0.5625 0.6625 0.6875 0.675 ]


Can adding an encoding model that estimates fMRI from conv5 help training on ESC-10 ? 

In [16]:
from joblib import dump, load

estim = load('fmri_video/code/encoding_conv7.joblib')

Estimate brain activity using ESC-10 feature vectors

In [29]:
brain_predictions = estim.predict(X)

In [30]:
brain_predictions.shape

(400, 44690)

In [31]:
from sklearn.feature_selection import SelectKBest

X_brain = np.hstack([X,brain_predictions])

est = make_pipeline(StandardScaler(),MLPClassifier(max_iter=1000,verbose=0))

scores = cross_validate(estimator=est,X=X_brain,y=y,groups=group,cv = LeaveOneGroupOut(),return_train_score=False)

print(scores['test_score'])


   57   59   61   66   67   71   72   74   84   89   95  102  108  109
  114  129  132  136  140  154  155  158  174  182  188  189  191  193
  195  199  200  201  206  211  214  223  225  226  228  229  246  247
  248  250  253  258  263  273  274  276  281  287  293  300  301  303
  306  310  314  317  319  321  323  324  326  331  332  335  339  341
  342  345  367  368  389  393  398  404  406  408  412  419  420  423
  426  429  431  436  440  444  451  459  460  461  464  465  477  488
  490  492  494  498  506  513  516  520  522  529  535  536  541  547
  553  556  557  559  560  572  573  578  579  588  601  609  620  626
  628  629  632  637  640  646  647  649  652  667  669  672  673  682
  684  687  689  705  707  709  714  715  721  734  736  737  739  743
  751  761  763  766  776  778  781  787  791  801  803  805  810  816
  820  823  828  829  830  834  835  848  849  858  865  873  877  882
  885  888  892  901  904  906  909  911  914  924  926  929  930  935
  939 

[0.6    0.675  0.6625 0.7375 0.7375]


Same tests on ESC-50

In [128]:
pathesc = '/home/nfarrugi/git/ESC-50/audio/'
listfiles = os.listdir(pathesc)

all_fv_esc50 = dict()

#for curfile in tqdm(listfiles[:10]):

for curfile in (tqdm(listfiles)):
    
    curid = curfile[:-4]
        
    vec,siz= extract_vectors_resampl(os.path.join(pathesc,curfile))
    
    all_fv_esc50[curid] = vec

  6%|▌         | 112/2000 [00:32<09:21,  3.37it/s]

KeyboardInterrupt: 

In [105]:
np.savez_compressed('/home/nfarrugi/esc50-allfv.npz',all_fv_esc50 = all_fv_esc50)



In [129]:
all_fv_esc50 = np.load('/home/nfarrugi/esc50-allfv.npz')['all_fv_esc50']

In [130]:
y = Df['category']

group = Df['fold']

layer = 4 # 6 for conv7 layer

X = []

for curstim in Df['filename']:
    curid = curstim[:-4]
    curfv = all_fv_esc50[curid][layer].mean(axis=0)
    #curfv = all_fv_esc10[curid][layer]
    X.append(curfv)
X = np.stack(X)

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [119]:

from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline


est = make_pipeline(StandardScaler(),LinearSVC(max_iter=5000,C=0.01))


scores = cross_validate(estimator=est,X=X,y=y,groups=group,cv = LeaveOneGroupOut(),return_train_score=False)

print(scores['test_score'])

[0.4225 0.42   0.455  0.5    0.475 ]
