# __Feature Selection__

References:
- [Bird songs - pad and resize spectrogram](https://www.kaggle.com/code/leirahua/bird-songs-pad-and-resize-spectrogram/notebook)

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from pathlib import Path

In [2]:
from scipy import signal
from scipy.fft import fftshift
from scipy.io.wavfile import read

In [3]:
from skimage.transform import resize
from skimage.io import imread

In [4]:
from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [5]:
import librosa
from librosa.feature import melspectrogram, mfcc

In [6]:
from audio_utils import *

In [30]:
import PIL.Image as pimg

In [7]:
# Class labels; each one is also used as a sub-folder name under ``datadir``.
Categories = ['Cough', 'Sneeze']
# Root folder of the input audio (relative path).
datadir = 'data/input/'

In [8]:
# Peek at the first class label.
Categories[0]

'Cough'

In [17]:
# Sampling rate (samples per second) handed to librosa.load through ``sr=``.
sampling_rate = 44100

In [18]:
def _load_clip(category, wav_name):
    """Load ``datadir/<category>/<wav_name>`` with librosa, passing ``sr=sampling_rate``."""
    return librosa.load(os.path.join(datadir, category, wav_name), sr=sampling_rate)


# Two example recordings per class; each call yields (samples, sampling rate).
x_cgh_0, sr_cgh_0 = _load_clip(Categories[0], 'cough_0001_1.wav')
x_cgh_1, sr_cgh_1 = _load_clip(Categories[0], 'cough_0002_1.wav')
x_snz_0, sr_snz_0 = _load_clip(Categories[1], 'sneeze_0001.wav')
x_snz_1, sr_snz_1 = _load_clip(Categories[1], 'sneeze_0003.wav')

In [19]:
# Show the sampling rate librosa.load returned for each of the four clips.
sr_cgh_0, sr_cgh_1, sr_snz_0, sr_snz_1

(44100, 44100, 44100, 44100)

In [20]:
# Number of array dimensions of the first cough clip.
x_cgh_0.ndim

1

In [29]:
def extract_features(filename, sampling_rate=44100, n_mfcc=40):
    """Load an audio file and summarise it as five averaged feature vectors.

    Every librosa feature matrix is transposed and averaged over axis 0, so
    each returned array holds one mean value per row of that matrix.

    Parameters
    ----------
    filename
        Path of the audio file; passed straight to ``librosa.load``.
    sampling_rate : int, optional
        Passed to ``librosa.load`` as ``sr``. The rate returned by the loader
        is the one used for every feature call. Defaults to 44100.
    n_mfcc : int, optional
        Number of MFCC coefficients to request. Defaults to 40, the value
        that used to be hard-coded, so existing callers are unaffected.

    Returns
    -------
    tuple of numpy.ndarray
        ``(mfccs, chroma, mel, contrast, tonnetz)``, in that order.
    """

    def _time_mean(frames):
        # One shared reduction for all features: mean over axis 0 of the transpose.
        return np.mean(frames.T, axis=0)

    data, s_rate = librosa.load(filename, sr=sampling_rate)

    # Magnitude of the short-time Fourier transform; reused by the chroma and
    # spectral-contrast features so it is computed only once here.
    stft = np.abs(librosa.stft(data))

    # MFCC (mel-frequency cepstral coefficients)
    mfccs = _time_mean(librosa.feature.mfcc(y=data, sr=s_rate, n_mfcc=n_mfcc))

    # Chroma, from the magnitude spectrogram
    chroma = _time_mean(librosa.feature.chroma_stft(S=stft, sr=s_rate))

    # Mel spectrogram, from the raw signal
    mel = _time_mean(librosa.feature.melspectrogram(y=data, sr=s_rate))

    # Spectral contrast, from the magnitude spectrogram
    contrast = _time_mean(librosa.feature.spectral_contrast(S=stft, sr=s_rate))

    # Tonnetz is computed on the harmonic component of the signal only.
    harmonic = librosa.effects.harmonic(data)
    tonnetz = _time_mean(librosa.feature.tonnetz(y=harmonic, sr=s_rate))

    return mfccs, chroma, mel, contrast, tonnetz

In [23]:
# One-off feature computation on the first cough clip (x_cgh_0) so that each
# intermediate array can be inspected in the cells that follow.

# Magnitude of the short-time Fourier transform
stft = np.abs(librosa.stft(x_cgh_0))

# MFCC (mel-frequency cepstral coefficients), collapsed to one mean per coefficient
mfccs = (
    librosa.feature.mfcc(y=x_cgh_0, sr=sampling_rate, n_mfcc=40)
    .T
    .mean(axis=0)
)

# Chroma, from the magnitude spectrogram
chroma = (
    librosa.feature.chroma_stft(S=stft, sr=sampling_rate)
    .T
    .mean(axis=0)
)

# Mel spectrogram, from the raw signal
mel = (
    librosa.feature.melspectrogram(y=x_cgh_0, sr=sampling_rate)
    .T
    .mean(axis=0)
)

# Spectral contrast, from the magnitude spectrogram
contrast = (
    librosa.feature.spectral_contrast(S=stft, sr=sampling_rate)
    .T
    .mean(axis=0)
)

# Tonnetz, computed on the harmonic component of the signal
tonnetz = (
    librosa.feature.tonnetz(y=librosa.effects.harmonic(x_cgh_0), sr=sampling_rate)
    .T
    .mean(axis=0)
)

In [24]:
# Inspect the magnitude STFT (np.abs of librosa.stft) of the first cough clip.
stft

array([[3.2996049e-06, 3.0444819e-05, 2.5469426e-05, ..., 5.1485799e-02,
        1.8612771e-01, 5.3048021e-01],
       [1.2463125e-05, 3.7660346e-05, 5.4315671e-05, ..., 1.9293189e-01,
        4.1957361e-01, 5.5440718e-01],
       [1.3319244e-05, 3.4360499e-05, 2.6463593e-05, ..., 2.3372597e-01,
        4.3394649e-01, 4.7682282e-01],
       ...,
       [1.2656720e-03, 2.9768653e-03, 2.2084846e-03, ..., 3.3486017e-03,
        2.7631086e-03, 9.0701680e-04],
       [8.0661080e-04, 1.8971226e-03, 4.6038260e-03, ..., 1.6320058e-03,
        1.7641882e-03, 5.9547788e-04],
       [7.8795024e-04, 2.7241088e-03, 6.2575294e-03, ..., 2.3794738e-03,
        1.9540647e-03, 8.5947395e-04]], dtype=float32)

In [25]:
# Inspect the averaged MFCC vector (n_mfcc=40 was requested above).
mfccs

array([-4.6589490e+02,  1.4485303e+02, -3.9907974e+01,  4.9108558e+00,
       -1.3923359e+01, -8.1866384e-01, -3.3151114e+00, -6.8905773e+00,
        1.0387737e+00,  6.7915976e-01,  1.3535534e+01,  6.2752490e+00,
        9.0014744e+00, -9.5741719e-01, -5.0450726e+00, -2.0340781e+00,
       -2.0237420e-01,  1.5886880e-01, -9.1868095e+00, -2.5505562e+00,
        2.2099690e+00, -5.3315139e+00, -6.4374261e+00, -1.8383557e+00,
       -2.7426693e+00, -5.0907416e+00,  1.9375498e+00, -2.7557114e-01,
        5.5409450e-02, -4.5949354e+00, -1.9091806e+00,  5.0546150e+00,
       -2.1549180e+00, -4.4223185e+00, -2.8740797e+00,  2.1490483e+00,
        4.3547087e+00,  3.3248465e+00,  5.8356128e+00,  3.7910025e+00],
      dtype=float32)

In [26]:
# Inspect the averaged chroma vector.
chroma

array([0.51513875, 0.59569013, 0.60699993, 0.6449531 , 0.71443146,
       0.68778026, 0.7000103 , 0.6783201 , 0.7618174 , 0.76426876,
       0.62942404, 0.49195316], dtype=float32)

In [27]:
# Inspect the averaged mel-spectrogram vector.
mel

array([2.2305558e-02, 4.3397355e-03, 7.9138465e-03, 2.7999159e-02,
       7.5627214e-01, 4.6998620e+00, 5.5987663e+00, 3.9556858e-01,
       2.9587394e-02, 1.7624086e-02, 2.7630175e-02, 1.3202353e-01,
       2.1424103e-01, 1.8107158e-01, 3.2049425e-02, 1.1467484e-02,
       2.8949456e-02, 1.0593116e-01, 2.2515263e-01, 8.2295090e-01,
       7.7835751e-01, 1.8928555e-01, 2.0323771e-01, 3.0641401e-01,
       9.4510454e-01, 8.4187698e-01, 1.1021043e+00, 6.3990128e-01,
       5.3945422e-01, 4.0952328e-01, 1.6323711e-01, 1.0968590e-01,
       1.1177710e-01, 2.6262501e-01, 2.8281027e-01, 1.5059820e-01,
       6.9392994e-02, 4.3608516e-02, 2.6541768e-02, 6.2940128e-02,
       1.8925345e-01, 2.3555738e-01, 1.4248748e-01, 6.6932023e-02,
       7.5672351e-02, 8.8857688e-02, 7.5292699e-02, 1.2648378e-01,
       2.1565741e-01, 1.8842579e-01, 4.4858503e-01, 2.1254627e-01,
       7.6738372e-02, 3.1903990e-02, 4.4150155e-02, 4.1014314e-02,
       3.2004800e-02, 3.9255925e-02, 5.2212942e-02, 4.3109361e