In [11]:
import IPython.display as ipd
import librosa
import librosa.display
import numpy as np
import pandas as pd
from scipy import stats

In [12]:
#filename = './files/1.wav'
filename = "./files/1.mp3"

print('File: {}'.format(filename))
x, sr = librosa.load(filename, sr=None, mono=True)
print('Duration: {:.2f}s, {} samples'.format(x.shape[-1] / sr, x.size))

File: ./files/1.mp3


  return f(*args, **kwargs)


Duration: 55.54s, 2449408 samples


In [13]:
def columns():
    feature_sizes = dict(chroma_stft=12, chroma_cqt=12, chroma_cens=12,
                         tonnetz=6, mfcc=20, rmse=1, zcr=1,
                         spectral_centroid=1, spectral_bandwidth=1,
                         spectral_contrast=7, spectral_rolloff=1)
    moments = ('mean', 'std', 'skew', 'kurtosis', 'median', 'min', 'max')

    columns = []
    for name, size in feature_sizes.items():
        for moment in moments:
            it = ((name, moment, '{:02d}'.format(i+1)) for i in range(size))
            columns.extend(it)

    names = ('feature', 'statistics', 'number')
    columns = pd.MultiIndex.from_tuples(columns, names=names)

    return columns.sort_values()

In [14]:
features = pd.Series(index=columns(), dtype=np.float32, name=1)

def feature_stats(name, values):
        features[name, 'mean'] = np.mean(values, axis=1)
        features[name, 'std'] = np.std(values, axis=1)
        features[name, 'skew'] = stats.skew(values, axis=1)
        features[name, 'kurtosis'] = stats.kurtosis(values, axis=1)
        features[name, 'median'] = np.median(values, axis=1)
        features[name, 'min'] = np.min(values, axis=1)
        features[name, 'max'] = np.max(values, axis=1)

In [15]:
f = librosa.feature.zero_crossing_rate(x, frame_length=2048, hop_length=512)
feature_stats('zcr', f)

cqt = np.abs(librosa.cqt(x, sr=sr, hop_length=512, bins_per_octave=12,
                                 n_bins=7*12, tuning=None))
assert cqt.shape[0] == 7 * 12
assert np.ceil(len(x)/512) <= cqt.shape[1] <= np.ceil(len(x)/512)+1

f = librosa.feature.chroma_cqt(C=cqt, n_chroma=12, n_octaves=7)
feature_stats('chroma_cqt', f)
f = librosa.feature.chroma_cens(C=cqt, n_chroma=12, n_octaves=7)
feature_stats('chroma_cens', f)
f = librosa.feature.tonnetz(chroma=f)
feature_stats('tonnetz', f)

del cqt
stft = np.abs(librosa.stft(x, n_fft=2048, hop_length=512))
assert stft.shape[0] == 1 + 2048 // 2
assert np.ceil(len(x)/512) <= stft.shape[1] <= np.ceil(len(x)/512)+1
del x

f = librosa.feature.chroma_stft(S=stft**2, n_chroma=12)
feature_stats('chroma_stft', f)

f = librosa.feature.rms(S=stft)
feature_stats('rmse', f)

f = librosa.feature.spectral_centroid(S=stft)
feature_stats('spectral_centroid', f)
f = librosa.feature.spectral_bandwidth(S=stft)
feature_stats('spectral_bandwidth', f)
f = librosa.feature.spectral_contrast(S=stft, n_bands=6)
feature_stats('spectral_contrast', f)
f = librosa.feature.spectral_rolloff(S=stft)
feature_stats('spectral_rolloff', f)

mel = librosa.feature.melspectrogram(sr=sr, S=stft**2)
del stft
f = librosa.feature.mfcc(S=librosa.power_to_db(mel), n_mfcc=20)
feature_stats('mfcc', f)

In [16]:
features = features.to_frame().T
print(features.shape)

(1, 518)


In [17]:
columns = ['mfcc', 'chroma_cens', 'tonnetz', 'spectral_contrast']
columns.append(['spectral_centroid', 'spectral_bandwidth', 'spectral_rolloff'])
columns.append(['rmse', 'zcr'])
for column in columns:
    ipd.display(features[column].head().style.format('{:.2f}'))

statistics,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,median,median,median,median,median,median,median,median,median,median,median,median,median,median,median,median,median,median,median,median,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,skew,skew,skew,skew,skew,skew,skew,skew,skew,skew,skew,skew,skew,skew,skew,skew,skew,skew,skew,skew,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std
number,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,16,17,18,19,20,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,16,17,18,19,20,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,16,17,18,19,20,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,16,17,18,19,20,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,16,17,18,19,20,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,16,17,18,19,20,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,16,17,18,19,20
1,-0.62,2.63,-0.69,0.33,-0.86,0.03,-0.45,-0.02,0.89,0.53,-0.11,-0.05,0.17,0.24,-0.03,0.09,-0.11,0.22,0.1,0.21,-15.22,233.39,117.2,131.69,68.32,77.78,43.94,42.16,34.4,43.32,28.99,31.67,40.98,28.12,30.86,20.76,24.83,30.45,24.34,25.48,-192.66,144.86,16.34,51.46,5.89,26.5,-3.73,0.73,0.94,-5.49,-1.86,-1.19,3.58,-1.2,-1.01,-4.66,-0.47,-1.38,-1.26,-1.76,-169.1,149.78,9.35,54.0,7.0,26.95,-3.42,0.7,1.3,-5.3,-0.97,-1.5,3.69,-1.04,-0.76,-4.28,-0.38,-1.27,-1.12,-1.61,-464.07,-50.83,-74.07,-30.23,-85.2,-28.25,-57.84,-45.37,-51.07,-53.42,-37.68,-33.96,-28.84,-41.37,-28.38,-33.43,-27.17,-32.14,-31.67,-31.77,-0.58,-1.17,0.32,-0.05,-0.1,-0.01,-0.09,0.02,-0.51,0.03,-0.25,0.02,-0.06,-0.23,0.01,-0.12,-0.06,-0.14,-0.19,-0.15,100.09,36.06,42.49,24.47,25.55,14.99,16.42,12.27,12.47,12.51,10.27,9.74,9.1,9.29,8.85,7.85,7.82,7.86,7.61,8.4


statistics,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,max,max,max,max,max,max,max,max,max,max,max,max,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,median,median,median,median,median,median,median,median,median,median,median,median,min,min,min,min,min,min,min,min,min,min,min,min,skew,skew,skew,skew,skew,skew,skew,skew,skew,skew,skew,skew,std,std,std,std,std,std,std,std,std,std,std,std
number,01,02,03,04,05,06,07,08,09,10,11,12,01,02,03,04,05,06,07,08,09,10,11,12,01,02,03,04,05,06,07,08,09,10,11,12,01,02,03,04,05,06,07,08,09,10,11,12,01,02,03,04,05,06,07,08,09,10,11,12,01,02,03,04,05,06,07,08,09,10,11,12,01,02,03,04,05,06,07,08,09,10,11,12
1,-0.43,-0.48,-0.09,-0.37,0.04,-0.74,-0.57,0.13,1.5,5.66,-0.52,-0.78,0.55,0.6,0.66,0.83,0.88,0.78,0.67,0.84,0.65,0.53,0.56,0.78,0.2,0.25,0.3,0.36,0.31,0.29,0.21,0.21,0.12,0.06,0.14,0.24,0.19,0.25,0.3,0.37,0.3,0.29,0.18,0.12,0.05,0.02,0.11,0.21,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.35,-0.07,0.08,0.21,0.51,0.24,0.58,1.04,1.57,2.21,0.72,0.45,0.11,0.13,0.16,0.18,0.17,0.19,0.17,0.22,0.16,0.09,0.13,0.19


statistics,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,max,max,max,max,max,max,mean,mean,mean,mean,mean,mean,median,median,median,median,median,median,min,min,min,min,min,min,skew,skew,skew,skew,skew,skew,std,std,std,std,std,std
number,01,02,03,04,05,06,01,02,03,04,05,06,01,02,03,04,05,06,01,02,03,04,05,06,01,02,03,04,05,06,01,02,03,04,05,06,01,02,03,04,05,06
1,1.17,0.9,-0.56,2.21,-0.17,-0.04,0.27,0.25,0.59,0.8,0.17,0.13,-0.02,-0.01,0.08,0.0,-0.0,-0.02,-0.03,0.0,0.08,-0.03,-0.0,-0.01,-0.24,-0.43,-0.43,-0.59,-0.22,-0.19,0.87,-0.51,0.1,1.26,-0.19,-0.25,0.08,0.1,0.22,0.24,0.06,0.05


statistics,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,max,max,max,max,max,max,max,mean,mean,mean,mean,mean,mean,mean,median,median,median,median,median,median,median,min,min,min,min,min,min,min,skew,skew,skew,skew,skew,skew,skew,std,std,std,std,std,std,std
number,01,02,03,04,05,06,07,01,02,03,04,05,06,07,01,02,03,04,05,06,07,01,02,03,04,05,06,07,01,02,03,04,05,06,07,01,02,03,04,05,06,07,01,02,03,04,05,06,07
1,2.03,-0.06,1.19,-0.3,1.6,2.97,-0.44,63.99,43.12,45.33,35.14,45.75,47.94,52.56,22.11,18.65,18.47,17.68,18.83,21.16,36.45,21.56,18.55,17.78,17.03,18.0,20.36,37.02,6.36,4.91,4.62,6.66,5.77,11.51,8.2,0.88,0.23,0.87,0.49,1.03,1.39,-0.24,5.94,5.27,5.69,4.56,5.09,4.82,7.37


feature,spectral_centroid,spectral_centroid,spectral_centroid,spectral_centroid,spectral_centroid,spectral_centroid,spectral_centroid,spectral_bandwidth,spectral_bandwidth,spectral_bandwidth,spectral_bandwidth,spectral_bandwidth,spectral_bandwidth,spectral_bandwidth,spectral_rolloff,spectral_rolloff,spectral_rolloff,spectral_rolloff,spectral_rolloff,spectral_rolloff,spectral_rolloff
statistics,kurtosis,max,mean,median,min,skew,std,kurtosis,max,mean,median,min,skew,std,kurtosis,max,mean,median,min,skew,std
number,01,01,01,01,01,01,01,01,01,01,01,01,01,01,01,01,01,01,01,01,01
1,4.59,5513.92,906.07,863.85,0.0,1.55,631.65,-0.52,3150.91,1276.88,1357.52,0.0,0.01,507.78,-0.36,9248.51,1985.46,1787.26,0.0,0.61,1521.68


feature,rmse,rmse,rmse,rmse,rmse,rmse,rmse,zcr,zcr,zcr,zcr,zcr,zcr,zcr
statistics,kurtosis,max,mean,median,min,skew,std,kurtosis,max,mean,median,min,skew,std
number,01,01,01,01,01,01,01,01,01,01,01,01,01,01
1,1.51,0.44,0.14,0.14,0.0,0.83,0.08,40.52,0.42,0.03,0.02,0.0,5.05,0.03
