In [3]:
import numpy as np
import pandas as pd
import librosa
import matplotlib.pyplot as plt

In [4]:
def get_tempo(y, sr):
    tempo = librosa.feature.tempo(y=y, sr=sr)
    return tempo

In [5]:
def get_short_time_energy(y, hop_length=256, frame_length=256):
    ste = np.array([
        sum(abs(y[j:j+frame_length] ** 2) / frame_length)
        for j in range(0, len(y), hop_length)
    ])
    ste_feature = np.hstack([ste.mean(), ste.std()])
    
    return ste_feature

In [17]:
def get_root_mean_square_energy(y, hop_length=512, frame_length=512):
    rms = librosa.feature.rms(y=y)
    rms_feature = np.hstack([rms.mean(), rms.std()])
    
    return rms_feature

In [7]:
def get_zcr(y):
    zcr = librosa.feature.zero_crossing_rate(y)
    zcr_feature = np.hstack([zcr.mean(), zcr.std()])
    
    return zcr_feature

In [8]:
def get_spectral_centroid(y, sr):
    centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    centroid_feature = np.hstack([centroid.mean(), centroid.std()])
    
    return centroid_feature

In [9]:
def get_spectral_bandwidth(y, sr):
    bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    bandwidth_feature = np.hstack([bandwidth.mean(), bandwidth.std()])
    
    return bandwidth_feature

In [10]:
def get_spectral_rolloff(y, sr):
    rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    rolloff_feature = np.hstack([rolloff.mean(), rolloff.std()])
    
    return rolloff_feature

In [11]:
def get_spectral_contrast(y, sr):
    stft = np.abs(librosa.stft(y))
    contrast = librosa.feature.spectral_contrast(S=stft, sr=sr)
    contrast_mean = contrast.mean(axis=1)
    contrast_std = contrast.std(axis=1)
    contrast_feature = np.hstack([contrast_mean, contrast_std])
    
    return contrast_feature

In [12]:
def get_mfcc(y, sr, n_mfcc=20):
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    mfcc_mean = mfcc.mean(axis=1)
    mfcc_std = mfcc.std(axis=1)
    mfcc_feature = np.hstack([mfcc_mean, mfcc_std])
    
    return mfcc_feature

In [13]:
def get_chroma(y, sr):
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    chroma_mean = chroma.mean(axis=1)
    chroma_std = chroma.std(axis=1)
    chroma_feature = np.hstack([chroma_mean, chroma_std])
    
    return chroma_feature

In [18]:
files = ['full_songs/rock1.wav', 'full_songs/folk1.wav', librosa.example('nutcracker')]
y = []
sr = []

genre_labels = ['rock', 'folk', 'classical']
feature_labels = ['Tempo', 'STE_mean', 'STE_std', 'RMS_mean', 'RMS_std', 'ZCR_mean', 'ZCR_std', 'Centroid_mean',
                 'Centroid_std', 'Bandwidth_mean', 'Bandwidth_std', 'Roll-off_mean', 'Roll-off_std', 'Contrast0_mean',
                 'Contrast1_mean', 'Contrast2_mean', 'Contrast3_mean', 'Contrast4_mean', 'Contrast5_mean',
                 'Contrast6_mean', 'Contrast0_std', 'Contrast1_std', 'Contrast2_std', 'Contrast3_std',
                 'Contrast4_std', 'Contrast5_std', 'Contrast6_std', 'MFCC0_mean', 'MFCC1_mean', 'MFCC2_mean',
                 'MFCC3_mean', 'MFCC4_mean', 'MFCC5_mean', 'MFCC6_mean', 'MFCC7_mean', 'MFCC8_mean',
                 'MFCC9_mean', 'MFCC10_mean', 'MFCC11_mean', 'MFCC12_mean', 'MFCC13_mean', 'MFCC14_mean',
                 'MFCC15_mean', 'MFCC16_mean', 'MFCC17_mean', 'MFCC18_mean', 'MFCC19_mean', 'MFCC0_std',
                 'MFCC1_std', 'MFCC2_std', 'MFCC3_std', 'MFCC4_std', 'MFCC5_std', 'MFCC6_std', 'MFCC7_std',
                 'MFCC8_std', 'MFCC9_std', 'MFCC10_std', 'MFCC11_std', 'MFCC12_std', 'MFCC13_std', 'MFCC14_std',
                 'MFCC15_std', 'MFCC16_std', 'MFCC17_std', 'MFCC18_std', 'MFCC19_std', 'Chroma0_mean',
                 'Chroma1_mean', 'Chroma2_mean', 'Chroma3_mean', 'Chroma4_mean', 'Chroma5_mean', 'Chroma6_mean',
                 'Chroma7_mean', 'Chroma8_mean', 'Chroma9_mean', 'Chroma10_mean', 'Chroma11_mean', 'Chroma0_std',
                 'Chroma1_std', 'Chroma2_std', 'Chroma3_std', 'Chroma4_std', 'Chroma5_std', 'Chroma6_std',
                 'Chroma7_std', 'Chroma8_std', 'Chroma9_std', 'Chroma10_std', 'Chroma11_std']
feature_matrix = np.zeros((len(files), len(feature_labels)))

for i in range(len(files)):
    y.append([])
    sr.append([])
    y[i], sr[i] = librosa.load(files[i])
    
    tempo = get_tempo(y[i], sr[i])
    ste = get_short_time_energy(y[i])
    rms = get_root_mean_square_energy(y[i])
    zcr = get_zcr(y[i])
    centroid = get_spectral_centroid(y[i], sr[i])
    bandwidth = get_spectral_bandwidth(y[i], sr[i])
    rolloff = get_spectral_rolloff(y[i], sr[i])
    contrast = get_spectral_contrast(y[i], sr[i])
    mfcc = get_mfcc(y[i], sr[i])
    chroma = get_chroma(y[i], sr[i])
    
    feature_vector = np.hstack([tempo, ste, rms, zcr, centroid, bandwidth, rolloff, contrast, mfcc, chroma])
    #print(feature_vector, type(feature_vector), feature_vector.shape)
    
    feature_matrix[i] = feature_vector

#print(feature_matrix, type(feature_matrix), feature_matrix.shape)
df = pd.DataFrame(feature_matrix, columns=feature_labels)
df['Genre'] = genre_labels
df

Unnamed: 0,Tempo,STE_mean,STE_std,RMS_mean,RMS_std,ZCR_mean,ZCR_std,Centroid_mean,Centroid_std,Bandwidth_mean,...,Chroma3_std,Chroma4_std,Chroma5_std,Chroma6_std,Chroma7_std,Chroma8_std,Chroma9_std,Chroma10_std,Chroma11_std,Genre
0,103.359375,0.028578,0.024877,0.155321,0.066722,0.152468,0.071613,2823.62028,800.053923,2481.120925,...,0.271728,0.288866,0.317286,0.270064,0.318054,0.269691,0.317136,0.286482,0.25887,rock
1,123.046875,0.014937,0.018984,0.102064,0.067207,0.131463,0.090206,2526.139924,889.860637,2269.577977,...,0.334465,0.376314,0.195328,0.4102,0.192881,0.360457,0.14808,0.199893,0.369279,folk
2,107.666016,0.004111,0.004903,0.056452,0.030405,0.087196,0.062094,1450.410159,652.927523,1512.806314,...,0.267288,0.339607,0.201455,0.337387,0.327815,0.157673,0.264199,0.276678,0.36674,classical


In [12]:
df.corr()

Unnamed: 0,Tempo,STE_mean,STE_std,ZCR_mean,ZCR_std,Centroid_mean,Centroid_std,Bandwidth_mean,Bandwidth_std,Roll-off_mean,...,Chroma2_std,Chroma3_std,Chroma4_std,Chroma5_std,Chroma6_std,Chroma7_std,Chroma8_std,Chroma9_std,Chroma10_std,Chroma11_std
Tempo,1.000000,-0.272426,0.054634,-0.006658,0.853088,0.106439,0.643327,0.104536,0.960723,0.039167,...,-0.979382,0.964116,0.918545,-0.701703,0.957843,-0.962564,0.701272,-0.994854,-0.994385,0.684069
STE_mean,-0.272426,1.000000,0.945856,0.963969,0.269629,0.927714,0.561375,0.928427,0.005285,0.950768,...,0.461185,-0.007211,-0.630600,0.876684,-0.537367,0.001427,0.494885,0.368512,0.169077,-0.888187
STE_std,0.054634,0.945856,1.000000,0.998121,0.567596,0.998649,0.799595,0.998747,0.329581,0.999880,...,0.148207,0.317757,-0.344542,0.673068,-0.234532,-0.323237,0.750142,0.046817,-0.159991,-0.690955
ZCR_mean,-0.006658,0.963969,0.998121,1.000000,0.516077,0.993589,0.761291,0.993803,0.271106,0.998950,...,0.208532,0.259056,-0.401423,0.717125,-0.293663,-0.264639,0.708209,0.107942,-0.099200,-0.733955
ZCR_std,0.853088,0.269629,0.567596,0.516077,1.000000,0.609606,0.948275,0.608088,0.964376,0.554780,...,-0.730093,0.960995,0.577336,-0.226871,0.667224,-0.962578,0.970211,-0.795832,-0.903512,0.202984
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Chroma7_std,-0.962564,0.001427,-0.323237,-0.264639,-0.962578,-0.371968,-0.826760,-0.370191,-0.999977,-0.308546,...,0.887961,-0.999983,-0.777007,0.482318,-0.844114,1.000000,-0.868251,0.930148,0.985843,-0.460750
Chroma8_std,0.701272,0.494885,0.750142,0.708209,0.970211,0.783487,0.996933,0.782296,0.871562,0.739814,...,-0.542797,0.865367,0.362331,0.015830,0.466900,-0.868251,1.000000,-0.625432,-0.772774,-0.040279
Chroma9_std,-0.994854,0.368512,0.046817,0.107942,-0.795832,-0.005147,-0.562447,-0.003233,-0.927662,0.062277,...,0.994810,-0.932256,-0.953872,0.770280,-0.982022,0.930148,-0.625432,1.000000,0.978546,-0.754454
Chroma10_std,-0.994385,0.169077,-0.159991,-0.099200,-0.903512,-0.211063,-0.720732,-0.209192,-0.984695,-0.144688,...,0.952505,-0.986797,-0.871554,0.622368,-0.922063,0.985843,-0.772774,0.978546,1.000000,-0.603039
