In [1]:
import numpy as np
import pandas as pd
import librosa
import matplotlib.pyplot as plt

In [7]:
def get_tempo(y, sr):
    """Estimate the tempo of an audio signal.

    Parameters
    ----------
    y : audio time series, forwarded unchanged to ``librosa.feature.tempo``.
    sr : sampling rate of ``y``.

    Returns
    -------
    The value produced by ``librosa.feature.tempo`` (per librosa's
    documentation, the estimated tempo in beats per minute).
    """
    return librosa.feature.tempo(y=y, sr=sr)

In [60]:
def get_short_time_energy(y, hop_length=256, frame_length=512):
    """Summarise the short-time energy (STE) of a signal as (mean, std).

    The signal is cut into frames that start every ``hop_length`` samples
    and span up to ``frame_length`` samples. Frames near the end of the
    signal are shorter than ``frame_length`` because they are not padded.
    The energy of a frame is the sum of its squared magnitudes.

    Parameters
    ----------
    y : array-like
        Audio samples. Anything ``np.asarray`` accepts, so plain lists work.
    hop_length : int, default 256
        Number of samples between the starts of consecutive frames.
    frame_length : int, default 512
        Maximum number of samples per frame.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(2,)`` holding the mean and the standard deviation
        of the per-frame energies. Both entries are NaN for an empty signal.

    Raises
    ------
    ValueError
        If ``hop_length`` is 0 (raised by ``range``).
    """
    samples = np.asarray(y)

    # np.sum reduces each frame in compiled code. The builtin sum() would
    # iterate over the samples one Python object at a time, which is very
    # slow on full-length songs.
    ste = np.array([
        np.sum(np.abs(samples[start:start + frame_length] ** 2))
        for start in range(0, len(samples), hop_length)
    ])

    # No frames means no statistics. Return NaNs explicitly instead of
    # relying on the warning-emitting mean/std of an empty array.
    if ste.size == 0:
        return np.array([np.nan, np.nan])

    return np.hstack([ste.mean(), ste.std()])

In [21]:
def get_zcr(y):
    """Summarise the zero-crossing rate of a signal as (mean, std).

    Parameters
    ----------
    y : audio time series, passed to ``librosa.feature.zero_crossing_rate``.

    Returns
    -------
    numpy.ndarray
        Two values: the mean and the standard deviation taken over every
        entry of the array librosa returns.
    """
    rates = librosa.feature.zero_crossing_rate(y)
    return np.hstack((np.mean(rates), np.std(rates)))

In [28]:
def get_spectral_centroid(y, sr):
    """Summarise the spectral centroid of a signal as (mean, std).

    Parameters
    ----------
    y : audio time series, passed to ``librosa.feature.spectral_centroid``.
    sr : sampling rate of ``y``.

    Returns
    -------
    numpy.ndarray
        Two values: the mean and the standard deviation taken over every
        entry of the array librosa returns.
    """
    centroids = librosa.feature.spectral_centroid(y=y, sr=sr)
    return np.hstack((np.mean(centroids), np.std(centroids)))

In [33]:
def get_spectral_bandwidth(y, sr):
    """Summarise the spectral bandwidth of a signal as (mean, std).

    Parameters
    ----------
    y : audio time series, passed to ``librosa.feature.spectral_bandwidth``.
    sr : sampling rate of ``y``.

    Returns
    -------
    numpy.ndarray
        Two values: the mean and the standard deviation taken over every
        entry of the array librosa returns.
    """
    widths = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    return np.hstack((np.mean(widths), np.std(widths)))

In [36]:
def get_spectral_rolloff(y, sr):
    """Summarise the spectral roll-off of a signal as (mean, std).

    Parameters
    ----------
    y : audio time series, passed to ``librosa.feature.spectral_rolloff``.
    sr : sampling rate of ``y``.

    Returns
    -------
    numpy.ndarray
        Two values: the mean and the standard deviation taken over every
        entry of the array librosa returns.
    """
    rolloffs = librosa.feature.spectral_rolloff(y=y, sr=sr)
    return np.hstack((np.mean(rolloffs), np.std(rolloffs)))

In [42]:
def get_spectral_contrast(y, sr):
    """Summarise spectral contrast as per-row means followed by per-row stds.

    The magnitude of ``librosa.stft(y)`` is handed to
    ``librosa.feature.spectral_contrast`` as ``S``. Each row of the result
    is then reduced along axis 1.

    Parameters
    ----------
    y : audio time series.
    sr : sampling rate of ``y``.

    Returns
    -------
    numpy.ndarray
        All row means first, then all row standard deviations.
    """
    magnitude = np.abs(librosa.stft(y))
    bands = librosa.feature.spectral_contrast(S=magnitude, sr=sr)
    return np.hstack((np.mean(bands, axis=1), np.std(bands, axis=1)))

In [51]:
def get_mfcc(y, sr, n_mfcc=12):
    """Summarise MFCCs as per-row means followed by per-row stds.

    Parameters
    ----------
    y : audio time series, passed to ``librosa.feature.mfcc``.
    sr : sampling rate of ``y``.
    n_mfcc : value forwarded to librosa's ``n_mfcc`` argument (default 12).

    Returns
    -------
    numpy.ndarray
        All row means (reduced along axis 1) first, then all row standard
        deviations.
    """
    coefficients = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    return np.hstack((np.mean(coefficients, axis=1), np.std(coefficients, axis=1)))

In [71]:
def get_chroma(y, sr):
    """Summarise the chromagram as per-row means followed by per-row stds.

    Parameters
    ----------
    y : audio time series, passed to ``librosa.feature.chroma_stft``.
    sr : sampling rate of ``y``.

    Returns
    -------
    numpy.ndarray
        All row means (reduced along axis 1) first, then all row standard
        deviations.
    """
    chromagram = librosa.feature.chroma_stft(y=y, sr=sr)
    return np.hstack((np.mean(chromagram, axis=1), np.std(chromagram, axis=1)))

In [86]:
# Audio clips to analyse; genre_labels is paired with this list by position.
files = ['full_songs/rock1.wav', 'full_songs/folk1.wav', librosa.example('nutcracker')]
genre_labels = ['rock', 'folk', 'classical']

# Column names, in the same order the per-clip feature vector is stacked below.
feature_labels = ['Tempo', 'STE_mean', 'STE_std', 'ZCR_mean', 'ZCR_std', 'Centroid_mean', 'Centroid_std',
                 'Bandwidth_mean', 'Bandwidth_std', 'Roll-off_mean', 'Roll-off_std', 'Contrast0_mean',
                 'Contrast1_mean', 'Contrast2_mean', 'Contrast3_mean', 'Contrast4_mean', 'Contrast5_mean',
                 'Contrast6_mean', 'Contrast0_std', 'Contrast1_std', 'Contrast2_std', 'Contrast3_std',
                 'Contrast4_std', 'Contrast5_std', 'Contrast6_std', 'MFCC0_mean', 'MFCC1_mean', 'MFCC2_mean',
                 'MFCC3_mean', 'MFCC4_mean', 'MFCC5_mean', 'MFCC6_mean', 'MFCC7_mean', 'MFCC8_mean',
                 'MFCC9_mean', 'MFCC10_mean', 'MFCC11_mean', 'MFCC0_std', 'MFCC1_std', 'MFCC2_std',
                 'MFCC3_std', 'MFCC4_std', 'MFCC5_std', 'MFCC6_std', 'MFCC7_std', 'MFCC8_std', 'MFCC9_std',
                 'MFCC10_std', 'MFCC11_std', 'Chroma0_mean', 'Chroma1_mean', 'Chroma2_mean', 'Chroma3_mean',
                 'Chroma4_mean', 'Chroma5_mean', 'Chroma6_mean', 'Chroma7_mean', 'Chroma8_mean', 'Chroma9_mean',
                 'Chroma10_mean', 'Chroma11_mean', 'Chroma0_std', 'Chroma1_std', 'Chroma2_std', 'Chroma3_std',
                 'Chroma4_std', 'Chroma5_std', 'Chroma6_std', 'Chroma7_std', 'Chroma8_std', 'Chroma9_std',
                 'Chroma10_std', 'Chroma11_std']

# Decoded signals and their sampling rates, one entry per file.
y = []
sr = []

# One row per clip, one column per label. The shape is derived from the inputs
# so that adding a clip or a feature column needs no hard-coded size to change.
feature_matrix = np.zeros((len(files), len(feature_labels)))

for i, path in enumerate(files):
    signal, rate = librosa.load(path)
    y.append(signal)
    sr.append(rate)

    tempo = get_tempo(signal, rate)
    ste = get_short_time_energy(signal)
    zcr = get_zcr(signal)
    centroid = get_spectral_centroid(signal, rate)
    bandwidth = get_spectral_bandwidth(signal, rate)
    rolloff = get_spectral_rolloff(signal, rate)
    contrast = get_spectral_contrast(signal, rate)
    mfcc = get_mfcc(signal, rate)
    chroma = get_chroma(signal, rate)

    # The stacking order must match feature_labels. A length mismatch makes
    # the row assignment below raise.
    feature_vector = np.hstack([tempo, ste, zcr, centroid, bandwidth, rolloff, contrast, mfcc, chroma])
    feature_matrix[i] = feature_vector

df = pd.DataFrame(feature_matrix, columns=feature_labels)
df['Genre'] = genre_labels
df

Unnamed: 0,Tempo,STE_mean,STE_std,ZCR_mean,ZCR_std,Centroid_mean,Centroid_std,Bandwidth_mean,Bandwidth_std,Roll-off_mean,...,Chroma3_std,Chroma4_std,Chroma5_std,Chroma6_std,Chroma7_std,Chroma8_std,Chroma9_std,Chroma10_std,Chroma11_std,Genre
0,103.359375,14.632006,12.053217,0.152468,0.071613,2823.62028,800.053923,2481.120925,349.189438,5415.806879,...,0.271728,0.288866,0.317286,0.270064,0.318054,0.269691,0.317136,0.286482,0.25887,rock
1,123.046875,7.647856,9.506457,0.131463,0.090206,2526.139924,889.860637,2269.577977,389.251582,4610.485044,...,0.334465,0.376314,0.195328,0.4102,0.192881,0.360457,0.14808,0.199893,0.369279,folk
2,107.666016,2.104996,2.467305,0.087196,0.062094,1450.410159,652.927523,1512.806314,345.727501,2539.75435,...,0.267288,0.339607,0.201455,0.337387,0.327815,0.157673,0.264199,0.276678,0.36674,classical
