In [1]:
# Libraries for feature extraction:
import librosa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
from PIL import Image
import pathlib
from tqdm import tqdm


In [2]:
# Root directory of the audio data. It is passed as `src_dir` to
# read_process_songs / read_process_songs2, which look inside it for one
# sub-folder per key of the `genres` mapping.
audio_dir = './Data/genres/'

In [3]:
# Label encoding for the genres: each genre name maps to its position
# (0-9) in the list below. The names are also used as folder names.
genres = {
    genre_name: label
    for label, genre_name in enumerate([
        'blues', 'classical', 'country', 'disco', 'hiphop',
        'jazz', 'metal', 'pop', 'reggae', 'rock',
    ])
}

In [4]:
#Get selected features from each song using librosa and numpy:

def get_features(y, sr, n_fft = 1024, hop_length = 512):
    """Summarise one audio signal as a flat dict of scalar descriptors.

    Frame-level features are computed with librosa and each of them is
    collapsed to its mean and standard deviation (keys ``<name>_mean`` and
    ``<name>_std``). A tempo estimate is added under ``'tempo'``.

    Parameters
    ----------
    y : np.ndarray
        Audio samples, e.g. the first value returned by ``librosa.load``.
    sr : int
        Sampling rate of ``y``.
    n_fft : int, default 1024
        Window size for the spectral features; also used as ``frame_length``
        for the zero-crossing rate and RMS energy.
    hop_length : int, default 512
        Hop between consecutive analysis frames.

    Returns
    -------
    dict
        39 entries: mean/std for 'centroid', 'roloff', 'flux', 'rmse',
        'zcr', 'chroma' and 'mfcc_0' ... 'mfcc_12', plus 'tempo'.
    """
    # The signal is always passed as `y=y`: recent librosa releases accept the
    # feature-function arguments by keyword only, and older ones accept both.
    stft_args = {'n_fft': n_fft, 'hop_length': hop_length}
    features = {
        'centroid': librosa.feature.spectral_centroid(y=y, sr=sr, **stft_args).ravel(),
        # NOTE: the 'roloff' spelling is kept on purpose, it is a column name
        # of the exported feature tables.
        'roloff': librosa.feature.spectral_rolloff(y=y, sr=sr, **stft_args).ravel(),
        # hop_length is forwarded so the onset envelope uses the same frame
        # grid as the other features. n_fft is deliberately NOT forwarded:
        # doing so would change the flux values for default callers.
        'flux': librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length).ravel(),
        'rmse': librosa.feature.rms(y=y, frame_length=n_fft, hop_length=hop_length).ravel(),
        'zcr': librosa.feature.zero_crossing_rate(y=y, frame_length=n_fft, hop_length=hop_length).ravel(),
        'chroma': librosa.feature.chroma_stft(y=y, sr=sr, **stft_args).ravel(),
    }

    # One entry per MFCC coefficient. `sr` must be passed explicitly,
    # otherwise librosa assumes its own default sampling rate.
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13, **stft_args)
    for idx, v_mfcc in enumerate(mfcc):
        features['mfcc_{}'.format(idx)] = v_mfcc.ravel()

    # Collapse every frame-level series to its first two moments.
    dict_agg_features = {}
    for name, values in features.items():
        dict_agg_features['{}_mean'.format(name)] = np.mean(values)
        dict_agg_features['{}_std'.format(name)] = np.std(values)

    # Tempo: prefer librosa.feature.tempo (newer releases) and fall back to
    # librosa.beat.tempo where the new name does not exist yet.
    estimate_tempo = getattr(librosa.feature, 'tempo', None) or librosa.beat.tempo
    dict_agg_features['tempo'] = estimate_tempo(y=y, sr=sr)[0]

    return dict_agg_features

In [24]:
#Reading all audio files and calculating features for each of them. This function generates a row for each song in the directory:

def read_process_songs(src_dir, debug = True):
    """Extract one feature row per audio file found in the genre folders.

    For every key of the module-level ``genres`` mapping the folder
    ``src_dir/<genre>`` is walked recursively. Each file is loaded with
    ``librosa.load``, passed to ``get_features`` and tagged with its numeric
    genre label under the ``'genre'`` key.

    Parameters
    ----------
    src_dir : str
        Directory containing one sub-folder per genre name.
    debug : bool, default True
        When true, print the path of every file before it is loaded.

    Returns
    -------
    list of dict
        One feature dict per file, in a deterministic order (genres in
        ``genres`` order, folders and files sorted by name).
    """
    arr_features = []

    for genre_name, genre_label in genres.items():
        # os.path.join works whether or not src_dir ends with a separator.
        folder = os.path.join(src_dir, genre_name)

        for root, subdirs, files in os.walk(folder):
            # Editing `subdirs` in place steers os.walk: hidden folders
            # (e.g. .ipynb_checkpoints) are skipped and the rest is visited
            # in sorted order so that reruns produce the same row order.
            subdirs[:] = sorted(d for d in subdirs if not d.startswith('.'))

            for file in sorted(files):
                # Hidden files such as .DS_Store are not audio tracks.
                if file.startswith('.'):
                    continue

                # Build the path from `root`, not `folder`: for files that sit
                # in a nested sub-folder `folder + "/" + file` does not exist.
                file_name = os.path.join(root, file)

                # Debug process (printed before loading so that a failing
                # file is the last one shown)
                if debug:
                    print("Reading file: {}".format(file_name))

                # Read the audio file
                signal, sr = librosa.load(file_name)

                # Append the result to the data structure
                features = get_features(signal, sr)
                features['genre'] = genre_label
                arr_features.append(features)
    return arr_features

In [14]:
#Reading all audio files and calculating features for each of them.
#This function generates 2 rows for each song by splitting it into two clips: the first 15 s and everything after the 15 s mark:

def read_process_songs2(src_dir, debug = True, clip_seconds = 15):
    """Extract two feature rows per audio file: one per clip.

    Works like the one-row-per-song reader, but every track is cut at
    ``clip_seconds``: the first clip is the audio before that mark, the
    second clip is everything after it. Each non-empty clip is passed to
    ``get_features`` and tagged with the numeric genre label under the
    ``'genre'`` key.

    Parameters
    ----------
    src_dir : str
        Directory containing one sub-folder per key of ``genres``.
    debug : bool, default True
        When true, print the path of every file before it is loaded.
    clip_seconds : float, default 15
        Position of the cut, in seconds from the start of the track.

    Returns
    -------
    list of dict
        Feature dicts in a deterministic order (genres in ``genres`` order,
        folders and files sorted by name, first clip before second clip).
    """
    import warnings

    arr_features = []

    for genre_name, genre_label in genres.items():
        # os.path.join works whether or not src_dir ends with a separator.
        folder = os.path.join(src_dir, genre_name)

        for root, subdirs, files in os.walk(folder):
            # Skip hidden folders and fix the traversal order (in-place edit
            # of `subdirs` is how os.walk is steered).
            subdirs[:] = sorted(d for d in subdirs if not d.startswith('.'))

            for file in sorted(files):
                # Hidden files such as .DS_Store are not audio tracks.
                if file.startswith('.'):
                    continue

                # Build the path from `root`, not `folder`, so files in
                # nested sub-folders resolve correctly.
                file_name = os.path.join(root, file)

                # Debug process
                if debug:
                    print("Reading file: {}".format(file_name))

                # Decode the file once and slice it, instead of decoding it
                # twice with duration=/offset=.
                # NOTE(review): identical to the two-load version when the
                # file needs no resampling; with resampling, samples right at
                # the cut may differ marginally - confirm if that matters.
                signal, sr = librosa.load(file_name)
                split_at = int(clip_seconds * sr)
                clips = (signal[:split_at], signal[split_at:])

                # Append the result to the data structure
                for clip in clips:
                    if len(clip) == 0:
                        # Track not longer than clip_seconds: there is no
                        # second clip to describe.
                        warnings.warn("Skipping empty clip of {}".format(file_name))
                        continue
                    features = get_features(clip, sr)
                    features['genre'] = genre_label
                    arr_features.append(features)
    return arr_features

In [25]:
# Extract the features of every track: a list with one feature dict per song.
# (The conversion to a DataFrame happens in a separate cell.)
features = read_process_songs(audio_dir,debug=False)

In [26]:
# Extract the features clip by clip: a list of feature dicts where every song
# is split at the 15 s mark and each clip gets its own dict.
# (The conversion to a DataFrame happens in a separate cell.)
features2 = read_process_songs2(audio_dir,debug=False)

In [10]:
# Convert the per-song feature dicts to a DataFrame:
# one row per song, one column per feature plus the 'genre' label.
df_features1 = pd.DataFrame(features)

In [11]:
# Sanity check of the per-song table: row count, column dtypes and non-null counts.
df_features1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 990 entries, 0 to 989
Data columns (total 40 columns):
centroid_mean    990 non-null float64
centroid_std     990 non-null float64
chroma_mean      990 non-null float64
chroma_std       990 non-null float64
flux_mean        990 non-null float64
flux_std         990 non-null float64
genre            990 non-null int64
mfcc_0_mean      990 non-null float64
mfcc_0_std       990 non-null float64
mfcc_10_mean     990 non-null float64
mfcc_10_std      990 non-null float64
mfcc_11_mean     990 non-null float64
mfcc_11_std      990 non-null float64
mfcc_12_mean     990 non-null float64
mfcc_12_std      990 non-null float64
mfcc_1_mean      990 non-null float64
mfcc_1_std       990 non-null float64
mfcc_2_mean      990 non-null float64
mfcc_2_std       990 non-null float64
mfcc_3_mean      990 non-null float64
mfcc_3_std       990 non-null float64
mfcc_4_mean      990 non-null float64
mfcc_4_std       990 non-null float64
mfcc_5_mean      990 no

In [19]:
# Summary statistics (count, mean, std, quartiles, min/max) of every column.
df_features1.describe()

Unnamed: 0,centroid_mean,centroid_std,chroma_mean,chroma_std,flux_mean,flux_std,genre,mfcc_0_mean,mfcc_0_std,mfcc_10_mean,...,mfcc_8_std,mfcc_9_mean,mfcc_9_std,rmse_mean,rmse_std,roloff_mean,roloff_std,tempo,zcr_mean,zcr_std
count,990.0,990.0,990.0,990.0,990.0,990.0,990.0,990.0,990.0,990.0,...,990.0,990.0,990.0,990.0,990.0,990.0,990.0,990.0,990.0,990.0
mean,2101.633293,651.510202,0.453753,0.290499,1.417293,1.55427,4.5,-216.532282,59.482719,-5.555799,...,10.04393,7.86516,9.707282,0.128891,0.053661,4360.189433,1325.849435,121.042726,0.103695,0.054597
std,700.008768,280.471797,0.088156,0.018794,0.340532,0.678716,2.873733,97.954624,21.046591,6.78872,...,2.003818,7.874301,1.923625,0.064573,0.029559,1557.596573,523.244123,20.918251,0.041944,0.026151
min,534.046982,73.853662,0.211247,0.209446,0.608654,0.163264,0.0,-617.812439,13.54466,-26.293921,...,6.10359,-12.293718,5.337916,0.005265,0.004148,730.280348,124.919782,69.837416,0.021691,0.007039
25%,1525.887998,438.330101,0.39247,0.281165,1.161509,1.075469,2.0,-272.108604,44.643503,-10.491795,...,8.490852,1.8356,8.304592,0.085169,0.034457,3133.797163,917.915046,103.359375,0.070116,0.035725
50%,2112.958377,600.494099,0.46069,0.29265,1.363733,1.448091,4.5,-192.06443,58.017269,-5.53082,...,9.807375,7.411592,9.443274,0.119777,0.048122,4483.452704,1272.71435,117.453835,0.099473,0.049433
75%,2623.693136,814.389822,0.511269,0.302494,1.630967,1.981211,7.0,-147.179733,72.448988,-0.592356,...,11.30649,14.226377,10.839503,0.173146,0.065561,5349.05666,1672.12668,135.999178,0.132425,0.066568
max,4171.532799,1921.069855,0.689486,0.342938,2.450388,4.371769,9.0,-47.043102,166.846832,17.014378,...,18.426645,27.413534,19.880682,0.394304,0.176083,8569.415467,3423.674142,184.570312,0.274505,0.186829


In [20]:
# Mean of every feature per genre label, to compare the genres with each other.
df_features1.groupby('genre').mean()

Unnamed: 0_level_0,centroid_mean,centroid_std,chroma_mean,chroma_std,flux_mean,flux_std,mfcc_0_mean,mfcc_0_std,mfcc_10_mean,mfcc_10_std,...,mfcc_8_std,mfcc_9_mean,mfcc_9_std,rmse_mean,rmse_std,roloff_mean,roloff_std,tempo,zcr_mean,zcr_std
genre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1649.174129,521.729407,0.429335,0.300208,1.391695,1.526578,-235.647698,58.470357,-9.232909,9.524527,...,10.207779,8.241184,9.855519,0.134283,0.057289,3458.259303,1176.362606,124.198627,0.079003,0.042369
1,1294.469166,325.540346,0.318281,0.296181,0.99983,0.726121,-385.371123,69.986975,-6.080446,10.641368,...,10.076955,0.999549,10.111902,0.042918,0.0236,2348.734258,712.39503,125.937531,0.077573,0.028184
2,1780.211424,591.585561,0.432005,0.291483,1.256552,1.327519,-214.801876,54.211695,-6.355206,9.487279,...,10.545299,6.56873,10.322946,0.123982,0.043008,3700.235699,1318.532011,120.419897,0.083142,0.042769
3,2510.464256,729.988728,0.476346,0.286887,1.585123,1.873694,-168.231954,57.700228,-6.598454,8.997502,...,9.518864,10.765014,9.269775,0.133183,0.058948,5294.735666,1424.064066,120.014979,0.129522,0.064511
4,2427.412052,886.84376,0.528858,0.288674,1.818005,2.064275,-180.829765,67.659508,-1.928479,9.916088,...,11.032898,10.395469,10.381075,0.174293,0.08702,5133.454403,1623.483811,110.234545,0.108029,0.076417
5,1707.152407,489.03782,0.374921,0.305565,1.210625,1.26518,-282.082079,56.132658,-3.81922,8.707425,...,8.971317,3.913932,8.618519,0.086166,0.037426,3540.38832,1161.495485,119.535055,0.077274,0.037811
6,2538.286261,465.981696,0.548349,0.262834,1.170334,0.993727,-135.32354,36.610158,-10.858198,7.564052,...,8.330931,16.851456,7.970945,0.151905,0.038245,5035.929491,870.725973,123.648112,0.147848,0.047562
7,2926.187181,997.189077,0.459242,0.297994,1.627127,1.948215,-142.400219,65.470358,-2.097397,9.893075,...,10.8858,4.27434,10.558405,0.196715,0.089142,6351.014439,1847.180973,113.377624,0.133928,0.087894
8,2035.329856,878.192527,0.505532,0.290142,1.829774,2.431211,-240.482293,76.250681,-0.568495,10.460885,...,11.470647,7.804395,11.109662,0.118692,0.059269,4262.257737,1806.079774,132.898062,0.091066,0.066675
9,2147.646198,629.013094,0.464663,0.285018,1.283862,1.386182,-180.152268,52.334573,-8.019183,8.368601,...,9.39881,8.837532,8.874071,0.126774,0.042661,4476.885016,1318.174622,120.162833,0.109561,0.051777


In [12]:
#Save as .csv:
# Create the output folder first: to_csv does not create missing directories
# and would fail on a fresh checkout.
os.makedirs('./Features', exist_ok=True)
df_features1.to_csv('./Features/features1.csv', index=False)


In [16]:
# Convert the per-clip feature dicts to a DataFrame:
# one row per clip, one column per feature plus the 'genre' label.
df_features2=pd.DataFrame(features2)

In [17]:
# Sanity check of the per-clip table: row count, column dtypes and non-null counts.
df_features2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1980 entries, 0 to 1979
Data columns (total 40 columns):
centroid_mean    1980 non-null float64
centroid_std     1980 non-null float64
chroma_mean      1980 non-null float64
chroma_std       1980 non-null float64
flux_mean        1980 non-null float64
flux_std         1980 non-null float64
genre            1980 non-null int64
mfcc_0_mean      1980 non-null float64
mfcc_0_std       1980 non-null float64
mfcc_10_mean     1980 non-null float64
mfcc_10_std      1980 non-null float64
mfcc_11_mean     1980 non-null float64
mfcc_11_std      1980 non-null float64
mfcc_12_mean     1980 non-null float64
mfcc_12_std      1980 non-null float64
mfcc_1_mean      1980 non-null float64
mfcc_1_std       1980 non-null float64
mfcc_2_mean      1980 non-null float64
mfcc_2_std       1980 non-null float64
mfcc_3_mean      1980 non-null float64
mfcc_3_std       1980 non-null float64
mfcc_4_mean      1980 non-null float64
mfcc_4_std       1980 non-null float6

In [21]:
# Summary statistics (count, mean, std, quartiles, min/max) of every column.
df_features2.describe()

Unnamed: 0,centroid_mean,centroid_std,chroma_mean,chroma_std,flux_mean,flux_std,genre,mfcc_0_mean,mfcc_0_std,mfcc_10_mean,...,mfcc_8_std,mfcc_9_mean,mfcc_9_std,rmse_mean,rmse_std,roloff_mean,roloff_std,tempo,zcr_mean,zcr_std
count,1980.0,1980.0,1980.0,1980.0,1980.0,1980.0,1980.0,1980.0,1980.0,1980.0,...,1980.0,1980.0,1980.0,1980.0,1980.0,1980.0,1980.0,1980.0,1980.0,1980.0
mean,2101.55824,639.050011,0.453808,0.289594,1.416059,1.547285,4.5,-216.869493,56.843673,-5.68876,...,9.859573,8.013199,9.54967,0.128882,0.052513,4360.00288,1301.778379,121.276821,0.103653,0.053397
std,708.6011,286.974973,0.090838,0.019949,0.344241,0.694053,2.873007,100.165628,20.993436,7.044022,...,2.089142,8.063983,2.03766,0.065427,0.029687,1574.057491,533.158762,21.157112,0.042932,0.02697
min,528.695515,43.333355,0.126825,0.208786,0.559117,0.15994,0.0,-653.586731,9.54723,-27.60458,...,5.667604,-17.41501,5.133595,0.003434,0.001955,716.553112,117.940624,63.024009,0.021119,0.006806
25%,1530.083938,423.411856,0.393762,0.278927,1.158767,1.060243,2.0,-273.499741,42.148064,-10.781512,...,8.281676,1.79923,8.084083,0.085034,0.032853,3121.382684,904.837254,103.359375,0.069603,0.034104
50%,2116.458414,591.284539,0.461585,0.292046,1.356086,1.434154,4.5,-193.411926,55.430264,-5.548859,...,9.626751,7.71823,9.276059,0.119796,0.047165,4464.828128,1238.516183,117.453835,0.099513,0.04784
75%,2614.461215,809.72014,0.513838,0.302552,1.624592,1.970705,7.0,-145.270535,69.967079,-0.485506,...,11.108516,14.81529,10.706733,0.172842,0.065846,5377.46966,1647.756139,135.999178,0.133689,0.066579
max,4600.185306,1974.885127,0.703675,0.346537,2.540177,4.458624,9.0,-20.707417,179.063538,17.3883,...,19.41573,35.408268,22.664005,0.419707,0.175881,9219.539099,3401.919678,184.570312,0.322515,0.188076


In [22]:
# Mean of every feature per genre label for the per-clip table.
df_features2.groupby('genre').mean()

Unnamed: 0_level_0,centroid_mean,centroid_std,chroma_mean,chroma_std,flux_mean,flux_std,mfcc_0_mean,mfcc_0_std,mfcc_10_mean,mfcc_10_std,...,mfcc_8_std,mfcc_9_mean,mfcc_9_std,rmse_mean,rmse_std,roloff_mean,roloff_std,tempo,zcr_mean,zcr_std
genre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1649.166358,513.871317,0.429333,0.299494,1.391943,1.524789,-236.201765,57.450334,-9.496317,9.420878,...,10.079044,8.544552,9.730911,0.13428,0.056501,3458.183768,1159.761565,122.972046,0.078974,0.041548
1,1294.474279,302.26795,0.318706,0.295385,1.009291,0.7174,-387.010268,62.405932,-6.403488,10.229843,...,9.629743,1.377254,9.772892,0.042916,0.021419,2348.746084,667.593928,125.408352,0.077541,0.026442
2,1780.145658,582.002577,0.432255,0.290968,1.254515,1.323211,-215.052598,52.099552,-6.476244,9.349416,...,10.392176,6.701897,10.18328,0.12398,0.041942,3700.080679,1299.261805,121.709202,0.083107,0.041746
3,2510.376417,722.083473,0.476409,0.28607,1.581576,1.863743,-168.317491,56.181447,-6.674549,8.904292,...,9.401271,10.847339,9.149089,0.133179,0.058569,5294.632759,1411.858852,120.485812,0.129465,0.063502
4,2427.159708,875.882914,0.52874,0.287895,1.814285,2.056589,-180.985945,65.727346,-1.975569,9.804362,...,10.886732,10.445985,10.255496,0.174228,0.08602,5132.764345,1601.513892,110.133916,0.107997,0.075283
5,1707.061399,474.332042,0.374926,0.304241,1.209307,1.260488,-282.394911,53.575919,-4.008376,8.475389,...,8.696112,4.122431,8.380917,0.086168,0.036737,3540.136717,1130.588309,120.483552,0.077241,0.036403
6,2538.251652,454.675197,0.548536,0.261423,1.168228,0.988953,-135.430344,34.826055,-10.977199,7.434,...,8.219835,16.974509,7.845548,0.151905,0.037282,5035.874302,851.545341,125.335122,0.147795,0.046482
7,2926.063427,979.359581,0.459444,0.297095,1.623445,1.931345,-142.430812,61.668673,-2.100035,9.741295,...,10.705618,4.274415,10.412562,0.196703,0.086773,6350.741829,1810.167016,113.353968,0.133868,0.086146
8,2035.275286,870.91659,0.50546,0.289376,1.827014,2.426273,-240.595669,75.325189,-0.629795,10.377813,...,11.370485,7.871084,11.040129,0.118694,0.058832,4262.070521,1793.890193,132.807573,0.091025,0.065916
9,2147.60822,615.108469,0.464276,0.283991,1.280988,1.380061,-180.275132,49.176285,-8.146029,8.228872,...,9.21471,8.972524,8.725874,0.126771,0.041058,4476.797798,1291.60289,120.078668,0.109522,0.0505


In [23]:
# Save the per-clip table as .csv. The output folder is created first because
# to_csv does not create missing directories.
os.makedirs('./Features', exist_ok=True)
df_features2.to_csv('./Features/features2.csv', index=False)