In [40]:
# Libraries for feature extraction:
import librosa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
from PIL import Image
import pathlib
from tqdm import tqdm


In [41]:
# Root folder of the audio dataset; each genre has its own sub-folder below it.
audio_dir = "./Data/genres/"

In [11]:
# Genre name -> integer class label. Labels are assigned in the order the
# genre names are listed here, starting at 0.
genres = {
    name: label
    for label, name in enumerate(
        ['blues', 'classical', 'country', 'disco', 'hiphop',
         'jazz', 'metal', 'pop', 'reggae', 'rock']
    )
}

In [42]:
#Generate spectrogram for every song:
# One PNG per audio clip is written to ./Spectrograms/<genre>/, drawn from at
# most the first 15 seconds of the clip with the axes hidden.
cmap = plt.get_cmap('inferno')

# A single figure is reused for every clip and cleared after each save.
fig = plt.figure(figsize=(10,10))

gen=list(genres.keys())

for g in tqdm(gen):
    pathlib.Path(f'./Spectrograms/{g}').mkdir(parents=True, exist_ok=True)
    for filename in os.listdir(f'{audio_dir}/{g}'):
        songname = f'{audio_dir}/{g}/{filename}'
        # Skip hidden files (e.g. .DS_Store) and sub-directories: they are not
        # audio clips and would make librosa.load fail.
        if filename.startswith('.') or not os.path.isfile(songname):
            continue
        y, sr = librosa.load(songname, mono=True, duration=15)
        plt.specgram(y, NFFT=1024, Fs=2, Fc=0, noverlap=128, cmap=cmap, sides='default', mode='default', scale='dB');
        plt.axis('off');
        # Output name: the last three characters of the file name are dropped,
        # then every remaining dot is removed.
        plt.savefig(f'./Spectrograms/{g}/{filename[:-3].replace(".", "")}.png')
        plt.clf()

# Close the reused figure so the cell does not render an empty figure at the end.
plt.close(fig)

<Figure size 720x720 with 0 Axes>

In [12]:
#Get selected features from each song using librosa and numpy:

def get_features(y, sr, n_fft = 1024, hop_length = 512):
    """Summarise one audio signal as a flat dict of scalar features.

    Frame-level descriptors are computed with librosa and each one is reduced
    to its mean and standard deviation over all of its values (the chroma
    matrix is flattened first, so its statistics pool every bin and frame).
    A tempo estimate is appended as the last entry.

    Parameters
    ----------
    y : audio time series, as returned by ``librosa.load``.
    sr : sampling rate of ``y``.
    n_fft : FFT size for the spectral features; also used as the frame length
        for the RMS and zero-crossing-rate features.
    hop_length : hop between successive frames, in samples.

    Returns
    -------
    dict
        ``'<name>_mean'`` and ``'<name>_std'`` for ``centroid``, ``roloff``,
        ``flux``, ``rmse``, ``zcr``, ``chroma`` and ``mfcc_0`` .. ``mfcc_12``
        (inserted in that order), followed by ``'tempo'``.
    """
    # The audio is always passed as ``y=...``: recent librosa releases accept
    # it by keyword only, and older releases accept the keyword form as well.
    spectral_args = dict(n_fft=n_fft, hop_length=hop_length)
    frame_args = dict(frame_length=n_fft, hop_length=hop_length)

    # Key names (including the 'roloff' spelling) become output column names,
    # so both the names and their insertion order are kept stable.
    features = {
        'centroid': librosa.feature.spectral_centroid(y=y, sr=sr, **spectral_args).ravel(),
        'roloff': librosa.feature.spectral_rolloff(y=y, sr=sr, **spectral_args).ravel(),
        # Onset strength is computed with librosa's default framing; n_fft and
        # hop_length are intentionally not forwarded here.
        'flux': librosa.onset.onset_strength(y=y, sr=sr).ravel(),
        'rmse': librosa.feature.rms(y=y, **frame_args).ravel(),
        'zcr': librosa.feature.zero_crossing_rate(y=y, **frame_args).ravel(),
        'chroma': librosa.feature.chroma_stft(y=y, sr=sr, **spectral_args).ravel(),
    }

    # Each of the 13 MFCC coefficients is treated as its own feature.
    # ``sr`` is forwarded so the mel filterbank matches the signal's real rate.
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13, **spectral_args)
    for idx, v_mfcc in enumerate(mfcc):
        features['mfcc_{}'.format(idx)] = v_mfcc.ravel()

    # Collapse every feature to two statistics.
    dict_agg_features = {}
    for name, values in features.items():
        dict_agg_features['{}_mean'.format(name)] = np.mean(values)
        dict_agg_features['{}_std'.format(name)] = np.std(values)

    # The tempo estimator lives in librosa.feature in newer releases and in
    # librosa.beat in older ones; use whichever is available.
    tempo_fn = getattr(librosa.feature, 'tempo', None)
    if tempo_fn is None:
        tempo_fn = librosa.beat.tempo
    dict_agg_features['tempo'] = tempo_fn(y=y, sr=sr)[0]

    return dict_agg_features

In [19]:
#Reading all audio files and calculating features for each of them:

def read_process_songs(src_dir, debug = True):
    """Load every audio file of every genre and extract its features.

    For each genre name in the module-level ``genres`` mapping, the folder
    ``<src_dir>/<genre>`` is walked recursively; every file found is loaded
    with ``librosa.load`` and passed to ``get_features``.

    Parameters
    ----------
    src_dir : path of the folder that contains one sub-folder per genre.
    debug : when true, print the path of each file before it is loaded.

    Returns
    -------
    list of dict
        One feature dict per audio file, as returned by ``get_features``,
        with an extra ``'genre'`` key holding the genre's integer label.
    """
    arr_features = []

    for genre_name, genre_label in genres.items():
        folder = os.path.join(src_dir, genre_name)

        # Read files from the genre folder (and any nested folders).
        for root, subdirs, files in os.walk(folder):
            # Sorting in place fixes the traversal order, so the returned rows
            # come out in the same order on every run.
            subdirs.sort()
            for file in sorted(files):
                # Build the path from `root`, not `folder`, so files found in
                # nested directories resolve correctly.
                file_name = os.path.join(root, file)

                # Debug process
                if debug:
                    print("Reading file: {}".format(file_name))

                # Read the audio file
                signal, sr = librosa.load(file_name)

                # Append the result to the data structure
                features = get_features(signal, sr)
                features['genre'] = genre_label
                arr_features.append(features)
    return arr_features

In [38]:
# Extract the features of every song under audio_dir: the result is a list
# with one feature dict per audio file. debug=True prints each file path.
features = read_process_songs(audio_dir,debug=True)

In [21]:
#Convert to dataframe:
# Each feature dict becomes one row; the dict keys (including 'genre') become columns.
df_features = pd.DataFrame(features)

In [31]:
#Save as .csv:
# Create the output folder first so the export does not fail when ./Features
# does not exist yet.
pathlib.Path('./Features').mkdir(parents=True, exist_ok=True)
# index=False keeps the row index out of the file.
df_features.to_csv('./Features/features.csv', index=False)

In [32]:
# Sanity check: list every column with its non-null count and dtype.
df_features.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 40 columns):
centroid_mean    1000 non-null float64
centroid_std     1000 non-null float64
chroma_mean      1000 non-null float64
chroma_std       1000 non-null float64
flux_mean        1000 non-null float64
flux_std         1000 non-null float64
genre            1000 non-null int64
mfcc_0_mean      1000 non-null float64
mfcc_0_std       1000 non-null float64
mfcc_10_mean     1000 non-null float64
mfcc_10_std      1000 non-null float64
mfcc_11_mean     1000 non-null float64
mfcc_11_std      1000 non-null float64
mfcc_12_mean     1000 non-null float64
mfcc_12_std      1000 non-null float64
mfcc_1_mean      1000 non-null float64
mfcc_1_std       1000 non-null float64
mfcc_2_mean      1000 non-null float64
mfcc_2_std       1000 non-null float64
mfcc_3_mean      1000 non-null float64
mfcc_3_std       1000 non-null float64
mfcc_4_mean      1000 non-null float64
mfcc_4_std       1000 non-null float64

In [43]:
# Summary statistics (count, mean, std, min, quartiles, max) for every column.
df_features.describe()

Unnamed: 0,centroid_mean,centroid_std,chroma_mean,chroma_std,flux_mean,flux_std,genre,mfcc_0_mean,mfcc_0_std,mfcc_10_mean,...,mfcc_8_std,mfcc_9_mean,mfcc_9_std,rmse_mean,rmse_std,roloff_mean,roloff_std,tempo,zcr_mean,zcr_std
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,...,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,2099.884503,651.228254,0.453887,0.290454,1.416929,1.55438,4.5,-216.715982,59.474524,-5.556559,...,10.044678,7.913355,9.70669,0.128776,0.053664,4357.599098,1325.492437,121.097859,0.103601,0.054587
std,697.925111,279.687395,0.087931,0.018798,0.340395,0.677287,2.873719,98.037566,21.042905,6.773753,...,2.00248,7.863547,1.919358,0.064564,0.029613,1553.603633,522.318652,20.907121,0.041816,0.026082
min,534.046982,73.853662,0.211247,0.209446,0.608654,0.163264,0.0,-617.812439,13.54466,-26.293921,...,6.10359,-12.293718,5.337916,0.005265,0.002197,730.280348,124.919782,69.837416,0.021691,0.007039
25%,1520.864167,440.409059,0.392546,0.280903,1.161329,1.077392,2.0,-272.163597,44.713447,-10.481475,...,8.49605,1.855776,8.304847,0.084993,0.034438,3130.428957,918.601164,103.359375,0.070226,0.035771
50%,2111.869947,600.494099,0.46069,0.29258,1.363183,1.448499,4.5,-192.257874,57.952559,-5.53082,...,9.807375,7.649071,9.436366,0.119896,0.048122,4477.282517,1272.71435,117.453835,0.099473,0.049468
75%,2613.219953,813.856939,0.510953,0.302484,1.632089,1.98204,7.0,-147.03339,72.418177,-0.610548,...,11.298329,14.249839,10.839158,0.172889,0.065798,5345.809193,1670.144645,135.999178,0.131909,0.066533
max,4171.532799,1921.069855,0.689486,0.342938,2.450388,4.371769,9.0,-47.043102,166.846832,17.014378,...,18.426645,27.413534,19.880682,0.394304,0.176083,8569.415467,3423.674142,184.570312,0.274505,0.186829


In [44]:
# Average of every feature within each genre label (see the `genres` mapping for the codes).
df_features.groupby('genre').mean()

Unnamed: 0_level_0,centroid_mean,centroid_std,chroma_mean,chroma_std,flux_mean,flux_std,mfcc_0_mean,mfcc_0_std,mfcc_10_mean,mfcc_10_std,...,mfcc_8_std,mfcc_9_mean,mfcc_9_std,rmse_mean,rmse_std,roloff_mean,roloff_std,tempo,zcr_mean,zcr_std
genre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1647.469936,521.314828,0.429492,0.300166,1.389011,1.523596,-235.775327,58.86928,-9.2126,9.523001,...,10.197294,8.21876,9.851863,0.13398,0.057461,3453.741758,1175.371056,123.879492,0.078961,0.042371
1,1296.254131,324.686937,0.318998,0.29593,0.998596,0.724309,-385.943553,69.512722,-5.976834,10.602363,...,10.074632,1.194916,10.102232,0.042594,0.023386,2356.950352,711.762366,125.852694,0.077604,0.028124
2,1783.109581,591.628375,0.432244,0.291399,1.257509,1.328716,-213.783122,54.134367,-6.434403,9.47927,...,10.542613,6.668295,10.312081,0.124919,0.043327,3706.424984,1317.029309,120.651245,0.083393,0.042924
3,2508.11268,729.491671,0.47658,0.286798,1.587905,1.876482,-168.246214,57.616654,-6.652309,8.994259,...,9.517852,10.831227,9.266553,0.133051,0.058831,5292.293913,1424.039068,120.045298,0.129352,0.064457
4,2422.198262,886.528166,0.528276,0.288916,1.818747,2.065935,-181.20866,67.633963,-1.958095,9.940448,...,11.075023,10.375946,10.410501,0.174024,0.087446,5118.83039,1622.793898,110.208859,0.107964,0.076396
5,1704.0639,489.318653,0.37519,0.305674,1.210829,1.266151,-283.102623,56.022255,-3.822514,8.708285,...,8.960512,3.982172,8.618943,0.085689,0.037271,3535.854856,1165.120014,119.699696,0.076963,0.037692
6,2535.862529,467.224633,0.549066,0.262764,1.17183,1.000805,-135.372426,36.748023,-10.818347,7.570015,...,8.336109,16.807137,7.982119,0.152048,0.038578,5033.433693,872.442585,123.847177,0.147449,0.047703
7,2922.551777,996.563405,0.459049,0.298016,1.625878,1.949852,-142.897008,65.501273,-2.156384,9.901003,...,10.883228,4.321928,10.562535,0.196114,0.088796,6351.300864,1851.117812,113.858838,0.133548,0.087671
8,2037.998409,876.352564,0.505343,0.290017,1.827799,2.425285,-239.484469,76.035709,-0.576734,10.430431,...,11.453828,7.809061,11.077466,0.119294,0.059118,4264.686684,1796.72067,132.743619,0.091551,0.066794
9,2141.223828,629.173309,0.464638,0.284858,1.281184,1.38267,-181.346416,52.670989,-7.957367,8.38322,...,9.405688,8.924113,8.882611,0.12605,0.042422,4462.473486,1318.527594,120.191673,0.109229,0.051737
