In [1]:
import os
import librosa
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy.stats import kurtosis
from scipy.stats import skew

from sklearn.preprocessing import MinMaxScaler

In [2]:
# Root folder of the GTZAN dataset (relative to the notebook's working directory)
gtzan_dir = "./data/genres/"

In [3]:
# Parameters
# 30 * 22050 samples -- presumably a 30-second clip at a 22050 Hz sample rate (TODO confirm)
song_samples = 30 * 22050

# Genre name -> integer class label; the label is the position in this list
genres = {
    name: label
    for label, name in enumerate(
        ['metal', 'disco', 'classical', 'hiphop', 'jazz',
         'country', 'pop', 'blues', 'reggae', 'rock']
    )
}

In [4]:
def get_features(y, sr, n_fft = 1024, hop_length = 512):
    """Summarise an audio signal as a flat dict of scalar descriptors.

    Frame-level descriptors (spectral centroid, spectral roll-off, onset
    strength stored as 'flux', RMS energy stored as 'rmse', zero-crossing
    rate and 13 MFCCs) are computed with librosa, and each one is reduced
    to its mean, standard deviation, kurtosis and skewness. A global tempo
    estimate is added under the key 'tempo'.

    Parameters
    ----------
    y : audio time series, as returned by ``librosa.load``
    sr : sample rate of ``y``
    n_fft : analysis window / frame length in samples
    hop_length : number of samples between consecutive frames

    Returns
    -------
    dict
        Keys '<descriptor>_mean', '<descriptor>_std', '<descriptor>_kurtosis'
        and '<descriptor>_skew' for every descriptor, plus 'tempo'.
    """
    # `rmse` was renamed to `rms` in newer librosa releases; use whichever exists
    rms_fn = getattr(librosa.feature, 'rms', None)
    if rms_fn is None:
        rms_fn = librosa.feature.rmse

    # The tempo estimator moved from librosa.beat to librosa.feature in newer releases
    tempo_fn = getattr(librosa.feature, 'tempo', None)
    if tempo_fn is None:
        tempo_fn = librosa.beat.tempo

    # Frame-level descriptors. Insertion order fixes the column order of the
    # resulting table, so it is kept as: centroid, roloff, flux, rmse, zcr, mfcc_*.
    # The signal is always passed as `y=` because recent librosa versions make
    # it keyword-only, while older versions accept the keyword as well.
    features = {}
    features['centroid'] = librosa.feature.spectral_centroid(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length).ravel()
    features['roloff'] = librosa.feature.spectral_rolloff(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length).ravel()
    # onset_strength is not given n_fft/hop_length, so librosa's own defaults apply here
    features['flux'] = librosa.onset.onset_strength(y=y, sr=sr).ravel()
    features['rmse'] = rms_fn(y=y, frame_length=n_fft, hop_length=hop_length).ravel()
    features['zcr'] = librosa.feature.zero_crossing_rate(y=y, frame_length=n_fft, hop_length=hop_length).ravel()

    # MFCC treatment: one descriptor per coefficient.
    # `sr` must be forwarded, otherwise librosa assumes its default sample rate.
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mfcc=13)
    for idx, v_mfcc in enumerate(mfcc):
        features['mfcc_{}'.format(idx)] = v_mfcc.ravel()

    # Get statistics from the vectors
    def get_moments(descriptors):
        """Reduce every vector in `descriptors` to mean, std, kurtosis and skew."""
        result = {}
        for k, v in descriptors.items():
            result['{}_mean'.format(k)] = np.mean(v)
            result['{}_std'.format(k)] = np.std(v)
            result['{}_kurtosis'.format(k)] = kurtosis(v)
            result['{}_skew'.format(k)] = skew(v)
        return result

    dict_agg_features = get_moments(features)
    # The tempo estimator returns an array; keep its first element as a scalar
    dict_agg_features['tempo'] = tempo_fn(y=y, sr=sr)[0]

    return dict_agg_features

In [5]:
def read_process_songs(src_dir, debug = True):
    """Extract features from every audio file under the per-genre folders.

    For each genre name in the module-level ``genres`` mapping, the folder
    ``<src_dir>/<genre>`` is walked recursively; every file found is loaded
    with ``librosa.load`` and passed to ``get_features``.

    Parameters
    ----------
    src_dir : str
        Directory that contains one subfolder per genre. A trailing path
        separator is optional.
    debug : bool
        When true, print the path of each file before it is read.

    Returns
    -------
    list of dict
        One feature dict per file, with the integer genre label stored
        under the key 'genre'.
    """
    # Empty array of dicts with the processed features from all files
    arr_features = []

    # Read files from the folders
    for genre_name, genre_label in genres.items():
        folder = os.path.join(src_dir, genre_name)

        for root, subdirs, files in os.walk(folder):
            # Sort in place so os.walk descends in a stable order; together with
            # sorted(files) this makes the row order reproducible across machines
            subdirs.sort()

            for file in sorted(files):
                # Build the path from `root` (not `folder`) so that files found
                # in nested subfolders resolve to their real location
                file_name = os.path.join(root, file)

                # Debug process: announce the file first so a failing load can be traced
                if debug:
                    print("Reading file: {}".format(file_name))

                # Read the audio file
                signal, sr = librosa.load(file_name)

                # Append the result to the data structure
                features = get_features(signal, sr)
                features['genre'] = genre_label
                arr_features.append(features)
    return arr_features

In [6]:
# Run the extraction over the whole dataset: one feature dict per audio file
features = read_process_songs(src_dir=gtzan_dir, debug=False)

In [7]:
# Turn the list of per-file feature dicts into a table (one row per file)
df_features = pd.DataFrame(data=features)

In [8]:
# Dimensions of the feature table as (rows, columns)
df_features.shape

(1000, 74)

In [9]:
# Preview the first rows of the feature table
df_features.head()

Unnamed: 0,centroid_mean,centroid_std,centroid_kurtosis,centroid_skew,roloff_mean,roloff_std,roloff_kurtosis,roloff_skew,flux_mean,flux_std,...,mfcc_11_mean,mfcc_11_std,mfcc_11_kurtosis,mfcc_11_skew,mfcc_12_mean,mfcc_12_std,mfcc_12_kurtosis,mfcc_12_skew,tempo,genre
0,3176.995746,550.485825,1.27793,0.851571,6235.702536,1316.78258,-0.180793,0.740059,1.131944,0.640262,...,10.570468,8.282726,-0.381802,-0.087361,-4.103276,8.186303,0.171622,-0.170111,95.703125,0
1,2850.16303,450.508356,7.136744,1.872261,5926.993343,852.51147,3.561296,1.291193,1.104693,0.68968,...,11.399026,7.171574,-0.122148,0.04945,-10.89427,7.584343,0.193536,0.236081,151.999081,0
2,2147.049597,394.428946,3.603367,1.476032,4376.752601,842.133273,1.952754,1.401493,1.156895,0.801254,...,9.597201,7.135582,-0.228952,-0.074895,-14.24871,7.786307,0.161245,-0.130394,112.347147,0
3,2700.584556,411.301524,-0.256259,0.124371,5178.110839,679.032203,-0.233782,0.240711,1.165081,0.663197,...,15.695696,6.359774,0.157388,-0.127252,-13.00533,6.983783,0.370823,0.127291,99.384014,0
4,3357.385889,548.509155,1.316912,0.691435,6810.30432,964.532861,-0.009224,0.129295,1.263426,0.848756,...,11.683862,7.427122,-0.13448,0.061887,-3.42946,7.443414,0.033728,0.119981,135.999178,0


In [10]:
# Persist the feature table as CSV, leaving out the row index
df_features.to_csv(path_or_buf='./data/GTZAN_Features.csv', index=False)