In [54]:
# Libraries for feature extraction:
import librosa
import pandas as pd
import numpy as np
import os
import pathlib
from scipy.stats import kurtosis
from scipy.stats import skew


In [55]:
# Folders holding the audio used in this notebook: the genre-labelled
# collection, the test songs and the Spotify songs.
_data_root = './Data/'
audio_dir = _data_root + 'genres/'
test_dir = _data_root + 'test_songs/'
spoty_dir = _data_root + 'spotify_songs/'

In [56]:
# Genre name -> integer label. Each genre's code is its position in the
# list below, so the ordering of the list defines the encoding.
_genre_names = ['blues', 'classical', 'country', 'disco', 'hiphop',
                'jazz', 'metal', 'pop', 'reggae', 'rock']
genres = {name: code for code, name in enumerate(_genre_names)}

In [57]:
def split_songs(X, window, overlap):
    """
    Split a song into equally sized, overlapping chunks.

    Parameters
    ----------
    X : numpy.ndarray
        Song samples; chunks are taken along the first axis.
    window : float
        Chunk length as a fraction of the song length. A value that would
        make a chunk longer than the song is capped at the full song.
    overlap : float
        Fraction of each chunk that is shared with the following chunk
        (0 means consecutive chunks do not overlap).

    Returns
    -------
    numpy.ndarray
        Array whose first axis indexes the chunks. Every chunk has the same
        length: a trailing remainder shorter than a full chunk is dropped,
        so the result is never ragged.

    Raises
    ------
    ValueError
        If the chunk length or the step between chunks works out to zero
        samples or less (e.g. an empty song, a tiny window, or overlap >= 1).
    """
    xshape = X.shape[0]
    chunk = min(int(xshape * window), xshape)
    offset = int(chunk * (1. - overlap))

    # A non-positive step would either make range() fail with an unhelpful
    # message or silently yield nothing; report the real cause instead.
    if chunk <= 0 or offset <= 0:
        raise ValueError(
            "window={!r} and overlap={!r} give a chunk of {} samples and a "
            "step of {} samples for a song of {} samples; both must be "
            "positive".format(window, overlap, chunk, offset, xshape)
        )

    # Stop at the last start index that still leaves room for a full chunk.
    starts = range(0, xshape - chunk + 1, offset)
    return np.array([X[start:start + chunk] for start in starts])

In [58]:
#Get selected features from each song using librosa and numpy:

def get_features(y, sr, n_fft = 1024, hop_length = 512):
    """
    Compute summary statistics of several librosa descriptors for one signal.

    Each descriptor is flattened to a 1-D array and reduced to its mean,
    standard deviation, kurtosis and skewness. A single tempo estimate is
    added at the end.

    Parameters
    ----------
    y : numpy.ndarray
        Audio time series.
    sr : number
        Sampling rate of ``y``.
    n_fft : int
        Frame / FFT size passed to the frame-based descriptors.
    hop_length : int
        Hop between frames passed to the frame-based descriptors.

    Returns
    -------
    dict
        Keys ``'<name>_mean'``, ``'<name>_std'``, ``'<name>_kurtosis'`` and
        ``'<name>_skew'`` for ``name`` in ``centroid``, ``roloff``, ``flux``,
        ``rmse``, ``zcr``, ``chroma`` and ``mfcc_0`` .. ``mfcc_12`` (in that
        order), followed by ``'tempo'``.
    """
    # The audio is always passed by keyword (y=...): recent librosa releases
    # no longer accept it as a positional argument.
    frame_args = dict(n_fft=n_fft, hop_length=hop_length)

    # Insertion order matters: it becomes the column order of the DataFrames
    # built from these dictionaries. The key spellings (e.g. 'roloff') are
    # kept as they are because they end up as column names.
    features = {}
    features['centroid'] = librosa.feature.spectral_centroid(y=y, sr=sr, **frame_args).ravel()
    features['roloff'] = librosa.feature.spectral_rolloff(y=y, sr=sr, **frame_args).ravel()
    # 'flux' is the onset strength envelope; it is computed with librosa's
    # own default framing, not with n_fft / hop_length.
    features['flux'] = librosa.onset.onset_strength(y=y, sr=sr).ravel()
    features['rmse'] = librosa.feature.rms(y=y, frame_length=n_fft, hop_length=hop_length).ravel()
    features['zcr'] = librosa.feature.zero_crossing_rate(y=y, frame_length=n_fft, hop_length=hop_length).ravel()
    features['chroma'] = librosa.feature.chroma_stft(y=y, sr=sr, **frame_args).ravel()

    # One descriptor per MFCC coefficient. sr is passed explicitly so the
    # coefficients use the signal's real sampling rate rather than the
    # library default.
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13, **frame_args)
    for idx, v_mfcc in enumerate(mfcc):
        features['mfcc_{}'.format(idx)] = v_mfcc.ravel()

    def get_moments(descriptors):
        """Reduce every descriptor array to mean, std, kurtosis and skew."""
        result = {}
        for k, v in descriptors.items():
            result[f'{k}_mean'] = np.mean(v)
            result[f'{k}_std'] = np.std(v)
            result[f'{k}_kurtosis'] = kurtosis(v)
            result[f'{k}_skew'] = skew(v)
        return result

    dict_agg_features = get_moments(features)

    # Tempo is a single value per signal, so it is stored as-is instead of
    # going through the moment statistics.
    dict_agg_features['tempo'] = librosa.beat.tempo(y=y, sr=sr)[0]

    return dict_agg_features

In [59]:
def read_process_songs(src_dir, window=0.2, overlap=0.5, debug = True):
    """
    Extract chunk-level features for every song found under the genre folders.

    For each genre name in ``genres`` the folder ``src_dir/<genre>`` is
    walked. Every file found is loaded with librosa, cut to its first
    660000 samples, split with ``split_songs`` and each chunk is passed to
    ``get_features``.

    Parameters
    ----------
    src_dir : str
        Directory containing one sub-folder per genre.
    window : float
        Chunk length as a fraction of the song, forwarded to ``split_songs``.
    overlap : float
        Overlap between consecutive chunks, forwarded to ``split_songs``.
    debug : bool
        When true, print the path of each file before it is loaded.

    Returns
    -------
    list of dict
        One feature dictionary per chunk, each with an extra ``'genre'`` key
        holding the integer label from ``genres``.
    """
    arr_features = []

    for genre, label in genres.items():
        # os.path.join works whether or not src_dir ends with a separator.
        folder = os.path.join(src_dir, genre)

        for root, subdirs, files in os.walk(folder):
            # Sort in place so traversal (and the row order of the result)
            # is the same on every run and every file system.
            subdirs.sort()
            for file in sorted(files):
                # Build the path from the directory currently being walked,
                # so files inside nested folders resolve correctly.
                file_name = os.path.join(root, file)

                # Printed before loading so a failing file is identifiable.
                if debug:
                    print(f"Reading file: {file_name}")

                signal, sr = librosa.load(file_name)
                # Keep at most the first 660000 samples (about 30 s at
                # librosa.load's default 22050 Hz sampling rate).
                signal = signal[:660000]

                for s in split_songs(signal, window, overlap):
                    features = get_features(s, sr)
                    features['genre'] = label
                    arr_features.append(features)
    return arr_features

In [13]:
def read_process_songs_t3(src_dir, debug = True):
    """
    Extract features from three consecutive segments of every genre song.

    For each genre name in ``genres`` the folder ``src_dir/<genre>`` is
    walked. Every file is loaded with librosa and cut at samples 220000 and
    440000; ``get_features`` is called on each of the three pieces.

    Parameters
    ----------
    src_dir : str
        Directory containing one sub-folder per genre.
    debug : bool
        When true, print the path of each file before it is loaded.

    Returns
    -------
    list of dict
        Three feature dictionaries per song (one per segment), each with an
        extra ``'genre'`` key holding the integer label from ``genres``.
    """
    arr_features = []

    for genre, label in genres.items():
        # os.path.join works whether or not src_dir ends with a separator.
        folder = os.path.join(src_dir, genre)

        for root, subdirs, files in os.walk(folder):
            # Sort in place so traversal order is the same on every run.
            subdirs.sort()
            for file in sorted(files):
                # Build the path from the directory currently being walked,
                # so files inside nested folders resolve correctly.
                file_name = os.path.join(root, file)

                # Printed before loading so a failing file is identifiable.
                if debug:
                    print("Reading file: {}".format(file_name))

                signal, sr = librosa.load(file_name)
                # NOTE(review): the last segment is open-ended, so it holds
                # everything after sample 440000 and may be longer than the
                # first two - confirm this is intended.
                segments = (signal[:220000], signal[220000:440000], signal[440000:])

                for segment in segments:
                    features = get_features(segment, sr)
                    features['genre'] = label
                    arr_features.append(features)
    return arr_features

In [14]:
def read_process_songs_t1(src_dir, debug = True):
    """
    Extract one whole-song feature dictionary for every genre song.

    For each genre name in ``genres`` the folder ``src_dir/<genre>`` is
    walked. Every file is loaded with librosa and the complete signal is
    passed to ``get_features``.

    Parameters
    ----------
    src_dir : str
        Directory containing one sub-folder per genre.
    debug : bool
        When true, print the path of each file before it is loaded.

    Returns
    -------
    list of dict
        One feature dictionary per song, each with an extra ``'genre'`` key
        holding the integer label from ``genres``.
    """
    arr_features = []

    for genre, label in genres.items():
        # os.path.join works whether or not src_dir ends with a separator.
        folder = os.path.join(src_dir, genre)

        for root, subdirs, files in os.walk(folder):
            # Sort in place so traversal order is the same on every run.
            subdirs.sort()
            for file in sorted(files):
                # Build the path from the directory currently being walked,
                # so files inside nested folders resolve correctly.
                file_name = os.path.join(root, file)

                # Printed before loading so a failing file is identifiable.
                if debug:
                    print("Reading file: {}".format(file_name))

                signal, sr = librosa.load(file_name)

                features = get_features(signal, sr)
                features['genre'] = label
                arr_features.append(features)
    return arr_features

In [26]:
# Build the list of per-chunk feature dictionaries for every song under
# audio_dir; it is turned into a DataFrame in a later cell.
features = read_process_songs(src_dir=audio_dir, debug=True)

In [20]:
# One row per feature dictionary, one column per feature name.
df = pd.DataFrame(data=features)

In [25]:
# Persist the feature table; the row index is not written to the file.
df.to_csv(path_or_buf='./Features/features_augmented.csv', index=False)

In [33]:
# Whole-song features for the songs under test_dir, saved as CSV.
features1 = read_process_songs_t1(src_dir=test_dir, debug=True)
df1 = pd.DataFrame(data=features1)
df1.to_csv(path_or_buf='./Features/features_augmented_t1.csv', index=False)

Reading file: ./Data/test_songs/blues/blues.00099.wav
Reading file: ./Data/test_songs/classical/classical.00099.wav
Reading file: ./Data/test_songs/country/country.00099.wav
Reading file: ./Data/test_songs/disco/disco.00099.wav
Reading file: ./Data/test_songs/hiphop/hiphop.00099.wav
Reading file: ./Data/test_songs/jazz/jazz.00099.wav
Reading file: ./Data/test_songs/metal/metal.00099.wav
Reading file: ./Data/test_songs/pop/pop.00099.wav
Reading file: ./Data/test_songs/reggae/reggae.00099.wav
Reading file: ./Data/test_songs/rock/rock.00099.wav


In [34]:
# Three-segment features for the songs under test_dir, saved as CSV.
features3 = read_process_songs_t3(src_dir=test_dir, debug=True)
df3 = pd.DataFrame(data=features3)
df3.to_csv(path_or_buf='./Features/features_augmented_t3.csv', index=False)

Reading file: ./Data/test_songs/blues/blues.00099.wav
Reading file: ./Data/test_songs/classical/classical.00099.wav
Reading file: ./Data/test_songs/country/country.00099.wav
Reading file: ./Data/test_songs/disco/disco.00099.wav
Reading file: ./Data/test_songs/hiphop/hiphop.00099.wav
Reading file: ./Data/test_songs/jazz/jazz.00099.wav
Reading file: ./Data/test_songs/metal/metal.00099.wav
Reading file: ./Data/test_songs/pop/pop.00099.wav
Reading file: ./Data/test_songs/reggae/reggae.00099.wav
Reading file: ./Data/test_songs/rock/rock.00099.wav


In [15]:
def read_process_songs_sp3(src_dir, debug = True):
    """
    Extract features from three consecutive segments of every unlabelled song.

    ``src_dir`` is walked. Every file found is loaded with librosa and cut at
    samples 220000 and 440000; ``get_features`` is called on each of the
    three pieces. No genre label is attached.

    Parameters
    ----------
    src_dir : str
        Directory containing the audio files.
    debug : bool
        When true, print the path of each file before it is loaded.

    Returns
    -------
    list of dict
        Three feature dictionaries per song, one per segment.
    """
    arr_features = []

    for root, subdirs, files in os.walk(src_dir):
        # Sort in place so traversal order is the same on every run.
        subdirs.sort()
        for file in sorted(files):
            # Build the path from the directory currently being walked: this
            # resolves files in nested folders and does not require src_dir
            # to end with a separator.
            file_name = os.path.join(root, file)

            # Printed before loading so a failing file is identifiable.
            if debug:
                print("Reading file: {}".format(file_name))

            signal, sr = librosa.load(file_name)
            # NOTE(review): the last segment is open-ended, so it holds
            # everything after sample 440000 and may be much longer than the
            # first two for full-length songs - confirm this is intended.
            segments = (signal[:220000], signal[220000:440000], signal[440000:])

            for segment in segments:
                arr_features.append(get_features(segment, sr))
    return arr_features

In [16]:
def read_process_songs_sp1(src_dir, debug = True):
    """
    Extract one whole-song feature dictionary for every unlabelled song.

    ``src_dir`` is walked. Every file found is loaded with librosa and the
    complete signal is passed to ``get_features``. No genre label is
    attached.

    Parameters
    ----------
    src_dir : str
        Directory containing the audio files.
    debug : bool
        When true, print the path of each file before it is loaded.

    Returns
    -------
    list of dict
        One feature dictionary per song.
    """
    arr_features = []

    for root, subdirs, files in os.walk(src_dir):
        # Sort in place so traversal order is the same on every run.
        subdirs.sort()
        for file in sorted(files):
            # Build the path from the directory currently being walked: this
            # resolves files in nested folders and does not require src_dir
            # to end with a separator.
            file_name = os.path.join(root, file)

            # Printed before loading so a failing file is identifiable.
            if debug:
                print("Reading file: {}".format(file_name))

            signal, sr = librosa.load(file_name)
            arr_features.append(get_features(signal, sr))
    return arr_features

In [127]:
# Whole-song features for the songs under spoty_dir.
fsp1 = read_process_songs_sp1(src_dir=spoty_dir)

Reading file: ./Data/spotify_songs/reggae.mp3


In [128]:
# One row per song.
dfs1 = pd.DataFrame(data=fsp1)

In [129]:
# Save the whole-song Spotify features; the row index is not written.
dfs1.to_csv(path_or_buf='./Features/fsp1.csv', index=False)

In [130]:
# Three-segment features for the songs under spoty_dir.
fsp3 = read_process_songs_sp3(src_dir=spoty_dir)

Reading file: ./Data/spotify_songs/reggae.mp3


In [131]:
# One row per song segment.
dfs3 = pd.DataFrame(data=fsp3)

In [132]:
# Save the three-segment Spotify features; the row index is not written.
dfs3.to_csv(path_or_buf='./Features/fsp3.csv', index=False)

Unnamed: 0,centroid_kurtosis,centroid_mean,centroid_skew,centroid_std,chroma_kurtosis,chroma_mean,chroma_skew,chroma_std,flux_kurtosis,flux_mean,...,rmse_std,roloff_kurtosis,roloff_mean,roloff_skew,roloff_std,tempo,zcr_kurtosis,zcr_mean,zcr_skew,zcr_std
0,-0.272445,2292.006108,0.639644,371.37089,-0.45979,0.454361,0.717142,0.264166,6.023544,1.166718,...,0.025612,-0.25221,4324.968886,0.819843,871.146437,117.453835,-0.255651,0.145415,-0.031656,0.029378
1,-0.023791,2244.142916,0.831402,406.412611,-0.143059,0.39568,0.911344,0.276072,5.829755,1.247865,...,0.03222,0.380316,4294.772325,1.082511,868.057488,117.453835,-0.612709,0.1404,0.166721,0.034283
2,-0.624096,2381.707646,0.326672,484.115327,-0.469382,0.410844,0.744096,0.282949,6.671955,1.126775,...,0.042709,-1.168401,4817.71852,0.263824,1174.116147,117.453835,0.659586,0.128221,0.465265,0.032395


In [3]:
# My tests for the pipeline:
# The folder path needs the "./" prefix used everywhere else in the notebook,
# and file_name has to be defined here: it is not a notebook-level variable,
# so the cell would fail on a fresh kernel.
dire = './Data/test_songs/blues/'
file_name = dire + 'blues.00099.wav'
signal, sr = librosa.load(file_name)

In [7]:
# Keep at most the first 660000 samples of the loaded signal.
signal = signal[0:660000]

In [67]:
# Run the chunked pipeline on the songs under test_dir and tabulate the result.
feats = read_process_songs(src_dir=test_dir, debug=True)
df = pd.DataFrame(data=feats)

Reading file: ./Data/test_songs/blues/blues.00099.wav
Reading file: ./Data/test_songs/classical/classical.00099.wav
Reading file: ./Data/test_songs/country/country.00099.wav
Reading file: ./Data/test_songs/disco/disco.00099.wav
Reading file: ./Data/test_songs/hiphop/hiphop.00099.wav
Reading file: ./Data/test_songs/jazz/jazz.00099.wav
Reading file: ./Data/test_songs/metal/metal.00099.wav
Reading file: ./Data/test_songs/pop/pop.00099.wav
Reading file: ./Data/test_songs/reggae/reggae.00099.wav
Reading file: ./Data/test_songs/rock/rock.00099.wav


In [68]:
# Show only a preview of the table instead of dumping the whole frame.
df.head(10)

Unnamed: 0,centroid_kurtosis,centroid_mean,centroid_skew,centroid_std,chroma_kurtosis,chroma_mean,chroma_skew,chroma_std,flux_kurtosis,flux_mean,...,rmse_std,roloff_kurtosis,roloff_mean,roloff_skew,roloff_std,tempo,zcr_kurtosis,zcr_mean,zcr_skew,zcr_std
0,1.156734,1303.552472,1.271776,446.517249,-1.082403,0.522745,0.153903,0.284313,28.273242,1.102243,...,0.064790,1.246483,2869.758358,0.904283,1001.446954,92.285156,0.754942,0.053571,1.402706,0.036526
1,-0.113253,1472.937382,0.602748,445.510937,-1.067025,0.441334,0.391482,0.312157,31.391653,1.126162,...,0.070951,0.571934,2890.623864,0.817049,1035.665271,92.285156,-1.047398,0.079420,0.092993,0.040471
2,-0.238686,1490.692762,0.798260,410.728692,-0.517147,0.388001,0.758086,0.294819,22.247490,1.069046,...,0.066827,0.027599,2865.084484,0.772098,985.400430,161.499023,-0.109306,0.081123,0.559564,0.037105
3,0.358205,1321.985184,1.180768,457.075940,-0.778194,0.440073,0.598053,0.286194,22.058193,1.089839,...,0.055991,-0.053459,2746.651867,0.743291,1115.510003,92.285156,2.356625,0.056981,1.681155,0.035761
4,-0.564948,1374.421094,0.717912,438.670604,-0.889512,0.457287,0.473993,0.287789,22.757940,1.077956,...,0.069145,-0.128705,2836.623933,0.438140,1029.812390,95.703125,1.232592,0.065638,1.223999,0.039945
5,-0.341519,1681.815625,0.061526,382.276277,-0.720812,0.415164,0.620229,0.295400,27.397029,1.139820,...,0.086312,1.466639,3441.389785,0.322448,841.168181,92.285156,-0.124813,0.096316,0.132889,0.039059
6,1.755863,1665.496491,0.939753,434.338195,-0.817403,0.419478,0.587429,0.300340,27.058923,1.124477,...,0.086410,1.332604,3422.777753,0.672301,1004.645611,129.199219,-0.464279,0.090147,0.212952,0.039716
7,5.303827,1645.401762,1.780266,624.256582,-1.088035,0.482940,0.319559,0.297097,15.234211,1.153052,...,0.080767,0.342773,3360.682004,0.680029,1308.361628,92.285156,3.122623,0.077678,1.365934,0.050760
8,8.832743,1556.073881,2.270344,571.798411,-1.010759,0.454367,0.454194,0.299282,14.790213,1.213164,...,0.073122,0.834974,3016.985374,0.926868,1248.799834,92.285156,3.926445,0.082925,1.280064,0.047590
9,1.237619,1396.739048,0.964296,238.898064,0.008227,0.413689,0.941492,0.262325,30.168999,0.864875,...,0.001994,0.855464,3034.595862,1.020300,622.135432,129.199219,0.659742,0.069964,0.814438,0.020344
