<div style="text-align:center; font-size: 30px">FEATURE AGGREGATION</div>

In [8]:
# Import libraries
import numpy as np
import pandas as pd

from glob import glob

import librosa
import librosa.display
import IPython.display as ipd
from scipy.stats import skew, kurtosis

In [9]:
# Paths to variables
blues_files = glob("../audio-ml/Data/genres_original/blues/blues.*.wav")

classical_files = glob("../audio-ml/Data/genres_original/classical/classical.*.wav")

country_files = glob("../audio-ml/Data/genres_original/country/country.*.wav")

disco_files = glob("../audio-ml/Data/genres_original/disco/disco.*.wav")

hiphop_files = glob("../audio-ml/Data/genres_original/hiphop/hiphop.*.wav")

jazz_files = glob("../audio-ml/Data/genres_original/jazz/jazz.*.wav")

metal_files = glob("../audio-ml/Data/genres_original/metal/metal.*.wav")

pop_files = glob("../audio-ml/Data/genres_original/pop/pop.*.wav")

reggae_files = glob("../audio-ml/Data/genres_original/reggae/reggae.*.wav")

rock_files = glob("../audio-ml/Data/genres_original/rock/rock.*.wav")

In [13]:
# Feature extraction function
def extract_features(audio_path):
    # Load file
    y, sr = librosa.load(audio_path)
    
    # Initialize feature dict
    features = {}
    
    # Calculate RMS Energy and extract features
    rms = librosa.feature.rms(y=y, frame_length=2048, hop_length=512)[0]
    features["rms_mean"] = np.mean(rms)
    features["rms_std"] = np.std(rms)
    # features["rms_max"] = np.max(rms)
    # features["rms_min"] = np.min(rms)
    # features["rms_skew"] = skew(rms)
    # features["rms_kurtosis"] = kurtosis(rms)
    
    # Calculate Zero Crossing Rate and extract features
    zcr = librosa.feature.zero_crossing_rate(y=y, frame_length=2048, hop_length=512)[0]
    features["zcr_mean"] = np.mean(zcr)
    features["zcr_std"] = np.std(zcr)
    # features["zcr_max"] = np.max(zcr)
    # features["zcr_min"] = np.min(zcr)
    # features["zcr_skew"] = skew(zcr)
    # features["zcr_kurtosis"] = kurtosis(zcr)  
    
    # Calculate Spectral Centroids and extract features
    centroids = librosa.feature.spectral_centroid(y=y, n_fft=2048, hop_length=512)[0]
    features["centroid_mean"] = np.mean(centroids)
    features["centroid_std"] = np.std(centroids)
    # features["centroid_max"] = np.max(centroid)
    # features["centroid_min"] = np.min(centroid)
    # features["centroid_skew"] = skew(centroid)
    # features["centroid_kurtosis"] = kurtosis(centroid)
    
    # Calculate Spectral Bandwidth and extract features
    bandwidth = librosa.feature.spectral_bandwidth(y=y, n_fft=2048, hop_length=512)[0]
    features["bandwidth_mean"] = np.mean(bandwidth)
    features["bandwidth_std"] = np.std(bandwidth)
    # features["bandwidth_max"] = np.max(bandwidth)
    # features["bandwidth_min"] = np.min(bandwidth)
    # features["bandwidth_skew"] = skew(bandwidth)
    # features["bandwidth_kurtosis"] = kurtosis(bandwidth) 
    
    # Calculate Spectral Contrast and extract features
    contrast = librosa.feature.spectral_contrast(y=y, n_fft=2048, hop_length=512)
    # For each band
    for i, band in enumerate(contrast):
        features[f"contrast_band_{i}_mean"] = np.mean(contrast)
        features[f"contrast_band_{i}_std"] = np.std(contrast)
        # features[f"contrast_band_{i}_max"] = np.max(contrast)
        # features[f"contrast_band_{i}_min"] = np.min(contrast)
        # features[f"contrast_band_{i}_skew"] = skew(contrast)
        # features[f"contrast_band_{i}_kurtosis"] = kurtosis(contrast)  
    
    # Calculate Spectral Rolloff and extract features
    rolloff = librosa.feature.spectral_rolloff(y=y, n_fft=2048, hop_length=512)[0]
    features["rolloff_mean"] = np.mean(rolloff)
    features["rolloff_std"] = np.std(rolloff)
    # features["rolloff_max"] = np.max(rolloff)
    # features["rolloff_min"] = np.min(rolloff)
    # features["rolloff_skew"] = skew(rolloff)
    # features["rolloff_kurtosis"] = kurtosis(rolloff)  

    return features

In [14]:
feat_pop = extract_features(pop_files[1])
feat_pop

{'rms_mean': np.float32(0.23100165),
 'rms_std': np.float32(0.12734859),
 'zcr_mean': np.float64(0.21382753589037123),
 'zcr_std': np.float64(0.11607546270493543),
 'centroid_mean': np.float64(4036.598130555536),
 'centroid_std': np.float64(1137.4545236791337),
 'bandwidth_mean': np.float64(3342.1673492869427),
 'bandwidth_std': np.float64(329.3525059564205),
 'contrast_band_0_mean': np.float64(17.22795361570953),
 'contrast_band_0_std': np.float64(4.894318317534678),
 'contrast_band_1_mean': np.float64(17.22795361570953),
 'contrast_band_1_std': np.float64(4.894318317534678),
 'contrast_band_2_mean': np.float64(17.22795361570953),
 'contrast_band_2_std': np.float64(4.894318317534678),
 'contrast_band_3_mean': np.float64(17.22795361570953),
 'contrast_band_3_std': np.float64(4.894318317534678),
 'contrast_band_4_mean': np.float64(17.22795361570953),
 'contrast_band_4_std': np.float64(4.894318317534678),
 'contrast_band_5_mean': np.float64(17.22795361570953),
 'contrast_band_5_std': np.