In [2]:
import opensmile
from glob import glob
from tqdm import tqdm
import pandas as pd

# eGeMAPS
Function below takes two arguments:
1. <b>audio_files_path</b>: path to directory of audio files
2. <b>level</b>: which feature level to use (f for Functionals, l for LowLevelDescriptors)

Output: DataFrame indexed by audio file path, with 88 features for Functionals (`f`) or 25 features for LowLevelDescriptors (`l`)

In [3]:
def egemaps(audio_files_path, level):
    """Extract eGeMAPSv02 features for every file matched in a directory.

    Args:
        audio_files_path: Path to the directory of audio files. A trailing
            ``/*`` glob pattern is appended when it is not already present.
        level: Feature level selector: ``'f'`` for Functionals, ``'l'`` for
            LowLevelDescriptors.

    Returns:
        The result of ``opensmile.Smile.process_files`` for the matched
        files, processed in sorted path order.

    Raises:
        ValueError: If ``level`` is neither ``'f'`` nor ``'l'``.
    """
    # Validate first: an unknown level used to leave `smile` unbound and only
    # failed later with an UnboundLocalError.
    if level not in ('f', 'l'):
        raise ValueError(
            "level must be 'f' (Functionals) or 'l' (LowLevelDescriptors), "
            "got {!r}".format(level)
        )

    if level == 'f':
        feature_level = opensmile.FeatureLevel.Functionals
    else:
        feature_level = opensmile.FeatureLevel.LowLevelDescriptors

    smile = opensmile.Smile(
        feature_set=opensmile.FeatureSet.eGeMAPSv02,
        feature_level=feature_level,
    )

    if not audio_files_path.endswith('/*'):
        audio_files_path += '/*'

    # glob() returns paths in arbitrary, filesystem-dependent order. Sorting
    # makes the row order deterministic so results from directories holding
    # identically named files can be compared position by position.
    files = sorted(glob(audio_files_path))

    return smile.process_files(files)

In [4]:
# Root folder holding one sub-directory per condition. Edit this single line
# to point the notebook at a different machine or copy of the data.
enhanced_root = '/Users/brianlim/Desktop/files/cmu/intro_to_dl/project/enhanced'

# Glob patterns matching every file of each condition.
clean_dir = enhanced_root + '/clean/*'
noisy_dir = enhanced_root + '/noisy/*'
demucs_dir = enhanced_root + '/Demucs_denoised/*'
fullsub_dir = enhanced_root + '/enhanced_FullSubNet/*'

In [5]:
# For each condition, extract both feature levels in the same order as before:
# Functionals ('f') first, then LowLevelDescriptors ('l').
clean_f, clean_l = (egemaps(clean_dir, lvl) for lvl in ('f', 'l'))
noisy_f, noisy_l = (egemaps(noisy_dir, lvl) for lvl in ('f', 'l'))
demucs_f, demucs_l = (egemaps(demucs_dir, lvl) for lvl in ('f', 'l'))
fullsub_f, fullsub_l = (egemaps(fullsub_dir, lvl) for lvl in ('f', 'l'))

In [6]:
import numpy as np

In [7]:
# Per-column mean absolute error of each condition against the clean frames,
# averaged over rows (axis 0).
# NOTE(review): the element-wise subtraction assumes every frame has the same
# shape as clean_l and that rows line up one-to-one -- confirm.
noisy_mae, demucs_mae, fullsub_mae = (
    np.abs(frames.to_numpy() - clean_l.to_numpy()).mean(axis=0)
    for frames in (noisy_l, demucs_l, fullsub_l)
)

In [10]:
# Display the column labels of the low-level-descriptor frame.
noisy_l.columns

Index(['Loudness_sma3', 'alphaRatio_sma3', 'hammarbergIndex_sma3',
       'slope0-500_sma3', 'slope500-1500_sma3', 'spectralFlux_sma3',
       'mfcc1_sma3', 'mfcc2_sma3', 'mfcc3_sma3', 'mfcc4_sma3',
       'F0semitoneFrom27.5Hz_sma3nz', 'jitterLocal_sma3nz',
       'shimmerLocaldB_sma3nz', 'HNRdBACF_sma3nz', 'logRelF0-H1-H2_sma3nz',
       'logRelF0-H1-A3_sma3nz', 'F1frequency_sma3nz', 'F1bandwidth_sma3nz',
       'F1amplitudeLogRelF0_sma3nz', 'F2frequency_sma3nz',
       'F2bandwidth_sma3nz', 'F2amplitudeLogRelF0_sma3nz',
       'F3frequency_sma3nz', 'F3bandwidth_sma3nz',
       'F3amplitudeLogRelF0_sma3nz'],
      dtype='object')

In [17]:
# Build the one-row summary in a single constructor call instead of enlarging
# an empty frame through .loc, so the columns keep the array's numeric dtype.
noisy_df = pd.DataFrame(noisy_mae.reshape(1, -1), columns=noisy_l.columns)
noisy_df

Unnamed: 0,Loudness_sma3,alphaRatio_sma3,hammarbergIndex_sma3,slope0-500_sma3,slope500-1500_sma3,spectralFlux_sma3,mfcc1_sma3,mfcc2_sma3,mfcc3_sma3,mfcc4_sma3,...,logRelF0-H1-A3_sma3nz,F1frequency_sma3nz,F1bandwidth_sma3nz,F1amplitudeLogRelF0_sma3nz,F2frequency_sma3nz,F2bandwidth_sma3nz,F2amplitudeLogRelF0_sma3nz,F3frequency_sma3nz,F3bandwidth_sma3nz,F3amplitudeLogRelF0_sma3nz
0,0.483268,11.299494,13.573524,0.044102,0.024144,0.248254,19.869884,15.397346,17.760813,16.964304,...,14.445569,276.088806,315.461853,97.806877,340.632751,354.107605,92.074165,410.379883,355.498779,89.307373


In [19]:
# Build the one-row summary in a single constructor call instead of enlarging
# an empty frame through .loc, so the columns keep the array's numeric dtype.
demucs_df = pd.DataFrame(demucs_mae.reshape(1, -1), columns=demucs_l.columns)
demucs_df

Unnamed: 0,Loudness_sma3,alphaRatio_sma3,hammarbergIndex_sma3,slope0-500_sma3,slope500-1500_sma3,spectralFlux_sma3,mfcc1_sma3,mfcc2_sma3,mfcc3_sma3,mfcc4_sma3,...,logRelF0-H1-A3_sma3nz,F1frequency_sma3nz,F1bandwidth_sma3nz,F1amplitudeLogRelF0_sma3nz,F2frequency_sma3nz,F2bandwidth_sma3nz,F2amplitudeLogRelF0_sma3nz,F3frequency_sma3nz,F3bandwidth_sma3nz,F3amplitudeLogRelF0_sma3nz
0,0.451802,13.397175,15.754402,0.037105,0.027894,0.225197,23.306728,15.919687,18.526052,17.316565,...,16.163132,271.990021,339.750549,98.77388,351.564423,367.649048,92.307083,432.367188,365.303772,89.258408


In [18]:
# Build the one-row summary in a single constructor call instead of enlarging
# an empty frame through .loc, so the columns keep the array's numeric dtype.
fullsub_df = pd.DataFrame(fullsub_mae.reshape(1, -1), columns=fullsub_l.columns)
fullsub_df

Unnamed: 0,Loudness_sma3,alphaRatio_sma3,hammarbergIndex_sma3,slope0-500_sma3,slope500-1500_sma3,spectralFlux_sma3,mfcc1_sma3,mfcc2_sma3,mfcc3_sma3,mfcc4_sma3,...,logRelF0-H1-A3_sma3nz,F1frequency_sma3nz,F1bandwidth_sma3nz,F1amplitudeLogRelF0_sma3nz,F2frequency_sma3nz,F2bandwidth_sma3nz,F2amplitudeLogRelF0_sma3nz,F3frequency_sma3nz,F3bandwidth_sma3nz,F3amplitudeLogRelF0_sma3nz
0,0.515414,13.180624,15.35399,0.041533,0.027047,0.278125,22.608416,16.576773,19.458452,18.911354,...,15.836701,272.488159,329.300385,98.726952,343.16861,360.406494,92.257927,418.655273,362.131165,89.2855
