In [1]:
import opensmile
from glob import glob
from tqdm import tqdm
import pandas as pd

# eGeMAPS
Function below takes two arguments:
1. <b>audio_files_path</b>: path to directory of audio files
2. <b>level</b>: which feature level to use (f for Functionals, l for LowLevelDescriptors)

Output: DataFrame with audio file path as index and 88 features

In [8]:
def egemaps(audio_files_path, level):
    if level == 'f':
        smile = opensmile.Smile(
            feature_set=opensmile.FeatureSet.eGeMAPSv02,
            feature_level=opensmile.FeatureLevel.Functionals,
        )
    elif level == 'l':
        smile = opensmile.Smile(
            feature_set=opensmile.FeatureSet.eGeMAPSv02,
            feature_level=opensmile.FeatureLevel.LowLevelDescriptors,
        )
    
    if not audio_files_path.endswith('/*'):
        audio_files_path += '/*'
    files = glob(audio_files_path)
    
    result = smile.process_files(files)

    return result

In [4]:
clean_dir = '/Users/brianlim/Desktop/files/cmu/intro_to_dl/project/enhanced/clean/*'
noisy_dir = '/Users/brianlim/Desktop/files/cmu/intro_to_dl/project/enhanced/noisy/*'
demucs_dir = '/Users/brianlim/Desktop/files/cmu/intro_to_dl/project/enhanced/Demucs_denoised/*'
fullsub_dir = '/Users/brianlim/Desktop/files/cmu/intro_to_dl/project/enhanced/enhanced_FullSubNet/*'

In [13]:
import os
DATA_DIR = "/home/ubuntu/efs/IntroDL/denoiser/dataset"
CLEAN_DIR = os.path.join(DATA_DIR, "starter_clean/clean")
NOISY_DIR = os.path.join(DATA_DIR, "starter_noisy/noisy")
DEMUCS_DIR = "/home/ubuntu/efs/IntroDL/denoiser/results/Demucs_denoised"


In [14]:
clean_dir = CLEAN_DIR + "/*"
noisy_dir = NOISY_DIR + "/*"
demucs_dir = DEMUCS_DIR + "/*"

In [15]:
clean_f = egemaps(clean_dir, 'f')
clean_l = egemaps(clean_dir, 'l')
noisy_f = egemaps(noisy_dir, 'f')
noisy_l = egemaps(noisy_dir, 'l')
demucs_f = egemaps(demucs_dir, 'f')
demucs_l = egemaps(demucs_dir, 'l')
# fullsub_f = egemaps(fullsub_dir, 'f')
# fullsub_l = egemaps(fullsub_dir, 'l')

In [16]:
import numpy as np

In [19]:
noisy_mae = np.mean(np.abs(noisy_l.to_numpy() - clean_l.to_numpy()), axis = 0)
demucs_mae = np.mean(np.abs(demucs_l.to_numpy() - clean_l.to_numpy()), axis = 0)
# fullsub_mae = np.mean(np.abs(fullsub_l.to_numpy() - clean_l.to_numpy()), axis = 0)

In [20]:
noisy_l.columns

Index(['Loudness_sma3', 'alphaRatio_sma3', 'hammarbergIndex_sma3',
       'slope0-500_sma3', 'slope500-1500_sma3', 'spectralFlux_sma3',
       'mfcc1_sma3', 'mfcc2_sma3', 'mfcc3_sma3', 'mfcc4_sma3',
       'F0semitoneFrom27.5Hz_sma3nz', 'jitterLocal_sma3nz',
       'shimmerLocaldB_sma3nz', 'HNRdBACF_sma3nz', 'logRelF0-H1-H2_sma3nz',
       'logRelF0-H1-A3_sma3nz', 'F1frequency_sma3nz', 'F1bandwidth_sma3nz',
       'F1amplitudeLogRelF0_sma3nz', 'F2frequency_sma3nz',
       'F2bandwidth_sma3nz', 'F2amplitudeLogRelF0_sma3nz',
       'F3frequency_sma3nz', 'F3bandwidth_sma3nz',
       'F3amplitudeLogRelF0_sma3nz'],
      dtype='object')

In [21]:
noisy_df = pd.DataFrame(columns = noisy_l.columns)
noisy_df.loc[0] = noisy_mae
noisy_df

Unnamed: 0,Loudness_sma3,alphaRatio_sma3,hammarbergIndex_sma3,slope0-500_sma3,slope500-1500_sma3,spectralFlux_sma3,mfcc1_sma3,mfcc2_sma3,mfcc3_sma3,mfcc4_sma3,...,logRelF0-H1-A3_sma3nz,F1frequency_sma3nz,F1bandwidth_sma3nz,F1amplitudeLogRelF0_sma3nz,F2frequency_sma3nz,F2bandwidth_sma3nz,F2amplitudeLogRelF0_sma3nz,F3frequency_sma3nz,F3bandwidth_sma3nz,F3amplitudeLogRelF0_sma3nz
0,0.47162,11.192012,13.372391,0.043161,0.024067,0.24562,19.562346,14.963029,17.576757,16.84149,...,14.217477,273.684296,313.128387,96.646179,339.350647,351.028687,90.963844,407.804779,352.755798,88.170807


In [22]:
demucs_df = pd.DataFrame(columns = demucs_l.columns)
demucs_df.loc[0] = demucs_mae
demucs_df

Unnamed: 0,Loudness_sma3,alphaRatio_sma3,hammarbergIndex_sma3,slope0-500_sma3,slope500-1500_sma3,spectralFlux_sma3,mfcc1_sma3,mfcc2_sma3,mfcc3_sma3,mfcc4_sma3,...,logRelF0-H1-A3_sma3nz,F1frequency_sma3nz,F1bandwidth_sma3nz,F1amplitudeLogRelF0_sma3nz,F2frequency_sma3nz,F2bandwidth_sma3nz,F2amplitudeLogRelF0_sma3nz,F3frequency_sma3nz,F3bandwidth_sma3nz,F3amplitudeLogRelF0_sma3nz
0,0.442246,13.168432,15.47497,0.036879,0.027447,0.218076,22.943317,15.664083,18.427799,17.280384,...,15.773613,269.181915,338.931854,96.318138,348.730804,364.419708,90.021942,428.003754,361.661316,86.977707


In [23]:
# fullsub_df = pd.DataFrame(columns = fullsub_l.columns)
# fullsub_df.loc[0] = fullsub_mae
# fullsub_df

In [25]:
clean_f.columns

Index(['F0semitoneFrom27.5Hz_sma3nz_amean',
       'F0semitoneFrom27.5Hz_sma3nz_stddevNorm',
       'F0semitoneFrom27.5Hz_sma3nz_percentile20.0',
       'F0semitoneFrom27.5Hz_sma3nz_percentile50.0',
       'F0semitoneFrom27.5Hz_sma3nz_percentile80.0',
       'F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2',
       'F0semitoneFrom27.5Hz_sma3nz_meanRisingSlope',
       'F0semitoneFrom27.5Hz_sma3nz_stddevRisingSlope',
       'F0semitoneFrom27.5Hz_sma3nz_meanFallingSlope',
       'F0semitoneFrom27.5Hz_sma3nz_stddevFallingSlope', 'loudness_sma3_amean',
       'loudness_sma3_stddevNorm', 'loudness_sma3_percentile20.0',
       'loudness_sma3_percentile50.0', 'loudness_sma3_percentile80.0',
       'loudness_sma3_pctlrange0-2', 'loudness_sma3_meanRisingSlope',
       'loudness_sma3_stddevRisingSlope', 'loudness_sma3_meanFallingSlope',
       'loudness_sma3_stddevFallingSlope', 'spectralFlux_sma3_amean',
       'spectralFlux_sma3_stddevNorm', 'mfcc1_sma3_amean',
       'mfcc1_sma3_stddevNorm', 'mfcc2_