In [1]:
import opensmile
from glob import glob
from tqdm import tqdm
import pandas as pd

# eGeMAPS
Function below takes two arguments:
1. <b>audio_files_path</b>: list of audio file paths (passed unchanged to <code>smile.process_files</code>)
2. <b>level</b>: which feature level to use (f for Functionals, l for LowLevelDescriptors)

Output: DataFrame with audio file path as index and 88 features for Functionals (f) or 25 features for LowLevelDescriptors (l)

In [2]:
def egemaps(audio_files_path, level):
    """Extract eGeMAPSv02 features from audio files with openSMILE.

    Args:
        audio_files_path: Audio file path(s); forwarded unchanged to
            ``opensmile.Smile.process_files``.
        level: ``'f'`` for Functionals or ``'l'`` for LowLevelDescriptors.

    Returns:
        The result of ``smile.process_files(audio_files_path)``.

    Raises:
        ValueError: If ``level`` is neither ``'f'`` nor ``'l'``.
    """
    # Resolve the feature level first so an unsupported value fails with a
    # clear message instead of an UnboundLocalError on ``smile`` below.
    if level == 'f':
        feature_level = opensmile.FeatureLevel.Functionals
    elif level == 'l':
        feature_level = opensmile.FeatureLevel.LowLevelDescriptors
    else:
        raise ValueError(
            f"level must be 'f' (Functionals) or 'l' (LowLevelDescriptors), got {level!r}"
        )

    smile = opensmile.Smile(
        feature_set=opensmile.FeatureSet.eGeMAPSv02,
        feature_level=feature_level,
    )
    return smile.process_files(audio_files_path)

In [3]:
import os
DATA_DIR = "/home/ubuntu/efs/IntroDL/denoiser/dataset"
CLEAN_DIR = os.path.join(DATA_DIR, "starter_clean/clean")
NOISY_DIR = os.path.join(DATA_DIR, "starter_noisy/noisy")
RESULT_DIR = "/home/ubuntu/efs/IntroDL/denoiser/results"
DEMUCS_DIR = os.path.join(RESULT_DIR, "Demucs_denoised")
FULLSUBNET_DIR = os.path.join(RESULT_DIR, "FullSubNet_denoised")


def index_by_file_id(paths):
    """Map each file id to its path.

    The id is the part of the base name after the last underscore (the whole
    base name when it contains no underscore). If two paths share an id, the
    later one in ``paths`` wins.
    """
    return {os.path.basename(path).split('_')[-1]: path for path in paths}


def align_to_ids(file_ids, files_by_id, label):
    """Return the paths in ``files_by_id`` ordered like ``file_ids``.

    Raises:
        KeyError: If any id in ``file_ids`` is absent from ``files_by_id``;
            the message names the affected set via ``label``.
    """
    missing = [file_id for file_id in file_ids if file_id not in files_by_id]
    if missing:
        raise KeyError(
            f"{len(missing)} file id(s) missing from the {label} set, e.g. {missing[:5]}"
        )
    return [files_by_id[file_id] for file_id in file_ids]


# Index every audio set by file id so the four sets can be paired up.
clean_dict = index_by_file_id(sorted(glob(CLEAN_DIR + "/*")))
noisy_dict = index_by_file_id(sorted(glob(NOISY_DIR + "/*")))
demucs_enhanced_dict = index_by_file_id(sorted(glob(DEMUCS_DIR + "/*")))
fullsubnet_enhanced_dict = index_by_file_id(sorted(glob(FULLSUBNET_DIR + "/*")))

# The clean set defines which ids are used and in which order; every other
# list is aligned to it so position i refers to the same file id in all four.
file_ids = list(clean_dict)
clean_files = align_to_ids(file_ids, clean_dict, "clean")
noisy_files = align_to_ids(file_ids, noisy_dict, "noisy")
demucs_enhanced_files = align_to_ids(file_ids, demucs_enhanced_dict, "Demucs")
fullsubnet_enhanced_files = align_to_ids(file_ids, fullsubnet_enhanced_dict, "FullSubNet")



In [4]:
# For every audio set, extract Functionals ('f') first and then
# LowLevelDescriptors ('l'); each pair is unpacked into the *_f / *_l names.
clean_f, clean_l = [egemaps(clean_files, level) for level in ('f', 'l')]
noisy_f, noisy_l = [egemaps(noisy_files, level) for level in ('f', 'l')]
demucs_f, demucs_l = [egemaps(demucs_enhanced_files, level) for level in ('f', 'l')]
fullsub_f, fullsub_l = [egemaps(fullsubnet_enhanced_files, level) for level in ('f', 'l')]

In [5]:
import numpy as np

In [6]:
# Per-feature mean absolute error of each LLD table against the clean one,
# averaged over rows (axis 0).
# NOTE(review): the subtraction is positional, so it presumably relies on all
# four tables having the same shape and row order; confirm.
clean_values = clean_l.to_numpy()
noisy_mae, demucs_mae, fullsubnet_mae = (
    np.abs(frame.to_numpy() - clean_values).mean(axis=0)
    for frame in (noisy_l, demucs_l, fullsub_l)
)

In [7]:
def Improve(error_enhanced, error_noisy):
    """Return the relative error reduction of an enhanced signal.

    Computed as ``(error_noisy - error_enhanced) / error_noisy``: positive
    when the enhanced error is below the noisy error, negative when it is
    above. Works element-wise on NumPy arrays as well as on scalars.
    """
    return (error_noisy - error_enhanced) / error_noisy
# Relative improvement of each enhancer's MAE over the noisy baseline MAE.
Imp_demucs, Imp_fullsubnet = (
    Improve(enhanced_mae, noisy_mae) for enhanced_mae in (demucs_mae, fullsubnet_mae)
)

In [8]:
# Show the column labels of the noisy LLD table; the same labels are used as
# the row index of MAE_df and Improve_df.
noisy_l.columns

Index(['Loudness_sma3', 'alphaRatio_sma3', 'hammarbergIndex_sma3',
       'slope0-500_sma3', 'slope500-1500_sma3', 'spectralFlux_sma3',
       'mfcc1_sma3', 'mfcc2_sma3', 'mfcc3_sma3', 'mfcc4_sma3',
       'F0semitoneFrom27.5Hz_sma3nz', 'jitterLocal_sma3nz',
       'shimmerLocaldB_sma3nz', 'HNRdBACF_sma3nz', 'logRelF0-H1-H2_sma3nz',
       'logRelF0-H1-A3_sma3nz', 'F1frequency_sma3nz', 'F1bandwidth_sma3nz',
       'F1amplitudeLogRelF0_sma3nz', 'F2frequency_sma3nz',
       'F2bandwidth_sma3nz', 'F2amplitudeLogRelF0_sma3nz',
       'F3frequency_sma3nz', 'F3bandwidth_sma3nz',
       'F3amplitudeLogRelF0_sma3nz'],
      dtype='object')

In [9]:
# Two empty result tables (MAE and relative improvement), each indexed by the
# LLD column labels of noisy_l.
MAE_df, Improve_df = (pd.DataFrame(index=noisy_l.columns) for _ in range(2))

In [10]:
# Fill the MAE table: one column per condition, one row per LLD feature.
for column, mae in (
    ("noisy", noisy_mae),
    ("demucs_enhanced", demucs_mae),
    ("fullsubnet_enhanced", fullsubnet_mae),
):
    MAE_df[column] = mae

# Fill the improvement table: one column per enhancer.
for column, improvement in (
    ("demucs_enhanced", Imp_demucs),
    ("fullsubnet_enhanced", Imp_fullsubnet),
):
    Improve_df[column] = improvement

In [11]:
# Display the per-feature MAE table (columns: noisy, demucs_enhanced,
# fullsubnet_enhanced).
MAE_df

Unnamed: 0,noisy,demucs_enhanced,fullsubnet_enhanced
Loudness_sma3,0.170484,0.036405,0.201656
alphaRatio_sma3,5.849057,3.091248,3.377025
hammarbergIndex_sma3,7.115644,4.116732,4.262281
slope0-500_sma3,0.031842,0.016648,0.020932
slope500-1500_sma3,0.011632,0.007806,0.009824
spectralFlux_sma3,0.080228,0.012163,0.13175
mfcc1_sma3,8.656893,4.552648,4.815643
mfcc2_sma3,7.910419,4.75393,4.991512
mfcc3_sma3,8.024676,4.967277,5.86754
mfcc4_sma3,7.785574,5.171243,5.575912


In [12]:
# Display the per-feature relative improvement over the noisy baseline;
# a negative value means the enhanced MAE is larger than the noisy MAE.
Improve_df

Unnamed: 0,demucs_enhanced,fullsubnet_enhanced
Loudness_sma3,0.786459,-0.182841
alphaRatio_sma3,0.471496,0.422638
hammarbergIndex_sma3,0.421453,0.400999
slope0-500_sma3,0.477165,0.342616
slope500-1500_sma3,0.328931,0.155407
spectralFlux_sma3,0.848398,-0.642204
mfcc1_sma3,0.474101,0.443722
mfcc2_sma3,0.399029,0.368995
mfcc3_sma3,0.381,0.268813
mfcc4_sma3,0.335792,0.283815


In [13]:
# Feature indices ordered by ascending Demucs improvement (smallest first).
np.argsort(Imp_demucs)

array([23, 17, 20, 22, 19, 16,  4,  9,  8, 12,  7, 11,  2,  1,  6,  3, 15,
       14, 24, 18, 21, 10,  0, 13,  5])

In [14]:
# Build an eGeMAPSv02 low-level-descriptor extractor only to read its feature
# names; no audio is processed here.
lld_feature_set = opensmile.FeatureSet.eGeMAPSv02
lld_feature_level = opensmile.FeatureLevel.LowLevelDescriptors
LLD = opensmile.Smile(feature_set=lld_feature_set, feature_level=lld_feature_level)

FEATURE_NAMES = LLD.feature_names

In [15]:
# Number of feature names reported by the LLD extractor.
len(FEATURE_NAMES)

25

In [16]:
# List the feature names reported by the LLD extractor.
FEATURE_NAMES

['Loudness_sma3',
 'alphaRatio_sma3',
 'hammarbergIndex_sma3',
 'slope0-500_sma3',
 'slope500-1500_sma3',
 'spectralFlux_sma3',
 'mfcc1_sma3',
 'mfcc2_sma3',
 'mfcc3_sma3',
 'mfcc4_sma3',
 'F0semitoneFrom27.5Hz_sma3nz',
 'jitterLocal_sma3nz',
 'shimmerLocaldB_sma3nz',
 'HNRdBACF_sma3nz',
 'logRelF0-H1-H2_sma3nz',
 'logRelF0-H1-A3_sma3nz',
 'F1frequency_sma3nz',
 'F1bandwidth_sma3nz',
 'F1amplitudeLogRelF0_sma3nz',
 'F2frequency_sma3nz',
 'F2bandwidth_sma3nz',
 'F2amplitudeLogRelF0_sma3nz',
 'F3frequency_sma3nz',
 'F3bandwidth_sma3nz',
 'F3amplitudeLogRelF0_sma3nz']

In [17]:
import plotly
import plotly.subplots
import plotly.graph_objects

In [23]:
fig = plotly.graph_objects.Figure()

# Sort features by ascending Demucs improvement; both traces use this order
# so their bars line up on a shared y axis.
ORDER = np.argsort(Imp_demucs)
# Keep only the part of each feature name before the first underscore
# (e.g. "Loudness_sma3" -> "Loudness") as the axis label.
FEATURES = [FEATURE_NAMES[i].split("_")[0] for i in ORDER]

# One horizontal bar trace per enhancer, Demucs first.
for improvement, trace_name in (
    (Imp_demucs, "Demucs Improvement Over Noisy"),
    (Imp_fullsubnet, "FullSubNet Improvement Over Noisy"),
):
    fig.add_trace(
        trace=plotly.graph_objects.Bar(
            y=FEATURES,
            x=improvement[ORDER],
            orientation='h',
            name=trace_name,
        )
    )


HEIGHT = 290
WIDTH = 240
fig.update_layout(
    height=3 * HEIGHT,
    width=3 * WIDTH,
    legend=dict(orientation="h", yanchor="bottom"),
    margin=dict(l=0, r=0, t=0, b=0),
    bargap=0.50,
    xaxis_range=[-1, 1],
    xaxis=dict(tickmode='linear', dtick=0.2))

FONT_FAMILY = "Times New Roman"
FONT_SIZE = 8

# Apply the font settings to every Figure bound to a name starting with "fig".
# The namespace is snapshotted because the loop variables are added to it, and
# the isinstance check keeps unrelated names with the same prefix (for example
# a "figsize" tuple) from raising AttributeError.
for var_name, value in list(locals().items()):
    if var_name[:3] == 'fig' and isinstance(value, plotly.graph_objects.Figure):
        value.update_layout(font_family=FONT_FAMILY, font_size=2 * FONT_SIZE)


fig.show()