# Numerical Analysis of Metrics

In [1]:
import os
import json
import time
import numpy as np
import pandas as pd

In [2]:
# stem (source) names
STEMS = [
    'bass',
    'drums',
    'other',
    'vocals',
]

# dataset identifiers
DATASETS = [
    'binaural',
    'stereo',
]

# model identifiers
# NOTE(review): SCNet result files are also loaded in this notebook, but
# 'scnet' is not listed here -- confirm whether the omission is intentional.
MODELS = [
    'htdemucs',
    'spleeter',
    'umxhq',
    'bs_roformer',
]

## SPAUQ: Spatial Audio Quality Evaluation

### Data

In [3]:
# Read the precomputed SPAUQ result tables: one CSV per (model, dataset) pair.
# Each assignment binds the binaural frame first, then the stereo frame.
ht_binaural_df, ht_stereo_df = (
    pd.read_csv('../results/spauq/spauq_2025-03-23_htdemucs_ft_binaural.csv'),
    pd.read_csv('../results/spauq/spauq_2025-03-23_htdemucs_ft_stereo.csv'),
)

spleet_binaural_df, spleet_stereo_df = (
    pd.read_csv('../results/spauq/spauq_2025-03-26_spleeter_binaural.csv'),
    pd.read_csv('../results/spauq/spauq_2025-03-26_spleeter_stereo.csv'),
)

# NOTE: the two umxhq files carry different dates (03-24 binaural, 03-23 stereo).
umx_binaural_df, umx_stereo_df = (
    pd.read_csv('../results/spauq/spauq_2025-03-24_umxhq_binaural.csv'),
    pd.read_csv('../results/spauq/spauq_2025-03-23_umxhq_stereo.csv'),
)

bsr_binaural_df, bsr_stereo_df = (
    pd.read_csv('../results/spauq/spauq_2025-07-30_bs_roformer_binaural.csv'),
    pd.read_csv('../results/spauq/spauq_2025-07-30_bs_roformer_stereo.csv'),
)

scn_binaural_df, scn_stereo_df = (
    pd.read_csv('../results/spauq/spauq_2025-08-11_scnet_binaural.csv'),
    pd.read_csv('../results/spauq/spauq_2025-08-11_scnet_stereo.csv'),
)

In [4]:
# Tag every SPAUQ frame (in place) with the dataset it belongs to.
for binaural_frame, stereo_frame in (
    (ht_binaural_df, ht_stereo_df),
    (spleet_binaural_df, spleet_stereo_df),
    (umx_binaural_df, umx_stereo_df),
    (bsr_binaural_df, bsr_stereo_df),
    (scn_binaural_df, scn_stereo_df),
):
    binaural_frame['dataset'] = 'Binaural'
    stereo_frame['dataset'] = 'Stereo'

In [5]:
# Stack the binaural and stereo rows of each model into one frame per model,
# renumbering the index from 0.
htdemucs_df, spleet_df, umx_df, bsr_df, scn_df = (
    pd.concat([binaural_frame, stereo_frame], ignore_index=True)
    for binaural_frame, stereo_frame in (
        (ht_binaural_df, ht_stereo_df),
        (spleet_binaural_df, spleet_stereo_df),
        (umx_binaural_df, umx_stereo_df),
        (bsr_binaural_df, bsr_stereo_df),
        (scn_binaural_df, scn_stereo_df),
    )
)

In [6]:
# Label each per-model frame (in place) with the model's display name.
for model_frame, model_label in (
    (htdemucs_df, 'Demucs'),
    (spleet_df, 'Spleeter'),
    (umx_df, 'OpenUnmix'),
    (bsr_df, 'BS-RoFormer'),
    (scn_df, 'SCNet'),
):
    model_frame['model'] = model_label

In [7]:
# Stack all per-model SPAUQ frames and keep only the columns used downstream.
spauq_frames = [htdemucs_df, spleet_df, umx_df, bsr_df, scn_df]
spauq_columns = ['title', 'source', 'SSR', 'SRR', 'dataset', 'model']
all_models_spauq = pd.concat(spauq_frames, ignore_index=True)[spauq_columns]

## Interaural Time Difference (ITD) & Interaural Level Difference (ILD)

### Data

In [8]:
# Read the precomputed ITD/ILD result tables: one CSV per (model, dataset) pair.
# NOTE: these variable names were bound to the SPAUQ frames earlier in the
# notebook; this cell rebinds them to the interaural frames.
ht_binaural_df, ht_stereo_df = (
    pd.read_csv('../results/interaural/interaural_2025-06-26_htdemucs_ft_binaural.csv'),
    pd.read_csv('../results/interaural/interaural_2025-06-26_htdemucs_ft_stereo.csv'),
)

spleet_binaural_df, spleet_stereo_df = (
    pd.read_csv('../results/interaural/interaural_2025-06-26_spleeter_binaural.csv'),
    pd.read_csv('../results/interaural/interaural_2025-06-26_spleeter_stereo.csv'),
)

umx_binaural_df, umx_stereo_df = (
    pd.read_csv('../results/interaural/interaural_2025-06-26_umxhq_binaural.csv'),
    pd.read_csv('../results/interaural/interaural_2025-06-26_umxhq_stereo.csv'),
)

bsr_binaural_df, bsr_stereo_df = (
    pd.read_csv('../results/interaural/interaural_2025-07-30_bs_roformer_binaural.csv'),
    pd.read_csv('../results/interaural/interaural_2025-07-30_bs_roformer_stereo.csv'),
)

scn_binaural_df, scn_stereo_df = (
    pd.read_csv('../results/interaural/interaural_2025-08-11_scnet_binaural.csv'),
    pd.read_csv('../results/interaural/interaural_2025-08-11_scnet_stereo.csv'),
)

In [9]:
# Tag every interaural frame (in place) with the dataset it belongs to.
for binaural_frame, stereo_frame in (
    (ht_binaural_df, ht_stereo_df),
    (spleet_binaural_df, spleet_stereo_df),
    (umx_binaural_df, umx_stereo_df),
    (bsr_binaural_df, bsr_stereo_df),
    (scn_binaural_df, scn_stereo_df),
):
    binaural_frame['dataset'] = 'Binaural'
    stereo_frame['dataset'] = 'Stereo'

In [10]:
# Stack the binaural and stereo interaural rows of each model into one frame
# per model, renumbering the index from 0.
htdemucs_df, spleet_df, umx_df, bsr_df, scn_df = (
    pd.concat([binaural_frame, stereo_frame], ignore_index=True)
    for binaural_frame, stereo_frame in (
        (ht_binaural_df, ht_stereo_df),
        (spleet_binaural_df, spleet_stereo_df),
        (umx_binaural_df, umx_stereo_df),
        (bsr_binaural_df, bsr_stereo_df),
        (scn_binaural_df, scn_stereo_df),
    )
)

In [11]:
# Label each per-model interaural frame (in place) with the model's display name.
for model_frame, model_label in (
    (htdemucs_df, 'Demucs'),
    (spleet_df, 'Spleeter'),
    (umx_df, 'OpenUnmix'),
    (bsr_df, 'BS-RoFormer'),
    (scn_df, 'SCNet'),
):
    model_frame['model'] = model_label

In [12]:
# Stack all per-model interaural frames and keep only the columns used downstream.
interaural_frames = [htdemucs_df, spleet_df, umx_df, bsr_df, scn_df]
interaural_columns = ['title', 'source', 'diff_ITD', 'diff_ILD', 'dataset', 'model']
all_models_id = pd.concat(interaural_frames, ignore_index=True)[interaural_columns]

## All Data

**By Source**

In [13]:
# Join the SPAUQ and interaural tables row-for-row on the shared identifying
# columns, then drop `title` and order the remaining columns.
# NOTE(review): this is an inner join, so rows whose key is missing from either
# table are dropped silently -- consider comparing row counts before and after.
merge_keys = ['title', 'source', 'dataset', 'model']
metric_columns = ['source', 'dataset', 'model', 'SSR', 'SRR', 'diff_ITD', 'diff_ILD']
all_data = all_models_spauq.merge(all_models_id, how='inner', on=merge_keys)
all_data = all_data[metric_columns]

In [14]:
# Median of every metric per (dataset, model, source), rounded to 2 decimals.
grouped_by_source = all_data.groupby(['dataset', 'model', 'source'])
source_metrics_median = grouped_by_source.median().round(2)

In [15]:
# Write the per-source median table to disk.
median_by_source_csv = '../results/median_metrics_by_source.csv'
source_metrics_median.to_csv(median_by_source_csv)

In [16]:
# show the per-source median table as this cell's output
source_metrics_median

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,SSR,SRR,diff_ITD,diff_ILD
dataset,model,source,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Binaural,BS-RoFormer,bass,8.63,10.57,498.87,0.06
Binaural,BS-RoFormer,drums,9.63,14.01,0.0,0.04
Binaural,BS-RoFormer,other,11.48,7.92,0.0,0.26
Binaural,BS-RoFormer,vocals,8.45,10.37,0.0,0.32
Binaural,Demucs,bass,9.13,8.9,476.19,0.2
Binaural,Demucs,drums,10.39,10.58,0.0,0.31
Binaural,Demucs,other,12.62,4.1,22.68,0.57
Binaural,Demucs,vocals,8.7,4.37,0.0,0.42
Binaural,OpenUnmix,bass,10.94,3.37,521.54,0.41
Binaural,OpenUnmix,drums,12.22,6.75,0.0,0.38


**Overall**

In [17]:
# Keep every column except `source` so medians can be pooled across sources.
overall_columns = ['dataset', 'model', 'SSR', 'SRR', 'diff_ITD', 'diff_ILD']
overall_data = all_data[overall_columns]

In [18]:
# Median of every metric per (dataset, model), pooled over all sources and
# rounded to 2 decimals.
grouped_overall = overall_data.groupby(['dataset', 'model'])
overall_metrics_median = grouped_overall.median().round(2)

In [19]:
# Write the overall median table to disk.
median_overall_csv = '../results/median_metrics_overall.csv'
overall_metrics_median.to_csv(median_overall_csv)

In [20]:
# show the overall median table as this cell's output
overall_metrics_median

Unnamed: 0_level_0,Unnamed: 1_level_0,SSR,SRR,diff_ITD,diff_ILD
dataset,model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Binaural,BS-RoFormer,9.7,10.95,0.0,0.1
Binaural,Demucs,10.59,6.91,68.03,0.39
Binaural,OpenUnmix,10.43,3.51,90.7,0.5
Binaural,SCNet,10.29,8.88,272.11,0.31
Binaural,Spleeter,9.86,2.01,22.68,0.64
Stereo,BS-RoFormer,19.36,8.82,0.0,0.05
Stereo,Demucs,16.01,7.39,0.0,0.08
Stereo,OpenUnmix,10.73,3.14,0.0,0.12
Stereo,SCNet,18.87,8.83,0.0,0.06
Stereo,Spleeter,10.78,3.21,0.0,0.12
