# SPAUQ Metrics

Watcharasupat, K. N., & Lerch, A. (2024). Quantifying Spatial Audio Quality Impairment. In _2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)_ (pp. 746-750). IEEE.

In [None]:
import os
from datetime import datetime
import numpy as np
import pandas as pd
import soundfile as sf
from tqdm import tqdm
from spauq.core.metrics import spauq_eval
from IPython.display import display, Audio

## SPAUQ: Spatial Audio Quality Evaluation

Signal-to-Spatial Distortion Ratio (SSR) and Signal-to-Residual Distortion Ratio (SRR)

```
spauq_eval

    Parameters
    ----------
    reference
        reference signal, shape=(n_channels, n_samples)
    estimate
        estimated signal, shape=(n_channels, n_samples)
    fs
        sampling rate in Hz
    return_framewise
        whether to return the framewise evaluation result
    return_cost
        whether to return the estimation error (cost function)
    return_shift
        whether to return the estimated shift parameters
    return_scale
        whether to return the estimated scale parameters
    verbose
        whether to use verbose mode
    kwargs
        additional keyword arguments to be passed to the internal
        function.


    Returns
    -------
    dict
        A dictionary with the following keys:
        - SSR (`float` or `np.ndarray`)
            Median signal to spatial distortion ratio in dB.
            An array of shape (n_frames,) if `return_framewise` is true.

        - SRR (`float` or `np.ndarray`)
            Median signal to residual distortion ratio in dB.
            An array of shape (n_frames,) if `return_framewise` is true.

        - cost (`float` or `np.ndarray`)
            Median cost function of the estimates over frames.
            An array of shape (n_frames,) if `return_framewise` is true.

        - shift (`np.ndarray`)
            An array of shape (n_channels, n_channels,) of the elementwise
            median of the shift matrices over frames.
            An array of shape (n_channels, n_channels, n_frames,) if `return_framewise` is true.

        - scale (`np.ndarray`)
            An array of shape (n_channels, n_channels,) of the elementwise
            median of the scale matrices over frames.
            An array of shape (n_channels, n_channels, n_frames,) if `return_framewise` is true.
```

```
_DefaultWindowLengthSeconds = 1.0
_DefaultHopLengthSeconds = 0.5
_DefaultMaximumGlobalShiftSeconds = np.inf
_DefaultMaximumSegmentShiftSeconds = 0.1
_DefaultSilenceThreshold = 1e-8
```

In [None]:
# Output switches forwarded to spauq_eval.
# Framewise output is off, so each metric is a median over frames.
RETURN_FRAMEWISE = False
# Also request the cost, shift and scale entries of the result dictionary.
RETURN_COST = RETURN_SHIFT = RETURN_SCALE = True

In [None]:
def evaluate(ref_path, est_path):
    """Compute SPAUQ metrics for one reference/estimate pair of stereo files.

    Both files are read with ``soundfile``. If their lengths differ, both
    signals are truncated to the shorter length before evaluation.

    Parameters
    ----------
    ref_path
        path to the reference audio file
    est_path
        path to the estimated audio file

    Returns
    -------
    dict
        The dictionary returned by ``spauq_eval``. Which optional entries it
        holds is controlled by the module-level ``RETURN_*`` flags.

    Raises
    ------
    ValueError
        If the two files have different sample rates, or if either file
        does not have exactly two channels.
    """
    # always_2d=True keeps the (samples, channels) layout even for mono files,
    # so the shape unpacking below cannot fail and the channel check can
    # report the actual problem.
    y_ref, sr_ref = sf.read(ref_path, always_2d=True)
    y_est, sr_est = sf.read(est_path, always_2d=True)

    n_ref, c_ref = y_ref.shape
    n_est, c_est = y_est.shape

    # check sample rates
    if sr_ref != sr_est:
        raise ValueError(
            f"Sample rate mismatch: {ref_path} is {sr_ref} Hz, "
            f"{est_path} is {sr_est} Hz"
        )

    # check number of channels
    if not (c_ref == c_est == 2):
        raise ValueError(
            f"Expected two-channel audio, got {c_ref} channel(s) in {ref_path} "
            f"and {c_est} channel(s) in {est_path}"
        )

    # make sure that the number of samples is the same
    # if not, trim the longer signal
    if n_ref != n_est:
        n_common = min(n_ref, n_est)
        y_ref = y_ref[:n_common, :]
        y_est = y_est[:n_common, :]

    # compute SPAUQ metrics
    # transpose arrays so that the dimensions are (channels, samples)
    return spauq_eval(
        reference=y_ref.T,
        estimate=y_est.T,
        fs=sr_ref,
        return_framewise=RETURN_FRAMEWISE,
        return_cost=RETURN_COST,
        return_shift=RETURN_SHIFT,
        return_scale=RETURN_SCALE,
        forgive_mode=None,
        verbose=False,
    )

## Evaluation

In [None]:
# Separation model whose outputs are evaluated; left empty here.
# Options: htdemucs_ft, spleeter, umxhq
MODEL = ''
# Stems evaluated for every song.
STEMS = ["drums", "bass", "other", "vocals"]
# Date stamp (YYYY-MM-DD) of this run.
DATE = f"{datetime.now():%Y-%m-%d}"
# Directory for the evaluation results.
EVAL_DIR = "../data/eval/spauq/"

In [None]:
# create the output directory if it does not already exist
# (exist_ok=True means no error is raised when the directory is already there)
print("Creating evaluation directory, if it does not already exist...")
os.makedirs(EVAL_DIR, exist_ok=True)

### Stereo Data

In [None]:
# select the dataset and set the reference and estimate directories
DATASET = 'stereo'
# plain string: the path has no placeholders, so no f-prefix is needed
REFERENCE_DIR = "../data/musdb18hq/test/"
# estimates are laid out per model and per dataset
ESTIMATE_DIR = f"../data/output/{MODEL}/{DATASET}/test/"

In [None]:
# collect the name of every sub-directory of the reference directory
print("Loading list of files...")
song_list = [
    entry
    for entry in os.listdir(REFERENCE_DIR)
    if os.path.isdir(os.path.join(REFERENCE_DIR, entry))
]
print(f"There are {len(song_list)} files in the reference directory.")

In [None]:
# per-column accumulators; each receives one entry per (source, song) pair
title_list, source_list = [], []
ssr_list, srr_list, cost_list = [], [], []
shift_list, scale_list = [], []

# key of the evaluate() output -> list collecting that value
metric_columns = {
    "SSR": ssr_list,
    "SRR": srr_list,
    "cost": cost_list,
    "shift": shift_list,
    "scale": scale_list,
}

# evaluate each stem of each song against its reference
print("Beginning to evaluate stems...")
for source in STEMS:
    print(f"\n>>>>{source} <<<<")
    stem_file = f"{source}.wav"
    for song in tqdm(song_list):
        # reference and estimate use the same <song>/<source>.wav layout
        metrics = evaluate(
            os.path.join(REFERENCE_DIR, song, stem_file),
            os.path.join(ESTIMATE_DIR, song, stem_file),
        )

        title_list.append(song)
        source_list.append(source)
        for key, column in metric_columns.items():
            column.append(metrics[key])

# one row per (song, source) pair
results_df = pd.DataFrame(
    {"title": title_list, "source": source_list, **metric_columns}
)
print("Evaluation complete!")

In [None]:
# sort the rows by song title, then by source, and renumber the index
results_df.sort_values(by=['title', 'source'], inplace=True, ignore_index=True)

In [None]:
# the file name encodes the run date, the model and the dataset
csv_name = f'spauq_{DATE}_{MODEL}_{DATASET}.csv'
save_path = os.path.join(EVAL_DIR, csv_name)
# write the table without the index column
results_df.to_csv(save_path, index=False)

### Binaural Data

In [None]:
# select the dataset and set the reference and estimate directories
DATASET = 'binaural'
# plain string: the path has no placeholders, so no f-prefix is needed
REFERENCE_DIR = "../data/binaural_musdb18/test/"
# estimates are laid out per model and per dataset
ESTIMATE_DIR = f"../data/output/{MODEL}/{DATASET}/test/"

In [None]:
# collect the name of every sub-directory of the reference directory
print("Loading list of files...")
song_list = [
    entry
    for entry in os.listdir(REFERENCE_DIR)
    if os.path.isdir(os.path.join(REFERENCE_DIR, entry))
]
print(f"There are {len(song_list)} files in the reference directory.")

In [None]:
# per-column accumulators; each receives one entry per (source, song) pair
title_list, source_list = [], []
ssr_list, srr_list, cost_list = [], [], []
shift_list, scale_list = [], []

# key of the evaluate() output -> list collecting that value
metric_columns = {
    "SSR": ssr_list,
    "SRR": srr_list,
    "cost": cost_list,
    "shift": shift_list,
    "scale": scale_list,
}

# evaluate each stem of each song against its reference
print("Beginning to evaluate stems...")
for source in STEMS:
    print(f"\n>>>>{source} <<<<")
    stem_file = f"{source}.wav"
    for song in tqdm(song_list):
        # reference and estimate use the same <song>/<source>.wav layout
        metrics = evaluate(
            os.path.join(REFERENCE_DIR, song, stem_file),
            os.path.join(ESTIMATE_DIR, song, stem_file),
        )

        title_list.append(song)
        source_list.append(source)
        for key, column in metric_columns.items():
            column.append(metrics[key])

# one row per (song, source) pair
results_df = pd.DataFrame(
    {"title": title_list, "source": source_list, **metric_columns}
)
print("Evaluation complete!")

In [None]:
# sort the rows by song title, then by source, and renumber the index
results_df.sort_values(by=['title', 'source'], inplace=True, ignore_index=True)

In [None]:
# the file name encodes the run date, the model and the dataset
csv_name = f'spauq_{DATE}_{MODEL}_{DATASET}.csv'
save_path = os.path.join(EVAL_DIR, csv_name)
# write the table without the index column
results_df.to_csv(save_path, index=False)