# Listening Analysis

In [None]:
import os
import json
import pandas as pd
import numpy as np
import soundfile as sf
from scipy import stats
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import display, Audio

### Constants

In [None]:
# --- Locations and run identifiers (fill in before running) ---------------
# Base directory that every path in this notebook is joined onto.
ROOT_DIR = ''
# Date tag that appears in the names of the result CSV files.
DATE = ''

# --- Experiment description ----------------------------------------------
# Separation model whose outputs and metric files are inspected.
MODEL = 'scnet'
# Dataset variants; later cells index this list (0 -> binaural, 1 -> stereo).
DATASETS = [
    'binaural',
    'stereo',
]
# Source names; the first entry is the default listening target.
STEMS = [
    'vocals',
    'drums',
    'bass',
    'other',
]

In [None]:
# Directories holding the reference audio for each dataset variant.
# The listening cells read '<dir>/<song title>/mixture.wav' and
# '<dir>/<song title>/<stem>.wav' from these locations.
REF_STEREO = os.path.join(ROOT_DIR, 'data/musdb18hq/test')
REF_BINAURAL = os.path.join(ROOT_DIR, 'data/binaural_musdb18/random/test')

### Metadata

In [None]:
# Load the dataset metadata JSON; only its 'test' entry is used below.
with open(os.path.join(ROOT_DIR, 'binaural-mss/data/binaural_musdb_metadata.json'), 'rb') as f:
    metadata_dict = json.load(f)

# One row per key of the 'test' mapping. The keys become an ordinary column
# named 'title' instead of staying in the index.
metadata = (
    pd.DataFrame.from_dict(metadata_dict['test'], orient='index')
    .reset_index()
    .rename(columns={'index': 'title'})
)

# Plain Python list of every title in the test split.
songs = metadata['title'].tolist()

In [None]:
# Reshape the wide metadata table (one column per non-title field) into long
# form: one row per (title, source) pair with the corresponding value stored
# in 'angle'. Rows are ordered by title with a fresh 0..n-1 index.
angles = (
    metadata
    .melt(id_vars='title')
    .sort_values('title', ignore_index=True)
    .rename(columns={'variable': 'source', 'value': 'angle'})
)

### Metrics

In [None]:
# Read the four per-run result tables (two metric families x two dataset
# variants). The files are read in the order listed, and each one is resolved
# relative to ROOT_DIR.
spauq_binaural, spauq_stereo, interaural_binaural, interaural_stereo = (
    pd.read_csv(os.path.join(ROOT_DIR, relative_path))
    for relative_path in (
        f'binaural-mss/results/spauq/spauq_{DATE}_{MODEL}_{DATASETS[0]}.csv',
        f'binaural-mss/results/spauq/spauq_{DATE}_{MODEL}_{DATASETS[1]}.csv',
        f'binaural-mss/results/interaural/interaural_{DATE}_{MODEL}_{DATASETS[0]}.csv',
        f'binaural-mss/results/interaural/interaural_{DATE}_{MODEL}_{DATASETS[1]}.csv',
    )
)

In [None]:
# Combine the two metric tables of each dataset variant into one frame.
# No join keys are given, so pandas joins on every column name the two
# tables share (inner join). The 'cost', 'shift' and 'scale' columns are
# discarded afterwards.
bin_df = (
    spauq_binaural
    .merge(interaural_binaural)
    .drop(columns=['cost', 'shift', 'scale'])
)
stereo_df = (
    spauq_stereo
    .merge(interaural_stereo)
    .drop(columns=['cost', 'shift', 'scale'])
)

In [None]:
# Tag every row with the dataset variant it belongs to, so the two frames
# can still be told apart once they are stacked into a single table.
bin_df = bin_df.assign(dataset='binaural')
stereo_df = stereo_df.assign(dataset='stereo')

In [None]:
# Attach the per-source angle to the binaural rows. The join uses whatever
# columns the two frames have in common (presumably 'title' and 'source' --
# confirm against the CSV headers) and keeps only rows present in both.
bin_df = bin_df.merge(angles)

# Stereo rows get an all-NaN 'angle' column so that the column exists in
# both frames before they are concatenated.
stereo_df = stereo_df.assign(angle=np.nan)

In [None]:
# Stack binaural and stereo results into a single table, ordered by song
# title, then source, then dataset variant, with a fresh 0..n-1 index.
metrics = (
    pd.concat([bin_df, stereo_df])
    .sort_values(['title', 'source', 'dataset'], ignore_index=True)
)

In [None]:
# metrics.to_csv(f'{ROOT_DIR}/eval/eval_results/{MODEL}_test_metrics.csv', index=False)

## Listening

In [None]:
# Directory with MODEL's estimated stems for the binaural test set; the
# listening cell reads '<dir>/<song title>/<stem>.wav' from it.
EST_BINAURAL = os.path.join(ROOT_DIR, f'binaural-mss/data/output/{MODEL}/binaural/test')

In [None]:
# Title of the song to audition. Fill this in before running the cells
# below: it is used both as a folder name under the reference/estimate
# directories and as the value matched against metrics['title'].
song_name = ''

In [None]:
# Stems to audition and report metrics for; defaults to the first entry of
# STEMS only. Extend this list to listen to more stems.
targets = [STEMS[0]]

### Binaural

In [None]:
# Audition one song from the binaural test set: play the input mixture, then
# for each target stem print its stored metrics and play the estimated and
# reference audio back to back.
print(">>> BINAURAL <<<")
print(f"Model: {MODEL}")
print(f"Song Title: {song_name}")

print("\nInput Mixture:")
display(Audio(os.path.join(REF_BINAURAL, song_name, 'mixture.wav')))

# Metric rows for this song, restricted to the binaural dataset variant.
song_metrics = metrics[(metrics['title'] == song_name) & (metrics['dataset'] == 'binaural')]
for stem in targets:
    print(f"\n--- {stem.upper()} ---")
    stem_metrics = song_metrics[song_metrics['source'] == stem]
    if stem_metrics.empty:
        # Indexing .values[0] on an empty selection raises a bare IndexError
        # that does not say which song/stem is missing. Report it and carry
        # on so the audio can still be auditioned.
        print(f"No metrics found for song '{song_name}', stem '{stem}' in the binaural results.")
    else:
        # Only the first matching row is reported.
        print(f"Angle: {stem_metrics['angle'].values[0]}")
        print(f"SSR: {stem_metrics['SSR'].values[0]:.2f} dB")
        print(f"SRR: {stem_metrics['SRR'].values[0]:.2f} dB")
        print(f"Diff ITD: {stem_metrics['diff_ITD'].values[0]:.2f} microseconds")
        print(f"Diff ILD: {stem_metrics['diff_ILD'].values[0]:.2f} dB")

    print("\nEstimated:")
    display(Audio(os.path.join(EST_BINAURAL, song_name, f'{stem}.wav')))

    print("\nReference:")
    display(Audio(os.path.join(REF_BINAURAL, song_name, f'{stem}.wav')))

### Stereo

In [None]:
# Directory with MODEL's estimated stems for the stereo test set; the
# listening cell reads '<dir>/<song title>/<stem>.wav' from it.
EST_STEREO = os.path.join(ROOT_DIR, f'binaural-mss/data/output/{MODEL}/stereo/test')

In [None]:
# Audition one song from the stereo test set: play the input mixture, then
# for each target stem print its stored metrics and play the estimated and
# reference audio back to back. No angle is printed for stereo.
print(">>> STEREO <<<")
print(f"Model: {MODEL}")
print(f"Song Title: {song_name}")

print("\nInput Mixture:")
display(Audio(os.path.join(REF_STEREO, song_name, 'mixture.wav')))

# Metric rows for this song, restricted to the stereo dataset variant.
song_metrics = metrics[(metrics['title'] == song_name) & (metrics['dataset'] == 'stereo')]
for stem in targets:
    print(f"\n--- {stem.upper()} ---")
    stem_metrics = song_metrics[song_metrics['source'] == stem]
    if stem_metrics.empty:
        # Indexing .values[0] on an empty selection raises a bare IndexError
        # that does not say which song/stem is missing. Report it and carry
        # on so the audio can still be auditioned.
        print(f"No metrics found for song '{song_name}', stem '{stem}' in the stereo results.")
    else:
        # Only the first matching row is reported.
        print(f"SSR: {stem_metrics['SSR'].values[0]:.2f} dB")
        print(f"SRR: {stem_metrics['SRR'].values[0]:.2f} dB")
        print(f"Diff ITD: {stem_metrics['diff_ITD'].values[0]:.2f} microseconds")
        print(f"Diff ILD: {stem_metrics['diff_ILD'].values[0]:.2f} dB")

    print("\nEstimated:")
    display(Audio(os.path.join(EST_STEREO, song_name, f'{stem}.wav')))

    print("\nReference:")
    display(Audio(os.path.join(REF_STEREO, song_name, f'{stem}.wav')))