In [136]:
from functools import partial
from itertools import chain

import librosa.effects
import numpy as np
import pandas as pd
import soundfile as sf

import os

from ipywidgets import Audio
from matplotlib import pyplot as plt
from tqdm.contrib.concurrent import process_map
from tqdm.notebook import tqdm

import librosa

In [137]:
# Root of the local MoisesDB copy. Set the MOISESDB_ROOT environment variable
# to run the notebook against another location; the fallback is the path that
# was previously hard-coded here.
DATA_ROOT = os.environ.get("MOISESDB_ROOT", "/home/kwatchar3/Documents/data/moisesdb")

# Predictions are read from
# <INFERENCE_ROOT>/<inference_mode>/<STEM_SETUP>/<variant>/audio/<song>/<stem>.wav
# and the merged results CSV is written directly under INFERENCE_ROOT.
# os.path.join(..., "") keeps the trailing separator the original value had.
INFERENCE_ROOT = os.path.join(DATA_ROOT, "")
STEM_SETUP = "vdbgp"
# Ground-truth stems are read from <GROUND_TRUTH_ROOT>/npy2/<song>/<stem>.npy.
GROUND_TRUTH_ROOT = DATA_ROOT

In [138]:
# Model variants to evaluate, in reporting order: the three base variants
# first, then the same three with the "-bal" suffix.
variants = [
    *("vdbgp-d-pre", "vdbgp-d-prefz", "vdbgp-d-pre-aug"),
    *("vdbgp-d-pre-bal", "vdbgp-d-prefz-bal", "vdbgp-d-pre-aug-bal"),
]



In [139]:
# Names of the entries under the "npy2" ground-truth folder.
# NOTE(review): gt_files is not referenced by any later cell visible in this
# notebook — confirm whether it is still needed.
_gt_npy_root = os.path.join(GROUND_TRUTH_ROOT, "npy2")
gt_files = os.listdir(_gt_npy_root)

In [140]:
def snr(gt, est):
    """Return the signal-to-noise ratio of ``est`` against ``gt`` in decibels.

    Computed as ``10 * log10(sum(gt**2) / sum((gt - est)**2))``: the energy of
    the reference divided by the energy of the residual, each summed over all
    elements.
    """
    signal_energy = np.sum(np.square(gt))
    residual_energy = np.sum(np.square(gt - est))
    energy_ratio = signal_energy / residual_energy
    return 10 * np.log10(energy_ratio)

In [141]:
# Coarse stem -> fine-grained stem names whose ground truth and predictions
# are looked up and summed when that coarse stem is scored.
allowed_stems = {
    "vocals": ["lead_female_singer", "lead_male_singer"],
    "drums": ["drums"],
    # "bass_synthesizer" is currently disabled for the bass stem.
    "bass": ["bass_guitar"],
    "guitar": ["acoustic_guitar", "clean_electric_guitar", "distorted_electric_guitar"],
    "piano": ["grand_piano", "electric_piano"],
}


In [142]:
def get_results_for_song(inputs):
    """Score one song's predicted stems against its ground truth.

    Parameters
    ----------
    inputs : tuple
        ``(song_name, inference_mode, variant)``, packed into a single
        argument so the function can be mapped over a list of jobs.

    Returns
    -------
    list of dict
        One row per coarse stem in ``allowed_stems`` for which at least one
        fine-grained ground-truth file exists. Each row has the keys ``song``,
        ``stem``, ``snr``, ``variant`` and ``inference_mode``. For ``"vocals"``
        a second row labelled ``"lead_vocals"`` is added, scored against the
        sum of the fine-grained ground-truth stems only.

    Raises
    ------
    FileNotFoundError
        If the song's prediction directory does not exist.
    NotADirectoryError
        If the song's prediction path exists but is not a directory.
    """
    song_name, inference_mode, variant = inputs

    pred_dir = os.path.join(INFERENCE_ROOT, inference_mode, STEM_SETUP, variant, "audio", song_name)
    gt_dir = os.path.join(GROUND_TRUTH_ROOT, "npy2", song_name)

    # Fail loudly on a bad prediction path instead of silently scoring every
    # stem against silence. This replaces a directory listing whose result
    # was never used but which raised these same exceptions.
    if not os.path.exists(pred_dir):
        raise FileNotFoundError(pred_dir)
    if not os.path.isdir(pred_dir):
        raise NotADirectoryError(pred_dir)

    def make_row(stem_label, snr_value):
        # Key order is kept stable because it determines the column order of
        # the DataFrame built from these rows.
        return {
            "song": song_name,
            "stem": stem_label,
            "snr": snr_value,
            "variant": variant,
            "inference_mode": inference_mode,
        }

    results = []

    for coarse_stem, fine_stems in allowed_stems.items():
        coarse_true = []
        coarse_pred = []

        for stem in fine_stems:
            npy_path = os.path.join(gt_dir, f"{stem}.npy")
            stem_has_gt = os.path.exists(npy_path)
            if stem_has_gt:
                # Memory-mapped, read-only load of the ground-truth array.
                coarse_true.append(np.load(npy_path, mmap_mode="r"))

            audio_path = os.path.join(pred_dir, f"{stem}.wav")
            if os.path.exists(audio_path):
                audio, _ = sf.read(audio_path)
                # Transposed before summing — presumably to match the layout
                # of the ground-truth arrays; TODO confirm.
                coarse_pred.append(audio.T)
            elif stem_has_gt:
                # Only warn when there is ground truth the prediction should
                # have matched; a stem absent on both sides is not an error.
                print("******************************************************")
                print(f"Prediction not found for {song_name}/{stem}. Using zeros.")
                print("******************************************************")

        if not coarse_true:
            # No fine-grained ground truth for this coarse stem: skip it.
            continue

        # Prefer a dedicated coarse-stem ground-truth file when one exists;
        # otherwise fall back to the sum of the fine-grained stems.
        coarse_true_path = os.path.join(gt_dir, f"{coarse_stem}.npy")
        if os.path.exists(coarse_true_path):
            coarse_reference = np.load(coarse_true_path, mmap_mode="r")
        else:
            coarse_reference = sum(coarse_true)

        # sum([]) is the integer 0, so a coarse stem with no prediction files
        # is scored against an all-zero estimate.
        coarse_pred = sum(coarse_pred)

        results.append(make_row(coarse_stem, snr(coarse_reference, coarse_pred)))

        if coarse_stem == "vocals":
            # Extra row scored against the sum of the listed lead-singer stems
            # only, regardless of whether a coarse "vocals" file exists.
            results.append(make_row("lead_vocals", snr(sum(coarse_true), coarse_pred)))

    return results


In [143]:

# Evaluate every (inference mode, variant, song) combination and collect the
# per-stem rows. Rows accumulate in `records`, so `df` is only ever bound to
# the finished DataFrame — never to a half-filled list if the run is interrupted.
records = []

for inference_mode in ["inference-d", "inference-o"]:

    for v in variants:
        print(f"Processing {v}...")

        test_files = os.listdir(os.path.join(INFERENCE_ROOT, inference_mode, STEM_SETUP, v, "audio"))
        inputs = [(song, inference_mode, v) for song in test_files]

        # One job per song across worker processes; every job returns a list
        # of rows, so flatten before accumulating.
        per_song_rows = process_map(get_results_for_song, inputs, max_workers=16)
        records.extend(chain.from_iterable(per_song_rows))


df = pd.DataFrame(records)

Processing vdbgp-d-pre...


  0%|          | 0/48 [00:00<?, ?it/s]

Processing vdbgp-d-prefz...


  0%|          | 0/48 [00:00<?, ?it/s]

Processing vdbgp-d-pre-aug...


  0%|          | 0/48 [00:00<?, ?it/s]

Processing vdbgp-d-pre-bal...


  0%|          | 0/48 [00:00<?, ?it/s]

Processing vdbgp-d-prefz-bal...


  0%|          | 0/48 [00:00<?, ?it/s]

Processing vdbgp-d-pre-aug-bal...


  0%|          | 0/48 [00:00<?, ?it/s]

Processing vdbgp-d-pre...


  0%|          | 0/48 [00:00<?, ?it/s]

Processing vdbgp-d-prefz...


  0%|          | 0/48 [00:00<?, ?it/s]

Processing vdbgp-d-pre-aug...


  0%|          | 0/48 [00:00<?, ?it/s]

Processing vdbgp-d-pre-bal...


  0%|          | 0/48 [00:00<?, ?it/s]

Processing vdbgp-d-prefz-bal...


  0%|          | 0/48 [00:00<?, ?it/s]

Processing vdbgp-d-pre-aug-bal...


  0%|          | 0/48 [00:00<?, ?it/s]

In [144]:
# Preview the per-song, per-stem SNR table (one row per song/stem/variant/inference mode).
df

Unnamed: 0,song,stem,snr,variant,inference_mode
0,704f1de9-1d02-4c2b-af05-107a7700a51d,vocals,7.440387,vdbgp-d-pre,inference-d
1,704f1de9-1d02-4c2b-af05-107a7700a51d,lead_vocals,7.361787,vdbgp-d-pre,inference-d
2,704f1de9-1d02-4c2b-af05-107a7700a51d,drums,12.501521,vdbgp-d-pre,inference-d
3,704f1de9-1d02-4c2b-af05-107a7700a51d,bass,9.033754,vdbgp-d-pre,inference-d
4,704f1de9-1d02-4c2b-af05-107a7700a51d,guitar,1.717476,vdbgp-d-pre,inference-d
...,...,...,...,...,...
3007,1f98fe4d-26c7-460f-9f68-33964bc4d8d3,vocals,1.829890,vdbgp-d-pre-aug-bal,inference-o
3008,1f98fe4d-26c7-460f-9f68-33964bc4d8d3,lead_vocals,1.847866,vdbgp-d-pre-aug-bal,inference-o
3009,1f98fe4d-26c7-460f-9f68-33964bc4d8d3,drums,10.670884,vdbgp-d-pre-aug-bal,inference-o
3010,1f98fe4d-26c7-460f-9f68-33964bc4d8d3,bass,8.909531,vdbgp-d-pre-aug-bal,inference-o


In [145]:
# Blank out SNR values of -inf (they become NaN) so they count as missing data.
df["snr"] = df["snr"].mask(df["snr"].eq(-np.inf))

In [146]:
# Save the merged results under INFERENCE_ROOT, without the row index.
df.to_csv(
    path_or_buf=os.path.join(INFERENCE_ROOT, "bandit_vdbgp_merged.csv"),
    index=False,
)

In [159]:
# Reload the merged results from disk so the analysis can start from the CSV.
df = pd.read_csv(os.path.join(INFERENCE_ROOT, "bandit_vdbgp_merged.csv"))

# Ordered categorical: stems sort in this listed order rather than alphabetically.
stem_dtype = pd.CategoricalDtype(
    categories=["vocals", "lead_vocals", "drums", "bass", "guitar", "piano"],
    ordered=True,
)
df["stem"] = df["stem"].astype(stem_dtype)

# Two orderings of the same Y/N labels: bool_dtype puts "Y" first,
# ibool_dtype puts "N" first.
bool_dtype = pd.CategoricalDtype(categories=["Y", "N"], ordered=True)
ibool_dtype = pd.CategoricalDtype(categories=["N", "Y"], ordered=True)

# Each flag column is "Y" when the variant name contains the given token.
for flag_column, token, flag_dtype in [
    ("is_frozen", "prefz", bool_dtype),
    ("is_balanced", "bal", ibool_dtype),
    ("is_augmented", "aug", ibool_dtype),
]:
    has_token = df["variant"].str.contains(token)
    df[flag_column] = has_token.apply(lambda hit: "Y" if hit else "N").astype(flag_dtype)

# Inference modes containing "-o" are labelled "same"; all others "diff.".
matches_o_mode = df["inference_mode"].str.contains("-o")
df["query_same"] = matches_o_mode.apply(lambda hit: "same" if hit else "diff.")

In [160]:
def q25(x):
    """Return the first quartile (0.25 quantile) of ``x``."""
    return x.quantile(q=0.25)


def q75(x):
    """Return the third quartile (0.75 quantile) of ``x``."""
    return x.quantile(q=0.75)


def q50(x):
    """Return the median (0.5 quantile) of ``x``."""
    return x.quantile(q=0.5)

# Median SNR for every (frozen, augmented, balanced, query mode, stem) cell.
# Several grouping keys are categorical; `observed=False` spells out the
# behaviour the grouping already had, so results are unchanged and pandas no
# longer emits a FutureWarning about the changing default.
dfagg = df.groupby(
    ["is_frozen", "is_augmented", "is_balanced", "query_same", "stem"],
    observed=False,
).agg({"snr": [q50]})
dfagg.columns = ["q50"]
dfagg = dfagg.reset_index()

# Keep only the rows labelled "diff." in query_same.
dfagg = dfagg[dfagg.query_same == "diff."]

# One row per configuration, one column per stem. No second reset_index() is
# needed: pivot_table only reads the index/columns/values columns named here.
dfagg = dfagg.pivot_table(
    index=["is_frozen", "is_augmented", "is_balanced"],
    columns="stem",
    values=["q50"],
    observed=False,
)


  dfagg = df.groupby([


In [161]:
# Swap the two column levels so the stem name is the outer level, then order
# the columns.
# NOTE(review): dfagg is rebound from itself, so running this cell a second
# time swaps the column levels back.
dfagg = dfagg.swaplevel(0, 1, axis="columns").sort_index(axis="columns")
# Largest value in each column, used to decide which table entries are bolded.
dfagg_max = dfagg.max(axis="index")


def bold_formatter(x, val):
    """Format ``x`` to one decimal place, bolded when it matches ``val``.

    ``x`` and ``val`` are compared after rounding both to one decimal, so
    every entry that ties with ``val`` at the printed precision gets the
    LaTeX ``\\bfseries`` prefix.
    """
    text = f"{x:.1f}"
    matches_reference = round(x, 1) == round(val, 1)
    return r"\bfseries " + text if matches_reference else text

# One formatter per table column, each bound to that column's maximum so the
# best entry (and any tie at one decimal) is rendered in bold.
formatters = dict(
    ((outer, inner), partial(bold_formatter, val=dfagg_max.loc[outer, inner]))
    for outer, inner in dfagg.columns
)


In [162]:
# Render the table as LaTeX with the per-column bold formatters and show the source.
# NOTE(review): the printed header labels (e.g. lead_vocals, is_frozen) contain
# unescaped underscores — confirm they are handled before compiling the table.
str_ = dfagg.to_latex(formatters=formatters, sparsify=True, multirow=False)
print(str_)

\begin{tabular}{lllrrrrrr}
\toprule
 &  & stem & vocals & lead_vocals & drums & bass & guitar & piano \\
 &  &  & q50 & q50 & q50 & q50 & q50 & q50 \\
is_frozen & is_augmented & is_balanced &  &  &  &  &  &  \\
\midrule
Y & N & N & \bfseries 8.0 & 7.9 & 9.8 & 10.5 & 2.3 & 0.8 \\
 &  & Y & 7.9 & 7.7 & 9.6 & 10.5 & 2.2 & 0.9 \\
N & N & N & 7.4 & \bfseries 8.0 & 9.6 & 10.6 & 3.0 & 2.3 \\
 &  & Y & 7.6 & 7.7 & 9.3 & 10.2 & 2.9 & \bfseries 2.5 \\
 & Y & N & 7.8 & 7.9 & \bfseries 10.1 & 10.9 & 3.2 & 2.2 \\
 &  & Y & 7.6 & 7.9 & 9.5 & \bfseries 11.0 & \bfseries 3.3 & \bfseries 2.5 \\
\bottomrule
\end{tabular}



In [153]:
# Display the pivoted median-SNR table.
# NOTE(review): this cell's execution count (153) is lower than that of the
# cells above it (159-162) — confirm the saved output is current.
dfagg

Unnamed: 0_level_0,Unnamed: 1_level_0,stem,vocals,drums,bass,guitar,piano
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,q50,q50,q50,q50,q50
is_frozen,is_augmented,is_balanced,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Y,N,N,7.957611,9.817667,10.542386,2.267256,0.848644
Y,N,Y,7.908581,9.589573,10.480804,2.236992,0.872272
N,N,N,7.375209,9.624549,10.621386,3.000009,2.278973
N,N,Y,7.558992,9.343448,10.151619,2.945651,2.454445
N,Y,N,7.773301,10.107697,10.948157,3.182601,2.173553
N,Y,Y,7.584758,9.542869,10.974755,3.269581,2.477942
