In [23]:
import librosa.effects
import numpy as np
import pandas as pd
import soundfile as sf

import os

from ipywidgets import Audio
from matplotlib import pyplot as plt
from tqdm.notebook import tqdm

import librosa

In [24]:
INFERENCE_ROOT = "/home/kwatchar3/Documents/data/moisesdb/inference/bandit-vdbo/5583712/lightning_logs/version_0"
GROUND_TRUTH_ROOT = "/home/kwatchar3/Documents/data/moisesdb"

In [25]:
test_files = os.listdir(os.path.join(INFERENCE_ROOT, "audio"))
gt_files = os.listdir(os.path.join(GROUND_TRUTH_ROOT, "npy2"))

In [35]:
def snr(gt, est):
    return 10 * np.log10(np.sum(np.square(gt)) / np.sum(np.square(gt - est)))

In [34]:
df = []

for song in tqdm(test_files):
    song_name = song.split(".")[0]

    stems = os.listdir(os.path.join(INFERENCE_ROOT, "audio", song))
    stems = [s.replace(".wav", "") for s in stems]

    print(f"Processing {song_name}...")

    for stem in stems:
        print(f"Processing {stem}...")

        audio_est, sr = sf.read(os.path.join(INFERENCE_ROOT, "audio", song, f"{stem}.wav"))
        audio_est = audio_est.T


        npy_path = os.path.join(GROUND_TRUTH_ROOT, "npy2", song_name, f"{stem}.npy")
        if os.path.exists(npy_path):
            audio = np.load(npy_path, mmap_mode="r")
        else:
            print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
            print(f"Ground truth not found for {song_name}/{stem}. Using zeros.")
            print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
            audio = np.zeros_like(audio_est)

        snr_full = snr(audio, audio_est)

        df.append({
            "song": song_name,
            "stem": stem,
            "snr": snr_full,
        })

        print(f"{stem} - SNR: {snr_full:-2.3f}")
        # print("Ground truth duration:", audio.shape[1] / sr)
        # print("Estimation duration:", audio_est.shape[1] / sr)



df = pd.DataFrame(df)

  0%|          | 0/48 [00:00<?, ?it/s]

Processing 704f1de9-1d02-4c2b-af05-107a7700a51d...
Processing bass...
bass - SNR: 9.485
Processing drums...
drums - SNR: 12.619
Processing vdbo_others...
vdbo_others - SNR: 1.341
Processing vocals...
vocals - SNR: 10.221
Processing 8a6c9c1f-4865-404f-a805-1949de36a33c...
Processing bass...
bass - SNR: 17.693
Processing drums...
drums - SNR: 11.757
Processing vdbo_others...
vdbo_others - SNR: 11.729
Processing vocals...
vocals - SNR: 13.991
Processing 0358fd1e-244a-4422-9a42-29b5d68f6e4b...
Processing bass...
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
Ground truth not found for 0358fd1e-244a-4422-9a42-29b5d68f6e4b/bass. Using zeros.
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!


  if 10 * np.log10(np.sum(np.square(gt))) < -80 and 10 * np.log10(np.sum(np.square(est))) < -80:
  return 10 * np.log10(np.sum(np.square(gt)) / np.sum(np.square(gt - est)))


bass - SNR: -inf
Processing drums...
drums - SNR: 3.137
Processing vdbo_others...
vdbo_others - SNR: 4.323
Processing vocals...
vocals - SNR: 7.371
Processing 1afe1b3b-3e2e-48d3-b859-f50e222cbaf4...
Processing bass...
bass - SNR: 15.237
Processing drums...
drums - SNR: 6.423
Processing vdbo_others...
vdbo_others - SNR: 7.214
Processing vocals...
vocals - SNR: 7.470
Processing 78ef22ce-472f-4f82-8656-16df73b9465f...
Processing bass...
bass - SNR: 14.379
Processing drums...
drums - SNR: 8.823
Processing vdbo_others...
vdbo_others - SNR: 7.623
Processing vocals...
vocals - SNR: 10.831
Processing 491c1ff5-1e7b-4046-8029-a82d4a8aefb4...
Processing bass...
bass - SNR: 11.747
Processing drums...
drums - SNR: 8.677
Processing vdbo_others...
vdbo_others - SNR: 10.092
Processing vocals...
vocals - SNR: 14.714
Processing f0c565c5-fc73-4da1-b979-0fac0167f671...
Processing bass...
bass - SNR: 15.670
Processing drums...
drums - SNR: 12.890
Processing vdbo_others...
vdbo_others - SNR: 9.983
Processin

In [37]:
df["snr"] = df["snr"].replace(-np.inf, np.nan)

In [38]:
df.to_csv(os.path.join(INFERENCE_ROOT, "bandit4.csv"), index=False)