In [1]:
import numpy as np
np.random.seed(5941)

In [2]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2

%set_env TF_FORCE_GPU_ALLOW_GROWTH=true


import tensorflow as tf
import librosa
import librosa.display
import matplotlib.pyplot as plt

import os

from grog.audioreader import AudioReader
from grog.models.model import Model
from grog.models.infer import Inference
from grog.config import Config
from grog.evaluation.preparation import generate_mixtures
from grog.evaluation.evaluate import eval_generated, eval_estimated
from grog.evaluation.plot import plot_metrics, mean_all_metrics, N_METRICS
from grog.util import pad_or_truncate
from grog.fft import stft_default as stft, to_log_spec
import pandas as pd
import IPython.display as ipd
import hickle

env: TF_FORCE_GPU_ALLOW_GROWTH=true


# Generate mixtures to evaluate

In [3]:
data_dir = "../../workspace/data/WSJ0/train"
sampling_rate = 8000

#mixes, reference, labels = generate_mixtures(data_dir, sampling_rate, 1)

y1, y2 = reference[0]
display(ipd.Audio(y1, rate=sampling_rate))
display(ipd.Audio(y2, rate=sampling_rate))
display(ipd.Audio(mixes[0], rate=sampling_rate))

NameError: name 'reference' is not defined

# Generate mixtures

In [7]:
def generate_eval_data(n, sample_dir, out):
    if os.path.isfile(out):
        return hickle.load(open(out, "rb"))

    sampling_rate = 8000
    print(out)
    generated_mixtures = generate_mixtures(sample_dir, sampling_rate, n)
    #hickle.dump(generated_mixtures, open(out, "wb"), compression='gzip')
    display(ipd.Audio(generated_mixtures[0][0], rate=sampling_rate))
    return generated_mixtures

n = 100

wsj0_train = generate_eval_data(
    n, 
    "../../workspace/data/WSJ0/train", 
    "../../workspace/eval-data/WSJ0/train.pkl"
)

wsj0_evaluation = generate_eval_data(
    n, 
    "../../workspace/data/WSJ0/evaluation", 
    "../../workspace/eval-data/WSJ0/evaluation.pkl"
)

timit_train = generate_eval_data(
    n, 
    "../../workspace/data/TIMIT/train", 
    "../../workspace/eval-data/TIMIT/train.pkl"
)

timit_evaluation = generate_eval_data(
    n, 
    "../../workspace/data/TIMIT/evaluation", 
    "../../workspace/eval-data/TIMIT/evaluation.pkl"
)

tedlium_train = generate_eval_data(
    n, 
    "../../workspace/data/TEDLIUM/train", 
    "../../workspace/eval-data/TEDLIUM/train.pkl"
)

tedlium_evaluation = generate_eval_data(
    n, 
    "../../workspace/data/TEDLIUM/evaluation", 
    "../../workspace/eval-data/TEDLIUM/evaluation.pkl"
)

In [5]:
display(ipd.Audio(wsj0_train[0][0], rate=sampling_rate))

## Evaluate mixtures

In [9]:
evaluations = [
    #["0", "../../workspace/exp-models/0"],
    #["1a", "../../workspace/exp-models/1a"],
    #["1c", "../../workspace/exp-models/1c"],
    #["1d", "../../workspace/exp-models/1d"],
    #["1e", "../../workspace/exp-models/1e"],
    #["1f", "../../workspace/exp-models/1f"],
    #["1g", "../../workspace/exp-models/1g"],
    #["1h", "../../workspace/exp-models/1h"],
    #["1i", "../../workspace/exp-models/1i"],
    
    #["2a", "../../workspace/exp-models/2a"],
    #["2b", "../../workspace/exp-models-gpu3/2b"],
    #["2d", "../../workspace/exp-models/2d"],
    
    #["3a", "../../workspace/exp-models/3a"],
    #["3b", "../../workspace/exp-models-gpu3/3b"],
    #["3c", "../../workspace/exp-models-gpu3/3c"],
    #["3d", "../../workspace/exp-models-gpu3/3d"],
    
    #["4a", "../../workspace/exp-models-gpu3/4a"],
    #["4b", "../../workspace/exp-models-gpu3/4b"],
    #["4c", "../../workspace/exp-models-gpu3/4c"],
    
    #["5a", "../../workspace/exp-models-gpu3/5a"],
    #["5b", "../../workspace/exp-models-gpu3/5b"],
    #["5c", "../../workspace/exp-models-gpu3/5c"],
    ["gru", "../../workspace/exp-models-gpu3/gru"],
]


eval_datasets = [("wsj0_train", wsj0_train), ("wsj0_evaluation", wsj0_evaluation), ("timit_train", timit_train), ("timit_evaluation", timit_evaluation), ("tedlium_train", tedlium_train), ("tedlium_evaluation", tedlium_evaluation)]
#eval_datasets = [("wsj0_train", wsj0_train)]

eval_results = []

for (name, directory) in evaluations:
    print(name)
    config_path = "../../sync/experiments/" + name + "/config.json"
    model_dir = directory + "/seeds"
    for (set_name, generated_mixtures) in eval_datasets:
        config = Config()
        config.load_json(config_path)
        eval_results.append((config_path, config, name, set_name, eval_generated(model_dir, config, generated_mixtures))) # FIXME: take care of datasets

gru


In [12]:
#eval_results = hickle.load(open("../../workspace/0-2a.pkl", "rb"))
hickle.dump(eval_results, open("../../workspace/gru.pkl", "wb"), compression='gzip')

In [10]:
filtered = list(filter(lambda result: result[2] == 'gru', eval_results))

In [13]:
columns=[
    'name', 'set_name', 
    'sdr', 'sir', 'isr', 'sar',
    'baseline_sdr', 'baseline_sir', 'baseline_isr', 'baseline_sar'
]
final_results = pd.DataFrame(columns=columns)

for config_path, config, name, set_name, eval_result in filtered:
    metrics, mixes, reference, labels, sources = eval_result
    
    #print("%s - %s" % (name, set_name))
    #plot_metrics(metrics)
    
    y = []
    y_baseline = []

    for against_sources, against_rev_sources, baseline in metrics:
        metric_values = mean_all_metrics(against_sources)
        assert len(metric_values) - 1 == N_METRICS

        y.append(metric_values[1:])

        if baseline:
            baseline_metric_values = mean_all_metrics(baseline)
            y_baseline.append(baseline_metric_values[1:])
            
    y_baseline = np.array(y_baseline).transpose(1, 0, 2).reshape((N_METRICS, -1))
    y = np.array(y).transpose(1, 0, 2).reshape((N_METRICS, -1))
    mean_mean_y = np.mean(y, axis=1)
    mean_mean_baseline = np.mean(y_baseline, axis=1)
    
    result_frame = pd.DataFrame(data=[[name, set_name, 
                                 mean_mean_y[0], mean_mean_y[1], mean_mean_y[2], mean_mean_y[3], 
                                 mean_mean_baseline[0], mean_mean_baseline[1], mean_mean_baseline[2], mean_mean_baseline[3]
                                ]], columns=columns)
    final_results = pd.concat([final_results, result_frame],ignore_index=True)


print(final_results)
final_results.to_csv("../../workspace/gru.csv")

  name            set_name       sdr       sir        isr       sar  \
0  gru          wsj0_train  2.696229  7.357329  10.263668  4.995753   
1  gru     wsj0_evaluation  1.121753  4.623556   7.662422  3.556583   
2  gru         timit_train  2.844743  6.739312   9.247375  5.442338   
3  gru    timit_evaluation  2.376382  6.406199   8.661369  5.540945   
4  gru       tedlium_train  0.590798  2.416476   8.214010  4.578219   
5  gru  tedlium_evaluation  0.725986  2.103364   8.146034  4.519753   

   baseline_sdr  baseline_sir  baseline_isr  baseline_sar  
0  3.411521e-10      0.244840     18.581430    151.885979  
1  3.686582e-10      0.484758     17.571010    151.778972  
2  1.076549e-09      0.325240     16.339949    152.036881  
3  5.887976e-10      0.445164     16.294310    152.069667  
4  8.881784e-18      0.106448     19.253322    242.771978  
5  1.776357e-17      0.049251     19.215987    244.839288  


## For debugging

In [35]:
sampling_rate = 8000

def debug(result_index, idx):
    metrics, mixes, reference, labels, sources = eval_results[result_index][-1]

    mix = mixes[idx]
    source1, source2 = sources[idx]
    ref1, ref2 = reference[idx]
    display(ipd.Audio(mix, rate=sampling_rate))
    display(ipd.Audio(source1, rate=sampling_rate))
    display(ipd.Audio(source2, rate=sampling_rate))
    #display(ipd.Audio(ref1, rate=sampling_rate))
    #display(ipd.Audio(ref2, rate=sampling_rate))
    librosa.output.write_wav('../../workspace/eval/b-wsj0-train-mix.wav', mix, sampling_rate)
    librosa.output.write_wav('../../workspace/eval/b-wsj0-train-source1.wav', source1, sampling_rate)
    librosa.output.write_wav('../../workspace/eval/b-wsj0-train-source2.wav', source2, sampling_rate)
    librosa.output.write_wav('../../workspace/eval/b-wsj0-train-ref1.wav', ref1, sampling_rate)
    librosa.output.write_wav('../../workspace/eval/b-wsj0-train-ref2.wav', ref2, sampling_rate)

    print(mix.shape, ref1.shape, ref2.shape, source1.shape, source2.shape)
    
debug(0, 0)

(42894,) (42894,) (42894,) (42880,) (42880,)


## Comparing ways of evaluation

In [None]:
window_size = 256
hop_length = 64
effective_ft_points = 129
min_amp = 10000
amp_fac = 10000
global_mean = 44
global_std = 15.5

ref1_spec, speech1_phase = to_normalized_spec(ref1, window_size, hop_length, effective_ft_points, min_amp, amp_fac)
ref2_spec, speech2_phase = to_normalized_spec(ref2, window_size, hop_length, effective_ft_points, min_amp, amp_fac)
mix_spec, mix_phase = to_normalized_spec(mix, window_size, hop_length, effective_ft_points, min_amp, amp_fac)

In [None]:
Y = np.array([ref1_spec > ref2_spec, ref1_spec < ref2_spec]).astype('bool')
masked_ref1_spec = mix_spec * Y[0]
masked_ref2_spec = mix_spec * Y[1]

masked_ref1, masked_ref2 = istft(len(masked_ref1_spec), window_size, hop_length, amp_fac, masked_ref1_spec, masked_ref2_spec, mix_phase, mix_phase)

In [None]:
display(ipd.Audio(ref1, rate=sampling_rate))
display(ipd.Audio(ref2, rate=sampling_rate))
display(ipd.Audio(masked_ref1, rate=sampling_rate))
display(ipd.Audio(masked_ref2, rate=sampling_rate))

In [None]:
eval_window_size = int(1*sampling_rate)
eval_hop_Length = int(1*sampling_rate)

print('refs vs masked refs')
res = eval_estimated(
    [ref1, ref2], ["ref1", "ref2"], 
    [masked_ref1, masked_ref2], ["masked_ref1", "masked_ref2"], 
    1, eval_window_size, eval_hop_Length, 'v4', compute_permutation=True
)
plot_metrics([(res, None, None)])

print('masked refs vs estimated')
res = eval_estimated(
    [masked_ref1, masked_ref2], ["masked_ref1", "masked_ref2"], 
    [source1, source2], ["source1", "source2"], 
    1, eval_window_size, eval_hop_Length, 'v4'
)
plot_metrics([(res, None, None)])

print('refs vs estimated')
res = eval_estimated(
    [ref1, ref2], ["ref1", "ref2"], 
    [source1, source2], ["source1", "source2"], 
    1, eval_window_size, eval_hop_Length, 'v4'
)
plot_metrics([(res, None, None)])


print('masked refs vs mix')
res = eval_estimated(
    [masked_ref1, masked_ref2], ["masked_ref1", "masked_ref2"], 
    [mix, mix], ["mix", "mix"], 
    1, eval_window_size, eval_hop_Length, 'v4'
)
plot_metrics([(res, None, None)])

print('refs vs mix')
res = eval_estimated(
    [ref1, ref2], ["ref1", "ref2"], 
    [mix, mix], ["mix", "mix"], 
    1, eval_window_size, eval_hop_Length, 'v4'
)
plot_metrics([(res, None, None)])

## Sanity check for evaluation using tookit directly

### Write results for further analysis

In [None]:
librosa.output.write_wav('../../workspace/eval/2019-05-25/test-evaluation.wav', mix, sampling_rate)
librosa.output.write_wav('../../workspace/eval/2019-05-25/test-evaluation/estimated/source1.wav', source1, sampling_rate)
librosa.output.write_wav('../../workspace/eval/2019-05-25/test-evaluation/estimated/source2.wav', source2, sampling_rate)
librosa.output.write_wav('../../workspace/eval/2019-05-25/test-evaluation/reference/ref1.wav', ref1, sampling_rate)
librosa.output.write_wav('../../workspace/eval/2019-05-25/test-evaluation/reference/ref2.wav', ref2, sampling_rate)

In [None]:
from museval.metrics import bss_eval
from museval import eval_dir

In [None]:
eval_dir("../../workspace/eval/2019-05-25/test-evaluation/reference", "../../workspace/eval/2019-05-25/test-evaluation/estimated", mode='v4', win=1, hop=1)

In [None]:
def eval():
    path = "../../workspace/eval/2019-05-25/test-evaluation"
    ref1, _ = librosa.load(os.path.join(path, "reference", "ref1.wav"), sr=sampling_rate)
    ref2, _ = librosa.load(os.path.join(path, "reference", "ref2.wav"), sr=sampling_rate)
    source1, _ = librosa.load(os.path.join(path, "estimated", "source1.wav"), sr=sampling_rate)
    source2, _ = librosa.load(os.path.join(path, "estimated", "source2.wav"), sr=sampling_rate)
    
    ref, est = pad_or_truncate([ref1, ref2], [source1, source2])
    return bss_eval(
        ref,
        est,
        compute_permutation=True,
        window=int(1.0*sampling_rate),
        hop=int(1.0*sampling_rate),
        framewise_filters=False,
        bsseval_sources_version=False
    )
print('Mean SDR: %s' % np.mean(eval()[0], axis=1))
print('Mean ISR: %s' % np.mean(eval()[1], axis=1))

In [None]:
eval_dir("/fast/ammannma/speech-separation/mir_eval/tests/data/separation/ref01/", "/fast/ammannma/speech-separation/mir_eval/tests/data/separation/est01/", mode='v4', win=1, hop=1)

In [None]:
import mir_eval

def eval():
    path = "/fast/ammannma/speech-separation/mir_eval/tests/data/separation"
    ref1, _ = librosa.load(os.path.join(path, "ref01", "0.wav"), sr=sampling_rate)
    ref2, _ = librosa.load(os.path.join(path, "ref01", "1.wav"), sr=sampling_rate)
    source1, _ = librosa.load(os.path.join(path, "est01", "0.wav"), sr=sampling_rate)
    source2, _ = librosa.load(os.path.join(path, "est01", "1.wav"), sr=sampling_rate)

    return mir_eval.separation.bss_eval_sources(np.array([ref1, ref2]), np.array([source1, source2]))
print(eval())