In [None]:
# Enable the IPython autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and local source edits take effect without a
# kernel restart
%load_ext autoreload
%autoreload 2

In [None]:
import json
from pathlib import Path

import IPython.display as ipd
import torch

import promonet

In [None]:
# Conditions to consider. Each name is substituted into the results path that
# parse_results reads; uncomment an entry to include that condition.
conditions = [
    'promonet',
    # 'ablate-augment',
    # 'ablate-multiloud',
    # 'ablate-sppg',
    # 'ablate-variable-pitch',
    # 'ablate-viterbi',
    # 'ablate-all',
    # 'mels',
    # 'mels-ours',
    # 'world'
]
# Edits to average over. Entries of results.json whose key is not listed here
# are skipped by parse_results.
edits = [
    'reconstructed-100',
    'scaled-050',
    'scaled-200',
    'shifted-071',
    'shifted-141',
    'stretched-071',
    'stretched-141'
]
# Metrics to report, one summary per metric. Commented-out entries are not
# reported.
metrics = [
    'pitch',
    'periodicity',
    'loudness-loud',
    'ppg',
    # 'wer',
    # 'speaker_similarity',
    # 'formant-average',
]

## Parse objective results on a set of conditions

In [None]:
def parse_results(
    conditions,
    edits,
    metric,
    dataset,
    results_directory='/repos/promonet/results'
):
    """Print and return the per-condition average of one objective metric

    For every condition, reads
    ``<results_directory>/<condition>/<dataset>/results.json``, keeps the
    entries whose key is in ``edits``, and averages the value stored under
    ``metric`` across those entries.

    Arguments
        conditions
            Names of the conditions (subdirectories of the results root)
        edits
            Keys of results.json to include in the average
        metric
            Name of the metric to average
        dataset
            Name of the dataset subdirectory
        results_directory
            Root directory of the results; defaults to the original
            hard-coded location

    Returns
        averages
            Dictionary mapping each condition to its average, or to None
            when no selected edit reports the metric
    """
    averages = {}
    for condition in conditions:
        path = Path(results_directory) / condition / dataset / 'results.json'
        with open(path) as file:
            edit_metrics = json.load(file)

        # Edits that are not selected or that do not report this metric are
        # left out of the average
        values = [
            values_by_metric[metric]
            for edit, values_by_metric in edit_metrics.items()
            if edit in edits and metric in values_by_metric]

        # An empty selection has no average; report None instead of dividing
        # by zero
        averages[condition] = sum(values) / len(values) if values else None

    print(json.dumps(averages, indent=4, sort_keys=True))
    return averages


In [None]:
# Print the name of each selected metric followed by its per-condition average
# over the selected edits on the 'vctk' results
for metric in metrics:
    print(metric)
    parse_results(conditions, edits, metric, 'vctk')

## File-level inspection of objective results

In [None]:
# Load fine-grained objective results
condition = 'ablate-all'
results = {}

# Sort the matches so the merge order, and therefore which entry wins when two
# files share a key, does not depend on filesystem listing order
for path in sorted(
    Path(f'/repos/promonet/results/{condition}/vctk').glob('0*.json')
):
    # Distinct names for the path and the handle, so the loop variable is not
    # rebound to a file object
    with open(path) as handle:
        results |= json.load(handle)['objective']['raw']

In [None]:
# Rank files by a single metric, largest value first
metric = 'ppg'
edit_markers = ('shifted-', 'scaled-', 'stretched-', 'original-')
metric_results = {}
for key, edit_metrics in results.items():
    # The first key of each entry names the edit whose metrics are read
    edit = list(edit_metrics)[0]

    # Keep only entries whose key contains one of the edit markers
    if any(marker in key for marker in edit_markers):
        metric_results[key] = edit_metrics[edit][metric]
ranked = sorted(
    metric_results.items(),
    key=lambda item: item[1],
    reverse=True)
metric_results = dict(ranked)

In [None]:
import numpy as np
import scipy.stats

def mean_confidence_interval(data, confidence=0.95):
    """Compute the mean of the data and the half-width of its confidence interval

    Arguments
        data
            Sequence of numeric samples
        confidence
            Two-sided confidence level

    Returns
        mean
            Mean of the samples
        half_width
            Standard error of the mean scaled by the Student-t quantile at
            (1 + confidence) / 2 with len(data) - 1 degrees of freedom
    """
    samples = np.array(data) * 1.0
    degrees_of_freedom = len(samples) - 1
    mean = np.mean(samples)
    standard_error = scipy.stats.sem(samples)
    quantile = scipy.stats.t.ppf((1 + confidence) / 2., degrees_of_freedom)
    return mean, standard_error * quantile

In [None]:
# Mean and confidence-interval half-width (default confidence of 0.95) of the
# selected metric over the filtered files
mean_confidence_interval(list(metric_results.values()))

In [None]:
# Root directories of the evaluation artifacts
subjective_directory = Path('/repos/promonet/eval/subjective')
objective_directory = Path('/repos/promonet/eval/objective')

# Iterate over the stems in the order produced by the metric sort
for i, stem in enumerate(metric_results):

    # Indices 0 through 10 are inspected
    if i > 10:
        break
    print(stem, metric_results[stem])

    # Play the audio of the condition under inspection
    predicted = promonet.load.audio(
        subjective_directory / condition / f'{stem}.wav')
    ipd.display(ipd.Audio(predicted, rate=promonet.SAMPLE_RATE))

    # Play the file in the 'original' directory named after the first three
    # dash-separated fields of the stem
    parts = stem.split('-')
    file = (
        subjective_directory /
        'original' /
        ('-'.join(parts[:3]) + '-original-100.wav'))
    print(file)
    ipd.display(ipd.Audio(file))

    # Print the text stored for the condition, then for the original
    for source in (condition, 'original'):
        print(promonet.load.text(objective_directory / source / f'{stem}.txt'))

    # Collect pitch, periodicity, loudness, and ppg for the condition followed
    # by the same four features for the original
    frames = promonet.convert.samples_to_frames(predicted.shape[-1])
    features = []
    for source in (condition, 'original'):
        prefix = objective_directory / source
        features.extend([
            torch.load(prefix / f'{stem}-viterbi-pitch.pt'),
            torch.load(prefix / f'{stem}-viterbi-periodicity.pt'),
            promonet.preprocess.loudness.band_average(
                torch.load(prefix / f'{stem}-loudness.pt'),
                1),
            promonet.load.ppg(prefix / f'{stem}-ppg.pt', frames)])

    figure = promonet.plot.from_features(predicted, *features)
    figure.show()