In [None]:
import glob

import numpy as np
import matplotlib.pyplot as plt

from benchmark_utils import read_json

%matplotlib inline

In [None]:
# Display name (used as legend label and as key into `colors`) for each
# inference-program identifier handed to the plotting helper.
procedures_to_name = dict(
    single_site_mh='SSMH',
    lbfgs_with_gibbs='LBFGS & Gibbs',
    hamiltonian_monte_carlo_with_gibbs='HMC & Gibbs',
    loop_explicitly_over_random_choices='Ordered MH & Gibbs',
    resimulation_mh='Resimulation MH',
    single_site_gibbs='Single-site Gibbs',
    block_gibbs='Block Gibbs',
)

In [None]:
# Matplotlib colour name for each legend label.
# NOTE(review): 'Single-site Gibbs' and 'Ordered MH & Gibbs' both map to
# 'magenta', so they are indistinguishable if drawn on the same axes.
colors = dict([
    ('SSMH', 'red'),
    ('Resimulation MH', 'blue'),
    ('LBFGS & MH', 'black'),
    ('Block Gibbs', 'purple'),
    ('Single-site Gibbs', 'magenta'),
    ('LBFGS & Gibbs', 'navy'),
    ('HMC & Gibbs', 'cyan'),
    ('Ordered MH & Gibbs', 'magenta'),
    ('SMC with gradient updates', 'darkgreen'),
    ('SMC with HMC updates', 'lightgreen'),
    ('SMC', 'green'),
    ('MH with parallel chains', 'darkred'),
    ('MH with thinning and burn in', 'skyblue'),
    ('Random sampling for search over inputs', 'orange'),
    ('LBFS for search over inputs', 'royalblue'),
])

In [None]:
def get_result_data(path):
    """Load every JSON result file found in ``<path>/results``.

    Args:
        path: Directory of one benchmark; its ``results`` sub-directory is
            searched for files matching ``*.json``.

    Returns:
        A list holding the value returned by ``read_json`` for each matching
        file, in sorted filename order. Empty when nothing matches.
    """
    # glob gives no ordering guarantee (it depends on the file system), so
    # sort the names to make the returned list reproducible between runs.
    result_files = sorted(glob.glob(path + '/results/*.json'))
    return [read_json(filename) for filename in result_files]

In [None]:
# Benchmark directory used by the following cells; get_result_data(path)
# globs <path>/results/*.json.
path = 'linear-regression-with-outliers/'

In [None]:
def get_all_results(result_data, inf_prog):
    """Collect the timings and measurements recorded for one inference program.

    Args:
        result_data: Iterable of result records (mappings) that each carry the
            keys 'inf-prog-name', 'timing' and 'measurement'.
        inf_prog: Value of 'inf-prog-name' selecting the records to keep.

    Returns:
        A pair ``(timings, measurements)``: ``timings`` is a list of the
        selected records' 'timing' values and ``measurements`` is a NumPy
        array built from their 'measurement' values, both in input order.
    """
    # Select the relevant records once, then read both fields from them.
    selected = [
        record for record in result_data
        if record['inf-prog-name'] == inf_prog
    ]
    timings = [record['timing'] for record in selected]
    measurements = np.asarray([record['measurement'] for record in selected])
    return timings, measurements

In [None]:
# Aggregate the results of one inference program by hand: group the
# measurements by the timing they were recorded at, then summarise each group.
result_data = get_result_data(path)
time_raw, accuracy_raw = get_all_results(result_data, 'single_site_mh')
# `time` maps each distinct timing value to the list of its measurements.
time = {t:[] for t in np.unique(time_raw)}
for i, timing in enumerate(time_raw):
    time[timing].append(accuracy_raw[i])
median_values = []
std = []
# Walk the timings in increasing order so both lists line up with sorted(time).
for timing in sorted(time.keys()):
    median_values.append(np.median(time[timing]))
    # `std` holds the spread of each group, so use np.std here, not np.median.
    std.append(np.std(time[timing]))
    

In [None]:
# Display the timing -> measurements grouping built in the previous cell.
time

In [None]:
def plot_acc_time_curve(result_data, inf_progs, metric='MSE', title_suffix='', loc='upper right'):
    """Plot the median measurement against timing for several inference programs.

    For each program, the measurements are grouped by timing value; the curve
    shows the per-group median with the per-group standard deviation as error
    bars.

    Args:
        result_data: Result records as accepted by ``get_all_results``.
        inf_progs: Iterable of inference-program identifiers. Each must be a
            key of ``procedures_to_name`` and its display name a key of
            ``colors``; otherwise a ``KeyError`` is raised.
        metric: Text used as the y-axis label.
        title_suffix: Text appended to the title 'Accuracy vs time'.
        loc: Legend location, passed to ``ax.legend``.

    Returns:
        The ``(fig, ax)`` pair of the created figure.
    """
    fig, ax = plt.subplots()
    for inf_prog in inf_progs:
        time_raw, accuracy_raw = get_all_results(result_data, inf_prog)
        # Group the measurements by the timing value they were recorded at.
        measurements_by_time = {}
        for timing, accuracy in zip(time_raw, accuracy_raw):
            measurements_by_time.setdefault(timing, []).append(accuracy)
        # Sort once so x values, medians and error bars share the same order.
        timings = sorted(measurements_by_time)
        median_values = [np.median(measurements_by_time[t]) for t in timings]
        std = [np.std(measurements_by_time[t]) for t in timings]
        label = procedures_to_name[inf_prog]
        ax.errorbar(timings, median_values, yerr=std, color=colors[label], linestyle='--', label=label)
    ax.set_xlabel('Time (seconds)')
    ax.set_ylabel(metric)
    ax.set_title('Accuracy vs time' + title_suffix)
    fig.set_size_inches(4,3)
    handles, labels = ax.get_legend_handles_labels()
    # With nothing plotted there is nothing to sort: zip(*[]) cannot be
    # unpacked into two names, so skip the legend instead of raising.
    if handles:
        # sort both labels and handles by labels
        labels, handles = zip(*sorted(zip(labels, handles), key=lambda t: t[0]))
        ax.legend(handles, labels, loc=loc)
    return fig, ax

# Linear regression with outliers

In [None]:
# Accuracy/time curves for the benchmark directory currently held in `path`.
fig, ax = plot_acc_time_curve(
    result_data=get_result_data(path),
    inf_progs=[
        'single_site_mh',
        'lbfgs_with_gibbs',
        'loop_explicitly_over_random_choices',
        'hamiltonian_monte_carlo_with_gibbs',
    ],
)
# Override the 4x3 inch size set inside plot_acc_time_curve.
fig.set_size_inches(8, 4)
# Optional axis limits, currently disabled:
#ax.set_xlim([0, 30])
#ax.set_ylim(0, 1000)

# Noisy Or

### Data ordered according to dependency structure

### Data not ordered

In [None]:
# Switch to the noisy-or benchmark directory; this rebinds the module-level
# `path`, so cells that read `path` afterwards load noisy-or results.
path = 'noisy-or/'

In [None]:
# Accuracy/time curves for the benchmark directory currently held in `path`,
# with the y-axis labelled as KL-divergence.
fig, ax = plot_acc_time_curve(
    result_data=get_result_data(path),
    inf_progs=[
        'resimulation_mh',
        'single_site_mh',
        'single_site_gibbs',
        # 'particle_gibbs' is disabled; it has no entry in procedures_to_name.
        #'particle_gibbs',
        'block_gibbs',
    ],
    metric='KL-divergence',
)
# Override the 4x3 inch size set inside plot_acc_time_curve.
fig.set_size_inches(8, 4)
# Optional axis limits, currently disabled:
#ax.set_xlim([0, 30])
#ax.set_ylim(0, 1000)

# GP structure learning

# Stochastic Volatility model

# Logistic regression

# Bayesian logistic regression