In [None]:
import glob

import numpy as np
import matplotlib.pyplot as plt

from benchmark_utils import read_json

%matplotlib inline

In [None]:
# Maps the inference-program identifier (matched against each result's
# 'inf-prog-name' field) to the short label used in plot legends and as the
# key into `colors`.
procedures_to_name = dict(
    single_site_mh='SSMH',
    lbfgs_with_gibbs='LBFGS & Gibbs',
    hamiltonian_monte_carlo_with_gibbs='HMC & Gibbs',
    loop_explicitly_over_random_choices='Ordered MH & Gibbs',
)

In [None]:
# Legend label -> matplotlib colour name. Looked up with the display labels
# produced by `procedures_to_name`; entries for labels not produced there are
# kept as-is.
# NOTE(review): 'Single-site Gibbs' and 'Ordered MH & Gibbs' share 'magenta',
# so the two would be indistinguishable if plotted together - confirm intent.
# NOTE(review): 'LBFS for search over inputs' may be a typo for 'LBFGS'; the
# key is left untouched because other code may look it up verbatim.
colors = dict([
    ('SSMH', 'red'),
    ('Resimulation MH', 'blue'),
    ('LBFGS & MH', 'black'),
    ('Block Gibbs', 'purple'),
    ('Single-site Gibbs', 'magenta'),
    ('LBFGS & Gibbs', 'navy'),
    ('HMC & Gibbs', 'cyan'),
    ('Ordered MH & Gibbs', 'magenta'),
    ('SMC with gradient updates', 'darkgreen'),
    ('SMC with HMC updates', 'lightgreen'),
    ('SMC', 'green'),
    ('MH with parallel chains', 'darkred'),
    ('MH with thinning and burn in', 'skyblue'),
    ('Random sampling for search over inputs', 'orange'),
    ('LBFS for search over inputs', 'royalblue'),
])

In [None]:
# Directory of the benchmark whose results are plotted below.
path = 'linear-regression-with-outliers/'
# `path` already ends with a separator, so the pattern is appended without a
# leading slash (avoids '//' in every filename). glob returns matches in
# arbitrary order; sorting keeps `result_data` reproducible across runs.
result_files = sorted(glob.glob(path + 'results/*.json'))
# One entry per result file, in the same order as `result_files`; each entry
# is whatever benchmark_utils.read_json returns for that file.
result_data = [read_json(filename) for filename in result_files]

In [None]:
def get_all_results(result_data, inf_prog):
    """Collect the timings and measurements recorded for one inference program.

    Args:
        result_data: result records; each is indexed by the keys
            'inf-prog-name', 'timing' and 'measurement'.
        inf_prog: value of 'inf-prog-name' selecting the records to keep.

    Returns:
        A tuple ``(timings, measurements)`` of two lists, each holding the
        selected records' values in input order.
    """
    def _is_selected(record):
        return record['inf-prog-name'] == inf_prog

    timings = [
        record['timing'] for record in result_data if _is_selected(record)
    ]
    measurements = [
        record['measurement'] for record in result_data if _is_selected(record)
    ]
    return timings, measurements

In [None]:
def plot_acc_time_cloud(inf_progs, metric='MSE', title_suffix='', loc='upper right'):
    """Scatter-plot measurement against run time for several inference programs.

    Reads the module-level `result_data`, `procedures_to_name` and `colors`.

    Args:
        inf_progs: iterable of inference-program identifiers; each must be a
            key of `procedures_to_name`, and its display label a key of
            `colors`.
        metric: y-axis label.
        title_suffix: text appended to the 'Accuracy vs time' title.
        loc: legend location, forwarded to `Axes.legend`.

    Returns:
        The ``(fig, ax)`` pair, so the caller can adjust size or limits.
    """
    fig, ax = plt.subplots()
    for inf_prog in inf_progs:
        timings, measurements = get_all_results(result_data, inf_prog)
        label = procedures_to_name[inf_prog]
        ax.scatter(timings, measurements, color=colors[label], label=label, alpha=0.5)
    ax.set_xlabel('Time (seconds)')
    ax.set_ylabel(metric)
    ax.set_title('Accuracy vs time' + title_suffix)
    fig.set_size_inches(4, 3)

    handles, labels = ax.get_legend_handles_labels()
    # With nothing plotted there is no legend to build, and unpacking
    # zip(*[]) into two names would raise ValueError.
    if handles:
        # Sort legend entries alphabetically by label, keeping each handle
        # paired with its label.
        ordered = sorted(zip(labels, handles), key=lambda pair: pair[0])
        labels, handles = zip(*ordered)
        # Build the legend once, here: a later legend() call replaces an
        # earlier one, so `loc` has to be passed on this call to take effect.
        ax.legend(handles, labels, loc=loc)
    return fig, ax

In [None]:
def plot_acc_time_curve(labels, type_f, metric='MSE', title_suffix=''):
    """Plot one dashed curve with error bars per entry of `l_vals`.

    NOTE(review): the body reads `l_vals`, `times`, `f`, `fixed_error`, `loc`
    and `ylims`. None of these are parameters, and none are defined in the
    visible part of this notebook. Unless they exist as globals at call time,
    calling this function raises NameError. TODO: confirm where they are
    supposed to come from, or pass them in explicitly.

    Args:
        labels: sequence indexed in step with `l_vals`; each entry is used as
            the legend label and as the key into `colors`.
        type_f: not used by the body. NOTE(review): possibly meant to select
            the curve function `f` - confirm.
        metric: y-axis label.
        title_suffix: text appended to the 'Accuracy vs time' title.

    Returns:
        The ``(fig, ax)`` pair.
    """
    fig, ax = plt.subplots()
    for i, l in enumerate(l_vals):
        # NOTE(review): the errorbar call below draws the same curve again
        # with the same colour and linestyle, so this plain plot looks
        # redundant - confirm before removing.
        ax.plot(times, f(times, l), color=colors[labels[i]], linestyle='--')
        ax.errorbar(times, f(times, l), yerr=fixed_error, label=labels[i], color=colors[labels[i]], linestyle='--')
    # NOTE(review): this legend is superseded by the ax.legend(handles, labels)
    # call further down, which does not pass `loc`.
    ax.legend(loc=loc)
    ax.set_xlabel('Time (seconds)')
    ax.set_ylabel(metric)
    ax.set_title('Accuracy vs time' + title_suffix)
    ax.set_ylim(ylims)
    fig.set_size_inches(4,3)
    # From here on `labels` is rebound to the legend labels and no longer
    # refers to the `labels` argument.
    handles, labels = ax.get_legend_handles_labels()
    # sort both labels and handles by labels
    labels, handles = zip(*sorted(zip(labels, handles), key=lambda t: t[0]))
    ax.legend(handles, labels)
    return fig, ax

# Linear regression with outliers

In [None]:
# Compare the four inference programs on the benchmark loaded above.
fig, ax = plot_acc_time_cloud([
    'single_site_mh',
    'lbfgs_with_gibbs',
    'loop_explicitly_over_random_choices',
    'hamiltonian_monte_carlo_with_gibbs',
])
# Widen the figure beyond the helper's default size.
fig.set_size_inches(8, 4)
# Show only the first second of run time; alternatives tried earlier were
# ax.set_xlim([0, 100]) and ax.set_xscale("log").
ax.set_xlim(0, 1)

# Noisy Or

### Data ordered according to dependency structure

### Data not ordered

# GP structure learning

# Stochastic Volatility model

# Logistic regression

# Bayesian logistic regression