In [None]:
%matplotlib inline

import os
import likelihoodfree.io as io
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pdb
import seaborn as sns
import yaml

from IPython.display import display, Markdown, Latex
from likelihoodfree.io import first, last, nth
from likelihoodfree.viz import plot_pdf_marginals
from mackelab.plot import saverep
from tqdm import tqdm

def yaml_parse(filename):
    """Load every YAML document stored in `filename`.

    Parameters
    ----------
    filename : str
        Path of a (possibly multi-document) YAML file.

    Returns
    -------
    list
        One entry per YAML document, in file order.

    Raises
    ------
    yaml.YAMLError
        If the file is not valid YAML. The error is propagated instead of
        being returned as a value, so a broken file fails here rather than
        later, when the caller indexes into the result.
    """
    with open(filename, 'r') as stream:
        # safe_load_all only builds plain Python objects and, unlike
        # yaml.load_all without an explicit Loader, works on every PyYAML
        # version. NOTE(review): files relying on python-specific YAML tags
        # would need an explicit, less restrictive Loader.
        # list() forces the lazy generator while the file is still open.
        return list(yaml.safe_load_all(stream))
        

# NOTE(review): `experiment` (path of the experiment's YAML file) is not defined
# anywhere in this notebook; it must already exist in the kernel namespace.
# TODO confirm how it is injected.
y = yaml_parse(experiment)
header = y[0].copy()  # shallow copy: still holds 'comment' after it is removed from y[0]
model = header['model']
prefix = header['prefix']

# Drop the comment from the displayed documents; a header without a comment
# is tolerated instead of raising KeyError.
y[0].pop('comment', None)
display(y)
#display(Markdown(header['body']))
#display(body)

# Root of the results tree. The default is the original hard-coded location;
# set LIKELIHOODFREE_RESULTS_DIR to run the notebook on another machine.
results_root = os.environ.get('LIKELIHOODFREE_RESULTS_DIR',
                              '/home/jm/repos/likelihoodfree-models/results/')
# The trailing '' keeps the trailing path separator the original string had.
netsdir = os.path.join(results_root, model, 'nets', '')

In [None]:
def get_runs(prefix):
    """Return the distinct run names stored in `netsdir` for one experiment prefix.

    File names are expected to look like ``<prefix>_<run>_...``. The run name
    is the underscore-delimited token that directly follows ``<prefix>_``,
    which is the same convention `get_df` uses to build its wildcards.

    Parameters
    ----------
    prefix : str
        Experiment prefix. It may itself contain underscores.

    Returns
    -------
    list of str
        Unique run names, in sorted file-name order (deterministic, unlike
        the raw `os.listdir` order).

    Notes
    -----
    Reads the module-level `netsdir` and prints the number of runs found.
    """
    marker = prefix + '_'
    loruns = []

    for filename in sorted(os.listdir(netsdir)):
        # Anchor on the start of the name: a mere substring match would pick
        # up unrelated files and mis-split names when the prefix contains '_'.
        if not filename.startswith(marker):
            continue
        run = filename[len(marker):].split('_')[0]
        if run and run not in loruns:
            loruns.append(run)

    print('# of runs : {}'.format(len(loruns)))

    return loruns

**TODO**
- store one entry per run in `df`
- accumulate data info field
- add a function to show the loss

In [2]:
def _gauss_columns(sim, approx_posterior):
    """Return (labels, values) comparing the approximate and true Gaussian posterior.

    Only the first dimension is used: entry 0 of the mean and entry [0, 0]
    of the covariance.
    """
    posterior_mu = approx_posterior.xs[0].m[0]
    posterior_cov = approx_posterior.xs[0].S[0, 0]
    true_mu = sim.posterior.m[0]
    true_cov = sim.posterior.S[0, 0]
    std1 = np.sqrt(true_cov)
    std2 = np.sqrt(posterior_cov)
    mu1 = true_mu
    mu2 = posterior_mu
    # Closed-form KL divergence between two univariate Gaussians,
    # KL(N(mu1, std1^2) || N(mu2, std2^2)), with 1 = true and 2 = approximate.
    kltp = np.log(std2/std1) + (std1**2 + (mu1-mu2)**2)/(2*std2**2) - 0.5
    abs_err_mu = np.abs(posterior_mu - true_mu)
    abs_err_cov = np.abs(posterior_cov - true_cov)

    labels = ['posterior_mu', 'posterior_cov', 'true_mu', 'true_cov',
              'std1', 'std2', 'mu1', 'mu2',
              'kltp', 'abs_err_mu', 'abs_err_cov']
    values = [posterior_mu, posterior_cov, true_mu, true_cov,
              std1, std2, mu1, mu2,
              kltp, abs_err_mu, abs_err_cov]
    return labels, values


def _mog_columns(sim, approx_posterior):
    """Return (labels, values) for a two-component posterior, or None.

    None is returned when the approximate posterior does not expose two
    components with `m` and `S`. The component with the larger variance is
    reported as `posterior_mu`/`posterior_cov`, the other one as
    `posterior_mu2`/`posterior_cov2`.
    """
    try:
        posterior_mua = approx_posterior.xs[0].m[0]
        posterior_cova = approx_posterior.xs[0].S[0, 0]
        posterior_mub = approx_posterior.xs[1].m[0]
        posterior_covb = approx_posterior.xs[1].S[0, 0]
    except Exception:
        # Narrower than the original bare `except:` so that
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return None

    if posterior_covb > posterior_cova:
        posterior_mu, posterior_cov = posterior_mub, posterior_covb
        posterior_mu2, posterior_cov2 = posterior_mua, posterior_cova
    else:
        posterior_mu, posterior_cov = posterior_mua, posterior_cova
        posterior_mu2, posterior_cov2 = posterior_mub, posterior_covb

    # NOTE(review): the original comments tagged all four reads below as
    # "wide"; whether component 0 of the true posterior is the wide one is
    # not visible here -- TODO confirm.
    true_mu = sim.posterior.xs[0].m[0]
    true_cov = sim.posterior.xs[0].S[0, 0]
    true_mu2 = sim.posterior.xs[1].m[0]
    true_cov2 = sim.posterior.xs[1].S[0, 0]

    labels = ['posterior_mu', 'posterior_cov', 'posterior_mu2', 'posterior_cov2',
              'true_mu', 'true_cov', 'true_mu2', 'true_cov2']
    values = [posterior_mu, posterior_cov, posterior_mu2, posterior_cov2,
              true_mu, true_cov, true_mu2, true_cov2]
    return labels, values


def get_df(prefix, loruns):
    """Collect the stored results of every run and round into one DataFrame.

    For each run name in `loruns`, the `_info`, `_sim` and `_posterior`
    files matching ``<prefix>_<run>*`` are loaded from the module-level
    `netsdir`, and one row is produced per round (per info entry).

    Parameters
    ----------
    prefix : str
        Experiment prefix used in the file names.
    loruns : list of str
        Run names, e.g. as returned by `get_runs`.

    Returns
    -------
    df : pandas.DataFrame
        One row per (run, round). Columns: `name`, `seed_start`,
        `accumulate_data`, `gauss_prior`, `round`, every key of the round's
        info dict, `sim`, `posterior`, plus model-specific columns selected
        by the module-level `model` ('gauss', 'hh' or 'mog').
    subset_err : pandas.DataFrame
        Rows of `df` whose `errors` field is not the empty string.

    Notes
    -----
    Both frames are empty when nothing could be loaded. Progress and summary
    information are printed/displayed as a side effect.
    """
    path = netsdir
    rows = []

    for name in tqdm(loruns):
        wildcard = prefix + '_' + name + '*'

        info_dicts = io.load_wildcard(path, wildcard + '_info')
        if len(info_dicts) == 0:
            print('{} : empty info_dicts'.format(name))
            continue

        # Loaded once per run rather than once per round; this assumes
        # io.load_wildcard simply reads the matching files.
        sim_dicts = io.load_wildcard(path, wildcard + '_sim')
        if len(sim_dicts) == 0:
            print('{} : empty sim_dicts'.format(name))
            continue

        posterior_dicts = io.load_wildcard(path, wildcard + '_posterior')
        if len(posterior_dicts) == 0:
            print('{} : empty posterior_dicts'.format(name))
            continue

        #dist_dicts = io.load_wildcard(path, wildcard + '_dist')
        #loss_dicts = io.load_wildcard(path, wildcard + '_loss')
        #net_dicts = io.load_wildcard(path, wildcard + '_net')

        # Run-level fields: identical for every round of this run.
        info_first = first(info_dicts)
        info_last = last(info_dicts)
        seed_start = info_first['seed']
        accumulate_data = int(info_last['load_trn'] is not None and info_last['load_trn'] != '')
        gauss_prior = int('Gaussian' in str(type(info_last['prior_true'])))

        for j in range(len(info_dicts)):
            info = nth(info_dicts, j)
            sim = nth(sim_dicts, j)
            approx_posterior = nth(posterior_dicts, j)

            labels = ['name', 'seed_start', 'accumulate_data', 'gauss_prior', 'round']
            # The round number is taken from the last three characters of the postfix.
            values = [name, seed_start, accumulate_data, gauss_prior,
                      int(info['postfix'][-3:])]

            for k, v in info.items():
                labels.append(k)
                values.append(v)

            labels += ['sim', 'posterior']
            values += [sim, approx_posterior]

            if model == 'gauss':
                extra_labels, extra_values = _gauss_columns(sim, approx_posterior)
                labels += extra_labels
                values += extra_values
            elif model == 'hh':
                labels.append('duration')
                values.append(sim.duration)
            elif model == 'mog':
                extra = _mog_columns(sim, approx_posterior)
                if extra is None:
                    print('nf')
                    continue
                # These values used to be computed and then discarded; they
                # are now stored as columns.
                labels += extra[0]
                values += extra[1]

            # Mirrors the original construction (one single-element list per
            # field, then transpose) so each value ends up in its own cell.
            row = pd.DataFrame([[value] for value in values]).T
            row.columns = labels
            rows.append(row)

    if not rows:
        # Nothing could be loaded: return empty frames instead of failing on
        # the summary output below.
        print('# df : 0')
        empty = pd.DataFrame()
        return empty, empty.copy()

    # Concatenate once: DataFrame.append was removed in pandas 2.0 and
    # growing a frame row by row is quadratic.
    df = pd.concat(rows, ignore_index=True)

    print('# df : {}'.format(len(df)))
    print('head of df')
    display(df.head())

    print('first entry')
    display(df.loc[0])

    if 'errors' in df.columns:
        subset_err = df.query('errors != ""').reset_index(drop=True)
    else:
        subset_err = df.iloc[0:0].copy()
    print('runs with errors (subset_err) : {}'.format(len(subset_err)))

    return df, subset_err

In [None]:
def get_subset(dataframe, subset_str):
    """Return the rows of `dataframe` matching a query, with a fresh index.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame to filter; it is not modified.
    subset_str : str
        Expression passed to `DataFrame.query`.

    Returns
    -------
    pandas.DataFrame
        The matching rows, re-indexed 0..n-1. The number of rows is printed.
    """
    # reset_index(drop=True) replaces reset_index() followed by
    # drop('index', 1): the positional axis argument was removed in
    # pandas 2.0, and the old form failed when a column was named 'index'.
    subset = dataframe.query(subset_str).reset_index(drop=True)
    print('length of subset : {}'.format(len(subset)))
    return subset

In [None]:
# Disabled code kept as a bare string literal: nothing inside it is executed.
# It lists the "*iter_1_info*.pkl" files in `netsdir` whose name contains
# `prefix` and would print how many were found.
# NOTE(review): being the cell's only expression, the string is echoed as the
# cell output; delete the cell or restore the code.
"""
basenames = []
filepaths = []

for filename in os.listdir(netsdir):
    if filename.endswith(".pkl") and "iter_1_info" in filename and prefix in filename:
        basenames.append(os.path.splitext(filename)[0][:-len("iter_1_info")-1])
        filepaths.append(os.path.join(netsdir, filename))
        
print('Found {} matching runs'.format(len(basenames)))
"""

In [None]:
# Disabled plotting code kept as a bare string literal: nothing inside it is
# executed.
# NOTE(review): it expects a frame `df` with columns such as 'trueparam',
# 'iteration', 'mean', 'variance', 'parameter' and 'N'. None of these are
# produced by get_df as visible in this notebook -- confirm before
# re-enabling.
"""
def plot(**kwargs):
    ax = plt.gca()
    data = kwargs.pop("data")
    
    plt.plot([0.5, 5.5], [data['trueparam'].values[0], data['trueparam'].values[0]], color='g')

    plt.errorbar(data["iteration"], data["mean"], data["variance"], marker="o", linestyle="", color='b')
    plt.errorbar(data["iteration"]+0.25, data["true_mean"], data["true_variance"], marker="o", linestyle="", color='r')

g = sns.FacetGrid(df, row="parameter", col="N", sharey="row", margin_titles=True)  #duration
g.map_dataframe(plot) 
g.set_axis_labels("iteration", "value")
plt.legend(['x0','epsilonfree', 'ground truth'])
"""

In [None]:
# Disabled plotting code kept as a bare string literal: nothing inside it is
# executed.
# NOTE(review): it queries a frame named `dfc`, which is not defined in the
# visible part of this notebook, and plots its 'posterior_cov2' column against
# 'prior_alpha' -- confirm where `dfc` comes from before re-enabling.
"""
subset = dfc.query('dim == 1 & iw_loss ==  True & svi == False').copy()  #  & prior_true == False & keep == True').copy()
subset = subset.reset_index()
subset = subset.drop('index', 1)

sns.set(font_scale=2)
g = sns.factorplot(x="prior_alpha", y="posterior_cov2", hue="norm", data=subset,
                   capsize=.1, palette="YlGnBu_d", size=10, aspect=1., markers='.',
                   linestyles='', legend=True, row="keep", col="ess", sharey=True)
ax = plt.gca()
ax.set_ylim([0, 0.02])
#ax.set_ylim([0, 0.05])
#ax.semilogy()
#saverep('loss_calib')
"""