Load results from the `blade_runs` directory and save them in a tidy format for R.

In [1]:
import os
from os.path import join

import numpy as np
import pandas as pd

In [2]:
from load_and_tidy_posteriors_lib import \
    VALID_METHODS, GetMetadataDataframe, GetMethodDataframe, \
    GetTraceDataframe, GetUnconstraintedParamsDataframe, \
    GetMCMCDiagnosticsDataframe

# Dry-run switch: when False, the cells below that write csv files skip the
# write, so existing output files are not overwritten.
save_output = True

In [3]:
# Base folder that contains `comparison/blade_runs/`.  The default is the
# original author's location; set the DADVI_EXPERIMENTS_DIR environment
# variable to point the notebook at a different location without editing
# this cell.  An unset or empty variable falls back to the default.
base_folder = (
    os.environ.get('DADVI_EXPERIMENTS_DIR')
    or '/home/rgiordan/Documents/git_repos/DADVI/dadvi-experiments'
)
input_folder = join(base_folder, 'comparison/blade_runs/')
# The tidy csv files are written into the same folder the results are read from.
output_folder = input_folder

# (results folder, method label) pairs, one per method.  The order matters:
# later cells pick NUTS and DADVI by position (indices 0 and 1) and assert
# on the label.
folder_method_list = (
    (join(input_folder, "nuts_results/"), 'NUTS'),
    (join(input_folder, "dadvi_results/"), 'DADVI'),
    (join(input_folder, "raabbvi_results/"), 'RAABBVI'),
    (join(input_folder, "sadvi_results/"), 'SADVI'),
    (join(input_folder, "sfullrank_advi_results/"), 'SADVI_FR'),
    (join(input_folder, "lrvb_Direct_results/"), 'LRVB'),
    (join(input_folder, 'lrvb_doubling_results'), 'LRVB_Doubling'),
    (join(input_folder, 'lrvb_cg_results'), 'LRVB_CG')
)


In [4]:
# Load one dataframe per method with GetMethodDataframe, announcing each
# method as it starts, then stack them all into a single frame.
posterior_dfs = []
for results_folder, method_name in folder_method_list:
    print(f'Loading {method_name}')
    method_posterior_df = GetMethodDataframe(results_folder, method_name)
    posterior_dfs.append(method_posterior_df)
posterior_df = pd.concat(posterior_dfs)

Loading NUTS
Loading DADVI
Loading RAABBVI
Loading SADVI
Loading SADVI_FR
Loading LRVB
Loading LRVB_Doubling
Loading LRVB_CG


In [5]:
# Load one metadata dataframe per method with GetMetadataDataframe,
# announcing each method as it starts, then stack them into a single frame.
metadata_dfs = []
for results_folder, method_name in folder_method_list:
    print(f'Loading {method_name}')
    method_metadata_df = GetMetadataDataframe(results_folder, method_name)
    metadata_dfs.append(method_metadata_df)

metadata_df = pd.concat(metadata_dfs)


Loading NUTS
Loading DADVI




Loading RAABBVI
Loading SADVI
Loading SADVI_FR
/home/rgiordan/Documents/git_repos/DADVI/dadvi-experiments/comparison/blade_runs/sfullrank_advi_results/info/occ_det.pkl not found.
Loading LRVB
Loading LRVB_Doubling
/home/rgiordan/Documents/git_repos/DADVI/dadvi-experiments/comparison/blade_runs/lrvb_doubling_results/lrvb_info/tennis.pkl not found.
Loading LRVB_CG


In [6]:
# Load one trace dataframe per method with GetTraceDataframe, announcing
# each method as it starts, then stack them into a single frame.
trace_dfs = []
for results_folder, method_name in folder_method_list:
    print(f'Loading {method_name}')
    method_trace_df = GetTraceDataframe(results_folder, method_name)
    trace_dfs.append(method_trace_df)

trace_df = pd.concat(trace_dfs)


Loading NUTS
Loading DADVI
Loading RAABBVI
Loading SADVI
Loading SADVI_FR
/home/rgiordan/Documents/git_repos/DADVI/dadvi-experiments/comparison/blade_runs/sfullrank_advi_results/info/occ_det.pkl not found.
Loading LRVB
Loading LRVB_Doubling
/home/rgiordan/Documents/git_repos/DADVI/dadvi-experiments/comparison/blade_runs/lrvb_doubling_results/lrvb_info/tennis.pkl not found.
Loading LRVB_CG


In [7]:
# Write the three combined frames to csv in the output folder (without the
# pandas index).  Skipped entirely on a dry run.
if save_output:
    tidy_outputs = (
        (posterior_df, 'posteriors_tidy.csv'),
        (metadata_df, 'metadata_tidy.csv'),
        (trace_df, 'trace_tidy.csv'),
    )
    for tidy_df, csv_name in tidy_outputs:
        tidy_df.to_csv(join(output_folder, csv_name), index=False)

# Save the names of unconstrained parameters

In [8]:
# The unconstrained parameter names are read from the DADVI results, which
# are expected at position 1 of folder_method_list (checked by the assert).
folder, method = folder_method_list[1]
assert method == 'DADVI'
# Load outside the save_output guard, as the MCMC diagnostics cell does, so
# that a dry run still exercises the load and defines param_df; only the csv
# write is skipped when save_output is False.
param_df = GetUnconstraintedParamsDataframe(folder, method)
if save_output:
    param_df.to_csv(join(output_folder, 'params_tidy.csv'), index=False)

# Save the full MCMC diagnostic information

In [9]:
# The MCMC diagnostics are read from the NUTS results, which are expected at
# position 0 of folder_method_list (checked by the assert).
nuts_folder, nuts_method = folder_method_list[0]
assert nuts_method == 'NUTS'
mcmc_df = GetMCMCDiagnosticsDataframe(nuts_folder, nuts_method)
# Only the csv write is skipped on a dry run; the load above always happens.
if save_output:
    mcmc_csv_path = join(output_folder, 'mcmc_diagnostics_tidy.csv')
    mcmc_df.to_csv(mcmc_csv_path, index=False)

In [10]:
# Call GetMetadataDataframe again with return_raw_metadata=True, which
# returns a pair per method; keep both halves in dicts keyed by method label.
raw_metadata, model_names = {}, {}
for folder, method in folder_method_list:
    print(f'Loading {method}')
    method_raw_metadata, method_model_names = GetMetadataDataframe(
        folder, method, return_raw_metadata=True)
    raw_metadata[method] = method_raw_metadata
    model_names[method] = method_model_names


Loading NUTS
Loading DADVI
Loading RAABBVI
Loading SADVI
Loading SADVI_FR
/home/rgiordan/Documents/git_repos/DADVI/dadvi-experiments/comparison/blade_runs/sfullrank_advi_results/info/occ_det.pkl not found.
Loading LRVB
Loading LRVB_Doubling
/home/rgiordan/Documents/git_repos/DADVI/dadvi-experiments/comparison/blade_runs/lrvb_doubling_results/lrvb_info/tennis.pkl not found.
Loading LRVB_CG
