Load results from the `blade_runs` directory and save them in a tidy format for R.

In [1]:
from os.path import join
import pandas as pd
import numpy as np

In [2]:
from load_and_tidy_posteriors_lib import \
    VALID_METHODS, GetMetadataDataframe, GetMethodDataframe, \
    GetTraceDataframe, GetUnconstraintedParamsDataframe

# Dry-run switch: every `to_csv` call in this notebook is guarded by
# `if save_output:`, so while this is False no CSV file is written or
# overwritten.  Set to True to actually save the tidy outputs.
save_output = False

In [3]:
# Root of the experiments checkout; the raw results are read from its
# `comparison/blade_runs/` directory.
base_folder = '/home/rgiordan/Documents/git_repos/DADVI/dadvi-experiments'
input_folder = join(base_folder, 'comparison/blade_runs/')
output_folder = input_folder  # tidy CSVs are written next to the raw results

# (results folder, method label) pairs.  The loading cells iterate over this
# tuple in order, and later cells index into it by position (entry 0 is
# asserted to be NUTS, entry 1 DADVI), so keep the ordering stable.
folder_method_list = tuple(
    (join(input_folder, results_subfolder), method_label)
    for results_subfolder, method_label in (
        ("nuts_results/", 'NUTS'),
        ("dadvi_results/", 'DADVI'),
        ("raabbvi_results/", 'RAABBVI'),
        ("sadvi_results/", 'SADVI'),
        ("sfullrank_advi_results/", 'SADVI_FR'),
        ("lrvb_Direct_results/", 'LRVB'),
        ('lrvb_doubling_results', 'LRVB_Doubling'),  # Missing
    )
)


In [4]:
# Load one posterior dataframe per method, in the order given by
# folder_method_list, then stack them into a single dataframe.
posterior_dfs = []
for folder, method in folder_method_list:
    print(f'Loading {method}')
    posterior_dfs += [GetMethodDataframe(folder, method)]

posterior_df = pd.concat(posterior_dfs)

Loading NUTS
Loading DADVI
Loading RAABBVI
Loading SADVI
Loading SADVI_FR
Loading LRVB
Loading LRVB_Doubling


In [5]:
# Load one metadata dataframe per method, in the order given by
# folder_method_list, then stack them into a single dataframe.
metadata_dfs = []
for folder, method in folder_method_list:
    print(f'Loading {method}')
    metadata_dfs += [GetMetadataDataframe(folder, method)]

metadata_df = pd.concat(metadata_dfs)


Loading NUTS
Loading DADVI




Loading RAABBVI
Loading SADVI
Loading SADVI_FR
Loading LRVB
Loading LRVB_Doubling
/home/rgiordan/Documents/git_repos/DADVI/dadvi-experiments/comparison/blade_runs/lrvb_doubling_results/lrvb_info/tennis.pkl not found.


In [6]:
# Load one trace dataframe per method, in the order given by
# folder_method_list, then stack them into a single dataframe.
# NOTE(review): the recorded output of this cell ends in an AssertionError
# right after LRVB_Doubling reports a missing tennis.pkl, so `trace_df` may be
# left undefined for the cells that follow -- confirm before saving.
trace_dfs = []
for folder, method in folder_method_list:
    print(f'Loading {method}')
    trace_dfs += [GetTraceDataframe(folder, method)]

trace_df = pd.concat(trace_dfs)


Loading NUTS
Loading DADVI
Loading RAABBVI
Loading SADVI
Loading SADVI_FR
Loading LRVB
Loading LRVB_Doubling
/home/rgiordan/Documents/git_repos/DADVI/dadvi-experiments/comparison/blade_runs/lrvb_doubling_results/lrvb_info/tennis.pkl not found.


AssertionError: 

In [None]:
# Write the three tidy dataframes to CSV in output_folder (skipped entirely on
# a dry run).  The writes happen one after another, so if a later dataframe is
# unavailable the earlier files have already been written.
if save_output:
    posterior_df.to_csv(join(output_folder, 'posteriors_tidy.csv'), index=False)
    metadata_df.to_csv(join(output_folder, 'metadata_tidy.csv'), index=False)
    trace_df.to_csv(join(output_folder, 'trace_tidy.csv'), index=False)

# Save the names of unconstrained parameters

In [None]:
if save_output:
    # The parameter names are read from the DADVI results, which are looked up
    # by position; the assert catches a reordering of folder_method_list.
    folder, method = folder_method_list[1]
    assert method == 'DADVI'
    param_df = GetUnconstraintedParamsDataframe(folder, method)
    param_df.to_csv(join(output_folder, 'params_tidy.csv'), index=False)

# Save the full MCMC diagnostic information

In [None]:
# TODO: this should be in the library

from load_and_tidy_posteriors_lib import GetDrawFilenames, RepList

# The NUTS results are looked up by position; the assert catches a reordering
# of folder_method_list.
folder, method = folder_method_list[0]
assert method == 'NUTS'

# `model_names` from this call is replaced by the one unpacked just below.
draw_filenames, model_names = GetDrawFilenames(folder)

# With return_raw_metadata=True the loader's result is unpacked into two
# values (the exploration cell further down does the same).
# NOTE(review): the second value is assumed to be the model names aligned with
# `raw_metadata` -- confirm against load_and_tidy_posteriors_lib.
raw_metadata, model_names = GetMetadataDataframe(folder, method, return_raw_metadata=True)

# Column name -> list of per-(model, variable) chunks; the chunks are
# concatenated into flat columns once everything has been collected.
mcmc_dict = {
    'model': [],
    'param': [],
    'ess': [],
    'rhat': []
}

for model_ind in range(len(raw_metadata)):
    model = model_names[model_ind]
    metadata = raw_metadata[model_ind]

    for varname in metadata['ess'].data_vars:
        # Flatten each variable's diagnostics so every element becomes one row.
        ess = metadata['ess'][varname].values.flatten()
        rhat = metadata['rhat'][varname].values.flatten()
        assert len(ess) == len(rhat)
        # Repeat the labels so they line up element-wise with ess / rhat.
        mcmc_dict['model'].append(RepList(model, len(ess)))
        mcmc_dict['param'].append(RepList(varname, len(ess)))
        mcmc_dict['ess'].append(ess)
        mcmc_dict['rhat'].append(rhat)

mcmc_df = pd.DataFrame()
for k, v in mcmc_dict.items():
    mcmc_df[k] = np.hstack(v)

if save_output:
    mcmc_df.to_csv(join(output_folder, 'mcmc_diagnostics_tidy.csv'), index=False)

# Explore the contents of the metadata.  

Maybe we want to save additional information.

In [None]:
# Collect the raw metadata for every method, keyed by method label.  Only the
# first value returned by the loader is kept; the second is discarded.
# Note that this rebinds `raw_metadata`, which the MCMC diagnostics cell above
# used for the NUTS-only metadata.
raw_metadata = {}
for folder, method in folder_method_list:
    print(f'Loading {method}')
    raw_metadata[method], _ = GetMetadataDataframe(folder, method, return_raw_metadata=True) 


In [None]:
# Look at the first RAABBVI metadata entry: which keys it has, and its runtime.
metadata = raw_metadata['RAABBVI'][0]
print(metadata.keys())
print(metadata['runtime'])

In [None]:
# Compare the DADVI and LRVB metadata entries at the same position.
ind = 2
metadata = raw_metadata['LRVB'][ind]
lrvb_metadata = metadata
dadvi_metadata = raw_metadata['DADVI'][ind]

# Optimizer evaluation counts, plus the LRVB-only HVP call count.
print(dadvi_metadata['opt_result']['evaluation_count'])
print(lrvb_metadata['opt_result']['evaluation_count'])
print(lrvb_metadata['lrvb_hvp_calls'])

print('\n\n')
# Runtimes.
print(dadvi_metadata['runtime'])
print(lrvb_metadata['runtime'])

# Number of unconstrained parameter names recorded by each method.
print(len(dadvi_metadata['unconstrained_param_names']))
print(len(lrvb_metadata['unconstrained_param_names']))
#print(raw_metadata['LRVB'][ind]['runtime'])

print('\n\n')
# Available keys, LRVB first and then DADVI.
print(lrvb_metadata.keys())
print(dadvi_metadata.keys())

In [None]:
# Disabled variant: dump the first metadata entry of every method.
# for k,v in raw_metadata.items():
#     print('=======================================\n', k, ':')
#     print(v[0])
#     print('\n')

# Active variant: dump the first metadata entry for the listed methods only.
for k in ['LRVB']:
    print(raw_metadata[k][0])