In [None]:
from glob import glob
from os.path import join
import pandas as pd
import numpy as np
import pickle

In [None]:
from load_and_tidy_lib import GetMetdataDataframe, GetMethodDataframe
from load_results_lib import VALID_METHODS

In [None]:
# Root directory under which every method keeps its own results sub-folder.
base_folder = '../blade_runs/'

# One (results folder, method label) pair per inference method.
# NOTE(review): no visible cell consumes this tuple, yet later cells read an
# `all_results` mapping that is never defined here -- presumably the loading
# cell is missing from this notebook; confirm.
folder_method_list = tuple(
    (join(base_folder, results_subdir), method_label)
    for results_subdir, method_label in (
        ("nuts_results/", 'NUTS'),
        ("dadvi_results/", 'DADVI'),
        ("lrvb_results/", 'LRVB'),
        ("raabbvi_results/", 'RAABBVI'),
        ("sadvi_results/", 'SADVI'),
        ("sfullrank_advi_results/", 'SADVI_FR'),
        # Unlike the entries above, this sub-folder carries no trailing slash.
        ('lrvb_doubling_results', 'LRVB_Doubling'),
    )
)


In [None]:
# Assert that VALID_METHODS and the loaded methods are the same.  On failure
# the message lists the method names present on only one side.
assert set(all_results) == set(VALID_METHODS), (
    'Loaded methods and VALID_METHODS disagree on: '
    f'{sorted(set(all_results).symmetric_difference(VALID_METHODS))}'
)

In [None]:
# Check which models are missing for which method.  If a model / method pair is missing we
# should know why!

# Model names available for each method.
method_models = {x: all_results[x]['model_name'].tolist() for x in VALID_METHODS}
# Every model name seen for at least one method.
all_models = set().union(*method_models.values())
# Models absent from at least one method; a later cell filters on this set.
all_missing_models = set()

for method in VALID_METHODS:
    print(method)
    missing_models = all_models.difference(method_models[method])
    if missing_models:
        print('Missing models:')
        # Sets iterate in arbitrary order; sort so the report is reproducible
        # from one kernel run to the next.
        print('\n'.join(sorted(missing_models)))
        all_missing_models |= missing_models
    else:
        print('Nothing missing!')
    print('\n')

# Sorted for the same reason; `all_missing_models` itself stays a set.
print('Missing for at least one method: ', sorted(all_missing_models))

In [None]:
# Remove models that are missing for at least one method, in order
# to avoid biasing results.
for method in VALID_METHODS:
    # Vectorised membership test instead of a per-row Python lambda.
    is_missing = all_results[method]['model_name'].isin(all_missing_models)
    print(f'Removing {np.sum(is_missing)} from {method}')
    # Store an explicit copy: a later cell assigns a new column on one of
    # these frames, and doing that on a boolean-mask selection raises
    # pandas' SettingWithCopyWarning.
    all_results[method] = all_results[method].loc[~is_missing].copy()

In [None]:
# NOTE(review): raabbvi_maxiter is not read by any visible cell -- confirm it
# is still needed.
raabbvi_maxiter = 19900

# The two methods that the remaining cells compare against each other.
method_1, method_2 = 'LRVB_Doubling', 'RAABBVI'

# Run add_deviation_stats on each method's frame, with the NUTS frame as the
# second argument.  An earlier variant chained .dropna() onto each result;
# here every row is kept.
# NOTE(review): add_deviation_stats is not imported in any visible cell --
# confirm where it comes from.
method_1_df, method_2_df = (
    add_deviation_stats(all_results[method_1], all_results['NUTS']),
    add_deviation_stats(all_results[method_2], all_results['NUTS']),
)


In [None]:
# Pass both frames through add_derived_stats, rebinding each name to the result.
# NOTE(review): re-running this cell feeds the already-processed frames back
# in -- confirm add_derived_stats tolerates being applied twice.
method_1_df, method_2_df = map(add_derived_stats, (method_1_df, method_2_df))

In [None]:
# Inspect which keys method_1_df exposes after the two helper calls above
# (for a pandas DataFrame, keys() is the column index).
print(method_1_df.keys())

In [None]:
# Print entry 1 of the flat and non-flat mean-deviation columns side by side.
# NOTE(review): if these columns are pandas Series, `[1]` looks up the index
# label 1 rather than the second row -- confirm that label still exists after
# the filtering done in earlier cells.
print(method_1_df['mean_deviations_flat'][1])
print(method_1_df['mean_deviations'][1])

In [None]:
# List the keys stored for every method; the metadata differs between methods.
for method in VALID_METHODS:
    method_keys = all_results[method].keys()
    print(f'{method}: {method_keys}')

In [None]:
# Look at the proportion of models that converged for each method.
# Methods whose results carry no 'converged' entry are skipped.
for method in VALID_METHODS:
    converged_flags = all_results[method].get('converged')
    if converged_flags is None:
        continue
    print(f'Proportion converged for {method:10}: {converged_flags.mean()}')

In [None]:
# Join the two methods' frames on model name.  Columns that exist in both
# frames are told apart by a '_<method>' suffix.
comparison = pd.merge(
    method_1_df,
    method_2_df,
    on='model_name',
    suffixes=tuple(f'_{name}' for name in (method_1, method_2)),
)

comparison

In [None]:
import matplotlib.pyplot as plt

f, ax = plt.subplots(1, 1)

xmin, xmax = [comparison[f'mean_rms_{method_1}'].min(), comparison[f'mean_rms_{method_1}'].max()]
# ax.scatter(comparison['mean_rms_raabbvi'], comparison['mean_rms_lrvb'], c=comparison['converged'])
ax.scatter(comparison[f'mean_rms_{method_1}'], comparison[f'mean_rms_{method_2}'])
ax.plot([xmin, xmax], [xmin, xmax])

for row in comparison.itertuples():
    ax.annotate(row.model_name, (getattr(row, f'mean_rms_{method_1}'), getattr(row, f'mean_rms_{method_2}')))

ax.set_xscale('log')
ax.set_yscale('log')

ax.set_xlabel(f'RMSE mean scaled by posterior sd, {method_1}')
ax.set_ylabel(f'RMSE mean scaled by posterior sd, {method_2}')

ax.grid(alpha=0.5, linestyle='--')

f.set_size_inches(12, 8)
f.tight_layout()

# plt.savefig('./mean_comparison.png', dpi=300)


In [None]:
f, ax = plt.subplots(1, 1)

xmin, xmax = [comparison[f'sd_rms_{method_1}'].min(), comparison[f'sd_rms_{method_1}'].max()]
# ax.scatter(comparison['mean_rms_raabbvi'], comparison['mean_rms_lrvb'], c=comparison['converged'])
ax.scatter(comparison[f'sd_rms_{method_1}'], comparison[f'sd_rms_{method_2}'])
ax.plot([xmin, xmax], [xmin, xmax])

for row in comparison.itertuples():
    ax.annotate(row.model_name, (getattr(row, f'sd_rms_{method_1}'), getattr(row, f'sd_rms_{method_2}')))

ax.set_xscale('log')
ax.set_yscale('log')

ax.set_xlabel(f'RMSE sd scaled by posterior sd, {method_1}')
ax.set_ylabel(f'RMSE sd scaled by posterior sd, {method_2}')

ax.grid(alpha=0.5, linestyle='--')

f.set_size_inches(12, 8)
f.tight_layout()

# plt.savefig('./sd_comparison.png', dpi=300)

In [None]:

f, ax = plt.subplots(1, 1)

xmin, xmax = [comparison[f'runtime_{method_1}'].min(), comparison[f'runtime_{method_1}'].max()]
# ax.scatter(comparison['mean_rms_raabbvi'], comparison['mean_rms_lrvb'], c=comparison['converged'])
ax.scatter(comparison[f'runtime_{method_1}'], comparison[f'runtime_{method_2}'])
ax.plot([xmin, xmax], [xmin, xmax])

for row in comparison.itertuples():
    ax.annotate(row.model_name, (getattr(row, f'runtime_{method_1}'), getattr(row, f'runtime_{method_2}')))

ax.set_xscale('log')
ax.set_yscale('log')

ax.set_xlabel(f'Runtime, {method_1}')
ax.set_ylabel(f'Runtime, {method_2}')

ax.grid(alpha=0.5, linestyle='--')

f.set_size_inches(12, 8)
f.tight_layout()

# plt.savefig('runtime_comparison.png', dpi=300)

In [None]:
# Preview the first rows of the merged comparison frame.
comparison.head()

In [None]:
# Lift the 'M' entry of each row's metadata into a column of its own.
# The stored frame is a boolean-mask selection made in an earlier cell, and
# assigning a column on such a frame in place triggers pandas'
# SettingWithCopyWarning; assign() builds a new frame instead, which is then
# stored back under the same key.
all_results['LRVB_Doubling'] = all_results['LRVB_Doubling'].assign(
    M=all_results['LRVB_Doubling']['metadata'].apply(lambda meta: meta['M'])
)

In [None]:
# The 20 LRVB_Doubling rows with the largest 'M', shown with name and runtime.
(
    all_results['LRVB_Doubling']
    .loc[:, ['model_name', 'runtime', 'M']]
    .sort_values(by='M', ascending=False)
    .head(20)
)

In [None]:
# Every LRVB model with its runtime, in ascending order of runtime.
(
    all_results['LRVB']
    .loc[:, ['model_name', 'runtime']]
    .sort_values(by='runtime')
)