In [None]:
import mlflow
import datetime
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

In [None]:
# get mlflow runs
experiment_id = '353509075155020587' # TODO update mlflow experiment ID if it changes (check mlruns directory)

# the tracking store is the local mlruns directory, addressed relative to this notebook
mlflow.set_tracking_uri("../../mlruns")

# one row per run; params / metrics appear as 'params.*' / 'metrics.*' columns
runs = mlflow.search_runs(experiment_ids=[experiment_id])

# Report how many runs failed. The count is taken from the status column alone,
# and the percentage is guarded so an experiment without runs does not raise
# ZeroDivisionError.
total_runs = len(runs)
failed_runs = int((runs['status'] == 'FAILED').sum())
failed_pct = failed_runs / total_runs * 100 if total_runs else 0.0
if not total_runs:
    print("No runs found for experiment {} - check experiment_id and the tracking URI".format(experiment_id))
print("{} experiment runs failed ({}% of total)".format(failed_runs, failed_pct))

In [None]:
# Tag shared by every output file written by this notebook session (CSV and figures),
# formatted as YYYYMMDD-HHMMSS from the local clock at the time this cell runs.
timestamp = f"{datetime.datetime.now():%Y%m%d-%H%M%S}"
print(timestamp)

In [None]:
# Save the raw run table for this session; index=False keeps the
# positional row index out of the CSV.
runs.to_csv(f'results-{timestamp}.csv', index=False)

In [None]:
# cleanup the data fields for analysis
# The experiment settings are read from the '_'-separated tokens of the datafile
# name, addressed by position counted from the end of the name.
# NOTE(review): the naming scheme itself is not visible in this notebook - confirm
# the token positions against the code that generates the datafiles.
datafile_tokens = runs['params.datafile'].str.split('_')
runs['trial'] = datafile_tokens.str[-2]
runs['value'] = datafile_tokens.str[-4].astype(float)
runs['variable'] = datafile_tokens.str[-5]
runs['C'] = datafile_tokens.str[-6]
# missing optional params become '' so the concatenation below does not turn the whole method key into NaN
runs['params.causal_distance'] = runs['params.causal_distance'].fillna('')
runs['params.inference_type'] = runs['params.inference_type'].fillna('')
runs['method'] = runs['params.model'] + runs['params.causal_distance'] + runs['params.inference_type']

# get best result for each trial, based on performance for validation set
group_keys = ['method','C','trial','variable','value']
metric_cols = ['metrics.RMSE_avg_val','metrics.RMSE_avg_test']
results = (
    runs[['method','C','value','variable','trial'] + metric_cols]
    # GroupBy.first() returns the first non-null value of EACH column independently.
    # Runs missing either metric are dropped first, so the validation and test RMSE
    # reported for a group always come from the same run.
    .dropna(subset=metric_cols)
    # ascending sort: the lowest validation RMSE comes first within every group
    .sort_values(by='metrics.RMSE_avg_val')
    .groupby(group_keys)
    .first()
    .reset_index()
)
results['C'] = results['C'].astype(float)
results['metrics.RMSE_avg_test'] = results['metrics.RMSE_avg_test'].astype(float)

# raw method key (model + causal distance + inference type) -> display label used in the figures
method_names = {'global_bnn_baseline':'Global BNN baseline', 
                'individual_bnn_baseline':'Local BNNs baseline', 
                'bayesian_maml_baseline':'Meta-learning baseline',
                'our_methodground_truth':'Ground truth reference',
                'our_methodOD':'Our method (OD)',
                'our_methodSHD':'Our method (SHD)',
                'our_methodID':'Our method (ID)',
                'our_methodSID':'Our method (SID)',
                'our_methodobservational':'Our method (OP)',
                'our_methodinterventional':'Our method (IP)'}
# keys that are not listed above map to NaN
results['method'] = results['method'].map(method_names)

results

In [None]:
# create plot for N
# Three side-by-side panels of test RMSE against the swept value, one line per method.

sns.set_style("whitegrid")

# rows of `results` whose 'variable' equals this are plotted; also used as x-axis label and in the file name
sweep_variable = 'N'

methods1 = ['Global BNN baseline', 'Local BNNs baseline', 'Meta-learning baseline', 'Our method (OD)', 'Our method (SHD)', 'Our method (ID)', 'Our method (SID)']
methods2 = ['Global BNN baseline', 'Local BNNs baseline', 'Meta-learning baseline', 'Our method (OP)', 'Our method (IP)']
methods3 = ['Ground truth reference', 'Our method (OD)', 'Our method (SHD)', 'Our method (ID)', 'Our method (SID)', 'Our method (OP)', 'Our method (IP)']

# one fixed color per method so a method looks the same in every panel
colors = {'Global BNN baseline':'tab:blue', 
          'Local BNNs baseline':'tab:pink', 
          'Meta-learning baseline':'tab:olive', 
          'Ground truth reference':'tab:red', 
          'Our method (OD)':'tab:cyan', 
          'Our method (SHD)':'tab:brown', 
          'Our method (ID)':'tab:orange', 
          'Our method (SID)':'tab:gray', 
          'Our method (OP)':'tab:green', 
          'Our method (IP)':'tab:purple'}

# one entry per panel, left to right: (title, methods drawn, use a log-scaled y-axis?)
# the third panel keeps matplotlib's default linear y-axis
panels = [
    ('Baseline comparison (known CGMs)', methods1, True),
    ('Baseline comparison (unknown CGMs)', methods2, True),
    ('Causal distance and proxy comparison', methods3, False),
]

fig, axes = plt.subplots(1, len(panels), figsize=(16,4))

sweep_results = results[results['variable'] == sweep_variable]

for ax, (title, methods, log_y) in zip(axes, panels):
    sns.lineplot(data=sweep_results[sweep_results['method'].isin(methods)],
                 x='value', y='metrics.RMSE_avg_test', hue='method', style='method',
                 ax=ax, palette={m: colors[m] for m in methods})

sns.despine(left=True)

for ax, (title, methods, log_y) in zip(axes, panels):
    ax.set_ylabel('RMSE of test tasks')
    if log_y:
        # the scale is set first, then the tick locator / formatter are applied on top of it
        ax.set_yscale('log')
        ax.yaxis.set_major_locator(ticker.LogLocator(base=10, subs=[0.01,0.02,0.03]))
        # plain decimal tick labels instead of the log axis' default power-of-ten labels
        ax.yaxis.set_major_formatter(ticker.ScalarFormatter())
    ax.set_title(title)
    ax.set_xlabel(sweep_variable)
    # the legend is built exactly once, below the panel and without a title
    ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=2, borderpad=0.1, columnspacing=0.5)

plt.savefig(f'results-{sweep_variable}-{timestamp}.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# create plot for M
# Three side-by-side panels of test RMSE against the swept value, one line per method.

sns.set_style("whitegrid")

# rows of `results` whose 'variable' equals this are plotted; also used as x-axis label and in the file name
sweep_variable = 'M'

methods1 = ['Global BNN baseline', 'Local BNNs baseline', 'Meta-learning baseline', 'Our method (OD)', 'Our method (SHD)', 'Our method (ID)', 'Our method (SID)']
methods2 = ['Global BNN baseline', 'Local BNNs baseline', 'Meta-learning baseline', 'Our method (OP)', 'Our method (IP)']
methods3 = ['Ground truth reference', 'Our method (OD)', 'Our method (SHD)', 'Our method (ID)', 'Our method (SID)', 'Our method (OP)', 'Our method (IP)']

# one fixed color per method so a method looks the same in every panel
colors = {'Global BNN baseline':'tab:blue', 
          'Local BNNs baseline':'tab:pink', 
          'Meta-learning baseline':'tab:olive', 
          'Ground truth reference':'tab:red', 
          'Our method (OD)':'tab:cyan', 
          'Our method (SHD)':'tab:brown', 
          'Our method (ID)':'tab:orange', 
          'Our method (SID)':'tab:gray', 
          'Our method (OP)':'tab:green', 
          'Our method (IP)':'tab:purple'}

# one entry per panel, left to right: (title, methods drawn, use a log-scaled y-axis?)
# the third panel keeps matplotlib's default linear y-axis
panels = [
    ('Baseline comparison (known CGMs)', methods1, True),
    ('Baseline comparison (unknown CGMs)', methods2, True),
    ('Causal distance and proxy comparison', methods3, False),
]

fig, axes = plt.subplots(1, len(panels), figsize=(16,4))

sweep_results = results[results['variable'] == sweep_variable]

for ax, (title, methods, log_y) in zip(axes, panels):
    sns.lineplot(data=sweep_results[sweep_results['method'].isin(methods)],
                 x='value', y='metrics.RMSE_avg_test', hue='method', style='method',
                 ax=ax, palette={m: colors[m] for m in methods})

sns.despine(left=True)

for ax, (title, methods, log_y) in zip(axes, panels):
    ax.set_ylabel('RMSE of test tasks')
    if log_y:
        # the scale is set first, then the tick locator / formatter are applied on top of it
        ax.set_yscale('log')
        ax.yaxis.set_major_locator(ticker.LogLocator(base=10, subs=[0.01,0.02,0.03]))
        # plain decimal tick labels instead of the log axis' default power-of-ten labels
        ax.yaxis.set_major_formatter(ticker.ScalarFormatter())
    ax.set_title(title)
    ax.set_xlabel(sweep_variable)
    # the legend is built exactly once, below the panel and without a title
    ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=2, borderpad=0.1, columnspacing=0.5)

plt.savefig(f'results-{sweep_variable}-{timestamp}.png', dpi=300, bbox_inches='tight')
plt.show()

