In [None]:
import mlflow
import datetime

# ID of the MLflow experiment whose runs are analysed in this notebook.
experiment_id = '717746269708392697' # TODO update ID if it changes

# The tracking store is addressed by a path relative to this notebook.
mlflow.set_tracking_uri("../../mlruns")

# One row per run of the experiment.
runs = mlflow.search_runs(experiment_ids=[experiment_id])

# Count failures directly from the status column (no dependency on any
# particular params.* column being present).
failed_runs = int((runs['status']=='FAILED').sum())
# Guard the percentage against an experiment with no runs at all, which would
# otherwise raise ZeroDivisionError.
print("{} experiment runs failed ({}% of total)".format(failed_runs, failed_runs/len(runs)*100 if len(runs) else 0.0))

In [None]:
# Tag every file written by this notebook session with the same timestamp,
# starting with a dump of the raw run table.
timestamp = f"{datetime.datetime.now():%Y%m%d-%H%M%S}"
runs.to_csv(f'results-main-{timestamp}.csv', index=None)
print(timestamp)

In [None]:
# Split the data file's base name on '_' and read the experiment variable,
# its value and the trial from positions 1, 2 and 4 of the resulting pieces.
tmp = runs['params.datafile'].str.split('/').str[-1].str.split('_')
runs = runs.assign(
    experiment_variable=tmp.str[1].tolist(),
    experiment_variable_value=tmp.str[2].tolist(),
    trial=tmp.str[4].tolist(),
).rename(columns={'tags.mlflow.project.entryPoint':'method'})

In [None]:
# Keep only the columns the rest of the analysis uses.
results = runs[[
    'method',
    'experiment_variable',
    'experiment_variable_value',
    'trial',
    'status',
    'metrics.RMSE_avg_val',
    'metrics.RMSE_avg_test',
    'params.outprefix',
    'params.datafile',
]]
results

### Check whether any runs failed

In [None]:
# Number of failed runs per method / experiment variable / value.
(
    results.loc[results['status'].eq('FAILED')]
    .groupby(['method','experiment_variable','experiment_variable_value'])
    .count()
)

In [None]:
# From here on, work only with runs that finished.
results = results.loc[results['status'].eq('FINISHED')]

## Utility functions

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import sys
sys.path.append("..")
from f1_score_utils import get_f1_scores

# Select seaborn's "whitegrid" style.
sns.set_style("whitegrid")

# MLflow entry-point name -> display name used in tables and plots.
method_names = {
    # baselines
    'global_bnn_model': 'Global BNN baseline',
    'global_bnn_model_large': 'Global BNN baseline (large)',
    'individual_bnn_model': 'Local BNNs baseline',
    'bayesian_metalearner_model': 'Meta-learning baseline',
    'hsml_model': 'HSML',
    'tsa_maml_model': 'TSA-MAML',
    # variants of our method
    'our_method_known_causal_structure': 'Our method (ground truth reference)',
    'our_method_unknown_causal_structure': 'Our method',
    'our_method_unknown_causal_structure_diagnostic': 'Our method (diagnostic)',
}

# Display name -> matplotlib colour, one entry per display name above.
colors = {
    'Global BNN baseline': 'tab:brown',
    'Global BNN baseline (large)': 'tab:blue',
    'Local BNNs baseline': 'tab:pink',
    'Meta-learning baseline': 'tab:olive',
    'HSML': 'tab:cyan',
    'TSA-MAML': 'tab:orange',
    'Our method (ground truth reference)': 'tab:red',
    'Our method': 'tab:grey',
    'Our method (diagnostic)': 'tab:purple',
}


def get_results_for_experiment(results, experiment_variable, is_int=True):
    """Select one row per (method, value, trial) for a single experiment.

    Rows of ``results`` whose 'experiment_variable' equals
    ``experiment_variable`` are sorted by validation RMSE and reduced with
    ``groupby(...).first()``, so the lowest-validation-RMSE entry is kept for
    each method / experiment value / trial combination.

    The 'experiment_variable_value' column is renamed to
    ``experiment_variable``; when ``is_int`` is true that column is cast to
    float. Test RMSE is cast to float, and 'method' is translated to its
    display name through ``method_names``.
    """
    group_keys = ['method', 'experiment_variable_value', 'trial']
    kept_columns = group_keys + ['metrics.RMSE_avg_val', 'metrics.RMSE_avg_test',
                                 'params.outprefix', 'params.datafile']

    selected = results[results['experiment_variable'] == experiment_variable]

    # Ascending sort on validation RMSE puts the best run of each group first.
    ranked = selected[kept_columns].sort_values(by='metrics.RMSE_avg_val')
    best = ranked.groupby(group_keys).first().reset_index()

    best = best.rename(columns={'experiment_variable_value': experiment_variable})
    if is_int:
        best[experiment_variable] = best[experiment_variable].astype(float)
    best['metrics.RMSE_avg_test'] = best['metrics.RMSE_avg_test'].astype(float)
    best['method'] = best['method'].map(method_names)
    return best

    
def get_causal_group_accuracy(exp_results, experiment_variable, default_num_train_tasks=200):
    """Get the F1 scores for accuracy of causal group identifiability

    Only the rows of ``exp_results`` whose 'method' is 'Our method' are
    scored. For each of them the final causal-assignment file and the task
    metadata file are located from 'params.outprefix' and 'params.datafile',
    and both paths are passed to ``get_f1_scores``.

    Parameters
    ----------
    exp_results : pandas.DataFrame
        Must contain 'method', 'params.outprefix' and 'params.datafile', plus
        an 'N' column when ``experiment_variable`` is 'N'.
    experiment_variable : str
        Name of the experiment variable. For 'N' the number of training tasks
        is read from each row's 'N' value.
    default_num_train_tasks : int, optional
        Number of training tasks used for every other experiment variable
        (default 200, the value that was previously hard-coded).

    Returns
    -------
    pandas.DataFrame
        A copy of the 'Our method' rows with the added columns
        'casualassignment_final_path', 'groundtruth_path', 'f1_final_train'
        and 'f1_final_val_test'. ``exp_results`` itself is not modified.
    """
    method = 'Our method'
    # Explicit copy: adding columns to a boolean-filtered slice triggers
    # pandas' SettingWithCopyWarning.
    our_results = exp_results[exp_results['method']==method].copy()

    # The column name's spelling is kept as-is because it is part of the returned frame.
    our_results['casualassignment_final_path'] = '../../' + our_results['params.outprefix'] + '_causal_assignments_final.csv'
    # Replace the last 8 characters of the data file path with 'task_metadata.csv'.
    our_results['groundtruth_path'] = our_results['params.datafile'].str[0:-8] + 'task_metadata.csv'

    f1_final_train_list = []
    f1_final_val_test_list = []

    for _, row in our_results.iterrows():
        if experiment_variable=='N':
            num_train_tasks = int(row['N'])
        else:
            num_train_tasks = default_num_train_tasks
        f1_final_train, f1_final_val_test = get_f1_scores(row['casualassignment_final_path'], row['groundtruth_path'], num_train_tasks=num_train_tasks)
        f1_final_train_list.append(f1_final_train)
        f1_final_val_test_list.append(f1_final_val_test)

    our_results['f1_final_train'] = f1_final_train_list
    our_results['f1_final_val_test'] = f1_final_val_test_list
    return our_results


## Results

In [None]:
# Experiment 'C': keep the best run (by validation RMSE) per method, value and
# trial, and save the table to a timestamped CSV.
experiment_variable='C'
exp_results = get_results_for_experiment(results, experiment_variable)
exp_results.to_csv(f'results-data-for-RMSE-plot-{experiment_variable}-{timestamp}.csv', index=None)
exp_results

In [None]:
# F1 scores for the 'Our method' rows of the experiment selected in the previous
# cell (reads `exp_results` and `experiment_variable` from it), saved to CSV.
f1_results = get_causal_group_accuracy(exp_results, experiment_variable)
f1_results.to_csv(f'results-data-for-f1-plot-{experiment_variable}-{timestamp}.csv', index=None)
f1_results

In [None]:
# Experiment 'eta': keep the best run (by validation RMSE) per method, value
# and trial, and save the table to a timestamped CSV.
experiment_variable='eta'
exp_results = get_results_for_experiment(results, experiment_variable)
exp_results.to_csv(f'results-data-for-RMSE-plot-{experiment_variable}-{timestamp}.csv', index=None)
exp_results

In [None]:
# F1 scores for the 'Our method' rows of the experiment selected in the previous
# cell (reads `exp_results` and `experiment_variable` from it), saved to CSV.
f1_results = get_causal_group_accuracy(exp_results, experiment_variable)
f1_results.to_csv(f'results-data-for-f1-plot-{experiment_variable}-{timestamp}.csv', index=None)
f1_results

In [None]:
# Experiment 'N': keep the best run (by validation RMSE) per method, value and
# trial, and save the table to a timestamped CSV.
experiment_variable='N'
exp_results = get_results_for_experiment(results, experiment_variable)
exp_results.to_csv(f'results-data-for-RMSE-plot-{experiment_variable}-{timestamp}.csv', index=None)
exp_results

In [None]:
# F1 scores for the 'Our method' rows of the experiment selected in the previous
# cell; for 'N' the number of training tasks comes from each row's 'N' value.
f1_results = get_causal_group_accuracy(exp_results, experiment_variable)
f1_results.to_csv(f'results-data-for-f1-plot-{experiment_variable}-{timestamp}.csv', index=None)
f1_results

In [None]:
# Experiment 'M': keep the best run (by validation RMSE) per method, value and
# trial, and save the table to a timestamped CSV.
experiment_variable='M'
exp_results = get_results_for_experiment(results, experiment_variable)
exp_results.to_csv(f'results-data-for-RMSE-plot-{experiment_variable}-{timestamp}.csv', index=None)
exp_results

In [None]:
# F1 scores for the 'Our method' rows of the experiment selected in the previous
# cell (reads `exp_results` and `experiment_variable` from it), saved to CSV.
f1_results = get_causal_group_accuracy(exp_results, experiment_variable)
f1_results.to_csv(f'results-data-for-f1-plot-{experiment_variable}-{timestamp}.csv', index=None)
f1_results

In [None]:
# Experiment 'intervp': keep the best run (by validation RMSE) per method,
# value and trial, and save the table to a timestamped CSV.
experiment_variable='intervp'
exp_results = get_results_for_experiment(results, experiment_variable)
# q is defined as 1-p. Computing it (instead of a fixed 0.3<->0.7 lookup) keeps
# any other p value from silently turning into NaN; rounding removes float
# noise such as 1 - 0.7 == 0.30000000000000004.
exp_results['intervp'] = (1 - exp_results['intervp']).round(10)
exp_results.to_csv(f'results-data-for-RMSE-plot-{experiment_variable}-{timestamp}.csv', index=None)
exp_results

In [None]:
# F1 scores for the 'Our method' rows of the experiment selected in the previous
# cell (reads `exp_results` and `experiment_variable` from it), saved to CSV.
f1_results = get_causal_group_accuracy(exp_results, experiment_variable)
f1_results.to_csv(f'results-data-for-f1-plot-{experiment_variable}-{timestamp}.csv', index=None)
f1_results