In [None]:
import mlflow
import datetime

# ID of the MLflow experiment whose runs are loaded below.
experiment_id = '198191907955949949' # TODO update ID if it changes

# Point MLflow at the tracking store (relative path).
mlflow.set_tracking_uri("../../mlruns")

# Load every run of the experiment into a DataFrame (one row per run).
runs = mlflow.search_runs(experiment_ids=[experiment_id])

# Count failures straight from the status column; no other column is needed,
# so this does not break when a particular param column is absent.
total_runs = len(runs)
failed_runs = int((runs['status'] == 'FAILED').sum())
# Guard the percentage so an experiment without any runs does not raise ZeroDivisionError.
failed_pct = failed_runs / total_runs * 100 if total_runs else 0.0
print("{} experiment runs failed ({}% of total)".format(failed_runs, failed_pct))

In [None]:
# Stamp the export with the current local time; `timestamp` is reused for later file names.
export_time = datetime.datetime.now()
timestamp = export_time.strftime("%Y%m%d-%H%M%S")

# Dump the unprocessed runs table (without the index column) next to the notebook.
main_csv_name = f'results-main-{timestamp}.csv'
runs.to_csv(main_csv_name, index=None)
print(timestamp)

In [None]:
# Take the last path component of each run's data file and split it on underscores.
datafile_names = runs['params.datafile'].str.split('/').str[-1]
tmp = datafile_names.str.split('_')

# First token -> dataset name; the character at index 7 of the second token -> trial.
# NOTE(review): index 7 presumably skips a fixed 7-character prefix and keeps a single
# character - confirm against the actual file naming scheme.
runs['dataset'] = tmp.str.get(0).to_list()
runs['trial'] = tmp.str.get(1).str.get(7).to_list()

# Expose the MLflow entry point under the shorter column name 'method'.
entry_point_rename = {'tags.mlflow.project.entryPoint':'method'}
runs = runs.rename(columns=entry_point_rename)

In [None]:
# Narrow the runs table to the columns used for the comparison below.
result_columns = [
    'method',
    'dataset',
    'trial',
    'status',
    'metrics.RMSE_avg_val',
    'metrics.RMSE_avg_test',
]
results = runs.loc[:, result_columns]
results

### Check whether any runs failed

In [None]:
# Per (method, dataset): non-null counts of each column among FAILED runs.
# An empty table means no run failed.
failed_mask = results['status'].eq('FAILED')
results.loc[failed_mask].groupby(['method','dataset']).count()

In [None]:
# Keep only the runs whose status is FINISHED.
finished_mask = results['status'].eq('FINISHED')
results = results.loc[finished_mask]

## Utility functions

In [None]:
def _format_mean_std(row, metric):
    """Render the aggregated ``metric`` of one row as ``"mean (std)"`` with three decimals.

    ``row`` is a row of the aggregated frame, indexed by ``(column, statistic)`` tuples.
    Using one helper for both metrics keeps the mean and the std tied to the same column.
    """
    return f"{row[(metric, 'mean')]:.3f} ({row[(metric, 'std')]:.3f})"


# For every dataset: pick, per (method, trial), the run with the lowest validation RMSE,
# then summarise each method by mean/std over its trials, save the table and show it.
for dataset in ['medical', 'covid', 'cognition']:
    # Sorting by validation RMSE first makes `.first()` pick the best-validation run per group.
    # NOTE(review): GroupBy.first skips nulls column by column, so a missing metric in the
    # best run would be filled from the next run of that group - confirm this is acceptable.
    df = results[results['dataset']==dataset].sort_values(by='metrics.RMSE_avg_val').groupby(['dataset','method','trial']).first()
    df = df.groupby(['dataset','method'])[['metrics.RMSE_avg_val','metrics.RMSE_avg_test']].agg(['mean', 'std']).reset_index()
    df['metrics.RMSE_avg_val_fmt'] = df.apply(lambda x: _format_mean_std(x, 'metrics.RMSE_avg_val'), axis=1)
    # The test column now reports the test std (it previously showed the validation std).
    df['metrics.RMSE_avg_test_fmt'] = df.apply(lambda x: _format_mean_std(x, 'metrics.RMSE_avg_test'), axis=1)
    # Best (lowest mean test RMSE) method first; keep only the formatted columns.
    df = df.sort_values(by=('metrics.RMSE_avg_test','mean'))[['method','metrics.RMSE_avg_val_fmt','metrics.RMSE_avg_test_fmt']]
    df.to_csv(f'results-{dataset}-{timestamp}.csv', index=None)
    print(dataset)
    # display() renders the table itself and returns None, so it must not be wrapped in print().
    display(df)