In [41]:
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

import pandas as pd
import numpy as np
import os

In [42]:
csv_paths = [os.path.join('../', x, 'ft_results_combined.csv') for x in ["5M-MLM", "10M-MLM", "77M-MLM"]]

In [43]:
csv_paths

['../5M-MLM/ft_results_combined.csv',
 '../10M-MLM/ft_results_combined.csv',
 '../77M-MLM/ft_results_combined.csv']

In [44]:
dfs = [pd.read_csv(p) for p in csv_paths]

In [45]:
# only pick run_names from 77
run_names_to_use = dfs[-1].run_name.unique()

In [46]:
mlm_df = pd.concat(dfs, axis=0).reset_index(drop=True).sort_values(by='min_eval_loss')
mlm_df = mlm_df[mlm_df.run_name.isin(run_names_to_use)]

In [47]:
run_names_to_use

array(['run_19', 'run_11', 'run_39', 'run_38', 'run_45'], dtype=object)

In [48]:
clf_col_prefix = [
    "bace_classification",
    "bbbp",
    "clintox",
    "tox21",
]

regr_col_prefix = [
    "bace_regression",
    "clearance",
    "delaney",
    "lipo",
]

In [59]:
# regr_cols = [[r+"_rmse_mean", r+"_rmse_std"] for r in regr_col_prefix]
# regr_cols = [c for i in regr_cols for c in i]

res_dfs = []
task = "77M-MLM"

for i in regr_col_prefix:
    val_sort_col = i+"_test_rmse_mean"
    sub_df = mlm_df[mlm_df.pretraining_task == task].sort_values(by=val_sort_col)
    test_mean_col = i+"_test_rmse_mean"
    test_std_col = i+"_test_rmse_std"
    sub_df = sub_df[[test_mean_col, test_std_col]]
    res_dfs.append(sub_df.reset_index(drop=True))
    

In [60]:
res_df = pd.concat(res_dfs, axis=1)

In [61]:
res_df

Unnamed: 0,bace_regression_test_rmse_mean,bace_regression_test_rmse_std,clearance_test_rmse_mean,clearance_test_rmse_std,delaney_test_rmse_mean,delaney_test_rmse_std,lipo_test_rmse_mean,lipo_test_rmse_std
0,1.088074,0.010344,1.088797,0.003811,0.501106,0.007045,0.720417,0.012898
1,1.102342,0.024425,1.090579,0.010051,0.509699,0.003744,0.735105,0.008643
2,1.129689,0.05825,1.095174,0.006246,0.561608,0.018715,0.793052,0.012488
3,1.162638,0.041988,1.129781,0.019043,0.785991,0.000874,0.798963,0.012064
4,1.262157,9e-06,1.200019,0.004761,1.089642,0.000236,0.811608,0.008315


In [71]:
clf_res_dfs = []
task = "77M-MLM"

for i in clf_col_prefix:
    val_sort_col = i+"_test_roc_auc_score_mean"
    sub_df = mlm_df[mlm_df.pretraining_task == task].sort_values(by=val_sort_col, ascending=False)
    test_mean_col = i+"_test_roc_auc_score_mean"
    test_std_col = i+"_test_roc_auc_score_std"
    sub_df = sub_df[[test_mean_col, test_std_col]]
    clf_res_dfs.append(sub_df.reset_index(drop=True))
    

In [72]:
clf_res_df = pd.concat(clf_res_dfs, axis=1)

In [73]:
clf_res_df

Unnamed: 0,bace_classification_test_roc_auc_score_mean,bace_classification_test_roc_auc_score_std,bbbp_test_roc_auc_score_mean,bbbp_test_roc_auc_score_std,clintox_test_roc_auc_score_mean,clintox_test_roc_auc_score_std,tox21_test_roc_auc_score_mean,tox21_test_roc_auc_score_std
0,0.793152,0.013913,0.73772,0.002678,0.632694,0.002084,0.745829,0.010798
1,0.784493,0.004532,0.667983,0.003819,0.630775,0.019021,0.745281,0.017602
2,0.7225,0.045955,0.645756,0.055059,0.628217,0.008098,0.737179,0.007113
3,0.560181,7.2e-05,0.546777,0.0,0.604396,0.005643,0.735437,0.002723
4,0.40779,0.0,0.448405,0.001027,0.534852,0.034242,0.722692,0.010164


In [27]:
mlm_df.filter(like='clearance')

Unnamed: 0,clearance_valid_pearsonr_mean,clearance_valid_rmse_mean,clearance_valid_pearsonr_std,clearance_valid_rmse_std,clearance_test_pearsonr_mean,clearance_test_rmse_mean,clearance_test_pearsonr_std,clearance_test_rmse_std
20,0.436969,1.162745,0.004418,0.00278,0.484437,1.088797,0.005345,0.003811
14,0.418765,1.170498,0.003088,0.001806,0.484851,1.085561,0.003891,0.002767
16,0.424372,1.176494,0.025706,0.013455,0.443641,1.129781,0.041281,0.019043
9,0.427645,1.16581,0.004334,0.003029,0.467909,1.098456,0.001415,0.00111
15,0.377785,1.218731,0.023918,0.007788,0.408635,1.170315,0.044449,0.012772
3,0.329497,1.228269,0.013655,0.006409,0.446301,1.151314,0.010181,0.012671
19,0.328244,1.221911,0.009218,0.007291,0.479968,1.095174,0.011485,0.006246
18,0.42543,1.168714,0.027825,0.019066,0.486771,1.090579,0.008569,0.010051
10,0.284052,1.238737,0.065997,0.020236,0.454849,1.136953,0.054959,0.036227
13,0.103724,1.285229,0.009838,0.001112,0.284718,1.228692,0.023538,0.003922


In [13]:
# just get the best overall for each task
regr_tasks = ['delaney', 'bace_regression', 'clearance', 'lipo']
clf_tasks = ['bbbp', 'bace_classification', 'clintox', 'tox21']
mm = {
    "roc_auc_score": "ROC AUC",
    "average_precision_score": "PR AUC",
}


In [14]:
mlm_df.filter(like='mean')
regr_df = pd.DataFrame()

for regr_task in regr_tasks:
    regr_df = regr_df.append(mlm_df)

SyntaxError: unexpected EOF while parsing (3928367725.py, line 4)

In [35]:
metric='roc_auc_score'
task = 'tox21'
mlm_df.filter(like=f'{task}_test').sort_values(by=f'{task}_test_{metric}_mean', ascending=False)

Unnamed: 0,tox21_test_roc_auc_score_mean,tox21_test_average_precision_score_mean,tox21_test_roc_auc_score_std,tox21_test_average_precision_score_std
9,0.763433,0.344838,0.00619,0.006467
3,0.763036,0.348931,0.015341,0.007947
13,0.753726,0.345733,0.010551,0.016043
19,0.745829,0.302185,0.010798,0.004848
10,0.745301,0.298567,0.015782,0.015021
18,0.745281,0.316157,0.017602,0.019095
0,0.743784,0.264033,0.011447,0.009164
11,0.740144,0.304292,0.006138,0.021326
14,0.740105,0.357602,0.006108,0.004955
6,0.737463,0.312435,0.009331,0.016
