In [24]:
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

import pandas as pd
import numpy as np
import os

In [25]:
# One combined fine-tuning results CSV per model directory, each located one
# level above the current working directory.
csv_paths = [
    os.path.join('../', model_dir, 'ft_results_combined.csv')
    for model_dir in ("5M-MTR", "10M-MTR", "77M-MTR")
]

In [26]:
# Echo the resolved paths as a quick sanity check before they are read.
csv_paths

['../5M-MTR/ft_results_combined.csv',
 '../10M-MTR/ft_results_combined.csv',
 '../77M-MTR/ft_results_combined.csv']

In [27]:
# Load every results table; the list keeps the same order as csv_paths.
dfs = list(map(pd.read_csv, csv_paths))

In [55]:
# Inspect the last table loaded, i.e. the one read from the final entry of
# csv_paths (the 77M-MTR directory).
dfs[-1]

Unnamed: 0,run_name,min_eval_loss,hidden_size,attention_probs_dropout_prob,hidden_dropout_prob,intermediate_size,num_attention_heads,num_hidden_layers,learning_rate,per_device_train_batch_size,...,lipo_test_pearsonr_std,lipo_test_rmse_std,tox21_valid_roc_auc_score_mean,tox21_valid_average_precision_score_mean,tox21_valid_roc_auc_score_std,tox21_valid_average_precision_score_std,tox21_test_roc_auc_score_mean,tox21_test_average_precision_score_mean,tox21_test_roc_auc_score_std,tox21_test_average_precision_score_std
0,run_0,0.070153,264,0.224,0.122,4888,8,3,0.000396,25,...,0.00819,0.008356,0.796362,0.47505,0.00881,0.007511,0.752992,0.408613,0.002091,0.006543
1,run_45,0.040794,384,0.109,0.144,464,12,3,0.000141,25,...,0.001592,0.005084,0.776999,0.492551,0.003705,0.002459,0.772504,0.438593,0.003099,0.003468
2,run_38,0.259809,126,0.109,0.279,456,3,2,2.1e-05,25,...,0.010757,0.006533,0.757841,0.44627,0.00144,0.002977,0.819531,0.394257,0.001457,0.001901
3,run_1,0.144309,407,0.156,0.295,9044,11,2,5e-06,25,...,0.007048,0.003801,0.784034,0.491201,0.010371,0.007209,0.787348,0.403005,0.008803,0.00988
4,run_39,0.235581,209,0.176,0.128,3968,11,3,2e-06,25,...,0.021736,0.016044,0.734626,0.446543,0.009882,0.022625,0.798219,0.40489,0.011476,0.031436


In [28]:
# Restrict the comparison to the run names present in the last table loaded
# (the 77M-MTR results).
run_names_to_use = dfs[-1]["run_name"].unique()

In [29]:
# Stack all tables row-wise under a fresh 0..n-1 index, order the rows by
# ascending min_eval_loss, then keep only the runs named in run_names_to_use.
# Row labels therefore refer to positions in the stacked table, not to ranks.
mtr_df = (
    pd.concat(dfs, axis=0)
    .reset_index(drop=True)
    .sort_values(by='min_eval_loss')
    .loc[lambda stacked: stacked["run_name"].isin(run_names_to_use)]
)

In [30]:
# Show which run names survive the filter applied to mtr_df.
run_names_to_use

array(['run_0', 'run_45', 'run_38', 'run_1', 'run_39'], dtype=object)

In [31]:
# Column-name prefixes of the classification tasks in the results tables.
clf_col_prefix = ["bace_classification", "bbbp", "clintox", "tox21"]

# Column-name prefixes of the regression tasks in the results tables.
regr_col_prefix = ["bace_regression", "clearance", "delaney", "lipo"]

In [47]:
# Rank the fine-tuned runs of one pretraining task on every regression dataset.
# Each dataset's (mean, std) test-RMSE pair is sorted on its own mean, lowest
# first, and the index is reset. Position k in each frame is therefore the run
# with the k-th lowest test RMSE for that dataset -- not necessarily the same
# run across datasets.
task = "5M-MTR"

# The task filter does not depend on the dataset, so it is computed once
# instead of on every loop iteration.
regr_task_df = mtr_df[mtr_df.pretraining_task == task]

res_dfs = []
for prefix in regr_col_prefix:
    test_mean_col = prefix + "_test_rmse_mean"
    test_std_col = prefix + "_test_rmse_std"
    ranked = regr_task_df.sort_values(by=test_mean_col)[[test_mean_col, test_std_col]]
    # Drop the original row labels so the per-dataset frames line up by rank.
    res_dfs.append(ranked.reset_index(drop=True))
    

In [48]:
# Place the per-dataset ranking frames side by side, aligned on their
# 0..n-1 rank index.
res_df = pd.concat(res_dfs, axis="columns")

In [46]:
# Each (mean, std) column pair was sorted independently, so a row is a rank
# position per dataset rather than the results of one single run.
res_df

Unnamed: 0,bace_regression_test_rmse_mean,bace_regression_test_rmse_std,clearance_test_rmse_mean,clearance_test_rmse_std,delaney_test_rmse_mean,delaney_test_rmse_std,lipo_test_rmse_mean,lipo_test_rmse_std
0,1.043666,0.023895,1.116011,0.002129,0.405049,0.002775,0.58717,0.005729
1,1.051768,0.006089,1.117125,0.003085,0.429835,0.001781,0.656534,0.007907
2,1.075564,0.003071,1.138627,0.008223,0.451861,0.006737,0.662727,0.004154
3,1.092537,0.00555,1.147998,0.004632,0.457114,0.001186,0.670677,0.00054
4,1.101416,0.008106,1.192413,0.003933,0.508793,0.017765,0.784134,0.001651


In [23]:
# Order the runs by validation Pearson r on bace_regression (highest first)
# and show only the columns whose name contains 'bace_regression'.
(
    mtr_df
    .sort_values(by='bace_regression_valid_pearsonr_mean', ascending=False)
    .filter(like='bace_regression')
)

Unnamed: 0,bace_regression_valid_pearsonr_mean,bace_regression_valid_rmse_mean,bace_regression_valid_pearsonr_std,bace_regression_valid_rmse_std,bace_regression_test_pearsonr_mean,bace_regression_test_rmse_mean,bace_regression_test_pearsonr_std,bace_regression_test_rmse_std
7,0.224502,0.502046,0.004547,0.000522,0.743475,1.101416,0.002809,0.008106
12,0.22096,0.490769,0.00196,0.002745,0.767039,1.009474,0.003511,0.006804
2,0.205019,0.495244,0.000139,0.000377,0.715272,1.087914,0.00038,0.001122
8,0.189893,0.524299,0.001251,0.001338,0.796699,1.051768,0.000279,0.006089
11,0.178072,0.515094,0.000973,0.000484,0.709416,1.128245,0.001843,0.00285
5,0.174632,0.537382,0.003945,0.001562,0.77626,1.075564,0.00182,0.003071
6,0.173427,0.519994,0.013449,0.002541,0.778926,1.043666,0.000871,0.023895
1,0.172812,0.563706,0.006514,0.001981,0.80332,1.076083,0.006065,0.041081
13,0.170358,0.523089,0.002276,0.001221,0.817694,1.009423,0.000744,0.01205
14,0.169626,0.507877,0.012234,0.001355,0.792269,1.037092,0.000865,0.025724


In [56]:
# Reminder of the classification prefixes iterated over in the next cell.
clf_col_prefix

['bace_classification', 'bbbp', 'clintox', 'tox21']

In [68]:
# Rank the fine-tuned runs of one pretraining task on every classification
# dataset. Each dataset's (mean, std) test ROC-AUC pair is sorted on its own
# mean, highest first, and the index is reset. Position k in each frame is
# therefore the run with the k-th highest test ROC AUC for that dataset -- not
# necessarily the same run across datasets.
task = "77M-MTR"

# The task filter does not depend on the dataset, so it is computed once
# instead of on every loop iteration.
clf_task_df = mtr_df[mtr_df.pretraining_task == task]

clf_res_dfs = []
for prefix in clf_col_prefix:
    test_mean_col = prefix + "_test_roc_auc_score_mean"
    test_std_col = prefix + "_test_roc_auc_score_std"
    ranked = clf_task_df.sort_values(by=test_mean_col, ascending=False)[
        [test_mean_col, test_std_col]
    ]
    # Drop the original row labels so the per-dataset frames line up by rank.
    clf_res_dfs.append(ranked.reset_index(drop=True))
    

In [69]:
# Place the per-dataset classification rankings side by side, aligned on
# their 0..n-1 rank index.
clf_res_df = pd.concat(clf_res_dfs, axis="columns")

In [70]:
# Each (mean, std) column pair was sorted independently, so a row is a rank
# position per dataset rather than the results of one single run.
clf_res_df

Unnamed: 0,bace_classification_test_roc_auc_score_mean,bace_classification_test_roc_auc_score_std,bbbp_test_roc_auc_score_mean,bbbp_test_roc_auc_score_std,clintox_test_roc_auc_score_mean,clintox_test_roc_auc_score_std,tox21_test_roc_auc_score_mean,tox21_test_roc_auc_score_std
0,0.817065,0.004224,0.724636,0.001514,0.770184,0.003883,0.819531,0.001457
1,0.811486,0.001359,0.723538,0.001426,0.736291,0.003736,0.798219,0.011476
2,0.759058,0.0,0.722844,0.001697,0.653637,0.002878,0.787348,0.008803
3,0.718478,0.0,0.717738,0.002059,0.608873,0.006343,0.772504,0.003099
4,0.643768,8.9e-05,0.677464,0.001677,0.521263,0.001196,0.752992,0.002091


In [27]:
# NOTE(review): mlm_df is not defined in any cell visible here -- presumably
# it was left in the kernel by an earlier session or is built in another part
# of the notebook. Confirm; otherwise this raises NameError on a fresh run.
# Shows only the columns whose name contains 'clearance'.
mlm_df.filter(like='clearance')

Unnamed: 0,clearance_valid_pearsonr_mean,clearance_valid_rmse_mean,clearance_valid_pearsonr_std,clearance_valid_rmse_std,clearance_test_pearsonr_mean,clearance_test_rmse_mean,clearance_test_pearsonr_std,clearance_test_rmse_std
20,0.436969,1.162745,0.004418,0.00278,0.484437,1.088797,0.005345,0.003811
14,0.418765,1.170498,0.003088,0.001806,0.484851,1.085561,0.003891,0.002767
16,0.424372,1.176494,0.025706,0.013455,0.443641,1.129781,0.041281,0.019043
9,0.427645,1.16581,0.004334,0.003029,0.467909,1.098456,0.001415,0.00111
15,0.377785,1.218731,0.023918,0.007788,0.408635,1.170315,0.044449,0.012772
3,0.329497,1.228269,0.013655,0.006409,0.446301,1.151314,0.010181,0.012671
19,0.328244,1.221911,0.009218,0.007291,0.479968,1.095174,0.011485,0.006246
18,0.42543,1.168714,0.027825,0.019066,0.486771,1.090579,0.008569,0.010051
10,0.284052,1.238737,0.065997,0.020236,0.454849,1.136953,0.054959,0.036227
13,0.103724,1.285229,0.009838,0.001112,0.284718,1.228692,0.023538,0.003922


In [13]:
# Goal: just get the best overall result for each task.
# Task names grouped by problem type.
regr_tasks = [
    'delaney',
    'bace_regression',
    'clearance',
    'lipo',
]
clf_tasks = [
    'bbbp',
    'bace_classification',
    'clintox',
    'tox21',
]
# Metric column key -> short label.
mm = dict(
    roc_auc_score="ROC AUC",
    average_precision_score="PR AUC",
)


In [14]:
# NOTE(review): the loop this replaces never used its loop variable -- it
# stacked the whole of mlm_df once per entry in regr_tasks. That result is
# kept as-is; any per-task selection still has to be written. TODO confirm
# the intended behavior.
# DataFrame.append was removed in pandas 2.0, so the copies are combined with
# a single pd.concat call instead of growing the frame inside a loop. An empty
# task list still yields an empty frame, as before.
regr_df = pd.concat([mlm_df] * len(regr_tasks)) if regr_tasks else pd.DataFrame()

SyntaxError: unexpected EOF while parsing (3928367725.py, line 4)

In [35]:
# Rank the runs on one task's test split by the chosen metric, best first,
# showing only that task's test columns.
task = 'tox21'
metric = 'roc_auc_score'
(
    mlm_df
    .filter(like=f'{task}_test')
    .sort_values(by=f'{task}_test_{metric}_mean', ascending=False)
)

Unnamed: 0,tox21_test_roc_auc_score_mean,tox21_test_average_precision_score_mean,tox21_test_roc_auc_score_std,tox21_test_average_precision_score_std
9,0.763433,0.344838,0.00619,0.006467
3,0.763036,0.348931,0.015341,0.007947
13,0.753726,0.345733,0.010551,0.016043
19,0.745829,0.302185,0.010798,0.004848
10,0.745301,0.298567,0.015782,0.015021
18,0.745281,0.316157,0.017602,0.019095
0,0.743784,0.264033,0.011447,0.009164
11,0.740144,0.304292,0.006138,0.021326
14,0.740105,0.357602,0.006108,0.004955
6,0.737463,0.312435,0.009331,0.016
