In [1]:
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.model_selection import GridSearchCV, cross_val_score, KFold

from sklearn.metrics import roc_auc_score, precision_recall_curve
from sklearn.metrics import auc as calculate_auc
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
from tqdm import tqdm
import warnings
from sklearn.exceptions import DataConversionWarning
warnings.filterwarnings(action='ignore', category=DataConversionWarning)


from sklearn.utils import shuffle 
from joblib import load, dump
import numpy as np
import pandas as pd
import os

In [2]:
from chembench import load_data, dataset
from molmap import feature

In [3]:
# Per-bit metadata exposed by molmap's fingerprint extractor.
# NOTE(review): presumably a pandas DataFrame with a `Subtypes` column — confirm.
bitsinfo = feature.fingerprint.Extraction().bitsinfo
# Distinct fingerprint families; the benchmark below runs once per entry.
fp_types = bitsinfo.Subtypes.unique()
# Bare expression so the notebook displays the list of fingerprint types.
fp_types

array(['MorganFP', 'RDkitFP', 'AtomPairFP', 'TorsionFP', 'AvalonFP',
       'EstateFP', 'MACCSFP', 'PharmacoErGFP', 'PharmacoPFP', 'PubChemFP',
       'MHFP6', 'MAP4'], dtype=object)

In [4]:
# `scipy.stats.stats` is a deprecated private namespace; the public location
# of pearsonr is `scipy.stats`.
from scipy.stats import pearsonr
def r2(y_true, y_pred):
    """Squared Pearson correlation between targets and predictions.

    Parameters
    ----------
    y_true, y_pred : array-like
        Observed and predicted values. Both 1-D sequences and (n, 1) column
        vectors are accepted; inputs are flattened before the correlation is
        computed, so both must hold the same number of elements.

    Returns
    -------
    float
        The Pearson correlation coefficient squared.
    """
    # Flatten first: the previous `pcc[0]` indexing only worked when pearsonr
    # happened to return a length-1 array (column-vector input) and raised
    # IndexError for ordinary 1-D inputs, where it returns a scalar.
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    pcc, _ = pearsonr(y_true, y_pred)
    return float(pcc) ** 2

def rmse(y_true, y_pred):
    """Root-mean-squared error: square root of sklearn's mean_squared_error."""
    return np.sqrt(mean_squared_error(y_true, y_pred))


def PRC_AUC(y_true, y_score):
    """Area under the precision-recall curve.

    The curve comes from `precision_recall_curve`; the area is integrated
    with recall on the x axis and precision on the y axis.
    """
    prec, rec, _thresholds = precision_recall_curve(y_true, y_score)
    return calculate_auc(rec, prec)

def ROC_AUC(y_true, y_score):
    """Area under the ROC curve; a thin wrapper around `roc_auc_score`."""
    return roc_auc_score(y_true, y_score)

In [5]:
# Load each classification benchmark through chembench's dataset loaders.
# The objects are later read for their `.task_name` and `.y` attributes.
hiv = dataset.load_HIV()
bace = dataset.load_BACE()
bbbp = dataset.load_BBBP()
clintox = dataset.load_ClinTox()
sider = dataset.load_SIDER()

# Order in which the benchmark loop visits the datasets.
# NOTE(review): the trailing "malaria" marker suggests that dataset was dropped from this list — confirm.
datasets = [clintox, sider, bace, bbbp, hiv] #malaria

total samples: 41127
total samples: 1513
total samples: 2039
total samples: 1478
total samples: 1427


In [6]:
# Directory holding the pre-computed feature tensors (one file per task and
# fingerprint type). The default is the original hard-coded location; set the
# FP_FEATURE_DIR environment variable to run the notebook on another machine.
FEATURE_DIR = os.environ.get('FP_FEATURE_DIR',
                             '/raid/shenwanxiang/10_FP_effect/tempignore')


def _task_roc_auc(y_true, proba):
    """ROC-AUC of one binary task, or NaN when the score is undefined.

    Parameters
    ----------
    y_true : array-like
        Test labels of a single task, 1-D or an (n, 1) column.
    proba : ndarray of shape (n_samples, n_classes)
        Class probabilities for that task as returned by `predict_proba`.

    Returns
    -------
    float
        ROC-AUC computed from column 1 of `proba`, or `np.nan` if the test
        labels hold fewer than two classes or the classifier saw only one
        class in training (so `proba` has no second column).
    """
    y_true = np.asarray(y_true).ravel()
    if proba.shape[1] < 2 or np.unique(y_true).size < 2:
        return np.nan
    return ROC_AUC(y_true, proba[:, 1])


# One result dict per (task, fingerprint type, split).
performance = []

# Neighbour-weighting schemes tried for every candidate k.
weights_list = ['uniform', 'distance']

for data in datasets:
    task_name = data.task_name
    Y = data.y

    # Candidate k values (odd only). HIV gets a shorter grid, presumably to
    # bound runtime on the largest dataset.
    if task_name == 'HIV':
        n_neighbors_list = np.arange(1, 11, 2)
    else:
        n_neighbors_list = np.arange(1, 15, 2)

    for fp_type in fp_types:
        print(task_name, fp_type)
        # NOTE(review): load_data does not depend on fp_type; it is kept inside
        # this loop only to preserve the original log output and call pattern.
        df, induces = load_data(task_name)

        # joblib.load unpickles its input: only point FEATURE_DIR at trusted files.
        X2 = load(os.path.join(FEATURE_DIR, 'X2_%s_%s.data' % (task_name, fp_type)))
        # Flatten every sample into a single feature vector. Identical to the
        # previous reshape for the (n, w, h, 1) tensors it accepted, without
        # first summing over the whole array just to read its shape.
        X2 = X2.reshape(len(X2), -1)

        if task_name == 'HIV':
            # Only the first split is evaluated for HIV.
            induces = induces[:1]

        for sid, idx in enumerate(induces):
            train_idx, valid_idx, test_idx = idx

            X = X2[train_idx]
            y = Y[train_idx]

            X_valid = X2[valid_idx]
            y_valid = Y[valid_idx]

            X_test = X2[test_idx]
            y_test = Y[test_idx]

            # Grid search on the validation split. The selection criterion is
            # clf.score (accuracy), while the reported test metric is ROC-AUC.
            res = []
            for n_neighbors in tqdm(n_neighbors_list, ascii=True):
                for weights in weights_list:
                    clf = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights)
                    clf.fit(X, y)
                    score = clf.score(X_valid, y_valid)
                    res.append([n_neighbors, weights, score])

            dfr = pd.DataFrame(res, columns=['n_neighbors', 'weights', 'score'])
            # idxmax returns the first row reaching the best score, so ties
            # resolve to the smallest k and 'uniform' before 'distance'.
            best_row = dfr.loc[dfr['score'].idxmax()]
            best_params = {
                'n_neighbors': int(best_row['n_neighbors']),  # plain int, not a numpy scalar
                'weights': best_row['weights'],
            }

            # Refit with the selected hyper-parameters on the training split.
            clf = KNeighborsClassifier(**best_params)
            clf.fit(X, y)

            probs = clf.predict_proba(X_test)
            if Y.shape[1] >= 2:
                # Multi-task: predict_proba yields one probability array per task.
                # Undefined tasks become NaN so that nanmean can skip them
                # instead of the whole run aborting.
                test_roc_aucs = [_task_roc_auc(y_test[:, i], y_probs)
                                 for i, y_probs in enumerate(probs)]
                test_roc_auc = np.nanmean(test_roc_aucs)
                print(test_roc_aucs)
            else:
                test_roc_auc = _task_roc_auc(y_test, probs)

            results = {"task_name":task_name, 'fp_type':fp_type,"split-time":sid, "test_roc_auc":test_roc_auc}

            print(results)
            performance.append(results)

  0%|          | 0/14 [00:00<?, ?it/s]

ClinTox MorganFP
loading dataset: ClinTox number of split times: 3


100%|##########| 14/14 [00:27<00:00,  2.03s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.6940222897669706, 0.6004797441364605]
{'task_name': 'ClinTox', 'fp_type': 'MorganFP', 'split-time': 0, 'test_roc_auc': 0.6472510169517156}


100%|##########| 14/14 [00:28<00:00,  2.05s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.7147887323943662, 0.6818642350557245]
{'task_name': 'ClinTox', 'fp_type': 'MorganFP', 'split-time': 1, 'test_roc_auc': 0.6983264837250454}


100%|##########| 14/14 [00:26<00:00,  1.91s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.6247511612475116, 0.6017094017094017]
{'task_name': 'ClinTox', 'fp_type': 'MorganFP', 'split-time': 2, 'test_roc_auc': 0.6132302814784567}
ClinTox RDkitFP
loading dataset: ClinTox number of split times: 3


100%|##########| 14/14 [00:26<00:00,  1.85s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.701114488348531, 0.6703091684434969]
{'task_name': 'ClinTox', 'fp_type': 'RDkitFP', 'split-time': 0, 'test_roc_auc': 0.685711828396014}


100%|##########| 14/14 [00:24<00:00,  1.75s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.7723004694835681, 0.7436676798378927]
{'task_name': 'ClinTox', 'fp_type': 'RDkitFP', 'split-time': 1, 'test_roc_auc': 0.7579840746607305}


100%|##########| 14/14 [00:24<00:00,  1.74s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.6068347710683476, 0.5934472934472935]
{'task_name': 'ClinTox', 'fp_type': 'RDkitFP', 'split-time': 2, 'test_roc_auc': 0.6001410322578206}
ClinTox AtomPairFP
loading dataset: ClinTox number of split times: 3


100%|##########| 14/14 [00:23<00:00,  1.72s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.5987841945288754, 0.6785714285714285]
{'task_name': 'ClinTox', 'fp_type': 'AtomPairFP', 'split-time': 0, 'test_roc_auc': 0.6386778115501519}


100%|##########| 14/14 [00:24<00:00,  1.79s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.8708920187793427, 0.8617021276595744]
{'task_name': 'ClinTox', 'fp_type': 'AtomPairFP', 'split-time': 1, 'test_roc_auc': 0.8662970732194586}


100%|##########| 14/14 [00:25<00:00,  1.85s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.723291307232913, 0.7108262108262109]
{'task_name': 'ClinTox', 'fp_type': 'AtomPairFP', 'split-time': 2, 'test_roc_auc': 0.717058759029562}
ClinTox TorsionFP
loading dataset: ClinTox number of split times: 3


100%|##########| 14/14 [00:20<00:00,  1.49s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.7218844984802433, 0.6265991471215351]
{'task_name': 'ClinTox', 'fp_type': 'TorsionFP', 'split-time': 0, 'test_roc_auc': 0.6742418228008892}


100%|##########| 14/14 [00:21<00:00,  1.46s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.6273474178403756, 0.6610942249240122]
{'task_name': 'ClinTox', 'fp_type': 'TorsionFP', 'split-time': 1, 'test_roc_auc': 0.6442208213821938}


100%|##########| 14/14 [00:17<00:00,  1.27s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.6290643662906437, 0.5735042735042735]
{'task_name': 'ClinTox', 'fp_type': 'TorsionFP', 'split-time': 2, 'test_roc_auc': 0.6012843198974586}
ClinTox AvalonFP
loading dataset: ClinTox number of split times: 3


100%|##########| 14/14 [00:19<00:00,  1.46s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.7061803444782169, 0.6100746268656717]
{'task_name': 'ClinTox', 'fp_type': 'AvalonFP', 'split-time': 0, 'test_roc_auc': 0.6581274856719443}


100%|##########| 14/14 [00:19<00:00,  1.42s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.7470657276995305, 0.7188449848024316]
{'task_name': 'ClinTox', 'fp_type': 'AvalonFP', 'split-time': 1, 'test_roc_auc': 0.7329553562509811}


100%|##########| 14/14 [00:20<00:00,  1.53s/it]
 21%|##1       | 3/14 [00:00<00:00, 20.23it/s]

[0.6114797611147976, 0.5923076923076922]
{'task_name': 'ClinTox', 'fp_type': 'AvalonFP', 'split-time': 2, 'test_roc_auc': 0.601893726711245}
ClinTox EstateFP
loading dataset: ClinTox number of split times: 3


100%|##########| 14/14 [00:00<00:00, 21.10it/s]
 21%|##1       | 3/14 [00:00<00:00, 25.39it/s]

[0.8135764944275582, 0.7961087420042644]
{'task_name': 'ClinTox', 'fp_type': 'EstateFP', 'split-time': 0, 'test_roc_auc': 0.8048426182159113}


100%|##########| 14/14 [00:00<00:00, 24.30it/s]
 21%|##1       | 3/14 [00:00<00:00, 25.97it/s]

[0.8086854460093897, 0.7406281661600811]
{'task_name': 'ClinTox', 'fp_type': 'EstateFP', 'split-time': 1, 'test_roc_auc': 0.7746568060847354}


100%|##########| 14/14 [00:00<00:00, 25.42it/s]
  7%|7         | 1/14 [00:00<00:01,  8.34it/s]

[0.6688785666887856, 0.6427350427350428]
{'task_name': 'ClinTox', 'fp_type': 'EstateFP', 'split-time': 2, 'test_roc_auc': 0.6558068047119142}
ClinTox MACCSFP
loading dataset: ClinTox number of split times: 3


100%|##########| 14/14 [00:01<00:00,  7.98it/s]
  7%|7         | 1/14 [00:00<00:01,  8.67it/s]

[0.8378926038500507, 0.7412046908315565]
{'task_name': 'ClinTox', 'fp_type': 'MACCSFP', 'split-time': 0, 'test_roc_auc': 0.7895486473408035}


100%|##########| 14/14 [00:01<00:00,  7.97it/s]
  7%|7         | 1/14 [00:00<00:01,  8.36it/s]

[0.7024647887323944, 0.7882472137791288]
{'task_name': 'ClinTox', 'fp_type': 'MACCSFP', 'split-time': 1, 'test_roc_auc': 0.7453560012557616}


100%|##########| 14/14 [00:01<00:00,  8.15it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.6542800265428004, 0.6737891737891739]
{'task_name': 'ClinTox', 'fp_type': 'MACCSFP', 'split-time': 2, 'test_roc_auc': 0.6640346001659871}
ClinTox PharmacoErGFP
loading dataset: ClinTox number of split times: 3


100%|##########| 14/14 [00:04<00:00,  3.26it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.7619047619047619, 0.8627398720682303]
{'task_name': 'ClinTox', 'fp_type': 'PharmacoErGFP', 'split-time': 0, 'test_roc_auc': 0.8123223169864962}


100%|##########| 14/14 [00:04<00:00,  3.54it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.7769953051643191, 0.7087132725430597]
{'task_name': 'ClinTox', 'fp_type': 'PharmacoErGFP', 'split-time': 1, 'test_roc_auc': 0.7428542888536894}


100%|##########| 14/14 [00:03<00:00,  3.72it/s]
  7%|7         | 1/14 [00:00<00:01,  7.65it/s]

[0.7929661579296616, 0.7413105413105413]
{'task_name': 'ClinTox', 'fp_type': 'PharmacoErGFP', 'split-time': 2, 'test_roc_auc': 0.7671383496201014}
ClinTox PharmacoPFP
loading dataset: ClinTox number of split times: 3


100%|##########| 14/14 [00:02<00:00,  6.33it/s]
  7%|7         | 1/14 [00:00<00:01,  7.92it/s]

[0.8201621073961499, 0.789179104477612]
{'task_name': 'ClinTox', 'fp_type': 'PharmacoPFP', 'split-time': 0, 'test_roc_auc': 0.8046706059368809}


100%|##########| 14/14 [00:02<00:00,  6.22it/s]
  7%|7         | 1/14 [00:00<00:01,  8.04it/s]

[0.7928403755868545, 0.7912867274569403]
{'task_name': 'ClinTox', 'fp_type': 'PharmacoPFP', 'split-time': 1, 'test_roc_auc': 0.7920635515218974}


100%|##########| 14/14 [00:02<00:00,  6.33it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.7637690776376908, 0.7444444444444445]
{'task_name': 'ClinTox', 'fp_type': 'PharmacoPFP', 'split-time': 2, 'test_roc_auc': 0.7541067610410677}
ClinTox PubChemFP
loading dataset: ClinTox number of split times: 3


100%|##########| 14/14 [00:05<00:00,  2.37it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.7411347517730497, 0.6471215351812367]
{'task_name': 'ClinTox', 'fp_type': 'PubChemFP', 'split-time': 0, 'test_roc_auc': 0.6941281434771431}


100%|##########| 14/14 [00:05<00:00,  2.35it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.7928403755868545, 0.7249240121580547]
{'task_name': 'ClinTox', 'fp_type': 'PubChemFP', 'split-time': 1, 'test_roc_auc': 0.7588821938724546}


100%|##########| 14/14 [00:05<00:00,  2.35it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.6645653616456536, 0.6521367521367522]
{'task_name': 'ClinTox', 'fp_type': 'PubChemFP', 'split-time': 2, 'test_roc_auc': 0.6583510568912029}
ClinTox MHFP6
loading dataset: ClinTox number of split times: 3


100%|##########| 14/14 [00:16<00:00,  1.19s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.7391084093211753, 0.6897654584221748]
{'task_name': 'ClinTox', 'fp_type': 'MHFP6', 'split-time': 0, 'test_roc_auc': 0.7144369338716751}


100%|##########| 14/14 [00:16<00:00,  1.19s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.7863849765258216, 0.7087132725430598]
{'task_name': 'ClinTox', 'fp_type': 'MHFP6', 'split-time': 1, 'test_roc_auc': 0.7475491245344408}


100%|##########| 14/14 [00:16<00:00,  1.19s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.7030524220305242, 0.656125356125356]
{'task_name': 'ClinTox', 'fp_type': 'MHFP6', 'split-time': 2, 'test_roc_auc': 0.6795888890779401}
ClinTox MAP4
loading dataset: ClinTox number of split times: 3


100%|##########| 14/14 [00:16<00:00,  1.19s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.5719351570415401, 0.5031982942430704]
{'task_name': 'ClinTox', 'fp_type': 'MAP4', 'split-time': 0, 'test_roc_auc': 0.5375667256423052}


100%|##########| 14/14 [00:16<00:00,  1.19s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.7347417840375587, 0.6767983789260384]
{'task_name': 'ClinTox', 'fp_type': 'MAP4', 'split-time': 1, 'test_roc_auc': 0.7057700814817987}


100%|##########| 14/14 [00:16<00:00,  1.19s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.6088254810882547, 0.5495726495726496]
{'task_name': 'ClinTox', 'fp_type': 'MAP4', 'split-time': 2, 'test_roc_auc': 0.5791990653304522}
SIDER MorganFP
loading dataset: SIDER number of split times: 3


100%|##########| 14/14 [00:15<00:00,  1.15s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.698943661971831, 0.6166905786704926, 0.4929078014184397, 0.6219077568134173, 0.6397804054054055, 0.6371994342291372, 0.6791338582677166, 0.6946095076400678, 0.644298245614035, 0.6282051282051282, 0.6137174910759817, 0.5445670628183362, 0.6060606060606061, 0.4995555555555556, 0.6171596124426313, 0.6969604243166055, 0.5807692307692308, 0.5403981264637002, 0.6321995464852608, 0.618962219033955, 0.6142985208426714, 0.6897727272727272, 0.6048872180451128, 0.6123493975903613, 0.6744463742943986, 0.6129429133858268, 0.5536828963795256]
{'task_name': 'SIDER', 'fp_type': 'MorganFP', 'split-time': 0, 'test_roc_auc': 0.6172743074469688}


100%|##########| 14/14 [00:15<00:00,  1.14s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.6755424063116371, 0.5212121212121212, 0.4854014598540146, 0.6642771804062126, 0.5623188405797102, 0.6100788288288288, 0.681203007518797, 0.6431129476584022, 0.5487468671679198, 0.7266236306729266, 0.7073934837092732, 0.6302083333333334, 0.6755291005291005, 0.6086440677966103, 0.6246894409937889, 0.6547149122807018, 0.39323308270676693, 0.5135135135135136, 0.5226308345120226, 0.547360248447205, 0.6318204997450281, 0.6698789780367549, 0.7337786259541985, 0.5890612725844462, 0.6799802761341223, 0.5907407407407408, 0.6749433106575964]
{'task_name': 'SIDER', 'fp_type': 'MorganFP', 'split-time': 1, 'test_roc_auc': 0.6135791856253989}


100%|##########| 14/14 [00:15<00:00,  1.15s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.7713079340141398, 0.573921028466483, 0.4855072463768116, 0.6374239350912778, 0.5447032306536439, 0.6682558139534884, 0.6579881656804734, 0.6556261343012705, 0.629100529100529, 0.7297058823529412, 0.6852617079889807, 0.5541420118343195, 0.6047330097087379, 0.6443129520052596, 0.5350506756756757, 0.6764462809917355, 0.5895316804407714, 0.5225609756097561, 0.6211692597831211, 0.6216216216216217, 0.6799802761341223, 0.7204168534289557, 0.6778645833333333, 0.6393491124260355, 0.6830097087378642, 0.6263020833333334, 0.633131067961165]
{'task_name': 'SIDER', 'fp_type': 'MorganFP', 'split-time': 2, 'test_roc_auc': 0.6321638430002166}
SIDER RDkitFP
loading dataset: SIDER number of split times: 3


100%|##########| 14/14 [00:15<00:00,  1.13s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.7134194053208138, 0.5750836920133907, 0.49645390070921985, 0.5986373165618448, 0.584177927927928, 0.6370815652993871, 0.6129429133858268, 0.7190152801358234, 0.5403508771929826, 0.6705128205128206, 0.6218765935747068, 0.4980899830220714, 0.5712121212121213, 0.5113333333333333, 0.5907700152983172, 0.7317421460628315, 0.5499999999999999, 0.5683060109289617, 0.604875283446712, 0.5923003347680535, 0.5490811295383236, 0.6113636363636363, 0.5323308270676692, 0.6178714859437751, 0.6030178028658272, 0.5504429133858268, 0.5596129837702871]
{'task_name': 'SIDER', 'fp_type': 'RDkitFP', 'split-time': 0, 'test_roc_auc': 0.5930334185052774}


100%|##########| 14/14 [00:15<00:00,  1.13s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.7114398422090731, 0.6088154269972451, 0.4781021897810219, 0.6674631620868181, 0.6030797101449274, 0.6385135135135134, 0.7161654135338346, 0.6035812672176308, 0.5411027568922306, 0.7627151799687011, 0.7403508771929824, 0.5403645833333334, 0.6907407407407407, 0.5757627118644066, 0.703416149068323, 0.6151315789473685, 0.48796992481203005, 0.5380067567567567, 0.6051390853371051, 0.625776397515528, 0.6120601733809281, 0.673352756611385, 0.6087786259541985, 0.6460133542812254, 0.7405078895463512, 0.7092592592592593, 0.7445578231292517]
{'task_name': 'SIDER', 'fp_type': 'RDkitFP', 'split-time': 1, 'test_roc_auc': 0.6365987833361545}


100%|##########| 14/14 [00:15<00:00,  1.13s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.7723880597014926, 0.6097337006427915, 0.5159420289855072, 0.6495943204868154, 0.6675432006010518, 0.6355813953488372, 0.7485207100591715, 0.7147610405323654, 0.6894179894179894, 0.7135294117647059, 0.6806703397612489, 0.46183431952662723, 0.6143203883495145, 0.646285338593031, 0.6463963963963963, 0.7391528925619835, 0.5864325068870523, 0.589430894308943, 0.6153936822253654, 0.5915349311575726, 0.6818293885601576, 0.7108919766920663, 0.5497395833333334, 0.6026627218934911, 0.7060679611650486, 0.69140625, 0.5888349514563107]
{'task_name': 'SIDER', 'fp_type': 'RDkitFP', 'split-time': 2, 'test_roc_auc': 0.6451813474225507}
SIDER AtomPairFP
loading dataset: SIDER number of split times: 3


100%|##########| 14/14 [00:15<00:00,  1.14s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.7133215962441314, 0.5995934959349594, 0.4858156028368794, 0.5797693920335429, 0.6021959459459459, 0.5696605374823196, 0.6284448818897638, 0.7007640067911715, 0.5401315789473684, 0.6551282051282052, 0.6624171341152474, 0.4838709677419355, 0.6106060606060606, 0.5313333333333334, 0.5725395206527282, 0.7141982864137086, 0.5846153846153845, 0.6159250585480094, 0.5706349206349206, 0.5799856527977043, 0.5439264903630658, 0.5943181818181819, 0.593609022556391, 0.5530120481927712, 0.6074685193226227, 0.5856299212598426, 0.5764669163545568]
{'task_name': 'SIDER', 'fp_type': 'AtomPairFP', 'split-time': 0, 'test_roc_auc': 0.5946438023170649}


100%|##########| 14/14 [00:15<00:00,  1.14s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.7549309664694279, 0.5170798898071626, 0.48175182481751827, 0.5887096774193548, 0.5664855072463769, 0.6227477477477478, 0.6398496240601504, 0.6377410468319559, 0.6115288220551378, 0.7284820031298904, 0.719548872180451, 0.6380208333333333, 0.7507936507936508, 0.5123728813559322, 0.6436335403726708, 0.6725877192982457, 0.5409774436090226, 0.5865709459459459, 0.6217586044318717, 0.6838509316770187, 0.6545130035696074, 0.6889287315105334, 0.6825699745547074, 0.6239198743126473, 0.6316568047337279, 0.6342592592592593, 0.6438775510204081]
{'task_name': 'SIDER', 'fp_type': 'AtomPairFP', 'split-time': 1, 'test_roc_auc': 0.6325610270942131}


100%|##########| 14/14 [00:15<00:00,  1.13s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.8008641005498821, 0.5848255280073463, 0.4891304347826087, 0.6732251521298174, 0.7116829451540195, 0.6638372093023256, 0.6860946745562131, 0.6737749546279491, 0.5716931216931216, 0.6994117647058824, 0.6980027548209367, 0.5263313609467455, 0.6570388349514564, 0.6203155818540435, 0.7128378378378378, 0.7387396694214875, 0.5946969696969697, 0.46280487804878045, 0.6118576143328618, 0.6463539010708823, 0.7148668639053255, 0.7134692962796952, 0.5924479166666666, 0.6351084812623273, 0.66747572815534, 0.6760416666666667, 0.6095873786407767]
{'task_name': 'SIDER', 'fp_type': 'AtomPairFP', 'split-time': 2, 'test_roc_auc': 0.6456487637062209}
SIDER TorsionFP
loading dataset: SIDER number of split times: 3


100%|##########| 14/14 [00:14<00:00,  1.06s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.7551838810641627, 0.5944524151123864, 0.48936170212765956, 0.5592243186582809, 0.6755349099099099, 0.6410891089108911, 0.7455708661417322, 0.6339134125636672, 0.555921052631579, 0.6782051282051282, 0.6759306476287609, 0.5284380305602717, 0.5484848484848485, 0.5313333333333334, 0.5982916879143294, 0.6997144022847817, 0.5384615384615384, 0.5526932084309133, 0.604875283446712, 0.665351506456241, 0.5828103989242492, 0.6420454545454546, 0.612406015037594, 0.5724899598393575, 0.5963960052105949, 0.5386318897637795, 0.5354764877236787]
{'task_name': 'SIDER', 'fp_type': 'TorsionFP', 'split-time': 0, 'test_roc_auc': 0.6056402775322902}


100%|##########| 14/14 [00:14<00:00,  1.06s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.7282051282051282, 0.5818181818181818, 0.5, 0.6189765033851055, 0.5304347826086957, 0.6972128378378378, 0.5548872180451128, 0.5818181818181818, 0.5234335839598998, 0.6998239436619719, 0.643859649122807, 0.5903645833333333, 0.6826719576719578, 0.581864406779661, 0.6186335403726708, 0.6491228070175439, 0.5823308270676693, 0.5396959459459459, 0.5925271098538425, 0.6586956521739131, 0.621239163691994, 0.6565441506051098, 0.6208651399491094, 0.6274548311076198, 0.6810897435897436, 0.6393518518518518, 0.6988662131519274]
{'task_name': 'SIDER', 'fp_type': 'TorsionFP', 'split-time': 1, 'test_roc_auc': 0.6185847383195118}


100%|##########| 14/14 [00:14<00:00,  1.07s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.783483896307934, 0.5779384756657484, 0.4855072463768116, 0.6546653144016227, 0.6880165289256198, 0.7320930232558139, 0.7325443786982249, 0.6291591046581971, 0.6419312169312169, 0.8158823529411765, 0.720959595959596, 0.4893491124260355, 0.6520631067961165, 0.6163708086785011, 0.6638513513513513, 0.7378099173553718, 0.5823002754820937, 0.6451219512195121, 0.6777463460631777, 0.6899541050484447, 0.6920611439842209, 0.7250112057373376, 0.5848958333333333, 0.6795857988165681, 0.7088592233009708, 0.7255208333333333, 0.587135922330097]
{'task_name': 'SIDER', 'fp_type': 'TorsionFP', 'split-time': 2, 'test_roc_auc': 0.6636969655325343}
SIDER AvalonFP
loading dataset: SIDER number of split times: 3


100%|##########| 14/14 [00:15<00:00,  1.13s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.755281690140845, 0.5384983261597321, 0.49645390070921985, 0.619706498951782, 0.677927927927928, 0.654997642621405, 0.6793799212598426, 0.6926994906621392, 0.5924342105263158, 0.6794871794871796, 0.6900815910249872, 0.4859932088285229, 0.537878787878788, 0.5313333333333334, 0.6177970423253442, 0.7201142390860873, 0.5384615384615384, 0.5962138953942233, 0.6544217687074829, 0.6556671449067432, 0.5651053339309726, 0.5681818181818181, 0.5511278195488721, 0.5785140562248996, 0.6283108988276161, 0.5583169291338583, 0.5854140657511443]
{'task_name': 'SIDER', 'fp_type': 'AvalonFP', 'split-time': 0, 'test_roc_auc': 0.6092518614812081}


100%|##########| 14/14 [00:15<00:00,  1.14s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.7414201183431952, 0.5831955922865014, 0.4927007299270073, 0.643568299482278, 0.5539855072463769, 0.59375, 0.6063909774436089, 0.627823691460055, 0.5416040100250626, 0.7903951486697967, 0.7175438596491228, 0.5453125000000001, 0.6853174603174603, 0.5430508474576271, 0.7027950310559006, 0.6666666666666666, 0.5766917293233083, 0.5159065315315315, 0.6206977840641208, 0.5805900621118013, 0.6332228454869964, 0.6683101748095024, 0.6733460559796438, 0.6197957580518461, 0.7069773175542406, 0.5092592592592593, 0.7160997732426304]
{'task_name': 'SIDER', 'fp_type': 'AvalonFP', 'split-time': 1, 'test_roc_auc': 0.6243117678313163}


100%|##########| 14/14 [00:15<00:00,  1.14s/it]
 21%|##1       | 3/14 [00:00<00:00, 22.32it/s]

[0.7773959151610369, 0.58356290174472, 0.4782608695652174, 0.6026369168356998, 0.6596543951915853, 0.6329069767441862, 0.6772189349112425, 0.6512401693889897, 0.6218253968253968, 0.7501960784313726, 0.713269054178145, 0.6035502958579881, 0.6098300970873786, 0.6531886916502301, 0.6328828828828829, 0.721797520661157, 0.6308539944903582, 0.5154471544715447, 0.5634134842055635, 0.5507394186639469, 0.6734467455621301, 0.7714029583146571, 0.5700520833333332, 0.6330374753451677, 0.6786407766990291, 0.7520833333333333, 0.5887135922330098]
{'task_name': 'SIDER', 'fp_type': 'AvalonFP', 'split-time': 2, 'test_roc_auc': 0.6406388190284927}
SIDER EstateFP
loading dataset: SIDER number of split times: 3


100%|##########| 14/14 [00:00<00:00, 21.32it/s]
 21%|##1       | 3/14 [00:00<00:00, 22.11it/s]

[0.7450117370892019, 0.5235533237685317, 0.45390070921985815, 0.6147798742138364, 0.6492117117117117, 0.5754361150400754, 0.6323818897637796, 0.6086587436332768, 0.6315789473684211, 0.5934911242603551, 0.6991330953595105, 0.5500848896434635, 0.591046831955923, 0.5464444444444445, 0.6175420703722589, 0.6839045287637698, 0.457396449704142, 0.6219750195160032, 0.6972789115646258, 0.5836920133907221, 0.5595024652622143, 0.6762396694214876, 0.5981203007518797, 0.5325301204819277, 0.6152844116369952, 0.5755413385826772, 0.5215355805243446]
{'task_name': 'SIDER', 'fp_type': 'EstateFP', 'split-time': 0, 'test_roc_auc': 0.5983428265720533}


100%|##########| 14/14 [00:00<00:00, 21.17it/s]
 21%|##1       | 3/14 [00:00<00:00, 22.19it/s]

[0.669723865877712, 0.5486225895316804, 0.44525547445255476, 0.562126642771804, 0.5608695652173913, 0.48761261261261263, 0.5909774436090225, 0.5515151515151515, 0.6030075187969925, 0.6807511737089202, 0.6652882205513784, 0.6544270833333333, 0.6876984126984127, 0.5274576271186441, 0.7341614906832299, 0.5980263157894736, 0.731203007518797, 0.6148648648648649, 0.582036775106082, 0.5886645962732919, 0.5845232024477307, 0.5918870461676379, 0.6708015267175572, 0.6187156323644933, 0.7271942800788955, 0.6337962962962962, 0.644671201814059]
{'task_name': 'SIDER', 'fp_type': 'EstateFP', 'split-time': 1, 'test_roc_auc': 0.6131807265895562}


100%|##########| 14/14 [00:00<00:00, 21.15it/s]
  7%|7         | 1/14 [00:00<00:01,  9.28it/s]

[0.7134721131186175, 0.5865472910927456, 0.4782608695652174, 0.6064908722109533, 0.6495116453794141, 0.652093023255814, 0.7165680473372781, 0.61826981246219, 0.6435185185185186, 0.6669607843137255, 0.6696510560146924, 0.5727810650887574, 0.6064320388349514, 0.5473372781065089, 0.5617961711711711, 0.6547520661157025, 0.553374655647383, 0.6083333333333334, 0.6957802923149459, 0.5940846506884242, 0.645586785009862, 0.6825414612281488, 0.640625, 0.6185404339250493, 0.625, 0.6830729166666667, 0.6475728155339805]
{'task_name': 'SIDER', 'fp_type': 'EstateFP', 'split-time': 2, 'test_roc_auc': 0.6273687035901501}
SIDER MACCSFP
loading dataset: SIDER number of split times: 3


100%|##########| 14/14 [00:01<00:00,  8.99it/s]
  7%|7         | 1/14 [00:00<00:01,  9.44it/s]

[0.6778169014084506, 0.5676709708273553, 0.48936170212765956, 0.6119496855345912, 0.662302927927928, 0.657944365865158, 0.6988188976377953, 0.625848896434635, 0.5716008771929825, 0.6064102564102564, 0.6265935747067822, 0.5203735144312394, 0.5606060606060607, 0.4915555555555556, 0.5590260071392147, 0.7055283557731538, 0.5923076923076923, 0.6356362217017955, 0.5673469387755102, 0.614179818268771, 0.5547960555804572, 0.6011363636363637, 0.5823308270676693, 0.5762048192771084, 0.6070343030829353, 0.5856299212598426, 0.5483770287141073]
{'task_name': 'SIDER', 'fp_type': 'MACCSFP', 'split-time': 0, 'test_roc_auc': 0.5962366125648544}


100%|##########| 14/14 [00:01<00:00,  9.00it/s]
  7%|7         | 1/14 [00:00<00:01,  9.43it/s]

[0.6910256410256411, 0.6000000000000001, 0.48905109489051096, 0.6066308243727598, 0.5262681159420289, 0.5632038288288288, 0.6898496240601504, 0.5803030303030303, 0.5578947368421052, 0.6501369327073553, 0.6606516290726816, 0.5609375, 0.5888888888888888, 0.4394915254237288, 0.5959627329192547, 0.6388157894736842, 0.6161654135338346, 0.5664414414414415, 0.5796793965110797, 0.5877329192546584, 0.6124426313105559, 0.6228148812191843, 0.4767811704834605, 0.6331500392772977, 0.5977564102564102, 0.5143518518518518, 0.6456916099773242]
{'task_name': 'SIDER', 'fp_type': 'MACCSFP', 'split-time': 1, 'test_roc_auc': 0.5885970244395463}


100%|##########| 14/14 [00:01<00:00,  9.00it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.7681657501963864, 0.6181129476584022, 0.4963768115942029, 0.6825557809330629, 0.5101427498121713, 0.6823255813953489, 0.6177514792899409, 0.6642468239564429, 0.5998677248677249, 0.666372549019608, 0.6604683195592287, 0.5760355029585799, 0.5986650485436893, 0.6313280736357659, 0.6100788288288289, 0.6581611570247934, 0.543732782369146, 0.5046747967479676, 0.574964639321075, 0.6417644059153493, 0.5862919132149902, 0.7454056476916182, 0.5791666666666667, 0.6511834319526627, 0.7196601941747574, 0.6791666666666667, 0.6455097087378641]
{'task_name': 'SIDER', 'fp_type': 'MACCSFP', 'split-time': 2, 'test_roc_auc': 0.6263768882493683}
SIDER PharmacoErGFP
loading dataset: SIDER number of split times: 3


100%|##########| 14/14 [00:03<00:00,  3.90it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.685348200312989, 0.6164514586322334, 0.4929078014184397, 0.5958071278825996, 0.5595439189189189, 0.5588165959453089, 0.609005905511811, 0.5812818336162988, 0.5403508771929826, 0.6128205128205128, 0.6077256501784803, 0.4475806451612903, 0.5636363636363637, 0.4517777777777778, 0.5719020907700153, 0.6908404732762137, 0.5576923076923077, 0.5247853239656518, 0.5460317460317461, 0.5114777618364419, 0.5072837292693859, 0.675, 0.4973684210526316, 0.6160642570281124, 0.6172383847155883, 0.515255905511811, 0.594361215147732]
{'task_name': 'SIDER', 'fp_type': 'PharmacoErGFP', 'split-time': 0, 'test_roc_auc': 0.5684576401964313}


100%|##########| 14/14 [00:03<00:00,  3.89it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.5717948717948718, 0.5924242424242424, 0.4927007299270073, 0.5857228195937874, 0.5221014492753623, 0.5052083333333333, 0.543609022556391, 0.5712121212121212, 0.5270676691729324, 0.650039123630673, 0.613032581453634, 0.58046875, 0.6224867724867725, 0.5025423728813558, 0.5737577639751552, 0.6183114035087719, 0.47368421052631576, 0.559543918918919, 0.6024280999528524, 0.5032608695652174, 0.5577511473737888, 0.6056701030927836, 0.5486641221374046, 0.55901413982718, 0.655448717948718, 0.5180555555555555, 0.5784580498866214]
{'task_name': 'SIDER', 'fp_type': 'PharmacoErGFP', 'split-time': 1, 'test_roc_auc': 0.5642392208152507}


100%|##########| 14/14 [00:03<00:00,  3.91it/s]
  7%|7         | 1/14 [00:00<00:01,  7.32it/s]

[0.7953652788688138, 0.6707988980716254, 0.6579710144927536, 0.6772819472616632, 0.6782494365138995, 0.6238372093023256, 0.6281065088757397, 0.6007259528130671, 0.6477513227513227, 0.6987254901960784, 0.648989898989899, 0.5840236686390533, 0.6449029126213592, 0.6259040105193951, 0.7374718468468469, 0.7278925619834711, 0.5878099173553719, 0.4418699186991869, 0.5967703913248467, 0.6483936766955635, 0.6783777120315582, 0.7479829672792468, 0.440625, 0.6660749506903354, 0.7422330097087378, 0.7216145833333334, 0.6446601941747573]
{'task_name': 'SIDER', 'fp_type': 'PharmacoErGFP', 'split-time': 2, 'test_roc_auc': 0.6505337140755649}
SIDER PharmacoPFP
loading dataset: SIDER number of split times: 3


100%|##########| 14/14 [00:02<00:00,  6.18it/s]
  7%|7         | 1/14 [00:00<00:01,  7.35it/s]

[0.651017214397496, 0.48134863701578195, 0.6790780141843972, 0.6162473794549267, 0.6099380630630631, 0.5664780763790666, 0.6574803149606299, 0.6814516129032258, 0.6232456140350878, 0.6061143984220907, 0.6837072921978582, 0.40237691001697795, 0.6159779614325069, 0.6226666666666667, 0.6281234064252932, 0.7018563851489188, 0.659171597633136, 0.5187353629976581, 0.6664399092970521, 0.610832137733142, 0.5153518601523981, 0.5941115702479339, 0.6045112781954888, 0.5588353413654619, 0.5760963960052106, 0.5947342519685039, 0.5925925925925927]
{'task_name': 'SIDER', 'fp_type': 'PharmacoPFP', 'split-time': 0, 'test_roc_auc': 0.6043896386997246}


100%|##########| 14/14 [00:02<00:00,  6.21it/s]
  7%|7         | 1/14 [00:00<00:01,  7.56it/s]

[0.673767258382643, 0.5323691460055097, 0.4124087591240876, 0.6985264834727201, 0.6865942028985508, 0.6057150900900902, 0.7323308270676692, 0.6400826446280992, 0.593358395989975, 0.7131259780907667, 0.6585213032581454, 0.5361979166666666, 0.7777777777777778, 0.5716949152542373, 0.6531055900621117, 0.6180921052631579, 0.6218045112781954, 0.5579954954954955, 0.6623055162659123, 0.565527950310559, 0.6495410504844467, 0.6646122814881219, 0.6603053435114504, 0.6415946582875098, 0.6421351084812623, 0.6273148148148148, 0.696938775510204]
{'task_name': 'SIDER', 'fp_type': 'PharmacoPFP', 'split-time': 1, 'test_roc_auc': 0.6331016259244512}


100%|##########| 14/14 [00:02<00:00,  6.20it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.8168695993715631, 0.7191230486685033, 0.55, 0.6408722109533469, 0.7227648384673178, 0.6856976744186047, 0.6914201183431953, 0.6194797338173019, 0.6388888888888888, 0.675, 0.744605142332415, 0.4792899408284023, 0.6802184466019418, 0.5631163708086784, 0.6219031531531531, 0.7546487603305785, 0.4731404958677686, 0.4207317073170732, 0.6138613861386137, 0.6481387047424783, 0.650887573964497, 0.7519049753473779, 0.5708333333333333, 0.6502958579881657, 0.6841019417475728, 0.6888020833333334, 0.7004854368932039]
{'task_name': 'SIDER', 'fp_type': 'PharmacoPFP', 'split-time': 2, 'test_roc_auc': 0.6465585712465669}
SIDER PubChemFP
loading dataset: SIDER number of split times: 3


100%|##########| 14/14 [00:05<00:00,  2.42it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.6779147104851331, 0.5873983739837398, 0.4929078014184397, 0.6269392033542978, 0.5817849099099099, 0.5756718528995757, 0.6481299212598426, 0.7270797962648556, 0.5612938596491228, 0.6538461538461539, 0.6536206017338093, 0.49002546689303905, 0.5651515151515152, 0.5353333333333334, 0.61907190209077, 0.6443288453692371, 0.5384615384615384, 0.6044106167056987, 0.6321995464852608, 0.5921807747489239, 0.5279022859704169, 0.5772727272727273, 0.5548872180451128, 0.6132530120481928, 0.5821754233608337, 0.5895669291338583, 0.5946733250104036]
{'task_name': 'SIDER', 'fp_type': 'PubChemFP', 'split-time': 0, 'test_roc_auc': 0.5943511720328053}


100%|##########| 14/14 [00:05<00:00,  2.44it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.726429980276134, 0.6298898071625344, 0.5650851581508517, 0.5695937873357227, 0.5434782608695653, 0.5240709459459459, 0.6849624060150376, 0.6789256198347109, 0.5578947368421053, 0.7150821596244131, 0.7759398496240603, 0.6286458333333333, 0.7207671957671957, 0.5283050847457627, 0.6295031055900622, 0.6555921052631579, 0.525563909774436, 0.4966216216216216, 0.6262376237623762, 0.5451863354037266, 0.6703212646608873, 0.6295383236216943, 0.6784351145038168, 0.6567164179104477, 0.6671597633136095, 0.4930555555555556, 0.6452380952380953]
{'task_name': 'SIDER', 'fp_type': 'PubChemFP', 'split-time': 1, 'test_roc_auc': 0.6210459282128468}


100%|##########| 14/14 [00:05<00:00,  2.41it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.7698350353495679, 0.621097337006428, 0.5891304347826086, 0.6171399594320487, 0.6709241172051088, 0.6465116279069768, 0.608284023668639, 0.6742286751361162, 0.6023809523809522, 0.7187254901960785, 0.651744719926538, 0.5260355029585798, 0.5751213592233009, 0.5956607495069034, 0.6144425675675675, 0.6970041322314049, 0.5168732782369145, 0.5861788617886179, 0.6638378123526638, 0.6291432942376338, 0.6108234714003945, 0.7312864186463469, 0.5929687499999999, 0.6800788954635109, 0.7235436893203884, 0.7065104166666667, 0.5716019417475728]
{'task_name': 'SIDER', 'fp_type': 'PubChemFP', 'split-time': 2, 'test_roc_auc': 0.636707907938501}
SIDER MHFP6
loading dataset: SIDER number of split times: 3


100%|##########| 14/14 [00:15<00:00,  1.12s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.6922926447574335, 0.5823768531802965, 0.49645390070921985, 0.6680293501048218, 0.646677927927928, 0.6242338519566243, 0.671505905511811, 0.648132427843803, 0.5667763157894736, 0.6615384615384616, 0.6030086690464049, 0.5730050933786078, 0.593939393939394, 0.5431111111111111, 0.5954869964303926, 0.6820685434516524, 0.5846153846153845, 0.5593286494925838, 0.5937641723356009, 0.6092778574844573, 0.5822501120573733, 0.5829545454545455, 0.5586466165413534, 0.6049196787148595, 0.6327616152844115, 0.6481299212598426, 0.5520183104452767]
{'task_name': 'SIDER', 'fp_type': 'MHFP6', 'split-time': 0, 'test_roc_auc': 0.6058260855690046}


100%|##########| 14/14 [00:15<00:00,  1.12s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.6615384615384615, 0.5969696969696969, 0.4927007299270073, 0.653126244524094, 0.5295289855072464, 0.5905123873873874, 0.6661654135338346, 0.6090909090909091, 0.5531328320802005, 0.7197769953051643, 0.7012531328320801, 0.55703125, 0.6460317460317461, 0.524915254237288, 0.5872670807453416, 0.5922149122807017, 0.5586466165413533, 0.4925394144144144, 0.5826261197548326, 0.5563664596273292, 0.6542580316165222, 0.6531824294038547, 0.6774809160305343, 0.5906323644933228, 0.655448717948718, 0.525462962962963, 0.6850340136054421]
{'task_name': 'SIDER', 'fp_type': 'MHFP6', 'split-time': 1, 'test_roc_auc': 0.6041827436440905}


100%|##########| 14/14 [00:15<00:00,  1.12s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.7786724273369992, 0.6485307621671258, 0.5992753623188406, 0.6828600405679512, 0.7105559729526671, 0.687093023255814, 0.7979289940828402, 0.6367211131276467, 0.6226190476190476, 0.6912745098039216, 0.6425619834710744, 0.6100591715976331, 0.6651699029126215, 0.653353057199211, 0.5817849099099099, 0.7391528925619835, 0.5420110192837465, 0.4558943089430894, 0.6685525695426686, 0.6357725650178481, 0.743836291913215, 0.7185118780815777, 0.7049479166666666, 0.6659763313609468, 0.7226941747572816, 0.7617187500000001, 0.6923543689320387]
{'task_name': 'SIDER', 'fp_type': 'MHFP6', 'split-time': 2, 'test_roc_auc': 0.6688845683475692}
SIDER MAP4
loading dataset: SIDER number of split times: 3


100%|##########| 14/14 [00:15<00:00,  1.11s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.6421165884194053, 0.543400286944046, 0.49645390070921985, 0.5602725366876311, 0.537302927927928, 0.6392032060348892, 0.6323818897637795, 0.6277589134125636, 0.581578947368421, 0.6371794871794871, 0.6130800611932687, 0.5021222410865874, 0.5712121212121212, 0.5195555555555557, 0.5184854665986742, 0.6415748674010608, 0.6192307692307693, 0.5534738485558158, 0.5363945578231293, 0.5020325203252033, 0.5382115643209323, 0.5818181818181818, 0.6560150375939849, 0.630421686746988, 0.5928137212331741, 0.49950787401574803, 0.5764669163545568]
{'task_name': 'SIDER', 'fp_type': 'MAP4', 'split-time': 0, 'test_roc_auc': 0.5759283583523379}


100%|##########| 14/14 [00:15<00:00,  1.11s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.6538461538461539, 0.5757575757575758, 0.4927007299270073, 0.5595380326563122, 0.4878623188405797, 0.6587837837837838, 0.624812030075188, 0.5757575757575757, 0.5236842105263158, 0.6651995305164319, 0.6581453634085214, 0.6432291666666667, 0.6444444444444444, 0.5503389830508475, 0.5293478260869565, 0.6015350877192982, 0.5161654135338345, 0.5309684684684685, 0.5756718528995757, 0.5920807453416149, 0.5989291177970424, 0.6267368892873152, 0.6125954198473282, 0.6072270227808327, 0.6650641025641025, 0.6245370370370371, 0.732766439909297]
{'task_name': 'SIDER', 'fp_type': 'MAP4', 'split-time': 1, 'test_roc_auc': 0.5973231600937077}


100%|##########| 14/14 [00:15<00:00,  1.12s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

[0.6477808326787118, 0.5020661157024793, 0.4891304347826087, 0.5953346855983772, 0.40627347858752816, 0.6627906976744187, 0.6171597633136094, 0.5343315184513008, 0.6156084656084656, 0.6763725490196079, 0.6105371900826447, 0.5428994082840236, 0.6343446601941748, 0.5994411571334648, 0.4811373873873874, 0.641012396694215, 0.4820936639118457, 0.5727642276422764, 0.5313531353135313, 0.5525242223355431, 0.6008382642998028, 0.6267368892873151, 0.6765625, 0.6032544378698225, 0.5718446601941748, 0.6625000000000001, 0.6728155339805825]
{'task_name': 'SIDER', 'fp_type': 'MAP4', 'split-time': 2, 'test_roc_auc': 0.5855373435565893}
BACE MorganFP
loading dataset: BACE number of split times: 3


100%|##########| 14/14 [00:16<00:00,  1.21s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BACE', 'fp_type': 'MorganFP', 'split-time': 0, 'test_roc_auc': 0.8419384057971013}


100%|##########| 14/14 [00:16<00:00,  1.22s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BACE', 'fp_type': 'MorganFP', 'split-time': 1, 'test_roc_auc': 0.8419384057971013}


100%|##########| 14/14 [00:16<00:00,  1.21s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BACE', 'fp_type': 'MorganFP', 'split-time': 2, 'test_roc_auc': 0.8419384057971013}
BACE RDkitFP
loading dataset: BACE number of split times: 3


100%|##########| 14/14 [00:17<00:00,  1.22s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BACE', 'fp_type': 'RDkitFP', 'split-time': 0, 'test_roc_auc': 0.7547101449275362}


100%|##########| 14/14 [00:17<00:00,  1.22s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BACE', 'fp_type': 'RDkitFP', 'split-time': 1, 'test_roc_auc': 0.7547101449275362}


100%|##########| 14/14 [00:17<00:00,  1.22s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BACE', 'fp_type': 'RDkitFP', 'split-time': 2, 'test_roc_auc': 0.7547101449275362}
BACE AtomPairFP
loading dataset: BACE number of split times: 3


100%|##########| 14/14 [00:17<00:00,  1.22s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BACE', 'fp_type': 'AtomPairFP', 'split-time': 0, 'test_roc_auc': 0.8332427536231884}


100%|##########| 14/14 [00:17<00:00,  1.22s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BACE', 'fp_type': 'AtomPairFP', 'split-time': 1, 'test_roc_auc': 0.8332427536231884}


100%|##########| 14/14 [00:17<00:00,  1.24s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BACE', 'fp_type': 'AtomPairFP', 'split-time': 2, 'test_roc_auc': 0.8332427536231884}
BACE TorsionFP
loading dataset: BACE number of split times: 3


100%|##########| 14/14 [00:15<00:00,  1.15s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BACE', 'fp_type': 'TorsionFP', 'split-time': 0, 'test_roc_auc': 0.8503623188405798}


100%|##########| 14/14 [00:16<00:00,  1.18s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BACE', 'fp_type': 'TorsionFP', 'split-time': 1, 'test_roc_auc': 0.8503623188405798}


100%|##########| 14/14 [00:16<00:00,  1.22s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BACE', 'fp_type': 'TorsionFP', 'split-time': 2, 'test_roc_auc': 0.8503623188405798}
BACE AvalonFP
loading dataset: BACE number of split times: 3


100%|##########| 14/14 [00:17<00:00,  1.27s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BACE', 'fp_type': 'AvalonFP', 'split-time': 0, 'test_roc_auc': 0.8494565217391303}


100%|##########| 14/14 [00:18<00:00,  1.30s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BACE', 'fp_type': 'AvalonFP', 'split-time': 1, 'test_roc_auc': 0.8494565217391303}


100%|##########| 14/14 [00:18<00:00,  1.31s/it]
 21%|##1       | 3/14 [00:00<00:00, 25.36it/s]

{'task_name': 'BACE', 'fp_type': 'AvalonFP', 'split-time': 2, 'test_roc_auc': 0.8494565217391303}
BACE EstateFP
loading dataset: BACE number of split times: 3


100%|##########| 14/14 [00:00<00:00, 24.23it/s]
 21%|##1       | 3/14 [00:00<00:00, 27.88it/s]

{'task_name': 'BACE', 'fp_type': 'EstateFP', 'split-time': 0, 'test_roc_auc': 0.7966485507246377}


100%|##########| 14/14 [00:00<00:00, 23.97it/s]
 21%|##1       | 3/14 [00:00<00:00, 26.00it/s]

{'task_name': 'BACE', 'fp_type': 'EstateFP', 'split-time': 1, 'test_roc_auc': 0.7966485507246377}


100%|##########| 14/14 [00:00<00:00, 25.57it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BACE', 'fp_type': 'EstateFP', 'split-time': 2, 'test_roc_auc': 0.7966485507246377}
BACE MACCSFP
loading dataset: BACE number of split times: 3


100%|##########| 14/14 [00:01<00:00,  7.77it/s]
 14%|#4        | 2/14 [00:00<00:01, 10.12it/s]

{'task_name': 'BACE', 'fp_type': 'MACCSFP', 'split-time': 0, 'test_roc_auc': 0.8461956521739131}


100%|##########| 14/14 [00:01<00:00,  8.76it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BACE', 'fp_type': 'MACCSFP', 'split-time': 1, 'test_roc_auc': 0.8461956521739131}


100%|##########| 14/14 [00:01<00:00,  8.34it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BACE', 'fp_type': 'MACCSFP', 'split-time': 2, 'test_roc_auc': 0.8461956521739131}
BACE PharmacoErGFP
loading dataset: BACE number of split times: 3


100%|##########| 14/14 [00:03<00:00,  3.57it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BACE', 'fp_type': 'PharmacoErGFP', 'split-time': 0, 'test_roc_auc': 0.7660326086956523}


100%|##########| 14/14 [00:03<00:00,  3.57it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BACE', 'fp_type': 'PharmacoErGFP', 'split-time': 1, 'test_roc_auc': 0.7660326086956523}


100%|##########| 14/14 [00:03<00:00,  3.31it/s]
  7%|7         | 1/14 [00:00<00:01,  6.86it/s]

{'task_name': 'BACE', 'fp_type': 'PharmacoErGFP', 'split-time': 2, 'test_roc_auc': 0.7660326086956523}
BACE PharmacoPFP
loading dataset: BACE number of split times: 3


100%|##########| 14/14 [00:02<00:00,  6.25it/s]
  7%|7         | 1/14 [00:00<00:01,  8.09it/s]

{'task_name': 'BACE', 'fp_type': 'PharmacoPFP', 'split-time': 0, 'test_roc_auc': 0.7638586956521739}


100%|##########| 14/14 [00:02<00:00,  5.99it/s]
  7%|7         | 1/14 [00:00<00:01,  6.56it/s]

{'task_name': 'BACE', 'fp_type': 'PharmacoPFP', 'split-time': 1, 'test_roc_auc': 0.7638586956521739}


100%|##########| 14/14 [00:02<00:00,  5.97it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BACE', 'fp_type': 'PharmacoPFP', 'split-time': 2, 'test_roc_auc': 0.7638586956521739}
BACE PubChemFP
loading dataset: BACE number of split times: 3


100%|##########| 14/14 [00:06<00:00,  2.02it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BACE', 'fp_type': 'PubChemFP', 'split-time': 0, 'test_roc_auc': 0.8129528985507246}


100%|##########| 14/14 [00:06<00:00,  2.09it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BACE', 'fp_type': 'PubChemFP', 'split-time': 1, 'test_roc_auc': 0.8129528985507246}


100%|##########| 14/14 [00:06<00:00,  2.06it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BACE', 'fp_type': 'PubChemFP', 'split-time': 2, 'test_roc_auc': 0.8129528985507246}
BACE MHFP6
loading dataset: BACE number of split times: 3


100%|##########| 14/14 [00:19<00:00,  1.37s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BACE', 'fp_type': 'MHFP6', 'split-time': 0, 'test_roc_auc': 0.8248188405797101}


100%|##########| 14/14 [00:18<00:00,  1.36s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BACE', 'fp_type': 'MHFP6', 'split-time': 1, 'test_roc_auc': 0.8248188405797101}


100%|##########| 14/14 [00:19<00:00,  1.34s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BACE', 'fp_type': 'MHFP6', 'split-time': 2, 'test_roc_auc': 0.8248188405797101}
BACE MAP4
loading dataset: BACE number of split times: 3


100%|##########| 14/14 [00:19<00:00,  1.39s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BACE', 'fp_type': 'MAP4', 'split-time': 0, 'test_roc_auc': 0.7307971014492753}


100%|##########| 14/14 [00:18<00:00,  1.35s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BACE', 'fp_type': 'MAP4', 'split-time': 1, 'test_roc_auc': 0.7307971014492753}


100%|##########| 14/14 [00:18<00:00,  1.36s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BACE', 'fp_type': 'MAP4', 'split-time': 2, 'test_roc_auc': 0.7307971014492753}
BBBP MorganFP
loading dataset: BBBP number of split times: 3


100%|##########| 14/14 [00:29<00:00,  2.12s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BBBP', 'fp_type': 'MorganFP', 'split-time': 0, 'test_roc_auc': 0.6226515078523942}


100%|##########| 14/14 [00:29<00:00,  2.15s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BBBP', 'fp_type': 'MorganFP', 'split-time': 1, 'test_roc_auc': 0.6226515078523942}


100%|##########| 14/14 [00:29<00:00,  2.10s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BBBP', 'fp_type': 'MorganFP', 'split-time': 2, 'test_roc_auc': 0.6226515078523942}
BBBP RDkitFP
loading dataset: BBBP number of split times: 3


100%|##########| 14/14 [00:30<00:00,  2.15s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BBBP', 'fp_type': 'RDkitFP', 'split-time': 0, 'test_roc_auc': 0.6543501300703343}


100%|##########| 14/14 [00:29<00:00,  2.14s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BBBP', 'fp_type': 'RDkitFP', 'split-time': 1, 'test_roc_auc': 0.6543501300703343}


100%|##########| 14/14 [00:29<00:00,  2.09s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BBBP', 'fp_type': 'RDkitFP', 'split-time': 2, 'test_roc_auc': 0.6543501300703343}
BBBP AtomPairFP
loading dataset: BBBP number of split times: 3


100%|##########| 14/14 [00:29<00:00,  2.15s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BBBP', 'fp_type': 'AtomPairFP', 'split-time': 0, 'test_roc_auc': 0.6577223239233067}


100%|##########| 14/14 [00:29<00:00,  2.09s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BBBP', 'fp_type': 'AtomPairFP', 'split-time': 1, 'test_roc_auc': 0.6577223239233067}


100%|##########| 14/14 [00:30<00:00,  2.14s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BBBP', 'fp_type': 'AtomPairFP', 'split-time': 2, 'test_roc_auc': 0.6577223239233067}
BBBP TorsionFP
loading dataset: BBBP number of split times: 3


100%|##########| 14/14 [00:26<00:00,  1.95s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BBBP', 'fp_type': 'TorsionFP', 'split-time': 0, 'test_roc_auc': 0.6309374698911263}


100%|##########| 14/14 [00:26<00:00,  1.95s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BBBP', 'fp_type': 'TorsionFP', 'split-time': 1, 'test_roc_auc': 0.6309374698911263}


100%|##########| 14/14 [00:26<00:00,  1.96s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BBBP', 'fp_type': 'TorsionFP', 'split-time': 2, 'test_roc_auc': 0.6309374698911263}
BBBP AvalonFP
loading dataset: BBBP number of split times: 3


100%|##########| 14/14 [00:29<00:00,  2.13s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BBBP', 'fp_type': 'AvalonFP', 'split-time': 0, 'test_roc_auc': 0.6141728490220638}


100%|##########| 14/14 [00:29<00:00,  2.13s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BBBP', 'fp_type': 'AvalonFP', 'split-time': 1, 'test_roc_auc': 0.6141728490220638}


100%|##########| 14/14 [00:29<00:00,  2.13s/it]
 14%|#4        | 2/14 [00:00<00:00, 14.30it/s]

{'task_name': 'BBBP', 'fp_type': 'AvalonFP', 'split-time': 2, 'test_roc_auc': 0.6141728490220638}
BBBP EstateFP
loading dataset: BBBP number of split times: 3


100%|##########| 14/14 [00:01<00:00, 13.89it/s]
 14%|#4        | 2/14 [00:00<00:00, 15.13it/s]

{'task_name': 'BBBP', 'fp_type': 'EstateFP', 'split-time': 0, 'test_roc_auc': 0.6720300606994893}


100%|##########| 14/14 [00:00<00:00, 14.98it/s]
 14%|#4        | 2/14 [00:00<00:00, 13.71it/s]

{'task_name': 'BBBP', 'fp_type': 'EstateFP', 'split-time': 1, 'test_roc_auc': 0.6720300606994893}


100%|##########| 14/14 [00:00<00:00, 14.14it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BBBP', 'fp_type': 'EstateFP', 'split-time': 2, 'test_roc_auc': 0.6720300606994893}
BBBP MACCSFP
loading dataset: BBBP number of split times: 3


100%|##########| 14/14 [00:02<00:00,  5.44it/s]
  7%|7         | 1/14 [00:00<00:02,  5.70it/s]

{'task_name': 'BBBP', 'fp_type': 'MACCSFP', 'split-time': 0, 'test_roc_auc': 0.6450525098757105}


100%|##########| 14/14 [00:02<00:00,  5.57it/s]
  7%|7         | 1/14 [00:00<00:02,  5.90it/s]

{'task_name': 'BBBP', 'fp_type': 'MACCSFP', 'split-time': 1, 'test_roc_auc': 0.6450525098757105}


100%|##########| 14/14 [00:02<00:00,  5.51it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BBBP', 'fp_type': 'MACCSFP', 'split-time': 2, 'test_roc_auc': 0.6450525098757105}
BBBP PharmacoErGFP
loading dataset: BBBP number of split times: 3


100%|##########| 14/14 [00:06<00:00,  2.20it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BBBP', 'fp_type': 'PharmacoErGFP', 'split-time': 0, 'test_roc_auc': 0.6619134791405723}


100%|##########| 14/14 [00:06<00:00,  2.23it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BBBP', 'fp_type': 'PharmacoErGFP', 'split-time': 1, 'test_roc_auc': 0.6619134791405723}


100%|##########| 14/14 [00:06<00:00,  2.18it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BBBP', 'fp_type': 'PharmacoErGFP', 'split-time': 2, 'test_roc_auc': 0.6619134791405723}
BBBP PharmacoPFP
loading dataset: BBBP number of split times: 3


100%|##########| 14/14 [00:03<00:00,  3.46it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BBBP', 'fp_type': 'PharmacoPFP', 'split-time': 0, 'test_roc_auc': 0.6334425281819057}


100%|##########| 14/14 [00:03<00:00,  3.48it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BBBP', 'fp_type': 'PharmacoPFP', 'split-time': 1, 'test_roc_auc': 0.6334425281819057}


100%|##########| 14/14 [00:03<00:00,  3.75it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BBBP', 'fp_type': 'PharmacoPFP', 'split-time': 2, 'test_roc_auc': 0.6334425281819057}
BBBP PubChemFP
loading dataset: BBBP number of split times: 3


100%|##########| 14/14 [00:10<00:00,  1.32it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BBBP', 'fp_type': 'PubChemFP', 'split-time': 0, 'test_roc_auc': 0.6654301955872435}


100%|##########| 14/14 [00:10<00:00,  1.33it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BBBP', 'fp_type': 'PubChemFP', 'split-time': 1, 'test_roc_auc': 0.6654301955872435}


100%|##########| 14/14 [00:10<00:00,  1.29it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BBBP', 'fp_type': 'PubChemFP', 'split-time': 2, 'test_roc_auc': 0.6654301955872435}
BBBP MHFP6
loading dataset: BBBP number of split times: 3


100%|##########| 14/14 [00:29<00:00,  2.14s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BBBP', 'fp_type': 'MHFP6', 'split-time': 0, 'test_roc_auc': 0.6324790442239137}


100%|##########| 14/14 [00:29<00:00,  2.05s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BBBP', 'fp_type': 'MHFP6', 'split-time': 1, 'test_roc_auc': 0.6324790442239137}


100%|##########| 14/14 [00:28<00:00,  2.08s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BBBP', 'fp_type': 'MHFP6', 'split-time': 2, 'test_roc_auc': 0.6324790442239137}
BBBP MAP4
loading dataset: BBBP number of split times: 3


100%|##########| 14/14 [00:28<00:00,  2.04s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BBBP', 'fp_type': 'MAP4', 'split-time': 0, 'test_roc_auc': 0.6182676558435303}


100%|##########| 14/14 [00:28<00:00,  2.01s/it]
  0%|          | 0/14 [00:00<?, ?it/s]

{'task_name': 'BBBP', 'fp_type': 'MAP4', 'split-time': 1, 'test_roc_auc': 0.6182676558435303}


100%|##########| 14/14 [00:27<00:00,  1.95s/it]


{'task_name': 'BBBP', 'fp_type': 'MAP4', 'split-time': 2, 'test_roc_auc': 0.6182676558435303}
HIV MorganFP
loading dataset: HIV number of split times: 3


100%|##########| 5/5 [1:18:24<00:00, 901.67s/it]


{'task_name': 'HIV', 'fp_type': 'MorganFP', 'split-time': 0, 'test_roc_auc': 0.7051845342706502}
HIV RDkitFP
loading dataset: HIV number of split times: 3


100%|##########| 5/5 [1:13:00<00:00, 875.97s/it]


{'task_name': 'HIV', 'fp_type': 'RDkitFP', 'split-time': 0, 'test_roc_auc': 0.74650340871782}
HIV AtomPairFP
loading dataset: HIV number of split times: 3


100%|##########| 5/5 [1:11:53<00:00, 863.00s/it]


{'task_name': 'HIV', 'fp_type': 'AtomPairFP', 'split-time': 0, 'test_roc_auc': 0.6956671623631202}
HIV TorsionFP
loading dataset: HIV number of split times: 3


100%|##########| 5/5 [1:04:46<00:00, 782.51s/it]


{'task_name': 'HIV', 'fp_type': 'TorsionFP', 'split-time': 0, 'test_roc_auc': 0.7046679155642249}
HIV AvalonFP
loading dataset: HIV number of split times: 3


100%|##########| 5/5 [1:11:15<00:00, 854.90s/it]
  0%|          | 0/5 [00:00<?, ?it/s]

{'task_name': 'HIV', 'fp_type': 'AvalonFP', 'split-time': 0, 'test_roc_auc': 0.6986413410842234}
HIV EstateFP
loading dataset: HIV number of split times: 3


100%|##########| 5/5 [01:39<00:00, 19.91s/it]
  0%|          | 0/5 [00:00<?, ?it/s]

{'task_name': 'HIV', 'fp_type': 'EstateFP', 'split-time': 0, 'test_roc_auc': 0.6875943915486973}
HIV MACCSFP
loading dataset: HIV number of split times: 3


100%|##########| 5/5 [05:40<00:00, 67.97s/it]


{'task_name': 'HIV', 'fp_type': 'MACCSFP', 'split-time': 0, 'test_roc_auc': 0.6807808184785338}
HIV PharmacoErGFP
loading dataset: HIV number of split times: 3


100%|##########| 5/5 [14:26<00:00, 173.48s/it]
  0%|          | 0/5 [00:00<?, ?it/s]

{'task_name': 'HIV', 'fp_type': 'PharmacoErGFP', 'split-time': 0, 'test_roc_auc': 0.7561144479422159}
HIV PharmacoPFP
loading dataset: HIV number of split times: 3


100%|##########| 5/5 [06:13<00:00, 75.15s/it]


{'task_name': 'HIV', 'fp_type': 'PharmacoPFP', 'split-time': 0, 'test_roc_auc': 0.7076401630004443}
HIV PubChemFP
loading dataset: HIV number of split times: 3


100%|##########| 5/5 [25:22<00:00, 304.17s/it]


{'task_name': 'HIV', 'fp_type': 'PubChemFP', 'split-time': 0, 'test_roc_auc': 0.7147105969601577}
HIV MHFP6
loading dataset: HIV number of split times: 3


100%|##########| 5/5 [1:12:53<00:00, 874.68s/it]


{'task_name': 'HIV', 'fp_type': 'MHFP6', 'split-time': 0, 'test_roc_auc': 0.7592218853203035}
HIV MAP4
loading dataset: HIV number of split times: 3


100%|##########| 5/5 [1:13:03<00:00, 876.44s/it]


{'task_name': 'HIV', 'fp_type': 'MAP4', 'split-time': 0, 'test_roc_auc': 0.6985148419243322}


In [7]:
# Persist the accumulated result records (one dict per task / fingerprint / split,
# as printed in the log above) so the scores can be analysed later without
# re-running the sweep, which takes over an hour per fingerprint on HIV.
pd.DataFrame(data=performance).to_csv(path_or_buf='./knn_classification.csv')

In [8]:
# Mean test ROC-AUC for every (dataset, fingerprint) pair, averaged over the
# recorded split repetitions.
# GroupBy.mean() is used instead of .apply(np.mean): it is the vectorised
# built-in and always returns a one-column DataFrame, whereas np.mean applied
# to each sub-frame relies on how the installed pandas reduces a DataFrame
# (column-wise in older releases, to a single scalar in newer ones).
pd.DataFrame(performance).groupby(['task_name', 'fp_type'])[['test_roc_auc']].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,test_roc_auc
task_name,fp_type,Unnamed: 2_level_1
BACE,AtomPairFP,0.833243
BACE,AvalonFP,0.849457
BACE,EstateFP,0.796649
BACE,MACCSFP,0.846196
BACE,MAP4,0.730797
BACE,MHFP6,0.824819
BACE,MorganFP,0.841938
BACE,PharmacoErGFP,0.766033
BACE,PharmacoPFP,0.763859
BACE,PubChemFP,0.812953
