In [1]:
import numpy as np

In [2]:
import pandas as pd

In [3]:
from sklearn.cross_validation import KFold, StratifiedKFold

In [4]:
from scipy.stats import pearsonr, spearmanr, kendalltau

In [5]:
import oddt
from oddt.scoring.functions import rfscore
from oddt.scoring import ensemble_model, scorer
from oddt.scoring.models.regressors import randomforest, svm, neuralnetwork
from oddt.metrics import enrichment_factor, roc_auc, roc_log_auc
from sklearn.metrics import accuracy_score, precision_score, mean_squared_error, recall_score
from sklearn.utils import compute_sample_weight

In [6]:
# from cluster_helper import cluster
# cluster.dill = dill

In [7]:
# from cluster_helper.cluster import cluster_view

In [8]:
# Target identifiers iterated by the benchmark cell below; each one is used to
# build the per-target descriptor file paths. 102 entries, alphabetical order.
dude_ids = [
    'aa2ar', 'abl1', 'ace', 'aces', 'ada', 'ada17', 'adrb1', 'adrb2',
    'akt1', 'akt2', 'aldr', 'ampc', 'andr', 'aofb', 'bace1', 'braf',
    'cah2', 'casp3', 'cdk2', 'comt', 'cp2c9', 'cp3a4', 'csf1r', 'cxcr4',
    'def', 'dhi1', 'dpp4', 'drd3', 'dyr', 'egfr', 'esr1', 'esr2',
    'fa10', 'fa7', 'fabp4', 'fak1', 'fgfr1', 'fkb1a', 'fnta', 'fpps',
    'gcr', 'glcm', 'gria2', 'grik1', 'hdac2', 'hdac8', 'hivint', 'hivpr',
    'hivrt', 'hmdh', 'hs90a', 'hxk4', 'igf1r', 'inha', 'ital', 'jak2',
    'kif11', 'kit', 'kith', 'kpcb', 'lck', 'lkha4', 'mapk2', 'mcr',
    'met', 'mk01', 'mk10', 'mk14', 'mmp13', 'mp2k1', 'nos1', 'nram',
    'pa2ga', 'parp1', 'pde5a', 'pgh1', 'pgh2', 'plk1', 'pnph', 'ppara',
    'ppard', 'pparg', 'prgr', 'ptn1', 'pur2', 'pygm', 'pyrd', 'reni',
    'rock1', 'rxra', 'sahh', 'src', 'tgfr1', 'thb', 'thrb', 'try1',
    'tryb1', 'tysy', 'urok', 'vgfr2', 'wee1', 'xiap',
]
# Alternative, non-alphabetical ordering kept for reference (disabled):
#dude_ids = ['lck', 'src', 'ada17', 'hivpr', 'mk14', 'mmp13', 'aa2ar', 'bace1', 'pparg', 'parp1', 'ace', 'thrb', 'cdk2', 'esr1', 'esr2', 'vgfr2', 'fnta', 'drd3', 'csf1r', 'dhi1', 'casp3', 'gria2', 'kit', 'dyr', 'braf', 'tryb1', 'hdac8', 'aldr', 'akt1', 'ital', 'kpcb', 'tysy', 'ppard', 'hivint', 'ppara', 'urok', 'wee1', 'reni', 'grik1', 'aces', 'fa10', 'dpp4', 'adrb2', 'jak2', 'hivrt', 'fkb1a', 'cah2', 'kif11', 'try1', 'adrb1', 'akt2', 'rock1', 'pa2ga', 'pygm', 'mapk2', 'fa7', 'tgfr1', 'mk10', 'fak1', 'gcr', 'hdac2', 'prgr', 'ptn1', 'nram', 'abl1', 'hs90a', 'egfr', 'hxk4', 'mk01', 'cxcr4', 'lkha4', 'ada', 'pur2', 'pnph', 'andr', 'rxra', 'fpps', 'cp3a4', 'met', 'ampc', 'mp2k1', 'pyrd', 'pgh1', 'kith', 'thb', 'comt', 'cp2c9', 'aofb', 'fabp4', 'mcr', 'inha', 'pgh2', 'def', 'xiap', 'glcm', 'pde5a', 'nos1', 'sahh', 'hmdh', 'igf1r', 'plk1', 'fgfr1']

In [9]:
# Display the NumPy version in use so the environment of this run is recorded with the output.
np.__version__

'1.10.4'

In [10]:
%%time
# Multi-target benchmark: for every docking engine and descriptor version,
# pool all targets into stratified CV folds, train one random forest per fold,
# predict the held-out rows and compute per-target metrics. One dict per
# (engine, descriptor version, target) is appended to `out`, which is written
# to 'dude_horizontal_multitarget.csv' at the end.
out = []
n_folds = 5

# Activity thresholds (loop-invariant, so defined once).
act_cutoff = 6.
inactive_cutoff = 5.95
# A cutoff of 10 or more switches the pipeline to the untransformed activity
# scale (lower = more active); otherwise activities are converted with -log10
# (higher = more active). The original code mixed `> 10` and `>= 10` for this
# test; it is evaluated once here so every step agrees.
raw_scale = act_cutoff >= 10

path_template = 'head1_full/%s/%s/%s_desc_v%i.csv.gz'

for engine, v in [(e, v) for e in ['dude', 'dock', 'vina'] for v in [1, 2, 3]]:

    # Per descriptor version: feature columns, storage dtype for those
    # columns and the random forest `max_features` value.
    if v == 1:
        col_range = list(range(1, 37))
        np_type = np.uint16
        mtry = 15
    elif v == 2:
        col_range = list(range(1, 217))
        np_type = np.uint16
        mtry = 100
    elif v == 3:
        col_range = list(range(1, 43))
        np_type = np.float16
        mtry = 15
    col_dtypes = {i: np_type for i in col_range}

    # Per-fold lists of per-target frames; concatenated once after the target
    # loop instead of re-copying the growing fold on every target.
    train_parts = [[] for _ in range(n_folds)]
    test_parts = [[] for _ in range(n_folds)]

    for dude_id in dude_ids:
        # Best-effort loading: a target whose files are missing or unreadable
        # is skipped. `Exception` (not a bare except) keeps Ctrl-C working, and
        # the message makes the skipped target visible in the output.
        try:
            actives_full = pd.read_csv(path_template % (dude_id, engine, 'actives', v),
                                       dtype=col_dtypes)
            decoys_full = pd.read_csv(path_template % (dude_id, engine, 'decoys', v),
                                      dtype=col_dtypes)
        except Exception as err:
            print('skipping %s (%s, v%i): %s' % (dude_id, engine, v, err))
            continue

        # Skip targets with no actives or no decoys.
        if len(actives_full) == 0 or len(decoys_full) == 0:
            print('skipping %s (%s, v%i): empty actives or decoys' % (dude_id, engine, v))
            continue

        # Decoys get a fixed activity that maps to `inactive_cutoff` after the
        # normalisation below (presumably 'act' is in nM, given the 1e-9
        # factor - TODO confirm).
        decoys_full['act'] = inactive_cutoff if raw_scale else 10**(9-inactive_cutoff)

        # One table per target.
        data = pd.concat((actives_full, decoys_full))

        # Normalise activities.
        if raw_scale:
            data['act'] = np.clip(data['act'], 1e-9, inactive_cutoff)
        else:
            data['act'] = np.clip(-np.log10(np.clip(data['act'], 1e-9, 1e9) * 1e-9), 0, 15)
        # Binary active / inactive label.
        data['act_bin'] = data['act'] < act_cutoff if raw_scale else data['act'] > act_cutoff

        # Manual CV; stratified so every fold keeps the active/inactive ratio
        # of each target.
        oddt.random_seed(0)
        cv_split = StratifiedKFold(data['act_bin'], n_folds=n_folds, shuffle=True)
        for n, (cv_train, cv_test) in enumerate(cv_split):
            train_parts[n].append(data.iloc[cv_train])
            test_parts[n].append(data.iloc[cv_test])

    # Nothing could be loaded for this engine/version: there is nothing to train on.
    if not train_parts[0]:
        print('no data for engine %s, v%i - skipping' % (engine, v))
        continue

    train = [pd.concat(parts) for parts in train_parts]
    test = [pd.concat(parts) for parts in test_parts]
    del train_parts, test_parts  # release the per-target copies

    # One random forest per fold, with samples weighted to balance the
    # active / inactive classes.
    rfs = []
    for n in range(n_folds):
        oddt.random_seed(0)
        rf = randomforest(n_estimators=500, n_jobs=-1, verbose=1,
                          max_features=mtry,
                          oob_score=True,
                          bootstrap=True,
                          random_state=0,
                          )
        rf.fit(train[n][col_range],
               train[n]['act'],
               sample_weight=compute_sample_weight('balanced', train[n]['act_bin']))
        rfs.append(rf)

    # Predict each held-out fold with the model that did not see it.
    test_dfs = []
    for n in range(n_folds):
        rfs[n].verbose = 0
        test[n]['pred'] = rfs[n].predict(test[n][col_range])
        test[n]['pred_bin'] = test[n]['pred'] < act_cutoff if raw_scale else test[n]['pred'] > act_cutoff
        # Best predictions first (descending on the log scale).
        test_dfs.append(test[n].sort_values('pred', ascending=raw_scale))

    # Per-target metrics over the pooled held-out predictions.
    for i, (dude_id, test_df) in enumerate(pd.concat(test_dfs).groupby('dude_id')):
        print('%i %s %i' % (i, dude_id, len(test_df)))
        # Rank-based values below (top-1% slice) need best predictions first.
        test_df = test_df.sort_values('pred', ascending=raw_scale)
        if test_df['act_bin'].sum() == 0 or len(test_df) == 0:
            continue

        n_total = len(test_df['act_bin'])
        # Size of the top 1%; explicit floor division (0 for < 100 rows).
        n_top = n_total // 100
        a_top = test_df['act_bin'].iloc[:n_top].sum()
        is_active = test_df['act_bin']

        # NOTE(review): the ROC metrics receive the thresholded predictions
        # ('pred_bin'), not the continuous 'pred' score - confirm intended.
        d = {'engine': engine,
             'v': v,
             'dude_id': dude_id,
             'roc_auc': roc_auc(test_df['act_bin'], test_df['pred_bin'], ascending_score=False),  # binary is descending

             'n_1perc': n_top,
             'n_100perc': n_total,
             'a_1perc': a_top,
             'a_100perc': test_df['act_bin'].sum(),
             # NaN instead of ZeroDivisionError when the top 1% is empty.
             'hitrate_1perc': float(a_top) / float(n_top) if n_top else np.nan,
             'hitrate_100perc': float(test_df['act_bin'].sum()) / float(n_total),

             'ef1_perc': enrichment_factor(test_df['act_bin'], test_df['pred_bin'], kind='percentage'),
             'ef0.1': enrichment_factor(test_df['act_bin'], test_df['pred_bin'], percentage=0.1),
             'ef1': enrichment_factor(test_df['act_bin'], test_df['pred_bin']),
             'ef2': enrichment_factor(test_df['act_bin'], test_df['pred_bin'], percentage=2),
             'ef5': enrichment_factor(test_df['act_bin'], test_df['pred_bin'], percentage=5),
             'ef10': enrichment_factor(test_df['act_bin'], test_df['pred_bin'], percentage=10),

             # Correlations on all rows, then on actives and inactives only.
             'rp': pearsonr(test_df['act'], test_df['pred'])[0],
             'rs': spearmanr(test_df['act'], test_df['pred'])[0],
             'rk': kendalltau(test_df['act'], test_df['pred'])[0],

             'rp_active': pearsonr(test_df['act'][is_active], test_df['pred'][is_active])[0],
             'rs_active': spearmanr(test_df['act'][is_active], test_df['pred'][is_active])[0],
             'rk_active': kendalltau(test_df['act'][is_active], test_df['pred'][is_active])[0],

             'rp_inactive': pearsonr(test_df['act'][~is_active], test_df['pred'][~is_active])[0],
             'rs_inactive': spearmanr(test_df['act'][~is_active], test_df['pred'][~is_active])[0],
             'rk_inactive': kendalltau(test_df['act'][~is_active], test_df['pred'][~is_active])[0],

             'mse': mean_squared_error(test_df['act'], test_df['pred']),
             'roc_log_auc': roc_log_auc(test_df['act_bin'], test_df['pred_bin'], ascending_score=False),  # binary is descending
             'precision': precision_score(test_df['act_bin'], test_df['pred_bin']),
             'accuracy': accuracy_score(test_df['act_bin'], test_df['pred_bin']),
             'recall': recall_score(test_df['act_bin'], test_df['pred_bin']),
             }
        out.append(d)

pd.DataFrame(out).to_csv('dude_horizontal_multitarget.csv')

[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed:  3.8min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  4.7min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed:  3.9min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  4.8min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed:  4.1min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  5.0min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed:  4.0min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  4.9min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed:  4.0min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  5.0min finished


0 aa2ar 31764
1 abl1 10791
2 ace 17079
3 aces 22826
4 ada 5520
5 ada17 36256
6 adrb1 15374
7 adrb2 13459
8 akt1 16340
9 akt2 6887
10 aldr 9085
11 ampc 2877
12 andr 13286
13 aofb 6851
14 bace1 18307
15 braf 10056
16 cah2 31475
17 casp3 10873
18 cdk2 28233
19 comt 3866
20 cp2c9 7366
21 cp3a4 11931
22 csf1r 12255
23 cxcr4 3441
24 def 5788
25 dhi1 19438
26 dpp4 41315
27 drd3 33842
28 dyr 17330
29 egfr 35411
30 esr1 20792
31 esr2 20454
32 fa10 28686
33 fa7 6328
34 fabp4 2731
35 fak1 5416
36 fgfr1 7920
37 fkb1a 5905
38 fnta 51863
39 fpps 8616
40 gcr 14352
41 glcm 3793
42 gria2 11846
43 grik1 6581
44 hdac2 10337
45 hdac8 10594
46 hivint 6730
47 hivpr 36104
48 hivrt 18628
49 hmdh 8896
50 hs90a 4900
51 hxk4 4477
52 igf1r 9416
53 inha 2336
54 ital 4076
55 jak2 6557
56 kif11 6949
57 kit 10570
58 kith 2902
59 kpcb 8777
60 lck 27703
61 lkha4 7615
62 mapk2 6215
63 mcr 4208
64 met 11373
65 mk01 4503
66 mk10 6681
67 mk14 36297
68 mmp13 37604
69 mp2k1 8178
70 nos1 8101
71 nram 6280
72 pa2ga 5238
73 par

[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:  8.3min
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed: 22.1min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed: 27.8min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:  8.3min
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed: 21.9min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed: 27.4min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:  8.2min
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed: 22.2min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed: 27.7min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:  8.5min
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed: 21.9min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed: 27.5min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:  8.1min
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed: 21.8min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed: 27.2min finished


0 aa2ar 31764
1 abl1 10791
2 ace 17079
3 aces 22826
4 ada 5520
5 ada17 36256
6 adrb1 15374
7 adrb2 13459
8 akt1 16340
9 akt2 6887
10 aldr 9085
11 ampc 2877
12 andr 13286
13 aofb 6851
14 bace1 18307
15 braf 10056
16 cah2 31475
17 casp3 10873
18 cdk2 28233
19 comt 3866
20 cp2c9 7366
21 cp3a4 11931
22 csf1r 12255
23 cxcr4 3441
24 def 5788
25 dhi1 19438
26 dpp4 41315
27 drd3 33842
28 dyr 17330
29 egfr 35411
30 esr1 20792
31 esr2 20454
32 fa10 28686
33 fa7 6328
34 fabp4 2731
35 fak1 5416
36 fgfr1 7920
37 fkb1a 5905
38 fnta 51863
39 fpps 8616
40 gcr 14352
41 glcm 3793
42 gria2 11846
43 grik1 6581
44 hdac2 10337
45 hdac8 10594
46 hivint 6730
47 hivpr 36104
48 hivrt 18628
49 hmdh 8896
50 hs90a 4900
51 hxk4 4477
52 igf1r 9416
53 inha 2336
54 ital 4076
55 jak2 6557
56 kif11 6949
57 kit 10570
58 kith 2902
59 kpcb 8777
60 lck 27703
61 lkha4 7615
62 mapk2 6215
63 mcr 4208
64 met 11373
65 mk01 4503
66 mk10 6681
67 mk14 36297
68 mmp13 37604
69 mp2k1 8178
70 nos1 8101
71 nram 6280
72 pa2ga 5238
73 par

[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed:  4.6min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  5.8min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed:  4.5min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  5.7min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed:  4.5min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  5.7min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed:  4.4min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  5.5min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed:  4.5min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  5.6min finished


0 aa2ar 31764
1 abl1 10791
2 ace 17079
3 aces 22826
4 ada 5520
5 ada17 36256
6 adrb1 15374
7 adrb2 13459
8 akt1 16340
9 akt2 6887
10 aldr 9085
11 ampc 2877
12 andr 13286
13 aofb 6851
14 bace1 18307
15 braf 10056
16 cah2 31475
17 casp3 10873
18 cdk2 28233
19 comt 3866
20 cp2c9 7366
21 cp3a4 11931
22 csf1r 12255
23 cxcr4 3441
24 def 5788
25 dhi1 19438
26 dpp4 41315
27 drd3 33842
28 dyr 17330
29 egfr 35411
30 esr1 20792
31 esr2 20454
32 fa10 28686
33 fa7 6328
34 fabp4 2731
35 fak1 5416
36 fgfr1 7920
37 fkb1a 5905
38 fnta 51863
39 fpps 8616
40 gcr 14352
41 glcm 3793
42 gria2 11846
43 grik1 6581
44 hdac2 10337
45 hdac8 10594
46 hivint 6730
47 hivpr 36104
48 hivrt 18628
49 hmdh 8896
50 hs90a 4900
51 hxk4 4477
52 igf1r 9416
53 inha 2336
54 ital 4076
55 jak2 6557
56 kif11 6949
57 kit 10570
58 kith 2902
59 kpcb 8777
60 lck 27703
61 lkha4 7615
62 mapk2 6215
63 mcr 4208
64 met 11373
65 mk01 4503
66 mk10 6681
67 mk14 36297
68 mmp13 37604
69 mp2k1 8178
70 nos1 8101
71 nram 6280
72 pa2ga 5238
73 par

[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:   59.0s
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed:  2.6min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  3.2min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:   54.8s
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  3.0min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:   55.6s
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  3.0min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:   56.5s
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed:  2.5min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  3.1min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:   55.4s
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed:  2.5min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  3.2min finished


0 aa2ar 30840
1 abl1 10462
2 ace 16612
3 aces 25576
4 ada 5451
5 ada17 35010
6 adrb1 15580
7 adrb2 14578
8 akt1 16214
9 akt2 6784
10 aldr 8857
11 ampc 2809
12 andr 12809
13 aofb 6773
14 bace1 17768
15 braf 9743
16 cah2 29341
17 casp3 10596
18 cdk2 21580
19 comt 2422
20 csf1r 10536
21 cxcr4 3391
22 dhi1 19084
23 drd3 33627
24 dyr 16813
25 esr1 20382
26 esr2 19887
27 fa10 1344
28 fa7 3767
29 fabp4 2615
30 fak1 5326
31 fkb1a 5670
32 fpps 8604
33 gcr 14327
34 glcm 3718
35 gria2 11450
36 grik1 6527
37 hdac2 10209
38 hdac8 10431
39 hivint 6523
40 hivpr 23642
41 hivrt 9176
42 hs90a 4716
43 hxk4 4622
44 inha 2291
45 ital 8254
46 jak2 6406
47 kif11 6782
48 kit 10215
49 kith 2842
50 kpcb 8414
51 lck 26746
52 lkha4 8973
53 mapk2 6031
54 mcr 4463
55 mk01 3853
56 mk10 6458
57 mk14 35047
58 mmp13 24459
59 mp2k1 5179
60 nram 6102
61 pa2ga 5097
62 parp1 29617
63 pgh1 559
64 pnph 3995
65 ppara 18723
66 ppard 11284
67 pparg 24523
68 prgr 14377
69 ptn1 6981
70 pur2 2659
71 pygm 3858
72 pyrd 6235
73 reni 

[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:  5.0min
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed: 13.2min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed: 16.5min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:  4.8min
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed: 13.2min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed: 16.6min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:  4.8min
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed: 12.8min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed: 16.0min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:  5.1min
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed: 13.3min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed: 16.7min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:  4.7min
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed: 12.5min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed: 15.8min finished


0 aa2ar 30840
1 abl1 10462
2 ace 16612
3 aces 25576
4 ada 5451
5 ada17 35010
6 adrb1 15580
7 adrb2 14578
8 akt1 16214
9 akt2 6784
10 aldr 8857
11 ampc 2809
12 andr 12809
13 aofb 6773
14 bace1 17768
15 braf 9743
16 cah2 29341
17 casp3 10596
18 cdk2 21580
19 comt 2422
20 csf1r 10536
21 cxcr4 3391
22 dhi1 19084
23 drd3 33627
24 dyr 16813
25 esr1 20382
26 esr2 19887
27 fa10 1344
28 fa7 3767
29 fabp4 2615
30 fak1 5326
31 fkb1a 5670
32 fpps 8604
33 gcr 14327
34 glcm 3718
35 gria2 11450
36 grik1 6527
37 hdac2 10209
38 hdac8 10431
39 hivint 6523
40 hivpr 23642
41 hivrt 9176
42 hs90a 4716
43 hxk4 4622
44 inha 2291
45 ital 8254
46 jak2 6406
47 kif11 6782
48 kit 10215
49 kith 2842
50 kpcb 8414
51 lck 26746
52 lkha4 8973
53 mapk2 6031
54 mcr 4463
55 mk01 3853
56 mk10 6458
57 mk14 35047
58 mmp13 24459
59 mp2k1 5179
60 nram 6102
61 pa2ga 5097
62 parp1 29617
63 pgh1 559
64 pnph 3995
65 ppara 18723
66 ppard 11284
67 pparg 24523
68 prgr 14377
69 ptn1 6981
70 pur2 2659
71 pygm 3858
72 pyrd 6235
73 reni 

[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed:  2.9min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  3.6min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed:  2.8min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  3.5min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed:  2.8min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  3.5min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed:  2.8min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  3.5min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed:  2.8min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  3.5min finished


0 aa2ar 30840
1 abl1 10462
2 ace 16612
3 aces 25576
4 ada 5451
5 ada17 35010
6 adrb1 15580
7 adrb2 14578
8 akt1 16214
9 akt2 6784
10 aldr 8857
11 ampc 2809
12 andr 12809
13 aofb 6773
14 bace1 17768
15 braf 9743
16 cah2 29341
17 casp3 10596
18 cdk2 21580
19 comt 2422
20 csf1r 10536
21 cxcr4 3391
22 dhi1 19084
23 drd3 33627
24 dyr 16813
25 esr1 20382
26 esr2 19887
27 fa10 1344
28 fa7 3767
29 fabp4 2615
30 fak1 1668
31 fkb1a 5670
32 fpps 8604
33 gcr 14327
34 glcm 3718
35 gria2 11450
36 grik1 6527
37 hdac2 10209
38 hdac8 10431
39 hivint 6523
40 hivpr 23642
41 hivrt 9176
42 hs90a 4716
43 hxk4 4622
44 inha 2291
45 ital 8254
46 jak2 6406
47 kif11 6782
48 kit 9670
49 kith 2842
50 kpcb 8414
51 lck 26746
52 lkha4 8973
53 mapk2 6031
54 mcr 4463
55 mk01 3853
56 mk10 6458
57 mk14 35047
58 mmp13 24459
59 mp2k1 5179
60 nram 6102
61 pa2ga 5097
62 parp1 29617
63 pgh1 559
64 pnph 3995
65 ppara 18723
66 ppard 11284
67 pparg 24523
68 prgr 14377
69 ptn1 6981
70 pur2 2659
71 pygm 3858
72 pyrd 6235
73 reni 6

[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:   50.1s
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  2.8min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:   48.1s
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed:  2.1min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  2.7min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:   48.5s
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  2.7min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:   49.1s
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  2.8min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:   48.5s
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  2.7min finished


0 aa2ar 18523
1 abl1 5028
2 ace 16241
3 aces 7676
4 ada 5360
5 ada17 34210
6 adrb1 706
7 adrb2 14032
8 akt1 15640
9 akt2 6797
10 aldr 8374
11 ampc 2756
12 andr 8296
13 aofb 6519
14 bace1 16336
15 braf 9898
16 cah2 2219
17 casp3 10159
18 cdk2 5033
19 comt 3863
20 cp2c9 7492
21 cp3a4 11081
22 csf1r 10886
23 cxcr4 3440
24 def 5209
25 dhi1 13860
26 dpp4 26727
27 drd3 1227
28 dyr 13494
29 egfr 11420
30 esr1 19207
31 esr2 18384
32 fa10 18871
33 fa7 5932
34 fabp4 1077
35 fak1 5105
36 fkb1a 5523
37 fnta 48427
38 fpps 4089
39 gcr 5004
40 glcm 720
41 gria2 5061
42 grik1 6465
43 hdac2 7420
44 hdac8 5966
45 hivint 6191
46 hivpr 33032
47 hivrt 17831
48 hmdh 1090
49 hs90a 4774
50 hxk4 4313
51 igf1r 338
52 inha 2335
53 ital 2714
54 jak2 6397
55 kif11 525
56 kit 10457
57 kith 2498
58 kpcb 1509
59 lck 26769
60 lkha4 9228
61 mapk2 4629
62 mcr 5086
63 met 10302
64 mk01 4427
65 mk10 359
66 mk14 11054
67 mmp13 33779
68 nos1 2334
69 nram 1937
70 pa2ga 4883
71 parp1 15889
72 pgh1 10581
73 pgh2 6134
74 pnph 7

[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:  4.6min
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed: 12.0min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed: 15.1min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:  4.5min
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed: 11.7min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed: 14.7min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:  4.6min
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed: 12.0min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed: 15.2min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:  4.3min
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed: 11.5min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed: 14.5min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:  4.4min
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed: 11.7min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed: 14.7min finished


0 aa2ar 18523
1 abl1 5028
2 ace 16241
3 aces 7676
4 ada 5360
5 ada17 34210
6 adrb1 706
7 adrb2 14032
8 akt1 15640
9 akt2 6797
10 aldr 8374
11 ampc 2756
12 andr 8296
13 aofb 6519
14 bace1 16336
15 braf 9898
16 cah2 2219
17 casp3 10159
18 cdk2 5033
19 comt 3863
20 cp2c9 7492
21 cp3a4 11081
22 csf1r 10886
23 cxcr4 3440
24 def 5209
25 dhi1 13860
26 dpp4 26727
27 drd3 1227
28 dyr 13494
29 egfr 11420
30 esr1 19207
31 esr2 18384
32 fa10 18871
33 fa7 5932
34 fabp4 1077
35 fak1 5105
36 fkb1a 5523
37 fnta 48427
38 fpps 4089
39 gcr 5004
40 glcm 720
41 gria2 5061
42 grik1 6465
43 hdac2 7420
44 hdac8 5966
45 hivint 6191
46 hivpr 33032
47 hivrt 17831
48 hmdh 1090
49 hs90a 4774
50 hxk4 4313
51 igf1r 338
52 inha 2335
53 ital 2714
54 jak2 6397
55 kif11 525
56 kit 10457
57 kith 2498
58 kpcb 1509
59 lck 26769
60 lkha4 9228
61 mapk2 4629
62 mcr 5086
63 met 10302
64 mk01 4427
65 mk10 359
66 mk14 11054
67 mmp13 33779
68 nos1 2334
69 nram 1937
70 pa2ga 4883
71 parp1 15889
72 pgh1 10581
73 pgh2 6134
74 pnph 7

[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:   56.7s
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed:  2.5min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  3.2min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:   54.9s
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed:  2.5min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  3.1min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:   54.4s
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  3.1min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:   58.8s
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed:  2.5min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  3.2min finished
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:   58.2s
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed:  2.5min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  3.2min finished


0 aa2ar 18523
1 abl1 5028
2 ace 16241
3 aces 7676
4 ada 5360
5 ada17 34210
6 adrb1 706
7 adrb2 14032
8 akt1 15640
9 akt2 6797
10 aldr 8374
11 ampc 2756
12 andr 8296
13 aofb 6519
14 bace1 16336
15 braf 9898
16 cah2 2219
17 casp3 10159
18 cdk2 5033
19 comt 3863
20 cp2c9 7492
21 cp3a4 11081
22 csf1r 10886
23 cxcr4 3440
24 def 5209
25 dhi1 13860
26 dpp4 26727
27 drd3 1227
28 dyr 13494
29 egfr 11420
30 esr1 19207
31 esr2 18384
32 fa10 18871
33 fa7 5932
34 fabp4 1077
35 fak1 5105
36 fkb1a 5523
37 fnta 48427
38 fpps 4089
39 gcr 5004
40 glcm 720
41 gria2 5061
42 grik1 6465
43 hdac2 7420
44 hdac8 5966
45 hivint 6191
46 hivpr 33032
47 hivrt 17831
48 hmdh 1090
49 hs90a 4774
50 hxk4 4313
51 igf1r 338
52 inha 2335
53 ital 2714
54 jak2 6397
55 kif11 525
56 kit 10457
57 kith 2498
58 kpcb 1509
59 lck 26769
60 lkha4 9228
61 mapk2 4629
62 mcr 5086
63 met 10302
64 mk01 4427
65 mk10 359
66 mk14 11054
67 mmp13 33779
68 nos1 2334
69 nram 1937
70 pa2ga 4883
71 parp1 15889
72 pgh1 10581
73 pgh2 6134
74 pnph 7

In [11]:
# Average the per-target metrics collected in `out` for each (engine, v) combination.
pd.DataFrame(out).groupby(['engine', 'v']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,a_100perc,a_1perc,accuracy,ef0.1,ef1,ef10,ef1_perc,ef2,ef5,hitrate_100perc,...,rk_active,rk_inactive,roc_auc,roc_log_auc,rp,rp_active,rp_inactive,rs,rs_active,rs_inactive
engine,v,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
dock,1,127.117647,31.105882,0.899147,53.526838,23.562085,5.582535,0.263163,15.993852,8.730285,0.016435,...,0.125972,,0.72629,0.275734,0.29367,0.174772,-3.27738e-15,0.118765,0.17955,
dock,2,127.117647,38.647059,0.896099,63.522273,29.456727,6.298625,0.326007,19.348283,10.388531,0.016435,...,0.138704,,0.759806,0.287296,0.34053,0.19328,8.854873e-16,0.133245,0.199461,
dock,3,127.117647,38.564706,0.902571,65.131866,30.207068,6.280322,0.331921,19.741835,10.445709,0.016873,...,0.122754,,0.759908,0.292368,0.352278,0.173029,1.399573e-15,0.134114,0.17894,
dude,1,221.821782,68.128713,0.86087,49.563089,30.160983,6.546291,0.495157,20.228803,10.949892,0.016541,...,0.162721,,0.786451,0.260458,0.433531,0.228242,-2.700297e-16,0.161935,0.237922,
dude,2,221.821782,86.376238,0.852003,55.630863,38.733454,7.445475,0.632333,25.789798,13.027793,0.016541,...,0.177543,,0.827115,0.263897,0.514475,0.255212,-1.670536e-15,0.178004,0.258082,
dude,3,221.821782,83.0,0.871244,55.790865,36.662593,7.408376,0.598228,24.712408,12.726022,0.016541,...,0.167683,,0.825613,0.280144,0.505941,0.242682,6.262829e-16,0.177333,0.244113,
vina,1,159.030928,40.701031,0.838982,42.972438,26.037029,6.345728,0.537498,18.053006,10.145217,0.037142,...,0.158006,,0.78615,0.243784,0.438053,0.233519,-9.717489e-16,0.203236,0.232358,
vina,2,159.030928,51.226804,0.831418,52.192064,32.912191,7.223602,0.648503,22.472685,12.162953,0.037142,...,0.178877,,0.824481,0.247687,0.516533,0.261956,-2.110566e-15,0.221927,0.260101,
vina,3,159.030928,49.680412,0.849513,50.674311,32.006983,7.114702,0.626689,22.221425,11.837989,0.037142,...,0.162176,,0.823679,0.259601,0.497767,0.23061,4.489416e-16,0.220157,0.237265,
