In [1]:
import warnings, os
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
from glob import glob


import matplotlib.pyplot as plt
import matplotlib.image as mpimg

from aggmap import AggMap, loadmap
import seaborn as sns


import xgboost as xgb
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import confusion_matrix, precision_recall_curve, roc_auc_score
from sklearn.metrics import auc as calculate_auc


## LGR, RF, KNN, ROF, XGBoost

In [2]:
from sklearn.ensemble import RandomForestClassifier
from rotation_forest import RotationForestClassifier

In [3]:
def score(dfr):
    y_true = dfr.y_true
    y_score = dfr.y_score
    y_pred = dfr.y_score.round()

    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()

    acc = (tp + tn) / sum([tn, fp, fn, tp])
    auc = roc_auc_score(y_true, y_score)
    ber =  (fp / (fp + tn) + fn / (tp + fn)) * 0.5

    print('acc: %.3f, roc-auc: %.3f, BER: %.3f' % (acc,auc,ber))

    return acc, auc


def get_best_params(X, y):
    clf = RotationForestClassifier()
    parameters = {'max_depth': [4, 6, 9],
                  'n_estimators': [10, 50, 100]}

    grid = GridSearchCV(clf, parameters, scoring='roc_auc', n_jobs=9, iid=False, cv=5, verbose=2)
    gres = grid.fit(X, y)
    return grid.best_params_

In [4]:
lst = glob('/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-G/*.h5')

n_fold = 5

res = []
for p in lst:
    
    print('#'*50 + ' %s ' % p + '#'*50 )
    dfx = pd.read_hdf(p, key = 'expression')
    dfy = pd.read_hdf(p, key = 'labels')
    print(p, dfy.value_counts().to_dict())

    outer = KFold(n_splits = n_fold, shuffle = True, random_state = 123)
    outer_idx = list(outer.split(dfx, dfy))

    fold_all = []
    for i, idx in enumerate(outer_idx):

        fold_num = "fold_%s" % str(i).zfill(2) 

        train_idx, test_idx = idx

        # subset feature selection by the fold change
        dfxy_train = dfx.iloc[train_idx].join(dfy.iloc[train_idx])
        abds = dfxy_train.groupby(dfy.name).mean()
        fc = abs(abds.iloc[0] - abds.iloc[1])
        selected = fc[fc > 0.5].index

        trainX = dfx[selected].iloc[train_idx].values
        trainY = dfy.iloc[train_idx].values

        testX = dfx[selected].iloc[test_idx].values
        testY = dfy.iloc[test_idx].values

        ## training
        print("\nInput train and test X shape is %s, %s  \n" % (trainX.shape,  testX.shape))
        print("Getting the best parameters by gridsearch \n")
        #best_params = get_best_params(trainX, trainY)
        best_params = {'n_estimators':50, 'n_features_per_subset':10}
        
        clf = RotationForestClassifier(**best_params, n_jobs=20, verbose=1)
        clf.fit(trainX, trainY)

        ## evaluation
        y_true = testY
        y_score = clf.predict_proba(testX)[:,1]
        dfr = pd.DataFrame([y_true, y_score]).T
        dfr.columns = ['y_true', 'y_score']
        dfr.index = dfy.iloc[test_idx].index
        acc, auc = score(dfr)
        
        fold_all.append(auc)
        res.append([auc, i, p, best_params])
        
    print('The auc score for %s is %s.' % (p, np.mean(fold_all)))


################################################## /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-G/O_z-score_train_LIHC_grade.h5 ##################################################
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-G/O_z-score_train_LIHC_grade.h5 {0: 250, 1: 124}

Input train and test X shape is (299, 2260), (75, 2260)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   18.2s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   43.7s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.7s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    2.9s finished


acc: 0.613, roc-auc: 0.583, BER: 0.424

Input train and test X shape is (299, 2854), (75, 2854)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   27.4s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   59.7s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.7s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    3.2s finished


acc: 0.533, roc-auc: 0.464, BER: 0.528

Input train and test X shape is (299, 1785), (75, 1785)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   15.7s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   37.2s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.5s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    2.2s finished


acc: 0.640, roc-auc: 0.525, BER: 0.429

Input train and test X shape is (299, 1909), (75, 1909)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   15.2s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   37.0s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.7s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    2.6s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.


acc: 0.613, roc-auc: 0.543, BER: 0.468

Input train and test X shape is (300, 1627), (74, 1627)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   12.7s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   30.2s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.4s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    1.6s finished


acc: 0.595, roc-auc: 0.637, BER: 0.459
The auc score for /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-G/O_z-score_train_LIHC_grade.h5 is 0.5503683877434502.
################################################## /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-G/O_z-score_train_KIRC_grade.h5 ##################################################
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-G/O_z-score_train_KIRC_grade.h5 {0: 337, 1: 207}

Input train and test X shape is (435, 307), (109, 307)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    2.8s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    5.5s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.2s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    0.6s finished


acc: 0.532, roc-auc: 0.485, BER: 0.488

Input train and test X shape is (435, 173), (109, 173)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    1.8s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    3.3s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.2s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    0.5s finished


acc: 0.532, roc-auc: 0.491, BER: 0.492

Input train and test X shape is (435, 209), (109, 209)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    1.6s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    3.6s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.2s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    0.7s finished


acc: 0.541, roc-auc: 0.523, BER: 0.490

Input train and test X shape is (435, 284), (109, 284)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    2.4s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    4.7s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.3s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    0.9s finished


acc: 0.495, roc-auc: 0.489, BER: 0.546

Input train and test X shape is (436, 1027), (108, 1027)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    8.1s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   18.2s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.4s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    1.6s finished


acc: 0.565, roc-auc: 0.583, BER: 0.458
The auc score for /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-G/O_z-score_train_KIRC_grade.h5 is 0.5139632783180559.
################################################## /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-G/O_z-score_train_STAD_grade.h5 ##################################################
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-G/O_z-score_train_STAD_grade.h5 {1: 246, 0: 170}

Input train and test X shape is (332, 4322), (84, 4322)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   55.7s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:  2.2min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.9s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    4.2s finished


acc: 0.643, roc-auc: 0.684, BER: 0.366

Input train and test X shape is (333, 1721), (83, 1721)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   13.4s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   34.0s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.5s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    2.1s finished


acc: 0.627, roc-auc: 0.670, BER: 0.374

Input train and test X shape is (333, 5304), (83, 5304)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:  1.3min
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:  2.9min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    1.2s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    5.5s finished


acc: 0.627, roc-auc: 0.631, BER: 0.380

Input train and test X shape is (333, 3486), (83, 3486)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   40.6s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:  1.6min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.7s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    3.5s finished


acc: 0.627, roc-auc: 0.634, BER: 0.397

Input train and test X shape is (333, 3015), (83, 3015)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   32.2s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:  1.2min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.7s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    3.0s finished


acc: 0.614, roc-auc: 0.562, BER: 0.395
The auc score for /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-G/O_z-score_train_STAD_grade.h5 is 0.6362670411009275.
################################################## /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-G/O_z-score_train_UCEC_grade.h5 ##################################################
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-G/O_z-score_train_UCEC_grade.h5 {1: 324, 0: 230}

Input train and test X shape is (443, 5259), (111, 5259)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:  1.3min
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:  3.2min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    1.3s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    5.8s finished


acc: 0.721, roc-auc: 0.770, BER: 0.281

Input train and test X shape is (443, 5302), (111, 5302)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:  1.4min
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:  3.2min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    1.2s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    6.1s finished


acc: 0.748, roc-auc: 0.774, BER: 0.250

Input train and test X shape is (443, 5521), (111, 5521)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:  1.3min
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:  3.2min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    1.3s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    6.1s finished


acc: 0.739, roc-auc: 0.726, BER: 0.287

Input train and test X shape is (443, 5512), (111, 5512)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:  1.4min
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:  3.3min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    1.3s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    5.8s finished


acc: 0.748, roc-auc: 0.785, BER: 0.264

Input train and test X shape is (444, 5381), (110, 5381)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:  1.4min
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:  3.4min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    1.3s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    6.0s finished


acc: 0.709, roc-auc: 0.798, BER: 0.286
The auc score for /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-G/O_z-score_train_UCEC_grade.h5 is 0.7705151800207262.
################################################## /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-G/O_z-score_train_PAAD_grade.h5 ##################################################
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-G/O_z-score_train_PAAD_grade.h5 {0: 131, 1: 48}

Input train and test X shape is (143, 2606), (36, 2606)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   18.1s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   40.4s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.6s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    2.3s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.


acc: 0.722, roc-auc: 0.359, BER: 0.484

Input train and test X shape is (143, 1927), (36, 1927)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   11.9s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   28.8s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.6s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    1.8s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.


acc: 0.528, roc-auc: 0.522, BER: 0.520

Input train and test X shape is (143, 1935), (36, 1935)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   12.0s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   28.9s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.6s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    2.3s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.


acc: 0.722, roc-auc: 0.409, BER: 0.444

Input train and test X shape is (143, 2736), (36, 2736)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   20.0s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   47.9s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.6s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    2.7s finished


acc: 0.583, roc-auc: 0.512, BER: 0.584

Input train and test X shape is (144, 1740), (35, 1740)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    9.6s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   24.2s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.4s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    1.7s finished


acc: 0.571, roc-auc: 0.648, BER: 0.483
The auc score for /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-G/O_z-score_train_PAAD_grade.h5 is 0.4902455510560416.
################################################## /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-G/O_z-score_train_CESC_grade.h5 ##################################################
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-G/O_z-score_train_CESC_grade.h5 {0: 187, 1: 119}

Input train and test X shape is (244, 1925), (62, 1925)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   16.5s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   37.2s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.5s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    2.3s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.


acc: 0.484, roc-auc: 0.409, BER: 0.553

Input train and test X shape is (245, 2420), (61, 2420)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   19.6s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   46.7s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.6s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    2.7s finished


acc: 0.508, roc-auc: 0.543, BER: 0.503

Input train and test X shape is (245, 1470), (61, 1470)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   10.8s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   24.8s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.3s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    1.3s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.


acc: 0.639, roc-auc: 0.538, BER: 0.467

Input train and test X shape is (245, 1944), (61, 1944)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   16.8s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   37.7s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.4s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    1.9s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.


acc: 0.492, roc-auc: 0.466, BER: 0.521

Input train and test X shape is (245, 2096), (61, 2096)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   13.9s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   33.3s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.6s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    2.4s finished


acc: 0.508, roc-auc: 0.507, BER: 0.515
The auc score for /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-G/O_z-score_train_CESC_grade.h5 is 0.4926340683693625.
################################################## /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-G/O_z-score_test_HNSC_grade.h5 ##################################################
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-G/O_z-score_test_HNSC_grade.h5 {0: 385, 1: 119}

Input train and test X shape is (403, 1849), (101, 1849)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   17.3s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   38.6s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.6s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    2.6s finished


acc: 0.772, roc-auc: 0.684, BER: 0.359

Input train and test X shape is (403, 2578), (101, 2578)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   27.9s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:  1.0min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.6s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    2.5s finished


acc: 0.663, roc-auc: 0.529, BER: 0.493

Input train and test X shape is (403, 1450), (101, 1450)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   12.3s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   27.0s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.5s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    1.9s finished


acc: 0.634, roc-auc: 0.540, BER: 0.502

Input train and test X shape is (403, 1612), (101, 1612)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   14.1s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   30.4s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.5s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    2.1s finished


acc: 0.703, roc-auc: 0.596, BER: 0.412

Input train and test X shape is (404, 955), (100, 955)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    6.8s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   16.0s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.3s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    1.6s finished


acc: 0.720, roc-auc: 0.587, BER: 0.398
The auc score for /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-G/O_z-score_test_HNSC_grade.h5 is 0.587228666777675.
################################################## /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-G/O_z-score_train_LGG_grade.h5 ##################################################
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-G/O_z-score_train_LGG_grade.h5 {1: 269, 0: 263}

Input train and test X shape is (425, 2390), (107, 2390)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   24.1s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   55.9s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.7s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    2.9s finished


acc: 0.673, roc-auc: 0.657, BER: 0.331

Input train and test X shape is (425, 3233), (107, 3233)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   32.9s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:  1.3min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.7s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    2.9s finished


acc: 0.654, roc-auc: 0.695, BER: 0.346

Input train and test X shape is (426, 3169), (106, 3169)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   32.1s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:  1.3min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.8s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    3.4s finished


acc: 0.660, roc-auc: 0.704, BER: 0.341

Input train and test X shape is (426, 2814), (106, 2814)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   27.8s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:  1.1min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.8s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    3.1s finished


acc: 0.632, roc-auc: 0.668, BER: 0.354

Input train and test X shape is (426, 3157), (106, 3157)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   32.5s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:  1.2min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.9s


acc: 0.557, roc-auc: 0.633, BER: 0.443
The auc score for /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-G/O_z-score_train_LGG_grade.h5 is 0.6712292527971315.


[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    3.4s finished


In [7]:
dfres = pd.DataFrame(res, columns = ['score', 'fold', 'task','best_params'])
dfres.to_csv('./FS_RoationForest_5FCV_results_grade.csv')
dfres.groupby('task').mean().sort_values('score',ascending=False)

Unnamed: 0_level_0,score,fold
task,Unnamed: 1_level_1,Unnamed: 2_level_1
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-G/O_z-score_train_UCEC_grade.h5,0.770515,2
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-G/O_z-score_train_LGG_grade.h5,0.671229,2
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-G/O_z-score_train_STAD_grade.h5,0.636267,2
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-G/O_z-score_test_HNSC_grade.h5,0.587229,2
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-G/O_z-score_train_LIHC_grade.h5,0.550368,2
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-G/O_z-score_train_KIRC_grade.h5,0.513963,2
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-G/O_z-score_train_CESC_grade.h5,0.492634,2
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-G/O_z-score_train_PAAD_grade.h5,0.490246,2
