In [2]:
import warnings, os
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
from glob import glob


import matplotlib.pyplot as plt
import matplotlib.image as mpimg

from aggmap import AggMap, loadmap
import seaborn as sns


import xgboost as xgb
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import confusion_matrix, precision_recall_curve, roc_auc_score
from sklearn.metrics import auc as calculate_auc


## LGR, RF, KNN, ROF, XGBoost

In [3]:
from sklearn.ensemble import RandomForestClassifier
from rotation_forest import RotationForestClassifier

In [4]:
def score(dfr):
    y_true = dfr.y_true
    y_score = dfr.y_score
    y_pred = dfr.y_score.round()

    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()

    acc = (tp + tn) / sum([tn, fp, fn, tp])
    auc = roc_auc_score(y_true, y_score)
    ber =  (fp / (fp + tn) + fn / (tp + fn)) * 0.5

    print('acc: %.3f, roc-auc: %.3f, BER: %.3f' % (acc,auc,ber))

    return acc, auc


def get_best_params(X, y):
    clf = RotationForestClassifier()
    parameters = {'max_depth': [4, 6, 9],
                  'n_estimators': [10, 50, 100]}

    grid = GridSearchCV(clf, parameters, scoring='roc_auc', n_jobs=9, iid=False, cv=5, verbose=2)
    gres = grid.fit(X, y)
    return grid.best_params_

In [5]:
lst = glob('/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/*.h5')
n_fold = 5

res = []
for p in lst:
    
    print('#'*50 + ' %s ' % p + '#'*50 )
    dfx = pd.read_hdf(p, key = 'expression')
    dfy = pd.read_hdf(p, key = 'labels')
    print(p, dfy.value_counts().to_dict())

    outer = KFold(n_splits = n_fold, shuffle = True, random_state = 123)
    outer_idx = list(outer.split(dfx, dfy))

    fold_all = []
    for i, idx in enumerate(outer_idx):

        fold_num = "fold_%s" % str(i).zfill(2) 

        train_idx, test_idx = idx

        # subset feature selection by the fold change
        dfxy_train = dfx.iloc[train_idx].join(dfy.iloc[train_idx])
        abds = dfxy_train.groupby(dfy.name).mean()
        fc = abs(abds.iloc[0] - abds.iloc[1])
        selected = fc[fc > 0.5].index

        trainX = dfx[selected].iloc[train_idx].values
        trainY = dfy.iloc[train_idx].values

        testX = dfx[selected].iloc[test_idx].values
        testY = dfy.iloc[test_idx].values

        ## training
        print("\nInput train and test X shape is %s, %s  \n" % (trainX.shape,  testX.shape))
        print("Getting the best parameters by gridsearch \n")
        #best_params = get_best_params(trainX, trainY)
        
        best_params = {'n_estimators':50, 'n_features_per_subset':10}
        
        clf = RotationForestClassifier(**best_params, n_jobs=20, verbose=1)
        clf.fit(trainX, trainY)

        ## evaluation
        y_true = testY
        y_score = clf.predict_proba(testX)[:,1]
        dfr = pd.DataFrame([y_true, y_score]).T
        dfr.columns = ['y_true', 'y_score']
        dfr.index = dfy.iloc[test_idx].index
        acc, auc = score(dfr)
        
        fold_all.append(auc)
        res.append([auc, i, p, best_params])
        
    print('The auc score for %s is %s.' % (p, np.mean(fold_all)))


################################################## /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_train_THCA_stage.h5 ##################################################
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_train_THCA_stage.h5 {0: 291, 1: 222}

Input train and test X shape is (410, 173), (103, 173)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    1.5s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    3.2s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.2s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    0.9s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.


acc: 0.650, roc-auc: 0.657, BER: 0.350

Input train and test X shape is (410, 137), (103, 137)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    1.1s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    2.5s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.2s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    0.9s finished


acc: 0.485, roc-auc: 0.549, BER: 0.504

Input train and test X shape is (410, 145), (103, 145)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    1.2s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    2.5s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.2s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    0.9s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.


acc: 0.563, roc-auc: 0.547, BER: 0.445

Input train and test X shape is (411, 148), (102, 148)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    1.3s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    2.6s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.3s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    1.0s finished


acc: 0.549, roc-auc: 0.556, BER: 0.467

Input train and test X shape is (411, 165), (102, 165)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    1.2s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    2.7s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.2s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    0.9s finished


acc: 0.510, roc-auc: 0.493, BER: 0.495
The auc score for /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_train_THCA_stage.h5 is 0.5603776443023266.
################################################## /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_train_UCEC_stage.h5 ##################################################
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_train_UCEC_stage.h5 {0: 342, 1: 212}

Input train and test X shape is (443, 2367), (111, 2367)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   25.5s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   59.1s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.7s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    2.9s finished


acc: 0.595, roc-auc: 0.610, BER: 0.417

Input train and test X shape is (443, 2775), (111, 2775)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   33.0s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:  1.3min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.7s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    2.9s finished


acc: 0.631, roc-auc: 0.648, BER: 0.385

Input train and test X shape is (443, 2829), (111, 2829)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   30.9s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:  1.3min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.8s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    3.2s finished


acc: 0.532, roc-auc: 0.573, BER: 0.473

Input train and test X shape is (443, 2782), (111, 2782)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   32.4s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:  1.4min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.6s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    2.6s finished


acc: 0.595, roc-auc: 0.637, BER: 0.419

Input train and test X shape is (444, 3644), (110, 3644)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   48.3s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:  2.0min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.9s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    4.0s finished


acc: 0.564, roc-auc: 0.596, BER: 0.451
The auc score for /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_train_UCEC_stage.h5 is 0.6127780987261711.
################################################## /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_train_COAD_stage.h5 ##################################################
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_train_COAD_stage.h5 {0: 296, 1: 209}

Input train and test X shape is (404, 649), (101, 649)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    4.6s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   10.1s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.4s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    1.2s finished


acc: 0.604, roc-auc: 0.638, BER: 0.393

Input train and test X shape is (404, 700), (101, 700)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    5.0s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   11.2s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.3s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    0.8s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.


acc: 0.634, roc-auc: 0.633, BER: 0.382

Input train and test X shape is (404, 419), (101, 419)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    3.3s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    6.8s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.3s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    1.0s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.


acc: 0.634, roc-auc: 0.618, BER: 0.368

Input train and test X shape is (404, 269), (101, 269)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    2.1s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    4.4s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.3s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    1.0s finished


acc: 0.584, roc-auc: 0.608, BER: 0.417

Input train and test X shape is (404, 610), (101, 610)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    4.0s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    9.4s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.3s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    1.2s finished


acc: 0.614, roc-auc: 0.635, BER: 0.419
The auc score for /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_train_COAD_stage.h5 is 0.6263503852457064.
################################################## /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_validate_LUSC_stage.h5 ##################################################
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_validate_LUSC_stage.h5 {1: 255, 0: 249}

Input train and test X shape is (403, 153), (101, 153)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    1.2s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    2.9s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.1s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    0.8s finished


acc: 0.584, roc-auc: 0.630, BER: 0.418

Input train and test X shape is (403, 228), (101, 228)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    2.2s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    4.1s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.3s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    0.8s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.


acc: 0.535, roc-auc: 0.542, BER: 0.458

Input train and test X shape is (403, 221), (101, 221)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    1.6s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    3.6s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.3s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    0.9s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.


acc: 0.564, roc-auc: 0.589, BER: 0.435

Input train and test X shape is (403, 246), (101, 246)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    1.8s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    3.9s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.3s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    0.7s finished


acc: 0.535, roc-auc: 0.528, BER: 0.469

Input train and test X shape is (404, 237), (100, 237)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    1.9s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    4.0s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.3s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    0.6s finished


acc: 0.520, roc-auc: 0.563, BER: 0.470
The auc score for /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_validate_LUSC_stage.h5 is 0.5704224356607789.
################################################## /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_train_KIRC_stage.h5 ##################################################
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_train_KIRC_stage.h5 {0: 338, 1: 206}

Input train and test X shape is (435, 3919), (109, 3919)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   59.5s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:  2.3min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.8s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    3.7s finished


acc: 0.615, roc-auc: 0.606, BER: 0.415

Input train and test X shape is (435, 2527), (109, 2527)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   27.3s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:  1.1min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.7s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    3.2s finished


acc: 0.697, roc-auc: 0.730, BER: 0.296

Input train and test X shape is (435, 3131), (109, 3131)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   44.4s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:  1.8min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.8s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    3.4s finished


acc: 0.670, roc-auc: 0.721, BER: 0.334

Input train and test X shape is (435, 2820), (109, 2820)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   37.5s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:  1.5min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.9s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    3.5s finished


acc: 0.615, roc-auc: 0.601, BER: 0.404

Input train and test X shape is (436, 3255), (108, 3255)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   47.6s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:  1.9min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    1.0s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    4.4s finished


acc: 0.704, roc-auc: 0.684, BER: 0.329
The auc score for /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_train_KIRC_stage.h5 is 0.6684028418372271.
################################################## /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_test_BRCA_stage.h5 ##################################################
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_test_BRCA_stage.h5 {0: 861, 1: 273}

Input train and test X shape is (907, 139), (227, 139)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    1.8s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    3.9s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.3s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    1.1s finished


acc: 0.634, roc-auc: 0.550, BER: 0.458

Input train and test X shape is (907, 313), (227, 313)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    3.2s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    7.6s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.2s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    0.8s finished


acc: 0.634, roc-auc: 0.492, BER: 0.512

Input train and test X shape is (907, 162), (227, 162)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    2.0s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    4.3s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.2s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    0.9s finished


acc: 0.678, roc-auc: 0.535, BER: 0.492

Input train and test X shape is (907, 753), (227, 753)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   10.3s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   22.2s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.4s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    1.5s finished


acc: 0.674, roc-auc: 0.557, BER: 0.457

Input train and test X shape is (908, 153), (226, 153)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    2.0s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    3.9s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.3s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    0.9s finished


acc: 0.624, roc-auc: 0.481, BER: 0.514
The auc score for /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_test_BRCA_stage.h5 is 0.522846258885483.
################################################## /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_train_SKCM_stage.h5 ##################################################
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_train_SKCM_stage.h5 {0: 157, 1: 92}

Input train and test X shape is (199, 1291), (50, 1291)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    7.5s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   20.0s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.5s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    1.9s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.


acc: 0.660, roc-auc: 0.636, BER: 0.367

Input train and test X shape is (199, 1872), (50, 1872)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   14.5s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   34.1s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.5s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    2.3s finished


acc: 0.600, roc-auc: 0.597, BER: 0.445

Input train and test X shape is (199, 1163), (50, 1163)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    7.3s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   17.7s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.5s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    1.8s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.


acc: 0.640, roc-auc: 0.671, BER: 0.381

Input train and test X shape is (199, 1976), (50, 1976)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   14.7s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   36.3s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.5s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    2.1s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.


acc: 0.740, roc-auc: 0.735, BER: 0.292

Input train and test X shape is (200, 1638), (49, 1638)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   12.1s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   27.6s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.4s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    1.8s finished


acc: 0.571, roc-auc: 0.611, BER: 0.452
The auc score for /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_train_SKCM_stage.h5 is 0.6499553289057559.
################################################## /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_train_LIHC_stage.h5 ##################################################
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_train_LIHC_stage.h5 {0: 197, 1: 177}

Input train and test X shape is (299, 1040), (75, 1040)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    7.6s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   17.3s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.3s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    1.3s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.


acc: 0.640, roc-auc: 0.661, BER: 0.366

Input train and test X shape is (299, 1811), (75, 1811)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   16.4s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   37.5s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.4s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    2.1s finished


acc: 0.547, roc-auc: 0.586, BER: 0.455

Input train and test X shape is (299, 1818), (75, 1818)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   16.3s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   38.7s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.6s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    2.5s finished


acc: 0.560, roc-auc: 0.564, BER: 0.444

Input train and test X shape is (299, 1565), (75, 1565)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   13.0s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   31.5s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.5s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    2.0s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.


acc: 0.587, roc-auc: 0.570, BER: 0.420

Input train and test X shape is (300, 1488), (74, 1488)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   12.6s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:   28.6s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.4s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    1.7s finished


acc: 0.527, roc-auc: 0.565, BER: 0.471
The auc score for /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_train_LIHC_stage.h5 is 0.5893107953121723.
################################################## /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_train_LUAD_stage.h5 ##################################################
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_train_LUAD_stage.h5 {0: 306, 1: 236}

Input train and test X shape is (433, 2581), (109, 2581)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   32.8s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:  1.3min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.7s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    3.1s finished


acc: 0.505, roc-auc: 0.519, BER: 0.496

Input train and test X shape is (433, 2899), (109, 2899)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   39.8s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:  1.6min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.7s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    3.1s finished


acc: 0.624, roc-auc: 0.660, BER: 0.370

Input train and test X shape is (434, 4500), (108, 4500)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:  1.2min
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:  2.8min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.9s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    4.7s finished


acc: 0.602, roc-auc: 0.611, BER: 0.401

Input train and test X shape is (434, 4902), (108, 4902)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:  1.2min
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:  2.8min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    1.2s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    5.5s finished


acc: 0.519, roc-auc: 0.435, BER: 0.488

Input train and test X shape is (434, 3779), (108, 3779)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   48.5s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:  1.9min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.9s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    4.0s finished


acc: 0.519, roc-auc: 0.487, BER: 0.519
The auc score for /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_train_LUAD_stage.h5 is 0.542422906954157.
################################################## /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_train_STAD_stage.h5 ##################################################
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_train_STAD_stage.h5 {1: 210, 0: 206}

Input train and test X shape is (332, 262), (84, 262)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    1.8s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    4.0s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.2s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    0.7s finished


acc: 0.536, roc-auc: 0.604, BER: 0.467

Input train and test X shape is (333, 122), (83, 122)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    1.5s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    1.9s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.2s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    0.8s finished


acc: 0.494, roc-auc: 0.514, BER: 0.508

Input train and test X shape is (333, 203), (83, 203)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    1.2s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    2.9s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.3s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    0.9s finished


acc: 0.470, roc-auc: 0.486, BER: 0.528

Input train and test X shape is (333, 105), (83, 105)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    1.0s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    1.8s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.1s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    0.8s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.


acc: 0.639, roc-auc: 0.607, BER: 0.360

Input train and test X shape is (333, 505), (83, 505)  

Getting the best parameters by gridsearch 



[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    3.5s
[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    7.3s finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.4s


acc: 0.566, roc-auc: 0.577, BER: 0.433
The auc score for /home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_train_STAD_stage.h5 is 0.557600557426456.


[Parallel(n_jobs=20)]: Done  50 out of  50 | elapsed:    1.4s finished


In [8]:
dfres = pd.DataFrame(res, columns = ['score', 'fold', 'task', 'best_params'])
dfres.to_csv('./FS_RoationForest_5FCV_results_stage.csv')
dfres.groupby('task').mean().sort_values('score',ascending=False)

Unnamed: 0_level_0,score,fold
task,Unnamed: 1_level_1,Unnamed: 2_level_1
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_train_KIRC_stage.h5,0.668403,2
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_train_SKCM_stage.h5,0.649955,2
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_train_COAD_stage.h5,0.62635,2
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_train_UCEC_stage.h5,0.612778,2
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_train_LIHC_stage.h5,0.589311,2
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_validate_LUSC_stage.h5,0.570422,2
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_train_THCA_stage.h5,0.560378,2
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_train_STAD_stage.h5,0.557601,2
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_train_LUAD_stage.h5,0.542423,2
/home/shenwanxiang/Research/AggMapNet_dataset/TCGA-S/O_z-score_test_BRCA_stage.h5,0.522846,2
