In [1]:
import gc
import glob
import numpy as np
import os
import pandas as pd
import pickle
import random
import time
from tqdm import tqdm

from sklearn.model_selection import StratifiedKFold, StratifiedShuffleSplit
from sklearn.metrics import log_loss, balanced_accuracy_score, roc_auc_score, roc_curve
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

import xgboost as xgb

#import tensorflow as tf
#import keras
#from keras import losses, regularizers
#from keras.models import Sequential
#from keras.layers import Dense, Dropout
#from keras import backend as K

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

### Input

Classifiers

In [2]:
# classifiers to run, in order; names handled by the pipeline:
# 'LogisticReg', 'SVM', 'GBM', 'NeuralNet'
# (the 'NeuralNet' code paths use tensorflow/keras names whose imports are
# commented out in the import cell)
classifiers = ['GBM']

Hyperopt parameters

In [3]:
# number of hyperparameter samples evaluated per search (passed to fmin as max_evals)
n_hyperopt_iterations = 256  # 2**8

Data splits

In [4]:
# outer evaluation: number of stratified shuffle splits and the fraction of
# samples held out for testing in each of them
n_splits_trainvalidation_test, test_size = 20, 0.2

# inner cross-validation: number of stratified folds used to score hyperparameters
k_train_validation = 5

# fraction of a training set held out for early stopping (GBM and NeuralNet)
early_stopping_size = 1/8

Seed

In [5]:
# seed reused for the global generators below and as the random_state / seed
# of the splitters, the classifiers and the hyperopt search
seed_ = 1

# seed the global generators of NumPy and of the standard library
np.random.seed(seed_)
random.seed(seed_)

### HyperOpt Functions

In [6]:
def hyperopt_function(parameters):
    """Objective handed to hyperopt's ``fmin``: score one hyperparameter sample.

    Reads the cross-validation folds from ``_files/data.pickle``, scores
    ``parameters`` with ``hyperopt_performance`` and keeps the lowest score
    seen so far in ``_files/validation.pickle``.

    Parameters
    ----------
    parameters : dict
        Hyperparameter values drawn by hyperopt from the search space.

    Returns
    -------
    dict
        ``{'loss': <score>, 'status': STATUS_OK}``.
    """
    # folds written to disk by the pipeline before the search starts
    with open('_files/data.pickle', 'rb') as data_file:
        folds = pickle.load(data_file, encoding='latin1')
    train_data, X_validation, y_validation = folds

    # score this sample, then trigger a garbage collection
    candidate_loss = hyperopt_performance(train_data, X_validation, y_validation, parameters)
    gc.collect()

    # replace the stored best score when this sample improves on it
    with open('_files/validation.pickle', 'rb') as best_file:
        stored_loss = pickle.load(best_file)
    improved = candidate_loss < stored_loss
    if improved:
        with open('_files/validation.pickle', 'wb') as best_file:
            pickle.dump(candidate_loss, best_file)

    return dict(loss=candidate_loss, status=STATUS_OK)

In [7]:
def _negative_positive_ratio(y):
    """Return (number of labels equal to 0) / (number of labels equal to 1).

    Raises ``ZeroDivisionError`` when ``y`` contains no label equal to 1.
    """
    labels = np.asarray(y)
    return int(np.sum(labels == 0)) / int(np.sum(labels == 1))


def _weighted_log_loss(y_true, y_pred):
    """Log loss in which every positive sample is weighted by the
    negative/positive ratio of ``y_true`` and every other sample by 1."""
    pos_weight = _negative_positive_ratio(y_true)
    sample_weights = [pos_weight if x==1 else 1 for x in y_true]
    return log_loss(y_true, y_pred, sample_weight=sample_weights)


def hyperopt_performance(train_data, X_validation, y_validation, parameters, classifier=None):
    """Cross-validated weighted log loss of one hyperparameter sample.

    For each of the ``k_train_validation`` folds a model is fitted on the
    fold's training data and scored on the fold's validation data.

    Parameters
    ----------
    train_data : list
        ``[X_training, y_training, X_earlystopping, y_earlystopping]`` for
        'GBM' and 'NeuralNet', ``[X_train, y_train]`` otherwise; every element
        is a list with one entry per fold.
    X_validation, y_validation : list
        Validation features / labels, one entry per fold.
    parameters : dict
        Hyperparameters of the classifier.
    classifier : str, optional
        One of 'LogisticReg', 'SVM', 'GBM', 'NeuralNet'. Defaults to
        ``classifiers[a]``, i.e. the classifier currently selected by the
        module-level pipeline loop.

    Returns
    -------
    float
        Mean of the per-fold weighted log losses PLUS their standard error
        (``std / sqrt(number of folds)``), so unstable samples are penalised.

    Raises
    ------
    ValueError
        If the classifier name is not one of the four supported names.
    ZeroDivisionError
        If a fold contains no positive label.
    """
    # fall back on the classifier selected by the pipeline loop
    if classifier is None:
        classifier = classifiers[a]
    if classifier not in ('LogisticReg', 'SVM', 'GBM', 'NeuralNet'):
        # previously an unknown name skipped every branch and produced a NaN loss
        raise ValueError('unknown classifier: %r' % (classifier,))

    # unpack train data
    if classifier in ('GBM', 'NeuralNet'):
        X_training_train, y_training_train, X_earlystopping_train, y_earlystopping_train = train_data[0], train_data[1], train_data[2], train_data[3]
    else:
        X_train, y_train = train_data[0], train_data[1]

    # per-fold validation performance
    validation_weightedlogloss = []

    # iterate over number of training/validation splits
    for i in range(k_train_validation):

        # logistic regression
        if classifier == 'LogisticReg':
            clf = LogisticRegression(penalty='elasticnet', class_weight='balanced', solver='saga', max_iter=10000, random_state=seed_, **parameters)
            clf.fit(X_train[i], y_train[i])
            y_pred = clf.predict_proba(X_validation[i])[:,1]

        # SVM
        elif classifier == 'SVM':
            clf = SVC(gamma='auto', probability=True, random_state=seed_, **parameters)
            clf.fit(X_train[i], y_train[i])
            y_pred = clf.predict_proba(X_validation[i])[:,1]

        # GBM
        elif classifier == 'GBM':

            # xgb datasets
            xgb_training = xgb.DMatrix(X_training_train[i], label=y_training_train[i])
            xgb_earlystopping = xgb.DMatrix(X_earlystopping_train[i], label=y_earlystopping_train[i])
            xgb_validation = xgb.DMatrix(X_validation[i], label=y_validation[i])

            # parameters; the class weight comes from the training labels
            param = parameters.copy()
            param.update({
                'objective': 'binary:logistic',
                'eval_metric': 'logloss',
                'scale_pos_weight': _negative_positive_ratio(y_training_train[i]),
                'seed': seed_,
            })
            evallist = [(xgb_training, 'train'), (xgb_earlystopping, 'eval')]

            # train on training, stop when the early-stopping set stops improving
            bst = xgb.train(param, xgb_training, num_boost_round=10000, evals=evallist, early_stopping_rounds=10, verbose_eval=False)

            # predict with the trees up to the best iteration
            # NOTE(review): ntree_limit / best_ntree_limit belong to older xgboost
            # releases - confirm against the installed version
            y_pred = bst.predict(xgb_validation, ntree_limit=bst.best_ntree_limit)

        # neural network
        # (uses tf / keras / Sequential / Dense / Dropout / regularizers / K, whose
        # imports are commented out in the import cell)
        else:

            with tf.Graph().as_default():
                with tf.Session() as sess:

                    # create model
                    model = Sequential()
                    for j in range(parameters['number_of_layers']):
                        model.add(Dense(parameters['neurons_per_layer'], activation=parameters['activation_function'], kernel_regularizer=regularizers.l1_l2(l1=parameters['l1'], l2=parameters['l2'])))
                        model.add(Dropout(parameters['dropout_rate']))
                    model.add(Dense(1, activation='sigmoid'))

                    # loss and performance metrics; the class weight comes from the training labels
                    pos_weight = _negative_positive_ratio(y_training_train[i])
                    model.compile(loss=weighted_cross_entropy(pos_weight), metrics=[weighted_cross_entropy(pos_weight)], optimizer=parameters['optimizer'])
                    earlystopping = keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=0, mode='auto', restore_best_weights=True)

                    # fit model and predict the validation fold
                    model.fit(X_training_train[i], y_training_train[i], epochs=1000, verbose=0, validation_data=(X_earlystopping_train[i], y_earlystopping_train[i]), callbacks=[earlystopping])
                    y_pred = model.predict_proba(X_validation[i], batch_size=len(y_validation[i]), verbose=0)

            # clear session
            K.clear_session()

        # evaluate on validation (weights derived from the validation labels)
        validation_weightedlogloss.append(_weighted_log_loss(y_validation[i], y_pred))

    # mean validation performance over all folds, penalised by its standard error
    mean_validation_weightedlogloss = np.mean(validation_weightedlogloss) + np.std(validation_weightedlogloss)/np.sqrt(len(validation_weightedlogloss))
    return mean_validation_weightedlogloss

### Custom functions

In [8]:
def dummy_y(y):
    """One-hot encode binary labels.

    Labels are read positionally, so a pandas Series whose index is not
    ``0..n-1`` (e.g. a subset of a larger Series) is handled correctly.

    Parameters
    ----------
    y : sequence
        Labels (list, NumPy array or pandas Series). A label equal to 0 is
        the negative class; any other value is treated as positive.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(len(y), 2)``: column 0 is 1 where the label
        equals 0, column 1 is 1 everywhere else.
    """
    labels = np.asarray(y).ravel()
    is_negative = labels == 0
    return np.column_stack((is_negative, ~is_negative)).astype(int)

In [9]:
def weighted_cross_entropy(pos_weight):
    """Build a Keras-compatible loss that weights positive targets by ``pos_weight``.

    The returned function clips the predicted probabilities to
    ``[epsilon, 1 - epsilon]``, converts them to logits, applies
    ``tf.nn.weighted_cross_entropy_with_logits`` and averages the result over
    the last axis.

    Parameters
    ----------
    pos_weight : float
        Weight forwarded to ``tf.nn.weighted_cross_entropy_with_logits``.

    Returns
    -------
    callable
        ``calculate_loss(y_true, y_pred)``.
    """

    def calculate_loss(y_true, y_pred):
        # tensorflow is imported lazily, only when the loss is actually evaluated
        import tensorflow as tf
        from tensorflow.python.framework import ops
        from tensorflow.python.keras import backend as K
        from tensorflow.python.ops import clip_ops, math_ops

        # keep probabilities away from 0 and 1 so the logit stays finite
        eps = ops.convert_to_tensor(K.epsilon(), dtype=y_pred.dtype.base_dtype)
        clipped = clip_ops.clip_by_value(y_pred, eps, 1 - eps)

        # probabilities -> logits, as expected by the tensorflow loss
        logits = math_ops.log(clipped / (1 - clipped))
        elementwise = tf.nn.weighted_cross_entropy_with_logits(targets=y_true, logits=logits, pos_weight=pos_weight)

        return K.mean(elementwise, axis=-1)

    return calculate_loss

### Load gene lists

In [10]:
# gene lists: `genelists` holds the list names (used as result columns),
# `genes[b]` the feature columns selected for the b-th list
with open('../gene_lists/gene_lists.pickle','rb') as gene_lists_file:
    genelists, genes = pickle.load(gene_lists_file)

### Pipeline

In [11]:
# offset subtracted from the log-uniform samples of the GBM 'gamma' and
# 'reg_alpha' dimensions; it has to be applied again to the raw values
# returned by fmin, otherwise the final model differs from the evaluated one
gbm_zero_offset = 0.0001

# folder through which the pipeline hands data to hyperopt_function
os.makedirs('_files', exist_ok=True)

# row labels of the per-split test results
split_labels = ['split_%d' % x for x in range(1,n_splits_trainvalidation_test+1)]


def _impute_and_scale(X_fit, X_apply):
    """Mean-impute and standardise both matrices with statistics learned on ``X_fit`` only."""
    imp = SimpleImputer(missing_values=np.nan, strategy='mean')
    X_fit = imp.fit_transform(X_fit)
    X_apply = imp.transform(X_apply)
    scaler = StandardScaler()
    X_fit = scaler.fit_transform(X_fit)
    X_apply = scaler.transform(X_apply)
    return X_fit, X_apply


def _record_performance(table, csv_path, split_number, genelist, value):
    """Store one test score, refresh the MEAN / STERR rows and rewrite the CSV file."""
    table.at['split_%d' % split_number, genelist] = value
    scores = table.loc[split_labels, genelist].values.tolist()
    table.at['MEAN', genelist] = np.nanmean(scores)
    # splits 1..split_number are filled in at this point, hence sqrt(split_number)
    table.at['STERR', genelist] = np.nanstd(scores)/np.sqrt(split_number)
    table.to_csv(csv_path)


# iterate over classifiers
# NOTE: `a` has to stay a module-level name, hyperopt_performance reads classifiers[a]
for a in range(len(classifiers)):

    # output folder
    if not os.path.isdir(classifiers[a]):
        os.mkdir(classifiers[a])

    # performance files: one row per split plus MEAN / STERR, one column per gene list
    performance_weightedlogloss = pd.DataFrame(index=split_labels+['MEAN','STERR'], columns=genelists)
    performance_weightedlogloss.to_csv('%s/weightedlogloss.csv' % classifiers[a])

    performance_balancedaccuracy = pd.DataFrame(index=split_labels+['MEAN','STERR'], columns=genelists)
    performance_balancedaccuracy.to_csv('%s/balancedaccuracy.csv' % classifiers[a])

    performance_auroc = pd.DataFrame(index=split_labels+['MEAN','STERR'], columns=genelists)
    performance_auroc.to_csv('%s/auroc.csv' % classifiers[a])

    # iterate over gene lists
    for b in range(len(genelists)):
        print('-------------------------')
        print('CLASSIFIER: %s' % classifiers[a])
        print('GENE LIST: %s' % genelists[b])
        print('-------------------------')

        # load dataset (reloaded for every gene list because X_matrix is subset below)
        with open('../../_datasets/gene_all.pickle', 'rb') as f:
            X_matrix, y_vector, _ = pickle.load(f, encoding='latin1')

        # subset features
        X_matrix = X_matrix[genes[b]]

        # divide train+validation from testing
        trainvalidation_index = []
        test_index = []
        sss = StratifiedShuffleSplit(n_splits=n_splits_trainvalidation_test, test_size=test_size, random_state=seed_)
        for trainvalidation_, test_ in sss.split(X_matrix, y_vector):
            trainvalidation_index.append(list(trainvalidation_))
            test_index.append(list(test_))

        # iterate over number of training+validation/testing splits
        for c in range(n_splits_trainvalidation_test):
            print('Split %d' % (c+1))

            # separate train+validation and testing
            X_trainvalidation = X_matrix.iloc[trainvalidation_index[c],]
            X_test = X_matrix.iloc[test_index[c],]
            y_trainvalidation = y_vector[trainvalidation_index[c]]
            y_test = y_vector[test_index[c]]

            # separate training_trainvalidation from earlystopping_trainvalidation
            # (used to fit the final model of classifiers with early stopping)
            if classifiers[a] in ['GBM','NeuralNet']:
                sss_earlystopping = StratifiedShuffleSplit(n_splits=1, test_size=early_stopping_size, random_state=seed_)
                training_, earlystopping_ = next(sss_earlystopping.split(X_trainvalidation, y_trainvalidation))
                X_training_trainvalidation = X_trainvalidation.iloc[list(training_),]
                X_earlystopping_trainvalidation = X_trainvalidation.iloc[list(earlystopping_),]
                y_training_trainvalidation = y_trainvalidation[list(training_)]
                y_earlystopping_trainvalidation = y_trainvalidation[list(earlystopping_)]

            # divide train from validation
            train_index = []
            validation_index = []
            skf = StratifiedKFold(n_splits=k_train_validation, shuffle=True, random_state=seed_)
            for train_, validation_ in skf.split(X_trainvalidation, y_trainvalidation):
                train_index.append(list(train_))
                validation_index.append(list(validation_))

            # separate train and validation
            X_train = []
            X_validation = []
            y_train = []
            y_validation = []
            for d in range(k_train_validation):
                X_train.append(X_trainvalidation.iloc[train_index[d],])
                X_validation.append(X_trainvalidation.iloc[validation_index[d],])
                y_train.append(y_trainvalidation[train_index[d]])
                y_validation.append(y_trainvalidation[validation_index[d]])

            # separate training_train from earlystopping_train
            if classifiers[a] in ['GBM','NeuralNet']:
                X_training_train = []
                X_earlystopping_train = []
                y_training_train = []
                y_earlystopping_train = []
                for d in range(k_train_validation):
                    sss_earlystopping = StratifiedShuffleSplit(n_splits=1, test_size=early_stopping_size, random_state=seed_)
                    training_, earlystopping_ = next(sss_earlystopping.split(X_train[d], y_train[d]))
                    X_training_train.append(X_train[d].iloc[list(training_),])
                    X_earlystopping_train.append(X_train[d].iloc[list(earlystopping_),])
                    y_training_train.append(y_train[d][list(training_)])
                    y_earlystopping_train.append(y_train[d][list(earlystopping_)])

            # imputation and scaling (GBM is trained on the raw features)
            # every transformer is fitted on the "training" part of its own split only
            if classifiers[a] in ['LogisticReg','SVM','NeuralNet']:

                # train+validation, testing
                X_trainvalidation, X_test = _impute_and_scale(X_trainvalidation, X_test)

                # training_trainvalidation, earlystopping_trainvalidation
                if classifiers[a] == 'NeuralNet':
                    X_training_trainvalidation, X_earlystopping_trainvalidation = _impute_and_scale(X_training_trainvalidation, X_earlystopping_trainvalidation)

                # train, validation
                for d in range(k_train_validation):
                    X_train[d], X_validation[d] = _impute_and_scale(X_train[d], X_validation[d])

                # training_train, earlystopping_train
                if classifiers[a] == 'NeuralNet':
                    for d in range(k_train_validation):
                        X_training_train[d], X_earlystopping_train[d] = _impute_and_scale(X_training_train[d], X_earlystopping_train[d])

            # hyperopt parameters (search space)
            if classifiers[a] == 'LogisticReg':
                parameters = {
                    'C':hp.loguniform('C',np.log(1e-3),np.log(1e3)),
                    'l1_ratio': hp.uniform('l1_ratio', 0, 1)
                }
            elif classifiers[a] == 'SVM':
                # fmin reports hp.choice dimensions as an index into these lists
                parameters_choice = {
                    'kernel':['linear','sigmoid','poly','rbf']
                }
                parameters = {
                    'kernel':hp.choice('kernel', ['linear','sigmoid','poly','rbf']),
                    'C':hp.loguniform('C',np.log(1e-2),np.log(1e2))
                }
            elif classifiers[a] == 'GBM':
                parameters = {
                    'gamma': hp.loguniform('gamma', np.log(0.0001), np.log(5)) - gbm_zero_offset,
                    'max_depth': scope.int(hp.uniform('max_depth', 1, 11)),
                    'subsample': hp.uniform('subsample', 0.5, 1),
                    'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1),
                    'colsample_bylevel': hp.uniform('colsample_bylevel', 0.5, 1),
                    'reg_lambda': hp.loguniform('reg_lambda', np.log(1), np.log(4)),
                    'reg_alpha': hp.loguniform('reg_alpha', np.log(0.0001), np.log(1)) - gbm_zero_offset,
                    'eta': hp.loguniform('eta', np.log(0.01), np.log(0.5))
                }
            elif classifiers[a] == 'NeuralNet':
                # fmin reports hp.choice dimensions as an index into these lists
                parameters_choice = {
                    'activation_function':['relu','elu','sigmoid'],
                    'optimizer':['rmsprop', 'adam', 'sgd']
                }
                parameters = {
                    'number_of_layers':scope.int(hp.quniform('number_of_layers',1.5,10.5,1)),
                    'neurons_per_layer':scope.int(hp.qloguniform('neurons_per_layer',np.log(10.5),np.log(200.5),1)),
                    'activation_function':hp.choice('activation_function', ['relu','elu','sigmoid']),
                    'dropout_rate':hp.uniform('dropout_rate',0,0.5),
                    'l1':hp.loguniform('l1', np.log(0.00001), np.log(0.001)),
                    'l2':hp.loguniform('l2', np.log(0.001), np.log(1)),
                    'optimizer':hp.choice('optimizer', ['rmsprop', 'adam', 'sgd'])
                }

            # save info for hyperopt: initial "best" loss and the cross-validation folds
            with open('_files/validation.pickle','wb') as f:
                pickle.dump(1000., f)
            with open('_files/data.pickle','wb') as f:
                if classifiers[a] in ['GBM','NeuralNet']:
                    pickle.dump([[X_training_train, y_training_train, X_earlystopping_train, y_earlystopping_train], X_validation, y_validation], f)
                else:
                    pickle.dump([[X_train, y_train], X_validation, y_validation], f)

            # hyperopt to find best parameters
            # NOTE(review): recent hyperopt releases appear to expect
            # np.random.default_rng(seed_) as rstate - confirm against the installed version
            trials = Trials()
            best = fmin(hyperopt_function, parameters, algo=tpe.suggest, max_evals=n_hyperopt_iterations, trials=trials, rstate=np.random.RandomState(seed_), verbose=0, show_progressbar=True)

            # `best` holds the RAW value of every hp.* dimension; the transformations
            # of the search space are re-applied below so that the final model uses
            # exactly the hyperparameters that were evaluated

            # logistic regression
            if classifiers[a] == 'LogisticReg':

                # parameters
                best_parameters = {
                    'C':best['C'],
                    'l1_ratio':best['l1_ratio']
                }

                # create classifier
                clf = LogisticRegression(penalty='elasticnet', class_weight='balanced', solver='saga', max_iter=10000, random_state=seed_, **best_parameters)

                # train on training+validation
                clf.fit(X_trainvalidation, y_trainvalidation)

                # evaluate on testing
                y_pred = clf.predict_proba(X_test)[:,1]

            # SVM
            elif classifiers[a] == 'SVM':

                # parameters
                best_parameters = {
                    'kernel':parameters_choice['kernel'][best['kernel']],
                    'C':best['C']
                }

                # create classifier
                clf = SVC(gamma='auto', probability=True, random_state=seed_, **best_parameters)

                # train on training+validation
                clf.fit(X_trainvalidation, y_trainvalidation)

                # evaluate on testing
                y_pred = clf.predict_proba(X_test)[:,1]

            # GBM
            elif classifiers[a] == 'GBM':

                # parameters
                best_parameters = {
                    # same offset as in the search space
                    'gamma': best['gamma'] - gbm_zero_offset,
                    # scope.int truncates, so truncate here as well (rounding could pick a deeper tree)
                    'max_depth': int(best['max_depth']),
                    'subsample': best['subsample'],
                    'colsample_bytree': best['colsample_bytree'],
                    'colsample_bylevel': best['colsample_bylevel'],
                    'reg_lambda': best['reg_lambda'],
                    'reg_alpha': best['reg_alpha'] - gbm_zero_offset,
                    'eta':best['eta']
                }

                # positive weight
                pos_weight = len([x for x in y_training_trainvalidation if x==0])/len([x for x in y_training_trainvalidation if x==1])

                # xgb datasets
                xgb_training = xgb.DMatrix(X_training_trainvalidation, label=y_training_trainvalidation)
                xgb_earlystopping = xgb.DMatrix(X_earlystopping_trainvalidation, label=y_earlystopping_trainvalidation)
                xgb_testing = xgb.DMatrix(X_test, label=y_test)

                # parameters
                param = best_parameters.copy()
                param['objective'] = 'binary:logistic'
                param['eval_metric'] = 'logloss'
                param['scale_pos_weight'] = pos_weight
                param['seed'] = seed_
                evallist = [(xgb_training, 'train'), (xgb_earlystopping, 'eval')]

                # train on training, stop when the early-stopping set stops improving
                bst = xgb.train(param, xgb_training, num_boost_round=10000, evals=evallist, early_stopping_rounds=10, verbose_eval=False)

                # evaluate on testing
                y_pred = bst.predict(xgb_testing, ntree_limit=bst.best_ntree_limit)

            # neural network
            # (uses tf / keras / Sequential / Dense / Dropout / regularizers / K, whose
            # imports are commented out in the import cell)
            elif classifiers[a] == 'NeuralNet':

                # train best model on training+validation set
                with tf.Graph().as_default():
                    with tf.Session() as sess:

                        # create model
                        model = Sequential()
                        for j in range(int(np.round(best['number_of_layers']))):
                            model.add(Dense(int(np.round(best['neurons_per_layer'])), activation=parameters_choice['activation_function'][best['activation_function']], kernel_regularizer=regularizers.l1_l2(l1=best['l1'], l2=best['l2'])))
                            model.add(Dropout(best['dropout_rate']))
                        model.add(Dense(1, activation='sigmoid'))

                        # loss and performance metrics
                        pos_weight = len([x for x in y_training_trainvalidation if x==0])/len([x for x in y_training_trainvalidation if x==1])
                        model.compile(loss=weighted_cross_entropy(pos_weight), metrics=[weighted_cross_entropy(pos_weight)], optimizer=parameters_choice['optimizer'][best['optimizer']])
                        earlystopping = keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=0, mode='auto', restore_best_weights=True)

                        # fit and evaluate model
                        model.fit(X_training_trainvalidation, y_training_trainvalidation, epochs=1000, verbose=0, validation_data=(X_earlystopping_trainvalidation, y_earlystopping_trainvalidation), callbacks=[earlystopping])
                        y_pred = model.predict_proba(X_test, batch_size=len(y_test), verbose=0)

                # clear session
                K.clear_session()

            # calculate test performance - weighted log loss
            # (positives weighted by the negative/positive ratio of the test labels)
            pos_weight = len([x for x in y_test if x==0])/len([x for x in y_test if x==1])
            sample_weights = [pos_weight if x==1 else 1 for x in y_test]
            performance = log_loss(y_test, y_pred, sample_weight=sample_weights)
            _record_performance(performance_weightedlogloss, '%s/weightedlogloss.csv' % classifiers[a], c+1, genelists[b], performance)

            # calculate test performance - balanced accuracy (probability threshold 0.5)
            y_pred_ = [1 if x>=0.5 else 0 for x in y_pred]
            performance = balanced_accuracy_score(y_test, y_pred_)
            _record_performance(performance_balancedaccuracy, '%s/balancedaccuracy.csv' % classifiers[a], c+1, genelists[b], performance)

            # calculate test performance - auroc (on one-hot labels and [1-p, p] scores)
            y_pred_ = np.concatenate((np.array([1-x for x in y_pred]).reshape(-1,1), y_pred.reshape(-1,1)), axis=1)
            performance = roc_auc_score(dummy_y(y_test), y_pred_)
            _record_performance(performance_auroc, '%s/auroc.csv' % classifiers[a], c+1, genelists[b], performance)

-------------------------
CLASSIFIER: GBM
GENE LIST: Lewis
-------------------------
Split 1
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 256/256 [05:25<00:00,  1.87s/it, best loss: 0.4983579227794873]
Split 2
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 256/256 [07:25<00:00,  1.53s/it, best loss: 0.5223055591095852]
Split 3
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 256/256 [08:34<00:00,  2.20s/it, best loss: 0.5268134566890962]
Split 4
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████|