In [1]:
import gc
import glob
import numpy as np
import os
import pandas as pd
import pickle
import random
import time
from tqdm import tqdm

from sklearn.model_selection import StratifiedKFold, StratifiedShuffleSplit
from sklearn.metrics import log_loss, balanced_accuracy_score, roc_auc_score, roc_curve
from sklearn.linear_model import Ridge

from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

import shap

### Input

Input info

In [2]:
# root folder: base-classifier results are read from it and all result files are written to it
output_folder = 'compareclassifiers_randomforest'

# every inner list is one combination of datasets whose classifiers are stacked together
datasets = [
    ['clinical', 'gene_all', 'mutation_onehot_all'],
]

Data splits

In [3]:
# number of folds used to split each train+validation set into training / validation parts
k_train_validation = 5

# number of train+validation / test splits evaluated for every dataset combination
n_splits_trainvalidation_test = 20

Random forest stacking (hyperopt search budget)

In [4]:
# maximum number of hyperparameter sets evaluated by hyperopt per split (2**8)
n_hyperopt_iterations = 256

Seed

In [5]:
# single seed shared by the global generators, the fold splitter, hyperopt and the forests
seed_ = 1

# seed the global NumPy and standard-library pseudo-random generators
np.random.seed(seed_)
random.seed(seed_)

### HyperOpt Functions

In [6]:
def hyperopt_function(parameters):
    """Objective function for hyperopt's ``fmin``.

    The cross-validation folds are not passed as arguments; they are read from
    ``_files/data___.pickle``, which must contain
    ``[X_train, y_train, X_validation, y_validation]``.

    Parameters
    ----------
    parameters : dict
        One sampled hyperparameter set, forwarded to ``hyperopt_performance``.

    Returns
    -------
    dict
        ``{'loss': <value returned by hyperopt_performance>, 'status': STATUS_OK}``.
    """
    # fetch the fold data that the caller stored on disk
    with open('_files/data___.pickle', 'rb') as handle:
        fold_data = pickle.load(handle, encoding='latin1')
    train_X, train_y, valid_X, valid_y = fold_data

    # score this hyperparameter set
    loss = hyperopt_performance(train_X, train_y, valid_X, valid_y, parameters)

    # result in the format hyperopt expects
    return dict(loss=loss, status=STATUS_OK)

In [7]:
def hyperopt_performance(X_train, y_train, X_validation, y_validation, parameters):
    """Score one hyperparameter set by the cross-validated log loss of a random forest.

    Parameters
    ----------
    X_train, y_train, X_validation, y_validation : sequences indexed by fold
        Entry ``i`` holds the training / validation features and labels of fold ``i``;
        the first ``k_train_validation`` entries are used.
    parameters : dict
        Keyword arguments for ``RandomForestClassifier``. A copy is made per fold in which
        ``random_state`` is set to ``seed_`` and ``n_estimators`` is cast to ``int``.

    Returns
    -------
    float
        Mean of the per-fold log losses plus their standard error
        (standard deviation / sqrt(number of folds)).

    Notes
    -----
    * Reads the module-level names ``datasets``, ``a``, ``k_train_validation`` and ``seed_``;
      ``a`` is the dataset-combination index set by the surrounding pipeline loop, so this
      function must be called while that loop variable is valid.
    * Despite the variable names, the log loss is computed without sample weights.
    """

    # class labels = indices of the base classifiers of the current dataset combination
    labels = list(range(len(datasets[a])))

    # initialize validation performance
    validation_weightedlogloss = []

    # iterate over number of training/validation splits
    for i in range(k_train_validation):

        # parameters
        param = parameters.copy()
        param['random_state'] = seed_
        param['n_estimators'] = int(param['n_estimators'])

        # train on training
        clf = RandomForestClassifier(**param).fit(X_train[i], y_train[i])

        # evaluate on validation
        # predict_proba only has columns for the classes seen during fit (clf.classes_);
        # spread them over the full label set so that log_loss(labels=...) does not fail
        # when a class is missing from this training fold (missing classes get probability 0)
        fold_probabilities = clf.predict_proba(X_validation[i])
        y_pred = np.zeros((fold_probabilities.shape[0], len(labels)))
        for column, label in enumerate(clf.classes_):
            y_pred[:, labels.index(label)] = fold_probabilities[:, column]
        weightedlogloss = log_loss(y_validation[i], y_pred, labels=labels)
        validation_weightedlogloss.append(weightedlogloss)

    # average validation performance over all folds, penalised by the standard error
    mean_validation_weightedlogloss = np.mean(validation_weightedlogloss) + np.std(validation_weightedlogloss)/np.sqrt(len(validation_weightedlogloss))
    return mean_validation_weightedlogloss

### Custom functions

In [8]:
def dummy_y(y):
    """One-hot encode a binary label sequence.

    Parameters
    ----------
    y : sequence supporting ``len`` and integer indexing
        Labels; a value equal to 0 is the first class, any other value the second class.

    Returns
    -------
    numpy.ndarray of shape (len(y), 2)
        Column 0 is 1 where the label equals 0, column 1 is 1 otherwise.
    """
    # indicator of the first class (label == 0), one entry per sample
    first_class = [1 if y[k] == 0 else 0 for k in range(len(y))]
    # every sample that is not in the first class belongs to the second one
    second_class = [1 - flag for flag in first_class]
    # stack as rows, then transpose to get one row per sample
    return np.array([first_class, second_class]).T

### Create output folders and files

In [9]:
# dataset names: one '+'-joined name and one (new) result sub-folder per dataset combination
dataset_names = []
for a, dataset_group in enumerate(datasets):
    dataset_names.append('+'.join(dataset_group))
    os.mkdir('%s/%s' % (output_folder, dataset_names[a]))


# performance files
def _blank_performance_file(csv_path_template):
    """Create an empty table (one row per split plus MEAN/STERR, one column per dataset
    combination), save it as CSV under ``output_folder`` and return it."""
    row_labels = ['split_%d' % x for x in range(1,n_splits_trainvalidation_test+1)]+['MEAN','STERR']
    blank = pd.DataFrame(index=row_labels, columns=dataset_names)
    blank.to_csv(csv_path_template % output_folder)
    return blank


# threshold-free metrics
performance_files_weightedlogloss = _blank_performance_file('%s/weightedlogloss.csv')
performance_files_balancedaccuracy = _blank_performance_file('%s/balancedaccuracy.csv')
performance_files_auroc = _blank_performance_file('%s/auroc.csv')

# metrics at the fixed 0.5 threshold
performance_files_sensitivity_50 = _blank_performance_file('%s/sensitivity_50.csv')
performance_files_specificity_50 = _blank_performance_file('%s/specificity_50.csv')
performance_files_ppv_50 = _blank_performance_file('%s/ppv_50.csv')
performance_files_npv_50 = _blank_performance_file('%s/npv_50.csv')

# optimal threshold and the metrics evaluated at it
performance_files_optimal_threshold = _blank_performance_file('%s/optimal_threshold.csv')
performance_files_sensitivity_optimal = _blank_performance_file('%s/sensitivity_optimal.csv')
performance_files_specificity_optimal = _blank_performance_file('%s/specificity_optimal.csv')
performance_files_ppv_optimal = _blank_performance_file('%s/ppv_optimal.csv')
performance_files_npv_optimal = _blank_performance_file('%s/npv_optimal.csv')

### Pipeline

In [10]:
# iterate over datasets
# (each entry of `datasets` is one combination of datasets whose classifiers are stacked)
for a in range(len(datasets)):
    print('-------------------------')
    print('DATASET: %s' % dataset_names[a])
    print('-------------------------')
    
    # iterate over number of training+validation/testing splits
    for b in range(n_splits_trainvalidation_test):
    
        print('Split %d' % (b+1))
        
        # load categorical conversion from each dataset
        # NOTE(review): everything up to "load results from individual datasets" only depends on
        # `a`, not on `b`, so it is recomputed identically for every split.
        # NOTE(review): pickle.load executes arbitrary code - only load trusted files.
        # features[c]        : column names of dataset c, prefixed with '<dataset> # '
        # merged_features[c] : same, but columns whose name before ' | ' is a key of the
        #                      categorical conversion are collapsed into that single key
        #                      (presumably one-hot columns named '<feature> | <level>' - confirm)
        features = []
        merged_features = []
        # first dataset of the combination
        with open('_datasets/%s.pickle' % datasets[a][0], 'rb') as f:
            X_matrix, y_vector, categorical_conversion_old = pickle.load(f, encoding='latin1')
        features.append(['%s # %s' % (datasets[a][0], x) for x in X_matrix.columns.tolist()])
        # re-key the categorical conversion with the same '<dataset> # ' prefix as the features
        categorical_conversion = {}
        for key in categorical_conversion_old:
            categorical_conversion['%s # %s' % (datasets[a][0], key)] = categorical_conversion_old[key]
        if len(categorical_conversion) > 0:
            merged_features.append([])
            for feature in features[0]:
                # not a categorical feature: keep the column name as is
                if feature.split(' | ')[0] not in categorical_conversion:
                    merged_features[-1].append(feature)
                # categorical feature: add its base name once
                elif feature.split(' | ')[0] not in merged_features[-1]:
                    merged_features[-1].append(feature.split(' | ')[0])
        else:
            merged_features.append(features[0].copy())
        # remaining datasets of the combination: same steps, conversions are merged into one dict
        for c in range(1,len(datasets[a])):
            with open('_datasets/%s.pickle' % datasets[a][c], 'rb') as f:
                X_matrix_, y_vector_, categorical_conversion_old = pickle.load(f, encoding='latin1')
            features.append(['%s # %s' % (datasets[a][c], x) for x in X_matrix_.columns.tolist()])
            categorical_conversion_ = {}
            for key in categorical_conversion_old:
                categorical_conversion_['%s # %s' % (datasets[a][c], key)] = categorical_conversion_old[key]
            categorical_conversion = {**categorical_conversion, **categorical_conversion_}
            if len(categorical_conversion_) > 0:
                merged_features.append([])
                for feature in features[c]:
                    if feature.split(' | ')[0] not in categorical_conversion_:
                        merged_features[-1].append(feature)
                    elif feature.split(' | ')[0] not in merged_features[-1]:
                        merged_features[-1].append(feature.split(' | ')[0])
            else:
                merged_features.append(features[c].copy())
        
        # load results from individual datasets
        # one pickle per dataset and split, holding (validation_X, validation_y,
        # validation_predictions, X_test, y_test, y_pred, clf) of that base classifier
        validation_X = []
        validation_predictions = []
        X_test = []
        y_pred = []
        clf = []
        for c in range(len(datasets[a])):
            with open('%s/_individual/%s/iter_%d.pickle' % (output_folder, datasets[a][c], b+1), 'rb') as f:
                # validation_y, y_test and X_test_ are overwritten on every pass, so the values of
                # the last dataset are the ones used below
                # (presumably identical across datasets - TODO confirm)
                validation_X_, validation_y, validation_predictions_, X_test_, y_test, y_pred_, clf_ = pickle.load(f)
            validation_X.append(validation_X_)
            validation_predictions.append(validation_predictions_)
            X_test.append(X_test_)
            # keep column 1 of the test predictions (presumably the positive-class probability)
            y_pred.append(y_pred_[:,1])
            clf.append(clf_)

        # combine predictions
        # -> matrices with one row per sample and one column per base classifier
        validation_predictions = np.concatenate([x.reshape(-1,1) for x in validation_predictions], axis=1)
        test_predictions = np.concatenate([x.reshape(-1,1) for x in y_pred], axis=1)
        
        # if more than one dataset: train a random forest "stacker" that predicts, for every
        # sample, which base classifier is the most reliable; its class probabilities are then
        # used as weights to average the base classifiers' test predictions
        if len(datasets[a]) > 1:

            # validation best classifier: index of the base classifier with the lowest prediction
            # when the true label is 0 and with the highest prediction when the true label is 1
            validation_best_classifier = []
            for i in range(len(validation_y)):
                if validation_y[i] == 0:
                    validation_best_classifier.append(np.argmin(validation_predictions[i,:]))
                elif validation_y[i] == 1:
                    validation_best_classifier.append(np.argmax(validation_predictions[i,:]))
            validation_best_classifier = np.array(validation_best_classifier)

            # subset features that are in any of the models (non-zero feature importance)
            features_in_models = []
            for c in range(len(clf)):
                importance = clf[c].feature_importances_
                features_in_model = [features[c][i] for i in range(len(features[c])) if importance[i] != 0]
                features_in_models.extend(features_in_model)
                n_best = len([x for x in validation_best_classifier if x==c])
                print('%s: %d/%d - %0.2f%% - %d features' % (datasets[a][c], n_best, len(validation_best_classifier), n_best/len(validation_best_classifier)*100, len(features_in_model)))

            # get combined dataset with features in models
            # (the stacker's target is the index of the best base classifier)
            X_trainvalidation = pd.concat(validation_X, axis=1)[features_in_models]
            y_trainvalidation = validation_best_classifier.copy()
            X_test = pd.concat(X_test, axis=1)[features_in_models]

            # separate full train+validation set into stratified training / validation folds
            sep1_index = []
            sep2_index = []
            skf = StratifiedKFold(n_splits=k_train_validation, shuffle=True, random_state=seed_)
            for sep1_, sep2_ in skf.split(X_trainvalidation, y_trainvalidation):
                sep1_index.append(list(sep1_))
                sep2_index.append(list(sep2_))
            X_train = []
            X_validation = []
            y_train = []
            y_validation = []
            for c in range(k_train_validation):
                X_train.append(X_trainvalidation.iloc[sep1_index[c]])
                X_validation.append(X_trainvalidation.iloc[sep2_index[c]])
                y_train.append(y_trainvalidation[sep1_index[c]])
                y_validation.append(y_trainvalidation[sep2_index[c]])

            # impute train+validation/testing
            # SimpleImputer discards columns that are entirely missing at fit time; they are put
            # back as all-zero columns so that the column order still matches features_in_models.
            # The columns must be re-inserted in ASCENDING order: each insertion position is an
            # index into the full-width matrix, which is only valid once all dropped columns to
            # its left have been restored.
            imp = SimpleImputer()
            columns_to_add_back = [i for i,x in enumerate(X_trainvalidation.mean()) if pd.isna(x)]
            X_trainvalidation = imp.fit_transform(X_trainvalidation)
            for c in sorted(columns_to_add_back):
                X_trainvalidation = np.hstack((X_trainvalidation[:,:c], np.zeros(X_trainvalidation.shape[0]).reshape(-1,1), X_trainvalidation[:,c:]))
            X_test = imp.transform(X_test)
            for c in sorted(columns_to_add_back):
                X_test = np.hstack((X_test[:,:c], np.zeros(X_test.shape[0]).reshape(-1,1), X_test[:,c:]))
            scaler = StandardScaler()
            X_trainvalidation = scaler.fit_transform(X_trainvalidation)
            X_test = scaler.transform(X_test)

            # impute train/validation (imputer and scaler are fitted on the training part of
            # each fold only and then applied to its validation part)
            for c in range(k_train_validation):
                imp = SimpleImputer()
                columns_to_add_back = [i for i,x in enumerate(X_train[c].mean()) if pd.isna(x)]
                X_train[c] = imp.fit_transform(X_train[c])
                for d in sorted(columns_to_add_back):
                    X_train[c] = np.hstack((X_train[c][:,:d], np.zeros(X_train[c].shape[0]).reshape(-1,1), X_train[c][:,d:]))
                X_validation[c] = imp.transform(X_validation[c])
                for d in sorted(columns_to_add_back):
                    X_validation[c] = np.hstack((X_validation[c][:,:d], np.zeros(X_validation[c].shape[0]).reshape(-1,1), X_validation[c][:,d:]))
                scaler = StandardScaler()
                X_train[c] = scaler.fit_transform(X_train[c])
                X_validation[c] = scaler.transform(X_validation[c])

            # random forest hyperparameter search space
            criterion_options = ['gini','entropy']
            max_features_options = ['sqrt','log2']
            parameters = {
                'n_estimators': scope.int(hp.qloguniform('n_estimators', np.log(1e0), np.log(1e3), 1)),
                'criterion': hp.choice('criterion', criterion_options), 
                'max_depth': scope.int(hp.uniform('max_depth', 1, 11)),
                'min_samples_split': hp.uniform('min_samples_split', 0., 1.),
                'min_samples_leaf': hp.uniform('min_samples_leaf', 0., 0.5),
                'max_features': hp.choice('max_features', max_features_options)
                         }

            # save info for hyperopt (hyperopt_function reads the folds back from this file)
            with open('_files/data___.pickle','wb') as f:
                pickle.dump([X_train, y_train, X_validation, y_validation], f)

            # hyperopt to find best parameters
            trials = Trials()
            best = fmin(hyperopt_function, parameters, algo=tpe.suggest, max_evals=n_hyperopt_iterations, trials=trials, rstate=np.random.RandomState(seed_), verbose=0, show_progressbar=True)

            # parameters
            # fmin returns the raw sampled values: hp.choice entries are indices into the option
            # lists, and 'max_depth' is the un-truncated float, so it is cast to int here exactly
            # as scope.int did during the search
            param = {'random_state':seed_, 'n_estimators':int(best['n_estimators']), 'criterion':criterion_options[best['criterion']], 'max_depth':int(best['max_depth']), 'min_samples_split':best['min_samples_split'], 'min_samples_leaf':best['min_samples_leaf'], 'max_features':max_features_options[best['max_features']]}

            # train the stacker on the full train+validation set
            clf = RandomForestClassifier(**param).fit(X_trainvalidation, y_trainvalidation)

            # stacker output on the test set = per-sample weight of every base classifier;
            # predict_proba only has columns for the classes in clf.classes_, so the columns are
            # placed at the index of the base classifier they belong to (a base classifier that
            # was never "best" on the validation data gets weight 0)
            stacker_probabilities = clf.predict_proba(X_test)
            weights = np.zeros((stacker_probabilities.shape[0], len(datasets[a])))
            weights[:, clf.classes_.astype(int)] = stacker_probabilities

            # best base classifier per test sample (same definition as for the validation set);
            # not used further in this cell
            test_best_classifier = []
            for i in range(len(y_test)):
                if y_test[i] == 0:
                    test_best_classifier.append(np.argmin(test_predictions[i,:]))
                elif y_test[i] == 1:
                    test_best_classifier.append(np.argmax(test_predictions[i,:]))
            test_best_classifier = np.array(test_best_classifier)

            # get predictions on test set: weighted average of the base classifiers' predictions
            y_pred = []
            for i in range(len(y_test)):
                y_pred.append(np.average(test_predictions[i,:], weights=weights[i,:]))
            y_pred = np.array(y_pred)

        # if only one dataset: use the single base classifier's predictions unchanged
        else:
            y_pred = y_pred[0]
            weights = np.array([1 for x in y_test]).reshape(-1,1)
        
        # save predictions: test sample index, true labels and final predictions of this split
        predictions_path = '%s/%s/predictions_%d.pickle' % (output_folder,dataset_names[a],b+1)
        with open(predictions_path, 'wb') as predictions_file:
            split_predictions = [X_test_.index.tolist(), y_test, y_pred]
            pickle.dump(split_predictions, predictions_file)
        
        # ---- test performance of this split ---------------------------------------------
        # Every metric is stored in its own table (row 'split_<b+1>', column = dataset
        # combination). After each split the MEAN / STERR rows are refreshed from the splits
        # available so far and the table is re-written to disk.
        split_rows = ['split_%d' % x for x in range(1,n_splits_trainvalidation_test+1)]

        # calculate test performance - weighted log loss
        # (positives are weighted by #negatives/#positives so that both classes carry the same
        # total weight)
        pos_weight = len([x for x in y_test if x==0])/len([x for x in y_test if x==1])
        sample_weights = [pos_weight if x==1 else 1 for x in y_test]
        weightedlogloss = log_loss(y_test, y_pred, sample_weight=sample_weights)

        # calculate test performance - balanced accuracy (positive when prediction >= 0.5)
        y_pred_ = [1 if x>=0.5 else 0 for x in y_pred]
        balancedaccuracy = balanced_accuracy_score(y_test, y_pred_)

        # calculate test performance - auroc (on one-hot labels and [1-p, p] score columns)
        y_pred_ = np.concatenate((np.array([1-x for x in y_pred]).reshape(-1,1), y_pred.reshape(-1,1)), axis=1)
        auroc = roc_auc_score(dummy_y(y_test), y_pred_)

        # optimal threshold: maximise specificity + sensitivity over the ROC thresholds;
        # ties are resolved in favour of the threshold closest to 0.5
        fpr, tpr, thresholds = roc_curve(y_test, y_pred)
        youden = [(1-fpr[i])+tpr[i] for i in range(len(thresholds))]
        best_youden = np.max(youden)
        top_index = [i for i in range(len(youden)) if youden[i] == best_youden]
        top_threshold = [thresholds[i] for i in top_index]
        distance_from_50 = [np.abs(x-0.5) for x in top_threshold]
        closest_to_50 = np.argmin(distance_from_50)
        top_index = top_index[closest_to_50]
        optimal_threshold = top_threshold[closest_to_50]

        # tp, tn, fp, fn and the derived metrics at the fixed (0.5) and the optimal threshold.
        # A sample is called positive when its prediction is >= the threshold: this matches
        # roc_curve and the balanced accuracy above, and guarantees that every sample falls in
        # exactly one of the four cells (a strict comparison would silently drop predictions
        # that are equal to the threshold). Undefined ratios (empty denominator) are stored
        # as NaN instead of raising ZeroDivisionError.
        threshold_metrics = {}
        for threshold_name, threshold in (('50', 0.5), ('optimal', optimal_threshold)):
            tp = len([i for i in range(len(y_test)) if y_test[i]==1 and y_pred[i]>=threshold])
            fp = len([i for i in range(len(y_test)) if y_test[i]==0 and y_pred[i]>=threshold])
            tn = len([i for i in range(len(y_test)) if y_test[i]==0 and y_pred[i]<threshold])
            fn = len([i for i in range(len(y_test)) if y_test[i]==1 and y_pred[i]<threshold])
            threshold_metrics[threshold_name] = {
                'sensitivity': tp/(tp+fn) if (tp+fn) > 0 else np.nan,
                'specificity': tn/(tn+fp) if (tn+fp) > 0 else np.nan,
                'ppv': tp/(tp+fp) if (tp+fp) > 0 else np.nan,
                'npv': tn/(tn+fn) if (tn+fn) > 0 else np.nan,
            }

        # store every metric, refresh MEAN / STERR and save (same file order as before)
        split_results = [
            (performance_files_weightedlogloss, '%s/weightedlogloss.csv', weightedlogloss),
            (performance_files_balancedaccuracy, '%s/balancedaccuracy.csv', balancedaccuracy),
            (performance_files_auroc, '%s/auroc.csv', auroc),
            (performance_files_sensitivity_50, '%s/sensitivity_50.csv', threshold_metrics['50']['sensitivity']),
            (performance_files_specificity_50, '%s/specificity_50.csv', threshold_metrics['50']['specificity']),
            (performance_files_ppv_50, '%s/ppv_50.csv', threshold_metrics['50']['ppv']),
            (performance_files_npv_50, '%s/npv_50.csv', threshold_metrics['50']['npv']),
            (performance_files_optimal_threshold, '%s/optimal_threshold.csv', optimal_threshold),
            (performance_files_sensitivity_optimal, '%s/sensitivity_optimal.csv', threshold_metrics['optimal']['sensitivity']),
            (performance_files_specificity_optimal, '%s/specificity_optimal.csv', threshold_metrics['optimal']['specificity']),
            (performance_files_ppv_optimal, '%s/ppv_optimal.csv', threshold_metrics['optimal']['ppv']),
            (performance_files_npv_optimal, '%s/npv_optimal.csv', threshold_metrics['optimal']['npv']),
        ]
        for performance_file, csv_path_template, performance in split_results:
            performance_file.at['split_%d' % (b+1), dataset_names[a]] = performance
            # splits that are not computed yet (or undefined) are NaN and are ignored
            available = np.array(performance_file.loc[split_rows, dataset_names[a]].values.tolist(), dtype=float)
            n_available = np.count_nonzero(~np.isnan(available))
            if n_available > 0:
                performance_file.at['MEAN', dataset_names[a]] = np.nanmean(available)
                # standard error over the splits that actually have a value
                performance_file.at['STERR', dataset_names[a]] = np.nanstd(available)/np.sqrt(n_available)
            else:
                performance_file.at['MEAN', dataset_names[a]] = np.nan
                performance_file.at['STERR', dataset_names[a]] = np.nan
            performance_file.to_csv(csv_path_template % output_folder)

-------------------------
DATASET: clinical+gene_all+mutation_onehot_all
-------------------------
Split 1
clinical: 243/732 - 33.20% - 305 features
gene_all: 122/732 - 16.67% - 718 features
mutation_onehot_all: 367/732 - 50.14% - 203 features
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 256/256 [03:37<00:00,  4.40it/s, best loss: 1.0094394386045817]
Split 2
clinical: 309/732 - 42.21% - 169 features
gene_all: 78/732 - 10.66% - 831 features
mutation_onehot_all: 345/732 - 47.13% - 2948 features
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 256/256 [02:22<00:00,  3.21it/s, best loss: 0.9561093541900673]
Split 3
clinical: 298/732 - 40.71% - 237 features
gene_all: 85/732 - 11.61% - 144 features
mutation_onehot_all: 349/732 - 47.68% - 110 fe