In [1]:
import numpy as np 
import math as math
import pandas as pd
import seaborn as sns
import sklearn as sk
import missingno as msno
#import matplot
import matplotlib.pyplot as plt
import sklearn.metrics as metrics
import dataframe_image as dfi

import matplotlib
import pathlib
import statistics
import itertools

from collections import Counter

from scipy.stats import probplot, binned_statistic

from pathlib import Path

from statistics import mean

from sklearn import set_config

from sklearn.model_selection import cross_validate, train_test_split, RandomizedSearchCV, GridSearchCV, RepeatedStratifiedKFold, KFold, cross_val_score, StratifiedKFold, RepeatedKFold
from sklearn.ensemble import RandomForestRegressor

from sklearn.metrics import plot_confusion_matrix  

#from sklearn.metrics import mean_squared_error, accuracy_score, confusion_matrix, precision_score, recall_score, ConfusionMatrixDisplay
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler, MinMaxScaler, LabelBinarizer, FunctionTransformer
from sklearn.tree import plot_tree
from sklearn.feature_selection import SelectFromModel
from sklearn.neural_network import MLPRegressor

## DEFINED FUNCTIONS 

In [2]:
def clinical_import():
    clinical_info = pd.read_csv('UPENN-GBM_clinical_info_v1.0.csv',index_col=0)
    clinical_info.replace({'Not Available':np.NaN,'Not Applicable':np.NaN},inplace=True)
    clinical_info['Survival_from_surgery_days'] = pd.to_numeric(clinical_info['Survival_from_surgery_days']) 
    clinical_info['Time_since_baseline_preop'] = pd.to_numeric(clinical_info['Time_since_baseline_preop'])
    clinical_info['KPS'] = pd.to_numeric(clinical_info['KPS'])
    clinical_info['PsP_TP_score'] = pd.to_numeric(clinical_info['PsP_TP_score'])
    return clinical_info 

radiomic_content = {}   
for file_path in pathlib.Path('./RADIOMIC FEATURES').glob('Radiomic_Features_CaPTk_*.csv'):
    df = pd.read_csv(file_path, index_col=0)
    radiomic_content[file_path.stem] = df
    
def radiomic_data_per_subject(subjectID):
    level1 = [] 
    level2 = [] #imaging modes 
    level3 = [] #dataframes
    
    for key in radiomic_content.keys():
        temp_df = radiomic_content[key]
    
        '''create dataframe which includes feature values for all modalities (for each subject)'''
        if subjectID in list(temp_df.index):
            level1.append(subjectID)
            temp_df = pd.DataFrame(temp_df.xs(subjectID))
            '''column headers == new indices (aka radiomic features)'''
            '''subjectID == new column header (to be replaced with type of scan)'''
            
        
            if 'automaticsegm' in key:
                if 'DSC_ap' in key:
                    category = 'AUTO_SEGMENTATION_DSC'
                    mode = str(key.replace('Radiomic_Features_CaPTk_automaticsegm_DSC_ap-',''))
                    temp_df.set_axis(labels = [name.replace('DSC_ap-' + mode + '_','') for name in list(temp_df.index)], axis='index', inplace=True)
                    temp_df.rename(columns={subjectID:mode},inplace=True)
                    
                    #if category not in level2:
                        #level2.append(category)
                    level2.append((category,mode))
                    
                else:
                    category = 'AUTO_SEGMENTATION'
                    mode = str(key.replace('Radiomic_Features_CaPTk_automaticsegm_',''))
                    temp_df.set_axis(labels = [name.replace(mode + '_','') for name in list(temp_df.index)], axis='index', inplace=True)
                    temp_df.rename(columns={subjectID:mode},inplace=True)
                    
                    #if category not in level2:
                        #level2.append(category) 
                    level2.append((category,mode))
                    
            else:
                category = 'SEGMENTATION'
                mode = str(key.replace('Radiomic_Features_CaPTk_segm_',''))
                temp_df.set_axis(labels = [name.replace(mode + '_','') for name in list(temp_df.index)], axis='index', inplace=True)
                temp_df.rename(columns={subjectID:mode},inplace=True)
                
                #if category not in level2:
                    #level2.append(category)
                level2.append((category,mode))

            level3.append(temp_df)
                  
        else:
            continue
            
    radiomic_data = pd.concat(level3, keys=level2, axis=1) 
    radiomic_data = radiomic_data.droplevel(1,axis=1)
            
    radiomic_data = radiomic_data.iloc[0:20,:] 
    
    return radiomic_data

def split_list_by_type(mixed_list):
    int_list = [i for i in mixed_list if isinstance(i, int)]
    str_list = [i for i in mixed_list if isinstance(i, str)]
    return int_list, str_list

def myFunc(unit):
    return len(unit)
 

def data_processor(df, return_scaler=False):
    numeric_df = df.select_dtypes(include=['float','int64']) 
    numeric_int_columns, numeric_str_columns = split_list_by_type(numeric_df.columns)
    numeric_str_columns.sort(reverse=True, key=myFunc)
    numeric_columns = numeric_str_columns + numeric_int_columns
    numeric_df = numeric_df.reindex(columns=numeric_columns)
    print('0th index of scaler', numeric_df.columns[0]) #ensure survival - used for reverse scaling
    
    categorical_df = df.select_dtypes(include=['object','bool']) #.loc[:,['Gender','IDH1','GTR_over90percent']]
  
    scaler = StandardScaler()
    scaler.fit(numeric_df.values)
    scaled_df = pd.DataFrame(scaler.transform(numeric_df.values), columns=numeric_df.columns, index=numeric_df.index)

    le = LabelEncoder()
    columns = categorical_df.columns
    categorical_df[columns] = categorical_df[columns].apply(lambda col:le.fit_transform(col))
    
    processed_df = pd.merge(categorical_df,scaled_df,left_index=True,right_index=True)
    
    xdata = processed_df.loc[:,processed_df.columns!='Survival_from_surgery_days']
    ydata = processed_df[['Survival_from_surgery_days']]
    
    if return_scaler is False:
        return xdata, ydata
    else:
        return scaler   
    
def dropped_column_df(df,threshold):
    dropped_columns = []
    for col in df.columns:
        if df[col].isnull().sum() > threshold*(df.shape[0]):
            #dropped_columns.append(col)
            df = df.drop(col, axis='columns')
    print('number of columns dropped', len(dropped_columns))
    return df


def filter_columns(df,threshold):
    numeric_data = df.select_dtypes(include=['float','int64']) 
    categorical_data = df.loc[:,['Gender','IDH1','GTR_over90percent']]
    scaler = StandardScaler()
    scaler.fit(numeric_data.values)
    scaled_df = pd.DataFrame(scaler.transform(numeric_data.values), columns=numeric_data.columns)
    col_corr = set() # set of all the names of deleted columns
    corr_matrix = scaled_df.corr(method='kendall') #most appropriate method 
    
    for i in range(len(corr_matrix.columns)):
        for j in range(i):
            if (corr_matrix.iloc[i, j] >= threshold) and (corr_matrix.columns[j] not in col_corr):
                colname = corr_matrix.columns[i] # getting the name of column
                col_corr.add(colname)
                if colname in scaled_df.columns:
                    del scaled_df[colname] # deleting the column from the dataset
    
    reduced_columns = list(scaled_df.columns)
    #manually reinclude clinical data
    if 'Age_at_scan_years' not in reduced_columns:
        reduced_columns.append('Age_at_scan_years')
    if 'Survival_from_surgery_days' not in reduced_columns:   
        reduced_columns.append('Survival_from_surgery_days')
    for cat_col in categorical_data.columns:
        reduced_columns.append(cat_col)   
    reduced_df = df.loc[:, reduced_columns]
    return reduced_df #original unscaled values

### HYPER-PARAMETER TUNING

In [3]:
C = clinical_import()

C_base = C.dropna(subset=['Survival_from_surgery_days'])
C_base = C_base.drop('PsP_TP_score',axis='columns')
C_base = C_base.drop('Time_since_baseline_preop', axis='columns')
C_base = C_base.dropna(subset=['GTR_over90percent'])

R_base = pd.DataFrame(columns=[i for i in range(0,1320)]) #matching only those which have necessary clinical data
  
for patient in C_base.index:
    radiomic = radiomic_data_per_subject(patient)
    radiomic_list = radiomic.values.tolist()
    flattened_radiomic_list = list(itertools.chain.from_iterable(radiomic_list))
    R_base = R_base.append(pd.Series(flattened_radiomic_list, index=R_base.columns[:len(flattened_radiomic_list)]), ignore_index=True)

R_base['Subject_ID'] = list(C_base.index)
R_base = R_base.set_index('Subject_ID')

df = pd.merge(C_base,R_base,left_index=True,right_index=True)
print(df.shape)

(424, 1327)


In [4]:
df0 = dropped_column_df(df,0.0).dropna(axis='index')
print(df0.shape)
df1 = dropped_column_df(df,0.1).dropna(axis='index')
print(df1.shape)
df2 = dropped_column_df(df,0.2).dropna(axis='index')
print(df2.shape)
df3 = dropped_column_df(df,0.3).dropna(axis='index')
print(df3.shape)
df4 = dropped_column_df(df,0.4).dropna(axis='index')
print(df4.shape)
df5 = dropped_column_df(df,0.5).dropna(axis='index')
print(df5.shape)
df6 = dropped_column_df(df,0.6).dropna(axis='index')
print(df6.shape)
df7 = dropped_column_df(df,0.7).dropna(axis='index')
print(df7.shape)
df8 = dropped_column_df(df,0.8).dropna(axis='index')
print(df8.shape)
df9 = dropped_column_df(df,0.9).dropna(axis='index')
print(df9.shape)
df10 = dropped_column_df(df,1.0).dropna(axis='index')
print(df10.shape)

num_df = df.select_dtypes(include=['float','int64']) 
imputer = KNNImputer(n_neighbors=2)
imputer.fit(num_df)
imputed_num_df = pd.DataFrame(imputer.transform(num_df), columns=num_df.columns, index=num_df.index)
imputed_df = pd.merge(imputed_num_df, df.select_dtypes(exclude=['float','int64']), left_index=True, right_index=True)  
print('imputed:', imputed_df.shape)

drop_row_df = imputed_df.dropna(axis='index')
print(drop_row_df.shape)
drop_col_df = imputed_df.dropna(axis='columns')
print(drop_col_df.shape)


filtered_df0 = filter_columns(df0, threshold=0.75)
print(filtered_df0.shape)
filtered_df1 = filter_columns(df1, threshold=0.75)
print(filtered_df1.shape)
filtered_df2 = filter_columns(df2, threshold=0.75)
print(filtered_df2.shape)
filtered_df3 = filter_columns(df3, threshold=0.75)
print(filtered_df3.shape)
filtered_df4 = filter_columns(df4, threshold=0.75)
print(filtered_df4.shape)
filtered_df5 = filter_columns(df5, threshold=0.75)
print(filtered_df5.shape)
filtered_df6 = filter_columns(df6, threshold=0.75)
print(filtered_df6.shape)
filtered_df_col = filter_columns(drop_col_df, threshold=0.75)
print(filtered_df_col.shape)
filtered_df_row = filter_columns(drop_row_df, threshold=0.75)
print(filtered_df_row.shape)

number of columns dropped 0
(424, 225)
number of columns dropped 0
(423, 245)
number of columns dropped 0
(363, 485)
number of columns dropped 0
(330, 665)
number of columns dropped 0
(330, 665)
number of columns dropped 0
(168, 666)
number of columns dropped 0
(168, 666)
number of columns dropped 0
(59, 966)
number of columns dropped 0
(48, 1326)
number of columns dropped 0
(14, 1327)
number of columns dropped 0
(14, 1327)
imputed: (424, 1327)
(222, 1327)
(424, 1326)
(424, 129)
(423, 149)
(363, 366)
(330, 448)
(330, 448)
(168, 434)
(168, 434)
(424, 630)
(222, 600)


In [5]:
df_set = [df0,df1,df2,df3,df4,df5,df6,df7,df8,df9,df10,
          drop_row_df,drop_col_df,
          filtered_df0,filtered_df1,filtered_df2,filtered_df3,filtered_df4,filtered_df5,filtered_df6,
          filtered_df_row,filtered_df_col]

rf_set = [RandomForestRegressor(n_estimators=1000, 
                                     min_samples_split=2, 
                                     min_samples_leaf=2, 
                                     max_features='auto',
                                     max_depth=70, 
                                     bootstrap=True), 
          RandomForestRegressor(n_estimators=1800,  
                                 min_samples_split=5,  
                                 min_samples_leaf=4,  
                                 max_features='auto',  
                                 max_depth=100,  
                                 bootstrap=True),
          RandomForestRegressor(n_estimators=1800, 
                                 min_samples_split=5, 
                                 min_samples_leaf=4, 
                                 max_features='auto',
                                 max_depth=100, 
                                 bootstrap=True),
          RandomForestRegressor(n_estimators=600, 
                                 min_samples_split=5, 
                                 min_samples_leaf=4, 
                                 max_features='auto', 
                                 max_depth=10, 
                                 bootstrap=True),
          RandomForestRegressor(n_estimators=1000, 
                                 min_samples_split=5, 
                                 min_samples_leaf=4, 
                                 max_features='auto', 
                                 max_depth=80, 
                                 bootstrap=True),
          RandomForestRegressor(n_estimators=200, 
                                     min_samples_split=5, 
                                     min_samples_leaf=2, 
                                     max_features='auto', 
                                     max_depth=20, 
                                     bootstrap=True),
          RandomForestRegressor(n_estimators= 200, 
                                 min_samples_split= 5, 
                                 min_samples_leaf= 4, 
                                 max_features= 'auto', 
                                 max_depth= 110, 
                                 bootstrap= False),
          RandomForestRegressor(n_estimators=1200, 
                                     min_samples_split=5, 
                                     min_samples_leaf=2, 
                                     max_features='auto', 
                                     max_depth=110, 
                                     bootstrap=False),
          RandomForestRegressor(n_estimators=200,
                                     min_samples_split=5, 
                                     min_samples_leaf=2,
                                     max_features='auto',
                                     max_depth=90, 
                                     bootstrap=False),
          RandomForestRegressor(n_estimators=600, 
                                 min_samples_split=10, 
                                 min_samples_leaf=4, 
                                 max_features='auto', 
                                 max_depth=None, 
                                 bootstrap=True),
          RandomForestRegressor(n_estimators=1000, 
                                 min_samples_split=5, 
                                 min_samples_leaf=4, 
                                 max_features='auto', 
                                 max_depth=80, 
                                 bootstrap=True),
          RandomForestRegressor(n_estimators=200, 
                                 min_samples_split=2, 
                                 min_samples_leaf=2, 
                                 max_features='sqrt', 
                                 max_depth=80, 
                                 bootstrap=True),
          RandomForestRegressor(n_estimators=400, 
                                 min_samples_split=10, 
                                 min_samples_leaf=4, 
                                 max_features='sqrt', 
                                 max_depth=30, 
                                 bootstrap=True),
          RandomForestRegressor(n_estimators=400, 
                                 min_samples_split=10, 
                                 min_samples_leaf=4, 
                                 max_features='sqrt', 
                                 max_depth=30, 
                                 bootstrap=True),
          RandomForestRegressor(n_estimators=800, 
                                 min_samples_split=10, 
                                 min_samples_leaf=4, 
                                 max_features='auto', 
                                 max_depth=90, 
                                 bootstrap=True),
          RandomForestRegressor(n_estimators=1400, 
                                     min_samples_split=10, 
                                     min_samples_leaf=4, 
                                     max_features='sqrt', 
                                     max_depth=40, 
                                     bootstrap=True),
           RandomForestRegressor(n_estimators=1800, 
                                 min_samples_split=5, 
                                 min_samples_leaf=4, 
                                 max_features='auto', 
                                 max_depth=100, 
                                 bootstrap=True),
          RandomForestRegressor(n_estimators=600, 
                                     min_samples_split=10, 
                                     min_samples_leaf=4, 
                                     max_features='auto', 
                                     max_depth=None, 
                                     bootstrap=True)
         ]


mlp_set = [MLPRegressor(solver='sgd', 
                             learning_rate='adaptive', 
                             hidden_layer_sizes=133, 
                             activation='logistic'),
           MLPRegressor(solver='sgd', 
                              learning_rate='adaptive', 
                              hidden_layer_sizes=155, 
                              activation='logistic'),
           MLPRegressor(solver='sgd', 
                             learning_rate='adaptive', 
                             hidden_layer_sizes=133, 
                             activation='logistic'),
           MLPRegressor(solver='sgd', 
                        learning_rate='constant', 
                        hidden_layer_sizes=155, 
                        activation='logistic'),
           MLPRegressor(solver='sgd', 
                        learning_rate='adaptive', 
                        hidden_layer_sizes=144, 
                        activation='logistic'),
           MLPRegressor(solver='sgd', 
                         learning_rate='adaptive', 
                         hidden_layer_sizes=111, 
                         activation='logistic'),
           MLPRegressor(solver='sgd', 
                         learning_rate='constant', 
                         hidden_layer_sizes=155, 
                         activation='logistic'),
           MLPRegressor(solver='sgd', 
                        learning_rate='constant', 
                        hidden_layer_sizes=166, 
                        activation='logistic'),
           MLPRegressor(solver='lbfgs', 
                        learning_rate='constant', 
                        hidden_layer_sizes=188, 
                        activation='logistic'),
           MLPRegressor(solver='sgd', 
                          learning_rate='adaptive', 
                          hidden_layer_sizes=144, 
                          activation='logistic'),
           MLPRegressor(solver='adam', 
                          learning_rate='adaptive', 
                          hidden_layer_sizes=188, 
                          activation='logistic'),
           MLPRegressor(solver='sgd', 
                         learning_rate='adaptive', 
                         hidden_layer_sizes=111, 
                         activation='logistic'),
           MLPRegressor(solver='sgd', 
                         learning_rate='constant', 
                         hidden_layer_sizes=111, 
                         activation='logistic'),
           MLPRegressor(solver='sgd', 
                         learning_rate='adaptive', 
                         hidden_layer_sizes=155, 
                         activation='logistic'),
           MLPRegressor(solver='sgd', 
                         learning_rate='constant', 
                         hidden_layer_sizes=155, 
                         activation='logistic'),
           MLPRegressor(solver='sgd', 
                         learning_rate='constant', 
                         hidden_layer_sizes=111, 
                         activation='logistic'),
           MLPRegressor(solver='sgd', 
                         learning_rate='adaptive', 
                         hidden_layer_sizes=133, 
                         activation='logistic'),
           MLPRegressor(solver='sgd', 
                         learning_rate='adaptive', 
                         hidden_layer_sizes=144, 
                         activation='logistic')
          ]

In [38]:
unscaled_y = {}
rf_r2_score = {}
rf_mae = {}
rf_rmse = {}
rf_unscaled_predict = {}
mlp_r2_score = {}
mlp_mae = {}
mlp_rmse = {}
mlp_unscaled_predict = {}
rf_diff = {}
mlp_diff = {}

i = 0
for df,rf_model,mlp_model in zip(df_set,rf_set,mlp_set):
    
    xdata, ydata = data_processor(df,return_scaler=False)  
    scaler = data_processor(df,return_scaler=True)

    rkf = RepeatedKFold(n_splits=2, n_repeats=10, random_state=0)
    
    temp_unscaled_y = []
    temp_rf_r2_score = []
    temp_rf_mae = []
    temp_rf_rmse = []
    temp_rf_unscaled_predict = []
    temp_mlp_r2_score = []
    temp_mlp_mae = []
    temp_mlp_rmse = []
    temp_mlp_unscaled_predict = []
    temp_rf_diff = []
    temp_mlp_diff = []
    
    
    for train_index, test_index in rkf.split(xdata, ydata): 

        x_train_fold, x_test_fold = xdata.iloc[train_index], xdata.iloc[test_index]
        y_train_fold, y_test_fold = ydata.iloc[train_index], ydata.iloc[test_index]
    
        temp_unscaled_y.append(y_test_fold*(np.sqrt(scaler.var_[0])) + scaler.mean_[0])

        #RANDOM FOREST
        rf_scores = cross_val_score(rf_model, x_train_fold, y_train_fold , scoring='neg_mean_absolute_error', cv=5, n_jobs=-1, error_score='raise')
        temp_rf_mae.append(np.mean(np.absolute(rf_scores)))
        temp_rf_rmse.append(np.sqrt(np.mean(np.absolute(rf_scores))))
        rf_model.fit(x_train_fold, np.ravel(y_train_fold))
        temp_rf_r2_score.append(rf_model.score(x_test_fold, y_test_fold))
        rf_prediction = rf_model.predict(x_test_fold)
        temp_rf_unscaled_predict.append(rf_prediction*(np.sqrt(scaler.var_[0])) + scaler.mean_[0])
    
        #MULTI-LAYER PERCEPTRON
        mlp_scores = cross_val_score(mlp_model, x_train_fold, y_train_fold , scoring='neg_mean_absolute_error', cv=5, n_jobs=-1, error_score='raise')
        temp_mlp_mae.append(np.mean(np.absolute(mlp_scores)))
        temp_mlp_rmse.append(np.sqrt(np.mean(np.absolute(mlp_scores))))
        mlp_model.fit(x_train_fold, np.ravel(y_train_fold))
        temp_mlp_r2_score.append(mlp_model.score(x_test_fold, y_test_fold))
        mlp_prediction = mlp_model.predict(x_test_fold)
        temp_mlp_unscaled_predict.append(mlp_prediction*(np.sqrt(scaler.var_[0])) + scaler.mean_[0])
    
        N_GTR = x_test_fold.copy()
        N_GTR['GTR_over90percent'] = '0' #NO 
        rf_N = (rf_model.predict(N_GTR))*(np.sqrt(scaler.var_[0])) + scaler.mean_[0]
        mlp_N = (mlp_model.predict(N_GTR))*(np.sqrt(scaler.var_[0])) + scaler.mean_[0]
    
        Y_GTR = x_test_fold.copy()
        Y_GTR['GTR_over90percent'] = '1' #YES
        rf_Y = (rf_model.predict(Y_GTR))*(np.sqrt(scaler.var_[0])) + scaler.mean_[0]
        mlp_Y = (mlp_model.predict(Y_GTR))*(np.sqrt(scaler.var_[0])) + scaler.mean_[0]
    
        temp_rf_diff.append(rf_Y - rf_N)
        temp_mlp_diff.append(mlp_Y - mlp_N)
        
        
    unscaled_y[i] = temp_unscaled_y
    rf_r2_score[i] =  temp_rf_r2_score
    rf_mae[i] = temp_rf_mae
    rf_rmse[i] = temp_rf_rmse
    rf_unscaled_predict[i] = temp_rf_unscaled_predict
    mlp_r2_score[i] = temp_mlp_r2_score
    mlp_mae[i] = temp_mlp_mae
    mlp_rmse[i] = temp_mlp_rmse
    mlp_unscaled_predict[i] = temp_mlp_unscaled_predict
    rf_diff[i] = temp_rf_diff
    mlp_diff[i] = temp_mlp_diff
        
    i = i+1

0th index of scaler Survival_from_surgery_days


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


0th index of scaler Survival_from_surgery_days


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


0th index of scaler Survival_from_surgery_days


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


0th index of scaler Survival_from_surgery_days


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


0th index of scaler Survival_from_surgery_days


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


0th index of scaler Survival_from_surgery_days


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


0th index of scaler Survival_from_surgery_days


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


0th index of scaler Survival_from_surgery_days


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


0th index of scaler Survival_from_surgery_days


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


0th index of scaler Survival_from_surgery_days


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


0th index of scaler Survival_from_surgery_days


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


0th index of scaler Survival_from_surgery_days


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


0th index of scaler Survival_from_surgery_days


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


0th index of scaler Survival_from_surgery_days


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


0th index of scaler Survival_from_surgery_days


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


0th index of scaler Survival_from_surgery_days


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


0th index of scaler Survival_from_surgery_days


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


0th index of scaler Survival_from_surgery_days


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


0th index of scaler Survival_from_surgery_days


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


0th index of scaler Survival_from_surgery_days


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


0th index of scaler Survival_from_surgery_days


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


0th index of scaler Survival_from_surgery_days


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


0th index of scaler Survival_from_surgery_days


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


0th index of scaler Survival_from_surgery_days


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


0th index of scaler Survival_from_surgery_days


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


0th index of scaler Survival_from_surgery_days


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


0th index of scaler Survival_from_surgery_days
0th index of scaler Survival_from_surgery_days


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


0th index of scaler Survival_from_surgery_days
0th index of scaler Survival_from_surgery_days


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


0th index of scaler Survival_from_surgery_days


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


0th index of scaler Survival_from_surgery_days




0th index of scaler Survival_from_surgery_days


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


0th index of scaler Survival_from_surgery_days


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


0th index of scaler Survival_from_surgery_days
0th index of scaler Survival_from_surgery_days


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


In [36]:
for i in range(0,17): 
    print('\n')
    print(i)
    print('RF r2', np.mean(rf_r2_score[i]))
    print('RF mae', np.mean(rf_mae[i]))
    print('RF rmse', np.mean(rf_rmse[i]))
    print('MLP r2', np.mean(mlp_r2_score[i]))
    print('MLP mae', np.mean(mlp_mae[i]))
    print('MLP rmse', np.mean(mlp_rmse[i]))



0
RF r2 417.41745283018867
RF mae 0.7352309902323617
RF rmse 0.8569144055078333
MLP r2 0.005555865074638977
MLP mae 0.7340952251673685
MLP rmse 0.8564294522966239


1
RF r2                     Survival_from_surgery_days
ID                                            
UPENN-GBM-00002_11                         NaN
UPENN-GBM-00006_11                         NaN
UPENN-GBM-00008_11                         NaN
UPENN-GBM-00009_11                         NaN
UPENN-GBM-00011_11                         NaN
UPENN-GBM-00012_11                         NaN
UPENN-GBM-00013_11                         NaN
UPENN-GBM-00014_11                         NaN
UPENN-GBM-00016_11                         NaN
UPENN-GBM-00017_11                         NaN
UPENN-GBM-00018_11                         NaN
UPENN-GBM-00020_11                         NaN
UPENN-GBM-00021_11                         NaN
UPENN-GBM-00026_11                         NaN
UPENN-GBM-00029_11                         NaN
UPENN-GBM-00030_11        

  return array(a, dtype, copy=False, order=order, subok=True)


0.7363109631394688
RF rmse 0.8574312702597245
MLP r2 -0.00020206072645719274
MLP mae 0.7416202393371176
MLP rmse 0.8607348153204134


5
RF r2 444.5238095238095
RF mae 0.7756872900139689
RF rmse 0.8802143688745749
MLP r2 -0.050983215718912886
MLP mae 0.7818233722738728
MLP rmse 0.8837164458830239


6
RF r2 444.5238095238095
RF mae 0.9860739674289055
RF rmse 0.9912772619758551
MLP r2 -0.054611035915280215
MLP mae 0.7838864809010768
MLP rmse 0.8849984506773367


7
RF r2                     Survival_from_surgery_days
ID                                            
UPENN-GBM-00082_11                         NaN
UPENN-GBM-00088_11                         NaN
UPENN-GBM-00091_11                         NaN
UPENN-GBM-00093_11                         NaN
UPENN-GBM-00115_11                         NaN
UPENN-GBM-00117_11                         NaN
UPENN-GBM-00118_11                         NaN
UPENN-GBM-00119_11                         NaN
UPENN-GBM-00121_11                         NaN
UPENN-GBM-0

In [37]:
rf_r2_score[2]

[                    Survival_from_surgery_days
 ID                                            
 UPENN-GBM-00006_11                       626.0
 UPENN-GBM-00012_11                       882.0
 UPENN-GBM-00013_11                       472.0
 UPENN-GBM-00014_11                       272.0
 UPENN-GBM-00016_11                       509.0
 UPENN-GBM-00021_11                       510.0
 UPENN-GBM-00030_11                       502.0
 UPENN-GBM-00033_11                       707.0
 UPENN-GBM-00034_11                       464.0
 UPENN-GBM-00040_11                       360.0
 UPENN-GBM-00041_11                       598.0
 UPENN-GBM-00043_11                       172.0
 UPENN-GBM-00057_11                       673.0
 UPENN-GBM-00058_11                       392.0
 UPENN-GBM-00060_11                       445.0
 UPENN-GBM-00065_11                       111.0
 UPENN-GBM-00066_11                       493.0
 UPENN-GBM-00068_11                       389.0
 UPENN-GBM-00072_11                     