In [1]:
import numpy as np
import pandas as pd
import json
from epilepsypcm.utils.outcome_params import node_location, early_propogation, irritative_zone

import warnings
warnings.filterwarnings("ignore")

from epilepsypcm.utils.make_df import make_df, get_df_list, concat_dfs
from epilepsypcm.models.base_models import *
from sklearn.metrics import auc

all_positive_patients = pd.read_csv("df_final.csv", sep = ',').drop(columns = "Unnamed: 0")

print(all_positive_patients["outcome"].value_counts())


0.0    259
1.0     56
Name: outcome, dtype: int64


### Run Wilcoxon rank-sum test (WRS) on individual features to ensure no statistical difference between 0 and 1 classes in data.

In [4]:
from scipy.stats import ranksums

df_0 = all_positive_patients[all_positive_patients["outcome"] == 0].drop(columns = ["Channels", "outcome", "patient"])
df_1 = all_positive_patients[all_positive_patients["outcome"] == 1].drop(columns = ["Channels", "outcome", "patient"])

columns = df_0.columns

for col in columns:
    sample_0 = list(df_0[col])
    sample_1 = list(df_1[col])
    
    ranksum = ranksums(sample_0, sample_1)
    
    
    print(col + " P-value: ", + ranksum[1])
    print("Mean 0s: " + str(np.mean(sample_0)))
    print("Mean 1s: " + str(np.mean(sample_1)))
    
    
    

IZ P-value:  0.0023563432436153565
Mean 0s: 0.11583011583011583
Mean 1s: 0.375
EP P-value:  0.8963635749431214
Mean 0s: 0.10038610038610038
Mean 1s: 0.08928571428571429
SigResp P-value:  0.00033514514531294166
Mean 0s: 0.22879118035887835
Mean 1s: 0.13806592421176533
SigStim P-value:  0.00565856606597545
Mean 0s: 0.22555297562527293
Mean 1s: 0.15242027841761563
N1RespAvg P-value:  0.0036033473811909558
Mean 0s: 8.120916897381566
Mean 1s: 5.268444088131206
N1RespSDV P-value:  0.10669289035395962
Mean 0s: 13.83138403523032
Mean 1s: 9.746613708438597
N2RespAvg P-value:  6.16523659411747e-05
Mean 0s: 6.647236182379371
Mean 1s: 4.185707169945352
N2RespSDV P-value:  0.0015422919666230085
Mean 0s: 8.703538695872686
Mean 1s: 5.2858279332347635
P2RespAvg P-value:  2.496349892694341e-05
Mean 0s: 4.643365880461401
Mean 1s: 3.0016979251075093
P2RespSDV P-value:  0.0008144632129876115
Mean 0s: 6.551297092580054
Mean 1s: 4.409637924287424
N1StimAvg P-value:  0.2635236420992709
Mean 0s: 7.90818202071

### Run WRS test on data in Figure 2 to show statistical significance

In [37]:
plot_roc = False
plot_pr = False


X_list = ["Channels", "N1RespAvg", "N1RespSDV", "N2RespAvg", "N2RespSDV", "P2RespAvg", "P2RespSDV", 
          "N1StimAvg", "N1StimSDV", "N2StimAvg", "N2StimSDV", "P2StimAvg", "P2StimSDV"]

smote = False
max_depth = 20
rf, test_channels, y_pred_nodal, y_test, tpr, fpr, roc_thresholds, precision, recall = random_forest(all_positive_patients, X_list, max_depth, plot_roc, plot_pr, smote)

X_list = ["Channels","InDegree", "OutDegree", "Closeness"]
smote = False
max_depth =20
rf, test_channels, y_pred_network, y_test, tpr, fpr, roc_thresholds, precision, recall = random_forest(all_positive_patients, X_list, max_depth, plot_roc, plot_pr, smote)



X_list = ["Channels", "SigResp", "N1RespAvg", "N1RespSDV", "N2RespAvg", "N2RespSDV", "P2RespAvg", "P2RespSDV", 
          "SigStim", "N1StimAvg", "N1StimSDV", "N2StimAvg", "N2StimSDV", "P2StimAvg", "P2StimSDV",
         "InDegree", "OutDegree", "EV", "Closeness"]
smote = False
max_depth = 20
rf, test_channels, y_pred_nodal_network, y_test, tpr, fpr, roc_thresholds, precision, recall = random_forest(all_positive_patients, X_list, max_depth, plot_roc, plot_pr, smote)

 

In [43]:
Nodal = 0.81
Network = 0.85
Nodal_Network = 0.87

ranksum_nodal_v_network = ranksums(y_pred_nodal, y_pred_network)
ranksum_nodal_v_both = ranksums(y_pred_nodal, y_pred_nodal_network)
ranksum_network_v_both = ranksums(y_pred_network, y_pred_nodal_network)

ranksum_nodal_v_network
ranksum_nodal_v_both
ranksum_network_v_both

RanksumsResult(statistic=-0.2816760931606028, pvalue=0.778191887422782)

### Find equation for importance in the model and add table into Feature Importance section

In [60]:
from sklearn.tree import export_graphviz

# feature importances

X_list = ["Channels", "SigResp", "N1RespAvg", "N1RespSDV", "N2RespAvg", "N2RespSDV", "P2RespAvg", "P2RespSDV", 
          "SigStim", "N1StimAvg", "N1StimSDV", "N2StimAvg", "N2StimSDV", "P2StimAvg", "P2StimSDV",
         "InDegree", "OutDegree", "EV", "Closeness"]

#random forest + network features
smote = False
max_depth = 20
rf, y_pred, test_channels, y_test, tpr, fpr, roc_thresholds, precision, recall = random_forest(all_positive_patients, X_list, max_depth, plot_roc, plot_pr, smote)

#rf.feature_importances_

#pd.DataFrame(columns, rf.feature_importances_)

fi = pd.DataFrame(X_list[1:], rf.feature_importances_).reset_index()
fi

Unnamed: 0,index,0
0,0.035151,SigResp
1,0.045085,N1RespAvg
2,0.069283,N1RespSDV
3,0.07041,N2RespAvg
4,0.061182,N2RespSDV
5,0.063788,P2RespAvg
6,0.05569,P2RespSDV
7,0.050703,SigStim
8,0.060091,N1StimAvg
9,0.083526,N1StimSDV
