In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
import json
import glob
import os

from epilepsypcm.utils.outcome_params import seizure_onset_zone, engel_score, early_propogation, irritative_zone
from sklearn.metrics import auc
from epilepsypcm.models.base_models import *

# INPUT
# patient = string format, patient number
# paths = path to CCEP response files, in os format
# OUTPUT
# df = dataframe for one patient with
#       X features with columns: chNames, significant, n1, n2, p2 z scores,
#       n1, n2, p2 latencies, and flipped
#       and associated y outcome labels
def make_df(patient, paths):
    #extracting info from each response file
    n = 0
    stimChs = []
    for i in range(len(paths)):
        chNames = []
        # load info into python dictionary
        data = json.load(open(paths[i]))
        p_name = patient

        # Get list of channel names
        for key in data["time"]: chNames.append(key)
        # loop over each channel, and extract average time series and information about the peaks

        if n < 1:
            avgResp = np.empty((len(paths), len(chNames), len(data['time'][chNames[0]])))
            significant = np.empty((len(paths), len(chNames)))
            n1Zscore = np.empty((len(paths), len(chNames)))
            n2Zscore = np.empty((len(paths), len(chNames)))
            p2Zscore = np.empty((len(paths), len(chNames)))
            n1Latency = np.empty((len(paths), len(chNames)))
            n2Latency = np.empty((len(paths), len(chNames)))
            p2Latency = np.empty((len(paths), len(chNames)))
            flipped = np.empty((len(paths), len(chNames)))
            n += 1
            samplingRate = np.empty((len(paths)))
            window = np.empty((len(paths), 2))

        for j in range(len(chNames)):
            avgResp[i][j] = data['time'][chNames[j]]
            significant[i][j] = data['significant'][chNames[j]]
            n1Zscore[i][j] = data['zscores'][chNames[j]]['n1'][1]
            n2Zscore[i][j] = data['zscores'][chNames[j]]['n2'][1]
            p2Zscore[i][j] = data['zscores'][chNames[j]]['p2'][1]
            n1Latency[i][j] = data['zscores'][chNames[j]]['n1'][0] + data['window'][0] * data["samplingRate"] / 1000
            n2Latency[i][j] = data['zscores'][chNames[j]]['n2'][0] + data['window'][0] * data["samplingRate"] / 1000
            p2Latency[i][j] = data['zscores'][chNames[j]]['p2'][0] + data['window'][0] * data["samplingRate"] / 1000
            flipped[i][j] = data['zscores'][chNames[j]]['flipped']

        samplingRate[i] = data["samplingRate"]
        window[i] = data['window']
        stimChs = stimChs + [paths[i].split("_")[1] + "_" + paths[i].split("_")[2]]*len(chNames)


    # creating dataframe

    df = pd.DataFrame()
    df["stimChs"] = stimChs
    df["respChs"] = chNames * len(paths)
    df["significant"] = significant.flatten()
    df["n1Zscore"] = n1Zscore.flatten()
    df["n2Zscore"] = n2Zscore.flatten()
    df["p2Zscore"] = p2Zscore.flatten()
    df["n1Latency"] = n1Latency.flatten()
    df["n2Latency"] = n2Latency.flatten()
    df["p2Latency"] = p2Latency.flatten()
    df["flipped"] = flipped.flatten()
    df["patient"] = p_name

    # Dropped rows for stimulating channels since they only
    # contain stimulating waveforms / artifacts / saturated signals
    # Also zero out rows with latency values of -999.0

    # drop rows in the dataframe with latency values of -999.0
    df = df.drop(df.loc[df["n1Latency"] == -999.0].index)
    df = df.drop(df.loc[df["n1Latency"] == -499.0].index)

    # adding dataframe outcome values (1 if in SOZ, 0 if not)

    
    # adding dataframe outcome values (1 if in SOZ, 0 if not)
    df["outcome"] = np.zeros(df.shape[0])
    df["IZ"] = np.zeros(df.shape[0])
    df["EP"] = np.zeros(df.shape[0])
    
    if engel_score[patient] == "1":
        if seizure_onset_zone[patient] != ["None"]:
            for node in seizure_onset_zone[patient]:
                for channel in df["respChs"]:
                    channel_split = channel.split("_")
                    if (node == channel_split[0]) | (node == channel_split[1]):
                        df.loc[df['respChs']==channel, ['outcome']] = 1
                    elif ("0" in channel_split[0][1:-1]) | ("0" in channel_split[1][1:-1]): # LA01 vs LA1
                        if "0" in channel_split[0][1:-1]:
                            channel_new = channel_split[0].replace("0", "")
                            if node == channel_new:
                                df.loc[df['respChs']==channel, ['outcome']] = 1
                        if "0" in channel_split[1][1:-1]:
                            channel_new = channel_split[1].replace("0", "")
                            if node == channel_new:
                                df.loc[df['respChs']==channel, ['outcome']] = 1

        if irritative_zone[patient] != ["None"]:
            for IZnode in irritative_zone[patient]:
                for channel in df["respChs"]:
                    channel_split = channel.split("_")
                    if (IZnode == channel_split[0]) | (IZnode == channel_split[1]):
                        df.loc[df['respChs']==channel, ['IZ']] = 1
        if early_propogation[patient] != ["None"]:
            for EPnode in early_propogation[patient]:
                for channel in df["respChs"]:
                    channel_split = channel.split("_")
                    if (EPnode == channel_split[0]) | (EPnode == channel_split[1]):
                        df.loc[df['respChs']==channel, ['EP']] = 1
                
    return df

# Function that takes in the location of all patient folders and engel
# score of interest, and returns a nested list of dataframes for each patient
# INPUT:
# base_path = string, file location to base folder that contains all patient folders
# engel_score = string, target engel score to get dataframe for (ex. "1")
#               can currently only handle "1" and "2"
# OUTPUT:
# positive_dataframes = a nested list, where [patient number (string), dataframe].


In [2]:
def df_processing(D):
    D.reset_index(drop = True, inplace=True)

    #Find channel names that exists both in stimChs and respChs - only account channels that have arrows going out and in
    overlap = []
    for channel in D.respChs.unique():
        if channel in D.stimChs.unique():
            overlap.append(channel)

    #Keep only the response that were stimulated in responded in the channel in overlap list
    dropindxs = []
    for i in range(len(D)):
        if D.iloc[i].stimChs not in overlap or D.iloc[i].respChs not in overlap:
                dropindxs.append(i)
    D.drop(dropindxs,inplace=True)
    D.reset_index(drop = True, inplace=True)

    D.n1Zscore = abs(D.n1Zscore)
    D.n2Zscore = abs(D.n2Zscore)
    D.p2Zscore = abs(D.p2Zscore)
    
    #start processing
    df = pd.DataFrame()
    ChNames = overlap
    Outcomes = np.array([])
    IZ = np.array([])
    EP = np.array([])
    Per_Significant_Resp = np.array([])
    Per_Significant_Stim = np.array([])
    N1_Avg_Resp = np.array([])
    N1_STV_Resp = np.array([])
    N2_Avg_Resp = np.array([])
    N2_STV_Resp = np.array([])
    P2_Avg_Resp = np.array([])
    P2_STV_Resp = np.array([])
    N1_Avg_Stim = np.array([])
    N1_STV_Stim = np.array([])
    N2_Avg_Stim = np.array([])
    N2_STV_Stim = np.array([])
    P2_Avg_Stim = np.array([])
    P2_STV_Stim = np.array([])

    for channel in ChNames:
        Resp = D[D.respChs == channel]
        Stim = D[D.stimChs == channel]

        Outcomes = np.append(Outcomes,Resp[:1].outcome)
        IZ = np.append(IZ,Resp[:1].IZ)
        EP = np.append(EP,Resp[:1].EP)

        Per_Significant_Resp = np.append(Per_Significant_Resp,
                                         sum(Resp.significant/len(Resp)))
        Per_Significant_Stim = np.append(Per_Significant_Stim,
                                         sum(Stim.significant/len(Stim)))

        N1_Avg_Resp = np.append(N1_Avg_Resp,sum(Resp.n1Zscore)/len(Resp))
        N1_STV_Resp = np.append(N1_STV_Resp,np.std(Resp.n1Zscore))

        N2_Avg_Resp = np.append(N2_Avg_Resp,sum(Resp.n2Zscore)/len(Resp))
        N2_STV_Resp = np.append(N2_STV_Resp,np.std(Resp.n2Zscore))

        P2_Avg_Resp = np.append(P2_Avg_Resp,sum(Resp.p2Zscore)/len(Resp))
        P2_STV_Resp = np.append(P2_STV_Resp,np.std(Resp.p2Zscore))

        N1_Avg_Stim = np.append(N1_Avg_Stim,sum(Stim.n1Zscore)/len(Stim))
        N1_STV_Stim = np.append(N1_STV_Stim,np.std(Stim.n1Zscore))

        N2_Avg_Stim = np.append(N2_Avg_Stim,sum(Stim.n2Zscore)/len(Stim))
        N2_STV_Stim = np.append(N2_STV_Stim,np.std(Stim.n2Zscore))

        P2_Avg_Stim = np.append(P2_Avg_Stim,sum(Stim.p2Zscore)/len(Stim))
        P2_STV_Stim = np.append(P2_STV_Stim,np.std(Stim.p2Zscore))


    df['Channels'] = ChNames
    df['outcome'] = Outcomes
    df['IZ'] = IZ
    df['EP'] = EP
    df['SigResp'] = Per_Significant_Resp
    df['SigStim'] = Per_Significant_Stim
    df['N1RespAvg'] = N1_Avg_Resp
    df['N1RespSDV'] = N1_STV_Resp
    df['N2RespAvg'] = N2_Avg_Resp
    df['N2RespSDV'] = N2_STV_Resp
    df['P2RespAvg'] = P2_Avg_Resp
    df['P2RespSDV'] = P2_STV_Resp
    df['N1StimAvg'] = N1_Avg_Stim
    df['N1StimSDV'] = N1_STV_Stim
    df['N2StimAvg'] = N2_Avg_Stim
    df['N2StimSDV'] = N2_STV_Stim
    df['P2StimAvg'] = P2_Avg_Stim
    df['P2StimSDV'] = P2_STV_Stim
    df['patient'] = D.iloc[0].patient
    
    df["InDegree"] = np.zeros(df.shape[0])
    df["OutDegree"] = np.zeros(df.shape[0])
    df["EV"] = np.zeros(df.shape[0])
    df["Closeness"] = np.zeros(df.shape[0])
    
    G = nx.DiGraph()
    for i in range(D.shape[0]):
        if D.significant.iloc[i] == 1:
            G.add_edge(D.stimChs.iloc[i],D.respChs.iloc[i])

    EV_Centrality = nx.eigenvector_centrality(G)
    Closeness_Centrality = nx.closeness_centrality(G)
    InDegree = nx.in_degree_centrality(G)
    OutDegree = nx.out_degree_centrality(G)
    
    for channel in list(EV_Centrality):
        df.loc[df.Channels == channel, 'EV'] = EV_Centrality[channel]
    for channel in list(Closeness_Centrality):
        df.loc[df.Channels == channel, 'Closeness'] = Closeness_Centrality[channel]
    for channel in list(InDegree):
        df.loc[df.Channels == channel, 'InDegree'] = InDegree[channel]
    for channel in list(OutDegree):
        df.loc[df.Channels == channel, 'OutDegree'] = OutDegree[channel]

            
    return df

In [3]:
import glob
import os
from pathlib import Path

def get_df_list(base_path, engel):
    patient_files = os.listdir(base_path)

    positive_dataframes = []
    for file in patient_files:
        if (file[0] == "P") & (file != "PY16N006"):
            response_path = base_path + file + '/ResponseInfo/CCEP'
            response_files_path = glob.glob(response_path + '/*.json', recursive=True)

            # Getting individual dataframe for positive patients
            patient = file
            if file in engel_score.keys():  # if we currently have the file's engel score
                if engel_score[patient] == engel:  # if the engel score is 1
                    df = make_df(patient, response_files_path)
                    positive_dataframes.append([patient, df])

    return positive_dataframes

# Function that combines dataframes for all patients of a particular
# engel class
# INPUT:
# base_path = string, file location to base folder that contains all patient folders
# engel_score = string, target engel score to get dataframe for (ex. "1")
#               can currently only handle "1" and "2"
# balance (OPTIONAL, default = None) = "None", "upsample", or "downsample"
#          will upsample minority class or downsample majority class to balance
#           the data
# OUTPUT:
# all_positive_patients = a concatonated dataframe of all patients


In [4]:

from sklearn.utils import resample

def concat_dfs(base_path, engel, balance = None):
    
    patient_files = os.listdir(base_path)

    full_df = pd.DataFrame()
    for file in patient_files:
        if (file[0] == "P") & (file != "PY16N006") & (file != 'PY17N014'): #PY17N014 was eliminated because there is no node with significant response
            response_path = base_path + file + '/ResponseInfo/CCEP'
            response_files_path = glob.glob(response_path + '/*.json', recursive=True)

            # Getting individual dataframe for positive patients
            patient = file
            if file in engel_score.keys():  # if we currently have the file's engel score
                if engel_score[patient] == engel:  # if the engel score is 1
                    df = make_df(patient, response_files_path)
                    df = df_processing(df)
                    full_df = pd.concat([full_df, df])
                    
                    print('%s done...'%patient)

    # seperate dataframes for class
    df_majority = full_df[full_df.outcome == 0]
    df_minority = full_df[full_df.outcome == 1]

    # upsample data if balance parameter is set to "Upsample" or "upsample"
    if (balance == "upsample") | (balance == "Upsample"):
        # Upsample minority class
        df_minority_upsampled = resample(df_minority,
                                         replace=True,  # sample with replacement
                                         n_samples=full_df["outcome"].value_counts()[0.0],
                                         # to match majority class
                                         random_state=123)  # reproducible results


        # combine dataframes
        full_df = pd.concat([df_majority, df_minority_upsampled])

    # downsample data if balance parameter is set to "downsample" or "Downsample"
    elif (balance == "downsample") | (balance == "Downsample"):
        # downsample majority class
        # downsample majority class
        df_majority_downsampled = resample(df_majority,
                                           replace=False,  # sample without replacement
                                           n_samples= full_df["outcome"].value_counts()[1.0],
                                           # to match minority class
                                           random_state=123)  # reproducible results


        full_df = pd.concat([df_majority_downsampled, df_minority])

    return full_df


# Function that upsamples or downsamples a training set to balance classes
# INPUT:
# X_train = output from train_test_split function
# y_train = output from train_test_split function
# balance = "upsample", or "downsample"
#          will upsample minority class or downsample majority class to balance
#           the data
# OUTPUT:
# X_train = new balanced X training data
# y_train = new balanced y training data

In [5]:
from sklearn.utils import resample
# from sklearn.model_selection import train_test_split

# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

def class_balance(X_train, y_train, balance):
    full_df = pd.concat([X_train, y_train], axis = 1)

    # seperate dataframes for class
    df_majority = full_df[full_df.outcome == 0]
    df_minority = full_df[full_df.outcome == 1]

    # upsample data if balance parameter is set to "Upsample" or "upsample"
    if (balance == "upsample") | (balance == "Upsample"):
        # Upsample minority class
        df_minority_upsampled = resample(df_minority,
                                        replace=True,  # sample with replacement
                                        n_samples=full_df["outcome"].value_counts()[0.0],
                                        # to match majority class
                                        random_state=123)  # reproducible results


        # combine dataframes
        full_df = pd.concat([df_majority, df_minority_upsampled])

    # downsample data if balance parameter is set to "downsample" or "Downsample"
    elif (balance == "downsample") | (balance == "Downsample"):
        # downsample majority class
        # downsample majority class
        df_majority_downsampled = resample(df_majority,
                                        replace=False,  # sample without replacement
                                        n_samples= full_df["outcome"].value_counts()[1.0],
                                        # to match minority class
                                        random_state=123)  # reproducible results


        full_df = pd.concat([df_majority_downsampled, df_minority])

    X_train = full_df.drop(columns = ["outcome"])
    y_train = full_df["outcome"]
    
    return X_train, y_train


In [6]:
base_path = '/Users/richardlee/Desktop/JHU/2021 Fall/Precision Care Medicine/Preprocessed_Data/'



#Function to get the concatenated dataframe for all positive patients
## balance parameter can be changed to "None", "upsample", or "downsample"
all_positive_patients = concat_dfs(base_path, "1")

PY21N008 done...
PY21N006 done...
PY20N001 done...
PY17N020 done...
PY19N009 done...
PY19N012 done...
PY19N023 done...
PY18N015 done...
PY18N013 done...
PY17N005 done...
PY20N012 done...
PY21N002 done...
PY21N004 done...
PY18N003 done...
PY16N013 done...
PY18N002 done...
PY18N016 done...
PY17N008 done...
PY19N026 done...
PY16N008 done...


In [7]:
all_positive_patients.reset_index(drop = True, inplace=True)
all_positive_patients

Unnamed: 0,Channels,outcome,IZ,EP,SigResp,SigStim,N1RespAvg,N1RespSDV,N2RespAvg,N2RespSDV,...,N1StimSDV,N2StimAvg,N2StimSDV,P2StimAvg,P2StimSDV,patient,InDegree,OutDegree,EV,Closeness
0,LA9_LA10,0.0,1.0,0.0,0.000000,0.250000,1.530459,1.134169,2.040546,1.369091,...,10.790559,6.536859,6.383906,5.785279,7.331337,PY21N008,0.000000,0.240000,5.165334e-14,0.000000
1,LAH1_LAH2,1.0,1.0,0.0,0.076923,0.250000,7.062089,20.678040,3.320937,2.795194,...,16.334814,4.921208,3.029861,2.352152,1.803711,PY21N008,0.080000,0.240000,7.960607e-04,0.177778
2,LAH8_LAH9,0.0,0.0,0.0,0.307692,0.125000,4.032683,2.995624,3.450575,2.883998,...,2.164878,2.190308,1.489418,2.033410,2.112725,PY21N008,0.320000,0.120000,1.590588e-01,0.484848
3,LPH1_LPH2,0.0,1.0,0.0,0.115385,0.333333,6.522655,17.219698,3.369771,3.506524,...,8.264811,4.630948,3.574869,4.187831,3.626223,PY21N008,0.120000,0.320000,4.743270e-03,0.222222
4,LPH7_LPH8,0.0,0.0,0.0,0.307692,0.083333,7.304854,10.728727,4.610356,4.159990,...,2.925869,1.606341,1.075715,1.461462,1.012732,PY21N008,0.320000,0.080000,1.857524e-01,0.484848
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
362,RMBT2_RMBT3,0.0,1.0,0.0,0.250000,0.375000,10.029460,19.122649,6.391055,5.733855,...,19.912187,6.165966,5.398174,7.684954,9.885282,PY16N008,0.333333,0.500000,3.424386e-01,0.444444
363,RPTI19_RPTI20,0.0,0.0,0.0,0.250000,0.250000,13.323164,20.134989,4.393888,4.366079,...,21.710392,6.080624,5.949018,6.706642,9.411200,PY16N008,0.333333,0.333333,3.424386e-01,0.380952
364,RPTS60_RPTS61,0.0,1.0,0.0,0.125000,0.250000,9.883556,20.481622,4.603904,6.318618,...,15.484303,6.323287,5.584253,7.569330,11.384782,PY16N008,0.166667,0.333333,1.900403e-01,0.266667
365,RPFS4_RPFS5,0.0,0.0,0.0,0.000000,0.000000,2.462636,1.320800,3.635232,2.139720,...,1.392987,3.160595,1.173929,3.855516,3.516656,PY16N008,0.000000,0.000000,0.000000e+00,0.000000


In [8]:
all_positive_patients.to_csv('newDF.csv')

In [9]:
A = all_positive_patients
A = A.Channels.unique()
B = []
for channels in A:
    electrodes = channels.split('_')
    if electrodes[0][-1].isdigit():
        electrodes[0] = electrodes[0][0:-1]
    if electrodes[0][-1].isdigit():
        electrodes[0] = electrodes[0][0:-1]
    B.append(electrodes[0])    
    
    if electrodes[1][-1].isdigit():
        electrodes[1] = electrodes[1][0:-1]
    if electrodes[1][-1].isdigit():
        electrodes[1] = electrodes[1][0:-1]
    B.append(electrodes[1]) 
B = pd.Series(B)
B.unique()

array(['LA', 'LAH', 'LPH', 'LTP', 'LBT', 'LFOA', 'LFOP', 'LIN', 'LOF',
       'RA', 'RAH', 'LSMA', 'LM', 'LS', 'LACD', 'LPCD', 'LFO', 'LPO',
       'LAST', 'LPST', 'LF', 'LAC', 'LPC', 'LIH', 'LFG', 'RPH', 'RBT',
       'LCN', 'LTH', 'LNA', 'LND', 'LNE', 'RMS', 'RMM', 'RMI', 'RLS',
       'LH', 'ROF', 'RF', 'RH', 'LHA', 'LHP', 'LBTA', 'LBTP', 'LCNA',
       'LCNM', 'LCNP', 'LCNS', 'LINS', 'LBRO', 'LTG0', 'LTG1', 'LIF',
       'LAM', 'LMH', 'LMSTG', 'LPSTG', 'LPMTG', 'LTPO', 'ALS', 'BLS',
       'CLS', 'DLS', 'ELS', 'FLS', 'LSM', 'ALL', 'BLL', 'CLL', 'DLL',
       'ELL', 'FLL', 'GLL', 'BTA', 'BTMA', 'BTMP', 'BTP', 'LFT', 'RAD',
       'RHAD', 'LAD', 'RFTG', 'LHAD', 'LFP', 'MIH', 'RMFD', 'ROFD', 'RHD',
       'RACD', 'RAID', 'LMFD', 'LOFD', 'LHD', 'LSF', 'LSPF', 'LIPF',
       'LAN', 'LMN', 'LPN', 'RPCS', 'RFG', 'RAM', 'RPHG', 'RFUS', 'RPIR',
       'RATS', 'RATI', 'RABT', 'RMBT', 'RPTI', 'RPTS', 'RPFS'],
      dtype=object)

In [10]:
A

array(['LA9_LA10', 'LAH1_LAH2', 'LAH8_LAH9', 'LPH1_LPH2', 'LPH7_LPH8',
       'LTP1_LTP2', 'LBT3_LBT4', 'LBT6_LBT7', 'LFOA3_LFOA4',
       'LFOP1_LFOP2', 'LFOP3_LFOP4', 'LFOP5_LFOP6', 'LIN1_LIN2',
       'LIN3_LIN4', 'LIN8_LIN9', 'LIN10_LIN11', 'LOF1_LOF2', 'LOF3_LOF4',
       'LOF12_LOF13', 'RA1_RA2', 'RA9_RA10', 'RAH1_RAH2', 'RAH3_RAH4',
       'RAH9_RAH10', 'LFOA1_LFOA2', 'LTP6_LTP7', 'LBT1_LBT2',
       'LSMA3_LSMA4', 'LSMA5_LSMA6', 'LM3_LM4', 'LS2_LS3', 'LS5_LS6',
       'LS7_LS8', 'LACD7_LACD8', 'LPCD1_LPCD2', 'LPCD3_LPCD4',
       'LPCD5_LPCD6', 'LPCD7_LPCD8', 'LFO1_LFO2', 'LFO3_LFO4',
       'LFO5_LFO6', 'LFO7_LFO8', 'LPO1_LPO2', 'LPO3_LPO4', 'LPO7_LPO8',
       'LPO5_LPO6', 'LA1_LA2', 'LA5_LA6', 'LAH7_LAH8', 'LPH9_LPH10',
       'LBT2_LBT3', 'LAST4_LAST5', 'LAST6_LAST7', 'LPST2_LPST3',
       'LPST7_LPST8', 'LF6_LF7', 'LF2_LF3', 'LF3_LF4', 'LAC1_LAC2',
       'LAC7_LAC8', 'LPC1_LPC2', 'LPC7_LPC8', 'LIH17_LIH18',
       'LIH19_LIH20', 'LIH25_LIH26', 'LIH27_LIH28', 'LFG01_LFG02'

In [11]:
file = 'PY21N008'
patient = file
response_path = base_path + file + '/ResponseInfo/CCEP'
response_files_path = glob.glob(response_path + '/*.json', recursive=True)
df = make_df(patient, response_files_path)
df

Unnamed: 0,stimChs,respChs,significant,n1Zscore,n2Zscore,p2Zscore,n1Latency,n2Latency,p2Latency,flipped,patient,outcome,IZ,EP
2,LFOA1_LFOA2,LA3_LA4,0.0,1.396644,1.062212,-5.025081,13.0,180.0,43.0,1.0,PY21N008,1.0,0.0,0.0
6,LFOA1_LFOA2,LA7_LA8,0.0,1.371992,1.735958,-0.305671,17.0,131.0,47.0,1.0,PY21N008,0.0,1.0,0.0
7,LFOA1_LFOA2,LA8_LA9,0.0,1.990935,0.596185,0.596185,19.0,101.0,101.0,1.0,PY21N008,0.0,1.0,0.0
8,LFOA1_LFOA2,LA9_LA10,0.0,-2.361205,-1.335067,-0.176026,11.0,101.0,43.0,0.0,PY21N008,0.0,1.0,0.0
9,LFOA1_LFOA2,LAH1_LAH2,0.0,0.901176,1.048693,-1.308852,11.0,321.0,98.0,1.0,PY21N008,1.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2816,LFOP3_LFOP4,RAH5_RAH6,0.0,-0.006959,-3.854115,-0.006959,51.0,232.0,51.0,0.0,PY21N008,0.0,0.0,0.0
2817,LFOP3_LFOP4,RAH6_RAH7,0.0,-3.899049,0.688887,0.688887,11.0,101.0,101.0,0.0,PY21N008,0.0,0.0,0.0
2818,LFOP3_LFOP4,RAH7_RAH8,0.0,2.489016,1.919793,0.472198,13.0,107.0,61.0,1.0,PY21N008,0.0,0.0,0.0
2819,LFOP3_LFOP4,RAH8_RAH9,0.0,2.600048,3.240657,1.017700,11.0,113.0,29.0,1.0,PY21N008,0.0,0.0,0.0


In [12]:
all_positive_patients[all_positive_patients.EV==0]

Unnamed: 0,Channels,outcome,IZ,EP,SigResp,SigStim,N1RespAvg,N1RespSDV,N2RespAvg,N2RespSDV,...,N1StimSDV,N2StimAvg,N2StimSDV,P2StimAvg,P2StimSDV,patient,InDegree,OutDegree,EV,Closeness
25,LTP6_LTP7,0.0,1.0,0.0,0.0,0.0,0.888077,0.278715,1.394485,1.23073,...,1.291718,2.608184,1.685137,1.092237,1.032098,PY21N008,0.0,0.0,0.0,0.0
59,LF2_LF3,0.0,0.0,0.0,0.0,0.0,1.961148,1.164868,3.577618,3.159791,...,1.121748,3.97743,2.539415,1.085727,1.174633,PY20N001,0.0,0.0,0.0,0.0
104,LCN01_LCN02,0.0,0.0,0.0,0.0,0.0,2.674486,1.075717,3.600868,2.574516,...,0.771626,2.871678,1.178051,1.579767,1.566076,PY19N012,0.0,0.0,0.0,0.0
164,LTP01_LTP02,0.0,0.0,0.0,0.0,0.0,1.037194,0.426369,0.88727,0.720842,...,0.689223,1.378057,1.206367,0.460449,0.398221,PY18N013,0.0,0.0,0.0,0.0
170,LCNP10_LCNP11,0.0,0.0,0.0,0.0,0.0,1.040527,0.534664,1.549552,0.804709,...,0.866141,1.330598,0.781508,1.013123,1.414726,PY18N013,0.0,0.0,0.0,0.0
172,LCNS10_LCNS11,0.0,0.0,0.0,0.0,0.0,0.652892,0.29339,0.810416,0.498975,...,0.881455,1.533635,0.89097,1.089689,0.681792,PY18N013,0.0,0.0,0.0,0.0
173,LINS02_LINS03,0.0,0.0,0.0,0.0,0.0,0.638349,0.354062,1.092481,0.579339,...,0.813324,1.565022,1.085878,1.363394,0.976015,PY18N013,0.0,0.0,0.0,0.0
174,LINS03_LINS04,0.0,0.0,0.0,0.0,0.0,1.009795,0.792099,1.358499,0.595119,...,0.813891,1.583175,0.924908,0.854671,0.752825,PY18N013,0.0,0.0,0.0,0.0
175,LBRO01_LBRO02,0.0,0.0,0.0,0.0,0.0,2.099678,0.979417,2.235458,1.339097,...,1.163515,1.395794,0.979575,0.98979,0.991172,PY18N013,0.0,0.0,0.0,0.0
200,LTPO7_LTPO8,0.0,0.0,0.0,0.0,0.0,2.859214,1.305883,4.015673,1.579696,...,0.932902,2.723967,1.145802,1.295483,1.001539,PY20N012,0.0,0.0,0.0,0.0


In [13]:
all_positive_patients[all_positive_patients.outcome == 1]

Unnamed: 0,Channels,outcome,IZ,EP,SigResp,SigStim,N1RespAvg,N1RespSDV,N2RespAvg,N2RespSDV,...,N1StimSDV,N2StimAvg,N2StimSDV,P2StimAvg,P2StimSDV,patient,InDegree,OutDegree,EV,Closeness
1,LAH1_LAH2,1.0,1.0,0.0,0.076923,0.250000,7.062089,20.678040,3.320937,2.795194,...,16.334814,4.921208,3.029861,2.352152,1.803711,PY21N008,0.080000,0.240000,0.000796,0.177778
6,LBT3_LBT4,1.0,1.0,0.0,0.153846,0.125000,5.132375,7.537370,3.197186,1.828693,...,2.064447,3.795401,2.127371,1.968718,1.614045,PY21N008,0.160000,0.120000,0.027467,0.290909
19,RA1_RA2,1.0,0.0,0.0,0.038462,0.125000,2.926803,5.725073,3.249613,4.720740,...,4.018384,3.979747,3.221402,2.328967,1.545238,PY21N008,0.040000,0.120000,0.006616,0.261235
21,RAH1_RAH2,1.0,1.0,0.0,0.115385,0.153846,2.814085,2.859219,2.562153,1.546758,...,12.435196,3.704452,4.763608,3.354584,6.964322,PY21N008,0.120000,0.160000,0.039420,0.358644
22,RAH3_RAH4,1.0,1.0,0.0,0.115385,0.038462,5.343691,11.494064,2.805384,2.340846,...,1.253769,2.453778,1.238402,1.770603,1.498068,PY21N008,0.120000,0.040000,0.008522,0.298028
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
344,RPHG03_RPHG04,1.0,0.0,0.0,0.318182,0.227273,11.338725,20.462041,7.739418,5.946789,...,6.235655,4.333815,4.398078,3.436383,3.447158,PY19N026,0.318182,0.227273,0.241337,0.393357
357,RPH01_RPH02,1.0,0.0,0.0,0.409091,0.181818,10.329335,12.748902,9.256550,10.285833,...,51.153440,16.099137,28.938394,9.949782,24.290979,PY19N026,0.409091,0.181818,0.350562,0.464876
358,RATS3_RATS4,1.0,0.0,0.0,0.000000,0.000000,2.412705,1.515111,1.605639,0.973591,...,2.212669,2.477602,1.713425,1.991577,1.451194,PY16N008,0.000000,0.000000,0.000000,0.000000
359,RATI7_RATI8,1.0,0.0,0.0,0.250000,0.000000,11.834686,11.798107,5.556760,3.216603,...,6.596086,2.312074,1.504677,1.561066,1.646989,PY16N008,0.333333,0.000000,0.427012,0.462963


In [14]:
all_positive_patients[all_positive_patients.patient=='PY19N026']

Unnamed: 0,Channels,outcome,IZ,EP,SigResp,SigStim,N1RespAvg,N1RespSDV,N2RespAvg,N2RespSDV,...,N1StimSDV,N2StimAvg,N2StimSDV,P2StimAvg,P2StimSDV,patient,InDegree,OutDegree,EV,Closeness
335,RAM01_RAM02,1.0,0.0,0.0,0.272727,0.318182,11.731063,21.972386,9.058207,11.231795,...,17.458548,9.835898,8.638836,8.862182,12.99252,PY19N026,0.272727,0.318182,0.233907,0.378788
336,RAM03_RAM04,1.0,0.0,0.0,0.272727,0.5,14.182167,21.860274,7.592897,8.895566,...,43.436026,12.406533,9.717822,13.925599,16.142066,PY19N026,0.272727,0.5,0.24521,0.36526
337,RAM07_RAM08,0.0,0.0,0.0,0.272727,0.454545,9.923288,15.381993,7.156979,9.031724,...,9.964081,6.446238,6.621941,5.315334,6.265989,PY19N026,0.272727,0.454545,0.236998,0.378788
338,RAH01_RAH02,1.0,0.0,0.0,0.227273,0.363636,8.037297,15.218913,5.371633,3.988687,...,22.186777,8.797641,9.537099,7.311839,9.848104,PY19N026,0.227273,0.363636,0.20862,0.352665
339,RAH03_RAH04,1.0,0.0,0.0,0.409091,0.727273,13.230125,21.045823,6.644754,6.193942,...,17.782713,20.98852,19.016721,8.462772,5.675332,PY19N026,0.409091,0.727273,0.299352,0.464876
340,RAH06_RAH07,0.0,0.0,0.0,0.272727,0.090909,7.703199,13.408502,9.657561,16.345784,...,2.497153,3.76636,3.606663,2.158614,2.364522,PY19N026,0.272727,0.090909,0.192344,0.409091
341,RPH03_RPH04,1.0,0.0,0.0,0.318182,0.181818,17.51644,44.930806,13.804415,24.616031,...,5.099482,4.144489,2.81517,2.085836,1.449783,PY19N026,0.318182,0.181818,0.232372,0.409091
342,RPH06_RPH07,0.0,0.0,0.0,0.045455,0.272727,4.725262,11.404377,6.280559,12.513603,...,5.85555,13.009683,21.206857,4.82281,6.309606,PY19N026,0.045455,0.272727,0.047162,0.284091
343,RPHG01_RPHG02,1.0,0.0,0.0,0.363636,0.045455,10.603578,15.249879,9.654879,18.730838,...,11.534834,10.74175,10.651456,11.281719,16.435693,PY19N026,0.363636,0.045455,0.331068,0.444664
344,RPHG03_RPHG04,1.0,0.0,0.0,0.318182,0.227273,11.338725,20.462041,7.739418,5.946789,...,6.235655,4.333815,4.398078,3.436383,3.447158,PY19N026,0.318182,0.227273,0.241337,0.393357


In [15]:
all_positive_patients.patient.unique()

array(['PY21N008', 'PY21N006', 'PY20N001', 'PY17N020', 'PY19N009',
       'PY19N012', 'PY19N023', 'PY18N015', 'PY18N013', 'PY17N005',
       'PY20N012', 'PY21N002', 'PY21N004', 'PY18N003', 'PY16N013',
       'PY18N002', 'PY18N016', 'PY17N008', 'PY19N026', 'PY16N008'],
      dtype=object)

In [16]:
plot_roc = False
plot_pr = False

max_depth = 10
rf, tpr, fpr, precision, recall = random_forest(all_positive_patients, max_depth, plot_roc, plot_pr)
plt.plot(fpr, tpr, label = "Random Forest, AUC = %0.2f" % auc(fpr, tpr))
plt.legend(loc='lower right')

TypeError: argument of type 'int' is not iterable