In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
import json
import glob
import os

from epilepsypcm.utils.outcome_params import seizure_onset_zone, engel_score, early_propogation, irritative_zone

# INPUT
# patient = string format, patient number
# paths = path to CCEP response files, in os format
# OUTPUT
# df = dataframe for one patient with
#       X features with columns: chNames, significant, n1, n2, p2 z scores,
#       n1, n2, p2 latencies, and flipped
#       and associated y outcome labels
def make_df(patient, paths):
    #extracting info from each response file
    n = 0
    stimChs = []
    for i in range(len(paths)):
        chNames = []
        # load info into python dictionary
        data = json.load(open(paths[i]))
        p_name = patient

        # Get list of channel names
        for key in data["time"]: chNames.append(key)
        # loop over each channel, and extract average time series and information about the peaks

        if n < 1:
            avgResp = np.empty((len(paths), len(chNames), len(data['time'][chNames[0]])))
            significant = np.empty((len(paths), len(chNames)))
            n1Zscore = np.empty((len(paths), len(chNames)))
            n2Zscore = np.empty((len(paths), len(chNames)))
            p2Zscore = np.empty((len(paths), len(chNames)))
            n1Latency = np.empty((len(paths), len(chNames)))
            n2Latency = np.empty((len(paths), len(chNames)))
            p2Latency = np.empty((len(paths), len(chNames)))
            flipped = np.empty((len(paths), len(chNames)))
            n += 1
            samplingRate = np.empty((len(paths)))
            window = np.empty((len(paths), 2))

        for j in range(len(chNames)):
            avgResp[i][j] = data['time'][chNames[j]]
            significant[i][j] = data['significant'][chNames[j]]
            n1Zscore[i][j] = data['zscores'][chNames[j]]['n1'][1]
            n2Zscore[i][j] = data['zscores'][chNames[j]]['n2'][1]
            p2Zscore[i][j] = data['zscores'][chNames[j]]['p2'][1]
            n1Latency[i][j] = data['zscores'][chNames[j]]['n1'][0] + data['window'][0] * data["samplingRate"] / 1000
            n2Latency[i][j] = data['zscores'][chNames[j]]['n2'][0] + data['window'][0] * data["samplingRate"] / 1000
            p2Latency[i][j] = data['zscores'][chNames[j]]['p2'][0] + data['window'][0] * data["samplingRate"] / 1000
            flipped[i][j] = data['zscores'][chNames[j]]['flipped']

        samplingRate[i] = data["samplingRate"]
        window[i] = data['window']
        stimChs = stimChs + [paths[i].split("_")[1] + "_" + paths[i].split("_")[2]]*len(chNames)


    # creating dataframe

    df = pd.DataFrame()
    df["stimChs"] = stimChs
    df["respChs"] = chNames * len(paths)
    df["significant"] = significant.flatten()
    df["n1Zscore"] = n1Zscore.flatten()
    df["n2Zscore"] = n2Zscore.flatten()
    df["p2Zscore"] = p2Zscore.flatten()
    df["n1Latency"] = n1Latency.flatten()
    df["n2Latency"] = n2Latency.flatten()
    df["p2Latency"] = p2Latency.flatten()
    df["flipped"] = flipped.flatten()
    df["patient"] = p_name

    # Dropped rows for stimulating channels since they only
    # contain stimulating waveforms / artifacts / saturated signals
    # Also zero out rows with latency values of -999.0

    # drop rows in the dataframe with latency values of -999.0
    df = df.drop(df.loc[df["n1Latency"] == -999.0].index)
    df = df.drop(df.loc[df["n1Latency"] == -499.0].index)

    return df

In [2]:
def df_processing(D):
    D.reset_index(drop = True, inplace=True)

    #Find channel names that exists both in stimChs and respChs - only account channels that have arrows going out and in
    overlap = []
    for channel in D.respChs.unique():
        if channel in D.stimChs.unique():
            overlap.append(channel)

    #Keep only the response that were stimulated in responded in the channel in overlap list
    dropindxs = []
    for i in range(len(D)):
        if D.iloc[i].stimChs not in overlap or D.iloc[i].respChs not in overlap:
                dropindxs.append(i)
    D.drop(dropindxs,inplace=True)
    D.reset_index(drop = True, inplace=True)

    D.n1Zscore = abs(D.n1Zscore)
    D.n2Zscore = abs(D.n2Zscore)
    D.p2Zscore = abs(D.p2Zscore)
    
    #start processing
    df = pd.DataFrame()
    ChNames = overlap
    Outcomes = np.array([])
    IZ = np.array([])
    EP = np.array([])
    Per_Significant_Resp = np.array([])
    Per_Significant_Stim = np.array([])
    N1_Avg_Resp = np.array([])
    N1_STV_Resp = np.array([])
    N2_Avg_Resp = np.array([])
    N2_STV_Resp = np.array([])
    P2_Avg_Resp = np.array([])
    P2_STV_Resp = np.array([])
    N1_Avg_Stim = np.array([])
    N1_STV_Stim = np.array([])
    N2_Avg_Stim = np.array([])
    N2_STV_Stim = np.array([])
    P2_Avg_Stim = np.array([])
    P2_STV_Stim = np.array([])

    for channel in ChNames:
        Resp = D[D.respChs == channel]
        Stim = D[D.stimChs == channel]

        Per_Significant_Resp = np.append(Per_Significant_Resp,
                                         sum(Resp.significant/len(Resp)))
        Per_Significant_Stim = np.append(Per_Significant_Stim,
                                         sum(Stim.significant/len(Stim)))

        N1_Avg_Resp = np.append(N1_Avg_Resp,sum(Resp.n1Zscore)/len(Resp))
        N1_STV_Resp = np.append(N1_STV_Resp,np.std(Resp.n1Zscore))

        N2_Avg_Resp = np.append(N2_Avg_Resp,sum(Resp.n2Zscore)/len(Resp))
        N2_STV_Resp = np.append(N2_STV_Resp,np.std(Resp.n2Zscore))

        P2_Avg_Resp = np.append(P2_Avg_Resp,sum(Resp.p2Zscore)/len(Resp))
        P2_STV_Resp = np.append(P2_STV_Resp,np.std(Resp.p2Zscore))

        N1_Avg_Stim = np.append(N1_Avg_Stim,sum(Stim.n1Zscore)/len(Stim))
        N1_STV_Stim = np.append(N1_STV_Stim,np.std(Stim.n1Zscore))

        N2_Avg_Stim = np.append(N2_Avg_Stim,sum(Stim.n2Zscore)/len(Stim))
        N2_STV_Stim = np.append(N2_STV_Stim,np.std(Stim.n2Zscore))

        P2_Avg_Stim = np.append(P2_Avg_Stim,sum(Stim.p2Zscore)/len(Stim))
        P2_STV_Stim = np.append(P2_STV_Stim,np.std(Stim.p2Zscore))


    df['Channels'] = ChNames
    df['SigResp'] = Per_Significant_Resp
    df['SigStim'] = Per_Significant_Stim
    df['N1RespAvg'] = N1_Avg_Resp
    df['N1RespSDV'] = N1_STV_Resp
    df['N2RespAvg'] = N2_Avg_Resp
    df['N2RespSDV'] = N2_STV_Resp
    df['P2RespAvg'] = P2_Avg_Resp
    df['P2RespSDV'] = P2_STV_Resp
    df['N1StimAvg'] = N1_Avg_Stim
    df['N1StimSDV'] = N1_STV_Stim
    df['N2StimAvg'] = N2_Avg_Stim
    df['N2StimSDV'] = N2_STV_Stim
    df['P2StimAvg'] = P2_Avg_Stim
    df['P2StimSDV'] = P2_STV_Stim
    df['patient'] = D.iloc[0].patient
    
    df["InDegree"] = np.zeros(df.shape[0])
    df["OutDegree"] = np.zeros(df.shape[0])
    df["EV"] = np.zeros(df.shape[0])
    df["Closeness"] = np.zeros(df.shape[0])
    
    G = nx.DiGraph()
    for i in range(D.shape[0]):
        if D.significant.iloc[i] == 1:
            G.add_edge(D.stimChs.iloc[i],D.respChs.iloc[i])

    EV_Centrality = nx.eigenvector_centrality(G)
    Closeness_Centrality = nx.closeness_centrality(G)
    InDegree = nx.in_degree_centrality(G)
    OutDegree = nx.out_degree_centrality(G)
    
    for channel in list(EV_Centrality):
        df.loc[df.Channels == channel, 'EV'] = EV_Centrality[channel]
    for channel in list(Closeness_Centrality):
        df.loc[df.Channels == channel, 'Closeness'] = Closeness_Centrality[channel]
    for channel in list(InDegree):
        df.loc[df.Channels == channel, 'InDegree'] = InDegree[channel]
    for channel in list(OutDegree):
        df.loc[df.Channels == channel, 'OutDegree'] = OutDegree[channel]

            
    return df

In [3]:
def concat_dfs(base_path, balance = None):
    
    patient_files = os.listdir(base_path)

    full_df = pd.DataFrame()
    for file in patient_files:
        if (file[0] == "P") & (file != "PY16N011") & (file != 'PY18N011') & (file != 'PY19N011') & (file != 'PY21N020'): #PY17N014 was eliminated because there is no node with significant response
            response_path = base_path + file + '/ResponseInfo/CCEP'
            response_files_path = glob.glob(response_path + '/*.json', recursive=True)

            # Getting individual dataframe for positive patients
            patient = file
            if file not in engel_score.keys():  # if we currently have the file's engel score
                df = make_df(patient, response_files_path)
                df = df_processing(df)
                full_df = pd.concat([full_df, df])
                    
                print('%s done...'%patient)
    return full_df

In [4]:
base_path = '/Users/richardlee/Desktop/JHU/2021 Fall/Precision Care Medicine/Coding/'
file = 'PY21N008'
patient = file
response_path = base_path + file + '/ResponseInfo/CCEP'
response_files_path = glob.glob(response_path + '/*.json', recursive=True)
df = make_df(patient, response_files_path)
df

Unnamed: 0,stimChs,respChs,significant,n1Zscore,n2Zscore,p2Zscore,n1Latency,n2Latency,p2Latency,flipped,patient
2,LFOA1_LFOA2,LA3_LA4,0.0,1.396644,1.062212,-5.025081,13.0,180.0,43.0,1.0,PY21N008
6,LFOA1_LFOA2,LA7_LA8,0.0,1.371992,1.735958,-0.305671,17.0,131.0,47.0,1.0,PY21N008
7,LFOA1_LFOA2,LA8_LA9,0.0,1.990935,0.596185,0.596185,19.0,101.0,101.0,1.0,PY21N008
8,LFOA1_LFOA2,LA9_LA10,0.0,-2.361205,-1.335067,-0.176026,11.0,101.0,43.0,0.0,PY21N008
9,LFOA1_LFOA2,LAH1_LAH2,0.0,0.901176,1.048693,-1.308852,11.0,321.0,98.0,1.0,PY21N008
...,...,...,...,...,...,...,...,...,...,...,...
2816,LFOP3_LFOP4,RAH5_RAH6,0.0,-0.006959,-3.854115,-0.006959,51.0,232.0,51.0,0.0,PY21N008
2817,LFOP3_LFOP4,RAH6_RAH7,0.0,-3.899049,0.688887,0.688887,11.0,101.0,101.0,0.0,PY21N008
2818,LFOP3_LFOP4,RAH7_RAH8,0.0,2.489016,1.919793,0.472198,13.0,107.0,61.0,1.0,PY21N008
2819,LFOP3_LFOP4,RAH8_RAH9,0.0,2.600048,3.240657,1.017700,11.0,113.0,29.0,1.0,PY21N008


In [5]:
failed_patients = concat_dfs(base_path)

PY21N007 done...
PY18N007 done...
PY17N018 done...
PY19N015 done...
PY19N024 done...
PY21N014 done...
PY21N022 done...
PY21N012 done...
PY20N002 done...
PY20N005 done...
PY20N003 done...
PY19N005 done...
PY17N013 done...
PY19N018 done...
PY19N020 done...
PY19N017 done...
PY21N010 done...
PY20N016 done...
PY20N011 done...


In [6]:
failed_patients.reset_index(drop = True, inplace=True)
failed_patients.to_csv('unsuccessful_DF.csv')
failed_patients

Unnamed: 0,Channels,SigResp,SigStim,N1RespAvg,N1RespSDV,N2RespAvg,N2RespSDV,P2RespAvg,P2RespSDV,N1StimAvg,N1StimSDV,N2StimAvg,N2StimSDV,P2StimAvg,P2StimSDV,patient,InDegree,OutDegree,EV,Closeness
0,RA7_RA8,0.470588,0.235294,11.275250,13.430507,12.300852,17.797695,10.693199,13.013718,5.385827,5.037416,8.653968,8.250072,6.505716,4.873793,PY21N007,0.444444,0.222222,1.839785e-01,0.600000
1,RA8_RA9,0.411765,0.294118,11.349622,14.409670,15.607462,22.626955,9.325437,7.232690,5.110791,4.167502,5.253412,4.484553,5.456525,4.048721,PY21N007,0.388889,0.277778,1.687278e-01,0.580645
2,RH1_RH2,0.166667,0.277778,18.722890,47.849258,9.351465,17.359378,7.040116,16.182438,20.063484,41.903112,7.045011,12.544347,9.430739,13.200236,PY21N007,0.166667,0.277778,1.150292e-01,0.514286
3,RH5_RH6,0.777778,0.500000,17.155535,12.812753,6.383174,4.995149,11.223168,7.985343,13.980816,17.902728,18.496704,23.920131,9.152502,11.720191,PY21N007,0.777778,0.500000,3.212952e-01,0.818182
4,RHG1_RHG2,0.166667,0.666667,6.654436,13.721655,2.825886,2.155459,3.864567,6.131093,15.076088,10.356663,4.687854,4.377788,9.564763,8.984542,PY21N007,0.166667,0.666667,7.137374e-07,0.166667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
365,RPCA9_RPCA10,0.304348,0.260870,5.564085,6.322075,3.884738,6.009140,5.052858,6.717041,6.296305,7.014255,6.752861,6.811072,6.814861,6.841007,PY20N011,0.304348,0.260870,1.695428e-01,0.589744
366,RPCP1_RPCP2,0.652174,0.739130,19.106785,24.432416,12.843545,14.558156,12.201233,13.273406,38.673026,57.959034,40.259250,56.484838,13.235196,16.832613,PY20N011,0.652174,0.739130,2.885447e-01,0.741935
367,RPCP3_RPCP4,0.869565,0.652174,29.271206,48.440441,18.350693,29.405058,11.247665,9.772951,17.600152,23.838800,16.220500,20.864380,7.335227,7.797317,PY20N011,0.869565,0.652174,3.468352e-01,0.884615
368,RPCP9_RPCP10,0.434783,0.304348,7.571620,8.570856,7.887538,11.177546,9.338446,9.249646,5.876815,4.813578,7.360311,11.602854,3.591157,3.805442,PY20N011,0.434783,0.304348,2.229955e-01,0.638889
