In [1]:
import matplotlib.pyplot as plt
plt.rc('font', size=12)
import pandas as pd
import seaborn as sns
import warnings 
import pydot
from IPython.display import Image, display
import io
from PIL import Image
# import Coverage as cov
# import RuleQuality as RQ

warnings.filterwarnings('ignore')
%matplotlib inline

%load_ext autoreload
%autoreload 2


In [None]:
def showGraph(dotGraph, title=None):
    plt.figure(figsize=[40,25])
#     plt.figure()
    img = Image.open(io.BytesIO(dotGraph.create_png()))  # .show()
    plt.imshow(img)  # to show in pycharm sciview
    if title != None:
        plt.title(title)
#     plt.show()

def saveGraph(dotgraph, graphName):
    dotGraph.write(graphName, format='png') #to save to file

def addBranch(dotGraph, nodeNames, parentName, rev=False):
    brID = "[" + ' '.join(map(str, nodeNames)) + "]"
    
    clusterBranch = pydot.Cluster(brID, label=brID)
    dotGraph.add_subgraph(clusterBranch)
    
    if rev:
        nodenames = nodeNames.reverse()
    for n in nodeNames:
        #add node to dot graph
        clusterBranch.add_node(pydot.Node(n))
        if parentName != None:
            dotGraph.add_edge(pydot.Edge(parentName, n))  # connect edge btw parent and node
            
    return dotGraph

In [None]:
# Make example template tree
dotGraph = pydot.Dot(graph_type='digraph', forcelabels=True) # Make pydot graph to visualize rule template

dotGraph = addBranch(dotGraph, nodeNames=['eval#1'], parentName=None)
dotGraph = addBranch(dotGraph, nodeNames=['stlTerm#1'], parentName='eval#1')
dotGraph = addBranch(dotGraph, nodeNames=['BoolAtomic#1', 'timeBound#1', 'U#1', 'BoolAtomic#2'], parentName='stlTerm#1')
dotGraph = addBranch(dotGraph, nodeNames=['F#1', 'timeBound#2', 'BoolAtomic#3'], parentName='stlTerm#1', rev=True)
dotGraph = addBranch(dotGraph, nodeNames=['G#1', 'timeBound#3', 'BoolAtomic#4'], parentName='stlTerm#1', rev=True)
dotGraph = addBranch(dotGraph, nodeNames=['>=#1'], parentName='BoolAtomic#1')
dotGraph = addBranch(dotGraph, nodeNames=['<=#1'], parentName='BoolAtomic#1')
dotGraph = addBranch(dotGraph, nodeNames=['boolExpr#1'], parentName='BoolAtomic#1')
dotGraph = addBranch(dotGraph, nodeNames=['>=#2'], parentName='BoolAtomic#2')
dotGraph = addBranch(dotGraph, nodeNames=['<=#2'], parentName='BoolAtomic#2')
dotGraph = addBranch(dotGraph, nodeNames=['boolExpr#2'], parentName='BoolAtomic#2')
dotGraph = addBranch(dotGraph, nodeNames=['>=#3'], parentName='BoolAtomic#3')
dotGraph = addBranch(dotGraph, nodeNames=['<=#3'], parentName='BoolAtomic#3')
dotGraph = addBranch(dotGraph, nodeNames=['boolExpr#3'], parentName='BoolAtomic#3')
dotGraph = addBranch(dotGraph, nodeNames=['>=#4'], parentName='BoolAtomic#4')
dotGraph = addBranch(dotGraph, nodeNames=['<=#4'], parentName='BoolAtomic#4')
dotGraph = addBranch(dotGraph, nodeNames=['boolExpr#4'], parentName='BoolAtomic#4')

dotGraph = addBranch(dotGraph, nodeNames=['var#1', 'param#1'], parentName='>=#1')
dotGraph = addBranch(dotGraph, nodeNames=['....'], parentName='>=#1')

dotGraph = addBranch(dotGraph, nodeNames=['...'], parentName='boolExpr#4')
dotGraph = addBranch(dotGraph, nodeNames=['stlTerm#2', 'AND#1', 'stlTerm#3'], parentName='boolExpr#4')
dotGraph = addBranch(dotGraph, nodeNames=['stlTerm#4', 'OR#1', 'stlTerm#5'], parentName='boolExpr#4')
dotGraph = addBranch(dotGraph, nodeNames=['stlTerm#6', 'IMPLIES#1', 'stlTerm#7'], parentName='boolExpr#4')



showGraph(dotGraph)
saveGraph(dotGraph, 'exampleTemplateTree.png')

In [None]:
t1d = pd.read_csv("ExampleT1D.csv")

In [None]:
t = range(0, 300)


plt.figure(figsize=(12,7))
plt.rcParams.update({'font.size': 12})
rlinestyle = "-"
glinestyle = '-'
plt.plot(t1d['bg4'], linestyle=glinestyle, color='g')
plt.plot(t1d['bg2'], linestyle=rlinestyle, color='r')
plt.plot(t1d['bg5'], linestyle=glinestyle, color='g')
plt.plot(t1d['bg8'], linestyle=rlinestyle, color='r')

plt.axhline(y=70, color='k', linestyle='--')
plt.axhline(y=180, color='k', linestyle='--')

plt.xlabel("Timesteps")
plt.ylabel("Blood Glucose (mg/dL)")
plt.legend(["Satisfying Trajectories", "Violating Trajectories"])
plt.tight_layout()
plt.savefig('ExampleBGRuleTraj.eps', format='eps')

## Calculate Client Data Stats

In [None]:
#Load original raw data from clients as dataframe
def calculateClientDataStats(popSize, dataFilename):
    sliceSizes = []
    negOutcomes = 0
    negTime = 0
    negs = []
    
    data = pd.DataFrame()
    labels = pd.DataFrame()
    
    totalPts = 0
    i = 1
    while totalPts < popSize-1:
        i += 1

        try:
            dt = pd.read_csv(dataFilename + str(i) + 'DataFrame.csv', index_col=0)
            lbls = pd.read_csv(dataFilename + str(i) + 'Labels.csv')

            if -1 in list(lbls.loc[0]):
                negOutcomes += 1

            ptTimeNeg = list(lbls.loc[0]).count(-1)
            negTime += ptTimeNeg
            negs.append(ptTimeNeg / len(list(lbls.loc[0])))

            data = data.append(dt)
            labels = labels.append(lbls)

            sliceSizes.append(len(dt))

            totalPts += 1

        except:
            print("Data file not found for Client %d" % (i))

    labels = labels.reset_index()
    
    print("\nTotal Patients Found:", totalPts)
    print("Total Rows:", len(data), "Average Rows / Patient:", sum(sliceSizes) / len(sliceSizes))
    print("Total Patients with Neg Outcome:", negOutcomes, "Percent", negOutcomes / popSize)
    print("Total Timepoints with Neg Outcome:", negTime, "Percent", negTime / len(data))
    print("Ave time patient spend in neg outcome:", sum(negs) / popSize)

    
    return data, labels

In [None]:
df1 = pd.read_csv("../Data/ICU/DataFrames/1DataFrame.csv", index_col=0)
df1

In [None]:
data, labels = calculateClientDataStats(8000, "../Data/ICU/DataFrames/")

In [4]:
df2 = pd.read_csv("../Data/Sepsis/DataFrames/1DataFrame.csv", index_col=0)
df2

Unnamed: 0_level_0,HR,O2Sat,Temp,SBP,MAP,DBP,Resp,EtCO2,BaseExcess,HCO3,...,Potassium,Bilirubin_total,TroponinI,Hct,Hgb,PTT,WBC,Fibrinogen,Platelets,SepsisLabel
Patient,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,97.0,95.0,0.0,98.0,75.33,0.0,19.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,89.0,99.0,0.0,122.0,86.0,0.0,22.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,90.0,95.0,0.0,0.0,0.0,0.0,30.0,0.0,24.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,103.0,88.5,0.0,122.0,91.33,0.0,24.5,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,110.0,91.0,0.0,0.0,0.0,0.0,22.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,108.0,92.0,36.11,123.0,77.0,0.0,29.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,106.0,90.5,0.0,93.0,76.33,0.0,29.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,104.0,95.0,0.0,133.0,88.33,0.0,26.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,102.0,91.0,0.0,134.0,87.33,0.0,30.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0


In [5]:
# data, labels = calculateClientDataStats(40336, "../Data/Sepsis/DataFrames/") 
data, labels = calculateClientDataStats(10, "../Data/Sepsis/DataFrames/") 


Total Patients Found: 9
Total Rows: 531 Average Rows / Patient: 59.0
Total Patients with Neg Outcome: 0 Percent 0.0
Total Timepoints with Neg Outcome: 0 Percent 0.0
Ave time patient spend in neg outcome: 0.0


In [None]:
df3 = pd.read_csv("../Data/T1D/DataFrames/1DataFrame.csv", index_col=0)
df3

In [None]:
data, labels = calculateClientDataStats(34013, "../Data/T1D/DataFrames/")

In [None]:
## Calculate STL Rule Stats

In [6]:

def calcRuleStats(popSize, datafilename):
    totalRules = 0
    numRlList = []
    opList = []
    
    totalPts = 0
    i = 1
    while totalPts < popSize-1:
        i += 1
        try:
            clRules = 0
            numOps = 0

            file = open(datafilename + str(i) + 'Rules.txt', "r")
            for line in file:
                numOps = line.count("G") + line.count("F") + line.count("U") + line.count("&") + line.count("|") + line.count("->")
                clRules += 1

            file.close()

            totalRules += clRules
            numRlList.append(clRules)
            opList.append(numOps)
            
            totalPts += 1
        except:
             print("Data file not found for Client %d" % (i))

            
    print("Total Patients Found:", totalPts)
    
    aveOpsPerRule = sum(opList) / len(opList) #total Rules
    aveNumRulesPerPt = sum(numRlList) / len(numRlList) #total patients
    
    print("\nTotal Client Rules", totalRules)
    print("Average # Rules per patient", aveNumRulesPerPt)
    print("Average # operators per rule", aveOpsPerRule)

        




In [None]:
calcRuleStats(8000, "../Data/ICU/Best/")


In [7]:
# calcRuleStats(40336, "../Data/Sepsis/Best/")
calcRuleStats(10, "../Data/Sepsis/Best/")


Total Patients Found: 9

Total Client Rules 953
Average # Rules per patient 105.88888888888889
Average # operators per rule 3.0


In [8]:
# calcRuleStats(34013, "../Data/T1D/Best/")
calcRuleStats(10, "../Data/T1D/Best/")



Total Patients Found: 9

Total Client Rules 525
Average # Rules per patient 58.333333333333336
Average # operators per rule 4.111111111111111


In [15]:
num = 1
cols = {}

df = pd.read_csv("../Data/T1D/DataFrames/" + str(num) + "DataFrame.csv", index_col=0)
for c in df.columns:
    cols[c] = [min(df[c]), max(df[c])]
    
num = 2
while num <=  34013:
    df = pd.read_csv("../Data/T1D/DataFrames/" + str(num) + "DataFrame.csv", index_col=0)

    for c in df.columns:

        if c != 'index':
#             mn = cols[c][0]
#             mx = cols[c][1]
            
            if min(df[c]) < cols[c][0]:
                cols[c][0] = min(df[c])

            if max(df[c]) > cols[c][1]:
                cols[c][1] = max(df[c])
    
    num += 1

cols

{'Pt_CGMUseNumDays': [0.0, 30.0],
 'Pt_FatherT1D': [0.0, 1.0],
 'Pt_MotherT1D': [0.0, 1.0],
 'Pt_SiblingT1D': [0.0, 1.0],
 'Pt_ChildT1D': [0.0, 1.0],
 'Pt_GrandchildT1D': [0.0, 1.0],
 'Pt_GrandparentT1D': [0.0, 1.0],
 'BldPrSys': [0.0, 300.0],
 'BldPrDia': [0.0, 160.0],
 'SMBGperDayPtMeterCombo': [0.0, 30.0],
 'WeightKg': [0.0, 240.85754847],
 'HeightCm': [0.0, 209.0],
 'DirectLDL': [0.0, 1.0],
 'age': [0, 90],
 'diabDur': [0.0, 83.0],
 'TotalDailyInsPerKg': [0.0, 12.0],
 'LDL': [0.0, 400.0],
 'HDL': [0.0, 200.0],
 'TotChol': [0.0, 632.0],
 'Triglyc': [0.0, 3000.0],
 'BUN': [0.0, 229.0],
 'AlbCreatRat_mggNew': [-0.1, 9678.6],
 'UnitsInsBasalOrLongAct': [0.0, 200.0],
 'BGTestAvgNumMeter': [0.0, 30.0],
 'BGTestAvgNumPtRep': [0.0, 30.0],
 'TSH': [0.0, 492.2],
 'HbA1c': [0.0, 19.8],
 'AutonomicNeuroCl': [0.0, 1.0],
 'Pt_InsGov': [0.0, 1.0],
 'NumPumpBolusOrShortAct': [0.0, 50.0],
 'HbA1cImputeDtMnC': [-82.05799999999999, 90.822],
 'Pt_SHFlg': [0.0, 1.0],
 'Pt_DKAFlg': [0.0, 1.0],
 'bmi': [

In [2]:
num = 1
cols = {}

df = pd.read_csv("../Data/T1D/DataFrames/" + str(num) + "DataFrame.csv", index_col=0)
for c in df.columns:
    cols[c] = [min(df[c]), max(df[c])]
    
num = 2
while num <=  34013:
    df = pd.read_csv("../Data/T1D/DataFrames/" + str(num) + "DataFrame.csv", index_col=0)
    
    for c in df.columns:
        if min(df[c]) < cols[c][0]:
            cols[c][0] = min(df[c])
            
        if max(df[c]) < cols[c][1]:
            cols[c][1] = max(df[c])
            


{'Pt_CGMUseNumDays': [0.0, 30.0],
 'Pt_FatherT1D': [0.0, 0.0],
 'Pt_MotherT1D': [0.0, 0.0],
 'Pt_SiblingT1D': [0.0, 0.0],
 'Pt_ChildT1D': [0.0, 0.0],
 'Pt_GrandchildT1D': [0.0, 0.0],
 'Pt_GrandparentT1D': [0.0, 0.0],
 'BldPrSys': [0.0, 150.0],
 'BldPrDia': [0.0, 82.0],
 'SMBGperDayPtMeterCombo': [0.0, 5.0],
 'WeightKg': [0.0, 111.58372302],
 'HeightCm': [0.0, 170.18],
 'DirectLDL': [0.0, 0.0],
 'age': [42, 48],
 'diabDur': [33.0, 39.0],
 'TotalDailyInsPerKg': [0.0, 0.4025832614],
 'LDL': [0.0, 98.0],
 'HDL': [0.0, 101.0],
 'TotChol': [0.0, 156.0],
 'Triglyc': [0.0, 150.0],
 'BUN': [0.0, 174.0],
 'AlbCreatRat_mggNew': [0.0, 15.0],
 'UnitsInsBasalOrLongAct': [0.0, 33.0],
 'BGTestAvgNumMeter': [0.0, 5.0],
 'BGTestAvgNumPtRep': [0.0, 4.0],
 'TSH': [0.0, 2.56],
 'HbA1c': [6.8, 8.0],
 'AutonomicNeuroCl': [0.0, 0.0],
 'Pt_InsGov': [0.0, 0.0],
 'NumPumpBolusOrShortAct': [0.0, 7.0],
 'HbA1cImputeDtMnC': [-1.959, 75.253],
 'Pt_SHFlg': [0.0, 0.0],
 'Pt_DKAFlg': [0.0, 0.0],
 'bmi': [0.0, 38.528651