In [1]:
from SignalTemporalLogic.STLFactory import STLFactory
import matplotlib.pyplot as plt
plt.rc('font', size=12)
import copy
import pandas as pd
import seaborn as sns
import numpy as np
from collections import Counter
import warnings
import treelib
import re

# warnings.filterwarnings('ignore')
%matplotlib inline


## Complexity Analysis
Given a maximum depth and the total number of variables, compute the maximum possible number of rule structures.

In [2]:
from RuleTemplate.RuleTemplate import RuleTemplate, Node, stlGrammarDict, terminalNodes
import logging

def complexityAnalysisSingle(maxDepth, varDict):
    """Grow a default rule template for ``maxDepth`` levels and return its rule set.

    Starting from the branch at the template root, every branch on the current
    frontier is handed to ``expand`` once per level; the branches ``expand``
    returns become the next frontier. The running branch count is printed after
    each level. Once all levels are done the template graph is written to a PNG
    and the rule set is generated from the template.

    Args:
        maxDepth: Number of expansion levels to run (levels are counted from 1).
        varDict: Variable dictionary forwarded to ``expand``; its length is also
            used in the output file name.

    Returns:
        The value returned by ``generateRuleSet(verbose=True)`` on the template.
    """
    logging.basicConfig(level=logging.INFO)
    template = RuleTemplate(varDict=None, default=True)

    frontier = [template.getBranch(template.root)]
    for depth in range(1, maxDepth + 1):
        grown = [child
                 for parent in frontier
                 for child in expand(parent, template, varDict)]
        # The next level iterates over deep copies of the branches just added.
        frontier = copy.deepcopy(grown)

        print("DEPTH:", depth, ", Total Branches:", len(template._branches))

    pngPath = ("../RuleTemplate/ComplexityAnalysis/Depth" + str(maxDepth)
               + "_Vars" + str(len(varDict)) + ".png")
    template.dotGraph.write(pngPath, format='png')

    # Enumerate every rule structure obtainable from the expanded template.
    return template.generateRuleSet(verbose=True)
        
def complexityAnalysis(maxDepth, varDict):
    """Record template size and rule-set size after every expansion level.

    A default rule template is grown one level at a time: each branch on the
    current frontier is passed to ``expand`` and the returned branches form the
    next frontier. After each level the rule set is regenerated and one row of
    statistics is recorded.

    The template graph is written to
    ``../RuleTemplate/ComplexityAnalysis/Depth<maxDepth>_Vars<n>.png`` once, at
    the final level. The file name does not depend on the current level, so
    writing it on every level (as was done previously) only overwrote the same
    file repeatedly; the file left on disk after a completed run is unchanged.

    Args:
        maxDepth: Number of expansion levels to run (levels are counted from 1).
        varDict: Variable dictionary forwarded to ``expand``; its length is
            stored in every row and used in the output file name.

    Returns:
        A list with one entry per level, each of the form
        ``[len(varDict), depth, len(template._branches), len(ruleSet)]``.
        Empty when ``maxDepth`` is less than 1.
    """
    rows = []

    template = RuleTemplate(varDict=None, default=True)
    frontier = [template.getBranch(template.root)]

    for depth in range(1, maxDepth + 1):
        grown = []
        for branch in frontier:
            grown.extend(expand(branch, template, varDict))

        # The next level iterates over deep copies of the branches just added.
        frontier = copy.deepcopy(grown)

        if depth == maxDepth:
            # Render the graph only once the template is fully grown; kept ahead
            # of generateRuleSet to preserve the original call order.
            pngPath = ("../RuleTemplate/ComplexityAnalysis/Depth" + str(maxDepth)
                       + "_Vars" + str(len(varDict)) + ".png")
            template.dotGraph.write(pngPath, format='png')

        # Get all possible rule structures for the template at this depth.
        ruleSet = template.generateRuleSet(verbose=False)

        rows.append([len(varDict), depth, len(template._branches), len(ruleSet)])

    return rows

    
def expand(branch, temp, varDict): #expand a single branch
    """Add every child branch the grammar allows below the nodes of ``branch``.

    For each node whose ``type`` has an entry in ``stlGrammarDict``, every
    child choice listed there is attached to the template under that node:

    * a choice containing ``"Variable"`` is attached once per key of
      ``varDict`` as ``[key, "Parameter"]`` with ``varBranch=True``; these
      branches are not returned;
    * any other choice is attached as-is and the resulting branch is returned
      to the caller.

    Args:
        branch: Object exposing ``nodes``; each node needs ``type`` and ``name``.
        temp: Template object providing ``addBranch``.
        varDict: Mapping whose keys are the variable names to attach.

    Returns:
        List of the branches created for non-variable choices, in creation order.
    """
    created = []
    for node in branch.nodes:
        if node.type not in stlGrammarDict:
            continue  # no grammar entry for this node type: nothing to add

        for childTypes in stlGrammarDict[node.type]:
            if "Variable" not in childTypes:
                # Non-variable choice: attach it and hand it back to the caller.
                created.append(temp.addBranch(childTypes, node.name))
                continue

            # Variable choice: one [variable, "Parameter"] branch per variable.
            for varName in varDict.keys():
                temp.addBranch([varName, "Parameter"], node.name, varBranch=True)

    return created




In [3]:
# Variable dictionary for the complexity-analysis runs. It is defined here,
# ahead of the first cell that reads it, so the notebook survives
# Restart Kernel -> Run All (previously this cell was fully commented out and
# the next cell relied on `variables` leaking in from a later cell).
# Each entry maps a variable name to a two-element list
# (presumably [min, max] bounds -- TODO confirm against RuleTemplate).
variables = {'LOS': [0.0, 122.0], 'ICU_Pt_Days': [0.0, 120.0], 'Mort': [0.0, 1.0], 'n_evts': [0.0, 5.0],
             'y': [0.0, 1.0], 'tte': [-60, 100], 'death': [0.0, 1.0],
             'direct': [0.0, 1.0], 'MET': [0.0, 1.0], 'Sgy': [0.0, 1.0], 'Glasgow_Coma_Scale_Total': [0, 15.0],
             'O2_Flow': [0.0, 10], 'Resp': [0.0, 50.0], 'SpO2': [80, 100.0], 'SBP': [0.0, 260.0],
             'Pulse': [0.0, 400.0], 'Temp': [30, 50], 'ALBUMIN': [1, 5.0],
             'ALKALINE_PHOSPHATASE': [15, 400], 'ALT_GPT': [6.0, 380], 'AST_GOT': [6.0, 250],
             'BLOOD_UREA_NITROGEN': [1, 120], 'CALCIUM': [2, 15], 'CHLORIDE': [80, 150],
             'CO2': [5.0, 50.0], 'CREATININE': [0, 10.0], 'GLUCOSE': [50.0, 460.0], 'HEMOGLOBIN': [1, 25],
             'LACTIC_ACID': [0, 5], 'MAGNESIUM': [0, 5], 'OXYGEN_SATURATION': [80, 100],
             'PARTIAL_THROMBOPLASTIN_TIME': [20.0, 140.0], 'PCO2': [10, 80], 'PHOSPHORUS': [0, 10],
             'PLATELET_COUNT': [10.0, 700.0], 'POTASSIUM': [1, 10], 'PROTIME_INR': [0, 5.0],
             'SODIUM': [100.0, 200.0], 'TOTAL_BILIRUBIN': [0, 10.0], 'TOTAL_PROTEIN': [2, 10],
             'TROPONIN_I': [0.02, 23.5], 'WHITE_BLOOD_CELL_COUNT': [0.12, 26.0], 'hr': [30.0, 300.0],
             's2_hr': [-1, 1], 's8_hr': [-1, 1], 's24_hr': [-1, 1], 'n_edrk': [0.0, 1.0],
             'edrk': [2, 55], 's2_edrk': [-1, 1], 's8_edrk': [-1, 1], 's24_edrk': [-1, 1], 'srr': [0, 1],
             'dfa': [-1, 5], 'cosen': [-5, 1], 'lds': [0.0, 5], 'af': [0.0, 1.0], 'AF': [0.0, 1.0], 
             'x1': [0.0, 1.0], 'x2': [0.0, 1.0], 'x3': [0.0, 1.0]}

# Optional: run the single-template analysis over the full dictionary.
# rs = complexityAnalysisSingle(maxDepth=7, varDict = variables)
# print("Retrieved ", len(rs), "Rules")

In [12]:
# Single-template run to depth 7 over (at most) the first 60 entries of `variables`.
vz = {name: variables[name] for name in list(variables)[:60]}

rs = complexityAnalysisSingle(varDict=vz, maxDepth=7)
print("Retrieved ", len(rs), "Rules")

DEPTH: 1 , Total Branches: 2
DEPTH: 2 , Total Branches: 3
DEPTH: 3 , Total Branches: 4
DEPTH: 4 , Total Branches: 8
DEPTH: 5 , Total Branches: 36
DEPTH: 6 , Total Branches: 246
DEPTH: 7 , Total Branches: 8996


INFO:Rule Template:Produced 648 Rule Structures
INFO:Rule Template:Generated 648 Formatted Rules



Retrieved  648 Rules


In [6]:
# Sweep settings: largest variable count to try and number of expansion levels.
maxVars = 60
maxDepth = 18

# Variable name -> two-element list, one entry per line.
# Insertion order matters: later cells slice the first N entries of this dict.
variables = {
    'LOS': [0.0, 122.0],
    'ICU_Pt_Days': [0.0, 120.0],
    'Mort': [0.0, 1.0],
    'n_evts': [0.0, 5.0],
    'y': [0.0, 1.0],
    'tte': [-60, 100],
    'death': [0.0, 1.0],
    'direct': [0.0, 1.0],
    'MET': [0.0, 1.0],
    'Sgy': [0.0, 1.0],
    'Glasgow_Coma_Scale_Total': [0, 15.0],
    'O2_Flow': [0.0, 10],
    'Resp': [0.0, 50.0],
    'SpO2': [80, 100.0],
    'SBP': [0.0, 260.0],
    'Pulse': [0.0, 400.0],
    'Temp': [30, 50],
    'ALBUMIN': [1, 5.0],
    'ALKALINE_PHOSPHATASE': [15, 400],
    'ALT_GPT': [6.0, 380],
    'AST_GOT': [6.0, 250],
    'BLOOD_UREA_NITROGEN': [1, 120],
    'CALCIUM': [2, 15],
    'CHLORIDE': [80, 150],
    'CO2': [5.0, 50.0],
    'CREATININE': [0, 10.0],
    'GLUCOSE': [50.0, 460.0],
    'HEMOGLOBIN': [1, 25],
    'LACTIC_ACID': [0, 5],
    'MAGNESIUM': [0, 5],
    'OXYGEN_SATURATION': [80, 100],
    'PARTIAL_THROMBOPLASTIN_TIME': [20.0, 140.0],
    'PCO2': [10, 80],
    'PHOSPHORUS': [0, 10],
    'PLATELET_COUNT': [10.0, 700.0],
    'POTASSIUM': [1, 10],
    'PROTIME_INR': [0, 5.0],
    'SODIUM': [100.0, 200.0],
    'TOTAL_BILIRUBIN': [0, 10.0],
    'TOTAL_PROTEIN': [2, 10],
    'TROPONIN_I': [0.02, 23.5],
    'WHITE_BLOOD_CELL_COUNT': [0.12, 26.0],
    'hr': [30.0, 300.0],
    's2_hr': [-1, 1],
    's8_hr': [-1, 1],
    's24_hr': [-1, 1],
    'n_edrk': [0.0, 1.0],
    'edrk': [2, 55],
    's2_edrk': [-1, 1],
    's8_edrk': [-1, 1],
    's24_edrk': [-1, 1],
    'srr': [0, 1],
    'dfa': [-1, 5],
    'cosen': [-5, 1],
    'lds': [0.0, 5],
    'af': [0.0, 1.0],
    'AF': [0.0, 1.0],
    'x1': [0.0, 1.0],
    'x2': [0.0, 1.0],
    'x3': [0.0, 1.0],
}


# Sweep the variable count from maxVars downward in steps of 3, collecting the
# per-depth rows that complexityAnalysis returns for each variable subset.
giantLst = []
for numVars in range(maxVars, 0, -3):
    print("Completing for", numVars, "vars")
    # Keep only the first `numVars` entries (dict insertion order).
    vz = dict(list(variables.items())[:numVars])
    giantLst += complexityAnalysis(maxDepth=maxDepth, varDict=vz)

statColumns = ['Num Variables', 'Depth', 'Num Branches', 'Total STL Structures']
df = pd.DataFrame(giantLst, columns=statColumns)
df

Completing for  10 vars
Completing for  7 vars
Completing for  4 vars
Completing for  1 vars


Unnamed: 0,Num Variables,Depth,Num Branches,Total STL Structures
0,10,1,2,0
1,10,2,3,0
2,10,3,4,0
3,10,4,8,0
4,10,5,36,0
5,10,6,246,0
6,10,7,1996,120
7,10,8,4726,120
8,7,1,2,0
9,7,2,3,0


In [None]:
# Single-template run over the full variable dictionary, 17 expansion levels.
rs = complexityAnalysisSingle(varDict=variables, maxDepth=17)
print("Retrieved ", len(rs), "Rules")

DEPTH: 1 , Total Branches: 2
DEPTH: 2 , Total Branches: 3
DEPTH: 3 , Total Branches: 4
DEPTH: 4 , Total Branches: 8
DEPTH: 5 , Total Branches: 36
DEPTH: 6 , Total Branches: 246
DEPTH: 7 , Total Branches: 8996
DEPTH: 8 , Total Branches: 18726
DEPTH: 9 , Total Branches: 35806
DEPTH: 10 , Total Branches: 359136
DEPTH: 11 , Total Branches: 1023016
