In [1]:
import sys
import os
from importlib import reload
here = os.getcwd()
sys.path.append(os.path.join(here,"../"))

In [2]:
import torch
import os
import csv
import math
import random
import numpy as np
import scipy as sp
from itertools import permutations

In [3]:
from models.cde.cde_data_common import process_data,get_final_linear_input_channels,get_final_indices,wrap_data,augment_data
import models.cde.cde_train_common as train_common
import models.cde.cde_data_common as cde_data_common
import models.cde as cde

from utils.test_utils import make_results_filenames

import explainer.rule_pattern_miner as rlm
import explainer.explainer_utils as eutils
from explainer.FPGrowth_tree import *
import explainer.itemsets_miner as itm

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score,confusion_matrix,precision_score,recall_score,accuracy_score,roc_auc_score,roc_curve
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt 
import seaborn as sns

In [5]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import plot_tree,export_text,export_graphviz
import explainer.DT_rules as dtr
from explainer.DT_rules import obtain_rule_lists_from_DT,select_rule_list

In [6]:
def new_make_model():
    model, regularise = make_model()
    model.linear.weight.register_hook(lambda grad: 100 * grad)
    model.linear.bias.register_hook(lambda grad: 100 * grad)
    return model, regularise

def group_processed_data(X,y,times):
    X = torch.tensor(X)
    y = torch.tensor(y)
    final_indices,_ = get_final_indices(times,y)
    coeffs = process_data(times,X,intensity=intensity,time_intensity=time_intensity,cummean=cummean,cumsum=cumsum,append_times=append_times,interpolate=interpolate)
    return coeffs,y,final_indices

In [7]:
## Control randomness for reproducibility

seed = 42
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

In [8]:
model_name = "ncde"

if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [9]:
## Configuration of NCDE model

interpolate = "linear"    
side_input = False
concat_z = True

append_times = False
time_intensity = True
intensity = True 
static_intensity = True


time_len = 72
max_epochs = 100
pos_weight = 20

hidden_channels = 2 
hidden_hidden_channels = 128
num_hidden_layers = 4

batch_size = 1024
max_epochs = 500
lr = 0.0001 * (batch_size / 64)

num_classes = 2
cummean = True
cumsum = False


In [10]:
pp = "/Users/chenyu/github/NDE-Models-DigitalHealth/"
base_loc = pp+'data/raw/sepsis/'

rpath = "./results/sepsis"
name = model_name + "_" + str(seed)

In [11]:
## Read raw data

X_times = []
X_static = []
y = []
H = time_len
for filename in os.listdir(base_loc):
    if filename.endswith('.psv'):
        with open(os.path.join(base_loc,filename)) as file:
            time = []
            label = 0.0
            reader = csv.reader(file, delimiter='|')
            reader = iter(reader)
            next(reader)  # first line is headings
            prev_iculos = 0
            for line in reader:
                assert len(line) == 41
                # time values are 34 features
                *time_values, age, gender, unit1, unit2, hospadmtime, iculos, sepsislabel = line
                iculos = int(iculos)
                #print('iculos',iculos)
                if iculos > H:  # keep at most the first H hours
                    break
                ## padding nan for missing hours
                for iculos_ in range(prev_iculos + 1, iculos):
                    time.append([float('nan') for value in time_values])
                    #time.append(np.zeros(len(time_values))+np.nan)
                prev_iculos = iculos
                time.append([float(value) for value in time_values])
                label = max(label, float(sepsislabel))
            unit1 = float(unit1)
            unit2 = float(unit2)
            unit1_obs = not math.isnan(unit1)
            unit2_obs = not math.isnan(unit2)
            if not unit1_obs:
                unit1 = 0.
            if not unit2_obs:
                unit2 = 0.
            hospadmtime = float(hospadmtime)
            if math.isnan(hospadmtime):
                hospadmtime = 0.  # this only happens for one record
            static = [float(age), float(gender), unit1, unit2, hospadmtime]
            if static_intensity:
                static += [unit1_obs, unit2_obs]
            if len(time) > 2:
                if len(time) < H:
                    # padding less hours
                    for t in range(H-len(time)):
                        time.append([float('nan') for value in time_values])
                X_times.append(time)
                X_static.append(static)
                y.append(label)
    

In [12]:
## preprocess data

times = np.arange(time_len).astype(np.float32)
times = torch.tensor(times)

X_times = np.array(X_times).astype(np.float32)
X_static = np.array(X_static).astype(np.float32)
y = np.array(y).astype(np.float32)


X_train,X_test,y_train,y_test = train_test_split(X_times,y,test_size=0.2,random_state=seed)
X_test,X_val,y_test,y_val = train_test_split(X_test,y_test,test_size=0.5,random_state=seed)

### min-max normalization
for c in range(X_times.shape[-1]):
    mi = X_train[:,:,c][~np.isnan(X_train[:,:,c])].min()
    ma = X_train[:,:,c][~np.isnan(X_train[:,:,c])].max()
    X_train[:,:,c] = (X_train[:,:,c] - mi)/(ma - mi)+1.
    X_test[:,:,c] = (X_test[:,:,c] - mi)/(ma - mi)+1.
    X_val[:,:,c] = (X_val[:,:,c] - mi)/(ma - mi)+1.
    

In [13]:
## augment data with cumulative intensities and linear interploation

X_train_raw = augment_data(torch.tensor(X_train),times,intensity=intensity,time_intensity=time_intensity,cummean=cummean,cumsum=cumsum,append_times=append_times)
X_test_raw = augment_data(torch.tensor(X_test),times,intensity=intensity,time_intensity=time_intensity,cummean=cummean,cumsum=cumsum,append_times=append_times)
X_val_raw = augment_data(torch.tensor(X_val),times,intensity=intensity,time_intensity=time_intensity,cummean=cummean,cumsum=cumsum,append_times=append_times)

train_data = group_processed_data(X_train,y_train,times)
test_data = group_processed_data(X_test,y_test,times)
val_data = group_processed_data(X_val,y_val,times)

check X torch.Size([32266, 72, 136])
check X torch.Size([4033, 72, 136])
check X torch.Size([4034, 72, 136])


In [15]:
## Define the NCDE model

input_channels = train_data[0][0].shape[-1]
output_channels = 1
stream = True if concat_z else False

if concat_z or side_input:
    side_input_dim = cv_sets[0][0][-1].shape[-1] if side_input else 0
    final_linear_input_channels = get_final_linear_input_channels(hidden_channels,side_input_dim=side_input_dim,time_len=time_len)
else:
    final_linear_input_channels = None

make_model = train_common.make_model(model_name, input_channels, output_channels, hidden_channels,
                               hidden_hidden_channels, num_hidden_layers, use_intensity=False,
                              final_linear_input_channels=final_linear_input_channels, 
                              initial=True,side_input=side_input,append_times=append_times,interpolate=interpolate)


In [16]:
## uncomment this cell if need to train a new model
times, train_dataloader, val_dataloader, test_dataloader = wrap_data(times, train_data, val_data, test_data, device,
                                                                                    batch_size=batch_size,num_workers=0)

model,log,log_num = train_common.main(name, times, train_dataloader, val_dataloader, test_dataloader, device,
                   new_make_model, num_classes, max_epochs, lr, kwargs={'stream':stream}, pos_weight=torch.tensor(pos_weight),
                   step_mode=True,rpath=rpath)
model.to('cpu')
model.eval()

interpolate linear


  0%|                                                                                                          | 0/500 [00:00<?, ?it/s]

Starting training for model:

NeuralCDE(
  input_channels=136, hidden_channels=2, output_channels=1, initial=True
  (func): FinalTanh(
    input_channels: 136, hidden_channels: 2, hidden_hidden_channels: 128, num_hidden_layers: 4
    (linear_in): Linear(in_features=2, out_features=128, bias=True)
    (linears): ModuleList(
      (0): Linear(in_features=128, out_features=128, bias=True)
      (1): Linear(in_features=128, out_features=128, bias=True)
      (2): Linear(in_features=128, out_features=128, bias=True)
    )
    (linear_out): Linear(in_features=128, out_features=272, bias=True)
  )
  (initial_network): Linear(in_features=136, out_features=2, bias=True)
  (linear): Linear(in_features=144, out_features=1, bias=True)
)




  0%|▏                                                                                               | 1/500 [00:34<4:48:04, 34.64s/it]

Epoch: 0  Train loss: 2.46  Train auroc: 0.643  Val loss: 2.43  Val auroc: 0.612


  2%|██                                                                                             | 11/500 [05:13<4:02:47, 29.79s/it]

save model
Epoch: 10  Train loss: 1.98  Train auroc: 0.837  Val loss: 1.96  Val auroc: 0.832


  4%|███▉                                                                                           | 21/500 [09:54<4:01:42, 30.28s/it]

Epoch: 20  Train loss: 2.25  Train auroc: 0.722  Val loss: 2.17  Val auroc: 0.751


  6%|█████▉                                                                                         | 31/500 [14:38<3:57:24, 30.37s/it]

Epoch: 30  Train loss: 2.2  Train auroc: 0.764  Val loss: 2.13  Val auroc: 0.791


  8%|███████▎                                                                                 | 41/500 [9:32:49<557:18:19, 4371.02s/it]

Epoch: 40  Train loss: 2.19  Train auroc: 0.775  Val loss: 2.16  Val auroc: 0.785


 10%|█████████▎                                                                                 | 51/500 [9:37:49<19:21:17, 155.18s/it]

Epoch: 50  Train loss: 2.18  Train auroc: 0.776  Val loss: 2.13  Val auroc: 0.799


 12%|███████████▎                                                                                 | 61/500 [9:43:05<4:29:46, 36.87s/it]

Epoch: 60  Train loss: 2.18  Train auroc: 0.779  Val loss: 2.2  Val auroc: 0.773


 14%|█████████████▏                                                                               | 71/500 [9:48:19<3:59:59, 33.56s/it]

Epoch: 70  Train loss: 2.18  Train auroc: 0.78  Val loss: 2.18  Val auroc: 0.784


 16%|███████████████                                                                              | 81/500 [9:53:27<3:49:14, 32.83s/it]

Epoch: 80  Train loss: 2.18  Train auroc: 0.779  Val loss: 2.12  Val auroc: 0.796


 18%|████████████████▉                                                                            | 91/500 [9:58:35<3:43:24, 32.77s/it]

Epoch: 90  Train loss: 2.18  Train auroc: 0.778  Val loss: 2.16  Val auroc: 0.783


 20%|██████████████████▍                                                                        | 101/500 [10:03:48<3:40:09, 33.11s/it]

Epoch: 100  Train loss: 2.17  Train auroc: 0.78  Val loss: 2.12  Val auroc: 0.8


 22%|████████████████████▏                                                                      | 111/500 [10:08:57<3:34:02, 33.01s/it]

Epoch: 110  Train loss: 2.18  Train auroc: 0.779  Val loss: 2.14  Val auroc: 0.779


 24%|█████████████████████▌                                                                   | 121/500 [10:14:11<32:03:48, 304.56s/it]


Epoch: 120  Train loss: 2.18  Train auroc: 0.78  Val loss: 2.1  Val auroc: 0.8
Breaking because of no improvement in training loss for 100 epochs.
best epoch 10
#####################
test_metrics
{'accuracy': 0.7571614384651184, 'confusion': array([[2195.,  713.],
       [  33.,  131.]]), 'dataset_size': 3072, 'loss': 1.93487548828125, 'auroc': 0.8519517227496897, 'average_precision': 0.3293461463119249, 'sensitivity': 0.7987804878048781, 'specificity': 0.7548143053645117}
#####################
val_metrics
{'accuracy': 0.7529296875, 'confusion': array([[2198.,  718.],
       [  41.,  115.]]), 'dataset_size': 3072, 'loss': 1.965420126914978, 'auroc': 0.8273165734585487, 'average_precision': 0.33268576359211743}
#####################
train_metrics
{'accuracy': 0.7598286271095276, 'confusion': array([[22778.,  7235.],
       [  389.,  1342.]]), 'dataset_size': 31744, 'loss': 1.9796645641326904, 'auroc': 0.8374503149540262, 'average_precision': 0.31327172211097754}


NeuralCDE(
  input_channels=136, hidden_channels=2, output_channels=1, initial=True
  (func): FinalTanh(
    input_channels: 136, hidden_channels: 2, hidden_hidden_channels: 128, num_hidden_layers: 4
    (linear_in): Linear(in_features=2, out_features=128, bias=True)
    (linears): ModuleList(
      (0): Linear(in_features=128, out_features=128, bias=True)
      (1): Linear(in_features=128, out_features=128, bias=True)
      (2): Linear(in_features=128, out_features=128, bias=True)
    )
    (linear_out): Linear(in_features=128, out_features=272, bias=True)
  )
  (initial_network): Linear(in_features=136, out_features=2, bias=True)
  (linear): Linear(in_features=144, out_features=1, bias=True)
)

In [17]:
## load model if already trained and saved it

# model_path = "./results/sepsis/intensity_time_intensity_concatz/zdim2_hdim128_nlayer4_bs1024/posw20/interp_linear/model_6"
# model, regularise_parameters = make_model()
# model.load_state_dict(torch.load(model_path))
# model.to('cpu')
# model.eval()

In [18]:
## get feature names of augmented features

raw_feature_names = ['HR','O2Sat','Temp','SBP','MAP','DBP','Resp','EtCO2','BaseExcess','HCO3','FiO2','pH','PaCO2','SaO2',
                    'AST','BUN','Alkalinephos','Calcium','Chloride','Creatinine','Bilirubin_direct','Glucose','Lactate','Magnesium',
                    'Phosphate','Potassium','Bilirubin_total','TroponinI','Hct','Hgb','PTT','WBC','Fibrinogen','Platelets']

latent_feature_names = [r'$z_'+str(i)+'(t_{'+str(h)+'})$' for h in range(time_len) for i in range(hidden_channels) ]
intensity_feature_names = [rf+'_ctime' for rf in raw_feature_names] + [rf+'_cmax' for rf in raw_feature_names] + [rf+'_cmean' for rf in raw_feature_names] 
input_feature_names = raw_feature_names + intensity_feature_names
feature_types = ["int" if "ctime" in fn else "float" for fn in input_feature_names] 
len(input_feature_names)

136

In [19]:
seed

42

In [20]:
## get baseline and test samples for computing impact score matrix

model.append_times=append_times
train_reps = model.latent_representation(X_train_raw,times=times).detach()
test_reps = model.latent_representation(X_test_raw,times=times).detach()
val_reps = model.latent_representation(X_val_raw,times=times).detach()

baselines = eutils.gen_intgrad_baselines(X_train_raw,y_train,train_reps)
subset = eutils.gen_balanced_subset(X_train_raw,y_train,size_per_class=int(y_train.sum()))

In [21]:
## get impact score matrix of latent states
int_g, z_shift = [], []
for c in range(2):
    int_g_c, z_shift_c = eutils.calc_baselines_intg(test_examples=subset,model=model,baselines=baselines,times=times,target_c=c,target_dim=0,C=2)
    int_g.append(int_g_c)
    z_shift.append(z_shift_c)

int_g = torch.vstack(int_g)
z_shift = torch.vstack(z_shift)

In [22]:
## get predicted probability of training, test, and validation sets

linear_prams = []
for p in model.linear.parameters():
    p = p.detach()
    linear_prams.append(p)

pred_y_train = model.linear(train_reps.reshape(train_reps.shape[0],-1)).detach().numpy()
pred_y_test = model.linear(test_reps.reshape(test_reps.shape[0],-1)).detach().numpy()

In [23]:
## get prediction threshold by roc

pred_y = sp.special.expit(pred_y_train).reshape(-1)
auc = roc_auc_score(y_train, pred_y)
fpr, tpr, thresholds = roc_curve(y_train, pred_y)
y_thd = thresholds[np.argmax(tpr - fpr)]
print("y threshold",y_thd)

y threshold 0.5414686


In [24]:
auc = roc_auc_score(y_test, pred_y_test)
auc

0.8441754285578046

In [42]:
# # If test the rule extraction methods for multiple random seeds, change the random seed here.
# # If change random seed in the begining, the model, the training, and test set will change as well, the randomness will have several sources.

# seed = 0
# torch.manual_seed(seed)
# np.random.seed(seed)
# random.seed(seed)

In [43]:
## get impact score matrix for output

C=2
baseline_reps = model.latent_representation(baselines,times=times)
baseline_output = model.linear(baseline_reps.reshape(baseline_reps.shape[0],-1)).detach()

subset_reps,subset_output = [],[]
for k in range(C):
    subset_reps.append(model.latent_representation(subset[k],times=times))
    subset_output.append(model.linear(subset_reps[k].reshape(subset_reps[k].shape[0],-1)).detach())
    
cids = np.arange(C)
yshift = []
for k in cids:
    for kk in cids[cids!=k]:
        yshift.append(subset_output[k]-baseline_output[kk])
yshift = torch.vstack(yshift)

weights = linear_prams[0].reshape(-1,hidden_channels)
y_int_g = eutils.output_intg_score(int_g,weights,yshift)
y_int_g[torch.isnan(y_int_g)] = 0.

In [44]:
## get impact score threshold 
thds = np.linspace(0.01,0.2,200)
f_n = []
for thd in thds:
    mask = torch.abs(y_int_g) >= thd 
    f_n.append((thd,(mask.sum(dim=0)>=len(y_int_g)*0.99).sum()))
    if f_n[-1][1]==1:
        break
thd = f_n[-1][0]
thd

0.04055276381909548

In [45]:
## get frequent important feature set
itemsets_y = itm.transform_intgrad_to_itemsets(y_int_g,thd=thd)
fids = itm.gen_freq_feature_set(itemsets_y[0],min_support=100,max_len=500)
fids = np.array(fids).astype(int)-1
print('feature set',fids,len(fids))

feature set [8741 9693 6701 9285 7109 9557 7789 9149 8059 6565 8333 7515 6971 6291
 6837 7243 6970 8467 8058 7242 7653 9011 6290 8063 7381 8466 7519 7514
 7379 5746 7378 6157 8471 9010 5747 7651 6021 6973 6295 7925 8197 9696
 8330 8742 6698 7247 9015 6975 8331 5477 7786 5882 8336 9690 6699 7655
 7792 5749 8875 9694 5883 7110 8739 7654 5613 7650 8743 9286 5885 7383
 8195 7787 7112 9419 9418 5067 8061 8064 8874 5205 9150 5751 8199 8605
 4389 9560 8198 8879 4935 5071 6704 7790 4931 7384 6976 9558 5066 6427
 8194 6838 6562 9691 4525 5887 7248 8603 9423 7518 8335 6426 8470 7245
 6294 5474 7922 7106 8738 9692 5752 4930 6568 9554 8740 6703 8607 6563
 9287 5475 8472 8744 9288 4797 6431 8602 8334 7928 3981 9283 6839 4799
 7520 3573 8606 9016 9014 3165 9151 5888 9284 9152 9421 7923 4795 9147
 9424 9556 8878 6835 5069 7108 7791 9148 6296 4253 2893 6566 5480 9700
 4119 7116 5479 6160 8877 6158 7788 7652 4115 3029 9555 6022 5072 5207
 7246 8332 6840 7796 8196 6702 3845 8062 5341 6836 4794 5206 7926

In [46]:
min_support=2000
num_grids=8
max_depth=3

x = X_train_raw.reshape(X_train_raw.shape[0],-1).numpy()
y_rule_candidates = rlm.gen_rule_list_for_one_target(x,fids,pred_y>y_thd,y=y_train,c=1,sort_by="fitness",
                                                    min_support=min_support,num_grids=num_grids,max_depth=max_depth,top_K=3,
                                                    local_x=None,feature_types=feature_types*time_len,
                                                    verbose=False,search="greedy")

build_rule_tree
init rule tree
check potential rule 5474 3.515306315419299 29.571428571428573 41.0 2223
add rule [] 5474 [3.515306315419299, 5474]
check potential rule 7652 1.0450860697396915 49.0 57.0 2057
add rule [5474] 7652 [1.0450860697396915, 7652]
check potential rule 5477 1.007049372775757 29.571428571428573 41.0 2033
add rule [5474, 7652] 5477 [1.007049372775757, 5477]
check potential rule 5341 1.007049372775757 28.857142857142854 40.0 2033
add rule [5474, 7652] 5341 [1.007049372775757, 5341]
check potential rule 5613 1.0069501952311777 30.285714285714285 42.0 2032
add rule [5474, 7652] 5613 [1.0069501952311777, 5613]
check potential rule 7788 1.0449800473116377 49.857142857142854 58.0 2056
add rule [5474] 7788 [1.0449800473116377, 7788]
check potential rule 5477 1.0070523591958414 29.571428571428573 41.0 2032
add rule [5474, 7788] 5477 [1.0070523591958414, 5477]
check potential rule 5341 1.0070523591958414 28.857142857142854 40.0 2032
add rule [5474, 7788] 5341 [1.00705235919

In [47]:
y_rule_candidates

[{'rules': [(6154, '>=', 34.0), (5612, '>=', 37.0), (5752, '>=', 31.0)],
  'cond_prob_target': 0.8631271310277643,
  'support': 2053,
  'fitness': 0.2051740745837347,
  'cond_prob_y': 0.23721383341451535,
  'ratio_y': 0.27686185332575325},
 {'rules': [(6154, '>=', 34.0), (5612, '>=', 37.0), (5888, '>=', 32.0)],
  'cond_prob_target': 0.8631271310277643,
  'support': 2053,
  'fitness': 0.2051740745837347,
  'cond_prob_y': 0.23721383341451535,
  'ratio_y': 0.27686185332575325},
 {'rules': [(6154, '>=', 34.0), (5612, '>=', 37.0), (6160, '>=', 34.0)],
  'cond_prob_target': 0.8631271310277643,
  'support': 2053,
  'fitness': 0.2051740745837347,
  'cond_prob_y': 0.23721383341451535,
  'ratio_y': 0.27686185332575325},
 {'rules': [(6154, '>=', 34.0), (5476, '>=', 36.0), (5752, '>=', 31.0)],
  'cond_prob_target': 0.8631271310277643,
  'support': 2053,
  'fitness': 0.2051740745837347,
  'cond_prob_y': 0.23721383341451535,
  'ratio_y': 0.27686185332575325},
 {'rules': [(6154, '>=', 34.0), (5476, '

In [48]:
for i, rules in enumerate(y_rule_candidates):   
    rules["rules"] = rlm.replace_feature_names(rules["rules"],input_feature_names,time_index=True)
    y_rule_candidates[i] = rules
y_rule_candidates

[{'rules': [(6154, 'HR_ctime_t45', '>=', 34.0),
   (5612, 'Temp_ctime_t41', '>=', 37.0),
   (5752, 'Resp_ctime_t42', '>=', 31.0)],
  'cond_prob_target': 0.8631271310277643,
  'support': 2053,
  'fitness': 0.2051740745837347,
  'cond_prob_y': 0.23721383341451535,
  'ratio_y': 0.27686185332575325},
 {'rules': [(6154, 'HR_ctime_t45', '>=', 34.0),
   (5612, 'Temp_ctime_t41', '>=', 37.0),
   (5888, 'Resp_ctime_t43', '>=', 32.0)],
  'cond_prob_target': 0.8631271310277643,
  'support': 2053,
  'fitness': 0.2051740745837347,
  'cond_prob_y': 0.23721383341451535,
  'ratio_y': 0.27686185332575325},
 {'rules': [(6154, 'HR_ctime_t45', '>=', 34.0),
   (5612, 'Temp_ctime_t41', '>=', 37.0),
   (6160, 'Resp_ctime_t45', '>=', 34.0)],
  'cond_prob_target': 0.8631271310277643,
  'support': 2053,
  'fitness': 0.2051740745837347,
  'cond_prob_y': 0.23721383341451535,
  'ratio_y': 0.27686185332575325},
 {'rules': [(6154, 'HR_ctime_t45', '>=', 34.0),
   (5476, 'Temp_ctime_t40', '>=', 36.0),
   (5752, 'Resp_c

In [58]:
### otain rules for pred_y = c from a DecisionTreeClassifier

X_tr = X_train_raw.reshape(X_train_raw.shape[0],-1).numpy()
X_tr[np.isnan(X_tr)] = 0.
treemodel = DecisionTreeClassifier(max_depth=3,min_samples_leaf=2000,random_state=seed)
treemodel.fit(X_tr,pred_y>y_thd)

In [59]:
print(export_text(treemodel))

|--- feature_3029 <= 9.50
|   |--- feature_9566 <= 70.50
|   |   |--- feature_9706 <= 71.50
|   |   |   |--- feature_4661 <= 3.50
|   |   |   |   |--- class: False
|   |   |   |--- feature_4661 >  3.50
|   |   |   |   |--- class: True
|   |   |--- feature_9706 >  71.50
|   |   |   |--- feature_5476 <= 32.50
|   |   |   |   |--- feature_6396 <= 1.11
|   |   |   |   |   |--- class: False
|   |   |   |   |--- feature_6396 >  1.11
|   |   |   |   |   |--- class: False
|   |   |   |--- feature_5476 >  32.50
|   |   |   |   |--- class: False
|   |--- feature_9566 >  70.50
|   |   |--- feature_9716 <= 71.50
|   |   |   |--- feature_2895 <= 4.50
|   |   |   |   |--- class: False
|   |   |   |--- feature_2895 >  4.50
|   |   |   |   |--- class: False
|   |   |--- feature_9716 >  71.50
|   |   |   |--- feature_3298 <= 5.50
|   |   |   |   |--- feature_957 <= 1.09
|   |   |   |   |   |--- class: False
|   |   |   |   |--- feature_957 >  1.09
|   |   |   |   |   |--- class: False
|   |   |   |--- 

In [60]:
feature_names = [f+"_t"+str(t) for t in range(time_len) for f in input_feature_names ]
rule_list, rule_value_list, rule_metric_list, new_lines = dtr.obtain_rule_lists_from_DT(treemodel,5,X_tr,y_train,pred_y>y_thd,np.arange(X_tr.shape[-1]),feature_names,c=1)

In [61]:
dtr.display_rules_from_DT(rule_list,rule_metric_list,feature_names)

#################
[(3029, '<=', 9.5), (9566, '<=', 70.5), (9706, '<=', 71.5), (4661, '<=', 3.5)]
cond_prob_target 0.417 cond_prob_y 0.071 support 2465 fitness -0.056
SBP_ctime_t22 <= 9.5
PaCO2_ctime_t70 <= 70.5
Alkalinephos_ctime_t71 <= 71.5
SBP_ctime_t34 <= 3.5
#################
[(3029, '<=', 9.5), (9566, '<=', 70.5), (9706, '<=', 71.5), (4661, '>', 3.5)]
cond_prob_target 0.682 cond_prob_y 0.121 support 2008 fitness 0.101
SBP_ctime_t22 <= 9.5
PaCO2_ctime_t70 <= 70.5
Alkalinephos_ctime_t71 <= 71.5
SBP_ctime_t34 > 3.5
#################
[(3029, '<=', 9.5), (9566, '<=', 70.5), (9706, '>', 71.5), (5476, '<=', 32.5), (6396, '<=', 1.115)]
cond_prob_target 0.061 cond_prob_y 0.025 support 4743 fitness -0.573
SBP_ctime_t22 <= 9.5
PaCO2_ctime_t70 <= 70.5
Alkalinephos_ctime_t71 > 71.5
Temp_ctime_t40 <= 32.5
MAP_t47 <= 1.115
#################
[(3029, '<=', 9.5), (9566, '<=', 70.5), (9706, '>', 71.5), (5476, '<=', 32.5), (6396, '>', 1.115)]
cond_prob_target 0.228 cond_prob_y 0.053 support 2059 fitn