In [3]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
import random
import numpy as np
import pandas as pd
import os
# Detect whether the notebook is running on Google Colab by probing for the
# `google.colab` package, which is only importable there.
try:
    import google.colab  # noqa: F401 -- imported only to test availability
    IN_COLAB = True
except ImportError:
    # Only a failed import means "not on Colab"; a bare `except:` would also
    # swallow KeyboardInterrupt / SystemExit raised while the cell is running.
    IN_COLAB = False

# Outside Colab the working directory is moved one level up
# (presumably to the repository root -- TODO confirm).
if not IN_COLAB:
    os.chdir("..")
%load_ext autoreload
%autoreload 2

## Utility Functions

In [4]:
def metrics(cm):
    """Print classification metrics (as percentages) for a binary confusion matrix.

    Parameters
    ----------
    cm : array-like with exactly four cells (2x2)
        Confusion matrix whose flattened order is ``TN, FP, FN, TP`` -- the
        layout produced by ``sklearn.metrics.confusion_matrix`` for labels
        ``[0, 1]``, which is how the notebook calls this function. Nested
        lists are accepted as well as NumPy arrays.

    Returns
    -------
    None
        The metrics are written to stdout only.

    Raises
    ------
    ValueError
        If ``cm`` does not contain exactly four cells (e.g. only one class was
        present, or the problem is not binary).
    """
    cells = np.asarray(cm).ravel()
    if cells.size != 4:
        raise ValueError(
            "metrics() expects a 2x2 binary confusion matrix, got shape %s"
            % (np.shape(cm),)
        )
    TN, FP, FN, TP = cells

    def _ratio(numerator, denominator):
        # An empty denominator means the metric is undefined for this matrix;
        # report NaN explicitly instead of relying on a divide warning.
        if denominator == 0:
            return float("nan")
        return numerator / denominator

    precision = _ratio(TP, TP + FP)
    accuracy_model = _ratio(TP + TN, TP + TN + FP + FN)
    recall_value = _ratio(TP, TP + FN)
    specificity_value = _ratio(TN, TN + FP)
    # Local name deliberately differs from sklearn's `f1_score` import.
    f1_value = 2 * _ratio(precision * recall_value, precision + recall_value)
    PPV = precision  # positive predictive value is the same quantity as precision
    NPV = _ratio(TN, TN + FN)

    print("PPV: ", PPV * 100)
    print("NPV: ", NPV * 100)
    print("Precision value of the model: ", precision * 100)
    print("Accuracy of the model: ", accuracy_model * 100)
    print("Recall of the model: ", recall_value * 100)
    print("Specificity of the model: ", specificity_value * 100)
    print("F1 Score of the model: ", f1_value * 100)

In [5]:
def getCols(df):
    """Group the feature columns of ``df`` into numeric, categorical and binary.

    Rows containing any missing value are dropped before the columns are
    inspected. The ``'Class'`` column is never reported in any group; a frame
    without a ``'Class'`` column is accepted.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns should be grouped.

    Returns
    -------
    list
        ``[num_cols, cat_cols, bin_cols]`` where

        * ``num_cols`` -- columns stored as ``float64``,
        * ``bin_cols`` -- columns whose values (in complete rows) are all 0 or 1,
        * ``cat_cols`` -- every remaining feature column.

        A ``float64`` column holding only 0/1 appears in both ``num_cols`` and
        ``bin_cols``.
    """
    complete_rows = df.dropna()
    feature_cols = [col for col in df.columns if col != 'Class']

    # Check the column dtype rather than the element at label 0: that label
    # no longer exists when the first row contained a NaN and was dropped.
    num_cols = [
        col for col in feature_cols
        if complete_rows[col].dtype == np.float64
    ]
    bin_cols = [
        col for col in feature_cols
        if np.isin(complete_rows[col].unique(), [0, 1]).all()
    ]
    cat_cols = [
        col for col in feature_cols
        if col not in num_cols and col not in bin_cols
    ]
    return [num_cols, cat_cols, bin_cols]

## Read Data

In [18]:
import pandas as pd
import numpy as mp

# Load the (non-encoded) MIMIC ICU delirium CSVs: `df` is later split into
# train/validation, `test` is used by the evaluation cells.
df = pd.read_csv(filepath_or_buffer='/content/delirium_icu_tr_mimic_.csv')
test = pd.read_csv(filepath_or_buffer='/content/delirium_icu_ts_mimic_.csv')



In [19]:
# Group the columns of `df` via getCols().
num_cols, cat_cols, bin_cols = getCols(df)

# Categorical and binary columns are passed to the model together.
cat_col_names = [*cat_cols, *bin_cols]

In [20]:
# Preview the first rows of the frame read from the CSV above.
df.head()

Unnamed: 0,ALT,AST,Admission Weight (Kg),Albumin,Anion gap,Arterial Base Excess,Arterial Blood Pressure mean,Arterial CO2(Calc),Arterial PaCO2,Arterial pH,...,ADMISSION_TYPE,ADMISSION_LOCATION,INSURANCE,LANGUAGE,RELIGION,MARITAL_STATUS,ETHNICITY,GENDER,AGE,Class
0,25.666667,77.0,73.777055,2.95,13.226413,0.534483,74.23057,26.051724,40.422414,7.400342,...,0,2,3,17,5,2,25,1,63.178082,0
1,27.75,41.0,79.876167,1.925,13.167512,3.26,59.571885,29.64,45.02,7.4102,...,2,3,3,17,3,2,25,1,53.805479,0
2,1084.977273,2380.916996,72.008629,2.715033,12.880346,0.535714,87.937727,25.482143,38.511364,7.412403,...,1,2,2,17,12,4,25,1,50.260274,1
3,25.5,36.0,78.6027,3.7,13.199486,1.088235,78.837134,26.058824,37.323529,7.442553,...,1,0,3,29,12,2,25,1,56.493151,0
4,317.207407,812.911111,76.556766,2.777419,13.723353,-0.797357,100.200931,23.159292,33.132159,7.429607,...,1,3,3,17,3,3,25,0,41.293151,0


In [21]:
from sklearn.model_selection import train_test_split
# Hold out 10% of `df` for validation, preserving the `Class` balance;
# the fixed random_state keeps the split reproducible.
train, val = train_test_split(
    df,
    test_size=0.10,
    stratify=df['Class'],
    shuffle=True,
    random_state=42,
)

In [27]:
import pandas as pd
import numpy as mp

# Load the `_encoded` variants of the MIMIC ICU delirium CSVs.
# NOTE: this rebinds `df` and `test`, replacing the frames read from the
# non-encoded CSVs earlier in the notebook; every later cell that uses
# `df`, `train`, `val` or `test` sees whichever load/split ran most recently.
df = pd.read_csv(filepath_or_buffer='/content/delirium_icu_tr_mimic_encoded.csv')
test = pd.read_csv(filepath_or_buffer='/content/delirium_icu_ts_mimic_encoded.csv')



In [28]:
# Re-derive the column groups from the frame currently bound to `df`.
num_cols, cat_cols, bin_cols = getCols(df)

# Categorical and binary columns are passed to the model together.
cat_col_names = [*cat_cols, *bin_cols]

In [29]:
# Preview the first rows of the frame currently bound to `df`.
df.head()

Unnamed: 0,ALT,AST,Admission Weight (Kg),Albumin,Anion gap,Arterial Base Excess,Arterial Blood Pressure mean,Arterial CO2(Calc),Arterial PaCO2,Arterial pH,...,ADMISSION_TYPE,ADMISSION_LOCATION,INSURANCE,LANGUAGE,RELIGION,MARITAL_STATUS,ETHNICITY,GENDER,AGE,Class
0,25.666667,77.0,73.777055,2.95,13.226413,0.534483,74.23057,26.051724,40.422414,7.400342,...,0,2,3,17,5,2,25,1,63.178082,0
1,27.75,41.0,79.876167,1.925,13.167512,3.26,59.571885,29.64,45.02,7.4102,...,2,3,3,17,3,2,25,1,53.805479,0
2,1084.977273,2380.916996,72.008629,2.715033,12.880346,0.535714,87.937727,25.482143,38.511364,7.412403,...,1,2,2,17,12,4,25,1,50.260274,1
3,25.5,36.0,78.6027,3.7,13.199486,1.088235,78.837134,26.058824,37.323529,7.442553,...,1,0,3,29,12,2,25,1,56.493151,0
4,317.207407,812.911111,76.556766,2.777419,13.723353,-0.797357,100.200931,23.159292,33.132159,7.429607,...,1,3,3,17,3,3,25,0,41.293151,0


In [30]:
from sklearn.model_selection import train_test_split
# Hold out 10% of `df` for validation, preserving the `Class` balance;
# the fixed random_state keeps the split reproducible.
train, val = train_test_split(
    df,
    test_size=0.10,
    stratify=df['Class'],
    shuffle=True,
    random_state=42,
)

## Importing the Library

In [1]:
# Pinned to the release shown in this cell's recorded output so a re-run
# installs the same API; %pip installs into the running kernel's environment.
%pip install pytorch_tabular==0.7.0

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pytorch_tabular
  Downloading pytorch_tabular-0.7.0.tar.gz (2.0 MB)
[K     |████████████████████████████████| 2.0 MB 14.9 MB/s 
Collecting category-encoders==2.2.2
  Downloading category_encoders-2.2.2-py2.py3-none-any.whl (80 kB)
[K     |████████████████████████████████| 80 kB 11.4 MB/s 
Collecting pandas==1.1.5
  Downloading pandas-1.1.5-cp37-cp37m-manylinux1_x86_64.whl (9.5 MB)
[K     |████████████████████████████████| 9.5 MB 43.9 MB/s 
Collecting pytorch-lightning==1.3.6
  Downloading pytorch_lightning-1.3.6-py3-none-any.whl (809 kB)
[K     |████████████████████████████████| 809 kB 58.0 MB/s 
[?25hCollecting omegaconf>=2.0.1
  Downloading omegaconf-2.2.3-py3-none-any.whl (79 kB)
[K     |████████████████████████████████| 79 kB 10.4 MB/s 
[?25hCollecting torchmetrics>=0.3.2
  Downloading torchmetrics-0.10.3-py3-none-any.whl (529 kB)
[K     |███████████████████████████

In [1]:
# %pip (rather than !pip) guarantees the packages land in the environment of
# the running kernel. Versions are pinned; restart the runtime afterwards so
# the downgraded torch is the one that gets imported.
%pip install torchtext==0.8.0 torch==1.7.1 pytorch-lightning==1.2.2

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torchtext==0.8.0
  Downloading torchtext-0.8.0-cp37-cp37m-manylinux1_x86_64.whl (6.9 MB)
[K     |████████████████████████████████| 6.9 MB 10.3 MB/s 
[?25hCollecting torch==1.7.1
  Downloading torch-1.7.1-cp37-cp37m-manylinux1_x86_64.whl (776.8 MB)
[K     |████████████████████████████████| 776.8 MB 18 kB/s 
[?25hCollecting pytorch-lightning==1.2.2
  Downloading pytorch_lightning-1.2.2-py3-none-any.whl (816 kB)
[K     |████████████████████████████████| 816 kB 60.4 MB/s 
Collecting PyYAML!=5.4.*,>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 62.8 MB/s 
Installing collected packages: torch, PyYAML, torchtext, pytorch-lightning
  Attempting uninstall: torch
    Found existing installation: torch 1.12.1+cu113
    Uninstalling torch-1.12.1+cu

In [22]:
from pytorch_tabular import TabularModel
from pytorch_tabular.models import  FTTransformerConfig,CategoryEmbeddingModelConfig, TabNetModelConfig, NodeConfig, AutoIntConfig
from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig, ExperimentConfig

## Define the Configs

In [31]:
# --- Model: NODE backbone for classification --------------------------------
# All other NodeConfig options (including the learning rate) are left at the
# library defaults.
model_config = NodeConfig(task="classification")

# --- Optimizer: library defaults ---------------------------------------------
optimizer_config = OptimizerConfig()

# --- Data: which columns are the target / continuous / categorical ----------
data_config = DataConfig(
    categorical_cols=cat_col_names,
    continuous_cols=num_cols,
    # Must be a list even for a single target; multi-target is regression-only.
    target=['Class'],
)

# --- Trainer ----------------------------------------------------------------
trainer_config = TrainerConfig(
    gpus=-1,            # -1: all available GPUs; None: CPU
    max_epochs=100,
    batch_size=16,
    auto_lr_find=True,  # run the LR finder to pick the learning rate
)

# Bundle the four configs into the model object used by the fit/evaluate cells.
tabular_model = TabularModel(
    trainer_config=trainer_config,
    optimizer_config=optimizer_config,
    model_config=model_config,
    data_config=data_config,
)

### ICU Delirium 

#### Encoded Data

In [32]:
# Train `tabular_model` on the current `train` split, validating on `val`.
# Both names are globals produced by the most recently executed split cell.
tabular_model.fit(train=train, validation=val)

Global seed set to 42
INFO:lightning:Global seed set to 42
  elif pd.api.types.is_categorical(cols):
GPU available: True, used: True
INFO:lightning:GPU available: True, used: True
TPU available: None, using: 0 TPU cores
INFO:lightning:TPU available: None, using: 0 TPU cores

  | Name            | Type             | Params
-----------------------------------------------------
0 | backbone        | NODEBackbone     | 1.7 M 
1 | output_response | Lambda           | 0     
2 | loss            | CrossEntropyLoss | 0     
-----------------------------------------------------
1.7 M     Trainable params
769       Non-trainable params
1.7 M     Total params
6.704     Total estimated model params size (MB)
INFO:lightning:
  | Name            | Type             | Params
-----------------------------------------------------
0 | backbone        | NODEBackbone     | 1.7 M 
1 | output_response | Lambda           | 0     
2 | loss            | CrossEntropyLoss | 0     
--------------------------------

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

Restored states from the checkpoint file at /content/lr_find_temp_model.ckpt
INFO:lightning:Restored states from the checkpoint file at /content/lr_find_temp_model.ckpt
Learning rate set to 0.3311311214825908
INFO:lightning:Learning rate set to 0.3311311214825908

  | Name            | Type             | Params
-----------------------------------------------------
0 | backbone        | NODEBackbone     | 1.7 M 
1 | output_response | Lambda           | 0     
2 | loss            | CrossEntropyLoss | 0     
-----------------------------------------------------
1.7 M     Trainable params
769       Non-trainable params
1.7 M     Total params
6.704     Total estimated model params size (MB)
INFO:lightning:
  | Name            | Type             | Params
-----------------------------------------------------
0 | backbone        | NODEBackbone     | 1.7 M 
1 | output_response | Lambda           | 0     
2 | loss            | CrossEntropyLoss | 0     
-------------------------------------------

Validation sanity check: 0it [00:00, ?it/s]

Training: 99it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

In [33]:
from sklearn.metrics import (
    confusion_matrix,
    log_loss,
    roc_auc_score,
    roc_curve,
)

# Score the fitted model on the held-out `test` frame.
result = tabular_model.evaluate(test)
pred_df = tabular_model.predict(test)

# Confusion matrix from the predicted labels.
cm = confusion_matrix(y_true=test['Class'], y_pred=pred_df["prediction"])

# ROC AUC (as a percentage) from the predicted probability of class 1.
print(100 * roc_auc_score(y_true=test['Class'], y_score=pred_df['1_probability']))

# Remaining metrics are printed by the notebook's metrics() helper.
metrics(cm)

Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_accuracy': 0.88532555103302}
--------------------------------------------------------------------------------


Generating Predictions...:   0%|          | 0/65 [00:00<?, ?it/s]

89.89749169138709
PPV:  83.1858407079646
NPV:  91.15942028985508
Precision value of the model:  83.1858407079646
Accuracy of the model:  88.53255587949465
Recall of the model:  82.21574344023324
Specificity of the model:  91.69096209912536
F1 Score of the model:  82.69794721407624


#### Raw Data

In [25]:
# Train `tabular_model` on the current `train`/`val` globals.
# NOTE(review): in top-to-bottom order the "Read Data" section loads the
# non-encoded CSVs and then overwrites `df`/`test` with the `_encoded` ones,
# so a Restart & Run All would fit encoded data here. The recorded output came
# from an out-of-order run (execution count 25, before the encoded load) --
# re-run the non-encoded load/split cells before this one.
tabular_model.fit(train=train, validation=val)

Global seed set to 42
INFO:lightning:Global seed set to 42
  elif pd.api.types.is_categorical(cols):
GPU available: True, used: True
INFO:lightning:GPU available: True, used: True
TPU available: None, using: 0 TPU cores
INFO:lightning:TPU available: None, using: 0 TPU cores

  | Name            | Type             | Params
-----------------------------------------------------
0 | backbone        | NODEBackbone     | 1.7 M 
1 | output_response | Lambda           | 0     
2 | loss            | CrossEntropyLoss | 0     
-----------------------------------------------------
1.7 M     Trainable params
769       Non-trainable params
1.7 M     Total params
6.704     Total estimated model params size (MB)
INFO:lightning:
  | Name            | Type             | Params
-----------------------------------------------------
0 | backbone        | NODEBackbone     | 1.7 M 
1 | output_response | Lambda           | 0     
2 | loss            | CrossEntropyLoss | 0     
--------------------------------

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

Restored states from the checkpoint file at /content/lr_find_temp_model.ckpt
INFO:lightning:Restored states from the checkpoint file at /content/lr_find_temp_model.ckpt
Learning rate set to 0.3311311214825908
INFO:lightning:Learning rate set to 0.3311311214825908

  | Name            | Type             | Params
-----------------------------------------------------
0 | backbone        | NODEBackbone     | 1.7 M 
1 | output_response | Lambda           | 0     
2 | loss            | CrossEntropyLoss | 0     
-----------------------------------------------------
1.7 M     Trainable params
769       Non-trainable params
1.7 M     Total params
6.704     Total estimated model params size (MB)
INFO:lightning:
  | Name            | Type             | Params
-----------------------------------------------------
0 | backbone        | NODEBackbone     | 1.7 M 
1 | output_response | Lambda           | 0     
2 | loss            | CrossEntropyLoss | 0     
-------------------------------------------

Validation sanity check: 0it [00:00, ?it/s]

Training: 99it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

In [26]:
from sklearn.metrics import (
    confusion_matrix,
    log_loss,
    roc_auc_score,
    roc_curve,
)

# Score the fitted model on the held-out `test` frame.
result = tabular_model.evaluate(test)
pred_df = tabular_model.predict(test)

# Confusion matrix from the predicted labels.
cm = confusion_matrix(y_true=test['Class'], y_pred=pred_df["prediction"])

# ROC AUC (as a percentage) from the predicted probability of class 1.
print("AUC: " + str(100 * roc_auc_score(y_true=test['Class'], y_score=pred_df['1_probability'])))

# Remaining metrics are printed by the notebook's metrics() helper.
metrics(cm)

Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_accuracy': 0.8620019555091858}
--------------------------------------------------------------------------------


Generating Predictions...:   0%|          | 0/65 [00:00<?, ?it/s]

AUC: 88.88345842293603
PPV:  82.95081967213115
NPV:  87.56906077348067
Precision value of the model:  82.95081967213115
Accuracy of the model:  86.20019436345967
Recall of the model:  73.76093294460642
Specificity of the model:  92.4198250728863
F1 Score of the model:  78.08641975308643


### Non-ICU Delirium

#### Encoded Data 

In [None]:
# 16 AUBMC encoded
# (the leading number is presumably the batch size used for this run -- TODO confirm)
# NOTE(review): no visible cell loads an AUBMC / non-ICU dataset; `train` and
# `val` must have been set by cells that are not part of this notebook.
tabular_model.fit(train=train, validation=val)

Global seed set to 42
INFO:lightning:Global seed set to 42
  elif pd.api.types.is_categorical(cols):
  "Data-aware initialization is performed on less than 1000 data points. This may cause instability."
GPU available: True, used: True
INFO:lightning:GPU available: True, used: True
TPU available: None, using: 0 TPU cores
INFO:lightning:TPU available: None, using: 0 TPU cores

  | Name            | Type             | Params
-----------------------------------------------------
0 | backbone        | NODEBackbone     | 1.3 M 
1 | output_response | Lambda           | 0     
2 | loss            | CrossEntropyLoss | 0     
-----------------------------------------------------
1.3 M     Trainable params
769       Non-trainable params
1.3 M     Total params
5.230     Total estimated model params size (MB)
INFO:lightning:
  | Name            | Type             | Params
-----------------------------------------------------
0 | backbone        | NODEBackbone     | 1.3 M 
1 | output_response | Lamb

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

Restored states from the checkpoint file at /content/lr_find_temp_model.ckpt
INFO:lightning:Restored states from the checkpoint file at /content/lr_find_temp_model.ckpt
Learning rate set to 0.47863009232263803
INFO:lightning:Learning rate set to 0.47863009232263803

  | Name            | Type             | Params
-----------------------------------------------------
0 | backbone        | NODEBackbone     | 1.3 M 
1 | output_response | Lambda           | 0     
2 | loss            | CrossEntropyLoss | 0     
-----------------------------------------------------
1.3 M     Trainable params
769       Non-trainable params
1.3 M     Total params
5.230     Total estimated model params size (MB)
INFO:lightning:
  | Name            | Type             | Params
-----------------------------------------------------
0 | backbone        | NODEBackbone     | 1.3 M 
1 | output_response | Lambda           | 0     
2 | loss            | CrossEntropyLoss | 0     
-----------------------------------------

Validation sanity check: 0it [00:00, ?it/s]

Training: 6it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

In [None]:
# 16 encoded
from sklearn.metrics import log_loss, roc_curve, confusion_matrix, auc, roc_auc_score

# Score the fitted model on the held-out `test` frame.
result = tabular_model.evaluate(test)
pred_df = tabular_model.predict(test)
cm = confusion_matrix(test['Class'], pred_df["prediction"])

# The recorded output of this cell starts with an "AUC:" line that the code
# did not produce; compute it from the class-1 probability, as the ICU
# evaluation cells do, so a re-run reproduces that output.
print("AUC: ", roc_auc_score(test['Class'], pred_df['1_probability']) * 100)
metrics(cm)

Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_accuracy': 0.8328357934951782}
--------------------------------------------------------------------------------


Generating Predictions...:   0%|          | 0/21 [00:00<?, ?it/s]

AUC:  65.6348728749812
PPV:  39.58333333333333
NPV:  90.59233449477352
Precision value of the model:  39.58333333333333
Accuracy of the model:  83.28358208955224
Recall of the model:  41.30434782608695
Specificity of the model:  89.96539792387543
F1 Score of the model:  40.42553191489362


#### Raw Data

In [None]:
# 8 AUBMC raw
# (the leading number is presumably the batch size used for this run -- TODO confirm)
# NOTE(review): no visible cell loads an AUBMC / non-ICU dataset; `train` and
# `val` must have been set by cells that are not part of this notebook.
tabular_model.fit(train=train, validation=val)

Global seed set to 42
INFO:lightning:Global seed set to 42
  elif pd.api.types.is_categorical(cols):
  "Data-aware initialization is performed on less than 1000 data points. This may cause instability."
GPU available: True, used: True
INFO:lightning:GPU available: True, used: True
TPU available: None, using: 0 TPU cores
INFO:lightning:TPU available: None, using: 0 TPU cores

  | Name            | Type             | Params
-----------------------------------------------------
0 | backbone        | NODEBackbone     | 1.3 M 
1 | output_response | Lambda           | 0     
2 | loss            | CrossEntropyLoss | 0     
-----------------------------------------------------
1.3 M     Trainable params
769       Non-trainable params
1.3 M     Total params
5.230     Total estimated model params size (MB)
INFO:lightning:
  | Name            | Type             | Params
-----------------------------------------------------
0 | backbone        | NODEBackbone     | 1.3 M 
1 | output_response | Lamb

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

Restored states from the checkpoint file at /content/lr_find_temp_model.ckpt
INFO:lightning:Restored states from the checkpoint file at /content/lr_find_temp_model.ckpt
Learning rate set to 0.2290867652767775
INFO:lightning:Learning rate set to 0.2290867652767775

  | Name            | Type             | Params
-----------------------------------------------------
0 | backbone        | NODEBackbone     | 1.3 M 
1 | output_response | Lambda           | 0     
2 | loss            | CrossEntropyLoss | 0     
-----------------------------------------------------
1.3 M     Trainable params
769       Non-trainable params
1.3 M     Total params
5.230     Total estimated model params size (MB)
INFO:lightning:
  | Name            | Type             | Params
-----------------------------------------------------
0 | backbone        | NODEBackbone     | 1.3 M 
1 | output_response | Lambda           | 0     
2 | loss            | CrossEntropyLoss | 0     
-------------------------------------------

Validation sanity check: 0it [00:00, ?it/s]

Training: 37it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

In [None]:
# 8 aubmc raw
from sklearn.metrics import log_loss, roc_curve, confusion_matrix, auc, roc_auc_score

# Score the fitted model on the held-out `test` frame.
result = tabular_model.evaluate(test)
pred_df = tabular_model.predict(test)
cm = confusion_matrix(test['Class'], pred_df["prediction"])

# The recorded output of this cell starts with an "AUC:" line that the code
# did not produce; compute it from the class-1 probability, as the ICU
# evaluation cells do, so a re-run reproduces that output.
print("AUC: ", roc_auc_score(test['Class'], pred_df['1_probability']) * 100)
metrics(cm)

Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_accuracy': 0.8059701323509216}
--------------------------------------------------------------------------------


Generating Predictions...:   0%|          | 0/42 [00:00<?, ?it/s]

AUC:  69.56145629607342
PPV:  36.231884057971016
NPV:  92.10526315789474
Precision value of the model:  36.231884057971016
Accuracy of the model:  80.59701492537313
Recall of the model:  54.347826086956516
Specificity of the model:  84.77508650519032
F1 Score of the model:  43.47826086956522


## Saving and Loading the Model

In [36]:
from sklearn.metrics import log_loss, roc_curve, confusion_matrix, roc_auc_score

# Persist the fitted model, then load it back from the same directory.
tabular_model.save_model("node_icu_delirium_best_model")
loaded_model = TabularModel.load_from_checkpoint("node_icu_delirium_best_model")

# Evaluate the *reloaded* model (not the in-memory `tabular_model`), so this
# cell actually checks that the saved checkpoint reproduces the test metrics.
result = loaded_model.evaluate(test)
pred_df = loaded_model.predict(test)
cm = confusion_matrix(test['Class'], pred_df["prediction"])

# ROC AUC (as a percentage) from the predicted probability of class 1.
print(roc_auc_score(test['Class'], pred_df['1_probability']) * 100)
metrics(cm)

GPU available: True, used: True
INFO:lightning:GPU available: True, used: True
TPU available: None, using: 0 TPU cores
INFO:lightning:TPU available: None, using: 0 TPU cores


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_accuracy': 0.88532555103302}
--------------------------------------------------------------------------------


Generating Predictions...:   0%|          | 0/65 [00:00<?, ?it/s]

89.89749169138709
PPV:  83.1858407079646
NPV:  91.15942028985508
Precision value of the model:  83.1858407079646
Accuracy of the model:  88.53255587949465
Recall of the model:  82.21574344023324
Specificity of the model:  91.69096209912536
F1 Score of the model:  82.69794721407624
