In [1]:
import numpy as np 
import pandas as pd 
import tqdm 
import argparse 
import os 
import shutil 
from ai_clinician.preprocessing.utils import load_csv 
from ai_clinician.preprocessing.columns import * 
from ai_clinician.modeling.models.komorowski_model import AIClinicianModel 
from ai_clinician.modeling.models.common import * 
from ai_clinician.modeling.models.dqn import DuelingDQNModel 
from ai_clinician.modeling.columns import C_OUTCOME 
import pickle 
from sklearn.model_selection import train_test_split
# Show every column when a DataFrame is rendered (None disables column truncation).
pd.set_option('display.max_columns', None)
# Register tqdm's pandas integration (adds the progress_* variants of apply).
tqdm.tqdm.pandas()

In [None]:
# Root directory of the project; the data paths used in later cells are built from it.
# Set the RRT_MIMIC_ROOT environment variable to run the notebook on another
# machine; when it is unset the original location is used unchanged.
main_path = os.environ.get('RRT_MIMIC_ROOT', '/home/lkapral/RRT_mimic_iv')

In [3]:
# Load mimic_dataset.csv from below the project root.
# The path has to be passed positionally: read_csv has no `main_path` keyword,
# so the previous `pd.read_csv(main_path='...')` call raised a TypeError instead
# of reading the file. The path is built the same way as in the other cells.
df = pd.read_csv(main_path + '/data/mimic/mimic_dataset.csv')

In [4]:
import argparse
import os
import shutil
import pandas as pd

def create_args(argv=None):
    """Build the experiment configuration by parsing an argparse command line.

    A notebook has no real command line, so by default a fixed ("simulated")
    argument list is parsed: the data directory ``main_path + '/data/model'``
    followed by ``--n-models 500 --model-type AIClinician --n-action-bins 2
    --val-size 0.2``.

    Args:
        argv: Optional sequence of argument strings to parse instead of the
            simulated command line. It must contain the positional model data
            directory. When ``None`` (default) the simulated command line is
            used, which reads the notebook-level variable ``main_path``.

    Returns:
        argparse.Namespace with the attributes ``data``, ``worker_label``,
        ``save_behavior``, ``val_size``, ``n_models``, ``model_type``,
        ``cluster_fraction``, ``n_cluster_init``, ``n_cluster_states``,
        ``n_action_bins``, ``reward``, ``transition_threshold``, ``gamma``,
        ``soften_factor``, ``num_iter_ql``, ``num_iter_wis``, ``state_dim``
        and ``hidden_dim``.

    Raises:
        SystemExit: raised by argparse when the arguments cannot be parsed.
    """
    parser = argparse.ArgumentParser(description='Simulate command-line argument parsing in Jupyter notebook.')
    parser.add_argument('data', type=str,
                        help='Model data directory (should contain train and test directories)')
    parser.add_argument('--worker-label', dest='worker_label', type=str, default='',
                        help='Label to suffix output files')
    parser.add_argument('--save', dest='save_behavior', type=str, default='best',
                        help='Models to save (best [default], all, none)')
    parser.add_argument('--val-size', dest='val_size', type=float, default=0.2,
                        help='Proportion of data to use for validation')
    parser.add_argument('--n-models', dest='n_models', type=int, default=500,
                        help='Number of models to build')
    parser.add_argument('--model-type', dest='model_type', type=str, default='AIClinician',
                        help='Model type to train (AIClinician or DuelingDQN)')
    parser.add_argument('--cluster-fraction', dest='cluster_fraction', type=float, default=0.25,
                        help='Fraction of patient states to sample for state clustering')
    parser.add_argument('--n-cluster-init', dest='n_cluster_init', type=int, default=32,
                        help='Number of cluster initializations to try in each replicate')
    parser.add_argument('--n-cluster-states', dest='n_cluster_states', type=int, default=500,
                        help='Number of states to define through clustering')
    parser.add_argument('--n-action-bins', dest='n_action_bins', type=int, default=5,
                        help='Number of action bins for fluids and vasopressors')
    parser.add_argument('--reward', dest='reward', type=int, default=100,
                        help='Value to assign as positive reward if discharged from hospital, or negative reward if died')
    parser.add_argument('--transition-threshold', dest='transition_threshold', type=int, default=5,
                        help='Prune state-action pairs with less than this number of occurrences in training data')
    parser.add_argument('--gamma', dest='gamma', type=float, default=0.99,
                        help='Decay for reward values (default 0.99)')
    parser.add_argument('--soften-factor', dest='soften_factor', type=float, default=0.01,
                        help='Amount by which to soften factors (random actions will be chosen this proportion of the time)')
    parser.add_argument('--num-iter-ql', dest='num_iter_ql', type=int, default=6,
                        help='Number of bootstrappings to use for TD learning (physician policy)')
    parser.add_argument('--num-iter-wis', dest='num_iter_wis', type=int, default=700,
                        help='Number of bootstrappings to use for WIS estimation (AI policy)')
    parser.add_argument('--state-dim', dest='state_dim', type=int, default=256,
                        help='Dimension for learned state representation in DQN')
    parser.add_argument('--hidden-dim', dest='hidden_dim', type=int, default=128,
                        help='Number of units in hidden layer for DQN')

    if argv is None:
        # Notebook default: positional data directory first, then the fixed options.
        argv = [main_path+'/data/model']
        argv += '--n-models 500 --model-type AIClinician --n-action-bins 2 --val-size 0.2'.split()
    # Copy into a list so any sequence type (tuple, generator output, ...) is accepted.
    return parser.parse_args(list(argv))


# Parse the simulated command line and unpack the settings used by later cells.
args = create_args()
data_dir, worker_label, n_models, model_type, n_action_bins = (
    args.data,
    args.worker_label,
    args.n_models,
    args.model_type,
    args.n_action_bins,
)

# Number of leading rows taken from the feature-importance table in later cells.
fixed_num_features = 40

# Read the three training tables stored under <data_dir>/train.
MIMICraw, MIMICzs, metadata = (
    pd.read_csv(os.path.join(data_dir, "train", csv_name))
    for csv_name in ("MIMICraw.csv", "MIMICzs.csv", "metadata.csv")
)

# Distinct ICU stay identifiers found in the metadata table.
unique_icu_stays = metadata['icustayid'].unique()


In [5]:
# Table of per-feature importance scores.
# NOTE(review): taking the first rows presumably relies on the CSV being sorted
# by descending importance - confirm against the script that writes it.
feature_importance = pd.read_csv(main_path+'/data/model/combined_feature_importances.csv')

# Scores of the first `fixed_num_features` rows, scaled to unit Euclidean length.
weights = feature_importance['Combined_Average'].head(fixed_num_features).values
feature_weights = weights / np.linalg.norm(weights)

In [6]:
# Names of the features in the same leading rows that supplied the weights.
reduced_features = feature_importance['Feature'].head(fixed_num_features).tolist()

In [10]:
print("Create actions")

# Columns whose values indicate RRT activity (presumably renal replacement
# therapy settings/measurements - confirm with the data dictionary).
rrt_cols = [
    'Ultrafiltrate_Output', 'Blood_Flow',
    'Hourly_Patient_Fluid_Removal', 'Dialysate_Rate',
    'Hemodialysis_Output', 'Citrate',
    'Prefilter_Replacement_Rate', 'Postfilter_Replacement_Rate',
]

# A row is an action (1) when at least one RRT column holds a value that is
# neither missing nor zero; otherwise it is 0.
rrt_actions = (MIMICraw[rrt_cols].notna() & MIMICraw[rrt_cols].ne(0)).any(axis=1)
MIMICraw['action'] = rrt_actions.astype(int)

# Size of the action space: the action column only takes the values 0 and 1.
n_actions = 2

def fit_action_bins_binary(actions):
    """Return the raw actions together with fixed medians and bin edges.

    No fitting is performed: the medians and bin edges are constants suited
    to an action that only takes the values 0 and 1.

    Args:
        actions: object exposing ``.values`` (e.g. a pandas Series) that
            holds one action code per row.

    Returns:
        Tuple ``(all_actions, action_medians, action_bins)`` where
        ``all_actions`` is ``actions.values``, ``action_medians`` is
        ``array([0, 1])`` and ``action_bins`` is ``array([0, 0.5, 1])``.
    """
    medians = np.array([0, 1])
    bin_edges = np.array([0, 0.5, 1])
    return actions.values, medians, bin_edges

# Take the 0/1 action column as the action array, with fixed medians and bin edges.
all_actions, action_medians, action_bins = fit_action_bins_binary(MIMICraw['action'])

# Re-read the model type from the parsed arguments (same source as the earlier assignment).
model_type = args.model_type
# Silence NumPy's divide-by-zero and invalid-operation floating-point warnings.
# Being the last expression of the cell, the returned previous settings are shown as output.
np.seterr(divide='ignore', invalid='ignore')

Create actions


{'divide': 'warn', 'over': 'warn', 'under': 'ignore', 'invalid': 'warn'}

In [11]:
# Expose the binary action under the name 'RRT' in both tables.
# The assignment into MIMICzs is aligned on the row index, so it
# assumes MIMICzs and MIMICraw share the same index - TODO confirm.
MIMICraw['RRT'] = MIMICraw['action']
MIMICzs['RRT'] = MIMICraw['action']

In [12]:
# Keep only the selected features plus the treatment column in each table.
# `.copy()` turns each selection into an independent frame, so adding a column
# to it later no longer raises pandas' SettingWithCopyWarning.
MIMICzs = MIMICzs[reduced_features + ['RRT']].copy()
MIMICraw = MIMICraw[reduced_features + ['action']].copy()

# Append a weight of 1 for the extra 'RRT' column of MIMICzs.
# The length check keeps the cell idempotent: re-running it does not append a
# second (third, ...) weight and desynchronise the vector from the columns.
if len(feature_weights) == len(reduced_features):
    feature_weights = np.append(feature_weights, 1)

In [13]:
# Attach the ICU stay id of every row (aligned on the row index with metadata).
# `assign` returns a new frame instead of writing into a column selection of the
# original table, which is what triggered the SettingWithCopyWarning.
MIMICraw = MIMICraw.assign(icustayid=metadata['icustayid'])

# Number of distinct ICU stays with at least one row flagged as an action.
# A single .loc selection replaces the chained boolean-then-column indexing.
len(MIMICraw.loc[MIMICraw['action'] > 0, 'icustayid'].unique())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  MIMICraw['icustayid'] = metadata['icustayid']


1457

In [None]:
from joblib import Parallel, delayed
import multiprocessing
import os
import numpy as np
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split

# Split at the level of ICU stays (not rows), so every row of a stay ends up in
# the same partition. args.val_size is the validation fraction.
# NOTE(review): no random_state is passed, so the split changes on every run.
train_ids, val_ids = train_test_split(unique_icu_stays, test_size=args.val_size)
# Row labels of the metadata rows belonging to each partition.
# NOTE(review): C_ICUSTAYID presumably comes from one of the wildcard imports and
# names the same column as the literal 'icustayid' used elsewhere - confirm.
train_indexes = metadata[metadata[C_ICUSTAYID].isin(train_ids)].index
val_indexes = metadata[metadata[C_ICUSTAYID].isin(val_ids)].index

# The index labels are used as positions below (.iloc / NumPy indexing). That is
# only equivalent while metadata keeps its default 0..n-1 index and MIMICzs and
# all_actions are row-aligned with it - TODO confirm.
X_train = MIMICzs.iloc[train_indexes]
X_val = MIMICzs.iloc[val_indexes]
metadata_train = metadata.iloc[train_indexes]
metadata_val = metadata.iloc[val_indexes]
actions_train = all_actions[train_indexes]
actions_val = all_actions[val_indexes]


# Penalty values to sweep: -0.01, 0.00, ..., 0.14 (16 values).
penal_amounts = [i/100. for i in range(-1, 15, 1)]
# Run one experiment per penalty value in parallel and collect the return values.
# NOTE(review): run_penalty_experiment is not defined in the visible cells
# (presumably a wildcard import or another cell) - its positional argument order
# must match the call below.
# NOTE(review): n_jobs requests one more worker than there are tasks.
results = Parallel(n_jobs=len(penal_amounts)+1)(
    delayed(run_penalty_experiment)(
        args.n_models,
        penal_amount, 
        fixed_num_features,
        X_train, X_val, metadata_train, metadata_val, actions_train, actions_val,
        data_dir, args, model_type, AIClinicianModel, DuelingDQNModel,
        n_action_bins, action_bins, action_medians, 
        feature_weights, build_complete_record_sequences,
        evaluate_physician_policy_td, evaluate_policy_wis,
        train_ids, val_ids
    )
    for penal_amount in penal_amounts)


  transitionr = np.nan_to_num(np.where(transitionr > 0, transitionr / action_counts, 0))
  physpol = np.nan_to_num(action_counts / action_counts.sum(axis=1, keepdims=True))
  transitionr = np.nan_to_num(np.where(transitionr > 0, transitionr / action_counts, 0))
  physpol = np.nan_to_num(action_counts / action_counts.sum(axis=1, keepdims=True))
  transitionr = np.nan_to_num(np.where(transitionr > 0, transitionr / action_counts, 0))
  physpol = np.nan_to_num(action_counts / action_counts.sum(axis=1, keepdims=True))
  transitionr = np.nan_to_num(np.where(transitionr > 0, transitionr / action_counts, 0))
  physpol = np.nan_to_num(action_counts / action_counts.sum(axis=1, keepdims=True))
  transitionr = np.nan_to_num(np.where(transitionr > 0, transitionr / action_counts, 0))
  physpol = np.nan_to_num(action_counts / action_counts.sum(axis=1, keepdims=True))
  transitionr = np.nan_to_num(np.where(transitionr > 0, transitionr / action_counts, 0))
  physpol = np.nan_to_num(action_counts / acti

Penalty: -0.01, Model 0 of 500
Clustering
Zeroing out 433/1004 transitions
Create reward matrix R(S, A)
Policy iteration
Evaluate on MIMIC training set


TD evaluation:   0%|          | 0/6 [00:00<?, ?it/s]

Penalty: 0.11, Model 0 of 500
Clustering
Zeroing out 418/1004 transitions
Create reward matrix R(S, A)
Policy iteration
Evaluate on MIMIC training set


TD evaluation:   0%|          | 0/6 [00:00<?, ?it/s]

Penalty: 0.08, Model 0 of 500
Clustering
Zeroing out 420/1004 transitions
Create reward matrix R(S, A)
Policy iteration
Evaluate on MIMIC training set
Penalty: 0.12, Model 0 of 500
Clustering
Zeroing out 417/1004 transitions
Create reward matrix R(S, A)
Policy iteration
Evaluate on MIMIC training set
Penalty: 0.13, Model 0 of 500
Clustering
Zeroing out 412/1004 transitions
Create reward matrix R(S, A)
Policy iteration
Evaluate on MIMIC training set


TD evaluation:   0%|          | 0/6 [00:00<?, ?it/s]

Penalty: 0.06, Model 0 of 500
Clustering
Zeroing out 417/1004 transitions
Create reward matrix R(S, A)
Policy iteration
Evaluate on MIMIC training set
Penalty: 0.09, Model 0 of 500
Clustering
Zeroing out 416/1004 transitions
Create reward matrix R(S, A)
Policy iteration
Evaluate on MIMIC training set


TD evaluation:   0%|          | 0/6 [00:00<?, ?it/s]

Penalty: 0.1, Model 0 of 500
Clustering
Zeroing out 413/1004 transitions
Create reward matrix R(S, A)
Policy iteration
Evaluate on MIMIC training set
Penalty: 0.03, Model 0 of 500
Clustering
Zeroing out 421/1004 transitions
Create reward matrix R(S, A)
Policy iteration
Evaluate on MIMIC training set
Penalty: 0.01, Model 0 of 500
Clustering
Zeroing out 424/1004 transitions
Create reward matrix R(S, A)
Policy iteration
Evaluate on MIMIC training set
Penalty: 0.0, Model 0 of 500
Clustering
Zeroing out 413/1004 transitions
Create reward matrix R(S, A)
Policy iteration
Evaluate on MIMIC training set


TD evaluation:   0%|          | 0/6 [00:00<?, ?it/s]

Penalty: 0.14, Model 0 of 500
Clustering
Zeroing out 418/1004 transitions
Create reward matrix R(S, A)
Policy iteration
Evaluate on MIMIC training set
Penalty: 0.02, Model 0 of 500
Clustering
Zeroing out 415/1004 transitions
Create reward matrix R(S, A)
Policy iteration
Evaluate on MIMIC training set
Penalty: 0.04, Model 0 of 500
Clustering
Zeroing out 411/1004 transitions
Create reward matrix R(S, A)
Policy iteration
Evaluate on MIMIC training set
Penalty: 0.07, Model 0 of 500
Clustering
Zeroing out 406/1004 transitions
Create reward matrix R(S, A)
Policy iteration
Evaluate on MIMIC training set
Penalty: 0.05, Model 0 of 500
Clustering
Zeroing out 408/1004 transitions
Create reward matrix R(S, A)
Policy iteration
Evaluate on MIMIC training set


TD evaluation: 100%|██████████| 6/6 [00:13<00:00,  2.20s/it]
  z = self.soften_factor / ii.sum()
  soft_physpol[i, ~ii] = soft_physpol[i,~ii] * (1 - self.soften_factor / coef)
TD evaluation: 100%|██████████| 6/6 [00:12<00:00,  2.06s/it]
  z = self.soften_factor / ii.sum()
  soft_physpol[i, ~ii] = soft_physpol[i,~ii] * (1 - self.soften_factor / coef)
TD evaluation: 100%|██████████| 6/6 [00:12<00:00,  2.15s/it]
  z = self.soften_factor / ii.sum()
TD evaluation: 100%|██████████| 6/6 [00:13<00:00,  2.18s/it]
  z = self.soften_factor / ii.sum()
TD evaluation: 100%|██████████| 6/6 [00:13<00:00,  2.26s/it]
TD evaluation: 100%|██████████| 6/6 [00:12<00:00,  2.11s/it]
TD evaluation: 100%|██████████| 6/6 [00:12<00:00,  2.05s/it]
  z = self.soften_factor / ii.sum()
  z = self.soften_factor / ii.sum()
  soft_physpol[i, ~ii] = soft_physpol[i,~ii] * (1 - self.soften_factor / coef)
  z = self.soften_factor / ii.sum()
  soft_physpol[i, ~ii] = soft_physpol[i,~ii] * (1 - self.soften_factor / coef)
TD ev

Evaluate on MIMIC validation set
Evaluate on MIMIC validation set


WIS estimation: 100%|██████████| 700/700 [01:51<00:00,  6.27it/s]
WIS estimation: 100%|██████████| 700/700 [01:51<00:00,  6.26it/s]
WIS estimation: 100%|██████████| 700/700 [01:51<00:00,  6.25it/s]
WIS estimation: 100%|██████████| 700/700 [01:53<00:00,  6.18it/s]
WIS estimation: 100%|██████████| 700/700 [01:51<00:00,  6.26it/s]
WIS estimation: 100%|██████████| 700/700 [01:52<00:00,  6.24it/s]
WIS estimation:  98%|█████████▊| 687/700 [01:51<00:02,  5.93it/s]

Evaluate on MIMIC validation set


WIS estimation: 100%|██████████| 700/700 [01:51<00:00,  6.28it/s]
WIS estimation: 100%|██████████| 700/700 [01:51<00:00,  6.28it/s]
TD evaluation:   0%|          | 0/6 [00:00<?, ?it/s]

Evaluate on MIMIC validation set
Evaluate on MIMIC validation set


WIS estimation: 100%|██████████| 700/700 [01:52<00:00,  6.23it/s]
WIS estimation: 100%|█████████▉| 697/700 [01:51<00:00,  6.29it/s]

Evaluate on MIMIC validation set
Evaluate on MIMIC validation set


TD evaluation:   0%|          | 0/6 [00:00<?, ?it/s]1,  6.14it/s]

Evaluate on MIMIC validation set
Evaluate on MIMIC validation set
Evaluate on MIMIC validation set


WIS estimation: 100%|██████████| 700/700 [01:52<00:00,  6.23it/s]
WIS estimation:  99%|█████████▉| 693/700 [01:52<00:01,  6.16it/s]

Evaluate on MIMIC validation set


TD evaluation:   0%|          | 0/6 [00:00<?, ?it/s]0,  6.19it/s]

Evaluate on MIMIC validation set
Evaluate on MIMIC validation set
Evaluate on MIMIC validation set


WIS estimation: 100%|██████████| 700/700 [01:53<00:00,  6.18it/s]
TD evaluation:  17%|█▋        | 1/6 [00:02<00:10,  2.16s/it]

Evaluate on MIMIC validation set


TD evaluation:   0%|          | 0/6 [00:00<?, ?it/s].11s/it]

Evaluate on MIMIC validation set


TD evaluation: 100%|██████████| 6/6 [00:12<00:00,  2.13s/it]
  z = self.soften_factor / ii.sum()
  soft_physpol[i, ~ii] = soft_physpol[i,~ii] * (1 - self.soften_factor / coef)
TD evaluation: 100%|██████████| 6/6 [00:12<00:00,  2.14s/it]
  z = self.soften_factor / ii.sum()
TD evaluation: 100%|██████████| 6/6 [00:11<00:00,  2.00s/it]
  z = self.soften_factor / ii.sum()
TD evaluation: 100%|██████████| 6/6 [00:11<00:00,  1.88s/it]
  z = self.soften_factor / ii.sum()
  soft_physpol[i, ~ii] = soft_physpol[i,~ii] * (1 - self.soften_factor / coef)
TD evaluation: 100%|██████████| 6/6 [00:12<00:00,  2.05s/it]
  z = self.soften_factor / ii.sum()
TD evaluation: 100%|██████████| 6/6 [00:11<00:00,  1.99s/it]
  z = self.soften_factor / ii.sum()
TD evaluation: 100%|██████████| 6/6 [00:12<00:00,  2.06s/it]
  z = self.soften_factor / ii.sum()
TD evaluation: 100%|██████████| 6/6 [00:12<00:00,  2.15s/it]
  z = self.soften_factor / ii.sum()
  soft_physpol[i, ~ii] = soft_physpol[i,~ii] * (1 - self.soften_fa

95% LB: 83.05
Saving model: best_model
Penalty: 0.04, Model 1 of 500
Clustering
Zeroing out 417/1004 transitions
Create reward matrix R(S, A)
Policy iteration
Evaluate on MIMIC training set


TD evaluation:   0%|          | 0/6 [00:00<?, ?it/s]

95% LB: 78.31
Saving model: best_model
Penalty: 0.13, Model 1 of 500
Clustering
Zeroing out 411/1004 transitions
Create reward matrix R(S, A)
Policy iteration
Evaluate on MIMIC training set


TD evaluation:   0%|          | 0/6 [00:00<?, ?it/s]

95% LB: -80.97
Saving model: worst_model
Penalty: 0.07, Model 1 of 500
Clustering
Zeroing out 412/1004 transitions
Create reward matrix R(S, A)
Policy iteration
Evaluate on MIMIC training set


TD evaluation:   0%|          | 0/6 [00:00<?, ?it/s]

95% LB: 67.11
Saving model: best_model
Penalty: 0.08, Model 1 of 500
Clustering
Zeroing out 417/1004 transitions
Create reward matrix R(S, A)
Policy iteration
Evaluate on MIMIC training set
95% LB: 71.87
Saving model: best_model
Penalty: 0.1, Model 1 of 500
Clustering
Zeroing out 412/1004 transitions
Create reward matrix R(S, A)
Policy iteration
Evaluate on MIMIC training set
95% LB: 77.67
Saving model: best_model
Penalty: 0.11, Model 1 of 500
Clustering
Zeroing out 416/1004 transitions
Create reward matrix R(S, A)
Policy iteration
Evaluate on MIMIC training set
95% LB: 71.80
Saving model: best_model
Penalty: 0.12, Model 1 of 500
Clustering
Zeroing out 416/1004 transitions
Create reward matrix R(S, A)
Policy iteration
Evaluate on MIMIC training set


TD evaluation:   0%|          | 0/6 [00:00<?, ?it/s]

95% LB: 75.63
Saving model: best_model
Penalty: 0.06, Model 1 of 500
Clustering
Zeroing out 422/1004 transitions
Create reward matrix R(S, A)
Policy iteration
Evaluate on MIMIC training set


TD evaluation:   0%|          | 0/6 [00:00<?, ?it/s]

95% LB: 82.10
Saving model: best_model
Penalty: 0.01, Model 1 of 500
Clustering
Zeroing out 418/1004 transitions
Create reward matrix R(S, A)
Policy iteration
Evaluate on MIMIC training set


TD evaluation:   0%|          | 0/6 [00:00<?, ?it/s]

95% LB: 79.99
Saving model: best_model
Penalty: 0.05, Model 1 of 500
Clustering
Zeroing out 425/1004 transitions
Create reward matrix R(S, A)
Policy iteration
Evaluate on MIMIC training set


TD evaluation:  17%|█▋        | 1/6 [00:02<00:11,  2.22s/it]

95% LB: 83.45
Saving model: best_model
Penalty: 0.0, Model 1 of 500
Clustering
Zeroing out 416/1004 transitions
Create reward matrix R(S, A)
Policy iteration
Evaluate on MIMIC training set
95% LB: 85.05
Saving model: best_model
Penalty: -0.01, Model 1 of 500
Clustering
Zeroing out 415/1004 transitions
Create reward matrix R(S, A)
Policy iteration
Evaluate on MIMIC training set


TD evaluation:  17%|█▋        | 1/6 [00:02<00:11,  2.21s/it]

95% LB: 83.99
Saving model: best_model
Penalty: 0.02, Model 1 of 500
Clustering
Zeroing out 417/1004 transitions
Create reward matrix R(S, A)
Policy iteration
Evaluate on MIMIC training set
95% LB: 83.68
Saving model: best_model
Penalty: 0.03, Model 1 of 500
Clustering
Zeroing out 419/1004 transitions
Create reward matrix R(S, A)
Policy iteration
Evaluate on MIMIC training set


TD evaluation:  17%|█▋        | 1/6 [00:02<00:11,  2.21s/it]

95% LB: 76.69
Saving model: best_model
Penalty: 0.14, Model 1 of 500
Clustering
Zeroing out 414/1004 transitions
Create reward matrix R(S, A)
Policy iteration
Evaluate on MIMIC training set


TD evaluation:   0%|          | 0/6 [00:00<?, ?it/s].23s/it]

95% LB: 77.66
Saving model: best_model
Penalty: 0.09, Model 1 of 500
Clustering
Zeroing out 415/1004 transitions
Create reward matrix R(S, A)
Policy iteration
Evaluate on MIMIC training set


TD evaluation: 100%|██████████| 6/6 [00:13<00:00,  2.21s/it]
  z = self.soften_factor / ii.sum()
  soft_physpol[i, ~ii] = soft_physpol[i,~ii] * (1 - self.soften_factor / coef)
TD evaluation: 100%|██████████| 6/6 [00:13<00:00,  2.20s/it]
  z = self.soften_factor / ii.sum()
TD evaluation: 100%|██████████| 6/6 [00:12<00:00,  2.17s/it]
  z = self.soften_factor / ii.sum()
  soft_physpol[i, ~ii] = soft_physpol[i,~ii] * (1 - self.soften_factor / coef)
TD evaluation: 100%|██████████| 6/6 [00:13<00:00,  2.18s/it]
  z = self.soften_factor / ii.sum()
TD evaluation: 100%|██████████| 6/6 [00:13<00:00,  2.22s/it]
  z = self.soften_factor / ii.sum()
  soft_physpol[i, ~ii] = soft_physpol[i,~ii] * (1 - self.soften_factor / coef)
TD evaluation: 100%|██████████| 6/6 [00:13<00:00,  2.20s/it]
  z = self.soften_factor / ii.sum()
  soft_physpol[i, ~ii] = soft_physpol[i,~ii] * (1 - self.soften_factor / coef)
TD evaluation: 100%|██████████| 6/6 [00:13<00:00,  2.23s/it]
  z = self.soften_factor / ii.sum()
  sof