In [3]:
###
#  FUTON Model MDP + Q-Learning Creation Script
#  A Research Project conducted by Noah Dunn 
###

# Import the standard tools for working with Pandas dataframe
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import shelve
# Import the MDP toolbox that contains a method for conducting Q-Learning
# Tool can be found here: https://github.com/sawcordwell/pymdptoolbox
# Documentation for the tool can be found here: https://pymdptoolbox.readthedocs.io/en/latest/
import mdptoolbox

In [4]:
#  Load the patient data file that every experiment in this notebook is run against.
#  NOTE(review): this is an absolute, machine-specific path; the notebook only runs
#  where this exact drive layout exists.
patientdata = pd.read_csv(
    filepath_or_buffer="G:/MIMIC-ALL/MIMIC-PATIENTS/patient_data_modified.csv"
)

In [5]:
###
#  An MDP, or Markov Decision Process, is used to model relationships between various states and actions.
#  In a medical setting a state can be thought of as a patient's diagnosis based on current vitals and state of being.
#  An action can be thought of as a change in the current diagnosis based on one of those vitals.
#  The inspiration for the bulk of this code came from Komorowski's AI Clinician, which can be found
#  here: https://github.com/matthieukomorowski/AI_Clinician/blob/master/AIClinician_core_160219.m
###

###
# Global settings used throughout the MDP creation
###

# The number of repetitions we want / count of MDPs we need to create
mdp_count = 500

# The number of times clustering will be conducted
clustering_iter = 32

# Proportion of the data used for clustering
cluster_sample = 0.25

# How close we desire clusters to be in similarity (Percentage)
# NOTE(review): nothing visible in this notebook uses gamma for clustering; a value of
# 0.99 named gamma is conventionally an MDP discount factor - confirm the intended meaning.
gamma = 0.99

# The cutoff value for the transition matrix
transition_threshold = 5

# The number of policies we would like to end up with
final_policies = 1

# The number of distinct states
state_count = 750

# Number of actions per state (recommended 2 to 10)
action_count = 5

# Number of cross-validation runs (default is 80% train, 20% test)
crossval_iter = 10

In [6]:
###
# Data structures to hold our interim data
###

# Every structure starts out entirely NaN, so a slot that is never written can be
# told apart from a real result. np.full allocates and fills in one step.

# One row per state plus two extra rows, one column per MDP.
# NOTE(review): the purpose of the two extra rows is not established in this notebook - confirm.
optimal_actions = np.full((state_count + 2, mdp_count), np.nan)

# Two rows per MDP, 30 columns.
model_data = np.full((mdp_count * 2, 30), np.nan)

# One row per MDP, 15 columns. NaN-filled like the other buffers; np.empty on its
# own would expose arbitrary memory contents that look like real numbers.
bestmodels_data = np.full((mdp_count, 15), np.nan)

In [7]:
# Collect the distinct ICU stay IDs that occur in the patient data
icu_ids = pd.unique(patientdata['icustayid'])
# The number of distinct ICU stays, i.e. the number of patients to be used for states
id_count = icu_ids.size
print(id_count)

# NaN-initialised table with one row per patient and one column per MDP
patient_idxs = np.full((id_count, mdp_count), np.nan)

21463


In [8]:
# Every feature column belongs to exactly one of three categories:
#   1. Binary values (0 or 1)
#   2. Standard ranges (plain old integers and decimals)
#   3. Logarithmic values (columnvalue = log(columnvalue))

colbin = [
    'gender', 'mechvent', 'max_dose_vaso', 're_admission', 'qSOFAFlag', 'SOFAFlag',
]
colnorm = [
    'age', 'Weight_kg', 'GCS', 'HR', 'SysBP', 'MeanBP', 'DiaBP', 'RR', 'Temp_C', 'FiO2_1',
    'Potassium', 'Sodium', 'Chloride', 'Glucose', 'Magnesium', 'Calcium',
    'Hb', 'WBC_count', 'Platelets_count', 'PTT', 'PT', 'Arterial_pH', 'paO2', 'paCO2',
    'Arterial_BE', 'HCO3', 'Arterial_lactate', 'SOFA', 'SIRS', 'Shock_Index', 'PaO2_FiO2',
    'cumulated_balance', 'qSOFA',
]
collog = [
    'SpO2', 'BUN', 'Creatinine', 'SGOT', 'SGPT', 'Total_bili', 'INR',
    'input_total', 'input_4hourly', 'output_total', 'output_4hourly',
]

# Slice the patient data into one dataframe per column category
colbin_df, colnorm_df, collog_df = (
    patientdata[columns] for columns in (colbin, colnorm, collog)
)

# Sanity check: show the three slices
print(colbin_df, "\n", colnorm_df, "\n", collog_df)


        gender  mechvent  max_dose_vaso  re_admission  qSOFAFlag  SOFAFlag
0            0         1            0.0             0          1         0
1            0         1            0.0             0          1         1
2            0         1            0.0             0          1         1
3            0         1            0.0             0          1         1
4            0         1            0.0             0          1         1
...        ...       ...            ...           ...        ...       ...
238325       0         0            0.0             0          1         0
238326       0         0            0.0             0          1         0
238327       0         0            0.0             0          1         0
238328       0         0            0.0             0          1         0
238329       0         0            0.0             0          1         0

[238330 rows x 6 columns] 
                 age  Weight_kg        GCS         HR       SysBP     Me

In [9]:
# Stitch the three slices back together side by side so the columns run
# binary first, then normal, then log, from left to right
MIMIC_raw = pd.concat(objs=[colbin_df, colnorm_df, collog_df], axis='columns')
print(MIMIC_raw)

        gender  mechvent  max_dose_vaso  re_admission  qSOFAFlag  SOFAFlag  \
0            0         1            0.0             0          1         0   
1            0         1            0.0             0          1         1   
2            0         1            0.0             0          1         1   
3            0         1            0.0             0          1         1   
4            0         1            0.0             0          1         1   
...        ...       ...            ...           ...        ...       ...   
238325       0         0            0.0             0          1         0   
238326       0         0            0.0             0          1         0   
238327       0         0            0.0             0          1         0   
238328       0         0            0.0             0          1         0   
238329       0         0            0.0             0          1         0   

                age  Weight_kg        GCS         HR  ...    BU

In [10]:
# We want a Z-Score for every item: how many standard deviations a value lies from
# the mean of its column, which puts differently scaled columns on a common footing.

# Scipy provides a routine to execute this kind of thing
from scipy.stats import zscore

# Work on a copy. A plain assignment would only alias MIMIC_raw, so every step below
# would overwrite the raw frame too, and re-running this cell would transform data
# that had already been transformed.
MIMIC_zscores = MIMIC_raw.copy()

# No need for the zscore algorithm on the binary columns: subtracting 0.5 turns
# 0/1 values into -0.5/0.5
MIMIC_zscores[colbin] = MIMIC_zscores[colbin] - 0.5

# These columns are log-transformed first (+ 0.1 to avoid log(0)) and then Z-Scored
MIMIC_zscores[collog] = np.log(MIMIC_zscores[collog] + 0.1).apply(zscore)

# Normal columns require no modification; Z-Scores are calculated directly
MIMIC_zscores[colnorm] = MIMIC_zscores[colnorm].apply(zscore)
# Note: this shows the frame before the two rescaling steps below are applied
print(MIMIC_zscores)

# We want re-admission and fluid intake scaled similarly to the other variables.
# re_admission already had 0.5 subtracted above, so adding 0.6 keeps the log
# argument positive for 0/1 inputs.
MIMIC_zscores['re_admission'] = np.log(MIMIC_zscores['re_admission'] + 0.6)
# Apply a scalar to fluid intake
MIMIC_zscores['input_total'] = 2 * MIMIC_zscores['input_total']

        gender  mechvent  max_dose_vaso  re_admission  qSOFAFlag  SOFAFlag  \
0         -0.5       0.5           -0.5          -0.5        0.5      -0.5   
1         -0.5       0.5           -0.5          -0.5        0.5       0.5   
2         -0.5       0.5           -0.5          -0.5        0.5       0.5   
3         -0.5       0.5           -0.5          -0.5        0.5       0.5   
4         -0.5       0.5           -0.5          -0.5        0.5       0.5   
...        ...       ...            ...           ...        ...       ...   
238325    -0.5      -0.5           -0.5          -0.5        0.5      -0.5   
238326    -0.5      -0.5           -0.5          -0.5        0.5      -0.5   
238327    -0.5      -0.5           -0.5          -0.5        0.5      -0.5   
238328    -0.5      -0.5           -0.5          -0.5        0.5      -0.5   
238329    -0.5      -0.5           -0.5          -0.5        0.5      -0.5   

             age  Weight_kg       GCS        HR  ...       BUN 

In [25]:
### The main loop to generate all possible models
### (currently a single, un-looped train/test split of the patients)

num_rows = id_count  # Total number of patients to divvy the data up between
testing_flag = 1     # The group number that marks a patient as belonging to the test set

# TODO: Change this to 1 in MDP_COUNT
#for model in range(1, 2): #mdp_count):

# We want approximately 20% test, 80% train, so every patient draws a group number 1-5.
# 1s represent patients that will be used to test, 2-5 will be used to train.
# randint's upper bound is exclusive, hence 6.
group_ids = pd.DataFrame(np.random.randint(1, 6, size=id_count))
icu_pair_set = pd.concat([pd.DataFrame(icu_ids), group_ids], axis=1, sort=False)
icu_pair_set.columns = ['id', 'fil_val']
train_ids = icu_pair_set[icu_pair_set['fil_val'] != testing_flag]
test_ids = icu_pair_set[icu_pair_set['fil_val'] == testing_flag]

# We want to ensure that the testing patients + training patients = total patients.
# Raise instead of calling exit(): in a notebook exit() shuts the kernel down and
# throws away all state rather than reporting what went wrong.
if (train_ids['id'].size + test_ids['id'].size) != id_count:
    raise ValueError("The testing and training set do not add up to the total set")

# Percentage for testing should be about 20%, training about 80%
print("Testing Percentage: " + str((test_ids['id'].size / id_count)))
print("Training Percentage: " + str((train_ids['id'].size / id_count)))

# After grabbing all the IDs, we want to flag all the rows that are train or test
train_flag = patientdata['icustayid'].isin(train_ids['id'])
test_flag = patientdata['icustayid'].isin(test_ids['id'])

# Validate that every row of the patient data was selected exactly once: each row
# must carry one flag and not the other. (This check used names that are never
# defined in the notebook, so it failed with a NameError on a fresh kernel.)
if not (train_flag != test_flag).all():
    raise ValueError("Not all rows were grabbed properly, there is something wrong with the split")

Testing Percentage: 0.2037925732656199
Training Percentage: 0.79620742673438


In [34]:
# Separate the Z-Scores for the training set and the testing set
train_zscores = MIMIC_zscores[train_flag]
test_zscores = MIMIC_zscores[test_flag]

# Validate all data is selected: the two subsets together must hold exactly as many
# values as the full Z-Score frame. Raise rather than exit(), which would shut the
# notebook kernel down.
if train_zscores.size + test_zscores.size != MIMIC_zscores.size:
    raise ValueError("The training and testing Z-Scores do not add up to the full Z-Score set")

# The blocs of relevance, in row order, for the train and test set.
# These will be used to build relevant data frames later down.
# .loc selects rows and column in one step instead of chained indexing.
train_blocs = patientdata.loc[train_flag, 'bloc']
test_blocs = patientdata.loc[test_flag, 'bloc']

# Doing the same with the patient ids
train_id_list = patientdata.loc[train_flag, 'icustayid']
test_id_list = patientdata.loc[test_flag, 'icustayid']

# Grabbing the 90-day mortality values for the rows in the training set
train_90d = patientdata.loc[train_flag, 'mortality_90d']

In [None]:

# Placeholder inputs for the Q-Learning run. The two matrices start out as empty
# 3-D arrays and must be populated before they are handed to the solver.

# The initial MDP matrix
# We need the values of weights that determine how much the model
# prefers transitioning from one state (medical condition) to another.
# The matrix must be in the form [[S1][S2][A]] where S1 is the initial state, S2 is the second state, and
# A is the action taken to get from S1 to S2.
# NOTE(review): the pymdptoolbox documentation describes transitions as (A, S, S);
# confirm the axis order before passing this to the toolbox.
transitions = np.empty((0, 0, 0))

# We need to determine the reward value for predicting an outcome leading to survival (+)
# and a penalty for an outcome that will yield death (-).
# The matrix must be in the form [[S1][S2][R]] where S1 is the initial state, S2 is the second state, and
# R is the reward for taking the action from S1 to S2.
reward = np.empty((0, 0, 0))

# We need to determine the discount value to influence the model to continue changing
# when outcomes are not desired. This value should be kept in the range 0 < discount < 1,
# so it is set strictly below 1.
discount = 0.99

# The Q-Learning algorithm will run a fixed number of times
numOfIterations = 10000

# Whether the validity check of the transitions and rewards matrices is skipped;
# this should only be switched for speed.
# NOTE(review): presumably forwarded as the toolbox's skip_check argument - confirm the polarity.
scheck = False
