# Setup Model

In [25]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from UtilityMethods import utils
import sys
import pickle
import time
import pulp as p
import math
from copy import copy
from tqdm import tqdm

## State space and action space

In [1]:
# state space, actions available in each state vary with state

N_STATES = 7

# build the action space for each state
ACTIONS_PER_STATE = {}
for s in range(N_STATES):
    ACTIONS_PER_STATE[s] = []
    for a in range(N_STATES-s):
        ACTIONS_PER_STATE[s].append(a) 
print('Actions for State 0:', ACTIONS_PER_STATE[0])
print('Actions for State 1:', ACTIONS_PER_STATE[1])
print('Actions for State 6:', ACTIONS_PER_STATE[6])

Actions for State 0: [0, 1, 2, 3, 4, 5, 6]
Actions for State 1: [0, 1, 2, 3, 4, 5]
Actions for State 6: [0]


## Calculate empirical estimates of P

In [33]:
# add the state and action code columns
action_code = []
state_code = []
sbp_discrete_merged = []
hba1c_discrete_merged = []
for i in range(len(df)):
    row = df.iloc[i]
    s_code = ''
    a_code = ''
    for state_fea in state_features:
        code = str(row[state_fea])

        # merge 3 with 2 for sbp_discrete and TC_discrete
        if state_fea == 'sbp_discrete':
            code = sbp_discrete_code_dict[code]
            sbp_discrete_merged.append(int(code))
        elif state_fea == 'hba1c_discrete':
            code = hba1c_discrete_code_dict[code]
            hba1c_discrete_merged.append(int(code))
        elif state_fea == 'TC_discrete':
            code = TC_discrete_code_dict[code]
        elif state_fea == 'hdl_discrete':
            code = hdl_discrete_code_dict[code]
        else:
            raise ValueError('state_fea not recognized')
            exit(1)       
        
        s_code += code
    
    for action_fea in action_features:
        a_code += str(row[action_fea])
    
    action_code.append(a_code)
    state_code.append(s_code)

assert len(hba1c_discrete_merged) == len(df)

df['sbp_discrete_merged'] = sbp_discrete_merged
df['hba1c_discrete_merged'] = hba1c_discrete_merged
df['action_code'] = action_code
df['state_code'] = state_code
print('Finished adding action_code and state_code columns')

# DATA_MERGED = DATA[:-4] + '_merged.csv'
# # write the merged data to file
# df.to_csv(DATA_MERGED, index=False)

Finished adding action_code and state_code columns


In [35]:
#------------- calculate the empirical estimate of P based on entire dataset ----------------
        
count_s_a = {} # count the number of times state s and action a appear in the dataset, sparse format
count_s_a_d = {} # count the number of times state s, action a, and next state s' appear in the dataset
visit_number = [] # number of visits for each patient

# loop through each patient in the dataset
for i in tqdm(range(100001, 110252)):
    df_patient = df[df['MaskID'] == i]

    if len(df_patient) > 0:
        visit_number.append(len(df_patient))

    # loop through each visit of the patient
    for j in range(len(df_patient)-1): # loop before last visit
        row = df_patient.iloc[j]
        s_code = row['state_code']
        a_code = row['action_code']
        ns_code = df_patient.iloc[j+1]['state_code']

        # convert from code to index
        s = state_code_to_index[s_code]
        a = action_code_to_index[a_code]
        s_ = state_code_to_index[ns_code]

        if (s, a) not in count_s_a:
            count_s_a[(s, a)] = 1
        else:
            count_s_a[(s, a)] += 1

        if (s, a, s_) not in count_s_a_d:
            count_s_a_d[(s, a, s_)] = 1
        else:
            count_s_a_d[(s, a, s_)] += 1

print('len(visit_number) =', len(visit_number))
print('averge visit_number =', sum(visit_number)/len(visit_number))

print('len(count_s_a) =', len(count_s_a))
print('len(count_s_a_d) =', len(count_s_a_d))
print('Finished counting by looping through the dataset')

100%|██████████| 10251/10251 [00:44<00:00, 231.50it/s]

len(visit_number) = 3595
averge visit_number = 38.666203059805284
len(count_s_a) = 64
len(count_s_a_d) = 256
Finished counting by looping through the dataset





### Save the model settings

In [44]:
EPISODE_LENGTH = 20 # average number of visits per patient
# CONSTRAINT_list = [16, 10, 10] # deviation * 20 visits 
# C_b_list = [8, 5, 5]  # change this if you want different baseline policy.

# CONSTRAINT_list = [166, 160, 160,
#                    166, 160, 160,
#                    166, 160, 160,]  

CONSTRAINT1_list = [220] * N_STATES # deviation * 20 visits
C1_b_list = [40] * N_STATES # constraint for baseline policy

CONSTRAINT2_list = [16] * N_STATES # deviation * 20 visits 
C2_b_list = [8] * N_STATES  # constraint for baseline policy


delta = 0.01 # bound

In [45]:
# dump the model settings and parameters to a pickle file
CONTEXT_VEC_LENGTH = len(context_fea)
ACTION_CODE_LENGTH = len(action_index_to_code[0])
print('CONTEXT_VEC_LENGTH =', CONTEXT_VEC_LENGTH)
print('ACTION_CODE_LENGTH =', ACTION_CODE_LENGTH)

with open('output_final/model_contextual_BPBG.pkl', 'wb') as f:
    pickle.dump([P, CONTEXT_VEC_LENGTH, ACTION_CODE_LENGTH, CONTEXT_VECTOR_dict, INIT_STATE_INDEX, INIT_STATES_LIST,
                CONSTRAINT1_list, C1_b_list, N_STATES, N_ACTIONS, ACTIONS_PER_STATE, EPISODE_LENGTH, delta], f)

CONTEXT_VEC_LENGTH = 9
ACTION_CODE_LENGTH = 4
