# Setup Model

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from UtilityMethods import utils
import sys
#import gym
import pickle
import time
import pulp as p
import math
from copy import copy
import pprint as pp
import itertools
from tqdm import tqdm

## Global variables

In [2]:
# Global variables

IS_VISIT_DEPENDENT = False # whether the above empirical estimates are visit-dependent or not
DATA = '../../../../Codes/Accord/data/ACCORD_BPClass_v2.csv'

EPISODE_LENGTH = 40 # average number of visits per patient
CONSTRAINT = EPISODE_LENGTH/2
C_b = CONSTRAINT/5  #Change this if you want different baseline policy. here is 0.2C

NUMBER_EPISODES = 1e4
NUMBER_SIMULATIONS = 1

delta = 0.01 # bound
EPS = 0.01 # not used
M = 0 # not used

## State space and action space

In [15]:
# state space, actions available in each state are always the same
"""
state_features = ['sbp_discrete','hba1c_discrete','TC_discrete','hdl_discrete','BMI_discrete'] 
fea1 = ['0', '1', '2', '3'] # possible values for sbp_discrete
fea2 = ['0', '1', '2', '3', '4', '5', '6', '7']
fea3 = ['0', '1', '2', '3']
fea4 = ['0', '1', '2', '3']
fea5 = ['0', '1', '2', '3']
"""

state_features = ['sbp_discrete','hba1c_discrete','TC_discrete','hdl_discrete'] 
fea1 = ['0', '1', '2'] # possible values for sbp_discrete, merge 3 with 2
fea2 = ['0', '1', '2', '3', '4', '5', '6', '7']
fea3 = ['0', '1', '2'] # possible values for TC_discrete, merge 3 with 2
fea4 = ['0', '1', '2', '3']

combinations = itertools.product(fea1, fea2, fea3, fea4)
states = [''.join(i) for i in combinations]
print('len(states) =', len(states))
print(states[-5:])

N_STATES = len(states) # number of states = 2048
state_code_to_index = {code: i for i, code in enumerate(states)}
# print the first 5 state_code_to_index
for i in range(5):
    print(states[i], state_code_to_index[states[i]])
print()

# action space, 000000000 means bpclass_none, 111111111 means all bpmed class are precribed
"""
action_features = ['Diur', 'ACE', 'Beta-blocker', 'CCB', 'ARB', 
                   'Alpha-Beta-blocker', 'Alpha-blocker', 'Sympath', 'Vasod'] # we donot include 'bpclass_none' as a action, because 000000000 means bpclass_none
"""

action_features = ['Diur', 'ACE', 'Beta-blocker', 'CCB', 'ARB'] 

combinations = list(itertools.product('01', repeat=len(action_features)))
actions = [''.join(i) for i in combinations]
print('len(actions) =', len(actions))
N_ACTIONS = len(actions) # number of actions = 512
action_code_to_index = {code: i for i, code in enumerate(actions)}
# print the first 5 action_code_to_index
for i in range(5):
    print(actions[i], action_code_to_index[actions[i]])

len(states) = 288
['2713', '2720', '2721', '2722', '2723']
0000 0
0001 1
0002 2
0003 3
0010 4

len(actions) = 32
00000 0
00001 1
00010 2
00011 3
00100 4


## Calculate empirical estimates of P, R, C

In [16]:
df = pd.read_csv(DATA)

# add the state and action code columns
action_code = []
state_code = []
for i in range(len(df)):
    row = df.iloc[i]
    s_code = ''
    a_code = ''
    for state_fea in state_features:
        code = str(row[state_fea])

        # merge 3 with 2 for sbp_discrete and TC_discrete
        if state_fea == 'sbp_discrete' and code == '3':
            code = '2'
        if state_fea == 'TC_discrete' and code == '3':
            code = '2'

        s_code += code
    
    for action_fea in action_features:
        a_code += str(row[action_fea])
    
    action_code.append(a_code)
    state_code.append(s_code)

df['action_code'] = action_code
df['state_code'] = state_code
print('Finished adding action_code and state_code columns')

Finished adding action_code and state_code columns


In [17]:
#------------- calculate the empirical estimate of P, R, C
        
count_s_a = {} # count the number of times state s and action a appear in the dataset, sparse format
count_s_a_d = {} # count the number of times state s, action a, and next state s' appear in the dataset
sum_r_s_a = {} # sum of the reward of state s and action a
sum_c_s_a = {} # sum of the cost of state s and action a
visit_number = [] # number of visits for each patient

# loop through each patient in the dataset
for i in tqdm(range(100001, 110252)):
    df_patient = df[df['MaskID'] == i]

    if len(df_patient) > 0:
        visit_number.append(len(df_patient))

    # loop through each visit of the patient
    for j in range(len(df_patient)-1): # loop before last visit
        row = df_patient.iloc[j]
        s_code = row['state_code']
        a_code = row['action_code']
        ns_code = df_patient.iloc[j+1]['state_code']

        # convert from code to index
        s = state_code_to_index[s_code]
        a = action_code_to_index[a_code]
        s_ = state_code_to_index[ns_code]

        r = df_patient.iloc[j]['CVDRisk_feedback']
        c = df_patient.iloc[j]['sbp_feedback']

        if (s, a) not in count_s_a:
            count_s_a[(s, a)] = 1
            sum_r_s_a[(s, a)] = r 
            sum_c_s_a[(s, a)] = c
        else:
            count_s_a[(s, a)] += 1
            sum_r_s_a[(s, a)] += r
            sum_c_s_a[(s, a)] += c

        if (s, a, s_) not in count_s_a_d:
            count_s_a_d[(s, a, s_)] = 1
        else:
            count_s_a_d[(s, a, s_)] += 1

print('len(visit_number) =', len(visit_number))
print('averge visit_number =', sum(visit_number)/len(visit_number))

print('len(count_s_a) =', len(count_s_a))
print('len(count_s_a_d) =', len(count_s_a_d))
print('Finished counting by looping through the dataset')

100%|██████████| 10251/10251 [01:37<00:00, 105.15it/s]

len(visit_number) = 4368
averge visit_number = 40.500457875457876
len(count_s_a) = 7115
len(count_s_a_d) = 41003
Finished counting by looping through the dataset





* Before reducing state and action space: results above shows that there were 25,658 state-action pairs and 65,912 state-action-nextstate pairs appeared in the dataset.
* After: 7,115 state-action pairs, and 41,003 state-action-nextstate pairs.

In [18]:
# calculate the empirical estimate of P, R, C using counts

# initialize R, C, P, using sparse matrix format
R = {} # N_STATES * N_ACTIONS, dictionary of reward matrices, this is the CVDRisk empirical estimate based on entire dataset
C = {} # N_STATES * N_ACTIONS, dictionary of cost matrices, this is SBP empirical estimate based on entire dataset
P = {} # N_STATES * N_ACTIONS * N_STATES, dictionary of transition probability matrices, based on the entire dataset

# for s in range(N_STATES):
#     l = len(actions)
#     R[s] = np.zeros(l)
#     C[s] = np.zeros(l)
#     P[s] = {}    
#     for a in range(N_ACTIONS):
#         C[s][a] = 0
#         P[s][a] = np.zeros(N_STATES)
#         R[s][a] = 0
# print('Finished initializing R, C, P')

for (s,a) in count_s_a: # only calculate for the states and actions that appearedin the dataset, for efficiency

    if s not in R:
        R[s] = {}
        C[s] = {}
        P[s] = {}
    
    if a not in R[s]:
        R[s][a] = 0
        C[s][a] = 0
        P[s][a] = {}

    R[s][a] = sum_r_s_a[(s, a)]/max(count_s_a[(s, a)],1)
    C[s][a] = sum_c_s_a[(s, a)]/max(count_s_a[(s, a)],1)

for (s, a, s_) in count_s_a_d:
    P[s][a][s_] = count_s_a_d[(s, a, s_)]/max(count_s_a[(s, a)],1)

print('Finished calculating the empirical estimate of P, R, C')

#------------- check the sparsity of P, R, C
print('\nSparsity of P, R, C:')
print('P: {:.6f}% are non-zeros'.format(len(count_s_a_d)*100/(N_STATES*N_ACTIONS*N_STATES)))
print('R: {:.6f}% are non-zeros'.format(len(sum_r_s_a)*100/(N_STATES*N_ACTIONS)))
print('C: {:.6f}% are non-zeros'.format(len(sum_c_s_a)*100/(N_STATES*N_ACTIONS)))

Finished calculating the empirical estimate of P, R, C

Sparsity of P, R, C:
P: 1.544830% are non-zeros
R: 77.202691% are non-zeros
C: 77.202691% are non-zeros


In [19]:
# normalize R and C to the range [0, 1]

max_R = max([R[s][a] for s in R for a in R[s]])
max_C = max([C[s][a] for s in C for a in C[s]])

print('max_R =', max_R)
print('max_C =', max_C)

for s in R:
    for a in R[s]:
        R[s][a] = (R[s][a])/(max_R)
        C[s][a] = (C[s][a])/(max_C)

max_R = 0.7312023177749944
max_C = 189.0


## Compute solution.pkl and baseline.pkl files

In [None]:
util_methods_1 = utils(EPS, delta, M, P, R, C, EPISODE_LENGTH, N_STATES, actions, CONSTRAINT, C_b)

opt_policy_con, opt_value_LP_con, opt_cost_LP_con, opt_q_con = util_methods_1.compute_opt_LP_Constrained(0) # constrained MDP
opt_policy_uncon, opt_value_LP_uncon, opt_cost_LP_uncon, opt_q_uncon = util_methods_1.compute_opt_LP_Unconstrained(0) # unconstrained = standard MDP, not used in DOPE
with open('solution.pckl', 'wb') as f:
    pickle.dump([opt_policy_con, opt_value_LP_con, opt_cost_LP_con, opt_q_con, 
                 opt_policy_uncon, opt_value_LP_uncon, opt_cost_LP_uncon, opt_q_uncon], f)

In [None]:
util_methods_1 = utils(EPS, delta, M, P,R,C,EPISODE_LENGTH,N_STATES,actions,C_b,C_b)
policy_b, value_b, cost_b, q_b = util_methods_1.compute_opt_LP_Constrained(0)
with open('base.pckl', 'wb') as f:
    pickle.dump([policy_b, value_b, cost_b, q_b], f)

In [None]:
with open('model.pckl', 'wb') as f:
    pickle.dump([NUMBER_SIMULATIONS, NUMBER_EPISODES, P, R, C, 
                CONSTRAINT, N_STATES, actions, EPISODE_LENGTH, delta], f)

In [None]:
print('\n*******')
print("opt_value_LP_uncon[0, 0] =",opt_value_LP_uncon[0, 0])
print("opt_value_LP_con[0, 0] =",opt_value_LP_con[0, 0])
print("value_b[0, 0] =",value_b[0, 0])

## Other

In [14]:
# check the frequency counts of hba1c_discrete in the dataset

def check_frequency(df, col_name):
    df = df[col_name]
    df = df.value_counts()
    print(df)
    print()

# 'sbp_discrete','hba1c_discrete','TC_discrete','hdl_discrete','BMI_discrete'
check_frequency(df, 'sbp_discrete')
check_frequency(df, 'hba1c_discrete')
check_frequency(df, 'TC_discrete')
check_frequency(df, 'hdl_discrete')
check_frequency(df, 'BMI_discrete')

1    80866
0    64732
2    26865
3     4443
Name: sbp_discrete, dtype: int64

1    34956
2    34414
3    32317
4    24291
0    17289
5    15053
7    10157
6     8429
Name: hba1c_discrete, dtype: int64

1    69154
0    49985
2    39101
3    18666
Name: TC_discrete, dtype: int64

0    58248
1    54122
2    38232
3    26304
Name: hdl_discrete, dtype: int64

3    113017
2     48196
1     15594
0        99
Name: BMI_discrete, dtype: int64



Frequency counts above show that:
1. we can merge the sbp_discrete 3 with 2. 
2. hba1c_discrete no change
3. TC_discrete: merge 3 with 2
4. hdl_discrete: no change
5. remove BMI_discrete since most are in 3