Files can be downloaded at https://www.cdc.gov/nchs/nhis/data-questionnaires-documentation.htm

After downloading the data, follow the instructions at https://github.com/sookyojeong/worst-ate 
to clean the data and put it in `data/` (this notebook reads `data/nhis2017.csv`). 

In [1]:
import os 
import torch 
import numpy as np 
# Imported before the chdir below, so this module is presumably resolved
# relative to the notebook's own directory -- TODO confirm.
from process_nhis import preprocess_nhis, get_nhis_cluster_dict
import pandas as pd

# Move the working directory two levels up so that the package-style imports
# below (notebooks.*, aexgym.*, scripts.*) and the relative 'data/...' path
# used later resolve; presumably this lands on the repository root.
# NOTE(review): the relative chdir is not idempotent -- re-running this cell
# without restarting the kernel climbs two more directories.
os.chdir('../..')

from notebooks.meager_nhis.cluster_env import ClusterEnv
from aexgym.model import fixedPersonalizedModel
from scripts.setup_script import make_uniform_prior
from aexgym.env import BaseContextualEnv
from aexgym.agent import LinearUniform, LinearTS, LinearUCB, LinearEI
from aexgym.objectives import contextual_simple_regret

        

In [2]:
# Path to the cleaned NHIS CSV, relative to the current working directory.
DATAPATH = 'data/nhis2017.csv'

# Read the CSV and run the project preprocessing in one step; `df` ends up
# bound to the preprocessed frame.
df = preprocess_nhis(pd.read_csv(DATAPATH))

# Cluster dictionary handed to the environment constructor.
nhis_dict = get_nhis_cluster_dict(df)

  df = df.applymap(lambda x: int(x) if isinstance(x, bool) else x)


In [3]:
# Experiment configuration.
n_steps = 32
budget = False
no_duplicates = False
reward_type = ['care_office_2wks']
cluster_batch_size = 100
batch_size = 1

# Gather the constructor arguments in one place, then build the environment.
env_kwargs = dict(
    cluster_dict=nhis_dict,
    batch_size=batch_size,
    cluster_batch_size=cluster_batch_size,
    n_steps=n_steps,
    reward_type=reward_type,
    no_duplicates=no_duplicates,
    budget=budget,
)
env = ClusterEnv(**env_kwargs)

# Reset once; as the last expression of the cell the returned
# (contexts, step) pair is displayed.
env.reset()

((tensor([[1.]]),
  tensor([[2.1300, 2.1900, 1.5300,  ..., 0.6300, 0.0300, 0.0300],
          [1.8900, 2.6600, 1.5800,  ..., 0.6900, 0.0100, 0.0000],
          [2.0700, 2.3600, 1.5500,  ..., 0.8400, 0.0100, 0.0100],
          ...,
          [1.9800, 2.4300, 1.6100,  ..., 0.7500, 0.0000, 0.0000],
          [1.7700, 2.4700, 1.4900,  ..., 0.7100, 0.0200, 0.0200],
          [1.9200, 2.4200, 1.4000,  ..., 0.7700, 0.0100, 0.0100]],
         dtype=torch.float64),
  tensor([[1.]])),
 0)

In [4]:
n_objs = 1
scaling = 0.01

# Constant tensor filled with `scaling`, passed to the model as `s2`.
# NOTE(review): the 100 is hard-coded; it happens to equal cluster_batch_size
# set when the environment was built -- confirm whether the two are meant to
# be tied together.
s2 = torch.ones(100, n_objs) * scaling

# Read the feature-matrix shape once: the first axis gives the number of
# arms, and the prior dimension is twice the second axis.
feature_shape = env.temp_feature_list.shape
n_arms = feature_shape[0]
prior_dim = 2 * feature_shape[1]
beta, sigma = make_uniform_prior(prior_dim, scaling, n_objs=n_objs)

model = fixedPersonalizedModel(
    beta_0=beta,
    sigma_0=sigma,
    n_arms=n_arms,
    s2=s2,
    n_objs=n_objs,
)

In [5]:
# Initialize the agent. Exactly one assignment should be active; uncomment a
# different line (and comment out the current one) to run another policy.
# The LinearUniform assignment used to run and was immediately overwritten
# by the LinearTS one, so it had no effect on the experiment.
#agent = LinearUniform(model, "Linear Uniform")
agent = LinearTS(model, "Linear TS", toptwo=False, n_samples=1, constraint=False)
#agent = LinearUCB(model, "Linear UCB", alpha=0.95) 
#agent = LinearEI(model, "Linear EI")

In [6]:
# Experiment parameters.
device = 'cpu'
num_experiments = 250  # number of independent simulated experiments to run
print_probs = False    # set True to print mean sampling probabilities per step

# Seed torch so the simulation is repeatable, and print tensors without
# scientific notation.
torch.manual_seed(0)
torch.set_printoptions(sci_mode=False)

# Objective used to score each experiment.
objective = contextual_simple_regret()

# Per-experiment results.
regret_list = []
percent_arms_correct_list = []

# Run the experiment simulation. The loop count comes from `num_experiments`
# (it was previously a separate hard-coded literal that ignored the setting).
for i in range(num_experiments):
    # Fresh environment state and model prior for this experiment.
    all_contexts, cur_step = env.reset()
    beta, sigma = agent.model.reset()
    beta, sigma = beta.to(device), sigma.to(device)
    # Keep a copy of the prior; it is what gets passed to update_posterior.
    beta_0, sigma_0 = beta.clone(), sigma.clone()

    while env.n_steps - cur_step > 0:

        # Move contexts to the target device.
        state_contexts, action_contexts, eval_contexts = tuple(
            contexts.to(device) for contexts in all_contexts
        )

        # Keep the model's arm count in sync with the current action set.
        agent.model.n_arms = action_contexts.shape[0]

        # Sampling probabilities from the agent.
        probs = agent(
            beta=beta,
            sigma=sigma,
            contexts=state_contexts,
            action_contexts=action_contexts,
            objective=objective,
        )

        if print_probs:
            print(agent.name, env.n_steps - cur_step, torch.mean(probs, dim=0))

        # Sample actions from the returned probabilities.
        actions = torch.distributions.Categorical(probs).sample()

        # Advance the environment.
        all_contexts, sampled_rewards, sampled_features, cur_step = env.step(
            state_contexts=state_contexts,
            action_contexts=action_contexts,
            actions=actions,
        )

        # Update the model state. The prior (beta_0, sigma_0) is passed on
        # every step together with the step index.
        # NOTE(review): presumably the model stores each step's data under
        # `idx` and recomputes the posterior from the prior -- confirm;
        # otherwise earlier observations would be discarded.
        beta, sigma = agent.model.update_posterior(
            beta=beta_0,
            sigma=sigma_0,
            rewards=sampled_rewards,
            features=sampled_features,
            idx=cur_step - 1,
        )

    # Evaluation contexts and true rewards for scoring.
    eval_contexts = env.sample_eval_contexts(access=True).to(device)
    true_eval_rewards = env.get_true_rewards()

    # Score the final posterior; `action_contexts` is the value left over
    # from the last step of the loop above.
    results_dict = objective(
        fantasy_rewards=agent.fantasize(beta, eval_contexts, action_contexts).view(1, agent.model.n_arms, 1).to(device),
        true_rewards=true_eval_rewards.view(1, agent.model.n_arms, 1).to(device),
    )

    percent_arms_correct_list.append(results_dict['percent_arms_correct'])
    regret_list.append(results_dict['regret'])

    # Report running averages every 10 experiments.
    if i % 10 == 0:
        print("Regret: ", np.mean(regret_list))
        print("Percent Arms Correct: ", np.mean(percent_arms_correct_list))

Regret:  0.10999999999999999
Percent Arms Correct:  0.0


KeyboardInterrupt: 