The data used in this simulation can be downloaded from https://www.openicpsr.org/openicpsr/project/116357/version/V1/view.

After downloading, combine the $7$ data files into a single CSV file and save it as `data/Meager_data.csv`, which is the path the data-loading cell below reads.

In [1]:
import os 
import torch 
import numpy as np 
# Imported BEFORE the chdir below -- presumably process_meager.py sits next to
# this notebook and would not be importable afterwards; confirm.
from process_meager import get_meager_cluster_df, get_meager_cluster_dict  

# Move the working directory two levels up so that the relative 'data/...' path
# used later resolves from there; presumably this is the repository root and is
# also what makes the notebooks.*, aexgym.* and scripts.* imports below work.
# NOTE(review): the path is relative, so re-running this cell climbs two more
# levels each time -- restart the kernel before re-running it.
os.chdir('../..')
from notebooks.meager_nhis.cluster_env import ClusterEnv
from aexgym.model import fixedPersonalizedModel 
from scripts.setup_script import make_uniform_prior
from aexgym.env import BaseContextualEnv
from aexgym.agent import LinearUniform, LinearTS, LinearUCB, LinearEI
from aexgym.objectives import contextual_simple_regret

        

In [2]:
# Read the combined Meager CSV into a cluster-level dataframe (clusters are
# keyed by 'district'), then convert that dataframe into the dictionary
# that the environment is built from.
df = get_meager_cluster_df(
    DATAPATH='data/Meager_data.csv',
    cluster_type='district',
    filter_both_treat=True,
)
meager_dict = get_meager_cluster_dict(df)

  df = pd.read_csv(DATAPATH)


In [3]:
# List the response columns of entry 0; these are the values that can be
# passed as `reward_type` when building the environment.
reward_columns = meager_dict[0]['response'].columns
print("reward types:", reward_columns)

reward types: Index(['profit', 'expenditures', 'revenues', 'consumption'], dtype='object')


In [4]:
# --- environment settings ---------------------------------------------------
n_steps = 32                # the simulation loop runs until cur_step reaches this
budget = False
no_duplicates = False
reward_type = ['profit']    # one of the response columns printed above
cluster_batch_size = 100
batch_size = 1

# Gather the settings in one mapping so the constructor call stays compact.
env_config = {
    'cluster_dict': meager_dict,
    'batch_size': batch_size,
    'cluster_batch_size': cluster_batch_size,
    'n_steps': n_steps,
    'reward_type': reward_type,
    'no_duplicates': no_duplicates,
    'budget': budget,
}

# --- build the environment --------------------------------------------------
env = ClusterEnv(**env_config)

# Last expression of the cell: displays the initial (contexts, step) pair.
env.reset()

((tensor([[1.]]),
  tensor([[1.0000, 1.0000, 0.2800,  ..., 0.0000, 1.0000, 0.0000],
          [1.0000, 1.0000, 0.5800,  ..., 0.0000, 1.0000, 0.0000],
          [1.0000, 1.0000, 0.2300,  ..., 0.0000, 1.0000, 0.0000],
          ...,
          [1.0000, 0.1800, 0.5200,  ..., 0.0000, 0.0000, 1.0000],
          [1.0000, 0.2200, 0.5400,  ..., 0.0000, 0.0000, 1.0000],
          [1.0000, 0.3800, 0.4700,  ..., 0.0000, 0.0000, 1.0000]],
         dtype=torch.float64),
  tensor([[1.]])),
 0)

In [5]:
# --- prior / model settings -------------------------------------------------
n_objs = 1
scaling = 0.01

# Dimensions are read from the feature list the environment currently holds.
feature_shape = env.temp_feature_list.shape
n_arms = feature_shape[0]
prior_dim = 2 * feature_shape[1]

# NOTE(review): the hard-coded 100 equals cluster_batch_size in the env cell --
# confirm whether the two are meant to stay in sync.
s2 = torch.ones(100, n_objs) * scaling

# Prior mean and covariance for the linear model.
beta, sigma = make_uniform_prior(prior_dim, scaling, n_objs=n_objs)

# --- build the model --------------------------------------------------------
model_config = {
    'beta_0': beta,
    'sigma_0': sigma,
    'n_arms': n_arms,
    's2': s2,
    'n_objs': n_objs,
}
model = fixedPersonalizedModel(**model_config)


In [8]:
# Initialize agents.
# Every candidate is built on the same shared `model` and kept in a registry,
# so the agent used by the simulation is chosen explicitly by name rather than
# by whichever assignment happens to come last.
agents = {
    "Linear Uniform": LinearUniform(model, "Linear Uniform"),
    "Linear TS": LinearTS(model, "Linear TS", toptwo=False, n_samples=1, constraint=False),
    "Linear UCB": LinearUCB(model, "Linear UCB", alpha=0.95),
    "Linear EI": LinearEI(model, "Linear EI"),
}

# Change this key to run the experiment with a different agent.
agent_name = "Linear EI"
agent = agents[agent_name]

In [9]:
# --- experiment configuration -------------------------------------------------
device = 'cpu'
num_experiments = 250   # number of independent simulation runs (drives the loop below)
print_probs = False     # set True to print the mean sampling probabilities at every step
print_every = 10        # report running averages every `print_every` runs

# Seed torch so the sampled actions are reproducible, and print tensors
# without scientific notation.
torch.manual_seed(0)
torch.set_printoptions(sci_mode=False)

# Objective used both by the agent and for the final evaluation.
objective = contextual_simple_regret()

# Per-run results.
regret_list = []
percent_arms_correct_list = []

# --- run experiment simulation ------------------------------------------------
for i in range(num_experiments):
    # Start a fresh episode and reset the model to its prior.
    all_contexts, cur_step = env.reset()
    beta, sigma = agent.model.reset()
    beta, sigma = beta.to(device), sigma.to(device)
    # Keep a copy of the prior; it is what every posterior update starts from.
    beta_0, sigma_0 = beta.clone(), sigma.clone()

    while env.n_steps - cur_step > 0:

        # Move the current contexts to the device.
        state_contexts, action_contexts, eval_contexts = tuple(
            contexts.to(device) for contexts in all_contexts
        )

        # The number of arms is the number of rows of the action contexts.
        agent.model.n_arms = action_contexts.shape[0]

        # Sampling probabilities over arms from the agent.
        probs = agent(
            beta=beta,
            sigma=sigma,
            contexts=state_contexts,
            action_contexts=action_contexts,
            objective=objective,
        )

        # Optionally print the mean probabilities with the steps remaining.
        if print_probs:
            print(agent.name, env.n_steps - cur_step, torch.mean(probs, dim=0))

        # Sample actions from the categorical distribution defined by `probs`.
        actions = torch.distributions.Categorical(probs).sample()

        # Advance the environment with the sampled actions.
        all_contexts, sampled_rewards, sampled_features, cur_step = env.step(
            state_contexts=state_contexts,
            action_contexts=action_contexts,
            actions=actions,
        )

        # Update the model state.
        # NOTE(review): the update always starts from the prior (beta_0, sigma_0)
        # together with idx = cur_step - 1; presumably the model keeps earlier
        # batches internally, indexed by idx -- confirm, otherwise earlier
        # observations would be discarded.
        beta, sigma = agent.model.update_posterior(
            beta=beta_0,
            sigma=sigma_0,
            rewards=sampled_rewards,
            features=sampled_features,
            idx=cur_step - 1,
        )

    # Evaluation contexts and true rewards for this episode.
    eval_contexts = env.sample_eval_contexts(access=True).to(device)
    true_eval_rewards = env.get_true_rewards()

    # Score the final posterior mean against the true rewards.
    # `action_contexts` is the value left over from the last step of the loop above.
    results_dict = objective(
        fantasy_rewards=agent.fantasize(beta, eval_contexts, action_contexts).view(1, agent.model.n_arms, 1).to(device),
        true_rewards=true_eval_rewards.view(1, agent.model.n_arms, 1).to(device),
    )

    # Record the results of this run.
    percent_arms_correct_list.append(results_dict['percent_arms_correct'])
    regret_list.append(results_dict['regret'])

    # Periodically report the running averages over all runs so far.
    if i % print_every == 0:
        print("Regret: ", np.mean(regret_list))
        print("Percent Arms Correct: ", np.mean(percent_arms_correct_list))

Regret:  173.35816574402742
Percent Arms Correct:  0.0
Regret:  161.45999528650145
Percent Arms Correct:  0.0
Regret:  183.89542680462355
Percent Arms Correct:  0.0
Regret:  186.31789945757376
Percent Arms Correct:  0.03225806451612903


KeyboardInterrupt: 