Data Download Instructions: 

Download zip file from https://www.upjohn.org/data-tools/employment-research-data-center/pennsylvania-reemployment-bonus-demonstration. 

After downloading zip file, turn the recsfile.dta file into a csv file, and put it under data/ 

In [1]:
import numpy as np
import os
import torch 
import pandas as pd

os.chdir("../..")

from aexgym.model import PersonalizedLinearModel
from aexgym.agent import LinearTS, LinearUniform, LinearUCB
from aexgym.objectives import contextual_best_arm, contextual_simple_regret
from notebooks.pennui.penn_env import PennEnv
from notebooks.pennui.process_penn import process_penn
from scripts.setup_script import make_uniform_prior

In [2]:
PATH = "data/penn.csv"
df = process_penn(PATH, outcome=1, linear_impute=True)



In [3]:
n_days = 6
n_arms = 7
context_len = 75
n_steps = n_days 
batch_size = 100
s2 = 0.1 * torch.ones((n_days, 1))

if torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'
print(device)

cpu


In [4]:
#personalization 

#initialize MDPs
n_objs = 1
scaling = 1 / (batch_size*10)
pers_beta, pers_sigma = make_uniform_prior(context_len*n_arms, scaling, n_objs=n_objs)

model = PersonalizedLinearModel(
    beta_0 = pers_beta, 
    sigma_0 = pers_sigma, 
    n_arms = n_arms, 
    s2 = s2, 
    n_objs = n_objs
)
env = PennEnv(df, batch_size, n_days)

In [5]:
agent = LinearUniform(model, "Linear Uniform")
agent = LinearTS(model, "Linear TS", toptwo=False, n_samples = 1)
#agent = LinearTS(model, "Linear TS toptwo", toptwo=True, n_samples = 100)
#agent = LinearUCB(model, "Linear UCB", alpha = 0.95)


In [6]:
#set seed and experiment parameters 
torch.manual_seed(0)
torch.set_printoptions(sci_mode=False) 
print_probs = False

#set objective 
objective = contextual_simple_regret()

#initialize lists
regret_list = []
percent_arms_correct_list = []

#run experiment simulation
for i in range(10000):
    env.reset()
    #print(env.mean_matrix)
    all_contexts, cur_step = env.reset()
    beta, sigma = agent.model.reset()
    #print(beta, sigma)
    beta, sigma = beta.to(device), sigma.to(device)
    beta_0, sigma_0 = beta.clone(), sigma.clone()
    
    while env.n_steps - cur_step > 0:

        #move to device 
        state_contexts, action_contexts, eval_contexts = tuple(contexts.to(device) for contexts in all_contexts)
        
        #get batch size 
        batch = state_contexts.shape[0]

        #get probabilities
        probs = agent(
            beta = beta, 
            sigma = sigma, 
            contexts = state_contexts, 
            action_contexts = action_contexts, 
            objective = objective
        )
     
        #print probabilities 
        if print_probs == True:
            print(agent.name, env.n_steps - cur_step, torch.mean(probs, dim=0))
        
        #get actions and move to new state
        actions = torch.distributions.Categorical(probs).sample()
        
        #move to next environment state 
        all_contexts, sampled_rewards, sampled_features, cur_step  = env.step(
            state_contexts = state_contexts, 
            action_contexts = action_contexts, 
            actions = actions
        )
        
        #update model state 
        beta, sigma = agent.model.update_posterior(
            beta = beta_0, 
            sigma = sigma_0, 
            rewards = sampled_rewards, 
            features = agent.model.feature_map(actions, state_contexts, action_contexts), 
            idx = cur_step-1
        )

    #get evaluation contexts and true rewards 
    eval_contexts = env.sample_eval_contexts(access=True).to(device)
    true_eval_rewards = env.get_true_rewards(eval_contexts, action_contexts)
    
    #calculate results from objective 
    results_dict = objective(
        fantasy_rewards = agent.fantasize(beta, eval_contexts, action_contexts).to(device), 
        true_rewards = true_eval_rewards.to(device)
    )
    
    #append results 
    percent_arms_correct_list.append(results_dict['percent_arms_correct'])
    regret_list.append(results_dict['regret'])

    #print results 
    if i % 10 == 0:
        
        print("Regret: ", np.mean(regret_list))
        print("Percent Arms Correct: ", np.mean(percent_arms_correct_list))

Regret:  2.276535749435425
Percent Arms Correct:  0.19
Regret:  2.7170725519006904
Percent Arms Correct:  0.16818181818181818
Regret:  2.8555057843526206
Percent Arms Correct:  0.15476190476190477
Regret:  2.8637644014050885
Percent Arms Correct:  0.15129032258064515
Regret:  2.8795377277746432
Percent Arms Correct:  0.15317073170731707
Regret:  2.8570394399119357
Percent Arms Correct:  0.1523529411764706
Regret:  2.847397071416261
Percent Arms Correct:  0.15311475409836064
Regret:  2.868101672387459
Percent Arms Correct:  0.1529577464788732


KeyboardInterrupt: 