Download Instructions: 

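Download `asos_digital_experiments_dataset.csv` from https://osf.io/64jsb/ and place it in the `data/` directory two levels above this notebook (the first code cell changes the working directory to `../..`, and the file is then read from `data/asos_digital_experiments_dataset.csv`).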

In [1]:
import numpy as np
import os
import torch 
import pandas as pd

# Move two levels up so that the project-local imports below (`scripts.*`,
# `notebooks.*`) and the relative `data/` path resolve from the working directory.
# Guarded so the cell is idempotent: an unconditional relative chdir climbs two
# more directories every time the cell is re-run in the same kernel.
# NOTE(review): assumes a `notebooks/` directory exists only at the target
# directory (it is imported below as the `notebooks` package) — confirm.
if not os.path.isdir("notebooks"):
    os.chdir("../..")

from aexgym.model import TreatmentLinearModel, TreatmentPersonalModel
from aexgym.agent import LinearTS, DeconfoundedTS, LinearUniform
from aexgym.objectives import contextual_best_arm, contextual_simple_regret, constraint_best_arm
from scripts.setup_script import make_uniform_prior
from notebooks.asos.make_asos_env import make_matrices, ASOS

In [2]:
# Path of the ASOS experiments CSV, relative to the current working directory.
asos_path = 'data/asos_digital_experiments_dataset.csv'
# Read the whole CSV into a DataFrame; it is handed to the ASOS environment later.
asos_df = pd.read_csv(filepath_or_buffer=asos_path)

In [3]:
# ---- Experiment parameters ----
n_days = 10
n_arms = 10
context_len = n_days              # context length is tied to the number of days
n_steps = n_days                  # one environment step per day
batch_size = 100
metric_id_list = [2]              # metric ids handed to the ASOS environment
n_objs = len(metric_id_list)      # one objective per selected metric
exp_id = '036afc'                 # experiment id handed to the ASOS environment

# Build the environment from the loaded ASOS dataframe.
# `context_len` is passed (rather than repeating `n_days`) so the environment and
# the agent priors, which are sized from `context_len`, share one source of truth.
# NOTE(review): semantics of `subtract` / `demean` are defined in
# notebooks/asos/make_asos_env.py and are not visible here.
env = ASOS(
    asos_df=asos_df,
    context_len=context_len,
    batch_size=batch_size,
    n_steps=n_steps,
    n_arms=n_arms,
    seed=0,
    exp_id=exp_id,
    metric_id_list=metric_id_list,
    subtract=True,
    demean=False,
)

# Reset once, then show the environment's mean matrix at index 0 of its last axis
# (presumably the single selected metric — confirm against ASOS).
env.reset()
print(env.mean_matrix[:, :, 0])

tensor([[0.0319, 0.0313, 0.0308, 0.0316, 0.0313, 0.0305, 0.0307, 0.0325, 0.0313,
         0.0320],
        [0.0156, 0.0160, 0.0163, 0.0158, 0.0160, 0.0164, 0.0163, 0.0153, 0.0160,
         0.0156],
        [0.0282, 0.0271, 0.0264, 0.0278, 0.0272, 0.0259, 0.0262, 0.0292, 0.0272,
         0.0283],
        [0.0160, 0.0150, 0.0142, 0.0156, 0.0150, 0.0137, 0.0141, 0.0171, 0.0151,
         0.0162],
        [0.0212, 0.0217, 0.0221, 0.0214, 0.0217, 0.0224, 0.0222, 0.0206, 0.0217,
         0.0211],
        [0.0071, 0.0065, 0.0060, 0.0069, 0.0065, 0.0057, 0.0060, 0.0077, 0.0065,
         0.0072],
        [0.0133, 0.0125, 0.0118, 0.0129, 0.0125, 0.0114, 0.0117, 0.0141, 0.0125,
         0.0134],
        [0.0044, 0.0046, 0.0047, 0.0045, 0.0046, 0.0048, 0.0047, 0.0042, 0.0046,
         0.0044],
        [0.0139, 0.0138, 0.0138, 0.0138, 0.0138, 0.0138, 0.0138, 0.0139, 0.0138,
         0.0139],
        [0.0165, 0.0165, 0.0164, 0.0165, 0.0165, 0.0164, 0.0164, 0.0165, 0.0165,
         0.0165]])


In [4]:
# ---- Agent-side models ----
# Noise term handed to both models: the environment's var_matrix averaged over dim 1.
s2 = env.var_matrix.mean(dim=1)
scaling = 1 / batch_size

# Uniform priors. The first argument is the feature dimension:
#   linear model   -> context_len + n_arms
#   personal model -> additionally context_len * n_arms terms
#                     (presumably context-by-arm interactions — confirm in aexgym.model)
beta, sigma = make_uniform_prior(context_len + n_arms, scaling, n_objs)
beta_mix, sigma_mix = make_uniform_prior(
    context_len + n_arms + context_len * n_arms,
    scaling,
    n_objs,
)

model = TreatmentLinearModel(beta_0=beta, sigma_0=sigma, n_arms=n_arms, s2=s2, n_objs=n_objs)

mix_model = TreatmentPersonalModel(beta_0=beta_mix, sigma_0=sigma_mix, n_arms=n_arms, s2=s2, n_objs=n_objs)

In [7]:
# Agent under evaluation. Alternatives are kept for quick switching; uncomment
# exactly one assignment to `agent`.
#agent = LinearUniform(model, "Linear Uniform", n_samples = 10000)
#agent = DeconfoundedTS(mix_model, "Linear TS", toptwo=False, n_samples = 1)
agent = LinearTS(
    model,
    "Linear TS",
    toptwo=False,
    n_samples=1,
)


In [8]:
# ---- Simulation settings ----
device = "cpu"
print_probs = False      # set True to print the mean sampling probabilities at every step
n_trials = 10000         # number of simulated experiments; lower this for a quick run
print_every = 10         # report running averages every `print_every` trials
objective = contextual_best_arm()
torch.set_printoptions(sci_mode=False)
regret_list = []
percent_arms_correct_list = []

for i in range(n_trials):
    # Start a fresh episode. reset() is called exactly once per trial; its return
    # value provides the initial contexts and the starting step counter.
    all_contexts, cur_step = env.reset()

    # Reset the agent's model and keep a copy of the initial (beta, sigma).
    beta, sigma = agent.model.reset()
    beta, sigma = beta.to(device), sigma.to(device)
    beta_0, sigma_0 = beta.clone(), sigma.clone()

    while env.n_steps - cur_step > 0:

        # Move the three context tensors to the device.
        state_contexts, action_contexts, eval_contexts = tuple(
            contexts.to(device) for contexts in all_contexts
        )

        # Batch size is read off the leading dimension of the state contexts.
        batch = state_contexts.shape[0]

        # Train the agent.
        agent.train_agent(
            beta,
            sigma,
            cur_step,
            env.n_steps,
            env.sample_train_contexts,
            eval_contexts,
            batch,
            objective=objective)

        # Get the sampling probabilities from the agent.
        probs = agent(
            beta=beta,
            sigma=sigma,
            contexts=state_contexts,
            action_contexts=action_contexts,
            objective=objective
        )

        # Optionally print the probabilities, averaged over the batch dimension.
        if print_probs:
            print(agent.name, env.n_steps - cur_step, torch.mean(probs, dim=0))

        # Sample one action per row of `probs`.
        actions = torch.distributions.Categorical(probs).sample()

        # Advance the environment; cur_step is updated by the environment.
        all_contexts, sampled_rewards, sampled_features, cur_step = env.step(
            state_contexts=state_contexts,
            action_contexts=action_contexts,
            actions=actions
        )

        # Update the model state.
        # NOTE(review): the update always starts from the initial (beta_0, sigma_0)
        # rather than the running (beta, sigma). Confirm that update_posterior
        # accumulates earlier batches internally (e.g. via `idx`); otherwise only
        # the most recent batch informs the posterior.
        beta, sigma = agent.model.update_posterior(
            beta=beta_0,
            sigma=sigma_0,
            rewards=sampled_rewards,
            features=agent.model.feature_map(actions, state_contexts, action_contexts),
            idx=cur_step-1
        )

    # Evaluation contexts and true rewards. `action_contexts` is the value left
    # over from the last loop iteration above.
    eval_contexts = env.sample_eval_contexts(access=True).to(device)
    true_eval_rewards = env.get_true_rewards(eval_contexts, action_contexts)

    # Score the final posterior with the objective.
    results_dict = objective(
        fantasy_rewards=agent.fantasize(beta, eval_contexts, action_contexts).to(device),
        true_rewards=true_eval_rewards.to(device)
    )

    # Accumulate per-trial results.
    percent_arms_correct_list.append(results_dict['percent_arms_correct'])
    regret_list.append(results_dict['regret'])

    # Report running averages over all trials completed so far.
    if i % print_every == 0:

        print("Regret: ", np.mean(regret_list))
        print("Percent Arms Correct: ", np.mean(percent_arms_correct_list))

Regret:  0.0007585827261209488
Percent Arms Correct:  0.0
Regret:  0.00045145049013874746
Percent Arms Correct:  0.0
Regret:  0.000413104004803158
Percent Arms Correct:  0.0
Regret:  0.0004295292761056654
Percent Arms Correct:  0.0
Regret:  0.00046028487566040783
Percent Arms Correct:  0.0
Regret:  0.0004630755706160676
Percent Arms Correct:  0.0
Regret:  0.0004607515684405311
Percent Arms Correct:  0.01639344262295082
Regret:  0.00045960065974316126
Percent Arms Correct:  0.014084507042253521
Regret:  0.0004566308156943616
Percent Arms Correct:  0.024691358024691357
Regret:  0.0004395695792122202
Percent Arms Correct:  0.054945054945054944
Regret:  0.0004453006394133709
Percent Arms Correct:  0.0594059405940594
Regret:  0.000450544730500058
Percent Arms Correct:  0.05405405405405406


KeyboardInterrupt: 