# Semi Synthetic Experiments

Analyze the performance of various algorithms on the joint matching + activity task when the number of volunteers is large and structured.

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import random 
import matplotlib.pyplot as plt
import json 
import argparse 
import sys
import secrets

In [144]:
from rmab.simulator import RMABSimulator, generate_random_context
from rmab.omniscient_policies import *
from rmab.mcts_policies import full_mcts_policy
from rmab.fr_dynamics import get_all_transitions
from rmab.utils import get_save_path, delete_duplicate_results

In [118]:
# True when the `ipykernel` module is already loaded in this process; used
# below as a proxy for "running inside a Jupyter kernel" (vs. as a script).
is_jupyter = 'ipykernel' in sys.modules

In [208]:
# Experiment configuration.
# Inside a notebook kernel the settings are hard-coded here; when the file is
# executed as a script the same settings are read from the command line.
if is_jupyter:
    seed        = 42
    n_arms      = 2
    volunteers_per_arm = 5
    budget      = 2
    discount    = 0.9
    alpha       = 3
    n_episodes  = 30
    episode_len = 20
    n_epochs    = 10
    save_with_date = False
    TIME_PER_RUN = 0.01 * 1000
    # lambda is stored divided by the total number of volunteers
    # (n_arms * volunteers_per_arm); the notebook default is 0.
    lamb = 0/(n_arms*volunteers_per_arm)
    prob_distro = 'normal'
    context_dim = 2
else:
    parser = argparse.ArgumentParser()
    parser.add_argument('--n_arms',         '-N', help='num beneficiaries (arms)', type=int, default=2)
    parser.add_argument('--volunteers_per_arm',         '-V', help='volunteers per arm', type=int, default=5)
    parser.add_argument('--episode_len',    '-H', help='episode length', type=int, default=20)
    parser.add_argument('--n_episodes',     '-T', help='num episodes', type=int, default=30)
    parser.add_argument('--budget',         '-B', help='budget', type=int, default=3)
    parser.add_argument('--n_epochs',       '-E', help='number of epochs (num_repeats)', type=int, default=10)
    parser.add_argument('--discount',       '-d', help='discount factor', type=float, default=0.9)
    parser.add_argument('--alpha',          '-a', help='alpha: for conf radius', type=float, default=3)
    parser.add_argument('--lamb',          '-l', help='lambda for matching-engagement tradeoff', type=float, default=1)
    parser.add_argument('--seed',           '-s', help='random seed', type=int, default=42)
    parser.add_argument('--prob_distro',           '-p', help='which prob distro [uniform,uniform_small,uniform_large,normal]', type=str, default='uniform')
    parser.add_argument('--time_per_run',      '-t', help='time per MCTS run', type=float, default=.01*1000)
    parser.add_argument('--use_date', action='store_true')
    parser.add_argument('--context_dim',           '-c', help='Size of the context dim, integer', type=int, default=2)

    args = parser.parse_args()

    n_arms      = args.n_arms
    volunteers_per_arm = args.volunteers_per_arm
    budget      = args.budget
    discount    = args.discount
    alpha       = args.alpha
    seed        = args.seed
    n_episodes  = args.n_episodes
    episode_len = args.episode_len
    n_epochs    = args.n_epochs
    # Same normalisation as the notebook branch: divide by the volunteer count.
    lamb = args.lamb /(volunteers_per_arm*n_arms)
    save_with_date = args.use_date
    TIME_PER_RUN = args.time_per_run
    prob_distro = args.prob_distro
    # Previously parsed but never assigned, which made script runs fail with a
    # NameError as soon as `context_dim` was used further down.
    context_dim = args.context_dim

# Random 8-hex-character identifier used to name this run's output file.
save_name = secrets.token_hex(4)

In [209]:
# Sizes of the state and action spaces used throughout the experiment
# (both are binary here).
n_states, n_actions = 2, 2

In [210]:
# Size of the pool of candidate arms that the rest of the notebook draws from.
all_population_size = 100 # number of random arms to generate
# NOTE(review): `all_transitions` is reassigned from scratch in the next cell,
# so the value returned here is never used — confirm whether this call is
# still needed (e.g. for side effects inside get_all_transitions).
all_transitions = get_all_transitions(all_population_size)

In [211]:
# Hand-built transition tensor, one 2x2x2 slab per arm in the pool, starting
# from all zeros.
all_transitions = np.zeros((all_population_size, 2, 2, 2))
# Paired index lists pick the entries [..., 0, 0] and [..., 1, 1] for every arm
# and every value of the second axis, and set them to 1.
# Assuming the axes are (arm, state, action, next_state), this makes the next
# state equal to the action taken — TODO confirm axis order with RMABSimulator.
all_transitions[:, :, [0, 1], [0, 1]] = 1

In [212]:
# Seed NumPy's global generator and the stdlib `random` module with the
# configured seed before any random draws are made below.
np.random.seed(seed)
random.seed(seed)

In [213]:
# One integer per arm in the pool (0 .. all_population_size - 1); handed to
# RMABSimulator as its `all_features` argument.
all_features = np.arange(all_population_size)

In [214]:
# Draw one random context of size `context_dim` for every
# (arm in the pool) x (volunteer per arm) slot; the list is later passed to
# the simulator as `match_probability_list`.
match_probabilities = [
    generate_random_context(context_dim)
    for _ in range(all_population_size * volunteers_per_arm)
]

In [215]:
# Re-seed both random sources immediately before building the simulator.
random.seed(seed)
np.random.seed(seed)

# Contextual simulator with the 'match' reward style, built from the arm pool,
# transitions and per-volunteer contexts defined above.
simulator = RMABSimulator(
    all_population_size,
    all_features,
    all_transitions,
    n_arms,
    volunteers_per_arm,
    episode_len,
    n_epochs,
    n_episodes,
    budget,
    discount,
    number_states=n_states,
    match_probability_list=match_probabilities,
    reward_style='match',
    TIME_PER_RUN=TIME_PER_RUN,
    contextual=True,
    context_dim=context_dim,
)

cohort [83 53]
cohort [19 21]
cohort [21 90]
cohort [52 79]
cohort [78 87]
cohort [27 47]
cohort [30 90]
cohort [73 96]
cohort [25 89]
cohort [83 95]


In [216]:
# Container for everything written to disk at the end of the notebook.
# 'parameters' records the configuration of this run; per-policy metrics are
# added to `results` by the cells below.
results = {
    'parameters': {
        'seed': seed,
        'n_arms': n_arms,
        'volunteers_per_arm': volunteers_per_arm,
        'budget': budget,
        'discount': discount,
        'alpha': alpha,
        'n_episodes': n_episodes,
        'episode_len': episode_len,
        'n_epochs': n_epochs,
        'lamb': lamb,
        'time_per_run': TIME_PER_RUN,
        'prob_distro': prob_distro,
    },
}

## Index Policies

In [217]:
# Evaluate the contextual greedy policy on the shared simulator.
name = "greedy"
policy = greedy_policy_contextual
greedy_reward, greedy_active_rate = run_heterogenous_policy(
    simulator, n_episodes, n_epochs, discount, policy, seed, lamb=lamb
)
time_greedy = simulator.time_taken

# Combined score: mean match reward plus the activity term weighted by
# lamb * n_arms * volunteers_per_arm.
print(np.mean(greedy_reward) + lamb*n_arms*volunteers_per_arm*greedy_active_rate)

# Record match reward, active rate and time taken under this policy's name.
results.update({
    '{}_match'.format(name): np.mean(greedy_reward),
    '{}_active'.format(name): greedy_active_rate,
    '{}_time'.format(name): time_greedy,
})

instance 0, ep 1


instance 0, ep 2
instance 0, ep 3
instance 0, ep 4
instance 0, ep 5
instance 0, ep 6
instance 0, ep 7
instance 0, ep 8
instance 0, ep 9
instance 0, ep 10
instance 0, ep 11
instance 0, ep 12
instance 0, ep 13
instance 0, ep 14
instance 0, ep 15
instance 0, ep 16
instance 0, ep 17
instance 0, ep 18
instance 0, ep 19
instance 0, ep 20
instance 0, ep 21
instance 0, ep 22
instance 0, ep 23
instance 0, ep 24
instance 0, ep 25
instance 0, ep 26
instance 0, ep 27
instance 0, ep 28
instance 0, ep 29
instance 1, ep 1
instance 1, ep 2
instance 1, ep 3
instance 1, ep 4
instance 1, ep 5
instance 1, ep 6
instance 1, ep 7
instance 1, ep 8
instance 1, ep 9
instance 1, ep 10
instance 1, ep 11
instance 1, ep 12
instance 1, ep 13
instance 1, ep 14
instance 1, ep 15
instance 1, ep 16
instance 1, ep 17
instance 1, ep 18
instance 1, ep 19
instance 1, ep 20
instance 1, ep 21
instance 1, ep 22
instance 1, ep 23
instance 1, ep 24
instance 1, ep 25
instance 1, ep 26
instance 1, ep 27
instance 1, ep 28
instance 

In [218]:
# Evaluate the random policy on the shared simulator.
name = "random"
policy = random_policy
random_reward, random_active_rate = run_heterogenous_policy(
    simulator, n_episodes, n_epochs, discount, policy, seed, lamb=lamb
)
time_random = simulator.time_taken

# Combined score: mean match reward plus the activity term weighted by
# lamb * n_arms * volunteers_per_arm.
print(np.mean(random_reward) + random_active_rate*lamb*n_arms*volunteers_per_arm)

# Record match reward, active rate and time taken under this policy's name.
results.update({
    '{}_match'.format(name): np.mean(random_reward),
    '{}_active'.format(name): random_active_rate,
    '{}_time'.format(name): time_random,
})

instance 0, ep 1
instance 0, ep 2
instance 0, ep 3
instance 0, ep 4
instance 0, ep 5
instance 0, ep 6
instance 0, ep 7
instance 0, ep 8
instance 0, ep 9
instance 0, ep 10
instance 0, ep 11
instance 0, ep 12
instance 0, ep 13
instance 0, ep 14
instance 0, ep 15
instance 0, ep 16
instance 0, ep 17
instance 0, ep 18
instance 0, ep 19
instance 0, ep 20
instance 0, ep 21
instance 0, ep 22
instance 0, ep 23
instance 0, ep 24
instance 0, ep 25
instance 0, ep 26
instance 0, ep 27
instance 0, ep 28
instance 0, ep 29
instance 1, ep 1
instance 1, ep 2
instance 1, ep 3
instance 1, ep 4
instance 1, ep 5
instance 1, ep 6
instance 1, ep 7
instance 1, ep 8
instance 1, ep 9
instance 1, ep 10
instance 1, ep 11
instance 1, ep 12
instance 1, ep 13
instance 1, ep 14
instance 1, ep 15
instance 1, ep 16
instance 1, ep 17
instance 1, ep 18
instance 1, ep 19
instance 1, ep 20
instance 1, ep 21
instance 1, ep 22
instance 1, ep 23
instance 1, ep 24
instance 1, ep 25
instance 1, ep 26
instance 1, ep 27
instance 1

In [219]:
# Evaluate the Whittle activity policy on the shared simulator.
name = "whittle_engagement"
policy = whittle_activity_policy
whittle_activity_reward, whittle_activity_active_rate = run_heterogenous_policy(
    simulator, n_episodes, n_epochs, discount, policy, seed, lamb=lamb
)
time_whittle_activity = simulator.time_taken

# Combined score: mean match reward plus the activity term weighted by
# lamb * n_arms * volunteers_per_arm.
print(np.mean(whittle_activity_reward) + whittle_activity_active_rate*lamb*n_arms*volunteers_per_arm)

# Record match reward, active rate and time taken under this policy's name.
results.update({
    '{}_match'.format(name): np.mean(whittle_activity_reward),
    '{}_active'.format(name): whittle_activity_active_rate,
    '{}_time'.format(name): time_whittle_activity,
})

instance 0, ep 1
instance 0, ep 2
instance 0, ep 3
instance 0, ep 4
instance 0, ep 5
instance 0, ep 6
instance 0, ep 7
instance 0, ep 8
instance 0, ep 9
instance 0, ep 10
instance 0, ep 11
instance 0, ep 12
instance 0, ep 13
instance 0, ep 14
instance 0, ep 15


instance 0, ep 16
instance 0, ep 17
instance 0, ep 18
instance 0, ep 19
instance 0, ep 20
instance 0, ep 21
instance 0, ep 22
instance 0, ep 23
instance 0, ep 24
instance 0, ep 25
instance 0, ep 26
instance 0, ep 27
instance 0, ep 28
instance 0, ep 29
instance 1, ep 1
instance 1, ep 2
instance 1, ep 3
instance 1, ep 4
instance 1, ep 5
instance 1, ep 6
instance 1, ep 7
instance 1, ep 8
instance 1, ep 9
instance 1, ep 10
instance 1, ep 11
instance 1, ep 12
instance 1, ep 13
instance 1, ep 14
instance 1, ep 15
instance 1, ep 16
instance 1, ep 17
instance 1, ep 18
instance 1, ep 19
instance 1, ep 20
instance 1, ep 21
instance 1, ep 22
instance 1, ep 23
instance 1, ep 24
instance 1, ep 25
instance 1, ep 26
instance 1, ep 27
instance 1, ep 28
instance 1, ep 29
instance 2, ep 1
instance 2, ep 2
instance 2, ep 3
instance 2, ep 4
instance 2, ep 5
instance 2, ep 6
instance 2, ep 7
instance 2, ep 8
instance 2, ep 9
instance 2, ep 10
instance 2, ep 11
instance 2, ep 12
instance 2, ep 13
instance 2

In [223]:
# Evaluate the contextual Whittle policy on the shared simulator.
name = "linear_whittle"
policy = whittle_policy_contextual
whittle_reward, whittle_active_rate = run_heterogenous_policy(
    simulator, n_episodes, n_epochs, discount, policy, seed, lamb=lamb
)
time_whittle = simulator.time_taken

# Combined score: mean match reward plus the activity term weighted by
# lamb * n_arms * volunteers_per_arm.
print(np.mean(whittle_reward) + whittle_active_rate*lamb*n_arms*volunteers_per_arm)

# Record match reward, active rate and time taken under this policy's name.
results.update({
    '{}_match'.format(name): np.mean(whittle_reward),
    '{}_active'.format(name): whittle_active_rate,
    '{}_time'.format(name): time_whittle,
})

instance 0, ep 1
instance 0, ep 2
instance 0, ep 3
instance 0, ep 4
instance 0, ep 5
instance 0, ep 6
instance 0, ep 7
instance 0, ep 8
instance 0, ep 9
instance 0, ep 10
instance 0, ep 11
instance 0, ep 12
instance 0, ep 13
instance 0, ep 14
instance 0, ep 15
instance 0, ep 16
instance 0, ep 17
instance 0, ep 18
instance 0, ep 19
instance 0, ep 20
instance 0, ep 21
instance 0, ep 22
instance 0, ep 23
instance 0, ep 24
instance 0, ep 25
instance 0, ep 26
instance 0, ep 27
instance 0, ep 28
instance 0, ep 29
instance 1, ep 1
instance 1, ep 2
instance 1, ep 3
instance 1, ep 4
instance 1, ep 5
instance 1, ep 6
instance 1, ep 7
instance 1, ep 8
instance 1, ep 9
instance 1, ep 10
instance 1, ep 11
instance 1, ep 12
instance 1, ep 13
instance 1, ep 14
instance 1, ep 15
instance 1, ep 16
instance 1, ep 17
instance 1, ep 18
instance 1, ep 19
instance 1, ep 20
instance 1, ep 21
instance 1, ep 22
instance 1, ep 23
instance 1, ep 24
instance 1, ep 25
instance 1, ep 26
instance 1, ep 27
instance 1

In [221]:
# Evaluate the contextual Whittle-greedy policy on the shared simulator.
name = "whittle_greedy"
policy = whittle_greedy_contextual_policy
whittle_greedy_reward, whittle_greedy_active_rate = run_heterogenous_policy(
    simulator, n_episodes, n_epochs, discount, policy, seed, lamb=lamb
)
time_whitte_greedy = simulator.time_taken

# Combined score: mean match reward plus the activity term weighted by
# lamb * n_arms * volunteers_per_arm.
print(np.mean(whittle_greedy_reward) + whittle_greedy_active_rate*lamb*n_arms*volunteers_per_arm)

# Record match reward, active rate and time taken under this policy's name.
results.update({
    '{}_match'.format(name): np.mean(whittle_greedy_reward),
    '{}_active'.format(name): whittle_greedy_active_rate,
    '{}_time'.format(name): time_whitte_greedy,
})

instance 0, ep 1
instance 0, ep 2
instance 0, ep 3
instance 0, ep 4
instance 0, ep 5
instance 0, ep 6
instance 0, ep 7
instance 0, ep 8
instance 0, ep 9
instance 0, ep 10
instance 0, ep 11
instance 0, ep 12
instance 0, ep 13
instance 0, ep 14
instance 0, ep 15
instance 0, ep 16
instance 0, ep 17
instance 0, ep 18
instance 0, ep 19
instance 0, ep 20
instance 0, ep 21
instance 0, ep 22
instance 0, ep 23
instance 0, ep 24
instance 0, ep 25
instance 0, ep 26
instance 0, ep 27
instance 0, ep 28
instance 0, ep 29
instance 1, ep 1
instance 1, ep 2
instance 1, ep 3
instance 1, ep 4
instance 1, ep 5
instance 1, ep 6
instance 1, ep 7
instance 1, ep 8
instance 1, ep 9
instance 1, ep 10
instance 1, ep 11
instance 1, ep 12
instance 1, ep 13
instance 1, ep 14
instance 1, ep 15
instance 1, ep 16
instance 1, ep 17
instance 1, ep 18
instance 1, ep 19
instance 1, ep 20
instance 1, ep 21
instance 1, ep 22
instance 1, ep 23
instance 1, ep 24
instance 1, ep 25
instance 1, ep 26
instance 1, ep 27
instance 1

In [225]:
# Evaluate the contextual future policy on the shared simulator.
name = "future_context"
policy = contextual_future_policy
future_context_reward, future_context_active_rate = run_heterogenous_policy(
    simulator, n_episodes, n_epochs, discount, policy, seed, lamb=lamb
)
time_future = simulator.time_taken

# Combined score: mean match reward plus the activity term weighted by
# lamb * n_arms * volunteers_per_arm.
print(np.mean(future_context_reward) + future_context_active_rate*lamb*n_arms*volunteers_per_arm)

# Record match reward, active rate and time taken under this policy's name.
results.update({
    '{}_match'.format(name): np.mean(future_context_reward),
    '{}_active'.format(name): future_context_active_rate,
    '{}_time'.format(name): time_future,
})

instance 0, ep 1
instance 0, ep 2
instance 0, ep 3
instance 0, ep 4
instance 0, ep 5
instance 0, ep 6
instance 0, ep 7
instance 0, ep 8
instance 0, ep 9
instance 0, ep 10
instance 0, ep 11
instance 0, ep 12
instance 0, ep 13
instance 0, ep 14
instance 0, ep 15
instance 0, ep 16
instance 0, ep 17
instance 0, ep 18
instance 0, ep 19
instance 0, ep 20
instance 0, ep 21
instance 0, ep 22
instance 0, ep 23
instance 0, ep 24
instance 0, ep 25
instance 0, ep 26
instance 0, ep 27
instance 0, ep 28
instance 0, ep 29
instance 1, ep 1
instance 1, ep 2
instance 1, ep 3
instance 1, ep 4
instance 1, ep 5
instance 1, ep 6
instance 1, ep 7
instance 1, ep 8
instance 1, ep 9
instance 1, ep 10
instance 1, ep 11
instance 1, ep 12
instance 1, ep 13
instance 1, ep 14
instance 1, ep 15
instance 1, ep 16
instance 1, ep 17
instance 1, ep 18
instance 1, ep 19
instance 1, ep 20
instance 1, ep 21
instance 1, ep 22
instance 1, ep 23
instance 1, ep 24
instance 1, ep 25
instance 1, ep 26
instance 1, ep 27
instance 1

## Write Data

In [51]:
# Output location for this run, derived from the experiment folder name, the
# random run id and the seed. `use_date` presumably adds a date stamp to the
# path — verify against rmab.utils.get_save_path.
save_path = get_save_path('semi_synthetic',save_name,seed,use_date=save_with_date)

In [132]:
# NOTE(review): presumably removes previously saved 'semi_synthetic' results
# that duplicate the current run before the new file is written — confirm the
# matching criteria in rmab.utils.delete_duplicate_results.
delete_duplicate_results('semi_synthetic',"",results)

In [53]:
# Write the results dictionary as JSON under ../results/.
# The context manager guarantees the file is flushed and closed even if
# serialisation fails part-way; the previous bare open() left that to the
# garbage collector.
with open('../results/'+save_path, 'w') as results_file:
    json.dump(results, results_file)