# Semi Synthetic Experiments

Analyze the performance of various algorithms on the joint matching + activity task when the number of volunteers is large and structured.

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import random 
import matplotlib.pyplot as plt
import json 
import argparse 
import sys
import secrets

In [3]:
from rmab.simulator import RMABSimulator
from rmab.omniscient_policies import *
from rmab.fr_dynamics import get_all_transitions
from rmab.utils import get_save_path, delete_duplicate_results

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# True when the `ipykernel` module has been loaded into this process, which is
# used below as the signal that the code runs in a notebook kernel rather than
# as a command-line script.
is_jupyter = 'ipykernel' in sys.modules

In [5]:
# Experiment settings. A script run reads them from command-line flags; a
# notebook kernel (no usable argv) takes the fixed values in the `else` branch.
if not is_jupyter:
    parser = argparse.ArgumentParser()
    # One row per option: (long flag, short flag, help text, type, default).
    for long_flag, short_flag, help_text, value_type, default_value in [
        ('--n_arms', '-N', 'num beneficiaries (arms)', int, 2),
        ('--volunteers_per_arm', '-V', 'volunteers per arm', int, 5),
        ('--episode_len', '-H', 'episode length', int, 20),
        ('--n_episodes', '-T', 'num episodes', int, 30),
        ('--budget', '-B', 'budget', int, 3),
        ('--n_epochs', '-E', 'number of epochs (num_repeats)', int, 10),
        ('--discount', '-d', 'discount factor', float, 0.9),
        ('--alpha', '-a', 'alpha: for conf radius', float, 3),
        ('--lamb', '-l', 'lambda for matching-engagement tradeoff', float, 1),
        ('--seed', '-s', 'random seed', int, 42),
        ('--prob_distro', '-p', 'which prob distro [uniform,uniform_small,uniform_large,normal]', str, 'uniform'),
        ('--time_per_run', '-t', 'time per MCTS run', float, .01*1000),
    ]:
        parser.add_argument(long_flag, short_flag, help=help_text, type=value_type, default=default_value)
    # Boolean switch; copied into `save_with_date` below.
    parser.add_argument('--use_date', action='store_true')

    args = parser.parse_args()

    # Copy the parsed flags into the module-level names the later cells read.
    seed = args.seed
    n_arms = args.n_arms
    volunteers_per_arm = args.volunteers_per_arm
    budget = args.budget
    discount = args.discount
    alpha = args.alpha
    lamb = args.lamb
    n_episodes = args.n_episodes
    episode_len = args.episode_len
    n_epochs = args.n_epochs
    prob_distro = args.prob_distro
    TIME_PER_RUN = args.time_per_run
    save_with_date = args.use_date
else:
    # Fixed settings for interactive runs.
    seed = 42
    n_arms = 2
    volunteers_per_arm = 2
    budget = 3
    discount = 0.9
    alpha = 3
    lamb = 0.5
    n_episodes = 30
    episode_len = 20
    n_epochs = 10
    prob_distro = 'normal'
    TIME_PER_RUN = 0.01 * 1000
    save_with_date = False

# Random 8-hex-character tag; passed to get_save_path when naming the output.
save_name = secrets.token_hex(4)

In [6]:
# Number of states and number of actions; both are 2 in this experiment.
# `n_states` is forwarded to RMABSimulator as `number_states`.
n_states = n_actions = 2

In [7]:
# Population size shared by the transition lookup, the feature vector and the
# match-probability list built in later cells.
all_population_size = 100 # number of random arms to generate
# NOTE(review): this call happens before random / np.random are seeded in the
# following cell — confirm get_all_transitions draws no random numbers,
# otherwise runs with the same seed may differ.
all_transitions = get_all_transitions(all_population_size)

In [8]:
# Pin both global random number generators (NumPy's and the standard
# library's) to the configured seed before any probabilities are sampled.
np.random.seed(seed)
random.seed(seed)

In [9]:
# Features are simply the integers 0 .. all_population_size - 1.
all_features = np.arange(all_population_size)

# Zero-argument samplers, one per supported `prob_distro` value. Each call
# draws a single match probability from the stdlib `random` generator.
match_probability_samplers = {
    'uniform': lambda: random.random(),
    'uniform_small': lambda: random.random()/4,
    'uniform_large': lambda: random.random()/4+0.75,
    # Gaussian draw (mean 0.25, std 0.1) clipped into [0, 1].
    'normal': lambda: np.clip(random.gauss(0.25, 0.1),0,1),
}

# Unknown distribution names are rejected before anything is sampled.
if prob_distro not in match_probability_samplers:
    raise Exception("{} probability distro not found".format(prob_distro))

draw_match_probability = match_probability_samplers[prob_distro]
# all_population_size * volunteers_per_arm independent draws, in order.
match_probabilities = [draw_match_probability() for _ in range(all_population_size * volunteers_per_arm)]

In [10]:
# Re-seed both global RNGs right before constructing the simulator, then build
# it from the population data and the experiment settings.
random.seed(seed)
np.random.seed(seed)
simulator = RMABSimulator(
    all_population_size,
    all_features,
    all_transitions,
    n_arms,
    volunteers_per_arm,
    episode_len,
    n_epochs,
    n_episodes,
    budget,
    discount,
    number_states=n_states,
    reward_style='match',
    match_probability_list=match_probabilities,
    TIME_PER_RUN=TIME_PER_RUN,
)

acting should always be good! 0.000 < 0.044
acting should always be good! 0.000 < 0.162
acting should always be good! 0.108 < 0.183
good start state should always be good! 0.380 < 0.508
good start state should always be good! 0.506 < 0.760
cohort [83 53]
cohort [41 39]
cohort [79 72]
cohort [92 64]
cohort [ 7 38]
cohort [12 59]
cohort [48 64]
cohort [18 53]
cohort [69 64]
cohort [46 87]


In [11]:
# Output record for this run. It starts with the settings that produced it;
# the policy cells below add their metrics under '<name>_...' keys.
results = {
    'parameters': {
        'seed': seed,
        'n_arms': n_arms,
        'volunteers_per_arm': volunteers_per_arm,
        'budget': budget,
        'discount': discount,
        'alpha': alpha,
        'n_episodes': n_episodes,
        'episode_len': episode_len,
        'n_epochs': n_epochs,
        'lamb': lamb,
        'time_per_run': TIME_PER_RUN,
        'prob_distro': prob_distro,
    },
}

## Index Policies

In [12]:
# Evaluate `greedy_policy` on the simulator and record its metrics.
name = "greedy"
policy = greedy_policy
greedy_match, greedy_active_rate = run_heterogenous_policy(
    simulator, n_episodes, n_epochs, discount, policy, seed, lamb=lamb
)
# Read the timing the simulator exposes after the run.
time_greedy = simulator.time_taken
greedy_discounted_reward = get_discounted_reward(
    greedy_match, greedy_active_rate, discount, lamb
)

print(greedy_discounted_reward)

# Keys are '<name>_reward', '<name>_match', '<name>_active', '<name>_time'.
results.update({
    '{}_reward'.format(name): greedy_discounted_reward,
    '{}_match'.format(name): np.mean(greedy_match),
    '{}_active'.format(name): np.mean(greedy_active_rate),
    '{}_time'.format(name): time_greedy,
})

instance 0, ep 1
instance 0, ep 2
instance 0, ep 3
instance 0, ep 4
instance 0, ep 5
instance 0, ep 6
instance 0, ep 7
instance 0, ep 8
instance 0, ep 9
instance 0, ep 10
instance 0, ep 11
instance 0, ep 12
instance 0, ep 13
instance 0, ep 14
instance 0, ep 15
instance 0, ep 16
instance 0, ep 17
instance 0, ep 18
instance 0, ep 19
instance 0, ep 20
instance 0, ep 21
instance 0, ep 22
instance 0, ep 23
instance 0, ep 24
instance 0, ep 25
instance 0, ep 26
instance 0, ep 27
instance 0, ep 28
instance 0, ep 29
instance 1, ep 1
instance 1, ep 2
instance 1, ep 3
instance 1, ep 4
instance 1, ep 5
instance 1, ep 6
instance 1, ep 7
instance 1, ep 8
instance 1, ep 9
instance 1, ep 10
instance 1, ep 11
instance 1, ep 12
instance 1, ep 13
instance 1, ep 14
instance 1, ep 15
instance 1, ep 16
instance 1, ep 17
instance 1, ep 18
instance 1, ep 19
instance 1, ep 20
instance 1, ep 21
instance 1, ep 22
instance 1, ep 23
instance 1, ep 24
instance 1, ep 25
instance 1, ep 26
instance 1, ep 27
instance 1

In [13]:
# Evaluate `random_policy` on the simulator and record its metrics.
name = "random"
policy = random_policy
random_match, random_active_rate = run_heterogenous_policy(
    simulator, n_episodes, n_epochs, discount, policy, seed, lamb=lamb
)
# Read the timing the simulator exposes after the run.
time_random = simulator.time_taken
random_discounted_reward = get_discounted_reward(
    random_match, random_active_rate, discount, lamb
)

print(random_discounted_reward)

# Keys are '<name>_reward', '<name>_match', '<name>_active', '<name>_time'.
results.update({
    '{}_reward'.format(name): random_discounted_reward,
    '{}_match'.format(name): np.mean(random_match),
    '{}_active'.format(name): np.mean(random_active_rate),
    '{}_time'.format(name): time_random,
})

instance 0, ep 1
instance 0, ep 2
instance 0, ep 3
instance 0, ep 4
instance 0, ep 5
instance 0, ep 6
instance 0, ep 7
instance 0, ep 8
instance 0, ep 9
instance 0, ep 10
instance 0, ep 11
instance 0, ep 12
instance 0, ep 13
instance 0, ep 14
instance 0, ep 15
instance 0, ep 16
instance 0, ep 17
instance 0, ep 18
instance 0, ep 19
instance 0, ep 20
instance 0, ep 21
instance 0, ep 22
instance 0, ep 23
instance 0, ep 24
instance 0, ep 25
instance 0, ep 26
instance 0, ep 27
instance 0, ep 28
instance 0, ep 29
instance 1, ep 1
instance 1, ep 2
instance 1, ep 3
instance 1, ep 4
instance 1, ep 5
instance 1, ep 6
instance 1, ep 7
instance 1, ep 8
instance 1, ep 9
instance 1, ep 10
instance 1, ep 11
instance 1, ep 12
instance 1, ep 13
instance 1, ep 14
instance 1, ep 15
instance 1, ep 16
instance 1, ep 17
instance 1, ep 18
instance 1, ep 19
instance 1, ep 20
instance 1, ep 21
instance 1, ep 22
instance 1, ep 23
instance 1, ep 24
instance 1, ep 25
instance 1, ep 26
instance 1, ep 27
instance 1

In [14]:
# Evaluate `whittle_activity_policy` on the simulator and record its metrics
# under the "whittle_engagement" prefix.
name = "whittle_engagement"
policy = whittle_activity_policy
whittle_activity_match, whittle_activity_active_rate = run_heterogenous_policy(
    simulator, n_episodes, n_epochs, discount, policy, seed, lamb=lamb
)
# Read the timing the simulator exposes after the run.
time_whittle_activity = simulator.time_taken
whittle_activity_discounted_reward = get_discounted_reward(
    whittle_activity_match, whittle_activity_active_rate, discount, lamb
)

print(whittle_activity_discounted_reward)

# Keys are '<name>_reward', '<name>_match', '<name>_active', '<name>_time'.
results.update({
    '{}_reward'.format(name): whittle_activity_discounted_reward,
    '{}_match'.format(name): np.mean(whittle_activity_match),
    '{}_active'.format(name): np.mean(whittle_activity_active_rate),
    '{}_time'.format(name): time_whittle_activity,
})

instance 0, ep 1
instance 0, ep 2
instance 0, ep 3
instance 0, ep 4
instance 0, ep 5
instance 0, ep 6
instance 0, ep 7
instance 0, ep 8
instance 0, ep 9
instance 0, ep 10
instance 0, ep 11
instance 0, ep 12
instance 0, ep 13
instance 0, ep 14
instance 0, ep 15
instance 0, ep 16
instance 0, ep 17
instance 0, ep 18
instance 0, ep 19
instance 0, ep 20
instance 0, ep 21
instance 0, ep 22
instance 0, ep 23
instance 0, ep 24
instance 0, ep 25
instance 0, ep 26
instance 0, ep 27
instance 0, ep 28
instance 0, ep 29
instance 1, ep 1
instance 1, ep 2
instance 1, ep 3
instance 1, ep 4
instance 1, ep 5
instance 1, ep 6
instance 1, ep 7
instance 1, ep 8
instance 1, ep 9
instance 1, ep 10
instance 1, ep 11
instance 1, ep 12
instance 1, ep 13
instance 1, ep 14
instance 1, ep 15
instance 1, ep 16
instance 1, ep 17
instance 1, ep 18
instance 1, ep 19
instance 1, ep 20
instance 1, ep 21
instance 1, ep 22
instance 1, ep 23
instance 1, ep 24
instance 1, ep 25
instance 1, ep 26
instance 1, ep 27
instance 1

In [15]:
# Evaluate `whittle_policy` on the simulator and record its metrics under the
# "linear_whittle" prefix.
name = "linear_whittle"
policy = whittle_policy
whittle_match, whittle_active_rate = run_heterogenous_policy(
    simulator, n_episodes, n_epochs, discount, policy, seed, lamb=lamb
)
# Read the timing the simulator exposes after the run.
time_whittle = simulator.time_taken
whittle_discounted_reward = get_discounted_reward(
    whittle_match, whittle_active_rate, discount, lamb
)

print(whittle_discounted_reward)

# Keys are '<name>_reward', '<name>_match', '<name>_active', '<name>_time'.
results.update({
    '{}_reward'.format(name): whittle_discounted_reward,
    '{}_match'.format(name): np.mean(whittle_match),
    '{}_active'.format(name): np.mean(whittle_active_rate),
    '{}_time'.format(name): time_whittle,
})

instance 0, ep 1
instance 0, ep 2
instance 0, ep 3
instance 0, ep 4
instance 0, ep 5
instance 0, ep 6
instance 0, ep 7
instance 0, ep 8
instance 0, ep 9
instance 0, ep 10
instance 0, ep 11
instance 0, ep 12
instance 0, ep 13
instance 0, ep 14
instance 0, ep 15
instance 0, ep 16
instance 0, ep 17
instance 0, ep 18
instance 0, ep 19
instance 0, ep 20
instance 0, ep 21
instance 0, ep 22
instance 0, ep 23
instance 0, ep 24
instance 0, ep 25
instance 0, ep 26
instance 0, ep 27
instance 0, ep 28
instance 0, ep 29
instance 1, ep 1
instance 1, ep 2
instance 1, ep 3
instance 1, ep 4
instance 1, ep 5
instance 1, ep 6
instance 1, ep 7
instance 1, ep 8
instance 1, ep 9
instance 1, ep 10
instance 1, ep 11
instance 1, ep 12
instance 1, ep 13
instance 1, ep 14
instance 1, ep 15
instance 1, ep 16
instance 1, ep 17
instance 1, ep 18
instance 1, ep 19
instance 1, ep 20
instance 1, ep 21
instance 1, ep 22
instance 1, ep 23
instance 1, ep 24
instance 1, ep 25
instance 1, ep 26
instance 1, ep 27
instance 1

In [16]:
# Evaluate `shapley_whittle_policy` on the simulator and record its metrics.
name = "shapley_whittle"
policy = shapley_whittle_policy
whittle_shapley_match, whittle_shapley_active_rate = run_heterogenous_policy(
    simulator, n_episodes, n_epochs, discount, policy, seed, lamb=lamb
)
# Read the timing the simulator exposes after the run.
time_whittle_shapley = simulator.time_taken
whittle_shapley_discounted_reward = get_discounted_reward(
    whittle_shapley_match, whittle_shapley_active_rate, discount, lamb
)

print(whittle_shapley_discounted_reward)

# Keys are '<name>_reward', '<name>_match', '<name>_active', '<name>_time'.
results.update({
    '{}_reward'.format(name): whittle_shapley_discounted_reward,
    '{}_match'.format(name): np.mean(whittle_shapley_match),
    '{}_active'.format(name): np.mean(whittle_shapley_active_rate),
    '{}_time'.format(name): time_whittle_shapley,
})

instance 0, ep 1
instance 0, ep 2
instance 0, ep 3
instance 0, ep 4
instance 0, ep 5
instance 0, ep 6
instance 0, ep 7
instance 0, ep 8
instance 0, ep 9
instance 0, ep 10
instance 0, ep 11
instance 0, ep 12
instance 0, ep 13
instance 0, ep 14
instance 0, ep 15
instance 0, ep 16
instance 0, ep 17
instance 0, ep 18
instance 0, ep 19
instance 0, ep 20
instance 0, ep 21
instance 0, ep 22
instance 0, ep 23
instance 0, ep 24
instance 0, ep 25
instance 0, ep 26
instance 0, ep 27
instance 0, ep 28
instance 0, ep 29
instance 1, ep 1
instance 1, ep 2
instance 1, ep 3
instance 1, ep 4
instance 1, ep 5
instance 1, ep 6
instance 1, ep 7
instance 1, ep 8
instance 1, ep 9
instance 1, ep 10
instance 1, ep 11
instance 1, ep 12
instance 1, ep 13
instance 1, ep 14
instance 1, ep 15
instance 1, ep 16
instance 1, ep 17
instance 1, ep 18
instance 1, ep 19
instance 1, ep 20
instance 1, ep 21
instance 1, ep 22
instance 1, ep 23
instance 1, ep 24
instance 1, ep 25
instance 1, ep 26
instance 1, ep 27
instance 1

In [17]:
# Evaluate `whittle_greedy_policy` on the simulator and record its metrics.
name = "whittle_greedy"
policy = whittle_greedy_policy
whittle_greedy_match, whittle_greedy_active_rate = run_heterogenous_policy(
    simulator, n_episodes, n_epochs, discount, policy, seed, lamb=lamb
)
# Read the timing the simulator exposes after the run.
time_whittle_greedy = simulator.time_taken
whittle_greedy_discounted_reward = get_discounted_reward(
    whittle_greedy_match, whittle_greedy_active_rate, discount, lamb
)

print(whittle_greedy_discounted_reward)

# Keys are '<name>_reward', '<name>_match', '<name>_active', '<name>_time'.
results.update({
    '{}_reward'.format(name): whittle_greedy_discounted_reward,
    '{}_match'.format(name): np.mean(whittle_greedy_match),
    '{}_active'.format(name): np.mean(whittle_greedy_active_rate),
    '{}_time'.format(name): time_whittle_greedy,
})

instance 0, ep 1
instance 0, ep 2
instance 0, ep 3
instance 0, ep 4
instance 0, ep 5
instance 0, ep 6
instance 0, ep 7
instance 0, ep 8
instance 0, ep 9
instance 0, ep 10
instance 0, ep 11
instance 0, ep 12
instance 0, ep 13
instance 0, ep 14
instance 0, ep 15
instance 0, ep 16
instance 0, ep 17
instance 0, ep 18
instance 0, ep 19
instance 0, ep 20
instance 0, ep 21
instance 0, ep 22
instance 0, ep 23
instance 0, ep 24
instance 0, ep 25
instance 0, ep 26
instance 0, ep 27
instance 0, ep 28
instance 0, ep 29
instance 1, ep 1
instance 1, ep 2
instance 1, ep 3
instance 1, ep 4
instance 1, ep 5
instance 1, ep 6
instance 1, ep 7
instance 1, ep 8
instance 1, ep 9
instance 1, ep 10
instance 1, ep 11
instance 1, ep 12
instance 1, ep 13
instance 1, ep 14
instance 1, ep 15
instance 1, ep 16
instance 1, ep 17
instance 1, ep 18
instance 1, ep 19
instance 1, ep 20
instance 1, ep 21
instance 1, ep 22
instance 1, ep 23
instance 1, ep 24
instance 1, ep 25
instance 1, ep 26
instance 1, ep 27
instance 1

In [18]:
# Q-iteration baseline, labelled "optimal". It is only evaluated when running
# inside a notebook kernel; script runs skip it entirely.
if is_jupyter:
    policy = q_iteration_policy
    # Extra hook forwarded to the runner via `per_epoch_function`.
    per_epoch_function = q_iteration_epoch
    name = "optimal"
    optimal_match, optimal_active_rate = run_heterogenous_policy(simulator, n_episodes, n_epochs, discount,policy,seed,lamb=lamb,per_epoch_function=per_epoch_function)
    # Read the timing the simulator exposes after the run.
    time_optimal = simulator.time_taken
    optimal_discounted_reward = get_discounted_reward(optimal_match,optimal_active_rate,discount,lamb)

    print(optimal_discounted_reward)

    # Store the metrics under the same '<name>_...' keys every other policy
    # cell uses, so the optimal baseline is included in the saved results.
    results['{}_reward'.format(name)] = optimal_discounted_reward
    results['{}_match'.format(name)] = np.mean(optimal_match)
    results['{}_active'.format(name)] = np.mean(optimal_active_rate)
    results['{}_time'.format(name)] = time_optimal

instance 0, ep 1
instance 0, ep 2
instance 0, ep 3
instance 0, ep 4
instance 0, ep 5
instance 0, ep 6
instance 0, ep 7
instance 0, ep 8
instance 0, ep 9
instance 0, ep 10
instance 0, ep 11
instance 0, ep 12
instance 0, ep 13
instance 0, ep 14
instance 0, ep 15
instance 0, ep 16
instance 0, ep 17
instance 0, ep 18
instance 0, ep 19
instance 0, ep 20
instance 0, ep 21
instance 0, ep 22
instance 0, ep 23
instance 0, ep 24
instance 0, ep 25
instance 0, ep 26
instance 0, ep 27
instance 0, ep 28
instance 0, ep 29
instance 1, ep 1
instance 1, ep 2
instance 1, ep 3
instance 1, ep 4
instance 1, ep 5
instance 1, ep 6
instance 1, ep 7
instance 1, ep 8
instance 1, ep 9
instance 1, ep 10
instance 1, ep 11
instance 1, ep 12
instance 1, ep 13
instance 1, ep 14
instance 1, ep 15
instance 1, ep 16
instance 1, ep 17
instance 1, ep 18
instance 1, ep 19
instance 1, ep 20
instance 1, ep 21
instance 1, ep 22
instance 1, ep 23
instance 1, ep 24
instance 1, ep 25
instance 1, ep 26
instance 1, ep 27
instance 1

## Write Data

In [19]:
# Ask the project helper for this run's output path, built from the
# 'semi_synthetic' experiment name, the random run tag and the seed;
# `save_with_date` is forwarded as `use_date`. The result is later appended to
# '../results/' when the file is written.
save_path = get_save_path('semi_synthetic',save_name,seed,use_date=save_with_date)

In [20]:
# presumably removes previously saved 'semi_synthetic' results that duplicate
# this run (inferred from the helper's name only) — verify against rmab.utils.
# The empty string is the helper's second positional argument; its meaning is
# not visible from this notebook.
delete_duplicate_results('semi_synthetic',"",results)

In [21]:
# Write the collected parameters and metrics as JSON to ../results/<save_path>.
# The context manager closes (and therefore flushes) the file as soon as the
# dump finishes, instead of leaving the handle open for the garbage collector.
with open('../results/'+save_path,'w') as results_file:
    json.dump(results,results_file)