# All Policies

Analyze the performance of our Whittle and Adaptive Policies

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import argparse
import json
import random
import secrets
import sys
from collections import Counter
from itertools import combinations

import matplotlib.pyplot as plt
import numpy as np

In [3]:
from rmab.simulator import run_multi_seed
from rmab.whittle_policies import *
from rmab.baseline_policies import *
from rmab.mcts_policies import *
from rmab.utils import get_save_path, delete_duplicate_results, restrict_resources

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# True when the 'ipykernel' module is already loaded in this process. The next
# cell uses this flag to pick hard-coded notebook settings instead of parsing
# command-line arguments.
is_jupyter = 'ipykernel' in sys.modules

In [6]:
if not is_jupyter:
    # Script mode: every experiment setting is read from the command line.
    parser = argparse.ArgumentParser()

    # Problem size and horizon.
    parser.add_argument('--n_arms', '-N', type=int, default=2,
                        help='num beneficiaries (arms)')
    parser.add_argument('--volunteers_per_arm', '-V', type=int, default=5,
                        help='volunteers per arm')
    parser.add_argument('--episode_len', '-H', type=int, default=50,
                        help='episode length')
    parser.add_argument('--n_episodes', '-T', type=int, default=105,
                        help='num episodes')
    parser.add_argument('--budget', '-B', type=int, default=3,
                        help='budget')
    parser.add_argument('--n_epochs', '-E', type=int, default=1,
                        help='number of epochs (num_repeats)')

    # Scalar hyper-parameters.
    parser.add_argument('--discount', '-d', type=float, default=0.9,
                        help='discount factor')
    parser.add_argument('--alpha', '-a', type=float, default=3,
                        help='alpha: for conf radius')
    parser.add_argument('--lamb', '-l', type=float, default=0.5,
                        help='lambda for matching-engagement tradeoff')

    # Reward definition.
    parser.add_argument('--universe_size', type=int, default=10,
                        help='For set cover, total num unvierse elems')
    parser.add_argument('--arm_set_low', type=float, default=3,
                        help='Least size of arm set, for set cover')
    parser.add_argument('--arm_set_high', type=float, default=6,
                        help='Largest size of arm set, for set cover')
    parser.add_argument('--reward_type', '-r', type=str, default='set_cover',
                        help='Which type of custom reward')

    # Reproducibility, data distribution and output handling.
    parser.add_argument('--seed', '-s', type=int, default=42,
                        help='random seed')
    parser.add_argument('--prob_distro', '-p', type=str, default='uniform',
                        help='which prob distro [uniform,uniform_small,uniform_large,normal]')
    parser.add_argument('--out_folder', type=str, default='iterative',
                        help='Which folder to write results to')
    parser.add_argument('--time_limit', type=float, default=100,
                        help='Online time limit for computation')
    parser.add_argument('--use_date', action='store_true')

    args = parser.parse_args()

    # Copy the parsed values into the plain module-level names used by later cells.
    seed = args.seed
    n_arms, volunteers_per_arm, budget = args.n_arms, args.volunteers_per_arm, args.budget
    n_episodes, episode_len, n_epochs = args.n_episodes, args.episode_len, args.n_epochs
    discount, alpha, lamb = args.discount, args.alpha, args.lamb
    prob_distro, reward_type = args.prob_distro, args.reward_type
    reward_parameters = dict(
        universe_size=args.universe_size,
        arm_set_low=args.arm_set_low,
        arm_set_high=args.arm_set_high,
    )
    out_folder = args.out_folder
    save_with_date = args.use_date
    time_limit = args.time_limit
else:
    # Notebook mode: fixed settings for interactive runs (no argument parsing).
    seed = 43
    n_arms, volunteers_per_arm, budget = 50, 5, 10
    n_episodes, episode_len, n_epochs = 5, 50, 1
    discount, alpha, lamb = 0.9, 3, 0.5
    prob_distro, reward_type = 'food_rescue', "probability"
    reward_parameters = dict(universe_size=20, arm_set_low=0, arm_set_high=1)
    out_folder = 'iterative'
    save_with_date = False
    time_limit = 100

# Random 8-hex-character identifier (4 random bytes) used to name this run's output.
save_name = secrets.token_hex(4)

In [7]:
# Container for everything this run produces. The 'parameters' entry records the
# settings chosen in the configuration cell and is also the dict handed to
# run_multi_seed by every policy cell below.
results = {
    'parameters': dict(
        seed=seed,
        n_arms=n_arms,
        volunteers_per_arm=volunteers_per_arm,
        budget=budget,
        discount=discount,
        alpha=alpha,
        n_episodes=n_episodes,
        episode_len=episode_len,
        n_epochs=n_epochs,
        lamb=lamb,
        prob_distro=prob_distro,
        reward_type=reward_type,
        # Flatten the three reward-specific settings into the same dict.
        **{key: reward_parameters[key]
           for key in ('universe_size', 'arm_set_low', 'arm_set_high')},
        time_limit=time_limit,
    )
}

## Index Policies

In [8]:
# One-element seed list: every policy cell below passes it to run_multi_seed.
seed_list = [seed]
# NOTE(review): restrict_resources is imported from rmab.utils; presumably it caps
# the CPU/thread usage of this process — confirm against its definition.
restrict_resources()

In [9]:
# Run the greedy policy on the shared seed list and record its metrics.
policy, name = greedy_policy, "greedy"

# n_episodes % 50 is 5 for both the notebook setting (5) and the CLI default (105).
# NOTE(review): it is 0 whenever n_episodes is a multiple of 50 — confirm intended.
rewards, memory, simulator = run_multi_seed(
    seed_list,
    policy,
    results['parameters'],
    test_length=episode_len * (n_episodes % 50),
)

# Store each metric under a "<name>_<metric>" key.
results.update({
    f'{name}_reward': rewards['reward'],
    f'{name}_match': rewards['match'],
    f'{name}_active': rewards['active_rate'],
    f'{name}_time': rewards['time'],
})
# Only this cell records the simulator's `ratio` attribute.
results['ratio'] = simulator.ratio
print(np.mean(rewards['reward']))

acting should always be good! (0, 1) 0.108 < 0.183
good start state should always be good! 0.380 < 0.508
good start state should always be good! 0.506 < 0.760
cohort [61 15 51 31 71 54 69 20 14 23 41 19 32 21 58 30 16 49 18 24 79 38 86 43
 68 84 39  9  0 74 78 56 64  2 99 57 75 76 10 12 97  7 47 63 48 28  5 35
  8 82]
instance 0, ep 1
instance 0, ep 2
instance 0, ep 3
instance 0, ep 4
Took 4.063074588775635 time for inference and 0.016371488571166992 time for training
3.7087036083433005


In [9]:
# Run the random policy on the shared seed list and record its metrics.
policy, name = random_policy, "random"

rewards, memory, simulator = run_multi_seed(
    seed_list,
    policy,
    results['parameters'],
    test_length=episode_len * (n_episodes % 50),
)

# Store each metric under a "<name>_<metric>" key.
results.update({
    f'{name}_reward': rewards['reward'],
    f'{name}_match': rewards['match'],
    f'{name}_active': rewards['active_rate'],
    f'{name}_time': rewards['time'],
})
print(np.mean(rewards['reward']))

cohort [40 88 42 87]
instance 0, ep 1
instance 0, ep 2
instance 0, ep 3
instance 0, ep 4
Took 0.04671072959899902 time for inference and 0.0002827644348144531 time for training
7.148556983964163


In [10]:
# Run whittle_activity_policy on the shared seed list and record its metrics.
policy, name = whittle_activity_policy, "whittle_activity"

rewards, memory, simulator = run_multi_seed(
    seed_list,
    policy,
    results['parameters'],
    test_length=episode_len * (n_episodes % 50),
)

# Store each metric under a "<name>_<metric>" key.
results.update({
    f'{name}_reward': rewards['reward'],
    f'{name}_match': rewards['match'],
    f'{name}_active': rewards['active_rate'],
    f'{name}_time': rewards['time'],
})
print(np.mean(rewards['reward']))

cohort [40 88 42 87]
instance 0, ep 1
instance 0, ep 2
instance 0, ep 3
instance 0, ep 4
Took 0.06586885452270508 time for inference and 0.0005936622619628906 time for training
13.314422290729675


In [19]:
# Run whittle_policy (stored under the name "linear_whittle") and record its metrics.
policy, name = whittle_policy, "linear_whittle"

rewards, memory, simulator = run_multi_seed(
    seed_list,
    policy,
    results['parameters'],
    test_length=episode_len * (n_episodes % 50),
)

# Store each metric under a "<name>_<metric>" key.
results.update({
    f'{name}_reward': rewards['reward'],
    f'{name}_match': rewards['match'],
    f'{name}_active': rewards['active_rate'],
    f'{name}_time': rewards['time'],
})
print(np.mean(rewards['reward']))

acting should always be good! (0, 1) 0.108 < 0.183
good start state should always be good! 0.380 < 0.508
good start state should always be good! 0.506 < 0.760
cohort [61 15 51 31 71 54 69 20 14 23 41 19 32 21 58 30 16 49 18 24 79 38 86 43
 68 84 39  9  0 74 78 56 64  2 99 57 75 76 10 12 97  7 47 63 48 28  5 35
  8 82]
[114, 125, 160, 163, 172, 174, 183, 188, 220, 223]
[114, 125, 126, 160, 163, 172, 174, 183, 188, 220]
[114, 125, 126, 160, 163, 172, 174, 183, 188, 220]
[114, 125, 126, 160, 163, 172, 174, 183, 220, 223]
[114, 125, 126, 158, 160, 163, 172, 174, 188, 189]
[71, 114, 126, 158, 160, 163, 172, 174, 183, 188]
[71, 114, 126, 127, 158, 160, 163, 172, 174, 183]
[71, 87, 114, 126, 127, 163, 172, 174, 183, 188]
[87, 114, 126, 127, 163, 172, 174, 183, 188, 189]
[71, 73, 87, 114, 126, 163, 172, 174, 183, 188]
[71, 73, 114, 126, 127, 160, 163, 172, 174, 189]
[71, 73, 87, 114, 127, 160, 163, 172, 174, 189]
[71, 73, 114, 127, 160, 163, 172, 174, 188, 189]
[71, 73, 114, 127, 130, 160, 163

In [29]:
# Raw text pasted from the printed output of the "linear_whittle" cell: one
# bracketed list of integers per line. The statements that follow parse it and use
# the integers as indices into simulator.match_probability_list[simulator.agent_idx].
# NOTE(review): presumably each line holds the indices selected at one timestep
# — confirm against the code that prints them.
a = """[114, 125, 160, 163, 172, 174, 183, 188, 220, 223]
[114, 125, 126, 160, 163, 172, 174, 183, 188, 220]
[114, 125, 126, 160, 163, 172, 174, 183, 188, 220]
[114, 125, 126, 160, 163, 172, 174, 183, 220, 223]
[114, 125, 126, 158, 160, 163, 172, 174, 188, 189]
[71, 114, 126, 158, 160, 163, 172, 174, 183, 188]
[71, 114, 126, 127, 158, 160, 163, 172, 174, 183]
[71, 87, 114, 126, 127, 163, 172, 174, 183, 188]
[87, 114, 126, 127, 163, 172, 174, 183, 188, 189]
[71, 73, 87, 114, 126, 163, 172, 174, 183, 188]
[71, 73, 114, 126, 127, 160, 163, 172, 174, 189]
[71, 73, 87, 114, 127, 160, 163, 172, 174, 189]
[71, 73, 114, 127, 160, 163, 172, 174, 188, 189]
[71, 73, 114, 127, 130, 160, 163, 172, 174, 189]
[71, 73, 114, 160, 172, 174, 183, 188, 189, 220]
[71, 73, 113, 114, 130, 160, 172, 174, 245, 249]
[71, 73, 114, 130, 160, 163, 174, 188, 189, 220]
[71, 73, 114, 128, 160, 172, 174, 188, 189, 220]
[71, 73, 87, 113, 114, 128, 172, 174, 189, 220]
[71, 73, 114, 126, 128, 158, 172, 174, 188, 189]
[71, 73, 113, 114, 126, 128, 172, 174, 188, 245]
[71, 73, 114, 125, 126, 128, 172, 174, 177, 189]
[71, 73, 125, 126, 158, 160, 163, 172, 174, 189]
[71, 73, 113, 114, 160, 172, 174, 189, 215, 247]
[71, 73, 114, 128, 163, 172, 174, 183, 189, 215]
[71, 73, 114, 128, 163, 172, 174, 183, 188, 189]
[71, 73, 114, 128, 160, 163, 172, 174, 188, 189]
[71, 73, 114, 127, 160, 163, 172, 174, 177, 189]
[71, 73, 114, 127, 128, 160, 172, 174, 188, 189]
[73, 114, 127, 128, 158, 160, 163, 172, 174, 188]
[73, 114, 126, 127, 158, 163, 172, 174, 188, 189]
[73, 87, 114, 126, 127, 160, 163, 172, 174, 188]
[71, 73, 87, 114, 160, 163, 172, 174, 188, 189]
[71, 73, 87, 114, 127, 160, 163, 172, 174, 183]
[71, 87, 114, 127, 160, 163, 172, 174, 183, 188]
[71, 87, 114, 125, 126, 127, 172, 174, 188, 189]
[2, 25, 71, 87, 113, 114, 125, 172, 174, 188]
[25, 71, 87, 113, 114, 158, 163, 172, 174, 179]
[25, 71, 73, 114, 127, 128, 172, 174, 179, 248]
[71, 87, 114, 125, 127, 128, 158, 172, 174, 189]
[71, 114, 125, 126, 128, 158, 163, 174, 188, 189]
[71, 114, 125, 126, 127, 128, 158, 174, 177, 188]
[71, 114, 126, 128, 158, 160, 174, 177, 188, 223]
[71, 114, 125, 158, 160, 172, 174, 177, 188, 189]
[71, 114, 126, 127, 160, 163, 172, 174, 188, 189]
[114, 126, 127, 158, 160, 163, 172, 174, 188, 189]
[114, 125, 126, 127, 158, 160, 163, 172, 174, 188]
[114, 125, 126, 127, 158, 160, 163, 172, 174, 189]
[2, 71, 114, 126, 127, 158, 163, 172, 174, 189]
[2, 71, 73, 87, 114, 127, 158, 160, 172, 174]
[26, 71, 73, 114, 127, 130, 160, 174, 177, 183]
[73, 114, 126, 127, 158, 160, 172, 174, 188, 189]
[73, 114, 127, 158, 160, 172, 174, 183, 188, 189]
[73, 114, 126, 158, 160, 172, 174, 183, 188, 189]
[73, 114, 125, 158, 160, 163, 172, 174, 188, 189]
[73, 114, 125, 158, 160, 163, 172, 174, 188, 189]
[73, 87, 114, 125, 160, 163, 172, 174, 177, 189]
[71, 73, 114, 128, 158, 160, 163, 172, 174, 189]
[71, 73, 114, 128, 158, 163, 172, 174, 183, 188]
[2, 71, 114, 126, 160, 163, 172, 174, 183, 188]
[71, 87, 114, 126, 160, 172, 174, 183, 188, 223]
[71, 73, 114, 126, 160, 172, 174, 183, 188, 223]
[71, 73, 87, 114, 126, 163, 172, 174, 183, 188]
[71, 114, 125, 126, 172, 174, 177, 188, 189, 223]
[71, 73, 114, 117, 126, 172, 174, 177, 189, 223]
[71, 73, 114, 126, 172, 174, 177, 183, 189, 223]
[71, 73, 114, 125, 126, 160, 172, 174, 177, 189]
[71, 73, 114, 126, 160, 163, 172, 174, 188, 189]
[71, 73, 114, 126, 160, 163, 172, 174, 188, 189]
[71, 114, 126, 160, 163, 172, 174, 183, 188, 189]
[71, 114, 126, 160, 163, 172, 174, 183, 188, 189]
[71, 114, 125, 160, 172, 174, 183, 188, 189, 223]
[71, 114, 125, 160, 172, 174, 183, 188, 189, 223]
[71, 114, 125, 126, 127, 163, 172, 174, 183, 189]
[71, 114, 126, 128, 163, 172, 174, 183, 188, 220]
[71, 114, 128, 158, 163, 172, 174, 183, 189, 220]
[71, 114, 128, 160, 163, 172, 174, 183, 188, 189]
[71, 73, 113, 114, 160, 163, 172, 174, 188, 245]
[71, 73, 87, 114, 128, 163, 172, 174, 183, 189]
[71, 87, 113, 114, 125, 127, 163, 174, 183, 189]
[71, 87, 114, 125, 163, 172, 174, 183, 188, 189]
[71, 114, 125, 163, 172, 174, 177, 183, 188, 189]
[71, 114, 125, 128, 163, 172, 174, 183, 188, 189]
[71, 73, 114, 163, 172, 174, 183, 188, 189, 220]
[2, 71, 73, 114, 163, 172, 174, 188, 220, 247]
[2, 71, 73, 114, 158, 163, 172, 174, 183, 188]
[71, 73, 114, 158, 160, 163, 172, 174, 183, 188]
[71, 73, 114, 126, 158, 160, 163, 172, 174, 188]
[71, 73, 87, 114, 130, 160, 172, 174, 177, 183]
[71, 87, 114, 127, 130, 160, 172, 174, 188, 220]
[71, 73, 114, 125, 127, 172, 174, 183, 188, 220]
[71, 73, 114, 125, 127, 172, 174, 183, 188, 189]
[71, 73, 114, 126, 127, 172, 174, 183, 188, 189]
[2, 71, 114, 126, 127, 158, 172, 174, 183, 189]
[2, 71, 114, 126, 158, 172, 174, 183, 189, 220]
[71, 114, 125, 126, 158, 172, 174, 188, 189, 220]
[71, 114, 125, 126, 158, 172, 174, 188, 189, 220]
[25, 26, 71, 114, 125, 126, 172, 174, 188, 220]
[2, 71, 114, 125, 126, 174, 183, 188, 189, 215]
[2, 71, 114, 125, 158, 174, 183, 188, 189, 215]
[71, 73, 87, 114, 126, 128, 160, 163, 174, 177]
[71, 73, 114, 128, 160, 163, 174, 177, 189, 220]
[71, 73, 87, 114, 160, 163, 174, 177, 188, 220]
[71, 73, 114, 160, 163, 174, 177, 188, 189, 220]
[71, 73, 87, 114, 163, 172, 174, 188, 189, 220]
[71, 73, 114, 158, 172, 174, 188, 189, 220, 223]
[73, 87, 114, 158, 163, 172, 174, 188, 189, 220]
[71, 73, 113, 114, 163, 174, 188, 189, 215, 220]
[71, 73, 113, 114, 163, 172, 174, 188, 215, 220]
[71, 73, 113, 114, 172, 174, 179, 188, 215, 220]
[71, 73, 114, 128, 172, 174, 188, 189, 215, 220]
[71, 73, 114, 117, 128, 172, 174, 179, 188, 248]
[71, 73, 114, 117, 128, 172, 174, 179, 188, 249]
[71, 73, 114, 125, 130, 172, 174, 179, 188, 189]
[71, 73, 114, 130, 163, 172, 174, 179, 188, 248]
[25, 71, 114, 128, 163, 174, 179, 188, 189, 220]
[71, 114, 126, 128, 158, 163, 172, 174, 188, 189]
[71, 87, 114, 126, 130, 163, 174, 177, 188, 189]
[2, 71, 87, 114, 126, 130, 160, 163, 174, 189]
[2, 25, 71, 114, 125, 126, 163, 172, 174, 189]
[71, 73, 114, 125, 126, 163, 172, 174, 188, 189]
[71, 73, 114, 125, 126, 163, 172, 174, 177, 189]
[71, 73, 114, 126, 128, 163, 172, 174, 188, 189]
[71, 73, 114, 125, 126, 158, 163, 174, 188, 189]
[26, 71, 73, 114, 126, 158, 172, 174, 188, 189]
[71, 73, 114, 126, 158, 163, 172, 174, 188, 189]
[71, 73, 114, 158, 163, 172, 174, 188, 189, 220]
[71, 73, 114, 128, 158, 163, 172, 174, 188, 189]
[71, 73, 87, 114, 160, 163, 172, 174, 188, 189]
[71, 73, 114, 126, 160, 163, 172, 174, 188, 189]
[71, 73, 87, 114, 126, 127, 172, 174, 188, 189]
[71, 73, 87, 114, 125, 127, 172, 174, 188, 189]
[71, 73, 87, 113, 114, 127, 158, 172, 174, 188]
[71, 73, 87, 114, 158, 172, 174, 183, 188, 189]
[71, 73, 114, 126, 127, 128, 172, 174, 188, 189]
[71, 73, 114, 128, 158, 172, 174, 183, 188, 189]
[71, 73, 114, 126, 158, 163, 172, 174, 188, 189]
[71, 73, 114, 158, 163, 172, 174, 183, 188, 189]
[71, 73, 114, 158, 160, 163, 172, 174, 188, 189]
[71, 73, 114, 158, 160, 163, 172, 174, 188, 189]
[71, 73, 114, 158, 172, 174, 177, 183, 188, 189]
[71, 114, 126, 163, 172, 174, 177, 183, 188, 189]
[71, 114, 160, 172, 174, 177, 183, 188, 189, 220]
[71, 114, 160, 172, 174, 177, 183, 188, 189, 220]
[71, 87, 114, 160, 172, 174, 177, 183, 189, 220]
[71, 114, 160, 172, 174, 177, 183, 188, 189, 220]
[71, 87, 114, 160, 172, 174, 177, 183, 188, 220]
[71, 114, 160, 174, 177, 183, 188, 189, 220, 223]
[71, 114, 160, 163, 174, 177, 183, 188, 189, 220]
[71, 73, 114, 160, 163, 174, 177, 183, 188, 189]
[71, 73, 87, 114, 125, 128, 172, 174, 188, 220]
[73, 87, 114, 125, 128, 158, 172, 174, 177, 220]
[73, 87, 114, 125, 128, 158, 172, 174, 177, 189]
[71, 73, 87, 114, 125, 158, 172, 174, 177, 189]
[71, 73, 87, 114, 125, 158, 172, 174, 188, 189]
[71, 73, 87, 114, 172, 174, 179, 188, 189, 248]
[71, 73, 87, 113, 114, 127, 163, 172, 174, 189]
[25, 71, 73, 113, 114, 128, 163, 172, 174, 188]
[71, 73, 87, 114, 128, 158, 172, 174, 188, 223]
[71, 73, 114, 126, 128, 158, 160, 172, 174, 177]
[71, 114, 126, 127, 128, 160, 172, 174, 188, 189]
[71, 73, 114, 128, 172, 174, 177, 188, 189, 249]
[71, 73, 114, 128, 172, 174, 177, 188, 189, 245]
[71, 73, 114, 160, 172, 174, 177, 179, 188, 189]
[71, 73, 114, 125, 126, 172, 174, 177, 188, 189]
[71, 73, 114, 125, 127, 128, 172, 174, 188, 189]
[71, 73, 114, 127, 128, 160, 172, 174, 188, 189]
[71, 73, 114, 127, 128, 172, 174, 188, 189, 220]
[71, 73, 114, 125, 126, 172, 174, 179, 215, 220]
[73, 114, 125, 126, 158, 160, 172, 174, 183, 188]
[73, 114, 125, 126, 128, 158, 160, 172, 174, 188]
[71, 73, 114, 128, 158, 160, 172, 174, 189, 220]
[71, 73, 114, 127, 128, 158, 160, 163, 172, 174]
[71, 73, 114, 126, 128, 158, 160, 163, 174, 189]
[71, 73, 114, 126, 128, 158, 172, 174, 177, 189]
[71, 73, 114, 126, 128, 160, 172, 174, 188, 189]
[71, 73, 126, 128, 158, 160, 172, 174, 188, 189]
[71, 73, 114, 125, 158, 160, 172, 174, 183, 189]
[71, 73, 114, 117, 125, 160, 172, 174, 183, 189]
[71, 73, 114, 160, 172, 174, 177, 183, 189, 223]
[71, 73, 114, 158, 172, 174, 177, 183, 188, 189]
[71, 73, 114, 158, 172, 174, 183, 188, 189, 249]
[71, 73, 87, 114, 127, 172, 174, 179, 183, 188]
[71, 113, 114, 125, 127, 160, 172, 174, 183, 188]
[71, 73, 113, 114, 125, 127, 172, 174, 188, 189]
[71, 73, 114, 125, 127, 163, 172, 174, 188, 189]
[71, 73, 114, 125, 127, 160, 163, 172, 174, 188]
[73, 87, 114, 125, 130, 160, 163, 172, 174, 188]
[71, 73, 114, 125, 130, 160, 163, 172, 174, 188]
[71, 73, 87, 114, 130, 160, 163, 172, 174, 188]
[71, 87, 114, 160, 163, 172, 174, 177, 188, 189]
[2, 25, 71, 114, 160, 163, 172, 174, 177, 188]
[71, 87, 114, 163, 172, 174, 177, 189, 220, 223]
[71, 87, 114, 160, 163, 172, 174, 177, 188, 220]
[71, 114, 127, 163, 172, 174, 177, 188, 189, 220]
[71, 114, 126, 163, 172, 174, 177, 188, 189, 223]
[71, 114, 125, 126, 158, 163, 172, 174, 177, 189]
[71, 114, 126, 128, 158, 163, 172, 174, 177, 188]
[71, 114, 126, 128, 163, 172, 174, 177, 188, 189]
[71, 114, 126, 128, 160, 163, 172, 174, 177, 189]
[73, 114, 126, 127, 158, 160, 163, 174, 177, 183]
[73, 114, 126, 158, 160, 163, 174, 183, 189, 215]
[71, 73, 87, 114, 158, 160, 163, 174, 183, 223]
[73, 87, 114, 126, 158, 160, 163, 174, 183, 223]
[73, 87, 114, 126, 158, 160, 174, 183, 188, 189]
[26, 71, 73, 87, 114, 158, 174, 183, 188, 189]
[71, 73, 87, 114, 127, 158, 172, 174, 177, 183]
[71, 73, 114, 125, 126, 158, 172, 174, 183, 189]
[71, 73, 114, 125, 126, 158, 174, 177, 183, 188]
[26, 71, 113, 114, 125, 128, 158, 172, 174, 188]
[71, 113, 114, 125, 158, 172, 174, 188, 189, 215]
[71, 114, 125, 128, 158, 160, 172, 174, 189, 215]
[71, 114, 127, 158, 160, 172, 174, 183, 189, 215]
[71, 73, 114, 158, 163, 172, 174, 183, 189, 249]
[71, 73, 114, 158, 160, 163, 172, 174, 183, 189]
[71, 73, 114, 126, 158, 163, 172, 174, 183, 189]
[71, 114, 126, 158, 160, 163, 172, 174, 183, 189]
[71, 114, 126, 158, 160, 163, 172, 174, 183, 189]
[25, 71, 87, 114, 158, 160, 172, 174, 183, 188]
[71, 87, 114, 158, 160, 174, 177, 183, 188, 220]
[71, 114, 125, 158, 160, 163, 174, 183, 188, 189]
[71, 114, 126, 128, 160, 163, 172, 174, 188, 189]
[71, 114, 126, 128, 158, 160, 163, 172, 174, 189]
[114, 126, 128, 158, 160, 163, 172, 174, 188, 189]
[71, 114, 126, 160, 163, 172, 174, 183, 189, 220]
[71, 114, 126, 127, 160, 163, 172, 174, 188, 189]
[25, 71, 73, 87, 114, 127, 160, 172, 174, 189]
[71, 73, 87, 114, 127, 160, 172, 174, 189, 220]
[71, 73, 114, 127, 160, 163, 172, 174, 188, 189]
[71, 73, 114, 127, 160, 163, 172, 174, 188, 189]
[71, 73, 87, 114, 127, 160, 172, 174, 183, 189]
[71, 73, 87, 114, 127, 172, 174, 183, 188, 189]
[26, 71, 73, 87, 114, 172, 174, 183, 188, 189]
[71, 73, 114, 117, 172, 174, 183, 188, 189, 249]
[71, 73, 87, 114, 171, 172, 174, 179, 189, 245]
[71, 73, 87, 114, 160, 163, 172, 174, 189, 248]
[71, 73, 87, 114, 160, 172, 174, 215, 220, 249]
[26, 71, 114, 160, 163, 172, 174, 183, 215, 220]
[71, 87, 114, 127, 160, 163, 172, 174, 183, 220]
[71, 114, 125, 127, 163, 172, 174, 189, 220, 223]
[71, 114, 125, 127, 163, 172, 174, 177, 220, 223]
[71, 114, 158, 160, 163, 172, 174, 177, 183, 220]
[71, 114, 158, 160, 163, 172, 174, 177, 183, 188]
[71, 87, 114, 158, 163, 172, 174, 177, 183, 220]
[71, 87, 114, 127, 130, 163, 174, 183, 188, 215]
[71, 73, 114, 160, 163, 172, 174, 183, 188, 189]
[26, 71, 73, 114, 160, 172, 174, 183, 188, 189]
[2, 71, 73, 114, 160, 163, 172, 174, 188, 189]
[2, 71, 73, 114, 160, 172, 174, 188, 189, 220]
[2, 26, 71, 73, 114, 117, 160, 172, 174, 220]"""
# Compare how often each index appears in the pasted log `a` with that index's
# entry in simulator.match_probability_list[simulator.agent_idx].
#
# NOTE(review): `simulator` is whatever the most recently executed policy cell
# left behind; re-run the "linear_whittle" cell first so it matches the pasted log.

# Parse each non-blank line as a JSON list of ints. json.loads replaces eval, so
# the text is never executed, and `a` is left untouched so this can be re-run.
selections_per_row = [json.loads(row) for row in a.splitlines() if row.strip()]
n_rows = len(selections_per_row)

# Counter keeps first-seen order, so the rows print in order of first appearance.
selection_counts = Counter(idx for row in selections_per_row for idx in row)

match_probabilities = simulator.match_probability_list[simulator.agent_idx]
for idx, count in selection_counts.items():
    # Columns: index, fraction of rows containing it, its match-probability entry.
    print(idx, count / n_rows, match_probabilities[idx])

114 0.992 0.08468468468468468
125 0.268 0.04767123287671233
160 0.52 0.06261682242990654
163 0.544 0.06261682242990654
172 0.876 0.09050301029326083
174 1.0 0.17872523686477174
183 0.396 0.04389420371412493
188 0.672 0.06734867860187553
220 0.228 0.04
223 0.08 0.03711790393013101
126 0.376 0.06463595839524518
158 0.376 0.049248747913188645
189 0.676 0.06734867860187553
71 0.884 0.05738476011288805
127 0.256 0.06463595839524518
87 0.268 0.039603960396039604
73 0.628 0.05024711696869852
130 0.056 0.03674540682414698
113 0.072 0.03568627450980392
245 0.02 0.027651268705270005
249 0.028 0.027651268705270005
128 0.24 0.06463595839524518
177 0.232 0.04332129963898917
215 0.064 0.035830618892508145
247 0.008 0.027651268705270005
2 0.064 0.03551609322974473
25 0.04 0.035337879727216366
179 0.052 0.03365114974761638
248 0.02 0.027651268705270005
26 0.036 0.035337879727216366
117 0.024 0.02804878048780488
171 0.004 0.02614938743248584


In [12]:
# Q-iteration policy (stored as "optimal"); only run when n_arms * volunteers_per_arm <= 4.
if n_arms * volunteers_per_arm <= 4:
    policy, name = q_iteration_policy, "optimal"
    per_epoch_function = q_iteration_custom_epoch()

    rewards, memory, simulator = run_multi_seed(
        seed_list,
        policy,
        results['parameters'],
        per_epoch_function=per_epoch_function,
        test_length=episode_len * (n_episodes % 50),
    )

    # Store each metric under a "<name>_<metric>" key.
    results.update({
        f'{name}_reward': rewards['reward'],
        f'{name}_match': rewards['match'],
        f'{name}_active': rewards['active_rate'],
        f'{name}_time': rewards['time'],
    })
    print(np.mean(rewards['reward']))

cohort [40 88 42 87]
instance 0, ep 1
instance 0, ep 2
instance 0, ep 3
instance 0, ep 4
Took 0.05080914497375488 time for inference and 5.8581366539001465 time for training
13.572425195709457


In [11]:
# shapley_whittle_custom_policy with 1000 Shapley iterations; skipped when
# n_arms * volunteers_per_arm exceeds 1000.
if n_arms * volunteers_per_arm <= 1000:
    policy, name = shapley_whittle_custom_policy, "shapley_whittle_custom"

    rewards, memory, simulator = run_multi_seed(
        seed_list,
        policy,
        results['parameters'],
        test_length=episode_len * (n_episodes % 50),
        shapley_iterations=1000,
    )

    # Store each metric under a "<name>_<metric>" key.
    results.update({
        f'{name}_reward': rewards['reward'],
        f'{name}_match': rewards['match'],
        f'{name}_active': rewards['active_rate'],
        f'{name}_time': rewards['time'],
    })
    print(np.mean(rewards['reward']))

acting should always be good! (0, 1) 0.108 < 0.183
good start state should always be good! 0.380 < 0.508
good start state should always be good! 0.506 < 0.760
cohort [80  1 98 86 89 61 50 66 18 49 17  5 40 73 23 20 24 44 32 54]
instance 0, ep 1
instance 0, ep 2
instance 0, ep 3
instance 0, ep 4
Took 0.14577889442443848 time for inference and 0.1260237693786621 time for training
3.1664266867321205


In [16]:
# mcts_linear_policy with 400 MCTS test iterations; skipped when
# n_arms * volunteers_per_arm exceeds 25.
if n_arms * volunteers_per_arm <= 25:
    policy, name = mcts_linear_policy, "mcts_linear"

    rewards, memory, simulator = run_multi_seed(
        seed_list,
        policy,
        results['parameters'],
        test_length=episode_len * (n_episodes % 50),
        mcts_test_iterations=400,
    )

    # Store each metric under a "<name>_<metric>" key.
    results.update({
        f'{name}_reward': rewards['reward'],
        f'{name}_match': rewards['match'],
        f'{name}_active': rewards['active_rate'],
        f'{name}_time': rewards['time'],
    })
    print(np.mean(rewards['reward']))


cohort [82  9 66 53]


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  choices_weights = [(c.q() / c.n()) + c_param * np.sqrt((2 * np.log(self.n()) / c.n())) for c in self.children]
  choices_weights = [(c.q() / c.n()) + c_param * np.sqrt((2 * np.log(self.n()) / c.n())) for c in self.children]


instance 0, ep 1
instance 0, ep 2
instance 0, ep 3
instance 0, ep 4
Took 15.201551914215088 time for inference and 0.12212157249450684 time for training
5.75516038777406


In [51]:
# mcts_shapley_policy with 400 MCTS test iterations; skipped when
# n_arms * volunteers_per_arm exceeds 25.
if n_arms * volunteers_per_arm <= 25:
    policy, name = mcts_shapley_policy, "mcts_shapley"

    rewards, memory, simulator = run_multi_seed(
        seed_list,
        policy,
        results['parameters'],
        test_length=episode_len * (n_episodes % 50),
        mcts_test_iterations=400,
    )

    # Store each metric under a "<name>_<metric>" key.
    results.update({
        f'{name}_reward': rewards['reward'],
        f'{name}_match': rewards['match'],
        f'{name}_active': rewards['active_rate'],
        f'{name}_time': rewards['time'],
    })
    print(np.mean(rewards['reward']))


cohort [ 6 19 10  7  1  9 12 16 17  8 14  0 13  4  5  3 18 11  2 15]
instance 0, ep 1
instance 0, ep 2
instance 0, ep 3
instance 0, ep 4
instance 0, ep 5
instance 0, ep 6
instance 0, ep 7
instance 0, ep 8
instance 0, ep 9
instance 0, ep 10
instance 0, ep 11
instance 0, ep 12
instance 0, ep 13
instance 0, ep 14
instance 0, ep 15
instance 0, ep 16
instance 0, ep 17
instance 0, ep 18
instance 0, ep 19
instance 0, ep 20
instance 0, ep 21
instance 0, ep 22
instance 0, ep 23
instance 0, ep 24
instance 0, ep 25
instance 0, ep 26
instance 0, ep 27
instance 0, ep 28
instance 0, ep 29
instance 0, ep 30
instance 0, ep 31
instance 0, ep 32
instance 0, ep 33
instance 0, ep 34
instance 0, ep 35
instance 0, ep 36
instance 0, ep 37
instance 0, ep 38
instance 0, ep 39
instance 0, ep 40
instance 0, ep 41
instance 0, ep 42
instance 0, ep 43
instance 0, ep 44
instance 0, ep 45
instance 0, ep 46
instance 0, ep 47
instance 0, ep 48
instance 0, ep 49
instance 0, ep 50
instance 0, ep 51
instance 0, ep 52
inst

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  choices_weights = [(c.q() / c.n()) + c_param * np.sqrt((2 * np.log(self.n()) / c.n())) for c in self.children]
  choices_weights = [(c.q() / c.n()) + c_param * np.sqrt((2 * np.log(self.n()) / c.n())) for c in self.children]


instance 0, ep 101
instance 0, ep 102
instance 0, ep 103
instance 0, ep 104
Took 193.34545016288757 time for inference and 2.6214048862457275 time for training
6.625318690059936


In [18]:
# mcts_shapley_policy again, this time with 40 MCTS test iterations; skipped when
# n_arms * volunteers_per_arm exceeds 25.
if n_arms * volunteers_per_arm <= 25:
    policy, name = mcts_shapley_policy, "mcts_shapley_40"

    rewards, memory, simulator = run_multi_seed(
        seed_list,
        policy,
        results['parameters'],
        test_length=episode_len * (n_episodes % 50),
        mcts_test_iterations=40,
    )

    # Store each metric under a "<name>_<metric>" key.
    results.update({
        f'{name}_reward': rewards['reward'],
        f'{name}_match': rewards['match'],
        f'{name}_active': rewards['active_rate'],
        f'{name}_time': rewards['time'],
    })
    print(np.mean(rewards['reward']))


cohort [82  9 66 53]
instance 0, ep 1
instance 0, ep 2


KeyboardInterrupt: 

In [None]:
# mcts_shapley_policy with only 4 MCTS test iterations; skipped when
# n_arms * volunteers_per_arm exceeds 25.
if n_arms * volunteers_per_arm <= 25:
    policy, name = mcts_shapley_policy, "mcts_shapley_4"

    rewards, memory, simulator = run_multi_seed(
        seed_list,
        policy,
        results['parameters'],
        test_length=episode_len * (n_episodes % 50),
        mcts_test_iterations=4,
    )

    # Store each metric under a "<name>_<metric>" key.
    results.update({
        f'{name}_reward': rewards['reward'],
        f'{name}_match': rewards['match'],
        f'{name}_active': rewards['active_rate'],
        f'{name}_time': rewards['time'],
    })
    print(np.mean(rewards['reward']))


cohort [61 54 87 93]
instance 0, ep 1
instance 0, ep 2
instance 0, ep 3
instance 0, ep 4
instance 0, ep 5
instance 0, ep 6
instance 0, ep 7
instance 0, ep 8
instance 0, ep 9
instance 0, ep 10
instance 0, ep 11
instance 0, ep 12
instance 0, ep 13
instance 0, ep 14
instance 0, ep 15
instance 0, ep 16
instance 0, ep 17
instance 0, ep 18
instance 0, ep 19
instance 0, ep 20
instance 0, ep 21
instance 0, ep 22
instance 0, ep 23
instance 0, ep 24
instance 0, ep 25
instance 0, ep 26
instance 0, ep 27
instance 0, ep 28
instance 0, ep 29
instance 0, ep 30
instance 0, ep 31
instance 0, ep 32
instance 0, ep 33
instance 0, ep 34
instance 0, ep 35
instance 0, ep 36
instance 0, ep 37
instance 0, ep 38
instance 0, ep 39
instance 0, ep 40
instance 0, ep 41
instance 0, ep 42
instance 0, ep 43
instance 0, ep 44
instance 0, ep 45
instance 0, ep 46
instance 0, ep 47
instance 0, ep 48
instance 0, ep 49
instance 0, ep 50
instance 0, ep 51
instance 0, ep 52
instance 0, ep 53
instance 0, ep 54
instance 0, ep 5

In [None]:
# whittle_iterative_policy; skipped when n_arms * volunteers_per_arm exceeds 250.
if n_arms * volunteers_per_arm <= 250:
    policy, name = whittle_iterative_policy, "iterative_whittle"

    rewards, memory, simulator = run_multi_seed(
        seed_list,
        policy,
        results['parameters'],
        test_length=episode_len * (n_episodes % 50),
    )

    # Store each metric under a "<name>_<metric>" key.
    results.update({
        f'{name}_reward': rewards['reward'],
        f'{name}_match': rewards['match'],
        f'{name}_active': rewards['active_rate'],
        f'{name}_time': rewards['time'],
    })
    print(np.mean(rewards['reward']))

cohort [61 54 87 93]
instance 0, ep 1
instance 0, ep 2
instance 0, ep 3
instance 0, ep 4
instance 0, ep 5
instance 0, ep 6
instance 0, ep 7
instance 0, ep 8
instance 0, ep 9
instance 0, ep 10
instance 0, ep 11
instance 0, ep 12
instance 0, ep 13
instance 0, ep 14
instance 0, ep 15
instance 0, ep 16
instance 0, ep 17
instance 0, ep 18
instance 0, ep 19
instance 0, ep 20
instance 0, ep 21
instance 0, ep 22
instance 0, ep 23
instance 0, ep 24
instance 0, ep 25
instance 0, ep 26
instance 0, ep 27
instance 0, ep 28
instance 0, ep 29
instance 0, ep 30
instance 0, ep 31
instance 0, ep 32
instance 0, ep 33
instance 0, ep 34
instance 0, ep 35
instance 0, ep 36
instance 0, ep 37
instance 0, ep 38
instance 0, ep 39
instance 0, ep 40
instance 0, ep 41
instance 0, ep 42
instance 0, ep 43
instance 0, ep 44
instance 0, ep 45
instance 0, ep 46
instance 0, ep 47
instance 0, ep 48
instance 0, ep 49
instance 0, ep 50
instance 0, ep 51
instance 0, ep 52
instance 0, ep 53
instance 0, ep 54
instance 0, ep 5

In [10]:
# shapley_whittle_iterative_policy with 1000 Shapley iterations; skipped when
# n_arms * volunteers_per_arm exceeds 25.
if n_arms * volunteers_per_arm <= 25:
    policy, name = shapley_whittle_iterative_policy, "shapley_iterative_whittle"

    rewards, memory, simulator = run_multi_seed(
        seed_list,
        policy,
        results['parameters'],
        test_length=episode_len * (n_episodes % 50),
        shapley_iterations=1000,
    )

    # Store each metric under a "<name>_<metric>" key.
    results.update({
        f'{name}_reward': rewards['reward'],
        f'{name}_match': rewards['match'],
        f'{name}_active': rewards['active_rate'],
        f'{name}_time': rewards['time'],
    })
    print(np.mean(rewards['reward']))

acting should always be good! (0, 1) 0.108 < 0.183
good start state should always be good! 0.380 < 0.508
good start state should always be good! 0.506 < 0.760
cohort [80  1 98 86 89 61 50 66 18 49 17  5 40 73 23 20 24 44 32 54]


  shapley_indices = shapley_indices / num_by_shapley_index


KeyboardInterrupt: 

In [None]:
# shapley_whittle_iterative_policy with 100 Shapley iterations; skipped when
# n_arms * volunteers_per_arm exceeds 50.
if n_arms * volunteers_per_arm <= 50:
    policy, name = shapley_whittle_iterative_policy, "shapley_iterative_whittle_100"

    rewards, memory, simulator = run_multi_seed(
        seed_list,
        policy,
        results['parameters'],
        test_length=episode_len * (n_episodes % 50),
        shapley_iterations=100,
    )

    # Store each metric under a "<name>_<metric>" key.
    results.update({
        f'{name}_reward': rewards['reward'],
        f'{name}_match': rewards['match'],
        f'{name}_active': rewards['active_rate'],
        f'{name}_time': rewards['time'],
    })
    print(np.mean(rewards['reward']))

cohort [61 54 87 93]
instance 0, ep 1
instance 0, ep 2
instance 0, ep 3
instance 0, ep 4
instance 0, ep 5
instance 0, ep 6
instance 0, ep 7
instance 0, ep 8
instance 0, ep 9
instance 0, ep 10
instance 0, ep 11
instance 0, ep 12
instance 0, ep 13
instance 0, ep 14
instance 0, ep 15
instance 0, ep 16
instance 0, ep 17
instance 0, ep 18
instance 0, ep 19
instance 0, ep 20
instance 0, ep 21
instance 0, ep 22
instance 0, ep 23
instance 0, ep 24
instance 0, ep 25
instance 0, ep 26
instance 0, ep 27
instance 0, ep 28
instance 0, ep 29
instance 0, ep 30
instance 0, ep 31
instance 0, ep 32
instance 0, ep 33
instance 0, ep 34
instance 0, ep 35
instance 0, ep 36
instance 0, ep 37
instance 0, ep 38
instance 0, ep 39
instance 0, ep 40
instance 0, ep 41
instance 0, ep 42
instance 0, ep 43
instance 0, ep 44
instance 0, ep 45
instance 0, ep 46
instance 0, ep 47
instance 0, ep 48
instance 0, ep 49
instance 0, ep 50
instance 0, ep 51
instance 0, ep 52
instance 0, ep 53
instance 0, ep 54
instance 0, ep 5

In [None]:
# shapley_whittle_iterative_policy with 10 Shapley iterations; skipped when
# n_arms * volunteers_per_arm exceeds 50.
if n_arms * volunteers_per_arm <= 50:
    policy = shapley_whittle_iterative_policy
    name = "shapley_iterative_whittle_10"

    # Fix: this cell used test_length=episode_len, while every other policy cell
    # (including the 1/100/1000-iteration variants of this same policy) uses
    # episode_len*(n_episodes%50). Using the shared horizon keeps the rewards
    # stored here comparable with the rest of `results`.
    rewards, memory, simulator = run_multi_seed(
        seed_list,
        policy,
        results['parameters'],
        test_length=episode_len*(n_episodes%50),
        shapley_iterations=10,
    )

    # Store each metric under a "<name>_<metric>" key.
    results['{}_reward'.format(name)] = rewards['reward']
    results['{}_match'.format(name)] = rewards['match']
    results['{}_active'.format(name)] = rewards['active_rate']
    results['{}_time'.format(name)] = rewards['time']
    print(np.mean(rewards['reward']))

cohort [61 54 87 93]
instance 0, ep 1
instance 0, ep 2
instance 0, ep 3
instance 0, ep 4
instance 0, ep 5
instance 0, ep 6
instance 0, ep 7
instance 0, ep 8
instance 0, ep 9
instance 0, ep 10
instance 0, ep 11
instance 0, ep 12
instance 0, ep 13
instance 0, ep 14
instance 0, ep 15
instance 0, ep 16
instance 0, ep 17
instance 0, ep 18
instance 0, ep 19
instance 0, ep 20
instance 0, ep 21
instance 0, ep 22
instance 0, ep 23
instance 0, ep 24
instance 0, ep 25
instance 0, ep 26
instance 0, ep 27
instance 0, ep 28
instance 0, ep 29
instance 0, ep 30
instance 0, ep 31
instance 0, ep 32
instance 0, ep 33
instance 0, ep 34
instance 0, ep 35
instance 0, ep 36
instance 0, ep 37
instance 0, ep 38
instance 0, ep 39
instance 0, ep 40
instance 0, ep 41
instance 0, ep 42
instance 0, ep 43
instance 0, ep 44
instance 0, ep 45
instance 0, ep 46
instance 0, ep 47
instance 0, ep 48
instance 0, ep 49
instance 0, ep 50
instance 0, ep 51
instance 0, ep 52
instance 0, ep 53
instance 0, ep 54
instance 0, ep 5

In [None]:
# shapley_whittle_iterative_policy with a single Shapley iteration; skipped when
# n_arms * volunteers_per_arm exceeds 50.
if n_arms * volunteers_per_arm <= 50:
    policy, name = shapley_whittle_iterative_policy, "shapley_iterative_whittle_1"

    rewards, memory, simulator = run_multi_seed(
        seed_list,
        policy,
        results['parameters'],
        test_length=episode_len * (n_episodes % 50),
        shapley_iterations=1,
    )

    # Store each metric under a "<name>_<metric>" key.
    results.update({
        f'{name}_reward': rewards['reward'],
        f'{name}_match': rewards['match'],
        f'{name}_active': rewards['active_rate'],
        f'{name}_time': rewards['time'],
    })
    print(np.mean(rewards['reward']))

cohort [61 54 87 93]
instance 0, ep 1


instance 0, ep 2
instance 0, ep 3
instance 0, ep 4
instance 0, ep 5
instance 0, ep 6
instance 0, ep 7
instance 0, ep 8
instance 0, ep 9
instance 0, ep 10
instance 0, ep 11
instance 0, ep 12
instance 0, ep 13
instance 0, ep 14
instance 0, ep 15
instance 0, ep 16
instance 0, ep 17
instance 0, ep 18
instance 0, ep 19
instance 0, ep 20
instance 0, ep 21
instance 0, ep 22
instance 0, ep 23
instance 0, ep 24
instance 0, ep 25
instance 0, ep 26
instance 0, ep 27
instance 0, ep 28
instance 0, ep 29
instance 0, ep 30
instance 0, ep 31
instance 0, ep 32
instance 0, ep 33
instance 0, ep 34
instance 0, ep 35
instance 0, ep 36
instance 0, ep 37
instance 0, ep 38
instance 0, ep 39
instance 0, ep 40
instance 0, ep 41
instance 0, ep 42
instance 0, ep 43
instance 0, ep 44
instance 0, ep 45
instance 0, ep 46
instance 0, ep 47
instance 0, ep 48
instance 0, ep 49
instance 0, ep 50
instance 0, ep 51
instance 0, ep 52
instance 0, ep 53
instance 0, ep 54
instance 0, ep 55
instance 0, ep 56
instance 0, ep 57


  shapley_indices /= num_by_shapley_index


## Write Data

In [None]:
# Ask the project helper for this run's output path, built from the output folder,
# the random run name and the seed (optionally dated). The final cell appends the
# result to '../../results/'.
save_path = get_save_path(out_folder,save_name,seed,use_date=save_with_date)

In [None]:
# NOTE(review): delete_duplicate_results is imported from rmab.utils; presumably it
# removes earlier result files in out_folder whose parameters match `results`
# — confirm against its definition before relying on it.
delete_duplicate_results(out_folder,"",results)

In [None]:
# Serialise the collected results as JSON. The context manager guarantees the
# file is flushed and closed even if json.dump raises; the original passed a bare
# open(...) handle that was never closed.
with open('../../results/' + save_path, 'w') as results_file:
    json.dump(results, results_file)