In [64]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [65]:
import numpy as np
import random 
import matplotlib.pyplot as plt
import argparse
import secrets
import json
import sys
import math 

In [66]:
from patient.simulator import run_multi_seed
from patient.baseline_policies import *
from patient.lp_policies import *
from patient.group_based_policies import *
from patient.ordering_policies import *
from patient.provider_policies import *
from patient.utils import get_save_path, delete_duplicate_results, restrict_resources, one_shot_policy, MyEncoder

In [67]:
# True when the ipykernel module has been imported, i.e. the code is running inside a
# Jupyter kernel rather than as a plain script. Used to choose between the hard-coded
# notebook parameters and command-line arguments.
is_jupyter = 'ipykernel' in sys.modules

In [96]:
# Experiment configuration.
# In a notebook the parameters are hard-coded in the first branch; when run as a script
# they are read from the command line. Both branches define the same set of variables.
if is_jupyter:
    seed = 43
    num_patients = 1225
    num_providers = 700
    provider_capacity = 1
    top_choice_prob = 0.9
    true_top_choice_prob = 0.9
    choice_model = "uniform_choice"
    exit_option = 0.5
    utility_function = "semi_synthetic"
    out_folder = "policy_comparison"
    num_repetitions = 1
    num_trials = 100
    context_dim = 5
    max_menu_size = 1000
    previous_patients_per_provider = 10
    batch_size = 1
    order = "custom"
    fairness_weight = 0.5
else:
    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', help='Random seed', type=int, default=42)
    parser.add_argument('--n_patients', '-N', help='Number of patients', type=int, default=100)
    parser.add_argument('--n_providers', help='Number of providers', type=int, default=100)
    parser.add_argument('--batch_size', help='Batch size', type=int, default=1)
    parser.add_argument('--n_trials', help='Number of trials', type=int, default=100)
    parser.add_argument('--top_choice_prob', help='Probability of picking top choice', type=float, default=0.75)
    parser.add_argument('--true_top_choice_prob', help='True probability of picking top choice', type=float, default=0.75)
    parser.add_argument('--context_dim', help='Context dim for patients and providers', type=int, default=5)
    # The help texts below previously repeated unrelated descriptions (copy-paste errors).
    parser.add_argument('--max_menu_size', help='Maximum menu size', type=int, default=50)
    parser.add_argument('--num_repetitions', help='Number of repetitions', type=int, default=1)
    parser.add_argument('--previous_patients_per_provider', help='Number of previous patients per provider', type=int, default=10)
    parser.add_argument('--provider_capacity', help='Provider Capacity', type=int, default=5)
    parser.add_argument('--choice_model', help='Which choice model for patients', type=str, default='uniform_choice')
    parser.add_argument('--exit_option', help='What is the value of the exit option', type=float, default=0.5)
    parser.add_argument('--out_folder', help='Which folder to write results to', type=str, default='policy_comparison')
    parser.add_argument('--utility_function', help='Which utility function to use', type=str, default='uniform')
    parser.add_argument('--order', help='Which ordering to use', type=str, default='custom')
    parser.add_argument('--fairness_weight', help='How much to weight fairness', type=float, default=0)

    args = parser.parse_args()

    seed = args.seed
    num_patients = args.n_patients
    num_providers = args.n_providers
    provider_capacity = args.provider_capacity
    top_choice_prob = args.top_choice_prob
    choice_model = args.choice_model
    exit_option = args.exit_option
    out_folder = args.out_folder
    num_trials = args.n_trials
    context_dim = args.context_dim
    num_repetitions = args.num_repetitions
    true_top_choice_prob = args.true_top_choice_prob
    max_menu_size = args.max_menu_size
    utility_function = args.utility_function
    order = args.order
    previous_patients_per_provider = args.previous_patients_per_provider
    batch_size = args.batch_size
    fairness_weight = args.fairness_weight

# Random 8-hex-character identifier for this run; later passed to get_save_path.
save_name = secrets.token_hex(4)

In [97]:
# Container for everything this run produces. The 'parameters' entry records the
# configuration and is also what gets passed to run_multi_seed.
results = {
    'parameters': {
        'seed': seed,
        'num_patients': num_patients,
        'num_providers': num_providers,
        'provider_capacity': provider_capacity,
        'top_choice_prob': top_choice_prob,
        'choice_model': choice_model,
        'exit_option': exit_option,
        'num_trials': num_trials,
        'context_dim': context_dim,
        'true_top_choice_prob': true_top_choice_prob,
        'num_repetitions': num_repetitions,
        'max_menu_size': max_menu_size,
        'utility_function': utility_function,
        'order': order,
        'previous_patients_per_provider': previous_patients_per_provider,
        'batch_size': batch_size,
        'fairness_weight': fairness_weight,
    },
}

In [98]:
# Display the recorded parameters so the configuration of this run is visible in the notebook.
results['parameters']

{'seed': 43,
 'num_patients': 1225,
 'num_providers': 700,
 'provider_capacity': 1,
 'top_choice_prob': 0.9,
 'choice_model': 'uniform_choice',
 'exit_option': 0.5,
 'num_trials': 100,
 'context_dim': 5,
 'true_top_choice_prob': 0.9,
 'num_repetitions': 1,
 'max_menu_size': 1000,
 'utility_function': 'semi_synthetic',
 'order': 'custom',
 'previous_patients_per_provider': 10,
 'batch_size': 1,
 'fairness_weight': 0.5}

## Baselines

In [84]:
# Seeds handed to run_multi_seed; only the single configured seed is used.
seed_list = [seed]
# NOTE(review): presumably limits the compute resources available to this process —
# confirm against patient.utils.restrict_resources.
restrict_resources()

In [108]:
# Baseline run: random_policy as the per-epoch function inside the one-shot wrapper.
name = "random"
policy, per_epoch_function = one_shot_policy, random_policy
print("{} policy".format(name))

rewards, simulator = run_multi_seed(seed_list, policy, results['parameters'], per_epoch_function)

# Store each reward series under "<name>_<suffix>", keeping the established key order.
results.update({
    '{}_{}'.format(name, suffix): rewards[reward_key]
    for suffix, reward_key in (
        ('matches', 'matches'),
        ('utilities', 'patient_utilities'),
        ('workloads', 'provider_workloads'),
        ('minimums', 'provider_minimums'),
        ('minimums_all', 'provider_minimums_all'),
        ('gaps', 'provider_gaps'),
        ('gaps_all', 'provider_gaps_all'),
        ('variance', 'provider_variance'),
        ('variance_all', 'provider_variance_all'),
    )
})

# For each index of the first seed's run: largest final workload minus largest initial workload.
results['{}_workload_diff'.format(name)] = [
    max(final_row) - max(rewards['initial_workloads'][0][idx])
    for idx, final_row in enumerate(rewards['final_workloads'][0])
]

# Total matches and total patient utility, each divided by
# num_patients * num_repetitions * num_trials * number of seeds.
print(*(
    np.sum(rewards[reward_key]) / (num_patients*num_repetitions*num_trials*len(seed_list))
    for reward_key in ('matches', 'patient_utilities')
))

random policy
Took 20.785586833953857 time
0.5714285714285714 0.4405741676029295


In [99]:
# Baseline run: all_ones_policy as the per-epoch function inside the one-shot wrapper.
name = "greedy_basic"
policy, per_epoch_function = one_shot_policy, all_ones_policy
print("{} policy".format(name))

rewards, simulator = run_multi_seed(seed_list, policy, results['parameters'], per_epoch_function)

# Store each reward series under "<name>_<suffix>", keeping the established key order.
results.update({
    '{}_{}'.format(name, suffix): rewards[reward_key]
    for suffix, reward_key in (
        ('matches', 'matches'),
        ('utilities', 'patient_utilities'),
        ('workloads', 'provider_workloads'),
        ('minimums', 'provider_minimums'),
        ('minimums_all', 'provider_minimums_all'),
        ('gaps', 'provider_gaps'),
        ('gaps_all', 'provider_gaps_all'),
        ('variance', 'provider_variance'),
        ('variance_all', 'provider_variance_all'),
    )
})

# For each index of the first seed's run: largest final workload minus largest initial workload.
results['{}_workload_diff'.format(name)] = [
    max(final_row) - max(rewards['initial_workloads'][0][idx])
    for idx, final_row in enumerate(rewards['final_workloads'][0])
]

# Mean of the stored provider minimums, gaps and variances ("_all" series) for this policy.
print(*(
    np.mean(results['{}_{}'.format(name, metric)])
    for metric in ('minimums_all', 'gaps_all', 'variance_all')
))

greedy_basic policy
On trial 0
On trial 1
On trial 2
On trial 3
On trial 4
On trial 5
On trial 6
On trial 7
On trial 8
On trial 9
On trial 10
On trial 11
On trial 12
On trial 13
On trial 14
On trial 15
On trial 16
On trial 17
On trial 18
On trial 19
On trial 20
On trial 21
On trial 22
On trial 23
On trial 24
On trial 25
On trial 26
On trial 27
On trial 28
On trial 29
On trial 30
On trial 31
On trial 32
On trial 33
On trial 34
On trial 35
On trial 36
On trial 37
On trial 38
On trial 39
On trial 40
On trial 41
On trial 42
On trial 43
On trial 44
On trial 45
On trial 46
On trial 47
On trial 48
On trial 49
On trial 50
On trial 51
On trial 52
On trial 53
On trial 54
On trial 55
On trial 56
On trial 57
On trial 58
On trial 59
On trial 60
On trial 61
On trial 62
On trial 63
On trial 64
On trial 65
On trial 66
On trial 67
On trial 68
On trial 69
On trial 70
On trial 71
On trial 72
On trial 73
On trial 74
On trial 75
On trial 76
On trial 77
On trial 78
On trial 79
On trial 80
On trial 81
On tri

In [106]:
# Baseline run: greedy_policy as the per-epoch function inside the one-shot wrapper.
name = "greedy"
policy, per_epoch_function = one_shot_policy, greedy_policy
print("{} policy".format(name))

rewards, simulator = run_multi_seed(seed_list, policy, results['parameters'], per_epoch_function)

# Store each reward series under "<name>_<suffix>", keeping the established key order.
results.update({
    '{}_{}'.format(name, suffix): rewards[reward_key]
    for suffix, reward_key in (
        ('matches', 'matches'),
        ('utilities', 'patient_utilities'),
        ('workloads', 'provider_workloads'),
        ('minimums', 'provider_minimums'),
        ('minimums_all', 'provider_minimums_all'),
        ('gaps', 'provider_gaps'),
        ('gaps_all', 'provider_gaps_all'),
        ('variance', 'provider_variance'),
        ('variance_all', 'provider_variance_all'),
    )
})

# For each index of the first seed's run: largest final workload minus largest initial workload.
results['{}_workload_diff'.format(name)] = [
    max(final_row) - max(rewards['initial_workloads'][0][idx])
    for idx, final_row in enumerate(rewards['final_workloads'][0])
]

# Total matches and total patient utility, each divided by num_patients * num_trials * number of seeds.
# NOTE(review): unlike the "random" cell this denominator omits num_repetitions — confirm which is intended.
print(*(
    np.sum(rewards[reward_key]) / (num_patients*num_trials*len(seed_list))
    for reward_key in ('matches', 'patient_utilities')
))

greedy policy
On trial 0
On trial 1
On trial 2
On trial 3
On trial 4
On trial 5
On trial 6
On trial 7
On trial 8
On trial 9
On trial 10
On trial 11
On trial 12
On trial 13
On trial 14
On trial 15
On trial 16
On trial 17
On trial 18
On trial 19
On trial 20
On trial 21
On trial 22
On trial 23
On trial 24
On trial 25
On trial 26
On trial 27
On trial 28
On trial 29
On trial 30
On trial 31
On trial 32
On trial 33
On trial 34
On trial 35
On trial 36
On trial 37
On trial 38
On trial 39
On trial 40
On trial 41
On trial 42
On trial 43
On trial 44
On trial 45
On trial 46
On trial 47
On trial 48
On trial 49
On trial 50
On trial 51
On trial 52
On trial 53
On trial 54
On trial 55
On trial 56
On trial 57
On trial 58
On trial 59
On trial 60
On trial 61
On trial 62
On trial 63
On trial 64
On trial 65
On trial 66
On trial 67
On trial 68
On trial 69
On trial 70
On trial 71
On trial 72
On trial 73
On trial 74
On trial 75
On trial 76
On trial 77
On trial 78
On trial 79
On trial 80
On trial 81
On trial 82


In [None]:
# Only run optimal_policy when the size expression below stays under 4,000,000.
# NOTE(review): the expression looks like a bound on an exhaustive search space — confirm.
if (
    2**(num_patients*num_providers) * 2**(num_patients) * math.factorial(num_patients)
    < 4000000
):
    name = "optimal"
    policy, per_epoch_function = one_shot_policy, optimal_policy
    print("{} policy".format(name))

    rewards, simulator = run_multi_seed(seed_list, policy, results['parameters'], per_epoch_function)

    # Store each reward series under "<name>_<suffix>", keeping the established key order.
    results.update({
        '{}_{}'.format(name, suffix): rewards[reward_key]
        for suffix, reward_key in (
            ('matches', 'matches'),
            ('utilities', 'patient_utilities'),
            ('workloads', 'provider_workloads'),
            ('minimums', 'provider_minimums'),
            ('minimums_all', 'provider_minimums_all'),
            ('gaps', 'provider_gaps'),
            ('gaps_all', 'provider_gaps_all'),
            ('variance', 'provider_variance'),
            ('variance_all', 'provider_variance_all'),
        )
    })

    # For each index of the first seed's run: largest final workload minus largest initial workload.
    results['{}_workload_diff'.format(name)] = [
        max(final_row) - max(rewards['initial_workloads'][0][idx])
        for idx, final_row in enumerate(rewards['final_workloads'][0])
    ]

    # Total matches and total patient utility, each divided by num_patients * num_trials * number of seeds.
    print(*(
        np.sum(rewards[reward_key]) / (num_patients*num_trials*len(seed_list))
        for reward_key in ('matches', 'patient_utilities')
    ))

In [None]:
# Baseline run: optimal_order_policy as the per-epoch function inside the one-shot wrapper.
name = "optimal_order"
policy, per_epoch_function = one_shot_policy, optimal_order_policy
print("{} policy".format(name))

rewards, simulator = run_multi_seed(seed_list, policy, results['parameters'], per_epoch_function)

# Store each reward series under "<name>_<suffix>", keeping the established key order.
results.update({
    '{}_{}'.format(name, suffix): rewards[reward_key]
    for suffix, reward_key in (
        ('matches', 'matches'),
        ('utilities', 'patient_utilities'),
        ('workloads', 'provider_workloads'),
        ('minimums', 'provider_minimums'),
        ('minimums_all', 'provider_minimums_all'),
        ('gaps', 'provider_gaps'),
        ('gaps_all', 'provider_gaps_all'),
        ('variance', 'provider_variance'),
        ('variance_all', 'provider_variance_all'),
    )
})

# For each index of the first seed's run: largest final workload minus largest initial workload.
results['{}_workload_diff'.format(name)] = [
    max(final_row) - max(rewards['initial_workloads'][0][idx])
    for idx, final_row in enumerate(rewards['final_workloads'][0])
]

# Total matches and total patient utility, each divided by num_patients * num_trials * number of seeds.
print(*(
    np.sum(rewards[reward_key]) / (num_patients*num_trials*len(seed_list))
    for reward_key in ('matches', 'patient_utilities')
))

optimal_order policy
Took 0.17142271995544434 time
0.125 0.08528775637924887


## Offline

In [100]:
# Offline run: lp_policy as the per-epoch function inside the one-shot wrapper.
name = "lp"
policy, per_epoch_function = one_shot_policy, lp_policy
print("{} policy".format(name))

rewards, simulator = run_multi_seed(seed_list, policy, results['parameters'], per_epoch_function)

# Store each reward series under "<name>_<suffix>", keeping the established key order.
results.update({
    '{}_{}'.format(name, suffix): rewards[reward_key]
    for suffix, reward_key in (
        ('matches', 'matches'),
        ('utilities', 'patient_utilities'),
        ('workloads', 'provider_workloads'),
        ('minimums', 'provider_minimums'),
        ('minimums_all', 'provider_minimums_all'),
        ('gaps', 'provider_gaps'),
        ('gaps_all', 'provider_gaps_all'),
        ('variance', 'provider_variance'),
        ('variance_all', 'provider_variance_all'),
    )
})

# For each index of the first seed's run: largest final workload minus largest initial workload.
results['{}_workload_diff'.format(name)] = [
    max(final_row) - max(rewards['initial_workloads'][0][idx])
    for idx, final_row in enumerate(rewards['final_workloads'][0])
]

# Summary tuple, left as the cell's last top-level expression so the notebook displays it:
# (scaled matches, scaled patient utility, max over the axis-0 mean of the first seed's
#  final workloads, scaled provider minimums).
(
    np.sum(rewards['matches']) / (num_patients*num_trials*len(seed_list)),
    np.sum(rewards['patient_utilities']) / (num_patients*num_trials*len(seed_list)),
    np.max(np.mean(np.array(rewards['final_workloads'])[0], axis=0)),
    np.sum(rewards['provider_minimums']) / (num_patients*num_trials*len(seed_list)),
)


lp policy
On trial 0
On trial 1
On trial 2
On trial 3
On trial 4
On trial 5
On trial 6
On trial 7
On trial 8
On trial 9
On trial 10
On trial 11
On trial 12
On trial 13
On trial 14
On trial 15
On trial 16
On trial 17
On trial 18
On trial 19
On trial 20
On trial 21
On trial 22
On trial 23
On trial 24
On trial 25
On trial 26
On trial 27
On trial 28
On trial 29
On trial 30
On trial 31
On trial 32
On trial 33
On trial 34
On trial 35
On trial 36
On trial 37
On trial 38
On trial 39
On trial 40
On trial 41
On trial 42
On trial 43
On trial 44
On trial 45
On trial 46
On trial 47
On trial 48
On trial 49
On trial 50
On trial 51
On trial 52
On trial 53
On trial 54
On trial 55
On trial 56
On trial 57
On trial 58
On trial 59
On trial 60
On trial 61
On trial 62
On trial 63
On trial 64
On trial 65
On trial 66
On trial 67
On trial 68
On trial 69
On trial 70
On trial 71
On trial 72
On trial 73
On trial 74
On trial 75
On trial 76
On trial 77
On trial 78
On trial 79
On trial 80
On trial 81
On trial 82
On t

(0.5148, 0.5140814777052688, 7.777426569368572, 0.0005163858589655141)

In [None]:
# Offline run: lp_fairness_policy with the configured fairness weight.
# The simulation itself is skipped when the weight is not positive.
name = "lp_fairness"
policy = one_shot_policy
per_epoch_function = lambda s: lp_fairness_policy(s, weight=fairness_weight)
print("{} policy".format(name))

if fairness_weight > 0:

    rewards, simulator = run_multi_seed(seed_list, policy, results['parameters'], per_epoch_function)

    # Store each reward series under "<name>_<suffix>", keeping the established key order.
    results.update({
        '{}_{}'.format(name, suffix): rewards[reward_key]
        for suffix, reward_key in (
            ('matches', 'matches'),
            ('utilities', 'patient_utilities'),
            ('workloads', 'provider_workloads'),
            ('minimums', 'provider_minimums'),
            ('minimums_all', 'provider_minimums_all'),
            ('gaps', 'provider_gaps'),
            ('gaps_all', 'provider_gaps_all'),
            ('variance', 'provider_variance'),
            ('variance_all', 'provider_variance_all'),
        )
    })

    # For each index of the first seed's run: largest final workload minus largest initial workload.
    results['{}_workload_diff'.format(name)] = [
        max(final_row) - max(rewards['initial_workloads'][0][idx])
        for idx, final_row in enumerate(rewards['final_workloads'][0])
    ]

    # Fairness weight alongside the provider minimums scaled by num_patients * num_trials * number of seeds.
    print(
        fairness_weight,
        np.sum(rewards['provider_minimums']) / (num_patients*num_trials*len(seed_list)),
    )


lp_fairness policy
Took 0.021974563598632812 time
0.9 0.057651483892302874


In [None]:
# Offline run: group_based_policy, skipped for any utility function whose name
# contains 'semi_synthetic'.
if 'semi_synthetic' not in utility_function:
    policy = one_shot_policy
    per_epoch_function = group_based_policy
    name = "group_based"
    print("{} policy".format(name))

    rewards, simulator = run_multi_seed(seed_list, policy, results['parameters'], per_epoch_function)

    # Store each reward series under "<name>_<suffix>", keeping the established key order.
    results.update({
        '{}_{}'.format(name, suffix): rewards[reward_key]
        for suffix, reward_key in (
            ('matches', 'matches'),
            ('utilities', 'patient_utilities'),
            ('workloads', 'provider_workloads'),
            ('minimums', 'provider_minimums'),
            ('minimums_all', 'provider_minimums_all'),
            ('gaps', 'provider_gaps'),
            ('gaps_all', 'provider_gaps_all'),
            ('variance', 'provider_variance'),
            ('variance_all', 'provider_variance_all'),
        )
    })

    # For each index of the first seed's run: largest final workload minus largest initial workload.
    results['{}_workload_diff'.format(name)] = [
        max(final_row) - max(rewards['initial_workloads'][0][idx])
        for idx, final_row in enumerate(rewards['final_workloads'][0])
    ]

    # Printed explicitly: a bare expression nested inside this `if` is never displayed by
    # the notebook (only a top-level last expression is) and is discarded in a script run.
    print(
        np.sum(rewards['matches']) / (num_patients*num_trials*len(seed_list)),
        np.sum(rewards['patient_utilities']) / (num_patients*num_trials*len(seed_list)),
    )

group_based policy
Took 0.13678646087646484 time


(0.1165, 0.11168247812802404)

In [107]:
# Offline run: gradient_descent_policy_fast as the per-epoch function inside the one-shot wrapper.
name = "gradient_descent_fast"
policy, per_epoch_function = one_shot_policy, gradient_descent_policy_fast
print("{} policy".format(name))

rewards, simulator = run_multi_seed(seed_list, policy, results['parameters'], per_epoch_function)

# Store each reward series under "<name>_<suffix>", keeping the established key order.
results.update({
    '{}_{}'.format(name, suffix): rewards[reward_key]
    for suffix, reward_key in (
        ('matches', 'matches'),
        ('utilities', 'patient_utilities'),
        ('workloads', 'provider_workloads'),
        ('minimums', 'provider_minimums'),
        ('minimums_all', 'provider_minimums_all'),
        ('gaps', 'provider_gaps'),
        ('gaps_all', 'provider_gaps_all'),
        ('variance', 'provider_variance'),
        ('variance_all', 'provider_variance_all'),
    )
})

# For each index of the first seed's run: largest final workload minus largest initial workload.
results['{}_workload_diff'.format(name)] = [
    max(final_row) - max(rewards['initial_workloads'][0][idx])
    for idx, final_row in enumerate(rewards['final_workloads'][0])
]
# This policy additionally records the 'matches_per' series (stored after workload_diff).
results['{}_matches_per'.format(name)] = rewards['matches_per']

# Total matches and total patient utility, each divided by num_patients * num_trials * number of seeds.
print(*(
    np.sum(rewards[reward_key]) / (num_patients*num_trials*len(seed_list))
    for reward_key in ('matches', 'patient_utilities')
))

gradient_descent_fast policy
On trial 0
On trial 1
On trial 2
On trial 3
On trial 4
On trial 5
On trial 6
On trial 7
On trial 8
On trial 9
On trial 10
On trial 11
On trial 12
On trial 13
On trial 14
On trial 15
On trial 16
On trial 17
On trial 18
On trial 19
On trial 20
On trial 21
On trial 22
On trial 23
On trial 24
On trial 25
On trial 26
On trial 27
On trial 28
On trial 29
On trial 30
On trial 31
On trial 32
On trial 33
On trial 34
On trial 35
On trial 36
On trial 37
On trial 38
On trial 39
On trial 40
On trial 41
On trial 42
On trial 43
On trial 44
On trial 45
On trial 46
On trial 47
On trial 48
On trial 49
On trial 50
On trial 51
On trial 52
On trial 53
On trial 54
On trial 55
On trial 56
On trial 57
On trial 58
On trial 59
On trial 60
On trial 61
On trial 62
On trial 63
On trial 64
On trial 65
On trial 66
On trial 67
On trial 68
On trial 69
On trial 70
On trial 71
On trial 72
On trial 73
On trial 74
On trial 75
On trial 76
On trial 77
On trial 78
On trial 79
On trial 80
On trial 

  
  if len(np.array(utilities_by_provider).shape) == 3:


## Save Data

In [None]:
# Build the output path for this run from the results folder and the random run identifier.
save_path = get_save_path(out_folder,save_name)

In [None]:
# NOTE(review): presumably removes previously saved results in out_folder that duplicate
# this run — confirm against patient.utils.delete_duplicate_results.
delete_duplicate_results(out_folder,"",results)

In [None]:
# Write all collected results as JSON, using the project's MyEncoder for serialisation.
# The context manager closes (and flushes) the file even if encoding raises part-way.
with open('../../results/'+save_path,'w') as results_file:
    json.dump(results,results_file,cls=MyEncoder)