In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import random 
import matplotlib.pyplot as plt
import argparse
import secrets
import json
import sys
import math 

In [3]:
from patient.simulator import run_multi_seed
from patient.baseline_policies import *
from patient.lp_policies import *
from patient.group_based_policies import *
from patient.ordering_policies import *
from patient.provider_policies import *
from patient.utils import get_save_path, delete_duplicate_results, restrict_resources, one_shot_policy, MyEncoder

In [4]:
is_jupyter = 'ipykernel' in sys.modules

In [141]:
if is_jupyter: 
    seed        = 43
    num_patients = 10
    num_providers = 10
    provider_capacity = 1
    top_choice_prob = 1
    true_top_choice_prob = 1
    choice_model = "uniform_choice"
    exit_option = 0.5
    utility_function = "normal"
    out_folder = "policy_comparison"
    num_repetitions = 1
    num_trials = 100
    context_dim = 5
    max_menu_size = 25
    previous_patients_per_provider = 10
    order="custom"
else:
    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', help='Random Seed', type=int, default=42)
    parser.add_argument('--n_patients',         '-N', help='Number of patients', type=int, default=100)
    parser.add_argument('--n_providers',        help='Number of providers', type=int, default=100)
    parser.add_argument('--n_trials',          help='Number of trials ', type=int, default=2)
    parser.add_argument('--top_choice_prob',          help='Probability of picking top choice', type=float, default=0.75)
    parser.add_argument('--true_top_choice_prob',          help='Probability of picking top choice', type=float, default=0.75)
    parser.add_argument('--context_dim',          help='Context dim for patients and providers', type=int, default=5)
    parser.add_argument('--max_menu_size',          help='Context dim for patients and providers', type=int, default=50)
    parser.add_argument('--num_repetitions',          help='Context dim for patients and providers', type=int, default=1)
    parser.add_argument('--previous_patients_per_provider',          help='Context dim for patients and providers', type=int, default=10)
    parser.add_argument('--provider_capacity', help='Provider Capacity', type=int, default=5)
    parser.add_argument('--choice_model', help='Which choice model for patients', type=str, default='uniform_choice')
    parser.add_argument('--exit_option', help='What is the value of the exit option', type=float, default=0.5)
    parser.add_argument('--out_folder', help='Which folder to write results to', type=str, default='policy_comparison')
    parser.add_argument('--utility_function', help='Which folder to write results to', type=str, default='uniform')
    parser.add_argument('--order', help='Which folder to write results to', type=str, default='random')

    args = parser.parse_args()

    seed = args.seed
    num_patients = args.n_patients
    num_providers = args.n_providers 
    provider_capacity = args.provider_capacity
    top_choice_prob = args.top_choice_prob
    choice_model = args.choice_model
    exit_option = args.exit_option
    out_folder = args.out_folder
    num_trials = args.n_trials 
    context_dim = args.context_dim 
    num_repetitions = args.num_repetitions
    true_top_choice_prob = args.true_top_choice_prob
    max_menu_size = args.max_menu_size
    utility_function = args.utility_function
    order = args.order
    previous_patients_per_provider = args.previous_patients_per_provider

save_name = secrets.token_hex(4)  

In [142]:
results = {}
results['parameters'] = {'seed'      : seed,
        'num_patients'    : num_patients,
        'num_providers': num_providers, 
        'provider_capacity'    : provider_capacity,
        'top_choice_prob': top_choice_prob, 
        'choice_model': choice_model,
        'exit_option': exit_option,
        'num_trials': num_trials,
        'context_dim': context_dim, 
        'true_top_choice_prob': true_top_choice_prob, 
        'num_repetitions': num_repetitions, 
        'max_menu_size': max_menu_size, 
        'utility_function': utility_function, 
        'order': order, 
        'previous_patients_per_provider': previous_patients_per_provider} 

## Baselines

In [143]:
seed_list = [seed]
restrict_resources()

In [144]:
policy = random_policy
name = "random"
print("{} policy".format(name))

rewards, simulator = run_multi_seed(seed_list,policy,results['parameters'])

results['{}_matches'.format(name)] = rewards['matches']
results['{}_utilities'.format(name)] = rewards['patient_utilities']
results['{}_workloads'.format(name)] = rewards['provider_workloads']

results['{}_minimums'.format(name)] = rewards['provider_minimums']
results['{}_minimums_all'.format(name)] = rewards['provider_minimums_all']
results['{}_gaps'.format(name)] = rewards['provider_gaps']
results['{}_gaps_all'.format(name)] = rewards['provider_gaps_all']
results['{}_variance'.format(name)] = rewards['provider_variance']
results['{}_variance_all'.format(name)] = rewards['provider_variance_all']
results['{}_workload_diff'.format(name)] = [max(rewards['final_workloads'][0][i])-max(rewards['initial_workloads'][0][i]) for i in range(len(rewards['final_workloads'][0]))]

np.sum(rewards['matches'])/(num_patients*num_repetitions*num_trials*len(seed_list)),np.sum(rewards['patient_utilities'])/(num_patients*num_repetitions*num_trials*len(seed_list))

random policy
Took 0.04769539833068848 time


  if len(np.array(utilities_by_provider).shape) == 3:
  provider_workloads = [sum([len(j) for j in i])/len(i) for i in np.array(utilities_by_provider).T]


(0.918, 0.581633075095095)

In [83]:
policy = all_ones_policy
name = "greedy_basic"
print("{} policy".format(name))

rewards, simulator = run_multi_seed(seed_list,policy,results['parameters'])

results['{}_matches'.format(name)] = rewards['matches']
results['{}_utilities'.format(name)] = rewards['patient_utilities']
results['{}_workloads'.format(name)] = rewards['provider_workloads']

results['{}_minimums'.format(name)] = rewards['provider_minimums']
results['{}_minimums_all'.format(name)] = rewards['provider_minimums_all']
results['{}_gaps'.format(name)] = rewards['provider_gaps']
results['{}_gaps_all'.format(name)] = rewards['provider_gaps_all']
results['{}_variance'.format(name)] = rewards['provider_variance']
results['{}_variance_all'.format(name)] = rewards['provider_variance_all']
results['{}_workload_diff'.format(name)] = [max(rewards['final_workloads'][0][i])-max(rewards['initial_workloads'][0][i]) for i in range(len(rewards['final_workloads'][0]))]

np.mean(results['{}_minimums_all'.format(name)]),np.mean(results['{}_gaps_all'.format(name)]),np.mean(results['{}_variance_all'.format(name)])

greedy_basic policy
Took 0.019397258758544922 time


(0.08989149327313839, 0.8866248012580625, 0.12791254660146195)

In [28]:
policy = greedy_policy
name = "greedy"
print("{} policy".format(name))

rewards, simulator = run_multi_seed(seed_list,policy,results['parameters'])

results['{}_matches'.format(name)] = rewards['matches']
results['{}_utilities'.format(name)] = rewards['patient_utilities']
results['{}_workloads'.format(name)] = rewards['provider_workloads']

results['{}_minimums'.format(name)] = rewards['provider_minimums']
results['{}_minimums_all'.format(name)] = rewards['provider_minimums_all']
results['{}_gaps'.format(name)] = rewards['provider_gaps']
results['{}_gaps_all'.format(name)] = rewards['provider_gaps_all']
results['{}_variance'.format(name)] = rewards['provider_variance']
results['{}_variance_all'.format(name)] = rewards['provider_variance_all']
results['{}_workload_diff'.format(name)] = [max(rewards['final_workloads'][0][i])-max(rewards['initial_workloads'][0][i]) for i in range(len(rewards['final_workloads'][0]))]

np.sum(rewards['matches'])/(num_patients*num_trials*len(seed_list)),np.sum(rewards['patient_utilities'])/(num_patients*num_trials*len(seed_list))

greedy policy
Took 0.13260698318481445 time


(0.8944, 0.5635836492112621)

In [29]:
if 2**(num_patients*num_providers)*2**(num_patients)*math.factorial(num_patients) < 100000:
    policy = one_shot_policy
    per_epoch_function = optimal_policy
    name = "optimal"
    print("{} policy".format(name))

    rewards, simulator = run_multi_seed(seed_list,policy,results['parameters'],per_epoch_function)

    results['{}_matches'.format(name)] = rewards['matches']
    results['{}_utilities'.format(name)] = rewards['patient_utilities']
    results['{}_workloads'.format(name)] = rewards['provider_workloads']

    results['{}_minimums'.format(name)] = rewards['provider_minimums']
    results['{}_minimums_all'.format(name)] = rewards['provider_minimums_all']
    results['{}_gaps'.format(name)] = rewards['provider_gaps']
    results['{}_gaps_all'.format(name)] = rewards['provider_gaps_all']
    results['{}_variance'.format(name)] = rewards['provider_variance']
    results['{}_variance_all'.format(name)] = rewards['provider_variance_all']
    results['{}_workload_diff'.format(name)] = [max(rewards['final_workloads'][0][i])-max(rewards['initial_workloads'][0][i]) for i in range(len(rewards['final_workloads'][0]))]

    print(np.sum(rewards['matches'])/(num_patients*num_trials*len(seed_list)),np.sum(rewards['patient_utilities'])/(num_patients*num_trials*len(seed_list)))

In [30]:
policy = one_shot_policy
per_epoch_function = optimal_order_policy
name = "optimal_order"
print("{} policy".format(name))

rewards, simulator = run_multi_seed(seed_list,policy,results['parameters'],per_epoch_function)

results['{}_matches'.format(name)] = rewards['matches']
results['{}_utilities'.format(name)] = rewards['patient_utilities']
results['{}_workloads'.format(name)] = rewards['provider_workloads']

results['{}_minimums'.format(name)] = rewards['provider_minimums']
results['{}_minimums_all'.format(name)] = rewards['provider_minimums_all']
results['{}_gaps'.format(name)] = rewards['provider_gaps']
results['{}_gaps_all'.format(name)] = rewards['provider_gaps_all']
results['{}_variance'.format(name)] = rewards['provider_variance']
results['{}_variance_all'.format(name)] = rewards['provider_variance_all']
results['{}_workload_diff'.format(name)] = [max(rewards['final_workloads'][0][i])-max(rewards['initial_workloads'][0][i]) for i in range(len(rewards['final_workloads'][0]))]

print(np.sum(rewards['matches'])/(num_patients*num_trials*len(seed_list)),np.sum(rewards['patient_utilities'])/(num_patients*num_trials*len(seed_list)))

optimal_order policy
Took 0.13132524490356445 time
0.8984 0.577863306442405


## Offline

In [31]:
policy = one_shot_policy
per_epoch_function = lp_policy
name = "lp"
print("{} policy".format(name))

rewards, simulator = run_multi_seed(seed_list,policy,results['parameters'],per_epoch_function)

results['{}_matches'.format(name)] = rewards['matches']
results['{}_utilities'.format(name)] = rewards['patient_utilities']
results['{}_workloads'.format(name)] = rewards['provider_workloads']

results['{}_minimums'.format(name)] = rewards['provider_minimums']
results['{}_minimums_all'.format(name)] = rewards['provider_minimums_all']
results['{}_gaps'.format(name)] = rewards['provider_gaps']
results['{}_gaps_all'.format(name)] = rewards['provider_gaps_all']
results['{}_variance'.format(name)] = rewards['provider_variance']
results['{}_variance_all'.format(name)] = rewards['provider_variance_all']
results['{}_workload_diff'.format(name)] = [max(rewards['final_workloads'][0][i])-max(rewards['initial_workloads'][0][i]) for i in range(len(rewards['final_workloads'][0]))]

np.sum(rewards['matches'])/(num_patients*num_trials*len(seed_list)),np.sum(rewards['patient_utilities'])/(num_patients*num_trials*len(seed_list)),np.max(np.mean(np.array(rewards['final_workloads'])[0],axis=0))

lp policy
Took 0.12238168716430664 time


(0.8944, 0.5977416208999422, 7.497791691877962)

In [15]:
policy = one_shot_policy
per_epoch_function = lp_workload_policy
name = "lp_workload"
print("{} policy".format(name))

rewards, simulator = run_multi_seed(seed_list,policy,results['parameters'],per_epoch_function)

results['{}_matches'.format(name)] = rewards['matches']
results['{}_utilities'.format(name)] = rewards['patient_utilities']
results['{}_workloads'.format(name)] = rewards['provider_workloads']

results['{}_minimums'.format(name)] = rewards['provider_minimums']
results['{}_minimums_all'.format(name)] = rewards['provider_minimums_all']
results['{}_gaps'.format(name)] = rewards['provider_gaps']
results['{}_gaps_all'.format(name)] = rewards['provider_gaps_all']
results['{}_variance'.format(name)] = rewards['provider_variance']
results['{}_variance_all'.format(name)] = rewards['provider_variance_all']
results['{}_workload_diff'.format(name)] = [max(rewards['final_workloads'][0][i])-max(rewards['initial_workloads'][0][i]) for i in range(len(rewards['final_workloads'][0]))]

np.sum(rewards['matches'])/(num_patients*num_trials*len(seed_list)),np.sum(rewards['patient_utilities'])/(num_patients*num_trials*len(seed_list)),np.max(np.mean(np.array(rewards['final_workloads'])[0],axis=0)),np.max(np.mean(np.array(rewards['final_workloads'])[0],axis=0))

lp_workload policy
Took 0.5431537628173828 time


(0.0, 0.0, 6.892960645691221, 6.892960645691221)

In [21]:
policy = one_shot_policy
per_epoch_function = lp_multiple_match_policy
name = "lp_multiple_match"
print("{} policy".format(name))

rewards, simulator = run_multi_seed(seed_list,policy,results['parameters'],per_epoch_function)

results['{}_matches'.format(name)] = rewards['matches']
results['{}_utilities'.format(name)] = rewards['patient_utilities']
results['{}_workloads'.format(name)] = rewards['provider_workloads']

results['{}_minimums'.format(name)] = rewards['provider_minimums']
results['{}_minimums_all'.format(name)] = rewards['provider_minimums_all']
results['{}_gaps'.format(name)] = rewards['provider_gaps']
results['{}_gaps_all'.format(name)] = rewards['provider_gaps_all']
results['{}_variance'.format(name)] = rewards['provider_variance']
results['{}_variance_all'.format(name)] = rewards['provider_variance_all']
results['{}_workload_diff'.format(name)] = [max(rewards['final_workloads'][0][i])-max(rewards['initial_workloads'][0][i]) for i in range(len(rewards['final_workloads'][0]))]

np.sum(rewards['matches'])/(num_patients*num_trials*len(seed_list)),np.sum(rewards['patient_utilities'])/(num_patients*num_trials*len(seed_list))

lp_multiple_match policy
Took 0.09562230110168457 time


(0.2536, 0.23916547111746667)

In [16]:
if choice_model == 'threshold':
    policy = one_shot_policy 
    per_epoch_function = lp_threshold
    name = "lp_threshold"
    print("{} policy".format(name))

    rewards, simulator = run_multi_seed(seed_list,policy,results['parameters'],per_epoch_function)

    results['{}_matches'.format(name)] = rewards['matches']
    results['{}_utilities'.format(name)] = rewards['patient_utilities']
    results['{}_workloads'.format(name)] = rewards['provider_workloads']

    results['{}_minimums'.format(name)] = rewards['provider_minimums']
    results['{}_minimums_all'.format(name)] = rewards['provider_minimums_all']
    results['{}_gaps'.format(name)] = rewards['provider_gaps']
    results['{}_gaps_all'.format(name)] = rewards['provider_gaps_all']
    results['{}_variance'.format(name)] = rewards['provider_variance']
    results['{}_variance_all'.format(name)] = rewards['provider_variance_all']
    results['{}_workload_diff'.format(name)] = [max(rewards['final_workloads'][0][i])-max(rewards['initial_workloads'][0][i]) for i in range(len(rewards['final_workloads'][0]))]

    print(np.sum(rewards['matches'])/(num_patients*num_trials*len(seed_list)),np.sum(rewards['patient_utilities'])/(num_patients*num_trials*len(seed_list)))

In [17]:
policy = one_shot_policy 
per_epoch_function = lp_more_patients_policy
name = "lp_more_patients"
print("{} policy".format(name))

rewards, simulator = run_multi_seed(seed_list,policy,results['parameters'],per_epoch_function)

results['{}_matches'.format(name)] = rewards['matches']
results['{}_utilities'.format(name)] = rewards['patient_utilities']
results['{}_workloads'.format(name)] = rewards['provider_workloads']

results['{}_minimums'.format(name)] = rewards['provider_minimums']
results['{}_minimums_all'.format(name)] = rewards['provider_minimums_all']
results['{}_gaps'.format(name)] = rewards['provider_gaps']
results['{}_gaps_all'.format(name)] = rewards['provider_gaps_all']
results['{}_variance'.format(name)] = rewards['provider_variance']
results['{}_variance_all'.format(name)] = rewards['provider_variance_all']
results['{}_workload_diff'.format(name)] = [max(rewards['final_workloads'][0][i])-max(rewards['initial_workloads'][0][i]) for i in range(len(rewards['final_workloads'][0]))]

np.sum(rewards['matches'])/(num_patients*num_trials*len(seed_list)),np.sum(rewards['patient_utilities'])/(num_patients*num_trials*len(seed_list))

lp_more_patients policy
Took 0.6590728759765625 time


(0.7636, 0.5261014333470473)

In [18]:
policy = one_shot_policy
per_epoch_function = lp_fairness_policy
name = "lp_fairness"
print("{} policy".format(name))

rewards, simulator = run_multi_seed(seed_list,policy,results['parameters'],per_epoch_function)

results['{}_matches'.format(name)] = rewards['matches']
results['{}_utilities'.format(name)] = rewards['patient_utilities']
results['{}_workloads'.format(name)] = rewards['provider_workloads']

results['{}_minimums'.format(name)] = rewards['provider_minimums']
results['{}_minimums_all'.format(name)] = rewards['provider_minimums_all']
results['{}_gaps'.format(name)] = rewards['provider_gaps']
results['{}_gaps_all'.format(name)] = rewards['provider_gaps_all']
results['{}_variance'.format(name)] = rewards['provider_variance']
results['{}_variance_all'.format(name)] = rewards['provider_variance_all']
results['{}_workload_diff'.format(name)] = [max(rewards['final_workloads'][0][i])-max(rewards['initial_workloads'][0][i]) for i in range(len(rewards['final_workloads'][0]))]

np.sum(rewards['matches'])/(num_patients*num_trials*len(seed_list)),np.sum(rewards['patient_utilities'])/(num_patients*num_trials*len(seed_list))

lp_fairness policy
Took 0.5672309398651123 time


(0.7636, 0.4479570155876951)

In [19]:
policy = one_shot_policy
per_epoch_function = group_based_policy
name = "group_based"
print("{} policy".format(name))

rewards, simulator = run_multi_seed(seed_list,policy,results['parameters'],per_epoch_function)

results['{}_matches'.format(name)] = rewards['matches']
results['{}_utilities'.format(name)] = rewards['patient_utilities']
results['{}_workloads'.format(name)] = rewards['provider_workloads']

results['{}_minimums'.format(name)] = rewards['provider_minimums']
results['{}_minimums_all'.format(name)] = rewards['provider_minimums_all']
results['{}_gaps'.format(name)] = rewards['provider_gaps']
results['{}_gaps_all'.format(name)] = rewards['provider_gaps_all']
results['{}_variance'.format(name)] = rewards['provider_variance']
results['{}_variance_all'.format(name)] = rewards['provider_variance_all']
results['{}_workload_diff'.format(name)] = [max(rewards['final_workloads'][0][i])-max(rewards['initial_workloads'][0][i]) for i in range(len(rewards['final_workloads'][0]))]

np.sum(rewards['matches'])/(num_patients*num_trials*len(seed_list)),np.sum(rewards['patient_utilities'])/(num_patients*num_trials*len(seed_list))

group_based policy
Took 0.715339183807373 time


(0.7636, 0.5261014333470473)

In [20]:
policy = one_shot_policy
per_epoch_function = group_based_unidirectional_policy
name = "group_based_unidirectional"
print("{} policy".format(name))

rewards, simulator = run_multi_seed(seed_list,policy,results['parameters'],per_epoch_function)

results['{}_matches'.format(name)] = rewards['matches']
results['{}_utilities'.format(name)] = rewards['patient_utilities']
results['{}_workloads'.format(name)] = rewards['provider_workloads']

results['{}_minimums'.format(name)] = rewards['provider_minimums']
results['{}_minimums_all'.format(name)] = rewards['provider_minimums_all']
results['{}_gaps'.format(name)] = rewards['provider_gaps']
results['{}_gaps_all'.format(name)] = rewards['provider_gaps_all']
results['{}_variance'.format(name)] = rewards['provider_variance']
results['{}_variance_all'.format(name)] = rewards['provider_variance_all']
results['{}_workload_diff'.format(name)] = [max(rewards['final_workloads'][0][i])-max(rewards['initial_workloads'][0][i]) for i in range(len(rewards['final_workloads'][0]))]

np.sum(rewards['matches'])/(num_patients*num_trials*len(seed_list)),np.sum(rewards['patient_utilities'])/(num_patients*num_trials*len(seed_list))

group_based_unidirectional policy
Took 0.7381973266601562 time


(0.7504, 0.5187739882075151)

In [22]:
policy = one_shot_policy 
per_epoch_function = provider_focused_policy
name = "provider_focused"
print("{} policy".format(name))

rewards, simulator = run_multi_seed(seed_list,policy,results['parameters'],per_epoch_function)

results['{}_matches'.format(name)] = rewards['matches']
results['{}_utilities'.format(name)] = rewards['patient_utilities']
results['{}_workloads'.format(name)] = rewards['provider_workloads']

results['{}_minimums'.format(name)] = rewards['provider_minimums']
results['{}_minimums_all'.format(name)] = rewards['provider_minimums_all']
results['{}_gaps'.format(name)] = rewards['provider_gaps']
results['{}_gaps_all'.format(name)] = rewards['provider_gaps_all']
results['{}_variance'.format(name)] = rewards['provider_variance']
results['{}_variance_all'.format(name)] = rewards['provider_variance_all']
results['{}_workload_diff'.format(name)] = [max(rewards['final_workloads'][0][i])-max(rewards['initial_workloads'][0][i]) for i in range(len(rewards['final_workloads'][0]))]

np.sum(rewards['matches'])/(num_patients*num_trials*len(seed_list)),np.sum(rewards['patient_utilities'])/(num_patients*num_trials*len(seed_list))

provider_focused policy
B 3, Per provider [3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3.
 3.], Per Patient [1. 4. 1. 3. 4. 4. 3. 4. 2. 5. 1. 1. 2. 5. 2. 2. 3. 2. 4. 4. 5. 5. 1. 4.
 3.]
Took 0.1559138298034668 time


(0.6824, 0.511133744634547)

In [7]:
policy = one_shot_policy 
per_epoch_function = provider_focused_less_interference_policy
name = "provider_focused_less_interference"
print("{} policy".format(name))

rewards, simulator = run_multi_seed(seed_list,policy,results['parameters'],per_epoch_function)

results['{}_matches'.format(name)] = rewards['matches']
results['{}_utilities'.format(name)] = rewards['patient_utilities']
results['{}_workloads'.format(name)] = rewards['provider_workloads']

results['{}_minimums'.format(name)] = rewards['provider_minimums']
results['{}_minimums_all'.format(name)] = rewards['provider_minimums_all']
results['{}_gaps'.format(name)] = rewards['provider_gaps']
results['{}_gaps_all'.format(name)] = rewards['provider_gaps_all']
results['{}_variance'.format(name)] = rewards['provider_variance']
results['{}_variance_all'.format(name)] = rewards['provider_variance_all']
results['{}_workload_diff'.format(name)] = [max(rewards['final_workloads'][0][i])-max(rewards['initial_workloads'][0][i]) for i in range(len(rewards['final_workloads'][0]))]

np.sum(rewards['matches'])/(num_patients*num_trials*len(seed_list)),np.sum(rewards['patient_utilities'])/(num_patients*num_trials*len(seed_list))

provider_focused_less_interference policy


NameError: name 'seed_list' is not defined

In [20]:
policy = one_shot_policy 
for lamb in [0.25,0.5,1,2,4]:
    per_epoch_function = provider_focused_linear_regularization_policy(lamb)
    name = "provider_focused_linear_regularization_{}".format(lamb)
    print("{} policy".format(name))

    rewards, simulator = run_multi_seed(seed_list,policy,results['parameters'],per_epoch_function)

    results['{}_matches'.format(name)] = rewards['matches']
    results['{}_utilities'.format(name)] = rewards['patient_utilities']
    results['{}_workloads'.format(name)] = rewards['provider_workloads']

    results['{}_minimums'.format(name)] = rewards['provider_minimums']
    results['{}_minimums_all'.format(name)] = rewards['provider_minimums_all']
    results['{}_gaps'.format(name)] = rewards['provider_gaps']
    results['{}_gaps_all'.format(name)] = rewards['provider_gaps_all']
    results['{}_variance'.format(name)] = rewards['provider_variance']
    results['{}_variance_all'.format(name)] = rewards['provider_variance_all']
    results['{}_workload_diff'.format(name)] = [max(rewards['final_workloads'][0][i])-max(rewards['initial_workloads'][0][i]) for i in range(len(rewards['final_workloads'][0]))]

    print(lamb,np.sum(rewards['matches'])/(num_patients*num_trials*len(seed_list)),np.sum(rewards['patient_utilities'])/(num_patients*num_trials*len(seed_list)))

provider_focused_linear_regularization_0.25 policy
1 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1.] [0. 1. 0. 0. 3. 1. 2. 0. 1. 1. 0. 0. 1. 3. 1. 1. 1. 0. 2. 0. 2. 1. 0. 2.
 2.]
Took 0.10905957221984863 time
0.25 0.4896 0.37463299843460773
provider_focused_linear_regularization_0.5 policy
1 [1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 0. 0. 1. 0. 1. 1. 1.
 1.] [0. 1. 1. 0. 1. 1. 2. 1. 0. 1. 0. 0. 1. 1. 1. 1. 1. 0. 0. 0. 2. 1. 0. 1.
 2.]
Took 0.13837814331054688 time
0.5 0.49 0.39508399837446667
provider_focused_linear_regularization_1 policy
1 [0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 0. 1. 1. 1. 1. 0. 0. 0. 1. 0. 1. 1. 0.
 1.] [0. 0. 0. 0. 0. 1. 2. 1. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 1.
 3.]
Took 0.0995171070098877 time
1 0.2404 0.2243772777589306
provider_focused_linear_regularization_2 policy
1 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0.] [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0

In [14]:
# policy = one_shot_policy 
# for lamb in [0,0.1,0.25,0.5]:#,1,2,4]:
#     per_epoch_function = provider_focused_log_regularization_policy(lamb)
#     name = "provider_focused_log_regularization_{}".format(lamb)
#     print("{} policy".format(name))

#     rewards, simulator = run_multi_seed(seed_list,policy,results['parameters'],per_epoch_function)

#     results['{}_matches'.format(name)] = rewards['matches']
#     results['{}_utilities'.format(name)] = rewards['patient_utilities']
#     results['{}_workloads'.format(name)] = rewards['provider_workloads']

#     results['{}_minimums'.format(name)] = rewards['provider_minimums']
#     results['{}_minimums_all'.format(name)] = rewards['provider_minimums_all']
#     results['{}_gaps'.format(name)] = rewards['provider_gaps']
#     results['{}_gaps_all'.format(name)] = rewards['provider_gaps_all']
#     results['{}_variance'.format(name)] = rewards['provider_variance']
#     results['{}_variance_all'.format(name)] = rewards['provider_variance_all']
#     results['{}_workload_diff'.format(name)] = [max(rewards['final_workloads'][0][i])-max(rewards['initial_workloads'][0][i]) for i in range(len(rewards['final_workloads'][0]))]

#     print(lamb,np.sum(rewards['matches'])/(num_patients*num_trials*len(seed_list)),np.sum(rewards['patient_utilities'])/(num_patients*num_trials*len(seed_list)))

In [9]:
policy = one_shot_policy 
per_epoch_function = gradient_descent_policy
name = "gradient_descent"
print("{} policy".format(name))

rewards, simulator = run_multi_seed(seed_list,policy,results['parameters'],per_epoch_function)

results['{}_matches'.format(name)] = rewards['matches']
results['{}_utilities'.format(name)] = rewards['patient_utilities']
results['{}_workloads'.format(name)] = rewards['provider_workloads']

results['{}_minimums'.format(name)] = rewards['provider_minimums']
results['{}_minimums_all'.format(name)] = rewards['provider_minimums_all']
results['{}_gaps'.format(name)] = rewards['provider_gaps']
results['{}_gaps_all'.format(name)] = rewards['provider_gaps_all']
results['{}_variance'.format(name)] = rewards['provider_variance']
results['{}_variance_all'.format(name)] = rewards['provider_variance_all']
results['{}_workload_diff'.format(name)] = [max(rewards['final_workloads'][0][i])-max(rewards['initial_workloads'][0][i]) for i in range(len(rewards['final_workloads'][0]))]

print(np.sum(rewards['matches'])/(num_patients*num_trials*len(seed_list)),np.sum(rewards['patient_utilities'])/(num_patients*num_trials*len(seed_list)))

gradient_descent policy
Took 0.0078582763671875 time
0.765 0.7498209475184663


In [132]:
def objective(x, theta, p,lamb=1):
    # Compute the sum of x over rows for each column
    sum_x = torch.sum(x, dim=0)  # Shape: (columns,)
    
    # Compute the sum of x across all columns for each row
    row_sums = torch.sum(x, dim=1, keepdim=True)  # Shape: (rows, 1)
    
    # Normalize x by row sums
    normalized_x = (x / torch.maximum(row_sums, torch.tensor(1.0, device=sum_x.device)))  # Avoid division by zero
    
    # Compute numerator for the first term (using normalized x)
    term1_num = (1 - (1 - p) ** sum_x) * torch.sum(normalized_x * theta, dim=0)
    
    # Compute denominator for the first term (using normalized x)
    term1_den = torch.sum(x, dim=0)+1e-5  # Normalize column sum by total row sums
    
    # Smoothly adjust the term using sum_x as a weighting factor
    # smooth_factor = (sum_x /torch.maximum(sum_x, torch.tensor(1.0, device=sum_x.device))) # Smoothly scales from 0 to 1
    
    term1 = 1 * (term1_num / (term1_den+1e-5))
    # Sum over columns and normalize by M
    term1 = torch.sum(term1) / theta.shape[1]

    term1 -= lamb*torch.sum(x*(1-x))

    return term1

In [100]:
def objective(z, theta, p, lamb=1, smooth_reg='entropy', epsilon=1e-5):
    # Reparameterize x using sigmoid
    x = torch.sigmoid(z)  # x is now bounded in [0, 1]
    
    # Compute the sum of x over rows for each column
    sum_x = torch.sum(x, dim=0)  # Shape: (columns,)
    
    # Compute the sum of x across all columns for each row
    row_sums = torch.sum(x, dim=1, keepdim=True)  # Shape: (rows, 1)
    
    # Normalize x by row sums
    normalized_x = x / torch.maximum(row_sums, torch.tensor(1.0, device=sum_x.device))  # Avoid division by zero
    
    # Compute numerator for the first term (using normalized x)
    term1_num = (1 - (1 - p) ** sum_x) * torch.sum(normalized_x * theta, dim=0)
    
    # Compute denominator for the first term (using normalized x)
    term1_den = torch.sum(x, dim=0) + 1e-8  # Avoid division by zero
    term1_den = torch.maximum(term1_den,torch.tensor(1.0, device=sum_x.device))

    # Compute the main term
    term1 = (term1_num / term1_den)
        
    
    term1 = torch.sum(term1) / theta.shape[1]  # Normalize by number of columns



    # Add smooth regularization term
    if smooth_reg == 'logit':
        reg_term = torch.sum(torch.logit(x, eps=epsilon) ** 2)  # Logit-based penalty
    elif smooth_reg == 'entropy':
        reg_term = -torch.sum(x * torch.log(x + epsilon) + (1 - x) * torch.log(1 - x + epsilon))  # Entropy-based penalty
    else:
        raise ValueError("Unsupported regularization: choose 'logit' or 'entropy'")
    
    print("Term 1 {}, lamb {}".format(term1,lamb))
    print("Reg term {}".format(x * torch.log(x + epsilon) + (1 - x) * torch.log(1 - x + epsilon)))
    # Final loss with regularization
    loss = term1 - lamb * reg_term

    print("Reg term {}".format(reg_term))
    
    return loss


In [175]:
def test_gradient_descent(simulator):
    p = simulator.choice_model_settings['top_choice_prob']

    theta = [p.provider_rewards for p in simulator.patients]
    theta = torch.Tensor(theta)
    N = len(simulator.patients)
    M = theta.shape[1]

    lamb = 0
    lamb2 = 0

    best_loss = float('inf')
    best_x = None
    for _ in range(1):  # Run 5 independent optimizations
        x = torch.rand(N, M, requires_grad=True)  # Variables to optimize (not constrained to [0, 1])

        # Optimizer
        optimizer = optim.Adam([x], lr=0.1)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=200, gamma=0.5)

        values_by_loss = []

        # Training loop
        for epoch in range(1000):  # Adjust number of iterations as needed
            optimizer.zero_grad()
            
            if epoch > 200:
                lamb = 0.25
            else:
                lamb = 0
            # Compute the objective
            loss = -objective(x, theta, p,lamb=lamb)

            print("X {}, loss {}".format(x,loss))
            
            # Backpropagation
            loss.backward()
            torch.nn.utils.clip_grad_norm_([x], max_norm=10)

            # Gradient step
            optimizer.step()
            scheduler.step()
            
            # Clip x to enforce constraints
            # with torch.no_grad():
            #     x.clamp_(0, 1)  # Ensure x_{i,j} stays in [0, 1]
            values_by_loss.append((loss.detach(),torch.sigmoid(x).detach()))
        loss_values = [i[0] for i in values_by_loss]
        final_loss = np.min(loss_values)
        print("Loss {}".format(final_loss))
        if final_loss < best_loss:
            best_loss = final_loss
            best_x = values_by_loss[np.argmin(loss_values)][1]
    print("Best loss {}".format(best_loss))
    return ((best_x).detach().numpy())


In [169]:
theta = [p.provider_rewards for p in simulator.patients]
theta = torch.Tensor(theta)
theta

tensor([[0.7225, 0.6581, 0.9364, 0.4725, 0.3365, 0.9076, 0.3150, 0.4793, 0.9375,
         0.0904],
        [0.8381, 0.7274, 0.8825, 0.5129, 0.1084, 1.0000, 0.2919, 0.3870, 0.9331,
         0.0857],
        [1.0000, 0.5406, 0.8920, 0.6158, 0.0449, 0.8151, 0.3776, 0.3419, 0.9095,
         0.0308],
        [0.9192, 0.6188, 1.0000, 0.5839, 0.4209, 1.0000, 0.2674, 0.3736, 1.0000,
         0.0470],
        [0.7711, 0.7241, 1.0000, 0.5725, 0.1649, 0.9143, 0.2715, 0.4689, 0.7236,
         0.0000],
        [1.0000, 0.5708, 0.9881, 0.4770, 0.1797, 0.8299, 0.2128, 0.3730, 1.0000,
         0.1201],
        [0.8821, 0.7085, 1.0000, 0.4989, 0.2984, 0.9931, 0.4374, 0.3854, 0.8939,
         0.0000],
        [0.9078, 0.6863, 0.7930, 0.5520, 0.0000, 0.9860, 0.1616, 0.3985, 0.8697,
         0.2510],
        [1.0000, 0.8042, 1.0000, 0.4749, 0.1648, 0.9854, 0.1399, 0.2984, 0.9512,
         0.1290],
        [1.0000, 0.7499, 0.9906, 0.3541, 0.0377, 0.9957, 0.1135, 0.3419, 0.8217,
         0.1640]])

In [161]:
opt_tensor = torch.Tensor(lp_policy(simulator))
x = torch.Tensor([[0, 0, 1],
       [1, 0, 0],
       [1, 1, 0]])

In [165]:
objective(opt_tensor*10000-10000/2,theta,1,0)

Term 1 num tensor([1.0000, 0.8042, 1.0000, 0.6158, 0.4209, 1.0000, 0.4374, 0.4793, 1.0000,
        0.2510]) term 1 den tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
Term 1 0.7008588910102844, lamb 0
Reg term -0.0010013531427830458


tensor(0.7009)

In [178]:
x = torch.Tensor([[0.00543958, 0.01016342, 0.03320942, 0.02218951, 0.22548756,
        0.02486089, 0.02196584, 0.5690891 , 0.08391582, 0.00445737],
       [0.01757196, 0.21789153, 0.02834556, 0.07177927, 0.00220909,
        0.477677  , 0.01691479, 0.03539304, 0.12520522, 0.00489613],
       [0.21458767, 0.0072533 , 0.01050226, 0.5556463 , 0.00155194,
        0.01044561, 0.15096031, 0.00889995, 0.03931933, 0.00236623],
       [0.01108674, 0.01017485, 0.04443396, 0.06130726, 0.73342377,
        0.03912738, 0.00566547, 0.00488548, 0.07929403, 0.00322256],
       [0.00560649, 0.04872912, 0.34420466, 0.22532572, 0.00450574,
        0.02177885, 0.01385621, 0.32526928, 0.00392036, 0.00343825],
       [0.30042124, 0.00423778, 0.12924159, 0.01110364, 0.00571451,
        0.01021431, 0.00431036, 0.00889621, 0.509201  , 0.00903933],
       [0.0093897 , 0.01876806, 0.08391954, 0.0096648 , 0.02022246,
        0.05111115, 0.7767873 , 0.00855207, 0.01656941, 0.00295823],
       [0.02362274, 0.01194031, 0.00493261, 0.03258845, 0.00149355,
        0.05636407, 0.00228826, 0.01568928, 0.01867069, 0.8327569 ],
       [0.16980298, 0.45016873, 0.13618155, 0.01291505, 0.00351478,
        0.07806238, 0.00235695, 0.00616071, 0.11876746, 0.00973192],
       [0.2456705 , 0.23025295, 0.20153669, 0.0031023 , 0.00286899,
        0.24016902, 0.00360834, 0.00712348, 0.01289737, 0.0225281 ]])
objective(x*10000-10000/2,theta,1,1)

Term 1 num tensor([0.0000, 0.0000, 0.0000, 0.6158, 0.4209, 0.0000, 0.4374, 0.4793, 1.0000,
        0.2510]) term 1 den tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
Term 1 0.32044273614883423, lamb 1
Reg term -0.0010013531427830458


tensor(0.3214)

In [176]:
test_gradient_descent(simulator)

Term 1 num tensor([0.9282, 0.6779, 0.9649, 0.5179, 0.1799, 0.9095, 0.2488, 0.3909, 0.9226,
        0.0934], grad_fn=<MulBackward0>) term 1 den tensor([6.3909, 6.2281, 6.3277, 6.2544, 6.1923, 6.0367, 6.0039, 6.3266, 6.3321,
        6.1373], grad_fn=<MaximumBackward>)
Term 1 0.0933268815279007, lamb 0
Reg term 65.33904266357422
X tensor([[0.6372, 0.0233, 0.3152, 0.9487, 0.4189, 0.4831, 0.1576, 0.4015, 0.3593,
         0.0697],
        [0.8613, 0.8515, 0.8076, 0.7943, 0.0207, 0.1083, 0.1128, 0.7372, 0.2782,
         0.0391],
        [0.7445, 0.9774, 0.2297, 0.5090, 0.0267, 0.9465, 0.3224, 0.8456, 0.8445,
         0.0540],
        [0.2427, 0.9339, 0.4684, 0.7925, 0.4253, 0.3153, 0.4266, 0.1929, 0.2558,
         0.5528],
        [0.5552, 0.5366, 0.9717, 0.7552, 0.7878, 0.4558, 0.9584, 0.6534, 0.2616,
         0.7370],
        [0.8779, 0.1140, 0.6058, 0.3296, 0.9080, 0.6982, 0.3943, 0.2944, 0.6968,
         0.7666],
        [0.3750, 0.5406, 0.6673, 0.2846, 0.9687, 0.3764, 0.6014, 0.5145, 0.5

array([[0.00543958, 0.01016342, 0.03320942, 0.02218951, 0.22548756,
        0.02486089, 0.02196584, 0.5690891 , 0.08391582, 0.00445737],
       [0.01757196, 0.21789153, 0.02834556, 0.07177927, 0.00220909,
        0.477677  , 0.01691479, 0.03539304, 0.12520522, 0.00489613],
       [0.21458767, 0.0072533 , 0.01050226, 0.5556463 , 0.00155194,
        0.01044561, 0.15096031, 0.00889995, 0.03931933, 0.00236623],
       [0.01108674, 0.01017485, 0.04443396, 0.06130726, 0.73342377,
        0.03912738, 0.00566547, 0.00488548, 0.07929403, 0.00322256],
       [0.00560649, 0.04872912, 0.34420466, 0.22532572, 0.00450574,
        0.02177885, 0.01385621, 0.32526928, 0.00392036, 0.00343825],
       [0.30042124, 0.00423778, 0.12924159, 0.01110364, 0.00571451,
        0.01021431, 0.00431036, 0.00889621, 0.509201  , 0.00903933],
       [0.0093897 , 0.01876806, 0.08391954, 0.0096648 , 0.02022246,
        0.05111115, 0.7767873 , 0.00855207, 0.01656941, 0.00295823],
       [0.02362274, 0.01194031, 0.0049326

## Save Data

In [None]:
save_path = get_save_path(out_folder,save_name)

In [None]:
delete_duplicate_results(out_folder,"",results)

In [None]:
json.dump(results,open('../../results/'+save_path,'w'),cls=MyEncoder)