In [1]:
# Enable IPython's autoreload extension so that edits to imported project
# modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys

# Directory that contains the `patient` package imported by the cells below.
# The historical location is kept as the default so existing setups keep working;
# set the PATIENT_PROVIDER_ROOT environment variable to point at another checkout.
PROJECT_ROOT = os.environ.get('PATIENT_PROVIDER_ROOT', '/usr0/home/naveenr/projects/patient_provider')

# Guard against growing sys.path with duplicates every time this cell is re-run.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [3]:
import numpy as np
import random 
import matplotlib.pyplot as plt
import argparse
import secrets
import json

In [4]:
from patient.simulator import run_multi_seed
from patient.baseline_policies import random_policy, greedy_policy
from patient.online_policies import p_approximation, p_approximation_balance, p_approximation_with_additions, p_approximation_with_additions_balance, p_approximation_with_additions_balance_learning, solve_linear_program
from patient.offline_policies import offline_solution, offline_solution_balance, offline_learning_solution
from patient.utils import get_save_path, delete_duplicate_results, restrict_resources
from patient.learning import guess_coefficients

In [5]:
# True when the `ipykernel` module has been loaded into this process. The next
# cell uses this flag to choose between hand-set parameters and argparse.
is_jupyter = 'ipykernel' in sys.modules

In [184]:
# Experiment configuration.
# When `is_jupyter` is set the parameters are the hand-edited values below;
# otherwise they are read from the command line (with the defaults listed).
if is_jupyter:
    seed        = 43
    num_patients = 10
    num_providers = 30
    provider_capacity = 1
    top_choice_prob = 0.25
    choice_model = "uniform_choice"
    out_folder = "policy_comparison"
    exit_option = 0.5
    true_top_choice_prob = 0.25
    num_repetitions = 20
    num_trials = 1
    context_dim = 5
    max_menu_size = 3
else:
    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', help='Random Seed', type=int, default=42)
    parser.add_argument('--n_patients',         '-N', help='Number of patients', type=int, default=100)
    parser.add_argument('--n_providers',        help='Number of providers', type=int, default=100)
    parser.add_argument('--n_trials',          help='Number of trials ', type=int, default=2)
    parser.add_argument('--top_choice_prob',          help='Probability of picking top choice', type=float, default=0.75)
    # The help texts of the next three options used to be copy-pasted from other
    # options; they now describe the option they are attached to.
    parser.add_argument('--true_top_choice_prob',          help='True probability of picking top choice', type=float, default=0.75)
    parser.add_argument('--context_dim',          help='Context dim for patients and providers', type=int, default=5)
    parser.add_argument('--max_menu_size',          help='Maximum menu size', type=int, default=5)
    parser.add_argument('--num_repetitions',          help='Number of repetitions', type=int, default=10)
    parser.add_argument('--provider_capacity', help='Provider Capacity', type=int, default=5)
    parser.add_argument('--choice_model', help='Which choice model for patients', type=str, default='uniform_choice')
    parser.add_argument('--exit_option', help='What is the value of the exit option', type=float, default=0.5)
    parser.add_argument('--out_folder', help='Which folder to write results to', type=str, default='policy_comparison')

    args = parser.parse_args()

    # Copy the parsed options into the same variable names the notebook branch sets,
    # so the rest of the file does not care where the values came from.
    seed = args.seed
    num_patients = args.n_patients
    num_providers = args.n_providers
    provider_capacity = args.provider_capacity
    top_choice_prob = args.top_choice_prob
    choice_model = args.choice_model
    exit_option = args.exit_option
    out_folder = args.out_folder
    num_trials = args.n_trials
    context_dim = args.context_dim
    num_repetitions = args.num_repetitions
    true_top_choice_prob = args.true_top_choice_prob
    max_menu_size = args.max_menu_size

# Random 8-hex-character tag; passed to get_save_path() when the results are saved.
save_name = secrets.token_hex(4)

In [185]:
# Accumulator for everything this run produces. The 'parameters' entry records
# the configuration and is also what gets handed to run_multi_seed().
results = {
    'parameters': {
        'seed': seed,
        'num_patients': num_patients,
        'num_providers': num_providers,
        'provider_capacity': provider_capacity,
        'top_choice_prob': top_choice_prob,
        'choice_model': choice_model,
        'exit_option': exit_option,
        'num_trials': num_trials,
        'context_dim': context_dim,
        'true_top_choice_prob': true_top_choice_prob,
        'num_repetitions': num_repetitions,
        'max_menu_size': max_menu_size,
    },
}

## Baselines

In [186]:
# Seeds handed to run_multi_seed() by every policy cell below; only the single
# configured seed is used here.
seed_list = [seed]
# Helper imported from patient.utils; its effect is not visible in this file.
# NOTE(review): presumably limits CPU/thread usage -- confirm against patient/utils.
restrict_resources()

In [187]:
# Evaluate the random baseline policy.
name = "random"
policy = random_policy

rewards, simulator = run_multi_seed(seed_list, policy, results['parameters'])

# File the raw metrics under "<name>_<metric>" keys of the shared results dict.
results.update({
    '{}_matches'.format(name): rewards['matches'],
    '{}_utilities'.format(name): rewards['patient_utilities'],
    '{}_workloads'.format(name): rewards['provider_workloads'],
})

# Cell output: mean matches and mean patient utility, each divided by
# num_patients * num_trials, followed by the std of the provider workloads.
(
    np.mean(rewards['matches']) / (num_patients * num_trials),
    np.mean(rewards['patient_utilities']) / (num_patients * num_trials),
    np.std(rewards['provider_workloads']),
)

Took 0.00045943260192871094 time
Took 0.0009038448333740234 time
Took 0.0008237361907958984 time
Took 0.0010585784912109375 time
Took 0.0009770393371582031 time
Took 0.0006456375122070312 time
Took 0.0010619163513183594 time
Took 0.00037598609924316406 time
Took 0.0004639625549316406 time
Took 0.00032329559326171875 time
Took 0.00036597251892089844 time
Took 0.0005540847778320312 time
Took 0.00037932395935058594 time
Took 0.0004949569702148438 time
Took 0.00031948089599609375 time
Took 0.0003256797790527344 time
Took 0.0004410743713378906 time
Took 0.0003676414489746094 time
Took 0.00040459632873535156 time
Took 0.0004074573516845703 time


(0.28500000000000003, 0.2224625744337529, 0.293214938227915)

In [188]:
# Evaluate the greedy baseline policy.
name = "greedy"
policy = greedy_policy

rewards, simulator = run_multi_seed(seed_list, policy, results['parameters'])

# File the raw metrics under "<name>_<metric>" keys of the shared results dict.
results.update({
    '{}_matches'.format(name): rewards['matches'],
    '{}_utilities'.format(name): rewards['patient_utilities'],
    '{}_workloads'.format(name): rewards['provider_workloads'],
})

# Cell output: mean matches and mean patient utility, each divided by
# num_patients * num_trials, followed by the std of the provider workloads.
(
    np.mean(rewards['matches']) / (num_patients * num_trials),
    np.mean(rewards['patient_utilities']) / (num_patients * num_trials),
    np.std(rewards['provider_workloads']),
)

Took 0.00038623809814453125 time
Took 0.000335693359375 time
Took 0.0002925395965576172 time
Took 0.0004062652587890625 time
Took 0.0003654956817626953 time
Took 0.0002048015594482422 time
Took 0.0004050731658935547 time
Took 0.00028705596923828125 time
Took 0.00036406517028808594 time
Took 0.0002491474151611328 time
Took 0.0002930164337158203 time
Took 0.0004038810729980469 time
Took 0.0002951622009277344 time
Took 0.0004057884216308594 time
Took 0.0002529621124267578 time
Took 0.0002493858337402344 time
Took 0.0003705024719238281 time
Took 0.00028777122497558594 time
Took 0.00032901763916015625 time
Took 0.0003292560577392578 time


(0.28500000000000003, 0.20673231466265785, 0.29321493822791495)

## Online

In [189]:
# Evaluate the p_approximation online policy.
name = "p_approximation"
policy = p_approximation

rewards, simulator = run_multi_seed(seed_list, policy, results['parameters'])

# File the raw metrics under "<name>_<metric>" keys of the shared results dict.
results.update({
    '{}_matches'.format(name): rewards['matches'],
    '{}_utilities'.format(name): rewards['patient_utilities'],
    '{}_workloads'.format(name): rewards['provider_workloads'],
})

# Cell output: mean matches and mean patient utility, each divided by
# num_patients * num_trials, followed by the std of the provider workloads.
(
    np.mean(rewards['matches']) / (num_patients * num_trials),
    np.mean(rewards['patient_utilities']) / (num_patients * num_trials),
    np.std(rewards['provider_workloads']),
)

Took 0.02940082550048828 time
Took 0.011836051940917969 time
Took 0.01099395751953125 time
Took 0.010931015014648438 time
Took 0.010799884796142578 time
Took 0.010624170303344727 time
Took 0.010961532592773438 time
Took 0.010861873626708984 time
Took 0.010889530181884766 time
Took 0.010916948318481445 time
Took 0.010812520980834961 time
Took 0.010880708694458008 time
Took 0.010921239852905273 time
Took 0.010839700698852539 time
Took 0.010724306106567383 time
Took 0.010925531387329102 time
Took 0.011120796203613281 time
Took 0.011067867279052734 time
Took 0.010752677917480469 time
Took 0.010899782180786133 time


(0.28500000000000003, 0.25181454132312714, 0.293214938227915)

In [190]:
# Evaluate the p_approximation_balance online policy.
name = "p_approximation_balance"
policy = p_approximation_balance

rewards, simulator = run_multi_seed(seed_list, policy, results['parameters'])

# File the raw metrics under "<name>_<metric>" keys of the shared results dict.
results.update({
    '{}_matches'.format(name): rewards['matches'],
    '{}_utilities'.format(name): rewards['patient_utilities'],
    '{}_workloads'.format(name): rewards['provider_workloads'],
})

# Cell output: mean matches and mean patient utility, each divided by
# num_patients * num_trials, followed by the std of the provider workloads.
(
    np.mean(rewards['matches']) / (num_patients * num_trials),
    np.mean(rewards['patient_utilities']) / (num_patients * num_trials),
    np.std(rewards['provider_workloads']),
)

Took 0.02114582061767578 time


Took 0.017541885375976562 time
Took 0.022260189056396484 time
Took 0.017647981643676758 time
Took 0.017377376556396484 time
Took 0.017734050750732422 time
Took 0.01768326759338379 time
Took 0.017641305923461914 time
Took 0.017470598220825195 time
Took 0.01785588264465332 time
Took 0.01731109619140625 time
Took 0.01766061782836914 time
Took 0.01773858070373535 time
Took 0.01761150360107422 time
Took 0.01749396324157715 time
Took 0.017453670501708984 time
Took 0.017470598220825195 time
Took 0.017267465591430664 time
Took 0.017473220825195312 time
Took 0.017688512802124023 time


(0.28500000000000003, 0.25181454132312714, 0.293214938227915)

In [191]:
# Evaluate the p_approximation_with_additions online policy.
# Note the results key prefix is the shorter "p_approximation_additions".
name = "p_approximation_additions"
policy = p_approximation_with_additions

rewards, simulator = run_multi_seed(seed_list, policy, results['parameters'])

# File the raw metrics under "<name>_<metric>" keys of the shared results dict.
results.update({
    '{}_matches'.format(name): rewards['matches'],
    '{}_utilities'.format(name): rewards['patient_utilities'],
    '{}_workloads'.format(name): rewards['provider_workloads'],
})

# Cell output: mean matches and mean patient utility, each divided by
# num_patients * num_trials, followed by the std of the provider workloads.
(
    np.mean(rewards['matches']) / (num_patients * num_trials),
    np.mean(rewards['patient_utilities']) / (num_patients * num_trials),
    np.std(rewards['provider_workloads']),
)

Took 0.02149486541748047 time


Took 0.01766204833984375 time
Took 0.019522666931152344 time
Took 0.017618179321289062 time
Took 0.017702341079711914 time
Took 0.02013111114501953 time
Took 0.01750040054321289 time
Took 0.017164230346679688 time
Took 0.0171356201171875 time
Took 0.017150402069091797 time
Took 0.017426490783691406 time
Took 0.017616748809814453 time
Took 0.01759505271911621 time
Took 0.017780065536499023 time
Took 0.017434358596801758 time
Took 0.017384052276611328 time
Took 0.017458677291870117 time
Took 0.017269611358642578 time
Took 0.017216920852661133 time
Took 0.017216205596923828 time


(0.28500000000000003, 0.2517739508097061, 0.293214938227915)

In [192]:
# Evaluate the p_approximation_with_additions_balance online policy.
# Note the results key prefix is the shorter "p_approximation_additions_balance".
name = "p_approximation_additions_balance"
policy = p_approximation_with_additions_balance

rewards, simulator = run_multi_seed(seed_list, policy, results['parameters'])

# File the raw metrics under "<name>_<metric>" keys of the shared results dict.
results.update({
    '{}_matches'.format(name): rewards['matches'],
    '{}_utilities'.format(name): rewards['patient_utilities'],
    '{}_workloads'.format(name): rewards['provider_workloads'],
})

# Cell output: mean matches and mean patient utility, each divided by
# num_patients * num_trials, followed by the std of the provider workloads.
(
    np.mean(rewards['matches']) / (num_patients * num_trials),
    np.mean(rewards['patient_utilities']) / (num_patients * num_trials),
    np.std(rewards['provider_workloads']),
)

Took 0.02791595458984375 time
Took 0.02511143684387207 time
Took 0.0240633487701416 time
Took 0.025356531143188477 time
Took 0.024120092391967773 time
Took 0.035864830017089844 time
Took 0.024016857147216797 time
Took 0.023802757263183594 time
Took 0.023792505264282227 time
Took 0.023800134658813477 time
Took 0.02388787269592285 time
Took 0.024000167846679688 time
Took 0.02411818504333496 time
Took 0.023822784423828125 time
Took 0.02385258674621582 time
Took 0.023755550384521484 time
Took 0.02470707893371582 time
Took 0.02528095245361328 time
Took 0.025487422943115234 time
Took 0.02400994300842285 time


(0.28500000000000003, 0.2517739508097061, 0.293214938227915)

In [193]:
# Evaluate the p_approximation_with_additions_balance_learning online policy.
# Note the results key prefix drops the "with_" part of the function name.
name = "p_approximation_additions_balance_learning"
policy = p_approximation_with_additions_balance_learning

rewards, simulator = run_multi_seed(seed_list, policy, results['parameters'])

# File the raw metrics under "<name>_<metric>" keys of the shared results dict.
results.update({
    '{}_matches'.format(name): rewards['matches'],
    '{}_utilities'.format(name): rewards['patient_utilities'],
    '{}_workloads'.format(name): rewards['provider_workloads'],
})

# Cell output: mean matches and mean patient utility, each divided by
# num_patients * num_trials, followed by the std of the provider workloads.
(
    np.mean(rewards['matches']) / (num_patients * num_trials),
    np.mean(rewards['patient_utilities']) / (num_patients * num_trials),
    np.std(rewards['provider_workloads']),
)

Took 0.02613687515258789 time
Took 0.02606368064880371 time
Took 0.025888442993164062 time
Took 0.026528358459472656 time
Took 0.026907682418823242 time
Took 0.030434608459472656 time
Took 0.025998592376708984 time
Took 0.026938915252685547 time
Took 0.02877521514892578 time
Took 0.0255887508392334 time
Took 0.025862932205200195 time
Took 0.030858516693115234 time
Took 0.027021169662475586 time
Took 0.025871753692626953 time
Took 0.025850296020507812 time
Took 0.025569915771484375 time
Took 0.02601337432861328 time
Took 0.025961637496948242 time
Took 0.02568650245666504 time
Took 0.025908470153808594 time


(0.28500000000000003, 0.24388131220707293, 0.293214938227915)

## Offline

In [194]:
# Evaluate the offline_solution policy.
name = "offline_solution"
policy = offline_solution

rewards, simulator = run_multi_seed(seed_list, policy, results['parameters'])

# File the raw metrics under "<name>_<metric>" keys of the shared results dict.
results.update({
    '{}_matches'.format(name): rewards['matches'],
    '{}_utilities'.format(name): rewards['patient_utilities'],
    '{}_workloads'.format(name): rewards['provider_workloads'],
})

# Cell output: mean matches and mean patient utility, each divided by
# num_patients * num_trials, followed by the std of the provider workloads.
(
    np.mean(rewards['matches']) / (num_patients * num_trials),
    np.mean(rewards['patient_utilities']) / (num_patients * num_trials),
    np.std(rewards['provider_workloads']),
)

Took 0.011509418487548828 time
Took 0.011120319366455078 time
Took 0.010941743850708008 time
Took 0.011339187622070312 time
Took 0.011368513107299805 time
Took 0.01129770278930664 time
Took 0.011142253875732422 time
Took 0.010955333709716797 time
Took 0.01154184341430664 time
Took 0.011076688766479492 time
Took 0.011054754257202148 time
Took 0.01105046272277832 time
Took 0.010973930358886719 time
Took 0.011207342147827148 time
Took 0.011239767074584961 time
Took 0.011190414428710938 time
Took 0.011194467544555664 time
Took 0.011206865310668945 time
Took 0.011359930038452148 time
Took 0.011170387268066406 time


(0.28500000000000003, 0.25181454132312714, 0.293214938227915)

In [195]:
# Evaluate the offline_learning_solution policy.
name = "offline_learning_solution"
policy = offline_learning_solution

rewards, simulator = run_multi_seed(seed_list, policy, results['parameters'])

# File the raw metrics under "<name>_<metric>" keys of the shared results dict.
results.update({
    '{}_matches'.format(name): rewards['matches'],
    '{}_utilities'.format(name): rewards['patient_utilities'],
    '{}_workloads'.format(name): rewards['provider_workloads'],
})

# Cell output: mean matches and mean patient utility, each divided by
# num_patients * num_trials, followed by the std of the provider workloads.
(
    np.mean(rewards['matches']) / (num_patients * num_trials),
    np.mean(rewards['patient_utilities']) / (num_patients * num_trials),
    np.std(rewards['provider_workloads']),
)

Took 0.013624191284179688 time
Took 0.012884140014648438 time
Took 0.013100624084472656 time


Took 0.014070987701416016 time
Took 0.013189554214477539 time
Took 0.012821197509765625 time
Took 0.013026237487792969 time
Took 0.012816190719604492 time
Took 0.012999296188354492 time
Took 0.012992143630981445 time
Took 0.012775897979736328 time
Took 0.013210535049438477 time
Took 0.013265371322631836 time
Took 0.012979745864868164 time
Took 0.012797117233276367 time
Took 0.012786149978637695 time
Took 0.012835264205932617 time
Took 0.012882232666015625 time
Took 0.012775897979736328 time
Took 0.012722492218017578 time


(0.28500000000000003, 0.2510279995963539, 0.293214938227915)

In [196]:
# Evaluate the offline_solution_balance policy.
name = "offline_solution_balance"
policy = offline_solution_balance

rewards, simulator = run_multi_seed(seed_list, policy, results['parameters'])

# File the raw metrics under "<name>_<metric>" keys of the shared results dict.
results.update({
    '{}_matches'.format(name): rewards['matches'],
    '{}_utilities'.format(name): rewards['patient_utilities'],
    '{}_workloads'.format(name): rewards['provider_workloads'],
})

# Cell output: mean matches and mean patient utility, each divided by
# num_patients * num_trials, followed by the std of the provider workloads.
(
    np.mean(rewards['matches']) / (num_patients * num_trials),
    np.mean(rewards['patient_utilities']) / (num_patients * num_trials),
    np.std(rewards['provider_workloads']),
)

Took 0.01783013343811035 time


Took 0.017917156219482422 time
Took 0.018023252487182617 time
Took 0.0179445743560791 time
Took 0.017887353897094727 time
Took 0.017737388610839844 time
Took 0.01779460906982422 time
Took 0.01763606071472168 time
Took 0.017967700958251953 time
Took 0.01766037940979004 time
Took 0.017620086669921875 time
Took 0.01778554916381836 time
Took 0.0177152156829834 time
Took 0.01778388023376465 time
Took 0.017684459686279297 time
Took 0.0175321102142334 time
Took 0.01774764060974121 time
Took 0.017699003219604492 time
Took 0.017595767974853516 time
Took 0.017682552337646484 time


(0.28500000000000003, 0.25181454132312714, 0.293214938227915)

## Save Data

In [91]:
# Ask the project helper for the output file path of this run, built from the
# output folder and the random run tag. The final cell prefixes it with
# '../../results/' when writing.
save_path = get_save_path(out_folder,save_name)

In [92]:
# Helper imported from patient.utils; its behaviour is not visible in this file.
# NOTE(review): presumably removes previously saved results in `out_folder` whose
# parameters match this run, so the new file replaces them -- confirm before relying on it.
delete_duplicate_results(out_folder,"",results)

In [93]:
# Write the collected results to disk as JSON. The context manager closes (and
# therefore flushes) the file even if serialization raises part-way through;
# the previous bare open() left the handle to be closed by garbage collection.
with open('../../results/'+save_path,'w') as results_file:
    json.dump(results, results_file)