In [2]:
# Enable IPython's autoreload extension in mode 2 so that imported modules are
# reloaded before each cell executes (edits to project code are picked up
# without restarting the kernel).
%load_ext autoreload
%autoreload 2

In [3]:
import sys
# Make the project checkout importable so `certificate.*` can be resolved.
# NOTE(review): these are hard-coded, machine-specific absolute paths; consider
# installing the package or deriving the path from a configurable location.
# The membership check keeps the cell idempotent: re-running it no longer
# appends duplicate entries to sys.path.
for project_root in (
    '/Users/scortesg/Documents/risk_certificate',
    '/usr0/home/naveenr/projects/risk_certificate',
):
    if project_root not in sys.path:
        sys.path.append(project_root)

In [4]:
import matplotlib.pyplot as plt
import pickle
import numpy as np
import random
import argparse
import secrets
from certificate.run_simulations import run_experiments, delete_duplicate_results
import json 

In [5]:
# True when the `ipykernel` module is loaded in this process, i.e. the code is
# running inside a Jupyter kernel; used below to choose between hard-coded
# notebook settings and command-line arguments.
is_jupyter = 'ipykernel' in sys.modules

In [108]:
# Experiment settings.
# In a notebook the values are fixed here; when the file is executed as a
# script they are read from the command line instead.
if is_jupyter:
    seed = 43
    trials = 100
    n_arms = 10
    max_pulls_per_arm = 50
    first_stage_pulls_per_arm = 25
    arm_distribution = 'beta_misspecified'
    out_folder = "prior"
    arm_parameters = {'alpha': 50, 'beta': 50, 'diff_mean_1': 0.05, 'diff_std_1': 0.01, 'diff_mean_2': 0.01, 'diff_std_2': 0.001}
    delta = 0.1
    run_all_k = True
else:
    # The help strings below used to be copy-pasted ('Maximum pulls per arm')
    # for most options; they now describe the option they belong to.
    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', help='Random Seed', type=int, default=42)
    parser.add_argument('--trials', help='Number of trials to run', type=int, default=25)
    parser.add_argument('--n_arms', '-N', help='Number of arms', type=int, default=10)
    parser.add_argument('--max_pulls_per_arm', help='Maximum pulls per arm', type=int, default=10)
    parser.add_argument('--first_stage_pulls_per_arm', help='Number of first stage pulls per arm', type=int, default=4)
    parser.add_argument('--arm_distribution', help='Distribution of arms (uniform, beta, beta_misspecified, unimodal_diff, bimodal_diff)', type=str, default='uniform')
    parser.add_argument('--run_all_k', help="Set the 'run_all_k' flag in the experiment configuration", action='store_true')
    parser.add_argument('--delta', help='Delta value passed to the experiments', type=float, default=0.1)
    parser.add_argument('--alpha', help='Alpha parameter of the Beta distribution for arm means', type=float, default=2)
    parser.add_argument('--beta', help='Beta parameter of the Beta distribution for arm means', type=float, default=2)
    # NOTE(review): the defaults of 2 for the diff_* options look copy-pasted
    # (the notebook branch uses values such as 0.05 / 0.01); left unchanged.
    parser.add_argument('--diff_mean_1', help='Mean of the first Normal difference/perturbation term', type=float, default=2)
    parser.add_argument('--diff_std_1', help='Standard deviation of the first Normal difference/perturbation term', type=float, default=2)
    parser.add_argument('--diff_mean_2', help='Mean of the second Normal difference term (bimodal_diff)', type=float, default=2)
    parser.add_argument('--diff_std_2', help='Standard deviation of the second Normal difference term (bimodal_diff)', type=float, default=2)
    parser.add_argument('--out_folder', help='Which folder to write results to', type=str, default='policy_comparison')

    args = parser.parse_args()

    seed = args.seed
    n_arms = args.n_arms
    max_pulls_per_arm = args.max_pulls_per_arm
    first_stage_pulls_per_arm = args.first_stage_pulls_per_arm
    arm_distribution = args.arm_distribution
    out_folder = args.out_folder
    delta = args.delta
    alpha = args.alpha
    beta = args.beta
    trials = args.trials
    diff_mean_1 = args.diff_mean_1
    diff_std_1 = args.diff_std_1
    diff_mean_2 = args.diff_mean_2
    diff_std_2 = args.diff_std_2
    # Bundle the distribution parameters under the same keys the notebook
    # branch uses, so downstream cells can treat both branches alike.
    arm_parameters = {'alpha': alpha, 'beta': beta, 'diff_mean_1': diff_mean_1, 'diff_mean_2': diff_mean_2, 'diff_std_1': diff_std_1, 'diff_std_2': diff_std_2}
    run_all_k = args.run_all_k

# Random 8-hex-character identifier used as the output file's base name.
save_name = secrets.token_hex(4)

In [109]:
# Seed both global generators used when sampling arm means: Python's `random`
# (used for the 'uniform' distribution) and NumPy's global state (used for
# every other distribution).
random.seed(seed)
np.random.seed(seed)

In [110]:
def _sample_arm_means(distribution, num_arms, params):
    """Draw the mean reward of each arm for the requested distribution.

    Args:
        distribution: one of 'uniform', 'beta', 'beta_misspecified',
            'unimodal_diff' or 'bimodal_diff'.
        num_arms: number of arms to generate means for.
        params: dict with the keys 'alpha', 'beta', 'diff_mean_1',
            'diff_std_1', 'diff_mean_2', 'diff_std_2' (only the keys needed
            by the chosen distribution are read).

    Returns:
        A list of arm means.

    Raises:
        ValueError: if `distribution` is not one of the supported names
            (previously this silently produced an empty list).
    """
    if distribution == 'uniform':
        # Uses Python's `random`, not NumPy, exactly as before.
        return [random.random() for _ in range(num_arms)]

    if distribution == 'beta':
        return [np.random.beta(params['alpha'], params['beta']) for _ in range(num_arms)]

    if distribution == 'beta_misspecified':
        # Beta draw shifted by a Normal perturbation, clipped back into [0, 1].
        # The Beta sample is drawn before the Normal one for every arm.
        means = []
        for _ in range(num_arms):
            base = np.random.beta(params['alpha'], params['beta'])
            shift = np.random.normal(params['diff_mean_1'], params['diff_std_1'])
            means.append(np.clip(base + shift, 0, 1))
        return means

    if distribution in ('unimodal_diff', 'bimodal_diff'):
        # Start from a uniform draw, then obtain each following arm by
        # subtracting a Normal gap from the previous one (kept in [0.0001, 1]).
        means = [np.random.random()]
        for _ in range(1, num_arms):
            if distribution == 'unimodal_diff':
                diff = np.random.normal(params['diff_mean_1'], params['diff_std_1'])
            elif np.random.random() < 0.5:
                # bimodal: a coin flip chooses which gap component is used.
                diff = np.random.normal(params['diff_mean_1'], params['diff_std_1'])
            else:
                diff = np.random.normal(params['diff_mean_2'], params['diff_std_2'])
            means.append(min(max(means[-1] - diff, 0.0001), 1))
        return means

    raise ValueError("Unknown arm_distribution: {!r}".format(distribution))


arm_means = _sample_arm_means(arm_distribution, n_arms, arm_parameters)

In [112]:
# Configuration dictionary handed to run_experiments.
# Budgets are expressed as totals across all arms (per-arm pulls * arms).
experiment_config = {
    'number_arms': n_arms,
    'sample_size': n_arms * max_pulls_per_arm,
    'first_stage_size': n_arms * first_stage_pulls_per_arm,
}
# Arm means, sampling settings and the remaining experiment options.
experiment_config.update({
    'distribution': arm_means,
    'arm_distribution': arm_distribution,
    'random_seed': seed + 1,
    'delta': delta,
    'run_all_k': run_all_k,
    'reward_parameters': arm_parameters,
})

In [114]:
# Run every trial with its own seed (seed, seed+1, ...) and collect the
# result of each run in order.
all_results = []

for i in range(trials):
    experiment_config['random_seed'] = seed + i
    results = run_experiments(experiment_config)
    all_results += [results]

0 100
1 100
2 100
3 100
4 100
5 100
6 100
7 100
8 100
9 100
10 100
11 100
12 100
13 100
14 100
15 100
16 100
17 100
18 100
19 100
20 100
21 100
22 100
23 100
24 100
25 100
26 100
27 100
28 100
29 100
30 100
31 100
32 100
33 100
34 100
35 100
36 100
37 100
38 100
39 100
40 100
41 100
42 100
43 100
44 100
45 100
46 100
47 100
48 100
49 100
50 100
51 100
52 100
53 100
54 100
55 100
56 100
57 100
58 100
59 100
60 100
61 100
62 100
63 100
64 100
65 100
66 100
67 100
68 100
69 100
70 100
71 100
72 100
73 100
74 100
75 100
76 100
77 100
78 100
79 100
80 100
81 100
82 100
83 100
84 100
85 100
86 100
87 100
88 100
89 100
90 100
91 100
92 100
93 100
94 100
95 100
96 100
97 100
98 100
99 100


In [115]:
# Collect the per-trial outputs into one JSON-friendly structure.
# The parameters are stored as a shallow copy of the config with the base seed
# added, so this cell no longer mutates `experiment_config` (previously the
# 'seed' key was written into the shared dict through an alias).
aggregate_results = {'parameters': {**experiment_config, 'seed': seed}}

# Method names are taken from the first trial's result.
for method in all_results[0]:
    aggregate_results[method] = {
        # Best (maximum) certificate obtained in each trial.
        'certificate': [max(trial[method]['certificate']) for trial in all_results],
        # Per-trial delta values converted with .tolist()
        # (presumably NumPy arrays -- TODO confirm against run_experiments).
        'delta': [trial[method]['delta'].tolist() for trial in all_results],
        # Taken from the first trial only.
        'true_value': all_results[0][method]['true_value'],
    }



In [116]:
# Mean over trials of the per-trial maximum certificate for 'sample_split_total'.
np.mean(aggregate_results['sample_split_total']['certificate'])

0.35313344476818614

In [117]:
# Mean over trials of the per-trial maximum certificate for 'sample_split'.
np.mean(aggregate_results['sample_split']['certificate'])

0.30484964132587705

In [118]:
# When results for the 'prior' method exist, print its mean certificate
# relative to the mean certificate of 'sample_split_total'.
if 'prior' in aggregate_results:
    prior_mean = np.mean(aggregate_results['prior']['certificate'])
    baseline_mean = np.mean(aggregate_results['sample_split_total']['certificate'])
    print(prior_mean/baseline_mean)

1.0831522849468185


In [48]:
# Mean over trials of the per-trial maximum certificate for 'random'.
# NOTE(review): this cell's execution count is out of sequence with the cells
# around it, so the displayed output may come from a different run -- re-run.
np.mean(aggregate_results['random']['certificate'])

0.44941111760557

In [397]:
# Mean of all recorded delta values for the method keyed 'k_<n_arms>'.
# NOTE(review): execution count is out of sequence with the cells above, so
# the displayed output may come from a different run -- re-run to confirm.
np.mean(aggregate_results['k_{}'.format(n_arms)]['delta'])

0.34616367652045704

In [398]:
# Mean over trials of the per-trial maximum certificate for 'one_stage'.
# NOTE(review): execution count is out of sequence with the cells above, so
# the displayed output may come from a different run -- re-run to confirm.
np.mean(aggregate_results['one_stage']['certificate'])

0.5020363234795429

In [399]:
# Mean over trials of the per-trial maximum certificate for the method 'k_1'.
# NOTE(review): execution count is out of sequence with the cells above, so
# the displayed output may come from a different run -- re-run to confirm.
np.mean(aggregate_results['k_{}'.format(1)]['certificate'])

0.3500334338977605

In [400]:
# Mean over trials of the per-trial maximum certificate for 'omniscient'.
# NOTE(review): execution count is out of sequence with the cells above, so
# the displayed output may come from a different run -- re-run to confirm.
np.mean(aggregate_results['omniscient']['certificate'])

0.505036323479543

In [401]:
# Mean 'true_value' of the 'k_<n_arms>' method minus the mean of all recorded
# delta values of 'omniscient'.
# NOTE(review): execution count is out of sequence with the cells above, so
# the displayed output may come from a different run -- re-run to confirm.
np.mean(aggregate_results['k_{}'.format(n_arms)]['true_value'])-np.mean(aggregate_results['omniscient']['delta'])

-0.34616367652045704

## Write Data

In [120]:
# Output location relative to the results directory: <out_folder>/<save_name>.json
save_path = "{}/{}.json".format(out_folder,save_name)

In [121]:
# Project helper called before writing the new file; presumably removes earlier
# result files in `out_folder` that match these results -- TODO confirm
# against certificate.run_simulations, including the meaning of the "" argument.
delete_duplicate_results(out_folder,"",aggregate_results)

In [122]:
# Serialise the aggregated results as JSON under ../../results/.
# A context manager is used so the file handle is flushed and closed even if
# json.dump raises (the previous bare open() was never closed).
with open('../../results/'+save_path,'w') as results_file:
    json.dump(aggregate_results, results_file)