-
Notifications
You must be signed in to change notification settings - Fork 3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Sourcery Starbot ⭐ refactored radsn23/bandits-codes #1
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,7 +11,7 @@ def __init__(self,versions=None, prior = None): #Taking in version set and p | |
self.trials = np.zeros((len(versions),), dtype = int) | ||
self.successes = np.zeros_like(self.trials) | ||
self.versions = versions | ||
if prior == None: | ||
if prior is None: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
self.prior = [(1.0,1.0) for i in range(len(versions))] | ||
|
||
def add_data(self, version_num, success): | ||
|
@@ -38,7 +38,7 @@ def personalize(self): | |
versions = ['Survey','Brief','Acknowledgement'] | ||
trials_in = 1000 | ||
scores = [0,0,0] | ||
tried_outputs = [0,0,0] | ||
tried_outputs = [0,0,0] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Lines
|
||
params = [] | ||
|
||
# Plotting the initial prior graphs | ||
|
@@ -49,17 +49,17 @@ def personalize(self): | |
plt.title(versions[i] + '- prior') | ||
plt.xlim([0,1]) | ||
|
||
# Now creating the posterior distribution as data is added. | ||
for trial in range(trials_in): | ||
# Now creating the posterior distribution as data is added. | ||
for _ in range(trials_in): | ||
e = email_mooclet(versions,prior) | ||
input_version = np.random.randint(len(versions)) #Choosing a version at random | ||
tried_outputs[input_version] +=1 | ||
#e.add_data(input_version,np.random.randint(2)) # Un-comment this to add successes randomly | ||
#e.add_data(input_version,(np.random.choice(np.arange(len(versions)),p = [0.6,0.1,0.3])== input_version)) #Uncomment this to add successes based on a probability distr to simulate actual patterns | ||
result,x,prior = e.personalize() | ||
scores[result]+=1 | ||
|
||
|
||
print(scores) # Checking how many times each version won the sampling contest | ||
print(tried_outputs) # Checking how many times each version was chosen to add a success to | ||
for i in range(len(versions)): # As one particular version gets chosen more, it's probability of | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -39,11 +39,7 @@ def calculate_linucb_single_bandit(source, num_actions, dest, models = None, for | |
:param dest: outfile for printing the chosen actions and received rewards. | ||
:param forced: Optional, indicates to process only up to a certain time step or force take specified actions. | ||
''' | ||
# number of trials used to compute expectation stats | ||
# set to small value when debugging for faster speed | ||
num_trials_prob_best_action = int(1e4) | ||
|
||
if models == None: | ||
if models is None: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
models = [RLogReg(D = NUM_FEATURES, Lambda = 1) for _ in range(num_actions)] | ||
|
||
with open(source, newline='') as inf, open(dest, 'w', newline='') as outf: | ||
|
@@ -55,17 +51,20 @@ def calculate_linucb_single_bandit(source, num_actions, dest, models = None, for | |
writer = csv.DictWriter(outf, fieldnames=field_names_out) | ||
writer.writeheader() | ||
|
||
sample_number = 0 | ||
cumulative_sample_regret = 0 | ||
cumulative_expected_regret = 0 | ||
|
||
chosen_actions = [] | ||
|
||
alpha = 2 | ||
|
||
for row in reader: | ||
sample_number += 1 | ||
# TODO: compute expected regret for LinUCB | ||
expected_regret = 0 | ||
# number of trials used to compute expectation stats | ||
# set to small value when debugging for faster speed | ||
num_trials_prob_best_action = int(1e4) | ||
|
||
for sample_number, row in enumerate(reader, start=1): | ||
# get context features | ||
context = get_context(row) | ||
|
||
|
@@ -79,7 +78,7 @@ def calculate_linucb_single_bandit(source, num_actions, dest, models = None, for | |
# take forced action if requested | ||
action = forced.actions[sample_number - 1] | ||
|
||
|
||
# only return action chosen up to specified time step | ||
if forced.time_step > 0 and sample_number <= forced.time_step: | ||
chosen_actions.append(action) | ||
|
@@ -93,10 +92,7 @@ def calculate_linucb_single_bandit(source, num_actions, dest, models = None, for | |
models[action].update_posterior(context, 2 * reward - 1) | ||
|
||
# copy the input data to output file | ||
out_row = {} | ||
|
||
for i in range(len(reader.fieldnames)): | ||
out_row[reader.fieldnames[i]] = row[reader.fieldnames[i]] | ||
out_row = {fieldname: row[fieldname] for fieldname in reader.fieldnames} | ||
|
||
''' write performance data (e.g. regret) ''' | ||
optimal_action = int(row[HEADER_OPTIMALACTION]) - 1 | ||
|
@@ -115,16 +111,14 @@ def calculate_linucb_single_bandit(source, num_actions, dest, models = None, for | |
# The oracle always chooses the best arm, thus expected reward | ||
# is simply the probability of that arm getting a reward. | ||
optimal_expected_reward = true_probs[optimal_action] * num_trials_prob_best_action | ||
|
||
# TODO: compute expected regret for LinUCB | ||
expected_regret = 0 | ||
|
||
cumulative_expected_regret += expected_regret | ||
|
||
out_row[H_ALGO_REGRET_EXPECTED] = expected_regret | ||
out_row[H_ALGO_REGRET_EXPECTED_CUMULATIVE] = cumulative_expected_regret | ||
|
||
writer.writerow(out_row) | ||
|
||
return chosen_actions, models | ||
|
||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -38,7 +38,7 @@ def read_avg_regret(source, switch_time_step): | |
pos_regret.append(regret) | ||
all_regret.append(regret) | ||
|
||
if len(pre_regret) == 0: | ||
if not pre_regret: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
# some algorithms are oblivious to immediate data | ||
# so does not have regrets prior to switch | ||
pre_regret = [sys.maxsize] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -221,6 +221,7 @@ def calculate_bandits(probs_matrix, cc): | |
See generate_gaussian_rewards method for more explanations | ||
""" | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Lines
|
||
|
||
# Different correlation values | ||
c_list = [-1, -0.75, -0.5, -0.25, 0, 0.25, 0.5, 0.75, 1] | ||
|
||
|
@@ -258,6 +259,7 @@ def calculate_bandits(probs_matrix, cc): | |
|
||
|
||
|
||
num_algorithms = 12 # UPDATE THIS IF NEW ALGORITHMS ARE ADDED OR EXISTING ONES REMOVED !!! | ||
# Generate average graph across all samples | ||
# Average result across different samples within each time step and each correlation value | ||
for t in time_steps: | ||
|
@@ -549,7 +551,6 @@ def calculate_bandits(probs_matrix, cc): | |
vertical_line=t) | ||
|
||
"""Writing out average data to file""" | ||
num_algorithms = 12 # UPDATE THIS IF NEW ALGORITHMS ARE ADDED OR EXISTING ONES REMOVED !!! | ||
# write table data | ||
with open(table_file.format(t), 'w', newline='') as tfp: | ||
tfcsv = csv.writer(tfp, delimiter=',') | ||
|
@@ -573,9 +574,12 @@ def calculate_bandits(probs_matrix, cc): | |
'Delayed-All', 'Delayed-Before', 'Delayed-After', \ | ||
'Stderr-Delayed-All', 'Stderr-Delayed-Before', 'Stderr-Delayed-After'] \ | ||
for _ in range(num_algorithms)] | ||
header = [] | ||
header.append(header1) | ||
header.append(['Correlation'] + [item for sublist in header2 for item in sublist]) | ||
header = [ | ||
header1, | ||
['Correlation'] | ||
+ [item for sublist in header2 for item in sublist], | ||
] | ||
|
||
tfcsv.writerows(header) | ||
tfcsv.writerows(csv_avg_data) | ||
if args.writeAllData: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -80,9 +80,9 @@ | |
|
||
graph_title = 'Cumulative Regret from Single Sample as a function of action timestep' | ||
|
||
for t in time_steps: | ||
max_step_to_plot = num_rows | ||
max_step_to_plot = num_rows | ||
|
||
Comment on lines +83 to 85
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Lines
|
||
for t in time_steps: | ||
imm_random = [] | ||
imm_thompson = [] | ||
imm_epsilon = [] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -46,38 +46,36 @@ def calculateEpsilonGreedyPolicy(source, dest, eps=0.1): | |
:param dest: The output destination dataset. | ||
:param eps: Epsilon parameter. | ||
''' | ||
numActions = 3 | ||
numMooclets = 3 | ||
with open(source, newline='') as inf, open(dest, 'w', newline='') as outf: | ||
reader = csv.DictReader(inf) | ||
fieldNamesOut = reader.fieldnames[0:3] | ||
|
||
#output the conditions chosen | ||
fieldNamesOut.append('MOOClet1') | ||
fieldNamesOut.append('MOOClet2') | ||
fieldNamesOut.append('MOOClet3') | ||
|
||
Comment on lines -49 to +57
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
#output our samples drawn | ||
fieldNamesOut.append('RewardMOOClet1') | ||
fieldNamesOut.append('RewardMOOClet2') | ||
fieldNamesOut.append('RewardMOOClet3') | ||
|
||
writer = csv.DictWriter(outf, fieldnames=fieldNamesOut) | ||
writer.writeheader() | ||
sampleNumber = 0 | ||
for row in reader: | ||
sampleNumber += 1 | ||
numActions = 3 | ||
numMooclets = 3 | ||
for sampleNumber, row in enumerate(reader, start=1): | ||
#get the user vars | ||
ageQuartile = int(row['agequartilesUSER']); | ||
#user 0 instead of -1 for age quartiles | ||
if ageQuartile==-1: | ||
ageQuartile=0; | ||
|
||
nDaysAct = int(row['ndaysactUSER']); | ||
|
||
#choose a random action | ||
actions = [] | ||
for i in range(numMooclets): | ||
for _ in range(numMooclets): | ||
a, p = getEpsilonGreedyAction(eps, numActions, constant_policy) | ||
actions.append(a) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -109,11 +109,15 @@ def load_matrix_from_table(table_file, num_algos = 9, num_correlations = 9): | |
Set to 9 by default because len([-1:1:0.25]) = 9 | ||
''' | ||
reader = csv.reader(open(table_file, "r"), delimiter = ',') | ||
x = [] | ||
for i in range(2 + num_correlations): | ||
x.append(next(reader)) | ||
table = np.reshape(np.array([np.array(m[1:]) for m in x[2:]]).T, [num_algos,6,num_correlations]).astype(float).swapaxes(1,2) | ||
return table | ||
x = [next(reader) for _ in range(2 + num_correlations)] | ||
return ( | ||
np.reshape( | ||
np.array([np.array(m[1:]) for m in x[2:]]).T, | ||
[num_algos, 6, num_correlations], | ||
) | ||
.astype(float) | ||
.swapaxes(1, 2) | ||
) | ||
Comment on lines -112 to +120
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
||
|
||
def load_cumulative_regret_matrix(path, out_pickle_file): | ||
|
@@ -153,8 +157,6 @@ def load_cumulative_regret_matrix(path, out_pickle_file): | |
if int(row[HEADER_SAMPLENUMBER]) == t_switch: | ||
cum_regret_switch = cum_regret | ||
cum_regret_final = cum_regret | ||
pass | ||
pass | ||
Comment on lines -156 to -157
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
algorithm = file_meta[1] | ||
t_switch = file_meta[2] | ||
correlation = file_meta[3] | ||
|
@@ -189,10 +191,8 @@ def split_table_file(table_file): | |
with open(table_file) as tf: | ||
with open('average_regret_table.csv', 'w') as avgf: | ||
with open('all_regret_table.csv', 'w') as allf: | ||
index = 0 | ||
allf.write('algorithm,bandit type,t,correlation,sample,run,{}\n'.format(','.join([str(i + 1) for i in range(240)]))) | ||
for line in tf: | ||
index += 1 | ||
for index, line in enumerate(tf, start=1): | ||
Comment on lines -192 to +195
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
if index <= 11: | ||
avgf.write(line) | ||
else: | ||
|
@@ -214,7 +214,7 @@ def create_6D_MVN_file(path, output_path): | |
''' | ||
num_actions = 3 | ||
files_list = glob.glob(path + os.sep + "gauss_single_bandit_input*.csv") | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
name_to_prob_dict = {} | ||
for file_path in files_list: | ||
file_name = os.path.basename(file_path) | ||
|
@@ -234,7 +234,7 @@ def create_6D_MVN_file(path, output_path): | |
for row in reader: | ||
probs = [float(row[HEADER_TRUEPROB.format(a + 1)]) for a in range(num_actions)] | ||
break # just need to read the first row since reward probs are constant over t | ||
|
||
name = (correlation, sample) | ||
|
||
if name not in name_to_prob_dict: | ||
|
@@ -248,9 +248,16 @@ def create_6D_MVN_file(path, output_path): | |
with open(output_path, 'w', newline='') as outf: | ||
writer = csv.writer(outf, delimiter=',') | ||
data = [] | ||
header = ['Correlation', 'Sample'] | ||
header.extend(['Immediate-Prob-Arm-{}'.format(a + 1) for a in range(num_actions)]) | ||
header.extend(['Delayed-Prob-Arm-{}'.format(a + 1) for a in range(num_actions)]) | ||
header = [ | ||
'Correlation', | ||
'Sample', | ||
*[ | ||
'Immediate-Prob-Arm-{}'.format(a + 1) | ||
for a in range(num_actions) | ||
], | ||
*['Delayed-Prob-Arm-{}'.format(a + 1) for a in range(num_actions)], | ||
] | ||
|
||
data.append(header) | ||
for k,v in name_to_prob_dict.items(): | ||
row = [k[0], k[1]] + list(v.astype(str)) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Lines 104-160 refactored with the following changes: hoist-statement-from-loop