Sourcery Starbot ⭐ refactored radsn23/bandits-codes #1

Open · wants to merge 1 commit into base: master
@@ -100,12 +100,12 @@ def get_y(self,x,w):
 ax5 = fig.add_subplot(433)

 alphas = [1,2,0.5]
+T = 500
+N = 10 #number of batches
+versions = 3 #or the number of arms of the bandit
+l=0.1 #lambda
+X_size = 3
 for a in alphas:
-    T = 500
-    N = 10 #number of batches
-    versions = 3 #or the number of arms of the bandit
-    l=0.1 #lambda
-    X_size = 3
Comment on lines +103 to -108
Lines 104-160 refactored with the following changes:

  • Hoist statements out of for/while loops (hoist-statement-from-loop)
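
A minimal sketch of what hoist-statement-from-loop does, using the same variable names as the hunk above but a stand-in loop body (the `print` is not the project's code):

```python
alphas = [1, 2, 0.5]

# Before: loop-invariant assignments re-execute on every iteration.
for a in alphas:
    T = 500  # same value every pass
    print(a, T)

# After: the invariant binding runs once, above the loop.
T = 500
for a in alphas:
    print(a, T)
```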

#global regret
regret=0
# The set of past observations is made of triplets (x_i,a_i,r_i), so the dimension of the observation is-
@@ -157,7 +157,7 @@ def get_y(self,x,w):
     plt.plot(np.linspace(0,T,len(regrets)),regrets, color='r',label='Cumulative Regret')
     plt.subplot(4,3,k+9)
     plt.plot(np.linspace(0,T,len(perc_regrets)),perc_regrets, color='orange',label='% Regret')
-    #ax.scatter(np.linspace(0,T,len(chosen_rewards)),chosen_rewards)
+    #ax.scatter(np.linspace(0,T,len(chosen_rewards)),chosen_rewards)
     ax1.set_ylabel('Loss')
     ax1.set_xlabel('Time')
     ax1.legend()
12 changes: 6 additions & 6 deletions bandit_rl_implementations/EmailMAB/Bandits_email_version.py
@@ -11,7 +11,7 @@ def __init__(self,versions=None, prior = None): #Taking in version set and p
         self.trials = np.zeros((len(versions),), dtype = int)
         self.successes = np.zeros_like(self.trials)
         self.versions = versions
-        if prior == None:
+        if prior is None:
Function email_mooclet.__init__ refactored with the following changes:

  • Use x is None rather than x == None (none-compare)
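
The `is None` form matters beyond style: `==` dispatches to `__eq__`, and a NumPy array compared against `None` broadcasts elementwise instead of producing a single boolean. A sketch of the failure mode, assuming a caller passes an array for `prior` (plausible in this codebase, but an assumption):

```python
import numpy as np

prior = np.array([1.0, 1.0])

# `prior == None` broadcasts to array([False, False]); using that in an
# `if` raises "ValueError: The truth value of an array ... is ambiguous".
# Identity comparison bypasses __eq__ entirely and is the PEP 8 idiom:
if prior is None:
    prior = [(1.0, 1.0)]
```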

self.prior = [(1.0,1.0) for i in range(len(versions))]

def add_data(self, version_num, success):
@@ -38,7 +38,7 @@ def personalize(self):
 versions = ['Survey','Brief','Acknowledgement']
 trials_in = 1000
 scores = [0,0,0]
-tried_outputs = [0,0,0]
+tried_outputs = [0,0,0]
Lines 41-62 refactored with the following changes:

  • Replace unused for index with underscore (for-index-underscore)
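
The underscore is a convention rather than a language feature: it tells the reader the loop variable is deliberately unused. A stand-alone sketch:

```python
trials_in = 3

# Before: `trial` implies the body depends on the counter, but it never does.
for trial in range(trials_in):
    print("ran one simulated trial")

# After: `_` signals the loop exists only to repeat a side effect.
for _ in range(trials_in):
    print("ran one simulated trial")
```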

params = []

# Plotting the initial prior graphs
@@ -49,17 +49,17 @@ def personalize(self):
     plt.title(versions[i] + '- prior')
     plt.xlim([0,1])

-# Now creating the posterior distribution as data is added.
-for trial in range(trials_in):
+# Now creating the posterior distribution as data is added.
+for _ in range(trials_in):
     e = email_mooclet(versions,prior)
     input_version = np.random.randint(len(versions)) #Choosing a version at random
     tried_outputs[input_version] +=1
     #e.add_data(input_version,np.random.randint(2)) # Un-comment this to add successes randomly
     #e.add_data(input_version,(np.random.choice(np.arange(len(versions)),p = [0.6,0.1,0.3])== input_version)) #Uncomment this to add successes based on a probability distr to simulate actual patterns
     result,x,prior = e.personalize()
     scores[result]+=1


 print(scores) # Checking how many times each version won the sampling contest
 print(tried_outputs) # Checking how many times each version was chosen to add a success to
 for i in range(len(versions)): # As one particular version gets chosen more, it's probability of
30 changes: 12 additions & 18 deletions louie_experiments/LinUCB.py
@@ -39,11 +39,7 @@ def calculate_linucb_single_bandit(source, num_actions, dest, models = None, for
     :param dest: outfile for printing the chosen actions and received rewards.
     :param forced: Optional, indicates to process only up to a certain time step or force take specified actions.
     '''
-    # number of trials used to compute expectation stats
-    # set to small value when debugging for faster speed
-    num_trials_prob_best_action = int(1e4)
-
-    if models == None:
+    if models is None:
Function calculate_linucb_single_bandit refactored with the following changes:

  • Hoist statements out of for/while loops (hoist-statement-from-loop)
  • Move assignments closer to their usage (move-assign)
  • Use x is None rather than x == None (none-compare)
  • Replace index in for loop with direct reference (for-index-replacement)
  • Replace manual loop counter with call to enumerate (convert-to-enumerate)
  • Convert for loop into dictionary comprehension (dict-comprehension)
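
Two of these rules are visible together in the loop below. A sketch of the before/after shapes, with a plain list standing in for the `csv.DictReader` the real code iterates:

```python
rows = [{"ctx": "a"}, {"ctx": "b"}]  # stand-in for csv.DictReader rows
fieldnames = ["ctx"]

# Before: a hand-incremented counter plus an index-driven copy loop.
sample_number = 0
for row in rows:
    sample_number += 1
    out_row = {}
    for i in range(len(fieldnames)):
        out_row[fieldnames[i]] = row[fieldnames[i]]

# After: enumerate owns the counter (start=1 keeps the old numbering)
# and a dict comprehension replaces the indexed copy.
for sample_number, row in enumerate(rows, start=1):
    out_row = {name: row[name] for name in fieldnames}
    print(sample_number, out_row)
```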

models = [RLogReg(D = NUM_FEATURES, Lambda = 1) for _ in range(num_actions)]

with open(source, newline='') as inf, open(dest, 'w', newline='') as outf:
@@ -55,17 +51,20 @@ def calculate_linucb_single_bandit(source, num_actions, dest, models = None, for
         writer = csv.DictWriter(outf, fieldnames=field_names_out)
         writer.writeheader()

-        sample_number = 0
         cumulative_sample_regret = 0
         cumulative_expected_regret = 0

         chosen_actions = []

         alpha = 2

-        for row in reader:
-            sample_number += 1
+        # TODO: compute expected regret for LinUCB
+        expected_regret = 0
+        # number of trials used to compute expectation stats
+        # set to small value when debugging for faster speed
+        num_trials_prob_best_action = int(1e4)
+
+        for sample_number, row in enumerate(reader, start=1):
             # get context features
             context = get_context(row)

@@ -79,7 +78,7 @@ def calculate_linucb_single_bandit(source, num_actions, dest, models = None, for
                 # take forced action if requested
                 action = forced.actions[sample_number - 1]

-
             # only return action chosen up to specified time step
             if forced.time_step > 0 and sample_number <= forced.time_step:
                 chosen_actions.append(action)
@@ -93,10 +92,7 @@ def calculate_linucb_single_bandit(source, num_actions, dest, models = None, for
             models[action].update_posterior(context, 2 * reward - 1)

             # copy the input data to output file
-            out_row = {}
-
-            for i in range(len(reader.fieldnames)):
-                out_row[reader.fieldnames[i]] = row[reader.fieldnames[i]]
+            out_row = {fieldname: row[fieldname] for fieldname in reader.fieldnames}

''' write performance data (e.g. regret) '''
optimal_action = int(row[HEADER_OPTIMALACTION]) - 1
@@ -115,16 +111,14 @@ def calculate_linucb_single_bandit(source, num_actions, dest, models = None, for
             # The oracle always chooses the best arm, thus expected reward
             # is simply the probability of that arm getting a reward.
             optimal_expected_reward = true_probs[optimal_action] * num_trials_prob_best_action

-            # TODO: compute expected regret for LinUCB
-            expected_regret = 0

             cumulative_expected_regret += expected_regret

             out_row[H_ALGO_REGRET_EXPECTED] = expected_regret
             out_row[H_ALGO_REGRET_EXPECTED_CUMULATIVE] = cumulative_expected_regret

             writer.writerow(out_row)

     return chosen_actions, models


2 changes: 1 addition & 1 deletion louie_experiments/data_reader.py
@@ -38,7 +38,7 @@ def read_avg_regret(source, switch_time_step):
             pos_regret.append(regret)
             all_regret.append(regret)

-    if len(pre_regret) == 0:
+    if not pre_regret:
Function read_avg_regret refactored with the following changes:

  • Simplify sequence comparison (simplify-len-comparison)
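
The two forms are equivalent for lists because empty sequences are falsy, and PEP 8 recommends the truthiness test. A sketch with the same fallback the code uses:

```python
import sys

pre_regret = []

# `len(pre_regret) == 0` and `not pre_regret` agree for any sequence.
if not pre_regret:
    pre_regret = [sys.maxsize]

print(pre_regret)  # [9223372036854775807] on a typical 64-bit build
```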

# some algorithms are oblivious to immediate data
# so does not have regrets prior to switch
pre_regret = [sys.maxsize]
12 changes: 8 additions & 4 deletions louie_experiments/driver_gaussian_rewards_two_bandits.py
@@ -221,6 +221,7 @@ def calculate_bandits(probs_matrix, cc):
See generate_gaussian_rewards method for more explanations
"""

Lines 552-578 refactored with the following changes:

  • Hoist statements out of for/while loops (hoist-statement-from-loop)
  • Swap positions of nested conditionals (swap-nested-ifs)
  • Hoist repeated code outside conditional statement (hoist-statement-from-if)
  • Merge append into list declaration (merge-list-append)
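
Only merge-list-append is visible in the surviving excerpt of this file (the header-building hunk further down); the other three rules apply to lines GitHub has collapsed. A sketch of that one, with shortened stand-in values:

```python
header1 = ["Algorithm"]                   # stand-ins for the real header rows
header2 = [["All", "Before"], ["After"]]

# Before: declare empty, then append twice.
header = []
header.append(header1)
header.append(["Correlation"] + [item for sub in header2 for item in sub])

# After: both appends fold into the list literal itself.
header = [
    header1,
    ["Correlation"] + [item for sub in header2 for item in sub],
]
print(header)
```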


# Different correlation values
c_list = [-1, -0.75, -0.5, -0.25, 0, 0.25, 0.5, 0.75, 1]

@@ -258,6 +259,7 @@ def calculate_bandits(probs_matrix, cc):


+num_algorithms = 12 # UPDATE THIS IF NEW ALGORITHMS ARE ADDED OR EXISTING ONES REMOVED !!!
 # Generate average graph across all samples
 # Average result across different samples within each time step and each correlation value
 for t in time_steps:
@@ -549,7 +551,6 @@ def calculate_bandits(probs_matrix, cc):
                                  vertical_line=t)

     """Writing out average data to file"""
-    num_algorithms = 12 # UPDATE THIS IF NEW ALGORITHMS ARE ADDED OR EXISTING ONES REMOVED !!!
     # write table data
     with open(table_file.format(t), 'w', newline='') as tfp:
         tfcsv = csv.writer(tfp, delimiter=',')
@@ -573,9 +574,12 @@ def calculate_bandits(probs_matrix, cc):
                     'Delayed-All', 'Delayed-Before', 'Delayed-After', \
                     'Stderr-Delayed-All', 'Stderr-Delayed-Before', 'Stderr-Delayed-After'] \
                     for _ in range(num_algorithms)]
-        header = []
-        header.append(header1)
-        header.append(['Correlation'] + [item for sublist in header2 for item in sublist])
+        header = [
+            header1,
+            ['Correlation']
+            + [item for sublist in header2 for item in sublist],
+        ]

         tfcsv.writerows(header)
         tfcsv.writerows(csv_avg_data)
         if args.writeAllData:
4 changes: 2 additions & 2 deletions louie_experiments/driver_two_bandits_contextual.py
@@ -80,9 +80,9 @@

 graph_title = 'Cumulative Regret from Single Sample as a function of action timestep'

-for t in time_steps:
-    max_step_to_plot = num_rows
+max_step_to_plot = num_rows

Comment on lines +83 to 85
Lines 84-85 refactored with the following changes:

  • Hoist statements out of for/while loops (hoist-statement-from-loop)

+for t in time_steps:
     imm_random = []
     imm_thompson = []
     imm_epsilon = []
18 changes: 8 additions & 10 deletions louie_experiments/epsilonGreedyPolicy.py
@@ -46,38 +46,36 @@ def calculateEpsilonGreedyPolicy(source, dest, eps=0.1):
     :param dest: The output destination dataset.
     :param eps: Epsilon parameter.
     '''
-    numActions = 3
-    numMooclets = 3
     with open(source, newline='') as inf, open(dest, 'w', newline='') as outf:
         reader = csv.DictReader(inf)
         fieldNamesOut = reader.fieldnames[0:3]

         #output the conditions chosen
         fieldNamesOut.append('MOOClet1')
         fieldNamesOut.append('MOOClet2')
         fieldNamesOut.append('MOOClet3')

Comment on lines -49 to +57
Function calculateEpsilonGreedyPolicy refactored with the following changes:

  • Move assignments closer to their usage (move-assign)
  • Replace unused for index with underscore (for-index-underscore)
  • Replace manual loop counter with call to enumerate (convert-to-enumerate)
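
move-assign shortens the distance between an assignment and its first use, so `numActions` and `numMooclets` no longer have to be carried in the reader's head across the file-handling setup. A runnable sketch of the shape, with `print` standing in for the real CSV work:

```python
def process(rows):
    print("open files, write header")  # unrelated setup happens first
    # Moved: the constants now sit directly above the loop that reads them.
    numActions = 3
    numMooclets = 3
    for row in rows:
        print(row, numActions, numMooclets)

process(["row1", "row2"])
```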

         #output our samples drawn
         fieldNamesOut.append('RewardMOOClet1')
         fieldNamesOut.append('RewardMOOClet2')
         fieldNamesOut.append('RewardMOOClet3')

         writer = csv.DictWriter(outf, fieldnames=fieldNamesOut)
         writer.writeheader()
-        sampleNumber = 0
-        for row in reader:
-            sampleNumber += 1
+        numActions = 3
+        numMooclets = 3
+        for sampleNumber, row in enumerate(reader, start=1):
#get the user vars
ageQuartile = int(row['agequartilesUSER']);
#user 0 instead of -1 for age quartiles
if ageQuartile==-1:
ageQuartile=0;

nDaysAct = int(row['ndaysactUSER']);

             #choose a random action
             actions = []
-            for i in range(numMooclets):
+            for _ in range(numMooclets):
                 a, p = getEpsilonGreedyAction(eps, numActions, constant_policy)
                 actions.append(a)

37 changes: 22 additions & 15 deletions louie_experiments/gaussian_reward.py
@@ -109,11 +109,15 @@ def load_matrix_from_table(table_file, num_algos = 9, num_correlations = 9):
         Set to 9 by default because len([-1:1:0.25]) = 9
     '''
     reader = csv.reader(open(table_file, "r"), delimiter = ',')
-    x = []
-    for i in range(2 + num_correlations):
-        x.append(next(reader))
-    table = np.reshape(np.array([np.array(m[1:]) for m in x[2:]]).T, [num_algos,6,num_correlations]).astype(float).swapaxes(1,2)
-    return table
+    x = [next(reader) for _ in range(2 + num_correlations)]
+    return (
+        np.reshape(
+            np.array([np.array(m[1:]) for m in x[2:]]).T,
+            [num_algos, 6, num_correlations],
+        )
+        .astype(float)
+        .swapaxes(1, 2)
+    )
Comment on lines -112 to +120
Function load_matrix_from_table refactored with the following changes:

  • Inline variable that is immediately returned (inline-immediately-returned-variable)
  • Replace unused for index with underscore (for-index-underscore)
  • Convert for loop into list comprehension (list-comprehension)
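
A sketch of the loop-level rules on a toy reader (`rows` stands in for the CSV reader; the `table` temporary disappearing into the `return` is the inlining):

```python
rows = iter([["skip"], ["skip"], ["h", "1", "2"], ["i", "3", "4"]])

# Before:
#     x = []
#     for i in range(4):
#         x.append(next(rows))
#     table = [r[1:] for r in x[2:]]
#     return table

# After: a comprehension consumes the iterator, and the final expression
# is returned (here, printed) directly instead of via a temporary.
x = [next(rows) for _ in range(4)]
print([r[1:] for r in x[2:]])  # [['1', '2'], ['3', '4']]
```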



def load_cumulative_regret_matrix(path, out_pickle_file):
@@ -153,8 +157,6 @@ def load_cumulative_regret_matrix(path, out_pickle_file):
                 if int(row[HEADER_SAMPLENUMBER]) == t_switch:
                     cum_regret_switch = cum_regret
                 cum_regret_final = cum_regret
-                pass
-            pass
Comment on lines -156 to -157
Function load_cumulative_regret_matrix refactored with the following changes:

  • Remove redundant pass statement (remove-redundant-pass)
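
`pass` is only required where Python expects a statement and the block would otherwise be empty; after real statements it is dead weight. A sketch:

```python
for t_switch in [1, 2]:
    cum_regret = t_switch * 10
    if cum_regret > 10:
        cum_regret_switch = cum_regret
        pass  # redundant: the block already contains a statement
    pass      # redundant: the loop body is not empty

# After removal, behavior is identical:
for t_switch in [1, 2]:
    cum_regret = t_switch * 10
    if cum_regret > 10:
        cum_regret_switch = cum_regret
```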

algorithm = file_meta[1]
t_switch = file_meta[2]
correlation = file_meta[3]
@@ -189,10 +191,8 @@ def split_table_file(table_file):
     with open(table_file) as tf:
         with open('average_regret_table.csv', 'w') as avgf:
             with open('all_regret_table.csv', 'w') as allf:
-                index = 0
                 allf.write('algorithm,bandit type,t,correlation,sample,run,{}\n'.format(','.join([str(i + 1) for i in range(240)])))
-                for line in tf:
-                    index += 1
+                for index, line in enumerate(tf, start=1):
Comment on lines -192 to +195
Function split_table_file refactored with the following changes:

  • Replace manual loop counter with call to enumerate (convert-to-enumerate)

if index <= 11:
avgf.write(line)
else:
@@ -214,7 +214,7 @@ def create_6D_MVN_file(path, output_path):
'''
num_actions = 3
files_list = glob.glob(path + os.sep + "gauss_single_bandit_input*.csv")

Function create_6D_MVN_file refactored with the following changes:

  • Merge extend into list declaration (merge-list-extend)
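
Iterable unpacking (`*`) inside a list literal is what lets the two `extend` calls collapse into the declaration, as the hunk below shows. The same transformation in isolation:

```python
num_actions = 3

# Before: a literal followed by two extend calls.
header = ['Correlation', 'Sample']
header.extend(['Immediate-Prob-Arm-{}'.format(a + 1) for a in range(num_actions)])
header.extend(['Delayed-Prob-Arm-{}'.format(a + 1) for a in range(num_actions)])

# After: the extends fold into the literal via * unpacking.
header = [
    'Correlation',
    'Sample',
    *['Immediate-Prob-Arm-{}'.format(a + 1) for a in range(num_actions)],
    *['Delayed-Prob-Arm-{}'.format(a + 1) for a in range(num_actions)],
]
print(header)
```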

name_to_prob_dict = {}
for file_path in files_list:
file_name = os.path.basename(file_path)
@@ -234,7 +234,7 @@ def create_6D_MVN_file(path, output_path):
for row in reader:
probs = [float(row[HEADER_TRUEPROB.format(a + 1)]) for a in range(num_actions)]
break # just need to read the first row since reward probs are constant over t

name = (correlation, sample)

if name not in name_to_prob_dict:
@@ -248,9 +248,16 @@ def create_6D_MVN_file(path, output_path):
     with open(output_path, 'w', newline='') as outf:
         writer = csv.writer(outf, delimiter=',')
         data = []
-        header = ['Correlation', 'Sample']
-        header.extend(['Immediate-Prob-Arm-{}'.format(a + 1) for a in range(num_actions)])
-        header.extend(['Delayed-Prob-Arm-{}'.format(a + 1) for a in range(num_actions)])
+        header = [
+            'Correlation',
+            'Sample',
+            *[
+                'Immediate-Prob-Arm-{}'.format(a + 1)
+                for a in range(num_actions)
+            ],
+            *['Delayed-Prob-Arm-{}'.format(a + 1) for a in range(num_actions)],
+        ]
+
         data.append(header)
         for k,v in name_to_prob_dict.items():
             row = [k[0], k[1]] + list(v.astype(str))