Sourcery Starbot ⭐ refactored radsn23/bandits-codes #1

Open · wants to merge 1 commit into base: master
@@ -100,12 +100,12 @@ def get_y(self,x,w):
 ax5 = fig.add_subplot(433)

 alphas = [1,2,0.5]
+T = 500
+N = 10 #number of batches
+versions = 3 #or the number of arms of the bandit
+l=0.1 #lambda
+X_size = 3
 for a in alphas:
-    T = 500
-    N = 10 #number of batches
-    versions = 3 #or the number of arms of the bandit
-    l=0.1 #lambda
-    X_size = 3
Comment on lines +103 to -108
Lines 104-160 refactored with the following changes:

  • Hoist statements out of for/while loops (hoist-statement-from-loop)
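
A minimal sketch of what hoist-statement-from-loop does, using the same variable names as the hunk above but a stand-in loop body (the `print` is not the project's code):

```python
alphas = [1, 2, 0.5]

# Before: loop-invariant assignments re-execute on every iteration.
for a in alphas:
    T = 500  # same value every pass
    print(a, T)

# After: the invariant binding runs once, above the loop.
T = 500
for a in alphas:
    print(a, T)
```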

#global regret
regret=0
# The set of past observations is made of triplets (x_i,a_i,r_i), so the dimension of the observation is-
@@ -157,7 +157,7 @@ def get_y(self,x,w):
     plt.plot(np.linspace(0,T,len(regrets)),regrets, color='r',label='Cumulative Regret')
     plt.subplot(4,3,k+9)
     plt.plot(np.linspace(0,T,len(perc_regrets)),perc_regrets, color='orange',label='% Regret')
-    #ax.scatter(np.linspace(0,T,len(chosen_rewards)),chosen_rewards)
+    #ax.scatter(np.linspace(0,T,len(chosen_rewards)),chosen_rewards)
     ax1.set_ylabel('Loss')
     ax1.set_xlabel('Time')
     ax1.legend()
12 changes: 6 additions & 6 deletions bandit_rl_implementations/EmailMAB/Bandits_email_version.py
@@ -11,7 +11,7 @@ def __init__(self,versions=None, prior = None): #Taking in version set and p
         self.trials = np.zeros((len(versions),), dtype = int)
         self.successes = np.zeros_like(self.trials)
         self.versions = versions
-        if prior == None:
+        if prior is None:
Function email_mooclet.__init__ refactored with the following changes:

  • Use x is None rather than x == None (none-compare)
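
The `is None` form matters beyond style: `==` dispatches to `__eq__`, and a NumPy array compared against `None` broadcasts elementwise instead of producing a single boolean. A sketch of the failure mode, assuming a caller passes an array for `prior` (plausible in this codebase, but an assumption):

```python
import numpy as np

prior = np.array([1.0, 1.0])

# `prior == None` broadcasts to array([False, False]); using that in an
# `if` raises "ValueError: The truth value of an array ... is ambiguous".
# Identity comparison bypasses __eq__ entirely and is the PEP 8 idiom:
if prior is None:
    prior = [(1.0, 1.0)]
```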

self.prior = [(1.0,1.0) for i in range(len(versions))]

def add_data(self, version_num, success):
@@ -38,7 +38,7 @@ def personalize(self):
 versions = ['Survey','Brief','Acknowledgement']
 trials_in = 1000
 scores = [0,0,0]
-tried_outputs = [0,0,0]
+tried_outputs = [0,0,0]
Lines 41-62 refactored with the following changes:

  • Replace unused for index with underscore (for-index-underscore)
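
The underscore is a convention rather than a language feature: it tells the reader the loop variable is deliberately unused. A stand-alone sketch:

```python
trials_in = 3

# Before: `trial` implies the body depends on the counter, but it never does.
for trial in range(trials_in):
    print("ran one simulated trial")

# After: `_` signals the loop exists only to repeat a side effect.
for _ in range(trials_in):
    print("ran one simulated trial")
```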

params = []

# Plotting the initial prior graphs
@@ -49,17 +49,17 @@ def personalize(self):
     plt.title(versions[i] + '- prior')
     plt.xlim([0,1])

-# Now creating the posterior distribution as data is added.
-for trial in range(trials_in):
+# Now creating the posterior distribution as data is added.
+for _ in range(trials_in):
     e = email_mooclet(versions,prior)
     input_version = np.random.randint(len(versions)) #Choosing a version at random
     tried_outputs[input_version] +=1
     #e.add_data(input_version,np.random.randint(2)) # Un-comment this to add successes randomly
     #e.add_data(input_version,(np.random.choice(np.arange(len(versions)),p = [0.6,0.1,0.3])== input_version)) #Uncomment this to add successes based on a probability distr to simulate actual patterns
     result,x,prior = e.personalize()
     scores[result]+=1


 print(scores) # Checking how many times each version won the sampling contest
 print(tried_outputs) # Checking how many times each version was chosen to add a success to
 for i in range(len(versions)): # As one particular version gets chosen more, it's probability of
30 changes: 12 additions & 18 deletions louie_experiments/LinUCB.py
@@ -39,11 +39,7 @@ def calculate_linucb_single_bandit(source, num_actions, dest, models = None, for
     :param dest: outfile for printing the chosen actions and received rewards.
     :param forced: Optional, indicates to process only up to a certain time step or force take specified actions.
     '''
-    # number of trials used to compute expectation stats
-    # set to small value when debugging for faster speed
-    num_trials_prob_best_action = int(1e4)
-
-    if models == None:
+    if models is None:
Function calculate_linucb_single_bandit refactored with the following changes:

  • Hoist statements out of for/while loops (hoist-statement-from-loop)
  • Move assignments closer to their usage (move-assign)
  • Use x is None rather than x == None (none-compare)
  • Replace index in for loop with direct reference (for-index-replacement)
  • Replace manual loop counter with call to enumerate (convert-to-enumerate)
  • Convert for loop into dictionary comprehension (dict-comprehension)
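
Two of these rules are visible together in the loop below. A sketch of the before/after shapes, with a plain list standing in for the `csv.DictReader` the real code iterates:

```python
rows = [{"ctx": "a"}, {"ctx": "b"}]  # stand-in for csv.DictReader rows
fieldnames = ["ctx"]

# Before: a hand-incremented counter plus an index-driven copy loop.
sample_number = 0
for row in rows:
    sample_number += 1
    out_row = {}
    for i in range(len(fieldnames)):
        out_row[fieldnames[i]] = row[fieldnames[i]]

# After: enumerate owns the counter (start=1 keeps the old numbering)
# and a dict comprehension replaces the indexed copy.
for sample_number, row in enumerate(rows, start=1):
    out_row = {name: row[name] for name in fieldnames}
    print(sample_number, out_row)
```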

models = [RLogReg(D = NUM_FEATURES, Lambda = 1) for _ in range(num_actions)]

with open(source, newline='') as inf, open(dest, 'w', newline='') as outf:
@@ -55,17 +51,20 @@ def calculate_linucb_single_bandit(source, num_actions, dest, models = None, for
         writer = csv.DictWriter(outf, fieldnames=field_names_out)
         writer.writeheader()

-        sample_number = 0
         cumulative_sample_regret = 0
         cumulative_expected_regret = 0

         chosen_actions = []

         alpha = 2

-        for row in reader:
-            sample_number += 1
+        # TODO: compute expected regret for LinUCB
+        expected_regret = 0
+        # number of trials used to compute expectation stats
+        # set to small value when debugging for faster speed
+        num_trials_prob_best_action = int(1e4)
+
+        for sample_number, row in enumerate(reader, start=1):
             # get context features
             context = get_context(row)

@@ -79,7 +78,7 @@ def calculate_linucb_single_bandit(source, num_actions, dest, models = None, for
                 # take forced action if requested
                 action = forced.actions[sample_number - 1]

-
             # only return action chosen up to specified time step
             if forced.time_step > 0 and sample_number <= forced.time_step:
                 chosen_actions.append(action)
@@ -93,10 +92,7 @@ def calculate_linucb_single_bandit(source, num_actions, dest, models = None, for
             models[action].update_posterior(context, 2 * reward - 1)

             # copy the input data to output file
-            out_row = {}
-
-            for i in range(len(reader.fieldnames)):
-                out_row[reader.fieldnames[i]] = row[reader.fieldnames[i]]
+            out_row = {fieldname: row[fieldname] for fieldname in reader.fieldnames}

''' write performance data (e.g. regret) '''
optimal_action = int(row[HEADER_OPTIMALACTION]) - 1
@@ -115,16 +111,14 @@ def calculate_linucb_single_bandit(source, num_actions, dest, models = None, for
             # The oracle always chooses the best arm, thus expected reward
             # is simply the probability of that arm getting a reward.
             optimal_expected_reward = true_probs[optimal_action] * num_trials_prob_best_action

-            # TODO: compute expected regret for LinUCB
-            expected_regret = 0

             cumulative_expected_regret += expected_regret

             out_row[H_ALGO_REGRET_EXPECTED] = expected_regret
             out_row[H_ALGO_REGRET_EXPECTED_CUMULATIVE] = cumulative_expected_regret

             writer.writerow(out_row)

     return chosen_actions, models


2 changes: 1 addition & 1 deletion louie_experiments/data_reader.py
@@ -38,7 +38,7 @@ def read_avg_regret(source, switch_time_step):
             pos_regret.append(regret)
             all_regret.append(regret)

-    if len(pre_regret) == 0:
+    if not pre_regret:
Function read_avg_regret refactored with the following changes:

  • Simplify sequence comparison (simplify-len-comparison)
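
The two forms are equivalent for lists because empty sequences are falsy, and PEP 8 recommends the truthiness test. A sketch with the same fallback the code uses:

```python
import sys

pre_regret = []

# `len(pre_regret) == 0` and `not pre_regret` agree for any sequence.
if not pre_regret:
    pre_regret = [sys.maxsize]

print(pre_regret)  # [9223372036854775807] on a typical 64-bit build
```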

# some algorithms are oblivious to immediate data
# so does not have regrets prior to switch
pre_regret = [sys.maxsize]
12 changes: 8 additions & 4 deletions louie_experiments/driver_gaussian_rewards_two_bandits.py
@@ -221,6 +221,7 @@ def calculate_bandits(probs_matrix, cc):
See generate_gaussian_rewards method for more explanations
"""

Lines 552-578 refactored with the following changes:

  • Hoist statements out of for/while loops (hoist-statement-from-loop)
  • Swap positions of nested conditionals (swap-nested-ifs)
  • Hoist repeated code outside conditional statement (hoist-statement-from-if)
  • Merge append into list declaration (merge-list-append)
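
Only merge-list-append is visible in the surviving excerpt of this file (the header-building hunk further down); the other three rules apply to lines GitHub has collapsed. A sketch of that one, with shortened stand-in values:

```python
header1 = ["Algorithm"]                   # stand-ins for the real header rows
header2 = [["All", "Before"], ["After"]]

# Before: declare empty, then append twice.
header = []
header.append(header1)
header.append(["Correlation"] + [item for sub in header2 for item in sub])

# After: both appends fold into the list literal itself.
header = [
    header1,
    ["Correlation"] + [item for sub in header2 for item in sub],
]
print(header)
```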


# Different correlation values
c_list = [-1, -0.75, -0.5, -0.25, 0, 0.25, 0.5, 0.75, 1]

@@ -258,6 +259,7 @@ def calculate_bandits(probs_matrix, cc):


+num_algorithms = 12 # UPDATE THIS IF NEW ALGORITHMS ARE ADDED OR EXISTING ONES REMOVED !!!
 # Generate average graph across all samples
 # Average result across different samples within each time step and each correlation value
 for t in time_steps:
@@ -549,7 +551,6 @@ def calculate_bandits(probs_matrix, cc):
                                  vertical_line=t)

     """Writing out average data to file"""
-    num_algorithms = 12 # UPDATE THIS IF NEW ALGORITHMS ARE ADDED OR EXISTING ONES REMOVED !!!
     # write table data
     with open(table_file.format(t), 'w', newline='') as tfp:
         tfcsv = csv.writer(tfp, delimiter=',')
@@ -573,9 +574,12 @@ def calculate_bandits(probs_matrix, cc):
                     'Delayed-All', 'Delayed-Before', 'Delayed-After', \
                     'Stderr-Delayed-All', 'Stderr-Delayed-Before', 'Stderr-Delayed-After'] \
                     for _ in range(num_algorithms)]
-        header = []
-        header.append(header1)
-        header.append(['Correlation'] + [item for sublist in header2 for item in sublist])
+        header = [
+            header1,
+            ['Correlation']
+            + [item for sublist in header2 for item in sublist],
+        ]

         tfcsv.writerows(header)
         tfcsv.writerows(csv_avg_data)
         if args.writeAllData:
4 changes: 2 additions & 2 deletions louie_experiments/driver_two_bandits_contextual.py
@@ -80,9 +80,9 @@

 graph_title = 'Cumulative Regret from Single Sample as a function of action timestep'

-for t in time_steps:
-    max_step_to_plot = num_rows
+max_step_to_plot = num_rows

Comment on lines +83 to 85
Lines 84-85 refactored with the following changes:

  • Hoist statements out of for/while loops (hoist-statement-from-loop)

+for t in time_steps:
     imm_random = []
     imm_thompson = []
     imm_epsilon = []
18 changes: 8 additions & 10 deletions louie_experiments/epsilonGreedyPolicy.py
@@ -46,38 +46,36 @@ def calculateEpsilonGreedyPolicy(source, dest, eps=0.1):
     :param dest: The output destination dataset.
     :param eps: Epsilon parameter.
     '''
-    numActions = 3
-    numMooclets = 3
     with open(source, newline='') as inf, open(dest, 'w', newline='') as outf:
         reader = csv.DictReader(inf)
         fieldNamesOut = reader.fieldnames[0:3]

         #output the conditions chosen
         fieldNamesOut.append('MOOClet1')
         fieldNamesOut.append('MOOClet2')
         fieldNamesOut.append('MOOClet3')

Comment on lines -49 to +57
Function calculateEpsilonGreedyPolicy refactored with the following changes:

  • Move assignments closer to their usage (move-assign)
  • Replace unused for index with underscore (for-index-underscore)
  • Replace manual loop counter with call to enumerate (convert-to-enumerate)
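
move-assign shortens the distance between an assignment and its first use, so `numActions` and `numMooclets` no longer have to be carried in the reader's head across the file-handling setup. A runnable sketch of the shape, with `print` standing in for the real CSV work:

```python
def process(rows):
    print("open files, write header")  # unrelated setup happens first
    # Moved: the constants now sit directly above the loop that reads them.
    numActions = 3
    numMooclets = 3
    for row in rows:
        print(row, numActions, numMooclets)

process(["row1", "row2"])
```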

         #output our samples drawn
         fieldNamesOut.append('RewardMOOClet1')
         fieldNamesOut.append('RewardMOOClet2')
         fieldNamesOut.append('RewardMOOClet3')

         writer = csv.DictWriter(outf, fieldnames=fieldNamesOut)
         writer.writeheader()
-        sampleNumber = 0
-        for row in reader:
-            sampleNumber += 1
+        numActions = 3
+        numMooclets = 3
+        for sampleNumber, row in enumerate(reader, start=1):
#get the user vars
ageQuartile = int(row['agequartilesUSER']);
#user 0 instead of -1 for age quartiles
if ageQuartile==-1:
ageQuartile=0;

nDaysAct = int(row['ndaysactUSER']);

             #choose a random action
             actions = []
-            for i in range(numMooclets):
+            for _ in range(numMooclets):
                 a, p = getEpsilonGreedyAction(eps, numActions, constant_policy)
                 actions.append(a)

37 changes: 22 additions & 15 deletions louie_experiments/gaussian_reward.py
@@ -109,11 +109,15 @@ def load_matrix_from_table(table_file, num_algos = 9, num_correlations = 9):
         Set to 9 by default because len([-1:1:0.25]) = 9
     '''
     reader = csv.reader(open(table_file, "r"), delimiter = ',')
-    x = []
-    for i in range(2 + num_correlations):
-        x.append(next(reader))
-    table = np.reshape(np.array([np.array(m[1:]) for m in x[2:]]).T, [num_algos,6,num_correlations]).astype(float).swapaxes(1,2)
-    return table
+    x = [next(reader) for _ in range(2 + num_correlations)]
+    return (
+        np.reshape(
+            np.array([np.array(m[1:]) for m in x[2:]]).T,
+            [num_algos, 6, num_correlations],
+        )
+        .astype(float)
+        .swapaxes(1, 2)
+    )
Comment on lines -112 to +120
Function load_matrix_from_table refactored with the following changes:

  • Inline variable that is immediately returned (inline-immediately-returned-variable)
  • Replace unused for index with underscore (for-index-underscore)
  • Convert for loop into list comprehension (list-comprehension)
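
A sketch of the loop-level rules on a toy reader (`rows` stands in for the CSV reader; the `table` temporary disappearing into the `return` is the inlining):

```python
rows = iter([["skip"], ["skip"], ["h", "1", "2"], ["i", "3", "4"]])

# Before:
#     x = []
#     for i in range(4):
#         x.append(next(rows))
#     table = [r[1:] for r in x[2:]]
#     return table

# After: a comprehension consumes the iterator, and the final expression
# is returned (here, printed) directly instead of via a temporary.
x = [next(rows) for _ in range(4)]
print([r[1:] for r in x[2:]])  # [['1', '2'], ['3', '4']]
```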



def load_cumulative_regret_matrix(path, out_pickle_file):
@@ -153,8 +157,6 @@ def load_cumulative_regret_matrix(path, out_pickle_file):
                 if int(row[HEADER_SAMPLENUMBER]) == t_switch:
                     cum_regret_switch = cum_regret
                 cum_regret_final = cum_regret
-                pass
-            pass
Comment on lines -156 to -157
Function load_cumulative_regret_matrix refactored with the following changes:

  • Remove redundant pass statement (remove-redundant-pass)
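
`pass` is only required where Python expects a statement and the block would otherwise be empty; after real statements it is dead weight. A sketch:

```python
for t_switch in [1, 2]:
    cum_regret = t_switch * 10
    if cum_regret > 10:
        cum_regret_switch = cum_regret
        pass  # redundant: the block already contains a statement
    pass      # redundant: the loop body is not empty

# After removal, behavior is identical:
for t_switch in [1, 2]:
    cum_regret = t_switch * 10
    if cum_regret > 10:
        cum_regret_switch = cum_regret
```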

algorithm = file_meta[1]
t_switch = file_meta[2]
correlation = file_meta[3]
@@ -189,10 +191,8 @@ def split_table_file(table_file):
     with open(table_file) as tf:
         with open('average_regret_table.csv', 'w') as avgf:
             with open('all_regret_table.csv', 'w') as allf:
-                index = 0
                 allf.write('algorithm,bandit type,t,correlation,sample,run,{}\n'.format(','.join([str(i + 1) for i in range(240)])))
-                for line in tf:
-                    index += 1
+                for index, line in enumerate(tf, start=1):
Comment on lines -192 to +195
Function split_table_file refactored with the following changes:

  • Replace manual loop counter with call to enumerate (convert-to-enumerate)

if index <= 11:
avgf.write(line)
else:
@@ -214,7 +214,7 @@ def create_6D_MVN_file(path, output_path):
'''
num_actions = 3
files_list = glob.glob(path + os.sep + "gauss_single_bandit_input*.csv")

Function create_6D_MVN_file refactored with the following changes:

  • Merge extend into list declaration (merge-list-extend)
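
Iterable unpacking (`*`) inside a list literal is what lets the two `extend` calls collapse into the declaration, as the hunk below shows. The same transformation in isolation:

```python
num_actions = 3

# Before: a literal followed by two extend calls.
header = ['Correlation', 'Sample']
header.extend(['Immediate-Prob-Arm-{}'.format(a + 1) for a in range(num_actions)])
header.extend(['Delayed-Prob-Arm-{}'.format(a + 1) for a in range(num_actions)])

# After: the extends fold into the literal via * unpacking.
header = [
    'Correlation',
    'Sample',
    *['Immediate-Prob-Arm-{}'.format(a + 1) for a in range(num_actions)],
    *['Delayed-Prob-Arm-{}'.format(a + 1) for a in range(num_actions)],
]
print(header)
```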

name_to_prob_dict = {}
for file_path in files_list:
file_name = os.path.basename(file_path)
@@ -234,7 +234,7 @@ def create_6D_MVN_file(path, output_path):
for row in reader:
probs = [float(row[HEADER_TRUEPROB.format(a + 1)]) for a in range(num_actions)]
break # just need to read the first row since reward probs are constant over t

name = (correlation, sample)

if name not in name_to_prob_dict:
@@ -248,9 +248,16 @@ def create_6D_MVN_file(path, output_path):
     with open(output_path, 'w', newline='') as outf:
         writer = csv.writer(outf, delimiter=',')
         data = []
-        header = ['Correlation', 'Sample']
-        header.extend(['Immediate-Prob-Arm-{}'.format(a + 1) for a in range(num_actions)])
-        header.extend(['Delayed-Prob-Arm-{}'.format(a + 1) for a in range(num_actions)])
+        header = [
+            'Correlation',
+            'Sample',
+            *[
+                'Immediate-Prob-Arm-{}'.format(a + 1)
+                for a in range(num_actions)
+            ],
+            *['Delayed-Prob-Arm-{}'.format(a + 1) for a in range(num_actions)],
+        ]
+
         data.append(header)
         for k,v in name_to_prob_dict.items():
             row = [k[0], k[1]] + list(v.astype(str))