
Commit

Fixed bug in exp
luizth committed Oct 24, 2023
1 parent 6dfd601 · commit ee90012
Showing 3 changed files with 21 additions and 12 deletions.
11 changes: 7 additions & 4 deletions experiments/gtq_learning_exp.py
@@ -78,6 +78,9 @@ def run_experiment(self, r_id: int):
         # instantiate learning agents as drivers
         drivers = get_gtq_learning_agents(env, policy)
 
+        # learning rate
+        alpha = self.ALPHA
+
         # sum of the average regret per OD pair (used to measure the averages through time)
         # for each OD pair, it stores a tuple [w, x, y, z]
         # - w the average real regret
@@ -106,13 +109,13 @@ def run_experiment(self, r_id: int):
 
         # update strategy (Q table)
         for d_id in drivers.keys():
-            drivers[d_id].update_strategy(obs_n[d_id], reward_n[d_id], info_n[d_id], alpha=self.ALPHA)
+            drivers[d_id].update_strategy(obs_n[d_id], reward_n[d_id], info_n[d_id], alpha=alpha)
 
         # update global learning rate (alpha)
-        if self.ALPHA > self.MIN_ALPHA:
-            self.ALPHA = self.ALPHA * self.ALPHA_DECAY
+        if alpha > self.MIN_ALPHA:
+            alpha = alpha * self.ALPHA_DECAY
         else:
-            self.ALPHA = self.MIN_ALPHA
+            alpha = self.MIN_ALPHA
 
         # -- episode statistics
         # -------------------------
11 changes: 7 additions & 4 deletions experiments/rmq_learning_exp.py
@@ -70,6 +70,9 @@ def run_experiment(self, r_id: int):
         # instantiate learning agents as drivers
         drivers = get_rmq_learning_agents(env, policy)
 
+        # learning rate
+        alpha = self.ALPHA
+
         # sum of the average regret per OD pair (used to measure the averages through time)
         # for each OD pair, it stores a tuple [w, x, y, z]
         # - w the average real regret
@@ -98,13 +101,13 @@ def run_experiment(self, r_id: int):
 
         # update strategy (Q table)
         for d_id in drivers.keys():
-            drivers[d_id].update_strategy(obs_n_[d_id], reward_n[d_id], info_n[d_id], alpha=self.ALPHA)
+            drivers[d_id].update_strategy(obs_n_[d_id], reward_n[d_id], info_n[d_id], alpha=alpha)
 
         # update global learning rate (alpha)
-        if self.ALPHA > self.MIN_ALPHA:
-            self.ALPHA = self.ALPHA * self.ALPHA_DECAY
+        if alpha > self.MIN_ALPHA:
+            alpha = alpha * self.ALPHA_DECAY
         else:
-            self.ALPHA = self.MIN_ALPHA
+            alpha = self.MIN_ALPHA
 
         # -- episode statistics
         # -------------------------
11 changes: 7 additions & 4 deletions experiments/tq_learning_exp.py
@@ -72,6 +72,9 @@ def run_experiment(self, r_id: int):
         # instantiate learning agents as drivers
         drivers = get_tq_learning_agents(env, policy)
 
+        # learning rate
+        alpha = self.ALPHA
+
         # sum of the average regret per OD pair (used to measure the averages through time)
         # for each OD pair, it stores a tuple [w, x, y, z]
         # - w the average real regret
@@ -100,13 +103,13 @@ def run_experiment(self, r_id: int):
 
         # update strategy (Q table)
         for d_id in drivers.keys():
-            drivers[d_id].update_strategy(obs_n[d_id], reward_n[d_id], info_n[d_id], alpha=self.ALPHA)
+            drivers[d_id].update_strategy(obs_n[d_id], reward_n[d_id], info_n[d_id], alpha=alpha)
 
         # update global learning rate (alpha)
-        if self.ALPHA > self.MIN_ALPHA:
-            self.ALPHA = self.ALPHA * self.ALPHA_DECAY
+        if alpha > self.MIN_ALPHA:
+            alpha = alpha * self.ALPHA_DECAY
         else:
-            self.ALPHA = self.MIN_ALPHA
+            alpha = self.MIN_ALPHA
 
         # -- episode statistics
         # -------------------------
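The change is the same in all three experiment scripts: instead of decaying the shared self.ALPHA attribute in place, each run now copies it into a local alpha, so a later call to run_experiment starts from the configured initial learning rate rather than the decayed value left over from the previous run. A minimal sketch of the pattern, assuming illustrative values for ALPHA, MIN_ALPHA and ALPHA_DECAY; the episode loop and agent updates are placeholders, not the project's real code:

class Experiment:
    ALPHA = 1.0          # initial learning rate (illustrative value)
    MIN_ALPHA = 0.05     # lower bound for the decayed rate (illustrative)
    ALPHA_DECAY = 0.99   # multiplicative decay per episode (illustrative)

    def run_experiment(self, r_id: int) -> float:
        # copy the shared learning rate into a local variable so the decay
        # below does not leak into subsequent runs
        alpha = self.ALPHA
        for _episode in range(100):
            # ... drivers would update their Q tables here using alpha ...
            if alpha > self.MIN_ALPHA:
                alpha = alpha * self.ALPHA_DECAY
            else:
                alpha = self.MIN_ALPHA
        return alpha

exp = Experiment()
print(exp.run_experiment(0))   # decayed value used by this run
print(exp.ALPHA)               # still 1.0: the next run starts fresh

Before the fix, the decay was applied to self.ALPHA itself, so a second run would have started with the already-decayed rate from the first.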
