
Commit

Fixed bug in exp
luizth committed Oct 24, 2023
1 parent 6dfd601 · commit ee90012
Showing 3 changed files with 21 additions and 12 deletions.
11 changes: 7 additions & 4 deletions experiments/gtq_learning_exp.py
@@ -78,6 +78,9 @@ def run_experiment(self, r_id: int):
         # instantiate learning agents as drivers
         drivers = get_gtq_learning_agents(env, policy)
 
+        # learning rate
+        alpha = self.ALPHA
+
         # sum of the average regret per OD pair (used to measure the averages through time)
         # for each OD pair, it stores a tuple [w, x, y, z]
         # - w the average real regret
@@ -106,13 +109,13 @@ def run_experiment(self, r_id: int):
 
         # update strategy (Q table)
         for d_id in drivers.keys():
-            drivers[d_id].update_strategy(obs_n[d_id], reward_n[d_id], info_n[d_id], alpha=self.ALPHA)
+            drivers[d_id].update_strategy(obs_n[d_id], reward_n[d_id], info_n[d_id], alpha=alpha)
 
         # update global learning rate (alpha)
-        if self.ALPHA > self.MIN_ALPHA:
-            self.ALPHA = self.ALPHA * self.ALPHA_DECAY
+        if alpha > self.MIN_ALPHA:
+            alpha = alpha * self.ALPHA_DECAY
         else:
-            self.ALPHA = self.MIN_ALPHA
+            alpha = self.MIN_ALPHA
 
         # -- episode statistics
         # -------------------------
11 changes: 7 additions & 4 deletions experiments/rmq_learning_exp.py
@@ -70,6 +70,9 @@ def run_experiment(self, r_id: int):
         # instantiate learning agents as drivers
         drivers = get_rmq_learning_agents(env, policy)
 
+        # learning rate
+        alpha = self.ALPHA
+
         # sum of the average regret per OD pair (used to measure the averages through time)
         # for each OD pair, it stores a tuple [w, x, y, z]
         # - w the average real regret
@@ -98,13 +101,13 @@ def run_experiment(self, r_id: int):
 
         # update strategy (Q table)
         for d_id in drivers.keys():
-            drivers[d_id].update_strategy(obs_n_[d_id], reward_n[d_id], info_n[d_id], alpha=self.ALPHA)
+            drivers[d_id].update_strategy(obs_n_[d_id], reward_n[d_id], info_n[d_id], alpha=alpha)
 
         # update global learning rate (alpha)
-        if self.ALPHA > self.MIN_ALPHA:
-            self.ALPHA = self.ALPHA * self.ALPHA_DECAY
+        if alpha > self.MIN_ALPHA:
+            alpha = alpha * self.ALPHA_DECAY
         else:
-            self.ALPHA = self.MIN_ALPHA
+            alpha = self.MIN_ALPHA
 
         # -- episode statistics
         # -------------------------
11 changes: 7 additions & 4 deletions experiments/tq_learning_exp.py
@@ -72,6 +72,9 @@ def run_experiment(self, r_id: int):
         # instantiate learning agents as drivers
         drivers = get_tq_learning_agents(env, policy)
 
+        # learning rate
+        alpha = self.ALPHA
+
         # sum of the average regret per OD pair (used to measure the averages through time)
         # for each OD pair, it stores a tuple [w, x, y, z]
         # - w the average real regret
@@ -100,13 +103,13 @@ def run_experiment(self, r_id: int):
 
         # update strategy (Q table)
         for d_id in drivers.keys():
-            drivers[d_id].update_strategy(obs_n[d_id], reward_n[d_id], info_n[d_id], alpha=self.ALPHA)
+            drivers[d_id].update_strategy(obs_n[d_id], reward_n[d_id], info_n[d_id], alpha=alpha)
 
         # update global learning rate (alpha)
-        if self.ALPHA > self.MIN_ALPHA:
-            self.ALPHA = self.ALPHA * self.ALPHA_DECAY
+        if alpha > self.MIN_ALPHA:
+            alpha = alpha * self.ALPHA_DECAY
         else:
-            self.ALPHA = self.MIN_ALPHA
+            alpha = self.MIN_ALPHA
 
         # -- episode statistics
         # -------------------------
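The change is the same in all three experiment scripts: instead of decaying the shared self.ALPHA attribute in place, each run now copies it into a local alpha, so a later call to run_experiment starts from the configured initial learning rate rather than the decayed value left over from the previous run. A minimal sketch of the pattern, assuming illustrative values for ALPHA, MIN_ALPHA and ALPHA_DECAY; the episode loop and agent updates are placeholders, not the project's real code:

class Experiment:
    ALPHA = 1.0          # initial learning rate (illustrative value)
    MIN_ALPHA = 0.05     # lower bound for the decayed rate (illustrative)
    ALPHA_DECAY = 0.99   # multiplicative decay per episode (illustrative)

    def run_experiment(self, r_id: int) -> float:
        # copy the shared learning rate into a local variable so the decay
        # below does not leak into subsequent runs
        alpha = self.ALPHA
        for _episode in range(100):
            # ... drivers would update their Q tables here using alpha ...
            if alpha > self.MIN_ALPHA:
                alpha = alpha * self.ALPHA_DECAY
            else:
                alpha = self.MIN_ALPHA
        return alpha

exp = Experiment()
print(exp.run_experiment(0))   # decayed value used by this run
print(exp.ALPHA)               # still 1.0: the next run starts fresh

Before the fix, the decay was applied to self.ALPHA itself, so a second run would have started with the already-decayed rate from the first.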
