Commit

input and target arguments are switched for smooth_l1_loss function to follow pytorch guide

seungeunrho committed Jun 8, 2019
1 parent 8e91b98 commit 46f9b32
Showing 6 changed files with 6 additions and 6 deletions.
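Context for the change (not part of the commit): PyTorch documents the call as F.smooth_l1_loss(input, target), where input is the model's prediction and target is the regression target. The smooth L1 (Huber) value is symmetric in its two arguments, so swapping them changes neither the loss nor the gradients here; the edit simply brings every call in line with the documented argument order. A minimal sketch with made-up tensors:

import torch
import torch.nn.functional as F

pred = torch.randn(4, 1, requires_grad=True)   # stands in for model.v(s_batch) or q(s, a)
target = torch.randn(4, 1)                     # stands in for a TD target

# The Huber / smooth-L1 value is symmetric, so both orders give the same number ...
assert torch.allclose(F.smooth_l1_loss(pred, target.detach()),
                      F.smooth_l1_loss(target.detach(), pred))

# ... but the documented order is (input, target): prediction first.
loss = F.smooth_l1_loss(pred, target.detach())
loss.backward()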
2 changes: 1 addition & 1 deletion a3c.py
@@ -71,7 +71,7 @@ def train(model, rank):
advantage = td_target - model.v(s_batch)
pi = model.pi(s_batch,softmax_dim=1)
pi_a = pi.gather(1,a_batch)
-loss = -torch.log(pi_a) * advantage.detach() + F.smooth_l1_loss(td_target.detach(), model.v(s_batch))
+loss = -torch.log(pi_a) * advantage.detach() + F.smooth_l1_loss(model.v(s_batch), td_target.detach())

optimizer.zero_grad()
loss.mean().backward()
2 changes: 1 addition & 1 deletion acer.py
@@ -102,7 +102,7 @@ def train(model, optimizer, memory, on_policy=False):

loss1 = -rho_bar * torch.log(pi_a) * (q_ret - v)
loss2 = -correction_coeff * pi * torch.log(pi) * (q.detach()-v) # bias correction term
-loss = loss1 + loss2.sum(1) + F.smooth_l1_loss(q_ret, q_a)
+loss = loss1 + loss2.sum(1) + F.smooth_l1_loss(q_a, q_ret)

optimizer.zero_grad()
loss.mean().backward()
2 changes: 1 addition & 1 deletion actor_critic.py
@@ -57,7 +57,7 @@ def train_net(self):

pi = self.pi(s, softmax_dim=1)
pi_a = pi.gather(1,a)
-loss = -torch.log(pi_a) * delta.detach() + F.smooth_l1_loss(td_target.detach(), self.v(s))
+loss = -torch.log(pi_a) * delta.detach() + F.smooth_l1_loss(self.v(s), td_target.detach())

self.optimizer.zero_grad()
loss.mean().backward()
2 changes: 1 addition & 1 deletion ddpg.py
@@ -87,7 +87,7 @@ def train(mu, mu_target, q, q_target, memory, q_optimizer, mu_optimizer):
torch.tensor(done_mask_lst)

target = r + gamma * q_target(s_prime, mu_target(s_prime))
-q_loss = F.smooth_l1_loss(target.detach(), q(s, a))
+q_loss = F.smooth_l1_loss(q(s,a), target.detach())
q_optimizer.zero_grad()
q_loss.backward()
q_optimizer.step()
2 changes: 1 addition & 1 deletion dqn.py
@@ -65,7 +65,7 @@ def train(q, q_target, memory, optimizer):
q_a = q_out.gather(1,a)
max_q_prime = q_target(s_prime).max(1)[0].unsqueeze(1)
target = r + gamma * max_q_prime * done_mask
-loss = F.smooth_l1_loss(target, q_a)
+loss = F.smooth_l1_loss(q_a, target)

optimizer.zero_grad()
loss.backward()
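For reference, a self-contained sketch of a TD-style update written in the corrected (input, target) order; the tensor shapes, contents, and gamma below are invented for illustration and are not taken from dqn.py:

import torch
import torch.nn.functional as F

gamma = 0.98                                   # assumed discount factor
q_a = torch.randn(8, 1, requires_grad=True)    # predicted Q(s, a) -- the tensor being optimized
max_q_prime = torch.randn(8, 1)                # max_a' Q_target(s', a'), treated as a constant
r = torch.randn(8, 1)
done_mask = torch.ones(8, 1)

target = r + gamma * max_q_prime * done_mask
loss = F.smooth_l1_loss(q_a, target)           # prediction first, target second
loss.backward()
print(q_a.grad is not None)                    # True: gradients reach the prediction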
2 changes: 1 addition & 1 deletion ppo-lstm.py
@@ -90,7 +90,7 @@ def train_net(self):

surr1 = ratio * advantage
surr2 = torch.clamp(ratio, 1-eps_clip, 1+eps_clip) * advantage
-loss = -torch.min(surr1, surr2) + F.smooth_l1_loss(td_target.detach(), v_s)
+loss = -torch.min(surr1, surr2) + F.smooth_l1_loss(v_s, td_target.detach())

self.optimizer.zero_grad()
loss.mean().backward(retain_graph=True)
