Commit

Add logging feature
All transitions can now be tracked.
kuro11pow2 committed Oct 18, 2021
1 parent 82b061a commit a5889d5
Showing 6 changed files with 1,744 additions and 135 deletions.
15 changes: 13 additions & 2 deletions src/algorithms/actorcritic_runner.py
@@ -18,10 +18,14 @@ def _episode_prepare(self):
n_action = self._env.action_space.n
self._algo = ActorCritic(n_state, n_action, self._algo_params)
self._score = 0.0
self._score_sum = 0.0

def _episode_sim(self, n_epi):
s = self._env.reset()
done = False
self._score = 0.0
n_step = 0

while not done:
for t in range(self._algo.n_rollout):
prob = self._algo.pi(torch.from_numpy(s).float())
@@ -31,10 +35,17 @@ def _episode_sim(self, n_epi):

if self._train:
self._algo.put_data((s,a,r/self._reward_scale,s_prime,done))

if self._save_step_log:
self._write_step_log(n_step, n_epi, s, a, r, done)

s = s_prime
self._score += r
n_step += 1

if done:
break
break

if self._train:
self._algo.train_net()

self._score_sum += self._score
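
The logging branches added above call self._write_step_log(n_step, n_epi, s, a, r, done), but the helper itself is presumably defined in one of the other changed files not shown in this view. A minimal sketch of a per-step CSV logger with that call signature; the attribute names and column layout are assumptions, not taken from this commit:

import csv

class RunnerLoggingMixin:
    """Hypothetical sketch of the _write_step_log helper the runners call.
    Attribute names and the CSV layout are assumptions, not from the commit."""

    def _open_step_log(self, path):
        # Open the per-step log once and write a header row.
        self._step_log_file = open(path, "w", newline="")
        self._step_log_writer = csv.writer(self._step_log_file)
        self._step_log_writer.writerow(
            ["episode", "step", "state", "action", "reward", "done"])

    def _write_step_log(self, n_step, n_epi, s, a, r, done):
        # One row per environment transition, in the order used at the call sites.
        self._step_log_writer.writerow([n_epi, n_step, list(s), a, r, done])

    def _close_step_log(self):
        self._step_log_file.close()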
17 changes: 12 additions & 5 deletions src/algorithms/ddqn_runner.py
@@ -18,12 +18,14 @@ def _episode_prepare(self):
n_action = self._env.action_space.n
self._algo = DDQN(n_state, n_action, self._algo_params)
self._algo.update_net()

self._score = 0.0
self._score_sum = 0.0

def _episode_sim(self, n_epi):
s = self._env.reset()
done = False
self._score = 0.0
n_step = 0

if self._train:
self._algo.epsilon = max(0.01, self._algo.start_epsilon - 0.01*(n_epi/200))
@@ -36,19 +38,24 @@ def _episode_sim(self, n_epi):

if self._train:
self._algo.append_data((s,a,r/self._reward_scale,s_prime, done))

if self._save_step_log:
self._write_step_log(n_step, n_epi, s, a, r, done)

s = s_prime
self._score += r
n_step += 1

if done:
break


self._score_sum += self._score
if self._train and self._algo.buffer_size() > self._algo.n_train_start:
self._algo.train_net()

if n_epi % self._algo.update_interval==0:
self._algo.update_net()

def _print_log(self, n_epi):
super()._print_log(n_epi)
def _print_log(self, n_epi, avg_score):
super()._print_log(n_epi, avg_score)
print(f"n_buffer : {self._algo.buffer_size()}, "\
+ f"eps : {self._algo.epsilon*100:.1f}%")
17 changes: 12 additions & 5 deletions src/algorithms/dqn_runner.py
@@ -18,12 +18,14 @@ def _episode_prepare(self):
n_action = self._env.action_space.n
self._algo = DQN(n_state, n_action, self._algo_params)
self._algo.update_net()

self._score = 0.0
self._score_sum = 0.0

def _episode_sim(self, n_epi):
s = self._env.reset()
done = False
self._score = 0.0
n_step = 0

if self._train:
self._algo.epsilon = max(0.01, self._algo.start_epsilon - 0.01*(n_epi/200))
@@ -36,19 +38,24 @@ def _episode_sim(self, n_epi):

if self._train:
self._algo.append_data((s,a,r/self._reward_scale,s_prime, done))

if self._save_step_log:
self._write_step_log(n_step, n_epi, s, a, r, done)

s = s_prime
self._score += r
n_step += 1

if done:
break


self._score_sum += self._score
if self._train and self._algo.buffer_size() > self._algo.n_train_start:
self._algo.train_net()

if n_epi % self._algo.update_interval==0:
self._algo.update_net()

def _print_log(self, n_epi):
super()._print_log(n_epi)
def _print_log(self, n_epi, avg_score):
super()._print_log(n_epi, avg_score)
print(f"n_buffer : {self._algo.buffer_size()}, "\
+ f"eps : {self._algo.epsilon*100:.1f}%")
7 changes: 6 additions & 1 deletion src/algorithms/reinforce_runner.py
@@ -24,18 +24,23 @@ def _episode_sim(self, n_epi):
s = self._env.reset()
done = False
self._score = 0.0
n_step = 0

while not done:
prob = self._algo(torch.from_numpy(s).float())
m = Categorical(prob)
a = m.sample()
s_prime, r, done, info = self._env.step(a.item())
s_prime, r, done, info = self._step_wrapper(self._env.step(a.item()))

if self._train:
self._algo.put_data((r, prob[a]))
if self._save_step_log:
self._write_step_log(n_step, n_epi, s, a.item(), r, done)

s = s_prime
self._score += r
n_step += 1

if done:
break

