train.py
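"""Train a DPG actor/critic agent on the Unity Reacher environment.

The script pre-fills the replay buffer with random actions, trains until the
average score over the last 100 episodes reaches +30 (or n_episodes runs out),
logs progress to train.log, and optionally saves the trained models and a plot
of the per-episode mean score.
"""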
from unityagents import UnityEnvironment
import numpy as np
import torch
from collections import deque
import matplotlib.pyplot as plt
import os
import sys
import config
from agent import DPG

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
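# Configuration reads defaults from config.py, applies command-line overrides,
# then caches environment-derived values (num_agents, action_size, brain_name).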
hyper_params = config.Configuration()
hyper_params.process_CLI(sys.argv[1:])
env = UnityEnvironment(file_name=hyper_params.reacher_location, worker_id=1)
hyper_params.process_env(env)
num_agents = hyper_params.num_agents
action_size = hyper_params.action_size
brain_name = hyper_params.brain_name
n_episodes = hyper_params.n_episodes
hyper_params.update_every = 3  # override the configured network-update interval
solution = 30                  # target average score over the last 100 episodes
solution_found = False
# create DPG Actor/Critic Agent
agent = DPG(hyper_params)
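# Interface assumed of DPG (inferred from the calls in this script, not from
# agent.py itself):
#   agent.act(states)                                         -> actions for all agents
#   agent.step(states, actions, rewards, next_states, dones)  -> store and learn
#   agent.memory.add(...)                                     -> direct replay-buffer access
#   agent.save_models(model_dir)                              -> checkpoint the networks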
scores = [] # list containing scores from each episode
scores_window = deque(maxlen=100) # last 100 scores
agent_scores = np.zeros(num_agents) # initialize the score (for each agent)
frames = 0
log_file = open('train.log','w')
# initialize the replay buffer with random actions to speed up training later
env_info = env.reset(train_mode=True)[brain_name]     # reset the environment
states = env_info.vector_observations                 # get the current state of each agent
for warm_up_it in range(3000):
    # Gaussian noise clipped to the valid action range [-1, 1]
    actions = np.random.randn(num_agents, action_size).clip(-1, 1)
    env_info = env.step(actions)[brain_name]          # send all actions to the environment
    next_states = env_info.vector_observations        # get next state (for each agent)
    rewards = env_info.rewards                        # get reward (for each agent)
    dones = env_info.local_done                       # see if episode finished
    agent.memory.add(states, actions, rewards, next_states, dones)
    states = next_states                              # roll over states to next time step
    if np.any(dones):                                 # reset the environment if the episode finished
        env_info = env.reset(train_mode=True)[brain_name]
        states = env_info.vector_observations
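# Main training loop: one episode per iteration; an episode runs until any
# agent reports done (Reacher episodes are fixed-length, 1000 frames, per the
# progress print below).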
for i_episode in range(1, n_episodes + 1):
    env_info = env.reset(train_mode=True)[brain_name]  # reset the environment
    states = env_info.vector_observations              # get the current state of each agent
    agent_scores = np.zeros(num_agents)
    frames = 0
    while True:
        actions = agent.act(states)
        env_info = env.step(actions)[brain_name]       # send all actions to the environment
        next_states = env_info.vector_observations     # get next state (for each agent)
        rewards = env_info.rewards                     # get reward (for each agent)
        dones = env_info.local_done                    # see if episode finished
        agent.step(states, actions, rewards, next_states, dones)
        agent_scores += rewards                        # update the score (for each agent)
        states = next_states                           # roll over states to next time step
        frames += 1
        if frames % 10 == 0:
            print('\rEpisode {}\t Frame: {:4}/1000 \t Score: {:.2f}'.format(
                i_episode, frames, np.mean(agent_scores)), end="")
        if np.any(dones):                              # exit loop if episode finished
            break
    scores.append(np.mean(agent_scores))               # store the episode's mean reward over agents
    scores_window.append(np.mean(agent_scores))        # save most recent score
    if i_episode % 10 == 0:
        msg = '\rEpisodes: {}\tAverage Score: {:.2f}\t\t Score: {:.2f}\t'.format(
            i_episode, np.mean(scores_window), np.mean(agent_scores))
        print(msg)
        print(msg, file=log_file)
        log_file.flush()
    if np.mean(scores_window) >= solution and not solution_found:
        # the window covers the last 100 episodes, so by convention the
        # environment counts as solved 100 episodes earlier
        msg = '\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(
            i_episode - 100, np.mean(scores_window))
        print(msg)
        print(msg, file=log_file)
        log_file.flush()
        solution_found = True
        solution_num_episodes = i_episode - 100
log_file.close()
if solution_found:
    print('\nEnvironment solved in {:d} episodes!'.format(solution_num_episodes))
else:
    # guard against a NameError: solution_num_episodes only exists if solved
    print('\nEnvironment was not solved within {:d} episodes.'.format(n_episodes))
# save the models if a target directory was requested
if hyper_params.model_dir:
    if not os.path.exists(hyper_params.model_dir):
        os.makedirs(hyper_params.model_dir)
    agent.save_models(hyper_params.model_dir)
# plot results
plt.figure()
plt.title('Mean Score across Agents per Episode')
plt.xlabel('Episodes')
plt.ylabel('Mean Score')
episodes = range(len(scores))
plt.plot(episodes, scores, label='mean score over agents')  # label so the legend has an entry
plt.legend(loc='lower right')
if not hyper_params.plt_file:
    hyper_params.plt_file = "results"  # default file name; keep any CLI-provided value
plt.savefig(hyper_params.plt_file)
plt.show()
env.close()
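# Example invocation (flag names are hypothetical; config.py defines the real
# CLI options parsed by Configuration.process_CLI):
#   python train.py --reacher_location Reacher.app --model_dir models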