In [1]:
from collections import deque

import torch
import numpy as np

from models.actor import Actor
from models.critic import Critic
from agent import Agent

from unityagents import UnityEnvironment
import numpy as np

In [2]:
# Relative location of the Unity Reacher build that is passed to UnityEnvironment.
path = 'env/multiple/Reacher.app'

In [3]:
# Create the Unity environment from the build at `path`.
# The handle is released by the env.close() cell at the end of the notebook.
env = UnityEnvironment(file_name=path)

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		goal_speed -> 1.0
		goal_size -> 5.0
Unity brain name: ReacherBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 33
        Number of stacked Vector Observation: 1
        Vector Action space type: continuous
        Vector Action space size (per agent): 4
        Vector Action descriptions: , , , 


In [4]:
# Use the first brain the environment exposes; `brain_name` is the key used to
# index the results of env.reset() / env.step() in later cells.
brain_name = env.brain_names[0]
# Brain object for that name; read below for the action-space size.
brain = env.brains[brain_name]

In [5]:
# Reset the environment in training mode and keep the info for our brain.
env_info = env.reset(train_mode=True)[brain_name]

# Collect the problem dimensions first; later cells rely on these names.
num_agents = len(env_info.agents)
action_size = brain.vector_action_space_size
states = env_info.vector_observations
state_size = states.shape[1]  # width of one agent's observation vector

# Report what was found, in a fixed order.
print('Number of agents:', num_agents)
print('Size of each action:', action_size)
print(f'There are {states.shape[0]} agents. Each observes a state with length: {state_size}')
print(f'The state for the first agent looks like: {states[0]}')

Number of agents: 20
Size of each action: 4
There are 20 agents. Each observes a state with length: 33
The state for the first agent looks like: [ 0.00000000e+00 -4.00000000e+00  0.00000000e+00  1.00000000e+00
 -0.00000000e+00 -0.00000000e+00 -4.37113883e-08  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00 -1.00000000e+01  0.00000000e+00
  1.00000000e+00 -0.00000000e+00 -0.00000000e+00 -4.37113883e-08
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  5.75471878e+00 -1.00000000e+00
  5.55726624e+00  0.00000000e+00  1.00000000e+00  0.00000000e+00
 -1.68164849e-01]


In [6]:
# Training-loop settings, read by the training cell below.
n_episodes = 1000   # number of episodes to run
max_t = 3000        # upper bound on steps within one episode
print_every = 100   # progress line cadence and rolling-window length

# Build the agent from the observed state/action sizes.
# NOTE(review): the meaning of the third argument (25) and of the two
# [400, 300] lists is defined by Agent, which is not visible here --
# presumably a seed and actor/critic hidden-layer sizes; confirm in agent.py.
agent = Agent(state_size, action_size, 25, [400, 300], [400, 300])

In [7]:
# Scratch check: query the agent once on the initial observations and advance
# the environment by a single step with the result.
# NOTE(review): the training cell resets the environment at the start of every
# episode, so this step does not appear to be needed for training.
action = agent.act(states)
env_info = env.step(action)[brain_name]

In [8]:
# Actions the agent produces for the observations returned by the step above.
# NOTE(review): `start_actions` is not read anywhere else in the visible notebook.
start_actions = agent.act(env_info.vector_observations)

In [9]:
# import pdb

# %pdb

In [10]:
# Rolling window over the last `print_every` episodes, plus the full history.
scores_deque = deque(maxlen=print_every)
scores = []

for i_episode in range(1, n_episodes + 1):
    # Start a new episode: reset environment and agent, zero one score per agent.
    env_info = env.reset(train_mode=True)[brain_name]
    state = env_info.vector_observations
    agent.reset()
    score = np.zeros(num_agents)

    for t in range(max_t):
        # Ask the agent for actions and advance the environment by one step.
        actions = agent.act(state)
        env_info = env.step(actions)[brain_name]
        next_state, reward, done = (
            env_info.vector_observations,
            env_info.rewards,
            env_info.local_done,
        )

        # Hand the transition and the step index to the agent, then move on.
        agent.step(state, actions, reward, next_state, done, t)
        state = next_state
        score += reward

        # End the episode as soon as any entry of `done` is set.
        if np.any(done):
            break

    # Keep this episode's per-agent scores in both containers.
    scores_deque.append(score)
    scores.append(score)

    # Same text is used for the in-place update and the periodic permanent line.
    progress = f"\rEpisode {i_episode}\tAverage Score: {np.mean(scores_deque):.4f}"
    print(progress, end="")

    # Overwrite the checkpoints with the current local network weights.
    torch.save(agent.actor_local.state_dict(), 'checkpoint_actor.pth')
    torch.save(agent.critic_local.state_dict(), 'checkpoint_critic.pth')

    # Every `print_every` episodes, finish the line so it stays in the output.
    if i_episode % print_every == 0:
        print(progress)

Episode 3	Average Score: 1.1842

KeyboardInterrupt: 

In [None]:
# Reset the environment again (training mode) and keep the info for our brain.
# NOTE(review): this cell has no execution count, i.e. it was not run in the saved session.
env_info = env.reset(train_mode=True)[brain_name]

In [None]:
# Query the agent on the freshly reset observations.
# This rebinds `actions`, the same name the training loop assigns.
actions = agent.act(env_info.vector_observations)

In [19]:
# Display the current `actions` array as the cell output.
# NOTE(review): this cell's execution count (19) is out of sequence with its
# neighbours, so the shown output may not match a fresh top-to-bottom run.
actions

array([[0.24392034, 0.3472263 , 0.1881258 , 0.29108587],
       [0.24421814, 0.34785667, 0.18833965, 0.291107  ],
       [0.2426352 , 0.34679607, 0.18594633, 0.2881274 ],
       [0.2413065 , 0.3487538 , 0.18859857, 0.28772318],
       [0.24418184, 0.34718484, 0.18800409, 0.2904595 ],
       [0.24199042, 0.34663332, 0.1859146 , 0.28813893],
       [0.24218266, 0.3486035 , 0.18625158, 0.2899211 ],
       [0.24096113, 0.34852502, 0.18891758, 0.287672  ],
       [0.24149069, 0.34656134, 0.18874589, 0.28863767],
       [0.24025865, 0.34725702, 0.1850503 , 0.28806186],
       [0.24237677, 0.3467571 , 0.18601176, 0.28817174],
       [0.24115026, 0.34832227, 0.18574867, 0.2896075 ],
       [0.24174449, 0.3471652 , 0.18864496, 0.28788882],
       [0.24410841, 0.34805518, 0.18807338, 0.2908631 ],
       [0.2416726 , 0.34654933, 0.18860294, 0.28902555],
       [0.23976067, 0.34635547, 0.18517072, 0.28770882],
       [0.24133277, 0.34687412, 0.18884438, 0.2882663 ],
       [0.24152294, 0.3487299 ,

In [11]:
# Close the Unity environment created at the top of the notebook.
env.close()