In [1]:
from unityagents import UnityEnvironment
import numpy as np
import torch
from ddpg_agent import Agent

In [2]:
# Path to the Reacher build handed to UnityEnvironment.
# NOTE(review): 'path/to/...' looks like a placeholder - point it at the local build.
reacher_binary = 'path/to/Reacher_Linux_NoVis/Reacher.x86_64'
env = UnityEnvironment(file_name=reacher_binary)

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		goal_size -> 5.0
		goal_speed -> 1.0
Unity brain name: ReacherBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 33
        Number of stacked Vector Observation: 1
        Vector Action space type: continuous
        Vector Action space size (per agent): 4
        Vector Action descriptions: , , , 


In [3]:
# Work with the first brain the environment exposes.
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# Reset in training mode and keep only this brain's info.
env_info = env.reset(train_mode=True)[brain_name]

# Collect the dimensions needed to build the agent.
num_agents = len(env_info.agents)               # how many agents the environment reports
action_size = brain.vector_action_space_size    # length of one action vector
states = env_info.vector_observations           # current observations
state_size = states.shape[1]                    # second axis of the observation array

# Report what was found.
print('Number of agents:', num_agents)
print('Size of each action:', action_size)
print('There are {} agents. Each observes a state with length: {}'.format(states.shape[0], state_size))

# Build the DDPG agent with matching state/action sizes.
agent = Agent(state_size=state_size, action_size=action_size, random_seed=2)

Number of agents: 1
Size of each action: 4
There are 1 agents. Each observes a state with length: 33


In [4]:
def runagent(agent, episodes=5):
    """Play the environment with ``agent`` and print the score of every episode.

    Uses the notebook-level globals ``env``, ``brain_name`` and ``num_agents``.

    Args:
        agent: object whose ``act(states, add_noise=False)`` returns the
            actions passed to ``env.step``.
        episodes: number of episodes to run.

    Returns:
        None. One ``Episode: <index> Score: <mean score>`` line is printed
        per episode.
    """
    for i_episode in range(episodes):
        env_info = env.reset(train_mode=False)[brain_name]  # reset the environment
        states = env_info.vector_observations               # current state (for each agent)
        scores = np.zeros(num_agents)                       # running score (for each agent)
        while True:
            # Actions come from the agent only, with exploration noise disabled.
            # (No random actions are drawn, so the global NumPy RNG is left untouched.)
            actions = agent.act(states, add_noise=False)
            env_info = env.step(actions)[brain_name]        # send all actions to the environment
            scores += env_info.rewards                      # accumulate reward (for each agent)
            states = env_info.vector_observations           # roll over to the next time step
            if np.any(env_info.local_done):                 # stop as soon as any agent is done
                break
        print('Episode: {} Score: {}'.format(i_episode, np.mean(scores)))

In [5]:
def load(agent, actor_file, critic_file, map_location='cpu'):
    """Restore saved actor/critic weights into the agent's local and target networks.

    Each checkpoint file is read once and the resulting state dict is applied
    to both the local and the target network of its kind.

    Args:
        agent: object exposing ``actor_local``, ``actor_target``,
            ``critic_local`` and ``critic_target``, each with ``load_state_dict``.
        actor_file: path of the actor checkpoint.
        critic_file: path of the critic checkpoint.
        map_location: forwarded to ``torch.load``. The ``'cpu'`` default lets a
            checkpoint written on a GPU be read on a CPU-only machine.

    Returns:
        None.
    """
    # NOTE(review): torch.load unpickles the file - only use checkpoints from a trusted source.
    # Both files are read before any network is touched, so a missing file leaves the agent unchanged.
    actor_state = torch.load(actor_file, map_location=map_location)
    critic_state = torch.load(critic_file, map_location=map_location)

    agent.actor_local.load_state_dict(actor_state)
    agent.actor_target.load_state_dict(actor_state)
    agent.critic_local.load_state_dict(critic_state)
    agent.critic_target.load_state_dict(critic_state)
    
# Restore the saved single-agent checkpoints into the agent's networks.
load(
    agent,
    actor_file='checkpoint_actor_solved_single.pth',
    critic_file='checkpoint_critic_solved_single.pth',
)

In [6]:
# Play a single evaluation episode with the restored agent.
runagent(agent=agent, episodes=1)

Episode: 0 Score: 36.94999917410314


In [7]:
# Close the Unity environment now that the evaluation run is finished.
env.close()