## Test Reacher Notebook

The essential steps in this notebook are:

* Define Unity environment
* Get the default brain
* Import DDPG Agent
* Load the saved actor and critic checkpoints
* Run testing loop

In [1]:
from unityagents import UnityEnvironment
import numpy as np
from ddpg_agent import Agents
import torch
from collections import deque
import matplotlib.pyplot as plt

In [2]:
# Start the Reacher Unity build and keep a handle to it for the rest of the notebook.
# NOTE(review): `worker_id` is forwarded to unityagents unchanged; presumably it keeps
# this session separate from other running environments — confirm against the library docs.
env = UnityEnvironment(
    file_name="Reacher.app",
    worker_id=10,
)

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		goal_speed -> 1.0
		goal_size -> 5.0
Unity brain name: ReacherBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 33
        Number of stacked Vector Observation: 1
        Vector Action space type: continuous
        Vector Action space size (per agent): 4
        Vector Action descriptions: , , , 


In [3]:
# Get the default brain: take the first name listed in `env.brain_names`
# and look up the matching entry in `env.brains`. `brain` is read later
# for the action-space size.
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

In [6]:
import os

# Begin a fresh episode with train_mode=False and keep the info for our brain.
env_info = env.reset(train_mode=False)[brain_name]
states = env_info.vector_observations

# Problem dimensions: number of agents, size of each observation, size of each action.
num_agents = len(env_info.agents)
state_size = states.shape[1]
action_size = brain.vector_action_space_size

# Build the agents with a fixed seed.
agent = Agents(
    state_size=state_size,
    action_size=action_size,
    num_agents=num_agents,
    random_seed=1234,
)

# Restore the saved actor and critic weights, mapped onto the CPU.
# NOTE(review): torch.load unpickles the file — only load checkpoints you trust.
# The critic load stays last so its return value remains the cell's displayed output.
agent.actor_local.load_state_dict(
    torch.load(os.path.join('checkpoints', 'checkpoint_actor.pth'), map_location='cpu')
)
agent.critic_local.load_state_dict(
    torch.load(os.path.join('checkpoints', 'checkpoint_critic.pth'), map_location='cpu')
)

Using default hyperparameters
{'batch_size': 64,
 'buffer_size': 1000000,
 'fc1_units': 400,
 'fc2_units': 300,
 'gamma': 0.99,
 'lr_actor': 0.0001,
 'lr_critic': 0.001,
 'num_iters_learn': 10,
 'tau': 0.001,
 'update_every': 20,
 'weight_decay': 0.0}


<All keys matched successfully>

In [7]:
## Run a single evaluation episode with the loaded agents and report the score.
env_info = env.reset(train_mode=False)[brain_name]     # reset the environment
states = env_info.vector_observations                  # current state (for each agent)
scores = np.zeros(num_agents)                          # running score (for each agent)
num_iter = 0
while True:
    num_iter += 1
    actions = agent.act(states)                        # select an action
    actions = np.clip(actions, -1, 1)                  # keep every action within [-1, 1]
    env_info = env.step(actions)[brain_name]           # send all actions to the environment
    next_states = env_info.vector_observations         # next state (for each agent)
    rewards = np.array(env_info.rewards)               # reward (for each agent)
    # Use the builtin `bool`: the `np.bool` alias was deprecated in NumPy 1.20
    # and removed in 1.24, where it raises AttributeError.
    dones = np.array(env_info.local_done, dtype=bool)  # episode-finished flag (for each agent)
    scores += rewards                                  # accumulate the score (for each agent)
    states = next_states                               # roll over states to next time step
    if np.any(dones):                                  # stop as soon as any agent reports done
        break

print(f'Total score (averaged over agents) this episode: {np.mean(scores)}')
print(f'Number of iterations: {num_iter}')

Total score (averaged over agents) this episode: 24.675499448459597
Number of iterations: 1001
