# Trained agent
---
Watch a trained DDPG agent act in the Unity Reacher environment.

In [1]:
from src.ddpg import AgentDDPG
from unityagents import UnityEnvironment
import matplotlib as mpl
import matplotlib.pyplot as plt
import json
import numpy as np
import os
import random
import time
import sys

Load the environment (choose either Reacher1 with one agent or Reacher20 with twenty agents).

In [2]:
# Launch the Unity build; point file_name at Reacher1.app or Reacher20.app.
env = UnityEnvironment(file_name="src/exec/Reacher1.app")

# The first brain listed by the environment is used as the default brain
# (the brain is responsible for deciding agent actions).
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# Reset once in training mode to inspect the agents and the space sizes.
env_info = env.reset(train_mode=True)[brain_name]
n_agents = len(env_info.agents)
state_size = brain.vector_observation_space_size
action_size = brain.vector_action_space_size

# Summary of what was found.
print('Number of agents:', n_agents)
print('Action size:', action_size)
print('State size:', state_size)

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		goal_size -> 5.0
		goal_speed -> 1.0
Unity brain name: ReacherBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 33
        Number of stacked Vector Observation: 1
        Vector Action space type: continuous
        Vector Action space size (per agent): 4
        Vector Action descriptions: , , , 


Number of agents: 1
Action size: 4
State size: 33


Load the trained agent parameters and saved weights.

In [3]:
# Seed passed to the agent constructor.
seed = 123

# Parameters stored in runs/params.json are expanded into keyword
# arguments for AgentDDPG.
with open("runs/params.json") as params_file:
    agentParams = json.load(params_file)

agent = AgentDDPG(env, seed, **agentParams)
# Restore the saved weights; "runs" is the same folder that holds params.json.
agent.load_weights("runs")

Watch a trained agent for one episode.

In [4]:
# Run one episode with the loaded agent and report the episode score.
#
# The score is tracked per agent and averaged at the end, so the reported
# value matches its "averaged over agents" label for both the single-agent
# (Reacher1) and the twenty-agent (Reacher20) builds. Previously only the
# reward of agent 0 was accumulated.

# reset environment with train_mode=False
env_info = env.reset(train_mode=False)[brain_name]
# get current state (for each agent)
states = env_info.vector_observations
# initialize one running score per agent
scores = np.zeros(len(env_info.agents))
t_i = 0
while True:
    # select action (for each agent)
    actions = agent.act(states)
    # execute actions
    env_info = env.step(actions)[brain_name]
    # get next state, reward, done (for each agent)
    next_states = env_info.vector_observations
    rewards = np.asarray(env_info.rewards, dtype=float)
    dones = env_info.local_done
    # update scores and states (for each agent)
    scores += rewards
    states = next_states
    t_i += 1
    # "\r" rewrites the same output line each step; flush so it shows immediately.
    # The displayed reward is the mean over agents for this step.
    print("\rtime step {} reward {:.2f} ".format(t_i, float(rewards.mean())), end="", flush=True)
    # stop as soon as any agent reports that its episode is done
    if np.any(dones):
        break
# episode score averaged over all agents (equals the single score when there is one agent)
score = float(scores.mean())
print('\nTotal score (averaged over agents) this episode: {}'.format(score))

time step 1001 reward 0.04 
Total score (averaged over agents) this episode: 36.11999919265509


Close the environment when done.

In [5]:
# Close the Unity environment that was opened at the start of the notebook.
env.close()