# Run with trained model

In [1]:
from unityagents import UnityEnvironment
import numpy as np

# Launch the Reacher build referenced by the path below.
env = UnityEnvironment(file_name='Reacher_Linux_NoVis_20/Reacher.x86_64')

# Brains are looked up by name; this notebook drives the first one listed.
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# Begin an episode and keep the initial observations of every agent.
env_info = env.reset(train_mode=True)[brain_name]
states = env_info.vector_observations

# Problem dimensions used by the later cells.
num_agents = len(env_info.agents)                 # how many agents the env reports
action_size = brain.vector_action_space_size      # width of one action vector
state_size = states.shape[1]                      # width of one observation vector

# Summary of what was just read from the environment.
print('Number of agents:', num_agents)
print('Size of each action:', action_size)
print('There are {} agents. Each observes a state with length: {}'.format(states.shape[0], state_size))
print('The state for the first agent looks like:', states[0])

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		goal_size -> 5.0
		goal_speed -> 1.0
Unity brain name: ReacherBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 33
        Number of stacked Vector Observation: 1
        Vector Action space type: continuous
        Vector Action space size (per agent): 4
        Vector Action descriptions: , , , 


Number of agents: 20
Size of each action: 4
There are 20 agents. Each observes a state with length: 33
The state for the first agent looks like: [ 0.00000000e+00 -4.00000000e+00  0.00000000e+00  1.00000000e+00
 -0.00000000e+00 -0.00000000e+00 -4.37113883e-08  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00 -1.00000000e+01  0.00000000e+00
  1.00000000e+00 -0.00000000e+00 -0.00000000e+00 -4.37113883e-08
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  5.75471878e+00 -1.00000000e+00
  5.55726624e+00  0.00000000e+00  1.00000000e+00  0.00000000e+00
 -1.68164849e-01]


In [2]:
import sys, platform, json
import torch
import random
import numpy as np

from scores import Scores
from ddpg_agent import Agent
from unityagents import UnityEnvironment

In [3]:
# Configuration dictionary passed to Agent.set_hparams in a later cell.
# NOTE(review): the meaning of each key is defined by the project modules
# (ddpg_agent / scores), which are not visible in this notebook; the inline
# notes below are assumptions to confirm against those modules.
hparams = {
    "output": "reacher_result",      # presumably the checkpoint/result name — confirm
    "seed": 0,
    "epoch": 1000,
    "scores": {
        "expectation": 30,
        "window_size": 100,
        "check_solved": True,
    },
    "t_max": 1000,
    "buffer_size": 1000000,
    "batch_size": 128,
    "gamma": 0.99,
    "tau": 1e-3,
    "lr": {
        "actor": 1e-4,
        "critic": 1e-4,
    },
    "weight_decay": 0,

    "learn_per_step": 20,
    "update_times": 10,

    # One list of layer widths per network (assumed — verify in ddpg_agent).
    "hidden_layers": {
        "actor": [256, 128],
        "critic": [256, 128],
    },
}


In [4]:
# Re-derive the observation and action widths from the objects created in the
# environment-setup cell.
states = env_info.vector_observations          # current observation of every agent
state_size = states.shape[1]
action_size = brain.vector_action_space_size

# Class-level configuration has to happen before any Agent is instantiated here.
Agent.set_hparams(state_size, action_size, hparams)
agent = Agent(action_size)

# NOTE(review): presumably restores the trained weights saved under this
# name — confirm against Agent.load in ddpg_agent.
Agent.load('reacher_result')

# One score accumulator slot and one Agent instance per environment agent.
scores = np.zeros(num_agents)
agents = [Agent(action_size) for _ in range(num_agents)]

In [5]:
# Roll out one episode with the loaded agents and report each agent's total reward.
#
# The environment and the score accumulator are reset here instead of relying
# on values left in the kernel by earlier cells, so re-running this cell starts
# a fresh episode rather than adding new rewards onto a previous run's totals.
env_info = env.reset(train_mode=True)[brain_name]
states = env_info.vector_observations              # current observation of every agent
scores = np.zeros(num_agents)                      # running total reward per agent

while True:
    # One action per agent, each computed from that agent's own observation.
    actions = np.array([agents[i].act(states[i]) for i in range(num_agents)])

    env_info = env.step(actions)[brain_name]       # send the actions to the environment
    next_states = env_info.vector_observations     # observations after the step
    rewards = env_info.rewards                     # reward earned by each agent this step
    dones = env_info.local_done                    # per-agent end-of-episode flags

    states = next_states
    scores += rewards

    # '\r' rewrites the same output line, giving a live progress readout.
    print('\rScore: {:.2f}\tmin: {:.2f}\tmax: {:.2f}'
          .format(np.mean(scores), np.min(scores), np.max(scores)), end="")

    # Stop as soon as any agent reports that its episode has finished.
    if np.any(dones):
        break

print("\nScores: {}".format(scores))

Score: 32.09	min: 25.83	max: 39.42
Scores: [29.41999934 33.58999925 30.01999933 39.41999912 31.58999929 28.23999937
 27.0599994  31.3799993  34.47999923 33.38999925 34.45999923 37.26999917
 32.44999927 30.78999931 31.85999929 33.95999924 25.82999942 34.34999923
 31.83999929 30.39999932]


In [6]:
# Release the environment created in the first cell once the run is finished.
# NOTE(review): presumably this also terminates the Unity process — confirm in unityagents.
env.close()