# Continuous Control Run

---

In this notebook, you can run a trained agent in the Unity ML-Agents Reacher environment.

### 1. Start the Environment

We begin by importing the necessary packages.  If the code cell below returns an error, please revisit the project instructions to double-check that you have installed [Unity ML-Agents](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Installation.md) and [NumPy](http://www.numpy.org/).

Select which environment you want to start by using one of the following lines in the code cell below:

- `env = UnityEnvironment(file_name='Reacher_single.app')`
- `env = UnityEnvironment(file_name='Reacher_20.app')`

In [None]:
from unityagents import UnityEnvironment
import numpy as np


# Create the Unity environment from the Reacher build file; the markdown cell
# above lists 'Reacher_single.app' as the alternative file_name.
env = UnityEnvironment(file_name='Reacher_20.app')

In [None]:
# get the default brain: the first name listed by the environment, and the
# brain object registered under that name (read later for the action-space size)
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

## 2. Load a trained agent
Pick your favorite agent. You can choose any of the saved agents in the `checkpoints` folder.

In [None]:
import os.path

def restore_agent(actor_name, filepath_local_actor, filepath_local_critic, filepath_target_actor, filepath_target_critic):
    """Build an agent and load saved weights into its four networks.

    The function relies on names resolved from the notebook namespace at call
    time: ``torch``, ``Agent``, ``state_size`` and ``action_size``. They must
    be defined/imported before this function is called. The ``Agent`` class
    used is whichever one is currently imported, so it has to match
    ``actor_name``.

    Args:
        actor_name: Agent flavour, either ``'ddpg'`` or ``'td3'``.
        filepath_local_actor: Checkpoint file loaded into ``actor_local``.
        filepath_local_critic: Checkpoint file loaded into ``critic_local``.
        filepath_target_actor: Checkpoint file loaded into ``actor_target``.
        filepath_target_critic: Checkpoint file loaded into ``critic_target``.

    Returns:
        The newly constructed agent with all four state dicts loaded.

    Raises:
        ValueError: If ``actor_name`` is neither ``'ddpg'`` nor ``'td3'``.
    """
    # Validate the flavour up front: previously an unknown name fell through
    # both branches and crashed later with an UnboundLocalError, after the
    # checkpoints had already been read from disk.
    if actor_name == 'ddpg':
        extra_kwargs = {}
    elif actor_name == 'td3':
        extra_kwargs = {'policy_noise': 0.2}
    else:
        raise ValueError(
            "Unknown actor_name {!r}; expected 'ddpg' or 'td3'".format(actor_name)
        )

    # Map every tensor to the CPU so checkpoints load on any machine.
    # NOTE: torch.load unpickles the file, so only load checkpoints you trust.
    cpu = torch.device('cpu')
    checkpoint_local_actor = torch.load(filepath_local_actor, map_location=cpu)
    checkpoint_local_critic = torch.load(filepath_local_critic, map_location=cpu)
    checkpoint_target_actor = torch.load(filepath_target_actor, map_location=cpu)
    checkpoint_target_critic = torch.load(filepath_target_critic, map_location=cpu)

    loaded_agent = Agent(state_size, action_size, random_seed=33, **extra_kwargs)

    loaded_agent.actor_local.load_state_dict(checkpoint_local_actor)
    loaded_agent.actor_target.load_state_dict(checkpoint_target_actor)
    loaded_agent.critic_local.load_state_dict(checkpoint_local_critic)
    loaded_agent.critic_target.load_state_dict(checkpoint_target_critic)

    return loaded_agent

### 3. Let's run

Note: when running a trained agent, the environment must be reset with `train_mode=False`.

In [None]:
# Reset the environment with training mode switched off and keep the info
# object for the default brain.
env_info = env.reset(train_mode=False)[brain_name]

# Gather the quantities of interest first ...
num_agents = len(env_info.agents)                 # number of agents
action_size = brain.vector_action_space_size      # size of each action
states = env_info.vector_observations             # one observation row per agent
state_size = states.shape[1]                      # length of a single observation

# ... then report them.
print('Number of agents:', num_agents)
print('Size of each action:', action_size)
print('There are {} agents. Each observes a state with length: {}'.format(states.shape[0], state_size))
print('The state for the first agent looks like:', states[0])

## TD3 
Here I load a TD3 agent, but you can choose whichever one you prefer.

In [None]:
from actors.td3_actor import Agent
from collections import deque
import torch

# Filename prefixes of the saved actor and critic checkpoints.
agent_name = 'checkpoints/agent_td3_20'
critic_name = 'checkpoints/critic_td3_20'

local_actor_path = agent_name+'_ckpt_local.pth'
target_actor_path = agent_name+'_ckpt_target.pth'
local_critic_path = critic_name+'_ckpt_local.pth'
target_critic_path = critic_name+'_ckpt_target.pth'

# restore_agent needs all four files, so check every one of them (not only the
# local actor) and fail loudly right here. Merely printing a message would
# leave `agent` undefined -- or stale from an earlier run -- and the episode
# cell would then break or silently use the wrong policy.
checkpoint_paths = [local_actor_path, local_critic_path, target_actor_path, target_critic_path]
missing_paths = [path for path in checkpoint_paths if not os.path.isfile(path)]
if missing_paths:
    raise FileNotFoundError(
        "Error no agent with the specified name; missing checkpoint file(s): "
        + ", ".join(missing_paths)
    )

agent = restore_agent('td3', local_actor_path, local_critic_path, target_actor_path, target_critic_path)
print("Agent loaded.")

In [None]:
# Play a single episode with the loaded agent, with add_noise=False.
env_info = env.reset(train_mode=False)[brain_name]
states = env_info.vector_observations      # current state of every agent
scores = np.zeros(num_agents)              # running return, one entry per agent

while True:
    # ask the agent for actions and send them all to the environment
    actions = agent.act(states, add_noise=False)
    env_info = env.step(actions)[brain_name]

    # unpack what the environment reported for this step
    next_states = env_info.vector_observations
    rewards = env_info.rewards
    dones = env_info.local_done

    # accumulate the rewards and roll the states over to the next time step
    scores += rewards
    states = next_states

    # leave the loop as soon as any agent reports that its episode finished
    if np.any(dones):
        break

print('Total score (averaged over agents) this episode: {}'.format(np.mean(scores)))

In [None]:
# Close the Unity environment now that the run is finished.
env.close()