# Evaluate Agent on Unity Environment

---

## Start the Environment

The steps below assume that you have followed the instructions in the README file so that the Unity environment is ready.

In [None]:
from unityagents import UnityEnvironment
import numpy as np

# Create the handle to the Unity build used by every later cell.
# NOTE(review): "Tennis.app" looks like the macOS build name — adjust file_name
# to the build you downloaded if you are on another platform.
env = UnityEnvironment(file_name="Tennis.app")

Environments contain **_brains_** which are responsible for deciding the actions of their associated agents. Here we check for the first brain available, and set it as the default brain we will be controlling from Python.

In [None]:
# Pick the first brain the environment exposes and use it as the default brain
# for the rest of the notebook.
brain_name = env.brain_names[0]   # key used to index results of env.reset()/env.step()
brain = env.brains[brain_name]    # brain object; its action-space size is read when sizing the networks

## Run the Agents

Specify the saved models to test.

In [None]:
# Checkpoint files whose state dicts are loaded into the actor and critic networks.
actor_model_path, critic_model_path = "actor.pt", "critic.pt"

Run the cell below to watch the agents interact with the Unity environment.

In [None]:
import torch
from maddpg import Controller

seed = 69

# Reset once up front purely to read the environment's dimensions.
env_info = env.reset(train_mode=False)[brain_name]     # reset the environment
# Read the observation length from the first agent (index 0) so the lookup
# also works when the environment exposes only a single agent.
state_size = len(env_info.vector_observations[0])      # get state size
action_size = brain.vector_action_space_size           # get action size
num_agents = len(env_info.agents)                      # number of agents
episodes = 20                                          # number of episodes to play

# initialize the algorithm controller and networks
controller = Controller(state_size, action_size, seed)
# The identity map_location keeps every storage on the CPU, so checkpoints
# saved on a GPU machine can still be loaded on a CPU-only one.
controller.actor_local.load_state_dict(torch.load(actor_model_path, map_location=lambda storage, loc: storage))
controller.critic_local.load_state_dict(torch.load(critic_model_path, map_location=lambda storage, loc: storage))

# Episodes are numbered 1..episodes inclusive, so exactly `episodes` games are played.
for i_episode in range(1, episodes + 1):
    scores = np.zeros(num_agents)                          # initialize the score (for each agent)
    env_info = env.reset(train_mode=False)[brain_name]     # reset the environment
    states = env_info.vector_observations                  # get the current state (for each agent)

    while True:
        # add_noise=False: presumably disables exploration noise for evaluation — confirm in maddpg.Controller
        actions = controller.act(states, add_noise=False)  # select an action (for each agent)
        env_info = env.step(actions)[brain_name]           # send all actions to the environment
        next_states = env_info.vector_observations         # get next state (for each agent)
        rewards = env_info.rewards                         # get reward (for each agent)
        dones = env_info.local_done                        # see if episode finishes
        scores += rewards                                  # update the score (for each agent)
        states = next_states                               # roll over states to next time step
        if np.any(dones):                                  # exit loop as soon as any agent is done
            break

    # Report the best per-agent total for this episode.
    print('Total score (max over agents): {} for episode {}'.format(np.amax(scores), i_episode))