### Import necessary modules

In [1]:
from unityagents import UnityEnvironment
import numpy as np

import torch
from collections import deque 
import matplotlib.pyplot as plt 
%matplotlib inline

from ddpg_agent import Agent

### Setup environment

In [2]:
# Launch the Unity Tennis environment (OSX build).
env = UnityEnvironment(file_name="./Tennis.app")
# Linux alternative:
# env = UnityEnvironment(file_name="./Tennis_Linux/Tennis.x86_64")

# The first registered brain is the default one
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# Reset with training mode switched off and keep the info belonging to that brain
env_info = env.reset(train_mode=False)[brain_name]

# Initial observations returned by the reset
states = env_info.vector_observations

# Dimensions needed to build the agent:
num_agents = len(env_info.agents)             # how many agents are reported
action_size = brain.vector_action_space_size  # size of each action
state_size = states.shape[1]                  # second axis of the observation array

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		
Unity brain name: TennisBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 8
        Number of stacked Vector Observation: 3
        Vector Action space type: continuous
        Vector Action space size (per agent): 2
        Vector Action descriptions: , 


### Create agent

In [3]:
# Build the agent from the dimensions reported by the environment,
# using a fixed seed passed through to the Agent constructor.
random_seed = 20
agent = Agent(
    state_size=state_size,
    action_size=action_size,
    random_seed=random_seed,
    num_agents=num_agents,
)

### Load trained network weights

In [4]:
# Restore the trained actor and critic weights from their checkpoint files.
# map_location="cpu" lets checkpoints that were saved from GPU tensors be read on a
# CPU-only machine; load_state_dict then copies the values into the networks'
# existing parameters, so the networks stay on whatever device they already use.
actor_weights = torch.load("solved_checkpoint_actor.pth", map_location="cpu")
critic_weights = torch.load("solved_checkpoint_critic.pth", map_location="cpu")
agent.actor_local.load_state_dict(actor_weights)
agent.critic_local.load_state_dict(critic_weights)

### Test trained agent

In [5]:
n_episodes = 3   # number of evaluation episodes to play
max_t = 1000     # upper bound on steps per episode

for i_episode in range(1, n_episodes+1):

    # Start every episode from a fresh reset so it does not depend on observations
    # left over from the setup cell or from the previous episode.
    env_info = env.reset(train_mode=False)[brain_name]
    states = env_info.vector_observations

    agent.reset()
    episode_scores = np.zeros(num_agents)

    for t in range(max_t):
        actions = agent.act(states, add_noise=False)  # noise is turned off in testing a trained agent
        env_info = env.step(actions)[brain_name]
        next_states = env_info.vector_observations
        rewards = env_info.rewards
        dones = env_info.local_done
        # No agent.step(...) call here: that is only required while training.
        states = next_states
        episode_scores += rewards  # accumulate BEFORE the done check so the final reward counts

        if np.any(dones):
            break

    # Computed after the loop so it is always defined and includes the terminal step.
    # NOTE: despite the printed label, this is the maximum accumulated score over the agents.
    avg_episode_score = np.max(episode_scores)

    print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, avg_episode_score))

Episode 1	Average Score: 2.60
Episode 2	Average Score: 2.60
Episode 3	Average Score: 2.60


In [6]:
# Close the environment now that evaluation is finished.
env.close()