# Udacity Deep Reinforcement Learning Nanodegree - Project 3: Collaboration and Competition

## Demonstrating the Trained Multi-Agents

In [1]:
import numpy as np
import torch
from unityagents import UnityEnvironment

from agent import Agent

In [2]:
# Hyperparameter bundle handed to every Agent built in this notebook.
# Only 'num_agents' is read directly here; every other key is consumed inside
# agent.py (not shown), so the group labels below are inferred from the key
# names alone -- confirm against agent.py before relying on them.
params = {
    # -- replay buffer / training schedule (presumably unused by the demo) --
    'buffer_size': 2e5,
    'batch_size': 256,
    'n_random_episodes': 500,
    'n_episodes': 2000,
    'max_steps': 750,
    'update_step': 4,
    'solution_threshold': 0.5,
    'eval_window_length': 100,
    # -- number of agents and random seeds --
    'num_agents': 2,
    'agent_seed': 33,
    'env_seed': 33,
    'buffer_seed': 33,
    # -- learning-rule constants --
    'gamma': 0.95,
    'tau': 0.01,
    # -- network sizes and optimiser settings --
    'first_hidden_units': 256,
    'second_hidden_units': 256,
    'lr_actor': 0.001,
    'lr_critic': 0.001,
    'critic_weight_decay': 0,
    # -- action noise settings --
    'add_noise': True,
    'noise_sigma': 0.2,
    'noise_scale_start': 2.0,
    'noise_scale_min': 0.2,
    'noise_scale_decay': 0.9995,
}

In [3]:
# Start the Tennis Unity build with its graphics window enabled (no_graphics=False).
env = UnityEnvironment(file_name="Tennis.app", no_graphics=False)

# Work with the first brain the environment reports: keep both its name
# (used to index reset/step results) and the brain object itself.
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		
Unity brain name: TennisBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 8
        Number of stacked Vector Observation: 3
        Vector Action space type: continuous
        Vector Action space size (per agent): 2
        Vector Action descriptions: , 


In [4]:
# Reset the environment with train_mode=False and read the problem dimensions:
# the brain's action size and the length of the first agent's observation vector.
env_info = env.reset(train_mode=False)[brain_name]
action_size = brain.vector_action_space_size
state_size = len(env_info.vector_observations[0])

In [5]:
# Saved network weights to restore, one (actor, critic) pair per agent.
agent_0_actor_filename, agent_0_critic_filename = (
    'agent_0_best_actor_e_1876.pth',
    'agent_0_best_critic_e_1876.pth',
)
agent_1_actor_filename, agent_1_critic_filename = (
    'agent_1_best_actor_e_1876.pth',
    'agent_1_best_critic_e_1876.pth',
)

In [6]:
# Build one Agent per player, keyed by its index 0..num_agents-1.
# Every agent receives the same state/action sizes and the shared params dict.
agents = {
    agent_no: Agent(state_size=state_size, action_size=action_size,
                    agent_no=agent_no, params=params)
    for agent_no in range(params['num_agents'])
}

In [7]:
def _load_state_on_cpu(path):
    """Read a saved state dict from ``path`` with every tensor placed on the CPU.

    Without ``map_location`` torch.load tries to restore tensors onto the device
    they were saved from, which fails for GPU-saved checkpoints on a CPU-only
    machine. The callable form of ``map_location`` is used because it returns the
    storage unchanged, i.e. leaves it on the CPU where it was deserialised.
    """
    return torch.load(path, map_location=lambda storage, loc: storage)


# Restore the saved weights into each agent's local actor and critic networks.
# load_state_dict copies the values into the networks' existing parameters, so
# loading via the CPU does not change which device the networks live on.
checkpoint_files = {
    0: (agent_0_actor_filename, agent_0_critic_filename),
    1: (agent_1_actor_filename, agent_1_critic_filename),
}
for agent_no, (actor_file, critic_file) in checkpoint_files.items():
    agents[agent_no].actor_local.load_state_dict(_load_state_on_cpu(actor_file))
    agents[agent_no].critic_local.load_state_dict(_load_state_on_cpu(critic_file))

In [8]:
# Roll the trained agents out for a fixed number of environment steps, calling
# each agent's act() with add_noise=False. No end-of-episode check is made, so
# the rollout simply continues until the step budget is spent.
n_demo_steps = 500

env_info = env.reset(train_mode=False)[brain_name]
states = env_info.vector_observations
for step in range(n_demo_steps):
    # Each agent acts on its own row of the current observations.
    actions = [agent.act(states[agent_no], add_noise=False)
               for agent_no, agent in agents.items()]
    # Pack the per-agent actions into one (num_agents, action_size) array for the env.
    actions = np.concatenate(actions, axis=0).reshape((params['num_agents'], action_size))
    env_info = env.step(actions)[brain_name]
    # The observations returned by this step drive the next action selection.
    states = env_info.vector_observations

In [9]:
# Close the Unity environment now that the demonstration rollout is finished.
env.close()