In [None]:
# --- Load Agents --- #
from agents.agent_ddpg.agent import DDPG_Agent

# --- Load Environments --- #
from environment.environment_loader import environment_loader

# --- Load Utilities --- #
from collections import deque
import numpy as np
from utilities.helper import flatten, load_previous


# --- Load Training --- #
from experiments.experiment import Experiment

%load_ext autoreload
%autoreload 2


# Load Environment

In [None]:
# Load the Tennis executable and unpack everything the loader returns.
# no_graphics=False presumably keeps the game window visible — confirm in environment_loader.
(
    env,
    state_size,
    action_size,
    brain_name,
    num_agents,
) = environment_loader(
    name="environment/Tennis.exe",
    no_graphics=False,
)

# Load Agent

In [None]:
from experiments.experiment_list import exp_config_ddpg
from agents.agent_ddpg.config import DDPG_AgentConfig

# Agent hyper-parameters, one per line for easy tweaking.
# ACTOR_H / CRITIC_H are presumably hidden-layer widths — confirm in DDPG_AgentConfig.
agent_config = DDPG_AgentConfig(
    ACTOR_H=[140, 140],
    CRITIC_H=[140, 140],
    LR_ACTOR=0.0001,
    LR_CRITIC=0.0001,
    OUTPUT_TYPE='vectors',
)

# Build the agent for this environment's state/action sizes with a fixed seed.
agent = DDPG_Agent(
    state_size,
    action_size,
    seed=1,
    config=agent_config,
)

In [None]:
# Rebind `agent` to whatever load_previous returns for the saved "exp_23__agent_0" run.
agent = load_previous(
    agent=agent,
    filename="exp_23__agent_0",
    path="experiments/trained_agents/",
)

In [None]:
# Every slot references the *same* agent object (no copies are made).
agents = [agent] * num_agents

# Run Episode

In [None]:
def test(agents, env, num_episode = 5, max_t = 1000):
    for i in range(num_episode):                               # play game for 5 episodes
        env_info = env.reset(train_mode=False)[brain_name]     # reset the environment    
        states = env_info.vector_observations                  # get the current state (for each agent)
        scores = np.zeros(num_agents)                          # initialize the score (for each agent)
        while True:
            #actions = np.random.randn(num_agents, action_size) # select an action (for each agent)
            actions = [agent.act(state) for agent, state in zip(agents, states)]
            actions = np.clip(actions, -1, 1)                  # all actions between -1 and 1
            env_info = env.step(actions)[brain_name]           # send all actions to tne environment
            next_states = env_info.vector_observations         # get next state (for each agent)
            rewards = env_info.rewards                         # get reward (for each agent)
            dones = env_info.local_done                        # see if episode finished
            scores += env_info.rewards                         # update the score (for each agent)
            states = next_states                               # roll over states to next time step
            if np.any(dones):                                  # exit loop if episode finished
                break
        print('Score (max over agents) from episode {}: {}'.format(i, np.max(scores)))

In [None]:
# Play the evaluation episodes with the default settings, then close the environment.
test(agents=agents, env=env)
env.close()