In [1]:
!pip -q install ./python

[31mtensorflow 1.7.1 has requirement numpy>=1.13.3, but you'll have numpy 1.12.1 which is incompatible.[0m
[31mipython 6.5.0 has requirement prompt-toolkit<2.0.0,>=1.0.15, but you'll have prompt-toolkit 3.0.2 which is incompatible.[0m


In [2]:
from unityagents import UnityEnvironment
import numpy as np
import torch
import os

In [3]:
env = UnityEnvironment(file_name="/data/Tennis_Linux_NoVis/Tennis")

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		
Unity brain name: TennisBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 8
        Number of stacked Vector Observation: 3
        Vector Action space type: continuous
        Vector Action space size (per agent): 2
        Vector Action descriptions: , 


In [4]:
# get the default brain
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

In [5]:
env_info = env.reset(train_mode=False)[brain_name]

# number of agents
num_agents = len(env_info.agents)                      
print('Number of agents:', num_agents)

# size of each action
action_size = brain.vector_action_space_size
print('Size of each action:', action_size)

Number of agents: 2
Size of each action: 2


In [6]:
from ddpg.maddpg_agent import maddpg_agent

maddpg = maddpg_agent()

In [7]:
# Loading the trained weights
def load(dir):    
        for i in range(num_agents):
            maddpg.agents[i].actor_local.load_state_dict(
                torch.load(os.path.join(dir, 'checkpoint_actor_{}.pth'.format(i))) )
            maddpg.agents[i].critic_local.load_state_dict(
                torch.load(os.path.join(dir, 'checkpoint_critic_{}.pth'.format(i))) )

In [8]:
# Doing inference and playing the game

def inference_play(maddpg, env, num_games=3):
    """Tests the training results by having both agents play a match
        maddpg (MADDPG): instance of MADDPG wrapper class
        env (UnityEnvironment): instance of Unity environment for testing
        num_games (int): number of games to be played
    """
    
    print("Agent 0: Red racket")
    print("Agent 1: Blue racket\n")

    game_scores = [0 for _ in range(num_agents)]

    # Environment information
    brain_name = env.brain_names[0]

    for i_episode in range(1, num_games+1):
        env_info = env.reset(train_mode=False)[brain_name]   
        states = env_info.vector_observations
        scores = np.zeros(num_agents)

        t_step = 0
        
        while True:
            actions = maddpg.act(states)

            env_info = env.step(actions)[brain_name]
            next_states = env_info.vector_observations
            rewards = env_info.rewards
            scores += rewards
            dones = env_info.local_done
            t_step += 1

            if np.any(dones):
                winner = np.argmax(scores)
                game_scores[winner] += 1
                if (t_step > 0):
                    t_step -= 1
                print('Game: {}, partial score: {},  Score agent #0: {:.2f}, Score agent #1: {:.2f}, # of timesteps: {} '.
                       format(i_episode, game_scores, scores[0], scores[1], t_step))
                break

            states = next_states

    print("\nWinner is Agent {}".format(np.argmax(game_scores)))

In [9]:
# Playing 3 games with the trained networks
load("saved_weights")
inference_play(maddpg, env)

Agent 0: Red racket
Agent 1: Blue racket

Game: 1, partial score: [1, 0],  Score agent #0: 2.60, Score agent #1: 2.60, # of timesteps: 1000 
Game: 2, partial score: [2, 0],  Score agent #0: 2.10, Score agent #1: 2.09, # of timesteps: 789 
Game: 3, partial score: [3, 0],  Score agent #0: 2.60, Score agent #1: 2.60, # of timesteps: 1000 

Winner is Agent 0


In [10]:
env.close()