In [None]:
import torch
import time
import os
import numpy as np

from pathlib import Path
from torch.autograd import Variable
from tensorboardX import SummaryWriter

from model import MADDPG

In [None]:
from unityagents import UnityEnvironment

In [None]:
# Launch the Unity Tennis environment from a Windows executable.
# The path is relative, so it is resolved against the kernel's working directory;
# other platforms need a different build path here.
env = UnityEnvironment(file_name='./Tennis_Windows_x86_64/Tennis.exe')

In [None]:
# --- Look everything up first, then report it ---

# The first registered brain is treated as the default one.
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# Reset in training mode and keep only this brain's slice of the reset info.
env_info = env.reset(train_mode=True)[brain_name]

# Agent count, per-agent action size, and the initial observations.
num_agents = len(env_info.agents)
action_size = brain.vector_action_space_size
states = env_info.vector_observations
# Second axis of the observation array is used as the per-agent state length.
state_size = states.shape[1]

# --- Report ---
print('Number of agents:', num_agents)
print('Size of each action:', action_size)
print(
    'There are {} agents. Each observes a state with length: {}'.format(
        states.shape[0], state_size
    )
)
print('The state for the first agent looks like:', states[0])

In [None]:
# Name of the current training run. Edit this to send models and logs to a
# fresh directory under ./models.
curr_run = 'run1'
model_dir = Path('./models')
run_dir = model_dir / curr_run
log_dir = run_dir / 'logs'

# log_dir lives inside run_dir, so creating it (with parents) creates both.
# exist_ok=True keeps this cell re-runnable: the previous unconditional
# os.makedirs calls raised FileExistsError as soon as the run directory
# existed, which broke "Restart Kernel -> Run All".
# NOTE(review): reusing the same curr_run now writes into the existing logs
# directory instead of failing -- change curr_run to keep runs separate.
os.makedirs(log_dir, exist_ok=True)

# TensorBoard writer for this run, constructed with the log directory path.
logger = SummaryWriter(str(log_dir))

In [None]:

# --- Training schedule ---
# Episode count is kept tiny for a smoke run; raise it (say 30000) to experiment.
number_of_episodes = 10
episode_length = 100
batchsize = 128

# --- Exploration noise ---
# Starting amplitude of the OU noise and the factor intended to shrink it
# slowly toward 0 (the decay itself is not applied in this cell).
noise = 1
noise_reduction = 0.9999

# --- Network and optimisation hyperparameters ---
hidden_dim = 64
gamma = 0.95
tau = 0.02
lr_actor = 1.0e-4
lr_critic = 1.0e-3

# True when a CUDA device is visible to torch.
cuda_enabled = torch.cuda.is_available()

# Network input/output sizes. The list holds a single entry; both the policy
# and the critic input size are set to the per-agent state length.
agent_init_params = [
    {
        'num_in_pol': state_size,
        'num_out_pol': action_size,
        'num_in_critic': state_size,
    },
]

# Bundle of constructor settings. Only the actor learning rate is stored
# under 'lr'; lr_critic is not part of this dict.
init_dict = {
    'gamma': gamma,
    'tau': tau,
    'lr': lr_actor,
    'hidden_dim': hidden_dim,
    'alg_types': 'MADDPG',
    'agent_init_params': agent_init_params,
    'discrete_action': False,
}



In [None]:
# Instantiate the learner from the project-local `model` module, passing the
# network-size list, the literal 'MADDPG', and the number of agents positionally.
# NOTE(review): the meaning of each positional argument is inferred from the
# variable names -- confirm against model.MADDPG's signature. init_dict is
# built earlier in the notebook but is not passed here.
maddpg = MADDPG(agent_init_params,'MADDPG',num_agents)

In [None]:
# Show the learner's `agents` attribute as this cell's output (inspection only).
maddpg.agents

In [None]:
# Sanity check: print each entry of agent_init_params on its own line.
# NOTE(review): `i` is assigned but never read in the visible cells -- likely a
# leftover counter; confirm no later cell depends on it before removing.
i = 0
for params in agent_init_params:
    print(params)
