In [1]:
from unityagents import UnityEnvironment
import numpy as np

# Launch the Unity Reacher build and look up its first (default) brain.
env = UnityEnvironment(file_name='Reacher1.app')
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# Reset the environment in training mode and keep the info for that brain.
env_info = env.reset(train_mode=True)[brain_name]

# Report how many agents the environment exposes.
num_agents = len(env_info.agents)
print('Number of agents:', num_agents)

# Report the size of a single action vector.
action_size = brain.vector_action_space_size
print('Size of each action:', action_size)

# Inspect the observations: axis 0 indexes agents, axis 1 the state features.
states = env_info.vector_observations
allStates = states  # second reference to the initial observations
num_agents, state_size = states.shape[0], states.shape[1]
print(
    'There are {} agents. Each observes a state with length: {}'.format(
        num_agents, state_size
    )
)
print('The state for the first agent looks like:', states[0])

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		goal_speed -> 1.0
		goal_size -> 5.0
Unity brain name: ReacherBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 33
        Number of stacked Vector Observation: 1
        Vector Action space type: continuous
        Vector Action space size (per agent): 4
        Vector Action descriptions: , , , 


Number of agents: 1
Size of each action: 4
There are 1 agents. Each observes a state with length: 33
The state for the first agent looks like: [ 0.00000000e+00 -4.00000000e+00  0.00000000e+00  1.00000000e+00
 -0.00000000e+00 -0.00000000e+00 -4.37113883e-08  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00 -1.00000000e+01  0.00000000e+00
  1.00000000e+00 -0.00000000e+00 -0.00000000e+00 -4.37113883e-08
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  5.75471878e+00 -1.00000000e+00
  5.55726671e+00  0.00000000e+00  1.00000000e+00  0.00000000e+00
 -1.68164849e-01]


In [2]:
# !pip install baselines
# !git clone https://github.com/lanpa/tensorboardX && cd tensorboardX && python setup.py install
# !pip uninstall protobuf -y
# !pip install -U protobuf
# !pip install scikit-image
# !pip install torchvision

In [3]:
from deep_rl import *

def ddpg_continuous(**kwargs):
    """Build a DDPG configuration and run the agent on the Unity environment.

    The keyword arguments are tagged via ``generate_tag`` and merged into a
    fresh ``Config``; ``skip`` defaults to ``False`` when not supplied. The
    task factory reads ``config.game``, so callers are expected to pass
    ``game=...`` (assumes ``Config.merge`` exposes kwargs as attributes --
    confirm against deep_rl).

    Relies on the notebook-level globals ``action_size``, ``env`` and
    ``brain`` created by the environment-setup cell.
    """
    global action_size, env, brain

    generate_tag(kwargs)
    kwargs.setdefault('skip', False)
    config = Config()
    config.merge(kwargs)

    def make_task():
        # Wrap the already-running Unity environment in a Task.
        return Task(config.game, env)

    def make_actor_optimizer(params):
        return torch.optim.Adam(params, lr=1e-4)

    def make_critic_optimizer(params):
        return torch.optim.Adam(params, lr=1e-3)

    def make_network():
        # Actor and critic bodies both use (400, 300) hidden units with ReLU.
        hidden_units = (400, 300)
        actor = FCBody(config.state_dim, hidden_units, gate=F.relu)
        critic = TwoLayerFCBodyWithAction(
            config.state_dim, config.action_dim, hidden_units, gate=F.relu)
        return DeterministicActorCriticNet(
            config.state_dim, config.action_dim,
            actor_body=actor,
            critic_body=critic,
            actor_opt_fn=make_actor_optimizer,
            critic_opt_fn=make_critic_optimizer)

    def make_replay():
        return Replay(memory_size=int(1e6), batch_size=64)

    def make_noise():
        # Ornstein-Uhlenbeck exploration noise, one component per action dim.
        return OrnsteinUhlenbeckProcess(
            size=(config.action_dim,), std=LinearSchedule(0.2))

    # Environment / evaluation schedule.
    config.task_fn = make_task
    config.eval_env = env
    config.max_steps = int(1e6)
    config.eval_interval = int(1e4)
    config.eval_episodes = 20

    # Model, replay buffer and exploration settings.
    config.network_fn = make_network
    config.replay_fn = make_replay
    config.discount = 0.99
    config.random_process_fn = make_noise
    config.warm_up = int(1e4)
    config.target_network_mix = 1e-3

    agent = DDPGAgent(action_size, env, brain, config)
    run_steps(agent)

In [4]:
# Global runtime setup from deep_rl; these calls must run before training.
# NOTE(review): presumably restricts computation to a single thread -- confirm.
set_one_thread()
# Called without arguments, so no explicit seed is fixed here.
# NOTE(review): verify whether this makes runs reproducible.
random_seed()
# NOTE(review): -1 presumably selects the CPU; the disabled alternative below
# passes device index 0 -- confirm against deep_rl.
select_device(-1)
# select_device(0)
# Game identifier forwarded to ddpg_continuous, which passes it to Task.
game = 'new'
# Start the DDPG training run.
ddpg_continuous(game=game)

INFO:root:steps 0, 466033777.78 steps/s
INFO:root:steps 0, episodic_return_test 0.00(0.00)
INFO:root:steps 1000, 19.82 steps/s
INFO:root:steps 2000, 535.83 steps/s
INFO:root:steps 3000, 545.76 steps/s
INFO:root:steps 4000, 571.60 steps/s
INFO:root:steps 5000, 431.40 steps/s
INFO:root:steps 6000, 556.19 steps/s
INFO:root:steps 7000, 486.50 steps/s
INFO:root:steps 8000, 487.78 steps/s
INFO:root:steps 9000, 555.56 steps/s
INFO:root:steps 10000, 560.05 steps/s
INFO:root:steps 10000, episodic_return_test 0.00(0.00)
INFO:root:steps 11000, 15.95 steps/s


KeyboardInterrupt: 