In [13]:
import numpy as np
#from rl.agents.ddpg import DDPGAgent
from rl.agents.ddpg import DDPGAgent
from rl.memory import SimpleMemory
from rl.random import OrnsteinUhlenbeckProcess
from perf_config_mcc import PerfConfig
from lib.actor import build_actor
from lib.critic import build_critic
from rl.runtime.experiment import Experiment
from rl.utils.printer import print_info
import os

# Configuration
config = PerfConfig()

debug = config.debug
if debug:
    tensorboard = True
    render = True
    plots = True
else:
    plots = False
    tensorboard = False
    render = False
    
env = config.env
if not config.random_seed:
    np.random.seed(123)
    env.seed(123)    

critic = build_critic(env, network_type=config.critic_type)

trial = 1
delta_clip = 100.0
    
experiment = Experiment("toto/{}/{}".format(delta_clip, trial), force=True)

# Memory
memory = SimpleMemory.from_file(env=env, limit=1000000, file_path=config.memory_path)

# Noise
random_process = OrnsteinUhlenbeckProcess(size=env.action_space.dim, theta=.15, mu=0., sigma=config.noise_sigma)


[2017-11-15 13:30:11,980] Making new env: MountainCarContinuous-v1


[31mOverwriting ./experiments/toto/100.0/1/[0m


In [14]:
actors_root = "./actors/"
cpt = 0
for actor_name in os.listdir(actors_root):
    actor = build_actor(env, network_type=config.actor_type) 
    actor.load_weights(actors_root + actor_name)
    
    # Agent
    agent = DDPGAgent(
        actor=actor,
        critic=critic,
        env=env,
        memory=memory,
        random_process=random_process,
        batch_size=config.batch_size,
        gamma=config.gamma,
        target_actor_update=config.target_actor_update,
        target_critic_update=config.target_critic_update,
        experiment=experiment,
        reset_controlers=config.grad_reset,
        actor_reset_threshold=config.actor_reset_threshold,
        critic_gradient_clip = delta_clip,
        warmup_critic_steps=200,
        warmup_actor_steps=200
    )
    agent.compile()

    # Test the agent at the beginning of the experiment
    agent.test(
        env=env,
        episodes=10,
        visualize=False,
        verbosity=0,
        nb_max_episode_steps=200,
        tensorboard=False,
        plots=False)




delta-clip : 100.0
INFO:tensorflow:Summary name actor/dense_28/kernel:0/gradient_norm is illegal; using actor/dense_28/kernel_0/gradient_norm instead.


[2017-11-15 13:30:15,611] Summary name actor/dense_28/kernel:0/gradient_norm is illegal; using actor/dense_28/kernel_0/gradient_norm instead.


INFO:tensorflow:Summary name actor/dense_28/bias:0/gradient_norm is illegal; using actor/dense_28/bias_0/gradient_norm instead.


[2017-11-15 13:30:15,613] Summary name actor/dense_28/bias:0/gradient_norm is illegal; using actor/dense_28/bias_0/gradient_norm instead.


INFO:tensorflow:Summary name actor/dense_29/kernel:0/gradient_norm is illegal; using actor/dense_29/kernel_0/gradient_norm instead.


[2017-11-15 13:30:15,616] Summary name actor/dense_29/kernel:0/gradient_norm is illegal; using actor/dense_29/kernel_0/gradient_norm instead.


INFO:tensorflow:Summary name actor/dense_29/bias:0/gradient_norm is illegal; using actor/dense_29/bias_0/gradient_norm instead.


[2017-11-15 13:30:15,619] Summary name actor/dense_29/bias:0/gradient_norm is illegal; using actor/dense_29/bias_0/gradient_norm instead.


INFO:tensorflow:Summary name actor/dense_30/kernel:0/gradient_norm is illegal; using actor/dense_30/kernel_0/gradient_norm instead.


[2017-11-15 13:30:15,622] Summary name actor/dense_30/kernel:0/gradient_norm is illegal; using actor/dense_30/kernel_0/gradient_norm instead.


INFO:tensorflow:Summary name actor/dense_30/bias:0/gradient_norm is illegal; using actor/dense_30/bias_0/gradient_norm instead.


[2017-11-15 13:30:15,624] Summary name actor/dense_30/bias:0/gradient_norm is illegal; using actor/dense_30/bias_0/gradient_norm instead.


INFO:tensorflow:Summary name actor/dense_28/kernel:0/norm is illegal; using actor/dense_28/kernel_0/norm instead.


[2017-11-15 13:30:15,628] Summary name actor/dense_28/kernel:0/norm is illegal; using actor/dense_28/kernel_0/norm instead.


INFO:tensorflow:Summary name actor/dense_28/bias:0/norm is illegal; using actor/dense_28/bias_0/norm instead.


[2017-11-15 13:30:15,631] Summary name actor/dense_28/bias:0/norm is illegal; using actor/dense_28/bias_0/norm instead.


INFO:tensorflow:Summary name actor/dense_29/kernel:0/norm is illegal; using actor/dense_29/kernel_0/norm instead.


[2017-11-15 13:30:15,633] Summary name actor/dense_29/kernel:0/norm is illegal; using actor/dense_29/kernel_0/norm instead.


INFO:tensorflow:Summary name actor/dense_29/bias:0/norm is illegal; using actor/dense_29/bias_0/norm instead.


[2017-11-15 13:30:15,636] Summary name actor/dense_29/bias:0/norm is illegal; using actor/dense_29/bias_0/norm instead.


INFO:tensorflow:Summary name actor/dense_30/kernel:0/norm is illegal; using actor/dense_30/kernel_0/norm instead.


[2017-11-15 13:30:15,640] Summary name actor/dense_30/kernel:0/norm is illegal; using actor/dense_30/kernel_0/norm instead.


INFO:tensorflow:Summary name actor/dense_30/bias:0/norm is illegal; using actor/dense_30/bias_0/norm instead.


[2017-11-15 13:30:15,643] Summary name actor/dense_30/bias:0/norm is illegal; using actor/dense_30/bias_0/norm instead.


INFO:tensorflow:Summary name target_actor/dense_25_1/kernel:0/norm is illegal; using target_actor/dense_25_1/kernel_0/norm instead.


[2017-11-15 13:30:15,647] Summary name target_actor/dense_25_1/kernel:0/norm is illegal; using target_actor/dense_25_1/kernel_0/norm instead.


INFO:tensorflow:Summary name target_actor/dense_25_1/bias:0/norm is illegal; using target_actor/dense_25_1/bias_0/norm instead.


[2017-11-15 13:30:15,650] Summary name target_actor/dense_25_1/bias:0/norm is illegal; using target_actor/dense_25_1/bias_0/norm instead.


INFO:tensorflow:Summary name target_actor/dense_26_1/kernel:0/norm is illegal; using target_actor/dense_26_1/kernel_0/norm instead.


[2017-11-15 13:30:15,652] Summary name target_actor/dense_26_1/kernel:0/norm is illegal; using target_actor/dense_26_1/kernel_0/norm instead.


INFO:tensorflow:Summary name target_actor/dense_26_1/bias:0/norm is illegal; using target_actor/dense_26_1/bias_0/norm instead.


[2017-11-15 13:30:15,654] Summary name target_actor/dense_26_1/bias:0/norm is illegal; using target_actor/dense_26_1/bias_0/norm instead.


INFO:tensorflow:Summary name target_actor/dense_27_1/kernel:0/norm is illegal; using target_actor/dense_27_1/kernel_0/norm instead.


[2017-11-15 13:30:15,656] Summary name target_actor/dense_27_1/kernel:0/norm is illegal; using target_actor/dense_27_1/kernel_0/norm instead.


INFO:tensorflow:Summary name target_actor/dense_27_1/bias:0/norm is illegal; using target_actor/dense_27_1/bias_0/norm instead.


[2017-11-15 13:30:15,657] Summary name target_actor/dense_27_1/bias:0/norm is illegal; using target_actor/dense_27_1/bias_0/norm instead.


INFO:tensorflow:Summary name critic/dense_25/kernel:0/gradient_norm is illegal; using critic/dense_25/kernel_0/gradient_norm instead.


[2017-11-15 13:30:15,662] Summary name critic/dense_25/kernel:0/gradient_norm is illegal; using critic/dense_25/kernel_0/gradient_norm instead.


INFO:tensorflow:Summary name critic/dense_25/bias:0/gradient_norm is illegal; using critic/dense_25/bias_0/gradient_norm instead.


[2017-11-15 13:30:15,664] Summary name critic/dense_25/bias:0/gradient_norm is illegal; using critic/dense_25/bias_0/gradient_norm instead.


INFO:tensorflow:Summary name critic/dense_26/kernel:0/gradient_norm is illegal; using critic/dense_26/kernel_0/gradient_norm instead.


[2017-11-15 13:30:15,666] Summary name critic/dense_26/kernel:0/gradient_norm is illegal; using critic/dense_26/kernel_0/gradient_norm instead.


INFO:tensorflow:Summary name critic/dense_26/bias:0/gradient_norm is illegal; using critic/dense_26/bias_0/gradient_norm instead.


[2017-11-15 13:30:15,669] Summary name critic/dense_26/bias:0/gradient_norm is illegal; using critic/dense_26/bias_0/gradient_norm instead.


INFO:tensorflow:Summary name critic/dense_27/kernel:0/gradient_norm is illegal; using critic/dense_27/kernel_0/gradient_norm instead.


[2017-11-15 13:30:15,671] Summary name critic/dense_27/kernel:0/gradient_norm is illegal; using critic/dense_27/kernel_0/gradient_norm instead.


INFO:tensorflow:Summary name critic/dense_27/bias:0/gradient_norm is illegal; using critic/dense_27/bias_0/gradient_norm instead.


[2017-11-15 13:30:15,674] Summary name critic/dense_27/bias:0/gradient_norm is illegal; using critic/dense_27/bias_0/gradient_norm instead.


INFO:tensorflow:Summary name critic/dense_25/kernel:0/norm is illegal; using critic/dense_25/kernel_0/norm instead.


[2017-11-15 13:30:15,679] Summary name critic/dense_25/kernel:0/norm is illegal; using critic/dense_25/kernel_0/norm instead.


INFO:tensorflow:Summary name critic/dense_25/bias:0/norm is illegal; using critic/dense_25/bias_0/norm instead.


[2017-11-15 13:30:15,681] Summary name critic/dense_25/bias:0/norm is illegal; using critic/dense_25/bias_0/norm instead.


INFO:tensorflow:Summary name critic/dense_26/kernel:0/norm is illegal; using critic/dense_26/kernel_0/norm instead.


[2017-11-15 13:30:15,685] Summary name critic/dense_26/kernel:0/norm is illegal; using critic/dense_26/kernel_0/norm instead.


INFO:tensorflow:Summary name critic/dense_26/bias:0/norm is illegal; using critic/dense_26/bias_0/norm instead.


[2017-11-15 13:30:15,689] Summary name critic/dense_26/bias:0/norm is illegal; using critic/dense_26/bias_0/norm instead.


INFO:tensorflow:Summary name critic/dense_27/kernel:0/norm is illegal; using critic/dense_27/kernel_0/norm instead.


[2017-11-15 13:30:15,691] Summary name critic/dense_27/kernel:0/norm is illegal; using critic/dense_27/kernel_0/norm instead.


INFO:tensorflow:Summary name critic/dense_27/bias:0/norm is illegal; using critic/dense_27/bias_0/norm instead.


[2017-11-15 13:30:15,694] Summary name critic/dense_27/bias:0/norm is illegal; using critic/dense_27/bias_0/norm instead.


INFO:tensorflow:Summary name target_critic/dense_25_1/kernel:0/norm is illegal; using target_critic/dense_25_1/kernel_0/norm instead.


[2017-11-15 13:30:15,696] Summary name target_critic/dense_25_1/kernel:0/norm is illegal; using target_critic/dense_25_1/kernel_0/norm instead.


INFO:tensorflow:Summary name target_critic/dense_25_1/bias:0/norm is illegal; using target_critic/dense_25_1/bias_0/norm instead.


[2017-11-15 13:30:15,698] Summary name target_critic/dense_25_1/bias:0/norm is illegal; using target_critic/dense_25_1/bias_0/norm instead.


INFO:tensorflow:Summary name target_critic/dense_26_1/kernel:0/norm is illegal; using target_critic/dense_26_1/kernel_0/norm instead.


[2017-11-15 13:30:15,700] Summary name target_critic/dense_26_1/kernel:0/norm is illegal; using target_critic/dense_26_1/kernel_0/norm instead.


INFO:tensorflow:Summary name target_critic/dense_26_1/bias:0/norm is illegal; using target_critic/dense_26_1/bias_0/norm instead.


[2017-11-15 13:30:15,703] Summary name target_critic/dense_26_1/bias:0/norm is illegal; using target_critic/dense_26_1/bias_0/norm instead.


INFO:tensorflow:Summary name target_critic/dense_27_1/kernel:0/norm is illegal; using target_critic/dense_27_1/kernel_0/norm instead.


[2017-11-15 13:30:15,705] Summary name target_critic/dense_27_1/kernel:0/norm is illegal; using target_critic/dense_27_1/kernel_0/norm instead.


INFO:tensorflow:Summary name target_critic/dense_27_1/bias:0/norm is illegal; using target_critic/dense_27_1/bias_0/norm instead.


[2017-11-15 13:30:15,708] Summary name target_critic/dense_27_1/bias:0/norm is illegal; using target_critic/dense_27_1/bias_0/norm instead.


AttributeError: Layer model_9 has multiple inbound nodes, hence the notion of "layer output" is ill-defined. Use `get_output_at(node_index)` instead.