In [1]:
import gym
import gym_Physics_RL
from torch import nn as nn

from rlkit.exploration_strategies.base import PolicyWrappedWithExplorationStrategy
from rlkit.exploration_strategies.epsilon_greedy import EpsilonGreedy
from rlkit.policies.argmax import ArgmaxDiscretePolicy
from rlkit.torch.dqn.double_dqn import DoubleDQNTrainer
from rlkit.torch.networks import Mlp
import rlkit.torch.pytorch_util as ptu
from rlkit.data_management.env_replay_buffer import EnvReplayBuffer
from rlkit.launchers.launcher_util import setup_logger
from rlkit.samplers.data_collector import MdpPathCollector
from rlkit.torch.torch_rl_algorithm import TorchBatchRLAlgorithm


def experiment(variant):
    """Build a Double DQN training setup from ``variant`` and run it.

    Two separate instances of the same Gym environment are created (one for
    exploration, one for evaluation), together with an online Q-network, a
    target Q-network of identical architecture, a greedy evaluation policy,
    an ``EpsilonGreedy``-wrapped exploration policy, a replay buffer and a
    ``TorchBatchRLAlgorithm`` that ties them together.

    Args:
        variant (dict): experiment configuration. Keys read here:

            * ``replay_buffer_size`` (required) - first positional argument
              of ``EnvReplayBuffer``.
            * ``trainer_kwargs`` (required) - extra keyword arguments
              forwarded to ``DoubleDQNTrainer``.
            * ``algorithm_kwargs`` (required) - extra keyword arguments
              forwarded to ``TorchBatchRLAlgorithm``.
            * ``env_id`` (optional) - Gym environment id; defaults to
              ``'Physics_RL-v0'``.
            * ``hidden_sizes`` (optional) - hidden layer widths shared by
              both Q-networks; defaults to ``[32, 32]``.

    Returns:
        None. The call returns once ``algorithm.train()`` returns.
    """
    # Optional keys keep existing variants working unchanged while letting
    # callers configure what used to be hard-coded.
    env_id = variant.get('env_id', 'Physics_RL-v0')
    hidden_sizes = list(variant.get('hidden_sizes', [32, 32]))

    expl_env = gym.make(env_id)
    eval_env = gym.make(env_id)
    # Read both dimensions from the same environment instance.
    obs_dim = expl_env.observation_space.low.size
    action_dim = expl_env.action_space.n

    def make_q_network():
        # One Q-value output per discrete action; each network gets its own
        # copy of the size list so the two never share a mutable argument.
        return Mlp(
            hidden_sizes=list(hidden_sizes),
            input_size=obs_dim,
            output_size=action_dim,
        )

    qf = make_q_network()
    # NOTE(review): target_qf is built independently and nothing in this
    # function copies qf's parameters into it - confirm that starting from
    # unsynchronised weights is intended.
    target_qf = make_q_network()
    qf_criterion = nn.MSELoss()

    eval_policy = ArgmaxDiscretePolicy(qf)
    expl_policy = PolicyWrappedWithExplorationStrategy(
        EpsilonGreedy(expl_env.action_space),
        eval_policy,
    )
    eval_path_collector = MdpPathCollector(
        eval_env,
        eval_policy,
    )
    expl_path_collector = MdpPathCollector(
        expl_env,
        expl_policy,
    )
    trainer = DoubleDQNTrainer(
        qf=qf,
        target_qf=target_qf,
        qf_criterion=qf_criterion,
        **variant['trainer_kwargs']
    )
    replay_buffer = EnvReplayBuffer(
        variant['replay_buffer_size'],
        expl_env,
    )
    algorithm = TorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        **variant['algorithm_kwargs']
    )
    algorithm.to(ptu.device)
    algorithm.train()

doodad not detected


In [2]:
if __name__ == "__main__":
    # Run configuration. The same dict is handed to setup_logger (which
    # records it with the run) and to experiment() (which reads its
    # replay_buffer_size, trainer_kwargs and algorithm_kwargs entries).
    # noinspection PyTypeChecker
    variant = {
        "algorithm": "DDQN",
        "version": "normal",
        # NOTE(review): experiment() does not read "layer_size"; it is only
        # recorded with the run - confirm it still describes the network.
        "layer_size": 256,
        "replay_buffer_size": 10 ** 6,
        # Forwarded as keyword arguments to TorchBatchRLAlgorithm.
        "algorithm_kwargs": {
            "num_epochs": 3000,
            "num_eval_steps_per_epoch": 5000,
            "num_trains_per_train_loop": 1000,
            # NOTE(review): 10 exploration steps per loop versus
            # max_path_length=100 - confirm exploration episodes are not
            # being cut short by this setting.
            "num_expl_steps_per_train_loop": 10,
            "min_num_steps_before_training": 1000,
            "max_path_length": 100,
            "batch_size": 16,
        },
        # Forwarded as keyword arguments to DoubleDQNTrainer.
        "trainer_kwargs": {
            "discount": 0.99,
            "learning_rate": 3E-5,
        },
    }

    setup_logger('./ddqn-train', variant=variant)
    # ptu.set_gpu_mode(True)  # optionally set the GPU (default=False)
    experiment(variant)

2019-06-09 01:34:02.724874 Pacific Daylight Time | Variant:
2019-06-09 01:34:02.727877 Pacific Daylight Time | {
  "algorithm": "DDQN",
  "version": "normal",
  "layer_size": 256,
  "replay_buffer_size": 1000000,
  "algorithm_kwargs": {
    "num_epochs": 3000,
    "num_eval_steps_per_epoch": 5000,
    "num_trains_per_train_loop": 1000,
    "num_expl_steps_per_train_loop": 10,
    "min_num_steps_before_training": 1000,
    "max_path_length": 100,
    "batch_size": 16
  },
  "trainer_kwargs": {
    "discount": 0.99,
    "learning_rate": 3e-05
  }
}
2019-06-09 01:34:09.217871 Pacific Daylight Time | [ddqn-train_2019_06_09_01_34_02_0000--s-0] Epoch 0 finished
---------------------------------------------  --------------
replay_buffer/size                             1010
trainer/QF Loss                                   1.97087
trainer/Y Predictions Mean                        0.226821
trainer/Y Predictions Std                         0.082281
trainer/Y Predictions Max                     

evaluation/env_infos/X_pos_2 Min                133
time/data storing (s)                             0.000129322
time/evaluation sampling (s)                      3.7404
time/exploration sampling (s)                     0.00771993
time/logging (s)                                  0.040161
time/saving (s)                                   0.00782462
time/training (s)                                 1.79598
time/epoch (s)                                    5.59221
time/total (s)                                    8.28735
Epoch                                             0
---------------------------------------------  --------------
2019-06-09 01:34:15.564566 Pacific Daylight Time | [ddqn-train_2019_06_09_01_34_02_0000--s-0] Epoch 1 finished
---------------------------------------------  -------------
replay_buffer/size                             1020
trainer/QF Loss                                   0.117002
trainer/Y Predictions Mean                       -0.985309
trainer/Y Predicti

evaluation/env_infos/X_pos_2 Max                580
evaluation/env_infos/X_pos_2 Min                123
time/data storing (s)                             0.00015765
time/evaluation sampling (s)                      3.85624
time/exploration sampling (s)                     0.0073492
time/logging (s)                                  0.0465634
time/saving (s)                                   0.00693086
time/training (s)                                 2.25099
time/epoch (s)                                    6.16823
time/total (s)                                   14.6384
Epoch                                             1
---------------------------------------------  -------------
2019-06-09 01:34:22.410624 Pacific Daylight Time | [ddqn-train_2019_06_09_01_34_02_0000--s-0] Epoch 2 finished
---------------------------------------------  ---------------
replay_buffer/size                              1030
trainer/QF Loss                                   37.4872
trainer/Y Predictions Mea

evaluation/env_infos/initial/X_pos_2 Max         580
evaluation/env_infos/initial/X_pos_2 Min         121
evaluation/env_infos/X_pos_2 Mean                401.086
evaluation/env_infos/X_pos_2 Std                 127.393
evaluation/env_infos/X_pos_2 Max                 580
evaluation/env_infos/X_pos_2 Min                 121
time/data storing (s)                              0.000145334
time/evaluation sampling (s)                       4.28354
time/exploration sampling (s)                      0.0084273
time/logging (s)                                   0.0348846
time/saving (s)                                    0.00872289
time/training (s)                                  2.26127
time/epoch (s)                                     6.59699
time/total (s)                                    21.4715
Epoch                                              2
---------------------------------------------  ---------------
2019-06-09 01:34:28.870639 Pacific Daylight Time | [ddqn-train_2019_06_09_01

evaluation/env_infos/final/X_pos_2 Max           578
evaluation/env_infos/final/X_pos_2 Min           121
evaluation/env_infos/initial/X_pos_2 Mean        331.561
evaluation/env_infos/initial/X_pos_2 Std         135.3
evaluation/env_infos/initial/X_pos_2 Max         578
evaluation/env_infos/initial/X_pos_2 Min         121
evaluation/env_infos/X_pos_2 Mean                357.422
evaluation/env_infos/X_pos_2 Std                 132.465
evaluation/env_infos/X_pos_2 Max                 578
evaluation/env_infos/X_pos_2 Min                 121
time/data storing (s)                              0.000144513
time/evaluation sampling (s)                       4.26719
time/exploration sampling (s)                      0.0081703
time/logging (s)                                   0.0350858
time/saving (s)                                    0.00977102
time/training (s)                                  1.95071
time/epoch (s)                                     6.27107
time/total (s)                  

evaluation/env_infos/Velocity_y Max               10
evaluation/env_infos/Velocity_y Min                1
evaluation/env_infos/final/X_pos_2 Mean          349.217
evaluation/env_infos/final/X_pos_2 Std           133.451
evaluation/env_infos/final/X_pos_2 Max           579
evaluation/env_infos/final/X_pos_2 Min           120
evaluation/env_infos/initial/X_pos_2 Mean        349.217
evaluation/env_infos/initial/X_pos_2 Std         133.451
evaluation/env_infos/initial/X_pos_2 Max         579
evaluation/env_infos/initial/X_pos_2 Min         120
evaluation/env_infos/X_pos_2 Mean                345.053
evaluation/env_infos/X_pos_2 Std                 139.079
evaluation/env_infos/X_pos_2 Max                 579
evaluation/env_infos/X_pos_2 Min                 120
time/data storing (s)                              0.000283688
time/evaluation sampling (s)                       4.30915
time/exploration sampling (s)                      0.0100539
time/logging (s)                                   

evaluation/env_infos/initial/Velocity_y Max       10
evaluation/env_infos/initial/Velocity_y Min        1
evaluation/env_infos/Velocity_y Mean               5.5302
evaluation/env_infos/Velocity_y Std                2.82855
evaluation/env_infos/Velocity_y Max               10
evaluation/env_infos/Velocity_y Min                1
evaluation/env_infos/final/X_pos_2 Mean          348.995
evaluation/env_infos/final/X_pos_2 Std           132.255
evaluation/env_infos/final/X_pos_2 Max           580
evaluation/env_infos/final/X_pos_2 Min           121
evaluation/env_infos/initial/X_pos_2 Mean        348.995
evaluation/env_infos/initial/X_pos_2 Std         132.255
evaluation/env_infos/initial/X_pos_2 Max         580
evaluation/env_infos/initial/X_pos_2 Min         121
evaluation/env_infos/X_pos_2 Mean                358.342
evaluation/env_infos/X_pos_2 Std                 133.353
evaluation/env_infos/X_pos_2 Max                 580
evaluation/env_infos/X_pos_2 Min                 121
time/data s

evaluation/env_infos/final/Velocity_y Std          2.8646
evaluation/env_infos/final/Velocity_y Max         10
evaluation/env_infos/final/Velocity_y Min          1
evaluation/env_infos/initial/Velocity_y Mean       5.55275
evaluation/env_infos/initial/Velocity_y Std        2.8646
evaluation/env_infos/initial/Velocity_y Max       10
evaluation/env_infos/initial/Velocity_y Min        1
evaluation/env_infos/Velocity_y Mean               5.60608
evaluation/env_infos/Velocity_y Std                2.85466
evaluation/env_infos/Velocity_y Max               10
evaluation/env_infos/Velocity_y Min                1
evaluation/env_infos/final/X_pos_2 Mean          361.36
evaluation/env_infos/final/X_pos_2 Std           137.743
evaluation/env_infos/final/X_pos_2 Max           580
evaluation/env_infos/final/X_pos_2 Min           120
evaluation/env_infos/initial/X_pos_2 Mean        361.36
evaluation/env_infos/initial/X_pos_2 Std         137.743
evaluation/env_infos/initial/X_pos_2 Max         580
eval

evaluation/env_infos/Y_Pos_1 Std                   0
evaluation/env_infos/Y_Pos_1 Max                 149
evaluation/env_infos/Y_Pos_1 Min                 149
evaluation/env_infos/final/Velocity_y Mean         5.57525
evaluation/env_infos/final/Velocity_y Std          2.90833
evaluation/env_infos/final/Velocity_y Max         10
evaluation/env_infos/final/Velocity_y Min          1
evaluation/env_infos/initial/Velocity_y Mean       5.57525
evaluation/env_infos/initial/Velocity_y Std        2.90833
evaluation/env_infos/initial/Velocity_y Max       10
evaluation/env_infos/initial/Velocity_y Min        1
evaluation/env_infos/Velocity_y Mean               5.53639
evaluation/env_infos/Velocity_y Std                2.92229
evaluation/env_infos/Velocity_y Max               10
evaluation/env_infos/Velocity_y Min                1
evaluation/env_infos/final/X_pos_2 Mean          338.913
evaluation/env_infos/final/X_pos_2 Std           135.46
evaluation/env_infos/final/X_pos_2 Max           579
eva

evaluation/env_infos/initial/Y_Pos_1 Std           0
evaluation/env_infos/initial/Y_Pos_1 Max         149
evaluation/env_infos/initial/Y_Pos_1 Min         149
evaluation/env_infos/Y_Pos_1 Mean                149
evaluation/env_infos/Y_Pos_1 Std                   0
evaluation/env_infos/Y_Pos_1 Max                 149
evaluation/env_infos/Y_Pos_1 Min                 149
evaluation/env_infos/final/Velocity_y Mean         5.42667
evaluation/env_infos/final/Velocity_y Std          2.86049
evaluation/env_infos/final/Velocity_y Max         10
evaluation/env_infos/final/Velocity_y Min          1
evaluation/env_infos/initial/Velocity_y Mean       5.42667
evaluation/env_infos/initial/Velocity_y Std        2.86049
evaluation/env_infos/initial/Velocity_y Max       10
evaluation/env_infos/initial/Velocity_y Min        1
evaluation/env_infos/Velocity_y Mean               5.385
evaluation/env_infos/Velocity_y Std                2.90042
evaluation/env_infos/Velocity_y Max               10
evaluation/e

evaluation/env_infos/final/Y_Pos_1 Std             0
evaluation/env_infos/final/Y_Pos_1 Max           149
evaluation/env_infos/final/Y_Pos_1 Min           149
evaluation/env_infos/initial/Y_Pos_1 Mean        149
evaluation/env_infos/initial/Y_Pos_1 Std           0
evaluation/env_infos/initial/Y_Pos_1 Max         149
evaluation/env_infos/initial/Y_Pos_1 Min         149
evaluation/env_infos/Y_Pos_1 Mean                149
evaluation/env_infos/Y_Pos_1 Std                   0
evaluation/env_infos/Y_Pos_1 Max                 149
evaluation/env_infos/Y_Pos_1 Min                 149
evaluation/env_infos/final/Velocity_y Mean         5.37668
evaluation/env_infos/final/Velocity_y Std          2.85632
evaluation/env_infos/final/Velocity_y Max         10
evaluation/env_infos/final/Velocity_y Min          1
evaluation/env_infos/initial/Velocity_y Mean       5.37668
evaluation/env_infos/initial/Velocity_y Std        2.85632
evaluation/env_infos/initial/Velocity_y Max       10
evaluation/env_infos/i

evaluation/env_infos/actions Std                   0
evaluation/env_infos/actions Max                   0
evaluation/env_infos/actions Min                   0
evaluation/env_infos/final/Y_Pos_1 Mean          149
evaluation/env_infos/final/Y_Pos_1 Std             0
evaluation/env_infos/final/Y_Pos_1 Max           149
evaluation/env_infos/final/Y_Pos_1 Min           149
evaluation/env_infos/initial/Y_Pos_1 Mean        149
evaluation/env_infos/initial/Y_Pos_1 Std           0
evaluation/env_infos/initial/Y_Pos_1 Max         149
evaluation/env_infos/initial/Y_Pos_1 Min         149
evaluation/env_infos/Y_Pos_1 Mean                149
evaluation/env_infos/Y_Pos_1 Std                   0
evaluation/env_infos/Y_Pos_1 Max                 149
evaluation/env_infos/Y_Pos_1 Min                 149
evaluation/env_infos/final/Velocity_y Mean         5.42809
evaluation/env_infos/final/Velocity_y Std          2.97661
evaluation/env_infos/final/Velocity_y Max         10
evaluation/env_infos/final/Velocit

evaluation/env_infos/initial/actions Mean          0
evaluation/env_infos/initial/actions Std           0
evaluation/env_infos/initial/actions Max           0
evaluation/env_infos/initial/actions Min           0
evaluation/env_infos/actions Mean                  0
evaluation/env_infos/actions Std                   0
evaluation/env_infos/actions Max                   0
evaluation/env_infos/actions Min                   0
evaluation/env_infos/final/Y_Pos_1 Mean          149
evaluation/env_infos/final/Y_Pos_1 Std             0
evaluation/env_infos/final/Y_Pos_1 Max           149
evaluation/env_infos/final/Y_Pos_1 Min           149
evaluation/env_infos/initial/Y_Pos_1 Mean        149
evaluation/env_infos/initial/Y_Pos_1 Std           0
evaluation/env_infos/initial/Y_Pos_1 Max         149
evaluation/env_infos/initial/Y_Pos_1 Min         149
evaluation/env_infos/Y_Pos_1 Mean                149
evaluation/env_infos/Y_Pos_1 Std                   0
evaluation/env_infos/Y_Pos_1 Max              

evaluation/env_infos/final/actions Mean            0
evaluation/env_infos/final/actions Std             0
evaluation/env_infos/final/actions Max             0
evaluation/env_infos/final/actions Min             0
evaluation/env_infos/initial/actions Mean          0
evaluation/env_infos/initial/actions Std           0
evaluation/env_infos/initial/actions Max           0
evaluation/env_infos/initial/actions Min           0
evaluation/env_infos/actions Mean                  0
evaluation/env_infos/actions Std                   0
evaluation/env_infos/actions Max                   0
evaluation/env_infos/actions Min                   0
evaluation/env_infos/final/Y_Pos_1 Mean          149
evaluation/env_infos/final/Y_Pos_1 Std             0
evaluation/env_infos/final/Y_Pos_1 Max           149
evaluation/env_infos/final/Y_Pos_1 Min           149
evaluation/env_infos/initial/Y_Pos_1 Mean        149
evaluation/env_infos/initial/Y_Pos_1 Std           0
evaluation/env_infos/initial/Y_Pos_1 Max      

evaluation/Actions Max                            98
evaluation/Actions Min                             4
evaluation/Num Paths                             383
evaluation/Average Returns                       -31.8198
evaluation/env_infos/final/actions Mean            0
evaluation/env_infos/final/actions Std             0
evaluation/env_infos/final/actions Max             0
evaluation/env_infos/final/actions Min             0
evaluation/env_infos/initial/actions Mean          0
evaluation/env_infos/initial/actions Std           0
evaluation/env_infos/initial/actions Max           0
evaluation/env_infos/initial/actions Min           0
evaluation/env_infos/actions Mean                  0
evaluation/env_infos/actions Std                   0
evaluation/env_infos/actions Max                   0
evaluation/env_infos/actions Min                   0
evaluation/env_infos/final/Y_Pos_1 Mean          149
evaluation/env_infos/final/Y_Pos_1 Std             0
evaluation/env_infos/final/Y_Pos_1 Max   

evaluation/Returns Max                             4
evaluation/Returns Min                           -64
evaluation/Actions Mean                           21.0701
evaluation/Actions Std                            20.8413
evaluation/Actions Max                            98
evaluation/Actions Min                             4
evaluation/Num Paths                             176
evaluation/Average Returns                       -45.8466
evaluation/env_infos/final/actions Mean            0
evaluation/env_infos/final/actions Std             0
evaluation/env_infos/final/actions Max             0
evaluation/env_infos/final/actions Min             0
evaluation/env_infos/initial/actions Mean          0
evaluation/env_infos/initial/actions Std           0
evaluation/env_infos/initial/actions Max           0
evaluation/env_infos/initial/actions Min           0
evaluation/env_infos/actions Mean                  0
evaluation/env_infos/actions Std                   0
evaluation/env_infos/actions Ma

evaluation/Rewards Max                            20
evaluation/Rewards Min                           -20
evaluation/Returns Mean                          -38.2879
evaluation/Returns Std                            10.7183
evaluation/Returns Max                             1
evaluation/Returns Min                           -64
evaluation/Actions Mean                           30.9113
evaluation/Actions Std                            30.8706
evaluation/Actions Max                            98
evaluation/Actions Min                             4
evaluation/Num Paths                             257
evaluation/Average Returns                       -38.2879
evaluation/env_infos/final/actions Mean            0
evaluation/env_infos/final/actions Std             0
evaluation/env_infos/final/actions Max             0
evaluation/env_infos/final/actions Min             0
evaluation/env_infos/initial/actions Mean          0
evaluation/env_infos/initial/actions Std           0
evaluation/env_infos/

evaluation/path length Max                        18
evaluation/path length Min                         7
evaluation/Rewards Mean                           -2.8356
evaluation/Rewards Std                             5.68753
evaluation/Rewards Max                            20
evaluation/Rewards Min                           -20
evaluation/Returns Mean                          -28.7241
evaluation/Returns Std                             4.63177
evaluation/Returns Max                             8
evaluation/Returns Min                           -37
evaluation/Actions Mean                           60.1135
evaluation/Actions Std                            24.8389
evaluation/Actions Max                            98
evaluation/Actions Min                            13
evaluation/Num Paths                             493
evaluation/Average Returns                       -28.7241
evaluation/env_infos/final/actions Mean            0
evaluation/env_infos/final/actions Std             0
evaluatio

evaluation/num steps total                     89787
evaluation/num paths total                      6410
evaluation/path length Mean                       12.6212
evaluation/path length Std                         2.56786
evaluation/path length Max                        41
evaluation/path length Min                         9
evaluation/Rewards Mean                           -2.5054
evaluation/Rewards Std                             5.1319
evaluation/Rewards Max                            -1
evaluation/Rewards Min                           -20
evaluation/Returns Mean                          -31.6212
evaluation/Returns Std                             2.56786
evaluation/Returns Max                           -28
evaluation/Returns Min                           -60
evaluation/Actions Mean                           47.2017
evaluation/Actions Std                            20.7607
evaluation/Actions Max                            92
evaluation/Actions Min                             1
eval

exploration/env_infos/X_pos_2 Mean               479
exploration/env_infos/X_pos_2 Std                  0
exploration/env_infos/X_pos_2 Max                479
exploration/env_infos/X_pos_2 Min                479
evaluation/num steps total                     94785
evaluation/num paths total                      6777
evaluation/path length Mean                       13.6185
evaluation/path length Std                         4.76894
evaluation/path length Max                        52
evaluation/path length Min                         9
evaluation/Rewards Mean                           -2.37915
evaluation/Rewards Std                             4.96366
evaluation/Rewards Max                            20
evaluation/Rewards Min                           -20
evaluation/Returns Mean                          -32.4005
evaluation/Returns Std                             5.66747
evaluation/Returns Max                            10
evaluation/Returns Min                           -71
evaluation/A

exploration/env_infos/final/X_pos_2 Min          127
exploration/env_infos/initial/X_pos_2 Mean       216
exploration/env_infos/initial/X_pos_2 Std         89
exploration/env_infos/initial/X_pos_2 Max        305
exploration/env_infos/initial/X_pos_2 Min        127
exploration/env_infos/X_pos_2 Mean               269.4
exploration/env_infos/X_pos_2 Std                 71.2
exploration/env_infos/X_pos_2 Max                305
exploration/env_infos/X_pos_2 Min                127
evaluation/num steps total                     99785
evaluation/num paths total                      7220
evaluation/path length Mean                       11.2867
evaluation/path length Std                         6.32966
evaluation/path length Max                        92
evaluation/path length Min                         7
evaluation/Rewards Mean                           -2.6754
evaluation/Rewards Std                             5.40311
evaluation/Rewards Max                            20
evaluation/Rewards M

exploration/env_infos/Velocity_y Max                1
exploration/env_infos/Velocity_y Min                1
exploration/env_infos/final/X_pos_2 Mean          552
exploration/env_infos/final/X_pos_2 Std             0
exploration/env_infos/final/X_pos_2 Max           552
exploration/env_infos/final/X_pos_2 Min           552
exploration/env_infos/initial/X_pos_2 Mean        552
exploration/env_infos/initial/X_pos_2 Std           0
exploration/env_infos/initial/X_pos_2 Max         552
exploration/env_infos/initial/X_pos_2 Min         552
exploration/env_infos/X_pos_2 Mean                552
exploration/env_infos/X_pos_2 Std                   0
exploration/env_infos/X_pos_2 Max                 552
exploration/env_infos/X_pos_2 Min                 552
evaluation/num steps total                     104783
evaluation/num paths total                       7583
evaluation/path length Mean                        13.7686
evaluation/path length Std                          8.09526
evaluation/path l

exploration/env_infos/final/Velocity_y Min          6
exploration/env_infos/initial/Velocity_y Mean       6
exploration/env_infos/initial/Velocity_y Std        0
exploration/env_infos/initial/Velocity_y Max        6
exploration/env_infos/initial/Velocity_y Min        6
exploration/env_infos/Velocity_y Mean               6
exploration/env_infos/Velocity_y Std                0
exploration/env_infos/Velocity_y Max                6
exploration/env_infos/Velocity_y Min                6
exploration/env_infos/final/X_pos_2 Mean          382
exploration/env_infos/final/X_pos_2 Std             0
exploration/env_infos/final/X_pos_2 Max           382
exploration/env_infos/final/X_pos_2 Min           382
exploration/env_infos/initial/X_pos_2 Mean        382
exploration/env_infos/initial/X_pos_2 Std           0
exploration/env_infos/initial/X_pos_2 Max         382
exploration/env_infos/initial/X_pos_2 Min         382
exploration/env_infos/X_pos_2 Mean                382
exploration/env_infos/X_pos_

exploration/env_infos/Y_Pos_1 Mean                198
exploration/env_infos/Y_Pos_1 Std                   0
exploration/env_infos/Y_Pos_1 Max                 198
exploration/env_infos/Y_Pos_1 Min                 198
exploration/env_infos/final/Velocity_y Mean        10
exploration/env_infos/final/Velocity_y Std          0
exploration/env_infos/final/Velocity_y Max         10
exploration/env_infos/final/Velocity_y Min         10
exploration/env_infos/initial/Velocity_y Mean      10
exploration/env_infos/initial/Velocity_y Std        0
exploration/env_infos/initial/Velocity_y Max       10
exploration/env_infos/initial/Velocity_y Min       10
exploration/env_infos/Velocity_y Mean              10
exploration/env_infos/Velocity_y Std                0
exploration/env_infos/Velocity_y Max               10
exploration/env_infos/Velocity_y Min               10
exploration/env_infos/final/X_pos_2 Mean          320
exploration/env_infos/final/X_pos_2 Std             0
exploration/env_infos/final/

exploration/env_infos/final/Y_Pos_1 Std             0
exploration/env_infos/final/Y_Pos_1 Max           198
exploration/env_infos/final/Y_Pos_1 Min           198
exploration/env_infos/initial/Y_Pos_1 Mean        198
exploration/env_infos/initial/Y_Pos_1 Std           0
exploration/env_infos/initial/Y_Pos_1 Max         198
exploration/env_infos/initial/Y_Pos_1 Min         198
exploration/env_infos/Y_Pos_1 Mean                198
exploration/env_infos/Y_Pos_1 Std                   0
exploration/env_infos/Y_Pos_1 Max                 198
exploration/env_infos/Y_Pos_1 Min                 198
exploration/env_infos/final/Velocity_y Mean         4
exploration/env_infos/final/Velocity_y Std          0
exploration/env_infos/final/Velocity_y Max          4
exploration/env_infos/final/Velocity_y Min          4
exploration/env_infos/initial/Velocity_y Mean       4
exploration/env_infos/initial/Velocity_y Std        0
exploration/env_infos/initial/Velocity_y Max        4
exploration/env_infos/initia

exploration/env_infos/initial/actions Max           0
exploration/env_infos/initial/actions Min           0
exploration/env_infos/actions Mean                  0
exploration/env_infos/actions Std                   0
exploration/env_infos/actions Max                   0
exploration/env_infos/actions Min                   0
exploration/env_infos/final/Y_Pos_1 Mean          198
exploration/env_infos/final/Y_Pos_1 Std             0
exploration/env_infos/final/Y_Pos_1 Max           198
exploration/env_infos/final/Y_Pos_1 Min           198
exploration/env_infos/initial/Y_Pos_1 Mean        198
exploration/env_infos/initial/Y_Pos_1 Std           0
exploration/env_infos/initial/Y_Pos_1 Max         198
exploration/env_infos/initial/Y_Pos_1 Min         198
exploration/env_infos/Y_Pos_1 Mean                198
exploration/env_infos/Y_Pos_1 Std                   0
exploration/env_infos/Y_Pos_1 Max                 198
exploration/env_infos/Y_Pos_1 Min                 198
exploration/env_infos/final/

exploration/Average Returns                       -10
exploration/env_infos/final/actions Mean            0
exploration/env_infos/final/actions Std             0
exploration/env_infos/final/actions Max             0
exploration/env_infos/final/actions Min             0
exploration/env_infos/initial/actions Mean          0
exploration/env_infos/initial/actions Std           0
exploration/env_infos/initial/actions Max           0
exploration/env_infos/initial/actions Min           0
exploration/env_infos/actions Mean                  0
exploration/env_infos/actions Std                   0
exploration/env_infos/actions Max                   0
exploration/env_infos/actions Min                   0
exploration/env_infos/final/Y_Pos_1 Mean          198
exploration/env_infos/final/Y_Pos_1 Std             0
exploration/env_infos/final/Y_Pos_1 Max           198
exploration/env_infos/final/Y_Pos_1 Min           198
exploration/env_infos/initial/Y_Pos_1 Mean        198
exploration/env_infos/initia

exploration/Returns Max                           -10
exploration/Returns Min                           -10
exploration/Actions Mean                           19.1
exploration/Actions Std                            21.6631
exploration/Actions Max                            77
exploration/Actions Min                             9
exploration/Num Paths                               1
exploration/Average Returns                       -10
exploration/env_infos/final/actions Mean            0
exploration/env_infos/final/actions Std             0
exploration/env_infos/final/actions Max             0
exploration/env_infos/final/actions Min             0
exploration/env_infos/initial/actions Mean          0
exploration/env_infos/initial/actions Std           0
exploration/env_infos/initial/actions Max           0
exploration/env_infos/initial/actions Min           0
exploration/env_infos/actions Mean                  0
exploration/env_infos/actions Std                   0
exploration/env_infos

exploration/path length Min                        10
exploration/Rewards Mean                           -1
exploration/Rewards Std                             0
exploration/Rewards Max                            -1
exploration/Rewards Min                            -1
exploration/Returns Mean                          -10
exploration/Returns Std                             0
exploration/Returns Max                           -10
exploration/Returns Min                           -10
exploration/Actions Mean                           19.2
exploration/Actions Std                            13.4298
exploration/Actions Max                            59
exploration/Actions Min                            14
exploration/Num Paths                               1
exploration/Average Returns                       -10
exploration/env_infos/final/actions Mean            0
exploration/env_infos/final/actions Std             0
exploration/env_infos/final/actions Max             0
exploration/env_infos

trainer/Y Predictions Max                          -1.52716
trainer/Y Predictions Min                          -4.83997
exploration/num steps total                      1290
exploration/num paths total                        52
exploration/path length Mean                       10
exploration/path length Std                         0
exploration/path length Max                        10
exploration/path length Min                        10
exploration/Rewards Mean                           -1
exploration/Rewards Std                             0
exploration/Rewards Max                            -1
exploration/Rewards Min                            -1
exploration/Returns Mean                          -10
exploration/Returns Std                             0
exploration/Returns Max                           -10
exploration/Returns Min                           -10
exploration/Actions Mean                           21.9
exploration/Actions Std                            24.7081
explorati

---------------------------------------------  ---------------
replay_buffer/size                               1300
trainer/QF Loss                                     0.276618
trainer/Y Predictions Mean                         -4.37503
trainer/Y Predictions Std                           4.23181
trainer/Y Predictions Max                          -1.73928
trainer/Y Predictions Min                         -20.0848
exploration/num steps total                      1300
exploration/num paths total                        53
exploration/path length Mean                       10
exploration/path length Std                         0
exploration/path length Max                        10
exploration/path length Min                        10
exploration/Rewards Mean                           -1
exploration/Rewards Std                             0
exploration/Rewards Max                            -1
exploration/Rewards Min                            -1
exploration/Returns Mean                   

time/epoch (s)                                      5.97728
time/total (s)                                    200.267
Epoch                                              29
---------------------------------------------  ---------------
2019-06-09 01:37:28.056501 Pacific Daylight Time | [ddqn-train_2019_06_09_01_34_02_0000--s-0] Epoch 30 finished
---------------------------------------------  ---------------
replay_buffer/size                               1310
trainer/QF Loss                                     0.0987837
trainer/Y Predictions Mean                         -3.47012
trainer/Y Predictions Std                           0.885604
trainer/Y Predictions Max                          -1.60192
trainer/Y Predictions Min                          -4.92212
exploration/num steps total                      1310
exploration/num paths total                        54
exploration/path length Mean                       10
exploration/path length Std                         0
exploration/path 

time/data storing (s)                               0.00014287
time/evaluation sampling (s)                        4.09242
time/exploration sampling (s)                       0.00651128
time/logging (s)                                    0.0400452
time/saving (s)                                     0.00751507
time/training (s)                                   2.4627
time/epoch (s)                                      6.60934
time/total (s)                                    207.094
Epoch                                              30
---------------------------------------------  ---------------
2019-06-09 01:37:34.415000 Pacific Daylight Time | [ddqn-train_2019_06_09_01_34_02_0000--s-0] Epoch 31 finished
---------------------------------------------  ----------------
replay_buffer/size                               1320
trainer/QF Loss                                    32.2248
trainer/Y Predictions Mean                         -3.76552
trainer/Y Predictions Std                     

evaluation/env_infos/initial/X_pos_2 Std          125.912
evaluation/env_infos/initial/X_pos_2 Max          580
evaluation/env_infos/initial/X_pos_2 Min          121
evaluation/env_infos/X_pos_2 Mean                 309.576
evaluation/env_infos/X_pos_2 Std                  139.853
evaluation/env_infos/X_pos_2 Max                  580
evaluation/env_infos/X_pos_2 Min                  121
time/data storing (s)                               0.000320227
time/evaluation sampling (s)                        4.12053
time/exploration sampling (s)                       0.00970533
time/logging (s)                                    0.0381456
time/saving (s)                                     0.00774292
time/training (s)                                   1.99294
time/epoch (s)                                      6.16939
time/total (s)                                    213.449
Epoch                                              31
---------------------------------------------  ----------------
20

evaluation/env_infos/Velocity_y Max                10
evaluation/env_infos/Velocity_y Min                 1
evaluation/env_infos/final/X_pos_2 Mean           339.458
evaluation/env_infos/final/X_pos_2 Std            140.522
evaluation/env_infos/final/X_pos_2 Max            579
evaluation/env_infos/final/X_pos_2 Min            120
evaluation/env_infos/initial/X_pos_2 Mean         339.458
evaluation/env_infos/initial/X_pos_2 Std          140.522
evaluation/env_infos/initial/X_pos_2 Max          579
evaluation/env_infos/initial/X_pos_2 Min          120
evaluation/env_infos/X_pos_2 Mean                 329.066
evaluation/env_infos/X_pos_2 Std                  146.337
evaluation/env_infos/X_pos_2 Max                  579
evaluation/env_infos/X_pos_2 Min                  120
time/data storing (s)                               0.000135481
time/evaluation sampling (s)                        4.07225
time/exploration sampling (s)                       0.0106241
time/logging (s)                  

evaluation/env_infos/initial/Velocity_y Mean        5.57286
evaluation/env_infos/initial/Velocity_y Std         2.88184
evaluation/env_infos/initial/Velocity_y Max        10
evaluation/env_infos/initial/Velocity_y Min         1
evaluation/env_infos/Velocity_y Mean                4.83938
evaluation/env_infos/Velocity_y Std                 2.97446
evaluation/env_infos/Velocity_y Max                10
evaluation/env_infos/Velocity_y Min                 1
evaluation/env_infos/final/X_pos_2 Mean           352.884
evaluation/env_infos/final/X_pos_2 Std            127.504
evaluation/env_infos/final/X_pos_2 Max            578
evaluation/env_infos/final/X_pos_2 Min            120
evaluation/env_infos/initial/X_pos_2 Mean         352.884
evaluation/env_infos/initial/X_pos_2 Std          127.504
evaluation/env_infos/initial/X_pos_2 Max          578
evaluation/env_infos/initial/X_pos_2 Min          120
evaluation/env_infos/X_pos_2 Mean                 296.204
evaluation/env_infos/X_pos_2 Std      

evaluation/env_infos/Y_Pos_1 Std                    0
evaluation/env_infos/Y_Pos_1 Max                  149
evaluation/env_infos/Y_Pos_1 Min                  149
evaluation/env_infos/final/Velocity_y Mean          5.51534
evaluation/env_infos/final/Velocity_y Std           2.91885
evaluation/env_infos/final/Velocity_y Max          10
evaluation/env_infos/final/Velocity_y Min           1
evaluation/env_infos/initial/Velocity_y Mean        5.51534
evaluation/env_infos/initial/Velocity_y Std         2.91885
evaluation/env_infos/initial/Velocity_y Max        10
evaluation/env_infos/initial/Velocity_y Min         1
evaluation/env_infos/Velocity_y Mean                5.15844
evaluation/env_infos/Velocity_y Std                 2.93822
evaluation/env_infos/Velocity_y Max                10
evaluation/env_infos/Velocity_y Min                 1
evaluation/env_infos/final/X_pos_2 Mean           340.27
evaluation/env_infos/final/X_pos_2 Std            125.288
evaluation/env_infos/final/X_pos_2 Max 

evaluation/env_infos/final/Y_Pos_1 Max            149
evaluation/env_infos/final/Y_Pos_1 Min            149
evaluation/env_infos/initial/Y_Pos_1 Mean         149
evaluation/env_infos/initial/Y_Pos_1 Std            0
evaluation/env_infos/initial/Y_Pos_1 Max          149
evaluation/env_infos/initial/Y_Pos_1 Min          149
evaluation/env_infos/Y_Pos_1 Mean                 149
evaluation/env_infos/Y_Pos_1 Std                    0
evaluation/env_infos/Y_Pos_1 Max                  149
evaluation/env_infos/Y_Pos_1 Min                  149
evaluation/env_infos/final/Velocity_y Mean          5.69058
evaluation/env_infos/final/Velocity_y Std           2.92787
evaluation/env_infos/final/Velocity_y Max          10
evaluation/env_infos/final/Velocity_y Min           1
evaluation/env_infos/initial/Velocity_y Mean        5.69058
evaluation/env_infos/initial/Velocity_y Std         2.92787
evaluation/env_infos/initial/Velocity_y Max        10
evaluation/env_infos/initial/Velocity_y Min         1
eval

evaluation/env_infos/initial/actions Min            0
evaluation/env_infos/actions Mean                   0
evaluation/env_infos/actions Std                    0
evaluation/env_infos/actions Max                    0
evaluation/env_infos/actions Min                    0
evaluation/env_infos/final/Y_Pos_1 Mean           149
evaluation/env_infos/final/Y_Pos_1 Std              0
evaluation/env_infos/final/Y_Pos_1 Max            149
evaluation/env_infos/final/Y_Pos_1 Min            149
evaluation/env_infos/initial/Y_Pos_1 Mean         149
evaluation/env_infos/initial/Y_Pos_1 Std            0
evaluation/env_infos/initial/Y_Pos_1 Max          149
evaluation/env_infos/initial/Y_Pos_1 Min          149
evaluation/env_infos/Y_Pos_1 Mean                 149
evaluation/env_infos/Y_Pos_1 Std                    0
evaluation/env_infos/Y_Pos_1 Max                  149
evaluation/env_infos/Y_Pos_1 Min                  149
evaluation/env_infos/final/Velocity_y Mean          5.36364
evaluation/env_infos/f

evaluation/env_infos/final/actions Mean             0
evaluation/env_infos/final/actions Std              0
evaluation/env_infos/final/actions Max              0
evaluation/env_infos/final/actions Min              0
evaluation/env_infos/initial/actions Mean           0
evaluation/env_infos/initial/actions Std            0
evaluation/env_infos/initial/actions Max            0
evaluation/env_infos/initial/actions Min            0
evaluation/env_infos/actions Mean                   0
evaluation/env_infos/actions Std                    0
evaluation/env_infos/actions Max                    0
evaluation/env_infos/actions Min                    0
evaluation/env_infos/final/Y_Pos_1 Mean           149
evaluation/env_infos/final/Y_Pos_1 Std              0
evaluation/env_infos/final/Y_Pos_1 Max            149
evaluation/env_infos/final/Y_Pos_1 Min            149
evaluation/env_infos/initial/Y_Pos_1 Mean         149
evaluation/env_infos/initial/Y_Pos_1 Std            0
evaluation/env_infos/initial

evaluation/Returns Min                            -88
evaluation/Actions Mean                            26.0453
evaluation/Actions Std                             21.4996
evaluation/Actions Max                             94
evaluation/Actions Min                              1
evaluation/Num Paths                              221
evaluation/Average Returns                        -40.8507
evaluation/env_infos/final/actions Mean             0
evaluation/env_infos/final/actions Std              0
evaluation/env_infos/final/actions Max              0
evaluation/env_infos/final/actions Min              0
evaluation/env_infos/initial/actions Mean           0
evaluation/env_infos/initial/actions Std            0
evaluation/env_infos/initial/actions Max            0
evaluation/env_infos/initial/actions Min            0
evaluation/env_infos/actions Mean                   0
evaluation/env_infos/actions Std                    0
evaluation/env_infos/actions Max                    0
evaluation/en

evaluation/Rewards Std                              3.00432
evaluation/Rewards Max                             20
evaluation/Rewards Min                            -20
evaluation/Returns Mean                           -56.6589
evaluation/Returns Std                             19.7671
evaluation/Returns Max                              2
evaluation/Returns Min                           -117
evaluation/Actions Mean                            14.5125
evaluation/Actions Std                             13.7909
evaluation/Actions Max                             98
evaluation/Actions Min                              1
evaluation/Num Paths                              129
evaluation/Average Returns                        -56.6589
evaluation/env_infos/final/actions Mean             0
evaluation/env_infos/final/actions Std              0
evaluation/env_infos/final/actions Max              0
evaluation/env_infos/final/actions Min              0
evaluation/env_infos/initial/actions Mean          

evaluation/num steps total                     204321
evaluation/num paths total                      11142
evaluation/path length Mean                        33.2095
evaluation/path length Std                         21.0724
evaluation/path length Max                        100
evaluation/path length Min                         11
evaluation/Rewards Mean                            -1.54507
evaluation/Rewards Std                              3.17162
evaluation/Rewards Max                             -1
evaluation/Rewards Min                            -20
evaluation/Returns Mean                           -51.3108
evaluation/Returns Std                             18.4464
evaluation/Returns Max                            -30
evaluation/Returns Min                           -113
evaluation/Actions Mean                            17.4616
evaluation/Actions Std                             19.5176
evaluation/Actions Max                             98
evaluation/Actions Min                  

exploration/env_infos/initial/X_pos_2 Std           0
exploration/env_infos/initial/X_pos_2 Max         341
exploration/env_infos/initial/X_pos_2 Min         341
exploration/env_infos/X_pos_2 Mean                341
exploration/env_infos/X_pos_2 Std                   0
exploration/env_infos/X_pos_2 Max                 341
exploration/env_infos/X_pos_2 Min                 341
evaluation/num steps total                     209281
evaluation/num paths total                      11326
evaluation/path length Mean                        26.9565
evaluation/path length Std                         10.2527
evaluation/path length Max                         63
evaluation/path length Min                         14
evaluation/Rewards Mean                            -1.68065
evaluation/Rewards Std                              3.60237
evaluation/Rewards Max                             20
evaluation/Rewards Min                            -20
evaluation/Returns Mean                           -45.3043
e

exploration/env_infos/Velocity_y Max                3
exploration/env_infos/Velocity_y Min                3
exploration/env_infos/final/X_pos_2 Mean          478
exploration/env_infos/final/X_pos_2 Std             0
exploration/env_infos/final/X_pos_2 Max           478
exploration/env_infos/final/X_pos_2 Min           478
exploration/env_infos/initial/X_pos_2 Mean        478
exploration/env_infos/initial/X_pos_2 Std           0
exploration/env_infos/initial/X_pos_2 Max         478
exploration/env_infos/initial/X_pos_2 Min         478
exploration/env_infos/X_pos_2 Mean                478
exploration/env_infos/X_pos_2 Std                   0
exploration/env_infos/X_pos_2 Max                 478
exploration/env_infos/X_pos_2 Min                 478
evaluation/num steps total                     214238
evaluation/num paths total                      11508
evaluation/path length Mean                        27.2363
evaluation/path length Std                         14.0429
evaluation/path le

exploration/env_infos/final/Velocity_y Min          1
exploration/env_infos/initial/Velocity_y Mean       1
exploration/env_infos/initial/Velocity_y Std        0
exploration/env_infos/initial/Velocity_y Max        1
exploration/env_infos/initial/Velocity_y Min        1
exploration/env_infos/Velocity_y Mean               1
exploration/env_infos/Velocity_y Std                0
exploration/env_infos/Velocity_y Max                1
exploration/env_infos/Velocity_y Min                1
exploration/env_infos/final/X_pos_2 Mean          412
exploration/env_infos/final/X_pos_2 Std             0
exploration/env_infos/final/X_pos_2 Max           412
exploration/env_infos/final/X_pos_2 Min           412
exploration/env_infos/initial/X_pos_2 Mean        412
exploration/env_infos/initial/X_pos_2 Std           0
exploration/env_infos/initial/X_pos_2 Max         412
exploration/env_infos/initial/X_pos_2 Min         412
exploration/env_infos/X_pos_2 Mean                412
exploration/env_infos/X_pos_

exploration/env_infos/Y_Pos_1 Mean                198
exploration/env_infos/Y_Pos_1 Std                   0
exploration/env_infos/Y_Pos_1 Max                 198
exploration/env_infos/Y_Pos_1 Min                 198
exploration/env_infos/final/Velocity_y Mean         1
exploration/env_infos/final/Velocity_y Std          0
exploration/env_infos/final/Velocity_y Max          1
exploration/env_infos/final/Velocity_y Min          1
exploration/env_infos/initial/Velocity_y Mean       1
exploration/env_infos/initial/Velocity_y Std        0
exploration/env_infos/initial/Velocity_y Max        1
exploration/env_infos/initial/Velocity_y Min        1
exploration/env_infos/Velocity_y Mean               1
exploration/env_infos/Velocity_y Std                0
exploration/env_infos/Velocity_y Max                1
exploration/env_infos/Velocity_y Min                1
exploration/env_infos/final/X_pos_2 Mean          359
exploration/env_infos/final/X_pos_2 Std             0
exploration/env_infos/final/

exploration/env_infos/final/Y_Pos_1 Std             0
exploration/env_infos/final/Y_Pos_1 Max           198
exploration/env_infos/final/Y_Pos_1 Min           198
exploration/env_infos/initial/Y_Pos_1 Mean        198
exploration/env_infos/initial/Y_Pos_1 Std           0
exploration/env_infos/initial/Y_Pos_1 Max         198
exploration/env_infos/initial/Y_Pos_1 Min         198
exploration/env_infos/Y_Pos_1 Mean                198
exploration/env_infos/Y_Pos_1 Std                   0
exploration/env_infos/Y_Pos_1 Max                 198
exploration/env_infos/Y_Pos_1 Min                 198
exploration/env_infos/final/Velocity_y Mean         5
exploration/env_infos/final/Velocity_y Std          0
exploration/env_infos/final/Velocity_y Max          5
exploration/env_infos/final/Velocity_y Min          5
exploration/env_infos/initial/Velocity_y Mean       5
exploration/env_infos/initial/Velocity_y Std        0
exploration/env_infos/initial/Velocity_y Max        5
exploration/env_infos/initia

exploration/env_infos/initial/actions Max           0
exploration/env_infos/initial/actions Min           0
exploration/env_infos/actions Mean                  0
exploration/env_infos/actions Std                   0
exploration/env_infos/actions Max                   0
exploration/env_infos/actions Min                   0
exploration/env_infos/final/Y_Pos_1 Mean          198
exploration/env_infos/final/Y_Pos_1 Std             0
exploration/env_infos/final/Y_Pos_1 Max           198
exploration/env_infos/final/Y_Pos_1 Min           198
exploration/env_infos/initial/Y_Pos_1 Mean        198
exploration/env_infos/initial/Y_Pos_1 Std           0
exploration/env_infos/initial/Y_Pos_1 Max         198
exploration/env_infos/initial/Y_Pos_1 Min         198
exploration/env_infos/Y_Pos_1 Mean                198
exploration/env_infos/Y_Pos_1 Std                   0
exploration/env_infos/Y_Pos_1 Max                 198
exploration/env_infos/Y_Pos_1 Min                 198
exploration/env_infos/final/

exploration/Average Returns                       -10
exploration/env_infos/final/actions Mean            0
exploration/env_infos/final/actions Std             0
exploration/env_infos/final/actions Max             0
exploration/env_infos/final/actions Min             0
exploration/env_infos/initial/actions Mean          0
exploration/env_infos/initial/actions Std           0
exploration/env_infos/initial/actions Max           0
exploration/env_infos/initial/actions Min           0
exploration/env_infos/actions Mean                  0
exploration/env_infos/actions Std                   0
exploration/env_infos/actions Max                   0
exploration/env_infos/actions Min                   0
exploration/env_infos/final/Y_Pos_1 Mean          198
exploration/env_infos/final/Y_Pos_1 Std             0
exploration/env_infos/final/Y_Pos_1 Max           198
exploration/env_infos/final/Y_Pos_1 Min           198
exploration/env_infos/initial/Y_Pos_1 Mean        198
exploration/env_infos/initia

exploration/Returns Max                           -10
exploration/Returns Min                           -10
exploration/Actions Mean                           43.9
exploration/Actions Std                            25.9401
exploration/Actions Max                            82
exploration/Actions Min                            13
exploration/Num Paths                               1
exploration/Average Returns                       -10
exploration/env_infos/final/actions Mean            0
exploration/env_infos/final/actions Std             0
exploration/env_infos/final/actions Max             0
exploration/env_infos/final/actions Min             0
exploration/env_infos/initial/actions Mean          0
exploration/env_infos/initial/actions Std           0
exploration/env_infos/initial/actions Max           0
exploration/env_infos/initial/actions Min           0
exploration/env_infos/actions Mean                  0
exploration/env_infos/actions Std                   0
exploration/env_infos

exploration/path length Min                        10
exploration/Rewards Mean                           -1
exploration/Rewards Std                             0
exploration/Rewards Max                            -1
exploration/Rewards Min                            -1
exploration/Returns Mean                          -10
exploration/Returns Std                             0
exploration/Returns Max                           -10
exploration/Returns Min                           -10
exploration/Actions Mean                           13.5
exploration/Actions Std                            29.0009
exploration/Actions Max                            99
exploration/Actions Min                             2
exploration/Num Paths                               1
exploration/Average Returns                       -10
exploration/env_infos/final/actions Mean            0
exploration/env_infos/final/actions Std             0
exploration/env_infos/final/actions Max             0
exploration/env_infos

trainer/Y Predictions Max                          -7.89577
trainer/Y Predictions Min                         -14.8533
exploration/num steps total                      1510
exploration/num paths total                        75
exploration/path length Mean                       10
exploration/path length Std                         0
exploration/path length Max                        10
exploration/path length Min                        10
exploration/Rewards Mean                           -1
exploration/Rewards Std                             0
exploration/Rewards Max                            -1
exploration/Rewards Min                            -1
exploration/Returns Mean                          -10
exploration/Returns Std                             0
exploration/Returns Max                           -10
exploration/Returns Min                           -10
exploration/Actions Mean                           31.6
exploration/Actions Std                            18.6719
exploratio

---------------------------------------------  ----------------
replay_buffer/size                               1520
trainer/QF Loss                                     0.122696
trainer/Y Predictions Mean                        -12.1296
trainer/Y Predictions Std                           2.21116
trainer/Y Predictions Max                          -7.60585
trainer/Y Predictions Min                         -15.6392
exploration/num steps total                      1520
exploration/num paths total                        76
exploration/path length Mean                       10
exploration/path length Std                         0
exploration/path length Max                        10
exploration/path length Min                        10
exploration/Rewards Mean                           -1
exploration/Rewards Std                             0
exploration/Rewards Max                            -1
exploration/Rewards Min                            -1
exploration/Returns Mean                   

time/epoch (s)                                      6.68496
time/total (s)                                    344.767
Epoch                                              51
---------------------------------------------  ----------------
2019-06-09 01:39:52.746957 Pacific Daylight Time | [ddqn-train_2019_06_09_01_34_02_0000--s-0] Epoch 52 finished
---------------------------------------------  ---------------
replay_buffer/size                               1530
trainer/QF Loss                                     4.65319
trainer/Y Predictions Mean                        -11.5154
trainer/Y Predictions Std                           2.61123
trainer/Y Predictions Max                          -7.81518
trainer/Y Predictions Min                         -19.1884
exploration/num steps total                      1530
exploration/num paths total                        78
exploration/path length Mean                        5
exploration/path length Std                         4
exploration/path leng

time/data storing (s)                               0.00015026
time/evaluation sampling (s)                        4.41976
time/exploration sampling (s)                       0.012745
time/logging (s)                                    0.036783
time/saving (s)                                     0.009856
time/training (s)                                   2.30471
time/epoch (s)                                      6.784
time/total (s)                                    351.749
Epoch                                              52
---------------------------------------------  ---------------
2019-06-09 01:39:58.753574 Pacific Daylight Time | [ddqn-train_2019_06_09_01_34_02_0000--s-0] Epoch 53 finished
---------------------------------------------  ----------------
replay_buffer/size                               1540
trainer/QF Loss                                     0.129331
trainer/Y Predictions Mean                        -12.9134
trainer/Y Predictions Std                          

evaluation/env_infos/initial/X_pos_2 Std          134.881
evaluation/env_infos/initial/X_pos_2 Max          579
evaluation/env_infos/initial/X_pos_2 Min          124
evaluation/env_infos/X_pos_2 Mean                 322.986
evaluation/env_infos/X_pos_2 Std                  131.631
evaluation/env_infos/X_pos_2 Max                  579
evaluation/env_infos/X_pos_2 Min                  124
time/data storing (s)                               0.000146976
time/evaluation sampling (s)                        3.95866
time/exploration sampling (s)                       0.00835176
time/logging (s)                                    0.033391
time/saving (s)                                     0.0057415
time/training (s)                                   1.81384
time/epoch (s)                                      5.82012
time/total (s)                                    357.751
Epoch                                              53
---------------------------------------------  ----------------
2019

evaluation/env_infos/Velocity_y Max                10
evaluation/env_infos/Velocity_y Min                 1
evaluation/env_infos/final/X_pos_2 Mean           344.977
evaluation/env_infos/final/X_pos_2 Std            130.266
evaluation/env_infos/final/X_pos_2 Max            580
evaluation/env_infos/final/X_pos_2 Min            120
evaluation/env_infos/initial/X_pos_2 Mean         344.977
evaluation/env_infos/initial/X_pos_2 Std          130.266
evaluation/env_infos/initial/X_pos_2 Max          580
evaluation/env_infos/initial/X_pos_2 Min          120
evaluation/env_infos/X_pos_2 Mean                 315.186
evaluation/env_infos/X_pos_2 Std                  124.514
evaluation/env_infos/X_pos_2 Max                  580
evaluation/env_infos/X_pos_2 Min                  120
time/data storing (s)                               0.000137944
time/evaluation sampling (s)                        4.18743
time/exploration sampling (s)                       0.00894911
time/logging (s)                 

evaluation/env_infos/initial/Velocity_y Mean        5.59067
evaluation/env_infos/initial/Velocity_y Std         3.01693
evaluation/env_infos/initial/Velocity_y Max        10
evaluation/env_infos/initial/Velocity_y Min         1
evaluation/env_infos/Velocity_y Mean                4.16141
evaluation/env_infos/Velocity_y Std                 2.95147
evaluation/env_infos/Velocity_y Max                10
evaluation/env_infos/Velocity_y Min                 1
evaluation/env_infos/final/X_pos_2 Mean           366.332
evaluation/env_infos/final/X_pos_2 Std            126.755
evaluation/env_infos/final/X_pos_2 Max            577
evaluation/env_infos/final/X_pos_2 Min            124
evaluation/env_infos/initial/X_pos_2 Mean         366.332
evaluation/env_infos/initial/X_pos_2 Std          126.755
evaluation/env_infos/initial/X_pos_2 Max          577
evaluation/env_infos/initial/X_pos_2 Min          124
evaluation/env_infos/X_pos_2 Mean                 345.643
evaluation/env_infos/X_pos_2 Std      

evaluation/env_infos/Y_Pos_1 Std                    0
evaluation/env_infos/Y_Pos_1 Max                  149
evaluation/env_infos/Y_Pos_1 Min                  149
evaluation/env_infos/final/Velocity_y Mean          5.74586
evaluation/env_infos/final/Velocity_y Std           2.8656
evaluation/env_infos/final/Velocity_y Max          10
evaluation/env_infos/final/Velocity_y Min           1
evaluation/env_infos/initial/Velocity_y Mean        5.74586
evaluation/env_infos/initial/Velocity_y Std         2.8656
evaluation/env_infos/initial/Velocity_y Max        10
evaluation/env_infos/initial/Velocity_y Min         1
evaluation/env_infos/Velocity_y Mean                4.77849
evaluation/env_infos/Velocity_y Std                 3.07536
evaluation/env_infos/Velocity_y Max                10
evaluation/env_infos/Velocity_y Min                 1
evaluation/env_infos/final/X_pos_2 Mean           335.873
evaluation/env_infos/final/X_pos_2 Std            136.233
evaluation/env_infos/final/X_pos_2 Max  

evaluation/env_infos/final/Y_Pos_1 Max            149
evaluation/env_infos/final/Y_Pos_1 Min            149
evaluation/env_infos/initial/Y_Pos_1 Mean         149
evaluation/env_infos/initial/Y_Pos_1 Std            0
evaluation/env_infos/initial/Y_Pos_1 Max          149
evaluation/env_infos/initial/Y_Pos_1 Min          149
evaluation/env_infos/Y_Pos_1 Mean                 149
evaluation/env_infos/Y_Pos_1 Std                    0
evaluation/env_infos/Y_Pos_1 Max                  149
evaluation/env_infos/Y_Pos_1 Min                  149
evaluation/env_infos/final/Velocity_y Mean          5.34532
evaluation/env_infos/final/Velocity_y Std           2.91291
evaluation/env_infos/final/Velocity_y Max          10
evaluation/env_infos/final/Velocity_y Min           1
evaluation/env_infos/initial/Velocity_y Mean        5.34532
evaluation/env_infos/initial/Velocity_y Std         2.91291
evaluation/env_infos/initial/Velocity_y Max        10
evaluation/env_infos/initial/Velocity_y Min         1
eval

evaluation/env_infos/initial/actions Min            0
evaluation/env_infos/actions Mean                   0
evaluation/env_infos/actions Std                    0
evaluation/env_infos/actions Max                    0
evaluation/env_infos/actions Min                    0
evaluation/env_infos/final/Y_Pos_1 Mean           149
evaluation/env_infos/final/Y_Pos_1 Std              0
evaluation/env_infos/final/Y_Pos_1 Max            149
evaluation/env_infos/final/Y_Pos_1 Min            149
evaluation/env_infos/initial/Y_Pos_1 Mean         149
evaluation/env_infos/initial/Y_Pos_1 Std            0
evaluation/env_infos/initial/Y_Pos_1 Max          149
evaluation/env_infos/initial/Y_Pos_1 Min          149
evaluation/env_infos/Y_Pos_1 Mean                 149
evaluation/env_infos/Y_Pos_1 Std                    0
evaluation/env_infos/Y_Pos_1 Max                  149
evaluation/env_infos/Y_Pos_1 Min                  149
evaluation/env_infos/final/Velocity_y Mean          5.34815
evaluation/env_infos/f

evaluation/env_infos/final/actions Mean             0
evaluation/env_infos/final/actions Std              0
evaluation/env_infos/final/actions Max              0
evaluation/env_infos/final/actions Min              0
evaluation/env_infos/initial/actions Mean           0
evaluation/env_infos/initial/actions Std            0
evaluation/env_infos/initial/actions Max            0
evaluation/env_infos/initial/actions Min            0
evaluation/env_infos/actions Mean                   0
evaluation/env_infos/actions Std                    0
evaluation/env_infos/actions Max                    0
evaluation/env_infos/actions Min                    0
evaluation/env_infos/final/Y_Pos_1 Mean           149
evaluation/env_infos/final/Y_Pos_1 Std              0
evaluation/env_infos/final/Y_Pos_1 Max            149
evaluation/env_infos/final/Y_Pos_1 Min            149
evaluation/env_infos/initial/Y_Pos_1 Mean         149
evaluation/env_infos/initial/Y_Pos_1 Std            0
evaluation/env_infos/initial

evaluation/Returns Min                           -118
evaluation/Actions Mean                            11.9992
evaluation/Actions Std                             21.0458
evaluation/Actions Max                             98
evaluation/Actions Min                              1
evaluation/Num Paths                              113
evaluation/Average Returns                        -60.1327
evaluation/env_infos/final/actions Mean             0
evaluation/env_infos/final/actions Std              0
evaluation/env_infos/final/actions Max              0
evaluation/env_infos/final/actions Min              0
evaluation/env_infos/initial/actions Mean           0
evaluation/env_infos/initial/actions Std            0
evaluation/env_infos/initial/actions Max            0
evaluation/env_infos/initial/actions Min            0
evaluation/env_infos/actions Mean                   0
evaluation/env_infos/actions Std                    0
evaluation/env_infos/actions Max                    0
evaluation/en

evaluation/Rewards Std                              2.9954
evaluation/Rewards Max                             20
evaluation/Rewards Min                            -20
evaluation/Returns Mean                           -52.6143
evaluation/Returns Std                             26.8069
evaluation/Returns Max                              2
evaluation/Returns Min                           -119
evaluation/Actions Mean                            15.5534
evaluation/Actions Std                             25.9914
evaluation/Actions Max                             98
evaluation/Actions Min                              1
evaluation/Num Paths                              140
evaluation/Average Returns                        -52.6143
evaluation/env_infos/final/actions Mean             0
evaluation/env_infos/final/actions Std              0
evaluation/env_infos/final/actions Max              0
evaluation/env_infos/final/actions Min              0
evaluation/env_infos/initial/actions Mean           

evaluation/num steps total                     313855
evaluation/num paths total                      15062
evaluation/path length Mean                        24.895
evaluation/path length Std                         18.5624
evaluation/path length Max                        100
evaluation/path length Min                          9
evaluation/Rewards Mean                            -1.71199
evaluation/Rewards Std                              3.70077
evaluation/Rewards Max                             20
evaluation/Rewards Min                            -20
evaluation/Returns Mean                           -42.62
evaluation/Returns Std                             18.0457
evaluation/Returns Max                              6
evaluation/Returns Min                           -118
evaluation/Actions Mean                            23.4903
evaluation/Actions Std                             27.6813
evaluation/Actions Max                             98
evaluation/Actions Min                     

exploration/env_infos/initial/X_pos_2 Std           0
exploration/env_infos/initial/X_pos_2 Max         465
exploration/env_infos/initial/X_pos_2 Min         465
exploration/env_infos/X_pos_2 Mean                465
exploration/env_infos/X_pos_2 Std                   0
exploration/env_infos/X_pos_2 Max                 465
exploration/env_infos/X_pos_2 Min                 465
evaluation/num steps total                     318832
evaluation/num paths total                      15246
evaluation/path length Mean                        27.0489
evaluation/path length Std                         22.4219
evaluation/path length Max                        100
evaluation/path length Min                          9
evaluation/Rewards Mean                            -1.67149
evaluation/Rewards Std                              3.53216
evaluation/Rewards Max                             20
evaluation/Rewards Min                            -20
evaluation/Returns Mean                           -45.212
ev

exploration/env_infos/Velocity_y Max                8
exploration/env_infos/Velocity_y Min                8
exploration/env_infos/final/X_pos_2 Mean          578
exploration/env_infos/final/X_pos_2 Std             0
exploration/env_infos/final/X_pos_2 Max           578
exploration/env_infos/final/X_pos_2 Min           578
exploration/env_infos/initial/X_pos_2 Mean        578
exploration/env_infos/initial/X_pos_2 Std           0
exploration/env_infos/initial/X_pos_2 Max         578
exploration/env_infos/initial/X_pos_2 Min         578
exploration/env_infos/X_pos_2 Mean                578
exploration/env_infos/X_pos_2 Std                   0
exploration/env_infos/X_pos_2 Max                 578
exploration/env_infos/X_pos_2 Min                 578
evaluation/num steps total                     323811
evaluation/num paths total                      15454
evaluation/path length Mean                        23.9375
evaluation/path length Std                         23.4193
evaluation/path le

exploration/env_infos/final/Velocity_y Min         10
exploration/env_infos/initial/Velocity_y Mean      10
exploration/env_infos/initial/Velocity_y Std        0
exploration/env_infos/initial/Velocity_y Max       10
exploration/env_infos/initial/Velocity_y Min       10
exploration/env_infos/Velocity_y Mean              10
exploration/env_infos/Velocity_y Std                0
exploration/env_infos/Velocity_y Max               10
exploration/env_infos/Velocity_y Min               10
exploration/env_infos/final/X_pos_2 Mean          508
exploration/env_infos/final/X_pos_2 Std             0
exploration/env_infos/final/X_pos_2 Max           508
exploration/env_infos/final/X_pos_2 Min           508
exploration/env_infos/initial/X_pos_2 Mean        508
exploration/env_infos/initial/X_pos_2 Std           0
exploration/env_infos/initial/X_pos_2 Max         508
exploration/env_infos/initial/X_pos_2 Min         508
exploration/env_infos/X_pos_2 Mean                508
exploration/env_infos/X_pos_

exploration/env_infos/Y_Pos_1 Mean                198
exploration/env_infos/Y_Pos_1 Std                   0
exploration/env_infos/Y_Pos_1 Max                 198
exploration/env_infos/Y_Pos_1 Min                 198
exploration/env_infos/final/Velocity_y Mean         6
exploration/env_infos/final/Velocity_y Std          0
exploration/env_infos/final/Velocity_y Max          6
exploration/env_infos/final/Velocity_y Min          6
exploration/env_infos/initial/Velocity_y Mean       6
exploration/env_infos/initial/Velocity_y Std        0
exploration/env_infos/initial/Velocity_y Max        6
exploration/env_infos/initial/Velocity_y Min        6
exploration/env_infos/Velocity_y Mean               6
exploration/env_infos/Velocity_y Std                0
exploration/env_infos/Velocity_y Max                6
exploration/env_infos/Velocity_y Min                6
exploration/env_infos/final/X_pos_2 Mean          579
exploration/env_infos/final/X_pos_2 Std             0
exploration/env_infos/final/

exploration/env_infos/final/Y_Pos_1 Std             0
exploration/env_infos/final/Y_Pos_1 Max           198
exploration/env_infos/final/Y_Pos_1 Min           198
exploration/env_infos/initial/Y_Pos_1 Mean        198
exploration/env_infos/initial/Y_Pos_1 Std           0
exploration/env_infos/initial/Y_Pos_1 Max         198
exploration/env_infos/initial/Y_Pos_1 Min         198
exploration/env_infos/Y_Pos_1 Mean                198
exploration/env_infos/Y_Pos_1 Std                   0
exploration/env_infos/Y_Pos_1 Max                 198
exploration/env_infos/Y_Pos_1 Min                 198
exploration/env_infos/final/Velocity_y Mean         6.5
exploration/env_infos/final/Velocity_y Std          3.5
exploration/env_infos/final/Velocity_y Max         10
exploration/env_infos/final/Velocity_y Min          3
exploration/env_infos/initial/Velocity_y Mean       6.5
exploration/env_infos/initial/Velocity_y Std        3.5
exploration/env_infos/initial/Velocity_y Max       10
exploration/env_info

exploration/env_infos/initial/actions Max           0
exploration/env_infos/initial/actions Min           0
exploration/env_infos/actions Mean                  0
exploration/env_infos/actions Std                   0
exploration/env_infos/actions Max                   0
exploration/env_infos/actions Min                   0
exploration/env_infos/final/Y_Pos_1 Mean          198
exploration/env_infos/final/Y_Pos_1 Std             0
exploration/env_infos/final/Y_Pos_1 Max           198
exploration/env_infos/final/Y_Pos_1 Min           198
exploration/env_infos/initial/Y_Pos_1 Mean        198
exploration/env_infos/initial/Y_Pos_1 Std           0
exploration/env_infos/initial/Y_Pos_1 Max         198
exploration/env_infos/initial/Y_Pos_1 Min         198
exploration/env_infos/Y_Pos_1 Mean                198
exploration/env_infos/Y_Pos_1 Std                   0
exploration/env_infos/Y_Pos_1 Max                 198
exploration/env_infos/Y_Pos_1 Min                 198
exploration/env_infos/final/

exploration/Average Returns                       -10
exploration/env_infos/final/actions Mean            0
exploration/env_infos/final/actions Std             0
exploration/env_infos/final/actions Max             0
exploration/env_infos/final/actions Min             0
exploration/env_infos/initial/actions Mean          0
exploration/env_infos/initial/actions Std           0
exploration/env_infos/initial/actions Max           0
exploration/env_infos/initial/actions Min           0
exploration/env_infos/actions Mean                  0
exploration/env_infos/actions Std                   0
exploration/env_infos/actions Max                   0
exploration/env_infos/actions Min                   0
exploration/env_infos/final/Y_Pos_1 Mean          198
exploration/env_infos/final/Y_Pos_1 Std             0
exploration/env_infos/final/Y_Pos_1 Max           198
exploration/env_infos/final/Y_Pos_1 Min           198
exploration/env_infos/initial/Y_Pos_1 Mean        198
exploration/env_infos/initia

exploration/Returns Max                            -1
exploration/Returns Min                           -28
exploration/Actions Mean                           69.5
exploration/Actions Std                            23.2347
exploration/Actions Max                            96
exploration/Actions Min                            18
exploration/Num Paths                               2
exploration/Average Returns                       -14.5
exploration/env_infos/final/actions Mean            0
exploration/env_infos/final/actions Std             0
exploration/env_infos/final/actions Max             0
exploration/env_infos/final/actions Min             0
exploration/env_infos/initial/actions Mean          0
exploration/env_infos/initial/actions Std           0
exploration/env_infos/initial/actions Max           0
exploration/env_infos/initial/actions Min           0
exploration/env_infos/actions Mean                  0
exploration/env_infos/actions Std                   0
exploration/env_inf

exploration/path length Min                        10
exploration/Rewards Mean                           -1
exploration/Rewards Std                             0
exploration/Rewards Max                            -1
exploration/Rewards Min                            -1
exploration/Returns Mean                          -10
exploration/Returns Std                             0
exploration/Returns Max                           -10
exploration/Returns Min                           -10
exploration/Actions Mean                           35.5
exploration/Actions Std                            19.2627
exploration/Actions Max                            54
exploration/Actions Min                             8
exploration/Num Paths                               1
exploration/Average Returns                       -10
exploration/env_infos/final/actions Mean            0
exploration/env_infos/final/actions Std             0
exploration/env_infos/final/actions Max             0
exploration/env_infos

trainer/Y Predictions Max                          -4.17115
trainer/Y Predictions Min                         -20.0664
exploration/num steps total                      1730
exploration/num paths total                       101
exploration/path length Mean                        5
exploration/path length Std                         2
exploration/path length Max                         7
exploration/path length Min                         3
exploration/Rewards Mean                           -2.9
exploration/Rewards Std                             5.7
exploration/Rewards Max                            -1
exploration/Rewards Min                           -20
exploration/Returns Mean                          -14.5
exploration/Returns Std                            11.5
exploration/Returns Max                            -3
exploration/Returns Min                           -26
exploration/Actions Mean                           73.3
exploration/Actions Std                            22.4724
ex

2019-06-09 01:42:11.110681 Pacific Daylight Time | [ddqn-train_2019_06_09_01_34_02_0000--s-0] Epoch 73 finished
---------------------------------------------  ----------------
replay_buffer/size                               1740
trainer/QF Loss                                     6.61959
trainer/Y Predictions Mean                        -15.2751
trainer/Y Predictions Std                           5.07394
trainer/Y Predictions Max                          -5.20676
trainer/Y Predictions Min                         -22.9973
exploration/num steps total                      1740
exploration/num paths total                       102
exploration/path length Mean                       10
exploration/path length Std                         0
exploration/path length Max                        10
exploration/path length Min                        10
exploration/Rewards Mean                           -1
exploration/Rewards Std                             0
exploration/Rewards Max                 

time/saving (s)                                     0.045863
time/training (s)                                   2.03174
time/epoch (s)                                      6.1572
time/total (s)                                    490.087
Epoch                                              73
---------------------------------------------  ----------------
2019-06-09 01:42:18.607151 Pacific Daylight Time | [ddqn-train_2019_06_09_01_34_02_0000--s-0] Epoch 74 finished
---------------------------------------------  ----------------
replay_buffer/size                               1750
trainer/QF Loss                                     0.095263
trainer/Y Predictions Mean                        -15.2658
trainer/Y Predictions Std                           5.42998
trainer/Y Predictions Max                          -4.47849
trainer/Y Predictions Min                         -24.6958
exploration/num steps total                      1750
exploration/num paths total                       104
explora

evaluation/env_infos/X_pos_2 Max                  578
evaluation/env_infos/X_pos_2 Min                  123
time/data storing (s)                               0.000351018
time/evaluation sampling (s)                        4.83338
time/exploration sampling (s)                       0.0208168
time/logging (s)                                    0.0391046
time/saving (s)                                     0.00603751
time/training (s)                                   2.36775
time/epoch (s)                                      7.26744
time/total (s)                                    497.576
Epoch                                              74
---------------------------------------------  ----------------
2019-06-09 01:42:25.044806 Pacific Daylight Time | [ddqn-train_2019_06_09_01_34_02_0000--s-0] Epoch 75 finished
---------------------------------------------  ----------------
replay_buffer/size                               1760
trainer/QF Loss                                     0.0

evaluation/env_infos/final/X_pos_2 Min            120
evaluation/env_infos/initial/X_pos_2 Mean         344.077
evaluation/env_infos/initial/X_pos_2 Std          138.513
evaluation/env_infos/initial/X_pos_2 Max          579
evaluation/env_infos/initial/X_pos_2 Min          120
evaluation/env_infos/X_pos_2 Mean                 265.527
evaluation/env_infos/X_pos_2 Std                  136.102
evaluation/env_infos/X_pos_2 Max                  579
evaluation/env_infos/X_pos_2 Min                  120
time/data storing (s)                               0.000141639
time/evaluation sampling (s)                        4.34445
time/exploration sampling (s)                       0.00783201
time/logging (s)                                    0.0409463
time/saving (s)                                     0.00648993
time/training (s)                                   1.85268
time/epoch (s)                                      6.25254
time/total (s)                                    504.026
Epoch   

evaluation/env_infos/Velocity_y Std                 2.90609
evaluation/env_infos/Velocity_y Max                10
evaluation/env_infos/Velocity_y Min                 1
evaluation/env_infos/final/X_pos_2 Mean           360.506
evaluation/env_infos/final/X_pos_2 Std            129.632
evaluation/env_infos/final/X_pos_2 Max            577
evaluation/env_infos/final/X_pos_2 Min            120
evaluation/env_infos/initial/X_pos_2 Mean         360.506
evaluation/env_infos/initial/X_pos_2 Std          129.632
evaluation/env_infos/initial/X_pos_2 Max          577
evaluation/env_infos/initial/X_pos_2 Min          120
evaluation/env_infos/X_pos_2 Mean                 295.84
evaluation/env_infos/X_pos_2 Std                  131.845
evaluation/env_infos/X_pos_2 Max                  577
evaluation/env_infos/X_pos_2 Min                  120
time/data storing (s)                               0.00014246
time/evaluation sampling (s)                        3.86613
time/exploration sampling (s)         

evaluation/env_infos/final/Velocity_y Max          10
evaluation/env_infos/final/Velocity_y Min           1
evaluation/env_infos/initial/Velocity_y Mean        5.5671
evaluation/env_infos/initial/Velocity_y Std         2.91489
evaluation/env_infos/initial/Velocity_y Max        10
evaluation/env_infos/initial/Velocity_y Min         1
evaluation/env_infos/Velocity_y Mean                4.87452
evaluation/env_infos/Velocity_y Std                 2.98855
evaluation/env_infos/Velocity_y Max                10
evaluation/env_infos/Velocity_y Min                 1
evaluation/env_infos/final/X_pos_2 Mean           351.71
evaluation/env_infos/final/X_pos_2 Std            135.049
evaluation/env_infos/final/X_pos_2 Max            578
evaluation/env_infos/final/X_pos_2 Min            120
evaluation/env_infos/initial/X_pos_2 Mean         351.71
evaluation/env_infos/initial/X_pos_2 Std          135.049
evaluation/env_infos/initial/X_pos_2 Max          578
evaluation/env_infos/initial/X_pos_2 Min     

evaluation/env_infos/initial/Y_Pos_1 Min          149
evaluation/env_infos/Y_Pos_1 Mean                 149
evaluation/env_infos/Y_Pos_1 Std                    0
evaluation/env_infos/Y_Pos_1 Max                  149
evaluation/env_infos/Y_Pos_1 Min                  149
evaluation/env_infos/final/Velocity_y Mean          5.49194
evaluation/env_infos/final/Velocity_y Std           2.83696
evaluation/env_infos/final/Velocity_y Max          10
evaluation/env_infos/final/Velocity_y Min           1
evaluation/env_infos/initial/Velocity_y Mean        5.49194
evaluation/env_infos/initial/Velocity_y Std         2.83696
evaluation/env_infos/initial/Velocity_y Max        10
evaluation/env_infos/initial/Velocity_y Min         1
evaluation/env_infos/Velocity_y Mean                4.81259
evaluation/env_infos/Velocity_y Std                 2.952
evaluation/env_infos/Velocity_y Max                10
evaluation/env_infos/Velocity_y Min                 1
evaluation/env_infos/final/X_pos_2 Mean         

evaluation/env_infos/final/Y_Pos_1 Mean           149
evaluation/env_infos/final/Y_Pos_1 Std              0
evaluation/env_infos/final/Y_Pos_1 Max            149
evaluation/env_infos/final/Y_Pos_1 Min            149
evaluation/env_infos/initial/Y_Pos_1 Mean         149
evaluation/env_infos/initial/Y_Pos_1 Std            0
evaluation/env_infos/initial/Y_Pos_1 Max          149
evaluation/env_infos/initial/Y_Pos_1 Min          149
evaluation/env_infos/Y_Pos_1 Mean                 149
evaluation/env_infos/Y_Pos_1 Std                    0
evaluation/env_infos/Y_Pos_1 Max                  149
evaluation/env_infos/Y_Pos_1 Min                  149
evaluation/env_infos/final/Velocity_y Mean          5.60145
evaluation/env_infos/final/Velocity_y Std           2.8553
evaluation/env_infos/final/Velocity_y Max          10
evaluation/env_infos/final/Velocity_y Min           1
evaluation/env_infos/initial/Velocity_y Mean        5.60145
evaluation/env_infos/initial/Velocity_y Std         2.8553
evalua

evaluation/env_infos/initial/actions Std            0
evaluation/env_infos/initial/actions Max            0
evaluation/env_infos/initial/actions Min            0
evaluation/env_infos/actions Mean                   0
evaluation/env_infos/actions Std                    0
evaluation/env_infos/actions Max                    0
evaluation/env_infos/actions Min                    0
evaluation/env_infos/final/Y_Pos_1 Mean           149
evaluation/env_infos/final/Y_Pos_1 Std              0
evaluation/env_infos/final/Y_Pos_1 Max            149
evaluation/env_infos/final/Y_Pos_1 Min            149
evaluation/env_infos/initial/Y_Pos_1 Mean         149
evaluation/env_infos/initial/Y_Pos_1 Std            0
evaluation/env_infos/initial/Y_Pos_1 Max          149
evaluation/env_infos/initial/Y_Pos_1 Min          149
evaluation/env_infos/Y_Pos_1 Mean                 149
evaluation/env_infos/Y_Pos_1 Std                    0
evaluation/env_infos/Y_Pos_1 Max                  149
evaluation/env_infos/Y_Pos_1

evaluation/Num Paths                              512
evaluation/Average Returns                        -28.7578
evaluation/env_infos/final/actions Mean             0
evaluation/env_infos/final/actions Std              0
evaluation/env_infos/final/actions Max              0
evaluation/env_infos/final/actions Min              0
evaluation/env_infos/initial/actions Mean           0
evaluation/env_infos/initial/actions Std            0
evaluation/env_infos/initial/actions Max            0
evaluation/env_infos/initial/actions Min            0
evaluation/env_infos/actions Mean                   0
evaluation/env_infos/actions Std                    0
evaluation/env_infos/actions Max                    0
evaluation/env_infos/actions Min                    0
evaluation/env_infos/final/Y_Pos_1 Mean           149
evaluation/env_infos/final/Y_Pos_1 Std              0
evaluation/env_infos/final/Y_Pos_1 Max            149
evaluation/env_infos/final/Y_Pos_1 Min            149
evaluation/env_infos/in

evaluation/Returns Std                              4.16037
evaluation/Returns Max                            -26
evaluation/Returns Min                            -61
evaluation/Actions Mean                            48.3947
evaluation/Actions Std                             29.3556
evaluation/Actions Max                             98
evaluation/Actions Min                              1
evaluation/Num Paths                              406
evaluation/Average Returns                        -31.3005
evaluation/env_infos/final/actions Mean             0
evaluation/env_infos/final/actions Std              0
evaluation/env_infos/final/actions Max              0
evaluation/env_infos/final/actions Min              0
evaluation/env_infos/initial/actions Mean           0
evaluation/env_infos/initial/actions Std            0
evaluation/env_infos/initial/actions Max            0
evaluation/env_infos/initial/actions Min            0
evaluation/env_infos/actions Mean                   0
evaluat

evaluation/path length Max                        100
evaluation/path length Min                          9
evaluation/Rewards Mean                            -1.97777
evaluation/Rewards Std                              4.19782
evaluation/Rewards Max                             -1
evaluation/Rewards Min                            -20
evaluation/Returns Mean                           -37.9885
evaluation/Returns Std                             15.7889
evaluation/Returns Max                            -28
evaluation/Returns Min                           -119
evaluation/Actions Mean                            30.8054
evaluation/Actions Std                             24.0668
evaluation/Actions Max                             98
evaluation/Actions Min                              1
evaluation/Num Paths                              260
evaluation/Average Returns                        -37.9885
evaluation/env_infos/final/actions Mean             0
evaluation/env_infos/final/actions Std       

KeyboardInterrupt: 

In [4]:
# Build a standalone instance of the custom 'Physics_RL-v0' environment (the same
# id that experiment() passes to gym.make) so it can be poked at by hand.
env = gym.make('Physics_RL-v0')
# Start a fresh episode. reset() is left as the bare last expression so the
# notebook displays its return value (a 4-tuple in the recorded output).
env.reset()

(10, 174, 421, 10)

In [21]:
# Disabled check: env.observation_space.low.size  (the expression experiment() uses for obs_dim)
# Apply action 10 for one step and display the returned 4-tuple; in the recorded
# output it looks like (observation, reward, done, info) — TODO confirm against the env.
# NOTE(review): this cell is stateful — each run advances the episode, and the
# info['actions'] list in the recorded output presumably grows by one entry per run,
# so the displayed result will not be reproduced by a single Restart & Run All.
env.step(10)

((180, 174, 421, 95),
 0,
 False,
 {'Velocity_y': 5,
  'X_pos_2': 421,
  'Y_Pos_1': 278,
  'actions': [10,
   10,
   10,
   10,
   10,
   10,
   10,
   10,
   10,
   10,
   10,
   10,
   10,
   10,
   10,
   10,
   10]})