In [1]:
import gym
import gym_Physics_RL
from torch import nn as nn

from rlkit.exploration_strategies.base import PolicyWrappedWithExplorationStrategy
from rlkit.exploration_strategies.epsilon_greedy import EpsilonGreedy
from rlkit.policies.argmax import ArgmaxDiscretePolicy
from rlkit.torch.dqn.double_dqn import DoubleDQNTrainer
from rlkit.torch.networks import Mlp
import rlkit.torch.pytorch_util as ptu
from rlkit.data_management.env_replay_buffer import EnvReplayBuffer
from rlkit.launchers.launcher_util import setup_logger
from rlkit.samplers.data_collector import MdpPathCollector
from rlkit.torch.torch_rl_algorithm import TorchBatchRLAlgorithm


def experiment(variant):
    """Train a Double DQN agent with rlkit on a discrete-action gym environment.

    Two instances of the same environment are created (one used for
    exploration, one for evaluation), a Q-network and a target Q-network with
    identical architecture are built, and everything is handed to
    ``TorchBatchRLAlgorithm``, which is moved to ``ptu.device`` and trained.

    Args:
        variant (dict): Experiment configuration.

            Required keys:
              * ``'replay_buffer_size'`` -- first argument of ``EnvReplayBuffer``.
              * ``'trainer_kwargs'`` -- expanded into ``DoubleDQNTrainer``.
              * ``'algorithm_kwargs'`` -- expanded into ``TorchBatchRLAlgorithm``.

            Optional keys (the defaults reproduce the values that used to be
            hard-coded in this function):
              * ``'env_id'`` -- gym environment id, default ``'Physics_RL-v0'``.
              * ``'hidden_sizes'`` -- hidden layer widths of both Q-networks,
                default ``[32, 32]``.

            Note: ``'layer_size'`` is NOT read here; the network width is
            controlled by ``'hidden_sizes'`` only.

    Returns:
        None.

    Raises:
        KeyError: if one of the required keys is missing from ``variant``.
    """
    env_id = variant.get('env_id', 'Physics_RL-v0')
    hidden_sizes = variant.get('hidden_sizes', [32, 32])

    # Exploration and evaluation each get their own environment instance.
    expl_env = gym.make(env_id)
    eval_env = gym.make(env_id)
    obs_dim = expl_env.observation_space.low.size
    # `.n` is read from the action space, so this code path expects a
    # discrete action space (it is also what ArgmaxDiscretePolicy is fed).
    action_dim = eval_env.action_space.n

    def build_q_network():
        # Single definition of the architecture so the online and the target
        # network cannot drift apart; each call gets its own list copy.
        return Mlp(
            hidden_sizes=list(hidden_sizes),
            input_size=obs_dim,
            output_size=action_dim,
        )

    # Creation order (online first, target second) is kept as before.
    qf = build_q_network()
    target_qf = build_q_network()
    qf_criterion = nn.MSELoss()

    # Evaluation acts through ArgmaxDiscretePolicy(qf); exploration wraps the
    # same policy with an epsilon-greedy strategy.
    eval_policy = ArgmaxDiscretePolicy(qf)
    expl_policy = PolicyWrappedWithExplorationStrategy(
        EpsilonGreedy(expl_env.action_space),
        eval_policy,
    )
    eval_path_collector = MdpPathCollector(
        eval_env,
        eval_policy,
    )
    expl_path_collector = MdpPathCollector(
        expl_env,
        expl_policy,
    )
    trainer = DoubleDQNTrainer(
        qf=qf,
        target_qf=target_qf,
        qf_criterion=qf_criterion,
        **variant['trainer_kwargs']
    )
    replay_buffer = EnvReplayBuffer(
        variant['replay_buffer_size'],
        expl_env,
    )
    algorithm = TorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        **variant['algorithm_kwargs']
    )
    algorithm.to(ptu.device)
    algorithm.train()

doodad not detected


In [2]:
if __name__ == "__main__":
    # Full experiment configuration. It is logged by setup_logger() and then
    # consumed by experiment(), which reads 'replay_buffer_size',
    # 'algorithm_kwargs' and 'trainer_kwargs'.
    #
    # NOTE(review): only 10 exploration steps are requested per train loop
    # while 1000 training steps with batch_size=256 are run; the captured log
    # shows replay_buffer/size at 20 after epoch 0 and 260 after epoch 24,
    # with trainer/QF Loss rising from ~3.5 to ~390. Presumably the
    # exploration budget should be larger -- confirm before a long run.
    # noinspection PyTypeChecker
    variant = {
        "algorithm": "DDQN",
        "version": "normal",
        "layer_size": 256,
        "replay_buffer_size": 10 ** 6,
        "algorithm_kwargs": {
            "num_epochs": 3000,
            "num_eval_steps_per_epoch": 5000,
            "num_trains_per_train_loop": 1000,
            "num_expl_steps_per_train_loop": 10,
            "min_num_steps_before_training": 10,
            "max_path_length": 10,
            "batch_size": 256,
        },
        "trainer_kwargs": {
            "discount": 0.99,
            "learning_rate": 3e-4,
        },
    }
    setup_logger('ddqn-train', variant=variant)
    # ptu.set_gpu_mode(True)  # optionally set the GPU (default=False)
    experiment(variant)

2019-06-08 21:13:54.963605 Pacific Daylight Time | Variant:
2019-06-08 21:13:54.965607 Pacific Daylight Time | {
  "algorithm": "DDQN",
  "version": "normal",
  "layer_size": 256,
  "replay_buffer_size": 1000000,
  "algorithm_kwargs": {
    "num_epochs": 3000,
    "num_eval_steps_per_epoch": 5000,
    "num_trains_per_train_loop": 1000,
    "num_expl_steps_per_train_loop": 10,
    "min_num_steps_before_training": 10,
    "max_path_length": 10,
    "batch_size": 256
  },
  "trainer_kwargs": {
    "discount": 0.99,
    "learning_rate": 0.0003
  }
}
2019-06-08 21:14:03.271335 Pacific Daylight Time | [h:\work\psu\ece 510(deep learning theory and practices)\project\rlkit\rlkit\..\data\ddqn-train\ddqn-train_2019_06_08_21_13_54_0000--s-0] Epoch 0 finished
---------------------------------------------  --------------
replay_buffer/size                               20
trainer/QF Loss                                   3.4821
trainer/Y Predictions Mean                        0.23983
trainer/Y Pre

evaluation/env_infos/X_pos_2 Std                131.014
evaluation/env_infos/X_pos_2 Max                579
evaluation/env_infos/X_pos_2 Min                120
time/data storing (s)                             0.000133017
time/evaluation sampling (s)                      3.95574
time/exploration sampling (s)                     0.00700434
time/logging (s)                                  0.0666458
time/saving (s)                                   0.00952634
time/training (s)                                 4.09406
time/epoch (s)                                    8.13311
time/total (s)                                   10.5608
Epoch                                             0
---------------------------------------------  --------------
2019-06-08 21:14:12.746959 Pacific Daylight Time | [h:\work\psu\ece 510(deep learning theory and practices)\project\rlkit\rlkit\..\data\ddqn-train\ddqn-train_2019_06_08_21_13_54_0000--s-0] Epoch 1 finished
---------------------------------------------

evaluation/env_infos/initial/X_pos_2 Max        580
evaluation/env_infos/initial/X_pos_2 Min        121
evaluation/env_infos/X_pos_2 Mean               359.035
evaluation/env_infos/X_pos_2 Std                132.578
evaluation/env_infos/X_pos_2 Max                580
evaluation/env_infos/X_pos_2 Min                121
time/data storing (s)                             0.000143691
time/evaluation sampling (s)                      4.7883
time/exploration sampling (s)                     0.00851269
time/logging (s)                                  0.0557408
time/saving (s)                                   0.00679168
time/training (s)                                 4.43137
time/epoch (s)                                    9.29086
time/total (s)                                   20.0185
Epoch                                             1
---------------------------------------------  --------------
2019-06-08 21:14:22.679114 Pacific Daylight Time | [h:\work\psu\ece 510(deep learning theory

evaluation/env_infos/final/X_pos_2 Std           129.747
evaluation/env_infos/final/X_pos_2 Max           580
evaluation/env_infos/final/X_pos_2 Min           120
evaluation/env_infos/initial/X_pos_2 Mean        341.488
evaluation/env_infos/initial/X_pos_2 Std         129.747
evaluation/env_infos/initial/X_pos_2 Max         580
evaluation/env_infos/initial/X_pos_2 Min         120
evaluation/env_infos/X_pos_2 Mean                347.585
evaluation/env_infos/X_pos_2 Std                 128.524
evaluation/env_infos/X_pos_2 Max                 580
evaluation/env_infos/X_pos_2 Min                 120
time/data storing (s)                              0.000136712
time/evaluation sampling (s)                       4.6502
time/exploration sampling (s)                      0.00766656
time/logging (s)                                   0.0499353
time/saving (s)                                    0.00794819
time/training (s)                                  5.01254
time/epoch (s)                  

evaluation/env_infos/initial/Velocity_y Min        1
evaluation/env_infos/Velocity_y Mean               5.7148
evaluation/env_infos/Velocity_y Std                2.85317
evaluation/env_infos/Velocity_y Max               10
evaluation/env_infos/Velocity_y Min                1
evaluation/env_infos/final/X_pos_2 Mean          355.646
evaluation/env_infos/final/X_pos_2 Std           135.305
evaluation/env_infos/final/X_pos_2 Max           580
evaluation/env_infos/final/X_pos_2 Min           121
evaluation/env_infos/initial/X_pos_2 Mean        355.646
evaluation/env_infos/initial/X_pos_2 Std         135.305
evaluation/env_infos/initial/X_pos_2 Max         580
evaluation/env_infos/initial/X_pos_2 Min         121
evaluation/env_infos/X_pos_2 Mean                360.475
evaluation/env_infos/X_pos_2 Std                 135.053
evaluation/env_infos/X_pos_2 Max                 580
evaluation/env_infos/X_pos_2 Min                 121
time/data storing (s)                              0.000139997
t

evaluation/env_infos/final/Velocity_y Std          2.93248
evaluation/env_infos/final/Velocity_y Max         10
evaluation/env_infos/final/Velocity_y Min          1
evaluation/env_infos/initial/Velocity_y Mean       5.48684
evaluation/env_infos/initial/Velocity_y Std        2.93248
evaluation/env_infos/initial/Velocity_y Max       10
evaluation/env_infos/initial/Velocity_y Min        1
evaluation/env_infos/Velocity_y Mean               5.5002
evaluation/env_infos/Velocity_y Std                2.94199
evaluation/env_infos/Velocity_y Max               10
evaluation/env_infos/Velocity_y Min                1
evaluation/env_infos/final/X_pos_2 Mean          335.519
evaluation/env_infos/final/X_pos_2 Std           129.587
evaluation/env_infos/final/X_pos_2 Max           579
evaluation/env_infos/final/X_pos_2 Min           120
evaluation/env_infos/initial/X_pos_2 Mean        335.519
evaluation/env_infos/initial/X_pos_2 Std         129.587
evaluation/env_infos/initial/X_pos_2 Max         579
e

evaluation/env_infos/Y_Pos_1 Mean                297
evaluation/env_infos/Y_Pos_1 Std                   0
evaluation/env_infos/Y_Pos_1 Max                 297
evaluation/env_infos/Y_Pos_1 Min                 297
evaluation/env_infos/final/Velocity_y Mean         5.602
evaluation/env_infos/final/Velocity_y Std          2.87673
evaluation/env_infos/final/Velocity_y Max         10
evaluation/env_infos/final/Velocity_y Min          1
evaluation/env_infos/initial/Velocity_y Mean       5.602
evaluation/env_infos/initial/Velocity_y Std        2.87673
evaluation/env_infos/initial/Velocity_y Max       10
evaluation/env_infos/initial/Velocity_y Min        1
evaluation/env_infos/Velocity_y Mean               5.602
evaluation/env_infos/Velocity_y Std                2.87673
evaluation/env_infos/Velocity_y Max               10
evaluation/env_infos/Velocity_y Min                1
evaluation/env_infos/final/X_pos_2 Mean          354.08
evaluation/env_infos/final/X_pos_2 Std           136.425
evaluatio

evaluation/env_infos/final/Y_Pos_1 Max           297
evaluation/env_infos/final/Y_Pos_1 Min           297
evaluation/env_infos/initial/Y_Pos_1 Mean        297
evaluation/env_infos/initial/Y_Pos_1 Std           0
evaluation/env_infos/initial/Y_Pos_1 Max         297
evaluation/env_infos/initial/Y_Pos_1 Min         297
evaluation/env_infos/Y_Pos_1 Mean                297
evaluation/env_infos/Y_Pos_1 Std                   0
evaluation/env_infos/Y_Pos_1 Max                 297
evaluation/env_infos/Y_Pos_1 Min                 297
evaluation/env_infos/final/Velocity_y Mean         5.442
evaluation/env_infos/final/Velocity_y Std          2.71857
evaluation/env_infos/final/Velocity_y Max         10
evaluation/env_infos/final/Velocity_y Min          1
evaluation/env_infos/initial/Velocity_y Mean       5.442
evaluation/env_infos/initial/Velocity_y Std        2.71857
evaluation/env_infos/initial/Velocity_y Max       10
evaluation/env_infos/initial/Velocity_y Min        1
evaluation/env_infos/Veloc

evaluation/env_infos/initial/actions Min           0
evaluation/env_infos/actions Mean                  0
evaluation/env_infos/actions Std                   0
evaluation/env_infos/actions Max                   0
evaluation/env_infos/actions Min                   0
evaluation/env_infos/final/Y_Pos_1 Mean          297
evaluation/env_infos/final/Y_Pos_1 Std             0
evaluation/env_infos/final/Y_Pos_1 Max           297
evaluation/env_infos/final/Y_Pos_1 Min           297
evaluation/env_infos/initial/Y_Pos_1 Mean        297
evaluation/env_infos/initial/Y_Pos_1 Std           0
evaluation/env_infos/initial/Y_Pos_1 Max         297
evaluation/env_infos/initial/Y_Pos_1 Min         297
evaluation/env_infos/Y_Pos_1 Mean                297
evaluation/env_infos/Y_Pos_1 Std                   0
evaluation/env_infos/Y_Pos_1 Max                 297
evaluation/env_infos/Y_Pos_1 Min                 297
evaluation/env_infos/final/Velocity_y Mean         5.50772
evaluation/env_infos/final/Velocity_y St

evaluation/env_infos/final/actions Mean            0
evaluation/env_infos/final/actions Std             0
evaluation/env_infos/final/actions Max             0
evaluation/env_infos/final/actions Min             0
evaluation/env_infos/initial/actions Mean          0
evaluation/env_infos/initial/actions Std           0
evaluation/env_infos/initial/actions Max           0
evaluation/env_infos/initial/actions Min           0
evaluation/env_infos/actions Mean                  0
evaluation/env_infos/actions Std                   0
evaluation/env_infos/actions Max                   0
evaluation/env_infos/actions Min                   0
evaluation/env_infos/final/Y_Pos_1 Mean          297
evaluation/env_infos/final/Y_Pos_1 Std             0
evaluation/env_infos/final/Y_Pos_1 Max           297
evaluation/env_infos/final/Y_Pos_1 Min           297
evaluation/env_infos/initial/Y_Pos_1 Mean        297
evaluation/env_infos/initial/Y_Pos_1 Std           0
evaluation/env_infos/initial/Y_Pos_1 Max      

evaluation/Actions Mean                           66.6543
evaluation/Actions Std                            14.6956
evaluation/Actions Max                            88
evaluation/Actions Min                            21
evaluation/Num Paths                             551
evaluation/Average Returns                        -1.1706
evaluation/env_infos/final/actions Mean            0
evaluation/env_infos/final/actions Std             0
evaluation/env_infos/final/actions Max             0
evaluation/env_infos/final/actions Min             0
evaluation/env_infos/initial/actions Mean          0
evaluation/env_infos/initial/actions Std           0
evaluation/env_infos/initial/actions Max           0
evaluation/env_infos/initial/actions Min           0
evaluation/env_infos/actions Mean                  0
evaluation/env_infos/actions Std                   0
evaluation/env_infos/actions Max                   0
evaluation/env_infos/actions Min                   0
evaluation/env_infos/final/Y_Po

evaluation/Rewards Max                             1
evaluation/Rewards Min                           -10
evaluation/Returns Mean                            3.78119
evaluation/Returns Std                             5.90082
evaluation/Returns Max                            10
evaluation/Returns Min                            -3
evaluation/Actions Mean                           55.6016
evaluation/Actions Std                            18.8385
evaluation/Actions Max                            88
evaluation/Actions Min                            21
evaluation/Num Paths                             521
evaluation/Average Returns                         3.78119
evaluation/env_infos/final/actions Mean            0
evaluation/env_infos/final/actions Std             0
evaluation/env_infos/final/actions Max             0
evaluation/env_infos/final/actions Min             0
evaluation/env_infos/initial/actions Mean          0
evaluation/env_infos/initial/actions Std           0
evaluation/env_inf

evaluation/path length Mean                       10
evaluation/path length Std                         0
evaluation/path length Max                        10
evaluation/path length Min                        10
evaluation/Rewards Mean                            0.043
evaluation/Rewards Std                             3.10019
evaluation/Rewards Max                             1
evaluation/Rewards Min                           -10
evaluation/Returns Mean                            0.43
evaluation/Returns Std                             3.69934
evaluation/Returns Max                            10
evaluation/Returns Min                            -1
evaluation/Actions Mean                           56.3216
evaluation/Actions Std                            10.4425
evaluation/Actions Max                            88
evaluation/Actions Min                            12
evaluation/Num Paths                             500
evaluation/Average Returns                         0.43
evaluation/env

exploration/env_infos/X_pos_2 Std                  0
exploration/env_infos/X_pos_2 Max                195
exploration/env_infos/X_pos_2 Min                195
evaluation/num steps total                     64952
evaluation/num paths total                      6894
evaluation/path length Mean                        9.96806
evaluation/path length Std                         0.17583
evaluation/path length Max                        10
evaluation/path length Min                         9
evaluation/Rewards Mean                            0.947137
evaluation/Rewards Std                             0.760725
evaluation/Rewards Max                             1
evaluation/Rewards Min                           -10
evaluation/Returns Mean                            9.44112
evaluation/Returns Std                             2.49371
evaluation/Returns Max                            10
evaluation/Returns Min                            -2
evaluation/Actions Mean                           18.9093
eva

exploration/env_infos/final/X_pos_2 Min          452
exploration/env_infos/initial/X_pos_2 Mean       455
exploration/env_infos/initial/X_pos_2 Std          3
exploration/env_infos/initial/X_pos_2 Max        458
exploration/env_infos/initial/X_pos_2 Min        452
exploration/env_infos/X_pos_2 Mean               452.6
exploration/env_infos/X_pos_2 Std                  1.8
exploration/env_infos/X_pos_2 Max                458
exploration/env_infos/X_pos_2 Min                452
evaluation/num steps total                     69944
evaluation/num paths total                      7451
evaluation/path length Mean                        8.9623
evaluation/path length Std                         0.190475
evaluation/path length Max                         9
evaluation/path length Min                         8
evaluation/Rewards Mean                           -0.227364
evaluation/Rewards Std                             3.46332
evaluation/Rewards Max                             1
evaluation/Reward

exploration/env_infos/Velocity_y Std               0
exploration/env_infos/Velocity_y Max               3
exploration/env_infos/Velocity_y Min               3
exploration/env_infos/final/X_pos_2 Mean         357
exploration/env_infos/final/X_pos_2 Std            0
exploration/env_infos/final/X_pos_2 Max          357
exploration/env_infos/final/X_pos_2 Min          357
exploration/env_infos/initial/X_pos_2 Mean       357
exploration/env_infos/initial/X_pos_2 Std          0
exploration/env_infos/initial/X_pos_2 Max        357
exploration/env_infos/initial/X_pos_2 Min        357
exploration/env_infos/X_pos_2 Mean               357
exploration/env_infos/X_pos_2 Std                  0
exploration/env_infos/X_pos_2 Max                357
exploration/env_infos/X_pos_2 Min                357
evaluation/num steps total                     74940
evaluation/num paths total                      7951
evaluation/path length Mean                        9.992
evaluation/path length Std                

exploration/env_infos/final/Velocity_y Min         8
exploration/env_infos/initial/Velocity_y Mean      8.5
exploration/env_infos/initial/Velocity_y Std       0.5
exploration/env_infos/initial/Velocity_y Max       9
exploration/env_infos/initial/Velocity_y Min       8
exploration/env_infos/Velocity_y Mean              8.2
exploration/env_infos/Velocity_y Std               0.4
exploration/env_infos/Velocity_y Max               9
exploration/env_infos/Velocity_y Min               8
exploration/env_infos/final/X_pos_2 Mean         324.5
exploration/env_infos/final/X_pos_2 Std           77.5
exploration/env_infos/final/X_pos_2 Max          402
exploration/env_infos/final/X_pos_2 Min          247
exploration/env_infos/initial/X_pos_2 Mean       324.5
exploration/env_infos/initial/X_pos_2 Std         77.5
exploration/env_infos/initial/X_pos_2 Max        402
exploration/env_infos/initial/X_pos_2 Min        247
exploration/env_infos/X_pos_2 Mean               278
exploration/env_infos/X_pos_2 

exploration/env_infos/Y_Pos_1 Mean               345
exploration/env_infos/Y_Pos_1 Std                  0
exploration/env_infos/Y_Pos_1 Max                345
exploration/env_infos/Y_Pos_1 Min                345
exploration/env_infos/final/Velocity_y Mean        5
exploration/env_infos/final/Velocity_y Std         3
exploration/env_infos/final/Velocity_y Max         8
exploration/env_infos/final/Velocity_y Min         2
exploration/env_infos/initial/Velocity_y Mean      5
exploration/env_infos/initial/Velocity_y Std       3
exploration/env_infos/initial/Velocity_y Max       8
exploration/env_infos/initial/Velocity_y Min       2
exploration/env_infos/Velocity_y Mean              6.8
exploration/env_infos/Velocity_y Std               2.4
exploration/env_infos/Velocity_y Max               8
exploration/env_infos/Velocity_y Min               2
exploration/env_infos/final/X_pos_2 Mean         359.5
exploration/env_infos/final/X_pos_2 Std           50.5
exploration/env_infos/final/X_pos_2 Ma

exploration/env_infos/final/Y_Pos_1 Max          345
exploration/env_infos/final/Y_Pos_1 Min          345
exploration/env_infos/initial/Y_Pos_1 Mean       345
exploration/env_infos/initial/Y_Pos_1 Std          0
exploration/env_infos/initial/Y_Pos_1 Max        345
exploration/env_infos/initial/Y_Pos_1 Min        345
exploration/env_infos/Y_Pos_1 Mean               345
exploration/env_infos/Y_Pos_1 Std                  0
exploration/env_infos/Y_Pos_1 Max                345
exploration/env_infos/Y_Pos_1 Min                345
exploration/env_infos/final/Velocity_y Mean        7
exploration/env_infos/final/Velocity_y Std         2
exploration/env_infos/final/Velocity_y Max         9
exploration/env_infos/final/Velocity_y Min         5
exploration/env_infos/initial/Velocity_y Mean      7
exploration/env_infos/initial/Velocity_y Std       2
exploration/env_infos/initial/Velocity_y Max       9
exploration/env_infos/initial/Velocity_y Min       5
exploration/env_infos/Velocity_y Mean         

exploration/env_infos/actions Mean                 0
exploration/env_infos/actions Std                  0
exploration/env_infos/actions Max                  0
exploration/env_infos/actions Min                  0
exploration/env_infos/final/Y_Pos_1 Mean         345
exploration/env_infos/final/Y_Pos_1 Std            0
exploration/env_infos/final/Y_Pos_1 Max          345
exploration/env_infos/final/Y_Pos_1 Min          345
exploration/env_infos/initial/Y_Pos_1 Mean       345
exploration/env_infos/initial/Y_Pos_1 Std          0
exploration/env_infos/initial/Y_Pos_1 Max        345
exploration/env_infos/initial/Y_Pos_1 Min        345
exploration/env_infos/Y_Pos_1 Mean               345
exploration/env_infos/Y_Pos_1 Std                  0
exploration/env_infos/Y_Pos_1 Max                345
exploration/env_infos/Y_Pos_1 Min                345
exploration/env_infos/final/Velocity_y Mean        4.5
exploration/env_infos/final/Velocity_y Std         0.5
exploration/env_infos/final/Velocity_y Max

exploration/env_infos/final/actions Max            0
exploration/env_infos/final/actions Min            0
exploration/env_infos/initial/actions Mean         0
exploration/env_infos/initial/actions Std          0
exploration/env_infos/initial/actions Max          0
exploration/env_infos/initial/actions Min          0
exploration/env_infos/actions Mean                 0
exploration/env_infos/actions Std                  0
exploration/env_infos/actions Max                  0
exploration/env_infos/actions Min                  0
exploration/env_infos/final/Y_Pos_1 Mean         345
exploration/env_infos/final/Y_Pos_1 Std            0
exploration/env_infos/final/Y_Pos_1 Max          345
exploration/env_infos/final/Y_Pos_1 Min          345
exploration/env_infos/initial/Y_Pos_1 Mean       345
exploration/env_infos/initial/Y_Pos_1 Std          0
exploration/env_infos/initial/Y_Pos_1 Max        345
exploration/env_infos/initial/Y_Pos_1 Min        345
exploration/env_infos/Y_Pos_1 Mean            

exploration/Actions Std                            30.1843
exploration/Actions Max                            87
exploration/Actions Min                             9
exploration/Num Paths                               2
exploration/Average Returns                        -0.5
exploration/env_infos/final/actions Mean            0
exploration/env_infos/final/actions Std             0
exploration/env_infos/final/actions Max             0
exploration/env_infos/final/actions Min             0
exploration/env_infos/initial/actions Mean          0
exploration/env_infos/initial/actions Std           0
exploration/env_infos/initial/actions Max           0
exploration/env_infos/initial/actions Min           0
exploration/env_infos/actions Mean                  0
exploration/env_infos/actions Std                   0
exploration/env_infos/actions Max                   0
exploration/env_infos/actions Min                   0
exploration/env_infos/final/Y_Pos_1 Mean          345
exploration/env_infos

exploration/Rewards Mean                            1
exploration/Rewards Std                             0
exploration/Rewards Max                             1
exploration/Rewards Min                             1
exploration/Returns Mean                           10
exploration/Returns Std                             0
exploration/Returns Max                            10
exploration/Returns Min                            10
exploration/Actions Mean                           46.1
exploration/Actions Std                            17.2711
exploration/Actions Max                            80
exploration/Actions Min                            25
exploration/Num Paths                               1
exploration/Average Returns                        10
exploration/env_infos/final/actions Mean            0
exploration/env_infos/final/actions Std             0
exploration/env_infos/final/actions Max             0
exploration/env_infos/final/actions Min             0
exploration/env_infos

trainer/Y Predictions Max                         173.29
trainer/Y Predictions Min                         -10.8531
exploration/num steps total                       240
exploration/num paths total                        36
exploration/path length Mean                       10
exploration/path length Std                         0
exploration/path length Max                        10
exploration/path length Min                        10
exploration/Rewards Mean                            1
exploration/Rewards Std                             0
exploration/Rewards Max                             1
exploration/Rewards Min                             1
exploration/Returns Mean                           10
exploration/Returns Std                             0
exploration/Returns Max                            10
exploration/Returns Min                            10
exploration/Actions Mean                           53
exploration/Actions Std                            29.712
exploration/Acti

---------------------------------------------  ----------------
replay_buffer/size                                250
trainer/QF Loss                                   387.371
trainer/Y Predictions Mean                         91.8074
trainer/Y Predictions Std                          43.125
trainer/Y Predictions Max                         198.861
trainer/Y Predictions Min                          -9.40543
exploration/num steps total                       250
exploration/num paths total                        38
exploration/path length Mean                        5
exploration/path length Std                         4
exploration/path length Max                         9
exploration/path length Min                         1
exploration/Rewards Mean                           -0.1
exploration/Rewards Std                             3.3
exploration/Rewards Max                             1
exploration/Rewards Min                           -10
exploration/Returns Mean                     

time/training (s)                                   4.58412
time/epoch (s)                                      8.72229
time/total (s)                                    227.427
Epoch                                              23
---------------------------------------------  ----------------
2019-06-08 21:17:49.498272 Pacific Daylight Time | [h:\work\psu\ece 510(deep learning theory and practices)\project\rlkit\rlkit\..\data\ddqn-train\ddqn-train_2019_06_08_21_13_54_0000--s-0] Epoch 24 finished
---------------------------------------------  ----------------
replay_buffer/size                                260
trainer/QF Loss                                   392.093
trainer/Y Predictions Mean                        115.885
trainer/Y Predictions Std                          53.2386
trainer/Y Predictions Max                         233.414
trainer/Y Predictions Min                         -12.2145
exploration/num steps total                       260
exploration/num paths total      

evaluation/env_infos/X_pos_2 Std                  134.393
evaluation/env_infos/X_pos_2 Max                  580
evaluation/env_infos/X_pos_2 Min                  120
time/data storing (s)                               0.000139175
time/evaluation sampling (s)                        4.31626
time/exploration sampling (s)                       0.00693701
time/logging (s)                                    0.0493375
time/saving (s)                                     0.00857961
time/training (s)                                   4.72668
time/epoch (s)                                      9.10793
time/total (s)                                    236.732
Epoch                                              24
---------------------------------------------  ----------------
2019-06-08 21:17:58.881245 Pacific Daylight Time | [h:\work\psu\ece 510(deep learning theory and practices)\project\rlkit\rlkit\..\data\ddqn-train\ddqn-train_2019_06_08_21_13_54_0000--s-0] Epoch 25 finished
-------------------

evaluation/env_infos/final/X_pos_2 Mean           355.697
evaluation/env_infos/final/X_pos_2 Std            135.285
evaluation/env_infos/final/X_pos_2 Max            579
evaluation/env_infos/final/X_pos_2 Min            121
evaluation/env_infos/initial/X_pos_2 Mean         355.697
evaluation/env_infos/initial/X_pos_2 Std          135.285
evaluation/env_infos/initial/X_pos_2 Max          579
evaluation/env_infos/initial/X_pos_2 Min          121
evaluation/env_infos/X_pos_2 Mean                 356.024
evaluation/env_infos/X_pos_2 Std                  136.973
evaluation/env_infos/X_pos_2 Max                  579
evaluation/env_infos/X_pos_2 Min                  121
time/data storing (s)                               0.000147797
time/evaluation sampling (s)                        4.49605
time/exploration sampling (s)                       0.00792479
time/logging (s)                                    0.0563213
time/saving (s)                                     0.00948118
time/training (s

evaluation/env_infos/initial/Velocity_y Mean        5.53937
evaluation/env_infos/initial/Velocity_y Std         2.76088
evaluation/env_infos/initial/Velocity_y Max        10
evaluation/env_infos/initial/Velocity_y Min         1
evaluation/env_infos/Velocity_y Mean                5.55011
evaluation/env_infos/Velocity_y Std                 2.75706
evaluation/env_infos/Velocity_y Max                10
evaluation/env_infos/Velocity_y Min                 1
evaluation/env_infos/final/X_pos_2 Mean           354.461
evaluation/env_infos/final/X_pos_2 Std            136.235
evaluation/env_infos/final/X_pos_2 Max            579
evaluation/env_infos/final/X_pos_2 Min            120
evaluation/env_infos/initial/X_pos_2 Mean         354.461
evaluation/env_infos/initial/X_pos_2 Std          136.235
evaluation/env_infos/initial/X_pos_2 Max          579
evaluation/env_infos/initial/X_pos_2 Min          120
evaluation/env_infos/X_pos_2 Mean                 356.746
evaluation/env_infos/X_pos_2 Std      

evaluation/env_infos/Y_Pos_1 Mean                 297
evaluation/env_infos/Y_Pos_1 Std                    0
evaluation/env_infos/Y_Pos_1 Max                  297
evaluation/env_infos/Y_Pos_1 Min                  297
evaluation/env_infos/final/Velocity_y Mean          5.33464
evaluation/env_infos/final/Velocity_y Std           2.85452
evaluation/env_infos/final/Velocity_y Max          10
evaluation/env_infos/final/Velocity_y Min           1
evaluation/env_infos/initial/Velocity_y Mean        5.33464
evaluation/env_infos/initial/Velocity_y Std         2.85452
evaluation/env_infos/initial/Velocity_y Max        10
evaluation/env_infos/initial/Velocity_y Min         1
evaluation/env_infos/Velocity_y Mean                5.32213
evaluation/env_infos/Velocity_y Std                 2.85502
evaluation/env_infos/Velocity_y Max                10
evaluation/env_infos/Velocity_y Min                 1
evaluation/env_infos/final/X_pos_2 Mean           351.967
evaluation/env_infos/final/X_pos_2 Std    

evaluation/env_infos/actions Min                    0
evaluation/env_infos/final/Y_Pos_1 Mean           297
evaluation/env_infos/final/Y_Pos_1 Std              0
evaluation/env_infos/final/Y_Pos_1 Max            297
evaluation/env_infos/final/Y_Pos_1 Min            297
evaluation/env_infos/initial/Y_Pos_1 Mean         297
evaluation/env_infos/initial/Y_Pos_1 Std            0
evaluation/env_infos/initial/Y_Pos_1 Max          297
evaluation/env_infos/initial/Y_Pos_1 Min          297
evaluation/env_infos/Y_Pos_1 Mean                 297
evaluation/env_infos/Y_Pos_1 Std                    0
evaluation/env_infos/Y_Pos_1 Max                  297
evaluation/env_infos/Y_Pos_1 Min                  297
evaluation/env_infos/final/Velocity_y Mean          5.69767
evaluation/env_infos/final/Velocity_y Std           2.89374
evaluation/env_infos/final/Velocity_y Max          10
evaluation/env_infos/final/Velocity_y Min           1
evaluation/env_infos/initial/Velocity_y Mean        5.69767
evaluation

evaluation/env_infos/final/actions Max              0
evaluation/env_infos/final/actions Min              0
evaluation/env_infos/initial/actions Mean           0
evaluation/env_infos/initial/actions Std            0
evaluation/env_infos/initial/actions Max            0
evaluation/env_infos/initial/actions Min            0
evaluation/env_infos/actions Mean                   0
evaluation/env_infos/actions Std                    0
evaluation/env_infos/actions Max                    0
evaluation/env_infos/actions Min                    0
evaluation/env_infos/final/Y_Pos_1 Mean           297
evaluation/env_infos/final/Y_Pos_1 Std              0
evaluation/env_infos/final/Y_Pos_1 Max            297
evaluation/env_infos/final/Y_Pos_1 Min            297
evaluation/env_infos/initial/Y_Pos_1 Mean         297
evaluation/env_infos/initial/Y_Pos_1 Std            0
evaluation/env_infos/initial/Y_Pos_1 Max          297
evaluation/env_infos/initial/Y_Pos_1 Min          297
evaluation/env_infos/Y_Pos_1

evaluation/Returns Min                             -4
evaluation/Actions Mean                            69.0414
evaluation/Actions Std                             20.4487
evaluation/Actions Max                             88
evaluation/Actions Min                              9
evaluation/Num Paths                              578
evaluation/Average Returns                         -0.377163
evaluation/env_infos/final/actions Mean             0
evaluation/env_infos/final/actions Std              0
evaluation/env_infos/final/actions Max              0
evaluation/env_infos/final/actions Min              0
evaluation/env_infos/initial/actions Mean           0
evaluation/env_infos/initial/actions Std            0
evaluation/env_infos/initial/actions Max            0
evaluation/env_infos/initial/actions Min            0
evaluation/env_infos/actions Mean                   0
evaluation/env_infos/actions Std                    0
evaluation/env_infos/actions Max                    0
evaluation/

evaluation/path length Max                         10
evaluation/path length Min                          7
evaluation/Rewards Mean                             0.240696
evaluation/Rewards Std                              2.78851
evaluation/Rewards Max                              1
evaluation/Rewards Min                            -10
evaluation/Returns Mean                             2.21547
evaluation/Returns Std                              5.93764
evaluation/Returns Max                             10
evaluation/Returns Min                             -4
evaluation/Actions Mean                            61.5366
evaluation/Actions Std                             21.7646
evaluation/Actions Max                             88
evaluation/Actions Min                              9
evaluation/Num Paths                              543
evaluation/Average Returns                          2.21547
evaluation/env_infos/final/actions Mean             0
evaluation/env_infos/final/actions Std   

exploration/env_infos/initial/X_pos_2 Min         123
exploration/env_infos/X_pos_2 Mean                514.5
exploration/env_infos/X_pos_2 Std                 130.5
exploration/env_infos/X_pos_2 Max                 558
exploration/env_infos/X_pos_2 Min                 123
evaluation/num steps total                     164878
evaluation/num paths total                      17785
evaluation/path length Mean                         9.61538
evaluation/path length Std                          0.809887
evaluation/path length Max                         10
evaluation/path length Min                          7
evaluation/Rewards Mean                             0.725
evaluation/Rewards Std                              1.71737
evaluation/Rewards Max                              1
evaluation/Rewards Min                            -10
evaluation/Returns Mean                             6.97115
evaluation/Returns Std                              5.40167
evaluation/Returns Max                     

exploration/env_infos/Velocity_y Max                6
exploration/env_infos/Velocity_y Min                2
exploration/env_infos/final/X_pos_2 Mean          477.5
exploration/env_infos/final/X_pos_2 Std            54.5
exploration/env_infos/final/X_pos_2 Max           532
exploration/env_infos/final/X_pos_2 Min           423
exploration/env_infos/initial/X_pos_2 Mean        477.5
exploration/env_infos/initial/X_pos_2 Std          54.5
exploration/env_infos/initial/X_pos_2 Max         532
exploration/env_infos/initial/X_pos_2 Min         423
exploration/env_infos/X_pos_2 Mean                444.8
exploration/env_infos/X_pos_2 Std                  43.6
exploration/env_infos/X_pos_2 Max                 532
exploration/env_infos/X_pos_2 Min                 423
evaluation/num steps total                     169874
evaluation/num paths total                      18405
evaluation/path length Mean                         8.05806
evaluation/path length Std                          0.996696
eva

exploration/env_infos/final/Velocity_y Std          0
exploration/env_infos/final/Velocity_y Max          1
exploration/env_infos/final/Velocity_y Min          1
exploration/env_infos/initial/Velocity_y Mean       1
exploration/env_infos/initial/Velocity_y Std        0
exploration/env_infos/initial/Velocity_y Max        1
exploration/env_infos/initial/Velocity_y Min        1
exploration/env_infos/Velocity_y Mean               1
exploration/env_infos/Velocity_y Std                0
exploration/env_infos/Velocity_y Max                1
exploration/env_infos/Velocity_y Min                1
exploration/env_infos/final/X_pos_2 Mean          265
exploration/env_infos/final/X_pos_2 Std             0
exploration/env_infos/final/X_pos_2 Max           265
exploration/env_infos/final/X_pos_2 Min           265
exploration/env_infos/initial/X_pos_2 Mean        265
exploration/env_infos/initial/X_pos_2 Std           0
exploration/env_infos/initial/X_pos_2 Max         265
exploration/env_infos/initia

KeyboardInterrupt: 

In [4]:
# Build a standalone instance of the custom 'Physics_RL-v0' environment for
# manual inspection, separate from the expl/eval envs created in experiment().
env = gym.make('Physics_RL-v0')
# Reset as the last expression so the notebook displays the value returned by
# reset() (shown below as a 4-tuple, presumably the initial observation).
env.reset()

(10, 174, 421, 10)

In [21]:
# Leftover scratch check, kept disabled: env.observation_space.low.size
# (the same expression experiment() uses to derive obs_dim).
#
# Apply discrete action 10 once and display the returned 4-tuple, which looks
# like the usual gym (observation, reward, done, info) result.
# NOTE(review): the execution count jumped from 4 to 21 and info['actions']
# in the saved output holds 17 entries, so this cell appears to have been
# re-run repeatedly against the same `env`; the displayed result depends on
# that hidden state and will not be reproduced by Restart & Run All.
env.step(10)

((180, 174, 421, 95),
 0,
 False,
 {'Velocity_y': 5,
  'X_pos_2': 421,
  'Y_Pos_1': 278,
  'actions': [10,
   10,
   10,
   10,
   10,
   10,
   10,
   10,
   10,
   10,
   10,
   10,
   10,
   10,
   10,
   10,
   10]})