In [0]:
import os

from pyvirtualdisplay import Display

# Start an invisible virtual display (1024x768); presumably needed so the
# environment's video recording can render in a headless runtime.
# NOTE(review): the name `display` shadows IPython's built-in display()
# helper in this kernel -- consider renaming if no other cell relies on it.
display = Display(visible=0, size=(1024, 768))
display.start()

# Point DISPLAY at the virtual display, formatted as ":<display>.<screen>".
os.environ["DISPLAY"] = ":{!s}.{!s}".format(display.display, display.screen)

# Simple example running TRPO on CartPole (OpenAI Gym version)

**Important!!** Before running the following cell, make sure rllab is set up properly in your **current** runtime by running the code in **day1_rllab_setup.ipynb**.

Also, the first time this code is run, it will exit without training after only creating the personal profile. If this happens, simply run the cell again.

In [3]:
from rllab.algos.trpo import TRPO
from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
from rllab.envs.gym_env import GymEnv
from rllab.envs.normalized_env import normalize
from rllab.misc.instrument import run_experiment_lite
from rllab.policies.categorical_mlp_policy import CategoricalMLPPolicy


# The policy class must match the environment's action space: a Discrete
# action space (as here) works with CategoricalMLPPolicy, whereas a Box
# action space may need a GaussianMLPPolicy instead
# (see the trpo_gym_pendulum.py example).
gym_env = GymEnv("CartPole-v0", record_video=True)
env = normalize(gym_env)

# Neural network policy with two hidden layers of 32 units each.
policy = CategoricalMLPPolicy(env_spec=env.spec, hidden_sizes=(32, 32))

baseline = LinearFeatureBaseline(env_spec=env.spec)

# TRPO settings gathered in one place so they are easy to tweak.
trpo_kwargs = dict(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=4000,
    max_path_length=env.horizon,
    n_itr=50,
    discount=0.99,
    step_size=0.01,
    # Uncomment the next line to enable plotting. (This cell contains no
    # second `plot` parameter; only this one needs to be changed here.)
    # plot=True,
)
algo = TRPO(**trpo_kwargs)

# Run training directly in this process.
algo.train()




  "downsample module has been moved to the theano.tensor.signal.pool module.")
[2019-01-07 08:14:44,166] Making new env: CartPole-v0


2019-01-07 08:14:44.182760 UTC | observation space: Box(4,)
2019-01-07 08:14:44.184115 UTC | action space: Discrete(2)


  result = entry_point.load(False)
[2019-01-07 08:14:45,345] We did not found a dynamic library into the library_dir of the library we use for blas. If you use ATLAS, make sure to compile it with dynamics library.


2019-01-07 08:14:48.323658 UTC | Populating workers...
2019-01-07 08:14:48.326334 UTC | Populated


0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:14:49.200493 UTC | itr #0 | fitting baseline...



Total time elapsed: 00:00:00
  featmat.T.dot(returns)


2019-01-07 08:14:49.245449 UTC | itr #0 | fitted
[35m=: Compiling function f_loss[0m
[35mdone in 9.402 seconds[0m
[35m=: Compiling function constraint[0m
[35mdone in 3.657 seconds[0m
2019-01-07 08:15:02.339870 UTC | itr #0 | computing loss before
2019-01-07 08:15:02.353345 UTC | itr #0 | performing update
2019-01-07 08:15:02.366115 UTC | itr #0 | computing descent direction
[35m=: Compiling function f_grad[0m
[35mdone in 13.164 seconds[0m
[35m=: Compiling function f_Hx_plain[0m
[35mdone in 26.744 seconds[0m
2019-01-07 08:15:42.661532 UTC | itr #0 | descent direction computed
[35m=: Compiling function f_loss_constraint[0m
[35mdone in 0.172 seconds[0m
2019-01-07 08:15:42.961653 UTC | itr #0 | backtrack iters: 8
2019-01-07 08:15:42.963498 UTC | itr #0 | computing loss after
2019-01-07 08:15:42.972697 UTC | itr #0 | optimization finished
2019-01-07 08:15:43.000777 UTC | itr #0 | saving snapshot...
2019-01-07 08:15:43.005797 UTC | itr #0 | saved
2019-01-07 08:15:43.01224

0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:00


2019-01-07 08:15:43.937746 UTC | itr #1 | fitting baseline...
2019-01-07 08:15:43.950647 UTC | itr #1 | fitted


  featmat.T.dot(returns)


2019-01-07 08:15:43.980390 UTC | itr #1 | computing loss before
2019-01-07 08:15:44.002513 UTC | itr #1 | performing update
2019-01-07 08:15:44.004454 UTC | itr #1 | computing descent direction
2019-01-07 08:15:44.301027 UTC | itr #1 | descent direction computed
2019-01-07 08:15:44.372630 UTC | itr #1 | backtrack iters: 4
2019-01-07 08:15:44.374556 UTC | itr #1 | computing loss after
2019-01-07 08:15:44.383478 UTC | itr #1 | optimization finished
2019-01-07 08:15:44.422854 UTC | itr #1 | saving snapshot...
2019-01-07 08:15:44.424726 UTC | itr #1 | saved
2019-01-07 08:15:44.433539 UTC | -----------------------  -------------
2019-01-07 08:15:44.439860 UTC | Iteration                  1
2019-01-07 08:15:44.446077 UTC | AverageDiscountedReturn   14.5068
2019-01-07 08:15:44.447518 UTC | AverageReturn             15.8103
2019-01-07 08:15:44.449091 UTC | ExplainedVariance          0.416123
2019-01-07 08:15:44.450577 UTC | NumTrajs                 253
2019-01-07 08:15:44.452192 UTC | Entropy 

0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:00


2019-01-07 08:15:45.355749 UTC | itr #2 | fitting baseline...
2019-01-07 08:15:45.369960 UTC | itr #2 | fitted
2019-01-07 08:15:45.424918 UTC | itr #2 | computing loss before
2019-01-07 08:15:45.447596 UTC | itr #2 | performing update
2019-01-07 08:15:45.451337 UTC | itr #2 | computing descent direction
2019-01-07 08:15:45.732470 UTC | itr #2 | descent direction computed
2019-01-07 08:15:45.829208 UTC | itr #2 | backtrack iters: 6
2019-01-07 08:15:45.831006 UTC | itr #2 | computing loss after
2019-01-07 08:15:45.832760 UTC | itr #2 | optimization finished
2019-01-07 08:15:45.866213 UTC | itr #2 | saving snapshot...
2019-01-07 08:15:45.867847 UTC | itr #2 | saved
2019-01-07 08:15:45.871680 UTC | -----------------------  -------------
2019-01-07 08:15:45.873553 UTC | Iteration                  2
2019-01-07 08:15:45.876217 UTC | AverageDiscountedReturn   15.7939
2019-01-07 08:15:45.878807 UTC | AverageReturn             17.3463
2019-01-07 08:15:45.881985 UTC | ExplainedVariance          0

0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:00


2019-01-07 08:15:46.786559 UTC | itr #3 | fitting baseline...
2019-01-07 08:15:46.798648 UTC | itr #3 | fitted
2019-01-07 08:15:46.852993 UTC | itr #3 | computing loss before
2019-01-07 08:15:46.877180 UTC | itr #3 | performing update
2019-01-07 08:15:46.880371 UTC | itr #3 | computing descent direction
2019-01-07 08:15:47.178075 UTC | itr #3 | descent direction computed
2019-01-07 08:15:47.198493 UTC | itr #3 | backtrack iters: 0
2019-01-07 08:15:47.200438 UTC | itr #3 | computing loss after
2019-01-07 08:15:47.202641 UTC | itr #3 | optimization finished
2019-01-07 08:15:47.236432 UTC | itr #3 | saving snapshot...
2019-01-07 08:15:47.242471 UTC | itr #3 | saved
2019-01-07 08:15:47.245402 UTC | -----------------------  -------------
2019-01-07 08:15:47.247297 UTC | Iteration                  3
2019-01-07 08:15:47.249259 UTC | AverageDiscountedReturn   16.9829
2019-01-07 08:15:47.252011 UTC | AverageReturn             18.8726
2019-01-07 08:15:47.254148 UTC | ExplainedVariance          0

0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:00


2019-01-07 08:15:48.174352 UTC | itr #4 | fitting baseline...
2019-01-07 08:15:48.185483 UTC | itr #4 | fitted
2019-01-07 08:15:48.215689 UTC | itr #4 | computing loss before
2019-01-07 08:15:48.229351 UTC | itr #4 | performing update
2019-01-07 08:15:48.231664 UTC | itr #4 | computing descent direction
2019-01-07 08:15:48.541371 UTC | itr #4 | descent direction computed
2019-01-07 08:15:48.561094 UTC | itr #4 | backtrack iters: 0
2019-01-07 08:15:48.562687 UTC | itr #4 | computing loss after
2019-01-07 08:15:48.565257 UTC | itr #4 | optimization finished
2019-01-07 08:15:48.595104 UTC | itr #4 | saving snapshot...
2019-01-07 08:15:48.596668 UTC | itr #4 | saved
2019-01-07 08:15:48.599988 UTC | -----------------------  -------------
2019-01-07 08:15:48.602317 UTC | Iteration                  4
2019-01-07 08:15:48.604356 UTC | AverageDiscountedReturn   18.3195
2019-01-07 08:15:48.607050 UTC | AverageReturn             20.8446
2019-01-07 08:15:48.609559 UTC | ExplainedVariance          0

0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:00


2019-01-07 08:15:49.509106 UTC | itr #5 | fitting baseline...
2019-01-07 08:15:49.521838 UTC | itr #5 | fitted
2019-01-07 08:15:49.577403 UTC | itr #5 | computing loss before
2019-01-07 08:15:49.605551 UTC | itr #5 | performing update
2019-01-07 08:15:49.610034 UTC | itr #5 | computing descent direction
2019-01-07 08:15:49.900645 UTC | itr #5 | descent direction computed
2019-01-07 08:15:49.932905 UTC | itr #5 | backtrack iters: 1
2019-01-07 08:15:49.937736 UTC | itr #5 | computing loss after
2019-01-07 08:15:49.943912 UTC | itr #5 | optimization finished
2019-01-07 08:15:49.975517 UTC | itr #5 | saving snapshot...
2019-01-07 08:15:49.980179 UTC | itr #5 | saved
2019-01-07 08:15:49.987365 UTC | -----------------------  -------------
2019-01-07 08:15:49.992007 UTC | Iteration                  5
2019-01-07 08:15:49.996599 UTC | AverageDiscountedReturn   22.7058
2019-01-07 08:15:49.998525 UTC | AverageReturn             26.8467
2019-01-07 08:15:50.000916 UTC | ExplainedVariance          0

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:15:50.914423 UTC | itr #6 | fitting baseline...



Total time elapsed: 00:00:00


2019-01-07 08:15:50.926685 UTC | itr #6 | fitted
2019-01-07 08:15:50.984399 UTC | itr #6 | computing loss before
2019-01-07 08:15:51.008327 UTC | itr #6 | performing update
2019-01-07 08:15:51.010886 UTC | itr #6 | computing descent direction
2019-01-07 08:15:51.308610 UTC | itr #6 | descent direction computed
2019-01-07 08:15:51.340479 UTC | itr #6 | backtrack iters: 1
2019-01-07 08:15:51.345549 UTC | itr #6 | computing loss after
2019-01-07 08:15:51.351322 UTC | itr #6 | optimization finished
2019-01-07 08:15:51.380010 UTC | itr #6 | saving snapshot...
2019-01-07 08:15:51.384802 UTC | itr #6 | saved
2019-01-07 08:15:51.391363 UTC | -----------------------  -------------
2019-01-07 08:15:51.396550 UTC | Iteration                  6
2019-01-07 08:15:51.401359 UTC | AverageDiscountedReturn   32.7316
2019-01-07 08:15:51.403159 UTC | AverageReturn             45.1461
2019-01-07 08:15:51.405660 UTC | ExplainedVariance          0.070471
2019-01-07 08:15:51.407517 UTC | NumTrajs             

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:15:52.402650 UTC | itr #7 | fitting baseline...
2019-01-07 08:15:52.411574 UTC | itr #7 | fitted



Total time elapsed: 00:00:00


2019-01-07 08:15:52.474984 UTC | itr #7 | computing loss before
2019-01-07 08:15:52.499670 UTC | itr #7 | performing update
2019-01-07 08:15:52.503347 UTC | itr #7 | computing descent direction
2019-01-07 08:15:52.795904 UTC | itr #7 | descent direction computed
2019-01-07 08:15:52.829589 UTC | itr #7 | backtrack iters: 1
2019-01-07 08:15:52.831356 UTC | itr #7 | computing loss after
2019-01-07 08:15:52.834244 UTC | itr #7 | optimization finished
2019-01-07 08:15:52.864454 UTC | itr #7 | saving snapshot...
2019-01-07 08:15:52.866673 UTC | itr #7 | saved
2019-01-07 08:15:52.875425 UTC | -----------------------  -------------
2019-01-07 08:15:52.877016 UTC | Iteration                  7
2019-01-07 08:15:52.879504 UTC | AverageDiscountedReturn   38.7148
2019-01-07 08:15:52.882855 UTC | AverageReturn             56.3108
2019-01-07 08:15:52.885184 UTC | ExplainedVariance          0.18899
2019-01-07 08:15:52.887436 UTC | NumTrajs                  74
2019-01-07 08:15:52.889650 UTC | Entropy  

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:15:53.778390 UTC | itr #8 | fitting baseline...



Total time elapsed: 00:00:00


2019-01-07 08:15:53.784622 UTC | itr #8 | fitted
2019-01-07 08:15:53.842389 UTC | itr #8 | computing loss before
2019-01-07 08:15:53.869509 UTC | itr #8 | performing update
2019-01-07 08:15:53.871433 UTC | itr #8 | computing descent direction
2019-01-07 08:15:54.168292 UTC | itr #8 | descent direction computed
2019-01-07 08:15:54.199239 UTC | itr #8 | backtrack iters: 1
2019-01-07 08:15:54.200750 UTC | itr #8 | computing loss after
2019-01-07 08:15:54.203374 UTC | itr #8 | optimization finished
2019-01-07 08:15:54.246041 UTC | itr #8 | saving snapshot...
2019-01-07 08:15:54.247611 UTC | itr #8 | saved
2019-01-07 08:15:54.253179 UTC | -----------------------  -------------
2019-01-07 08:15:54.255841 UTC | Iteration                  8
2019-01-07 08:15:54.259245 UTC | AverageDiscountedReturn   63.7597
2019-01-07 08:15:54.262744 UTC | AverageReturn            121.879
2019-01-07 08:15:54.265422 UTC | ExplainedVariance          0.366996
2019-01-07 08:15:54.269639 UTC | NumTrajs              

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:15:55.160210 UTC | itr #9 | fitting baseline...



Total time elapsed: 00:00:00


2019-01-07 08:15:55.174517 UTC | itr #9 | fitted
2019-01-07 08:15:55.207771 UTC | itr #9 | computing loss before
2019-01-07 08:15:55.221020 UTC | itr #9 | performing update
2019-01-07 08:15:55.227350 UTC | itr #9 | computing descent direction
2019-01-07 08:15:55.530695 UTC | itr #9 | descent direction computed
2019-01-07 08:15:55.552206 UTC | itr #9 | backtrack iters: 0
2019-01-07 08:15:55.553743 UTC | itr #9 | computing loss after
2019-01-07 08:15:55.555231 UTC | itr #9 | optimization finished
2019-01-07 08:15:55.589567 UTC | itr #9 | saving snapshot...
2019-01-07 08:15:55.591064 UTC | itr #9 | saved
2019-01-07 08:15:55.594183 UTC | -----------------------  -------------
2019-01-07 08:15:55.596779 UTC | Iteration                  9
2019-01-07 08:15:55.599242 UTC | AverageDiscountedReturn   77.428
2019-01-07 08:15:55.601312 UTC | AverageReturn            162.36
2019-01-07 08:15:55.603036 UTC | ExplainedVariance          0.610107
2019-01-07 08:15:55.604632 UTC | NumTrajs                

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:15:56.463224 UTC | itr #10 | fitting baseline...
2019-01-07 08:15:56.471550 UTC | itr #10 | fitted



Total time elapsed: 00:00:00


2019-01-07 08:15:56.503018 UTC | itr #10 | computing loss before
2019-01-07 08:15:56.532968 UTC | itr #10 | performing update
2019-01-07 08:15:56.534897 UTC | itr #10 | computing descent direction
2019-01-07 08:15:56.835188 UTC | itr #10 | descent direction computed
2019-01-07 08:15:56.858921 UTC | itr #10 | backtrack iters: 0
2019-01-07 08:15:56.863987 UTC | itr #10 | computing loss after
2019-01-07 08:15:56.869201 UTC | itr #10 | optimization finished
2019-01-07 08:15:56.896565 UTC | itr #10 | saving snapshot...
2019-01-07 08:15:56.902181 UTC | itr #10 | saved
2019-01-07 08:15:56.908464 UTC | -----------------------  -------------
2019-01-07 08:15:56.912593 UTC | Iteration                 10
2019-01-07 08:15:56.915665 UTC | AverageDiscountedReturn   78.114
2019-01-07 08:15:56.918006 UTC | AverageReturn            173.875
2019-01-07 08:15:56.919215 UTC | ExplainedVariance          0.792554
2019-01-07 08:15:56.921694 UTC | NumTrajs                  24
2019-01-07 08:15:56.922783 UTC | E

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:15:57.827388 UTC | itr #11 | fitting baseline...



Total time elapsed: 00:00:00


2019-01-07 08:15:57.836685 UTC | itr #11 | fitted
2019-01-07 08:15:57.885986 UTC | itr #11 | computing loss before
2019-01-07 08:15:57.909189 UTC | itr #11 | performing update
2019-01-07 08:15:57.913503 UTC | itr #11 | computing descent direction
2019-01-07 08:15:58.197153 UTC | itr #11 | descent direction computed
2019-01-07 08:15:58.224195 UTC | itr #11 | backtrack iters: 0
2019-01-07 08:15:58.231685 UTC | itr #11 | computing loss after
2019-01-07 08:15:58.237236 UTC | itr #11 | optimization finished
2019-01-07 08:15:58.264850 UTC | itr #11 | saving snapshot...
2019-01-07 08:15:58.269870 UTC | itr #11 | saved
2019-01-07 08:15:58.275772 UTC | -----------------------  -------------
2019-01-07 08:15:58.281326 UTC | Iteration                 11
2019-01-07 08:15:58.286126 UTC | AverageDiscountedReturn   81.9636
2019-01-07 08:15:58.289659 UTC | AverageReturn            182.318
2019-01-07 08:15:58.290898 UTC | ExplainedVariance          0.812713
2019-01-07 08:15:58.292085 UTC | NumTrajs    

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:15:59.166366 UTC | itr #12 | fitting baseline...
2019-01-07 08:15:59.172154 UTC | itr #12 | fitted



Total time elapsed: 00:00:00


2019-01-07 08:15:59.224957 UTC | itr #12 | computing loss before
2019-01-07 08:15:59.257675 UTC | itr #12 | performing update
2019-01-07 08:15:59.259250 UTC | itr #12 | computing descent direction
2019-01-07 08:15:59.545654 UTC | itr #12 | descent direction computed
2019-01-07 08:15:59.602305 UTC | itr #12 | backtrack iters: 3
2019-01-07 08:15:59.603877 UTC | itr #12 | computing loss after
2019-01-07 08:15:59.605766 UTC | itr #12 | optimization finished
2019-01-07 08:15:59.643484 UTC | itr #12 | saving snapshot...
2019-01-07 08:15:59.645143 UTC | itr #12 | saved
2019-01-07 08:15:59.647788 UTC | -----------------------  -------------
2019-01-07 08:15:59.649506 UTC | Iteration                 12
2019-01-07 08:15:59.651169 UTC | AverageDiscountedReturn   85.497
2019-01-07 08:15:59.652916 UTC | AverageReturn            195.238
2019-01-07 08:15:59.654553 UTC | ExplainedVariance          0.930368
2019-01-07 08:15:59.656141 UTC | NumTrajs                  21
2019-01-07 08:15:59.657794 UTC | E

0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:00

2019-01-07 08:16:00.505389 UTC | itr #13 | fitting baseline...
2019-01-07 08:16:00.512084 UTC | itr #13 | fitted





2019-01-07 08:16:00.569859 UTC | itr #13 | computing loss before
2019-01-07 08:16:00.595955 UTC | itr #13 | performing update
2019-01-07 08:16:00.599421 UTC | itr #13 | computing descent direction
2019-01-07 08:16:00.891752 UTC | itr #13 | descent direction computed
2019-01-07 08:16:00.938719 UTC | itr #13 | backtrack iters: 2
2019-01-07 08:16:00.940300 UTC | itr #13 | computing loss after
2019-01-07 08:16:00.943716 UTC | itr #13 | optimization finished
2019-01-07 08:16:00.978358 UTC | itr #13 | saving snapshot...
2019-01-07 08:16:00.979758 UTC | itr #13 | saved
2019-01-07 08:16:00.984341 UTC | -----------------------  -------------
2019-01-07 08:16:00.989249 UTC | Iteration                 13
2019-01-07 08:16:00.995009 UTC | AverageDiscountedReturn   85.6135
2019-01-07 08:16:00.996778 UTC | AverageReturn            193.714
2019-01-07 08:16:00.999028 UTC | ExplainedVariance          0.95133
2019-01-07 08:16:01.002439 UTC | NumTrajs                  21
2019-01-07 08:16:01.006363 UTC | E

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:16:01.886453 UTC | itr #14 | fitting baseline...



Total time elapsed: 00:00:00


2019-01-07 08:16:01.891832 UTC | itr #14 | fitted
2019-01-07 08:16:01.957545 UTC | itr #14 | computing loss before
2019-01-07 08:16:01.983265 UTC | itr #14 | performing update
2019-01-07 08:16:01.986160 UTC | itr #14 | computing descent direction
2019-01-07 08:16:02.269700 UTC | itr #14 | descent direction computed
2019-01-07 08:16:02.307957 UTC | itr #14 | backtrack iters: 1
2019-01-07 08:16:02.310020 UTC | itr #14 | computing loss after
2019-01-07 08:16:02.313268 UTC | itr #14 | optimization finished
2019-01-07 08:16:02.344581 UTC | itr #14 | saving snapshot...
2019-01-07 08:16:02.347042 UTC | itr #14 | saved
2019-01-07 08:16:02.355398 UTC | -----------------------  -------------
2019-01-07 08:16:02.357506 UTC | Iteration                 14
2019-01-07 08:16:02.359707 UTC | AverageDiscountedReturn   86.602
2019-01-07 08:16:02.362008 UTC | AverageReturn            200
2019-01-07 08:16:02.364183 UTC | ExplainedVariance          0.98351
2019-01-07 08:16:02.366118 UTC | NumTrajs          

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:16:03.279649 UTC | itr #15 | fitting baseline...



Total time elapsed: 00:00:00


2019-01-07 08:16:03.289047 UTC | itr #15 | fitted
2019-01-07 08:16:03.354594 UTC | itr #15 | computing loss before
2019-01-07 08:16:03.384384 UTC | itr #15 | performing update
2019-01-07 08:16:03.386363 UTC | itr #15 | computing descent direction
2019-01-07 08:16:03.675980 UTC | itr #15 | descent direction computed
2019-01-07 08:16:03.725428 UTC | itr #15 | backtrack iters: 2
2019-01-07 08:16:03.730691 UTC | itr #15 | computing loss after
2019-01-07 08:16:03.735557 UTC | itr #15 | optimization finished
2019-01-07 08:16:03.765505 UTC | itr #15 | saving snapshot...
2019-01-07 08:16:03.770469 UTC | itr #15 | saved
2019-01-07 08:16:03.776596 UTC | -----------------------  -------------
2019-01-07 08:16:03.781316 UTC | Iteration                 15
2019-01-07 08:16:03.784704 UTC | AverageDiscountedReturn   81.0352
2019-01-07 08:16:03.787520 UTC | AverageReturn            182.522
2019-01-07 08:16:03.788865 UTC | ExplainedVariance          0.851732
2019-01-07 08:16:03.791628 UTC | NumTrajs    

0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:00


2019-01-07 08:16:04.750198 UTC | itr #16 | fitting baseline...
2019-01-07 08:16:04.756654 UTC | itr #16 | fitted
2019-01-07 08:16:04.788832 UTC | itr #16 | computing loss before
2019-01-07 08:16:04.806718 UTC | itr #16 | performing update
2019-01-07 08:16:04.812097 UTC | itr #16 | computing descent direction
2019-01-07 08:16:05.097044 UTC | itr #16 | descent direction computed
2019-01-07 08:16:05.131064 UTC | itr #16 | backtrack iters: 1
2019-01-07 08:16:05.135690 UTC | itr #16 | computing loss after
2019-01-07 08:16:05.143194 UTC | itr #16 | optimization finished
2019-01-07 08:16:05.172790 UTC | itr #16 | saving snapshot...
2019-01-07 08:16:05.178266 UTC | itr #16 | saved
2019-01-07 08:16:05.185160 UTC | -----------------------  -------------
2019-01-07 08:16:05.189885 UTC | Iteration                 16
2019-01-07 08:16:05.193550 UTC | AverageDiscountedReturn   85.4982
2019-01-07 08:16:05.196242 UTC | AverageReturn            194.952
2019-01-07 08:16:05.197618 UTC | ExplainedVariance 

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:16:06.114389 UTC | itr #17 | fitting baseline...



Total time elapsed: 00:00:00


2019-01-07 08:16:06.120574 UTC | itr #17 | fitted
2019-01-07 08:16:06.179937 UTC | itr #17 | computing loss before
2019-01-07 08:16:06.204475 UTC | itr #17 | performing update
2019-01-07 08:16:06.208730 UTC | itr #17 | computing descent direction
2019-01-07 08:16:06.511456 UTC | itr #17 | descent direction computed
2019-01-07 08:16:06.583095 UTC | itr #17 | backtrack iters: 4
2019-01-07 08:16:06.584733 UTC | itr #17 | computing loss after
2019-01-07 08:16:06.586433 UTC | itr #17 | optimization finished
2019-01-07 08:16:06.620410 UTC | itr #17 | saving snapshot...
2019-01-07 08:16:06.622079 UTC | itr #17 | saved
2019-01-07 08:16:06.624718 UTC | -----------------------  -------------
2019-01-07 08:16:06.628011 UTC | Iteration                 17
2019-01-07 08:16:06.630778 UTC | AverageDiscountedReturn   86.602
2019-01-07 08:16:06.632841 UTC | AverageReturn            200
2019-01-07 08:16:06.634917 UTC | ExplainedVariance          0.996169
2019-01-07 08:16:06.636851 UTC | NumTrajs         

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:16:07.559525 UTC | itr #18 | fitting baseline...
2019-01-07 08:16:07.565784 UTC | itr #18 | fitted



Total time elapsed: 00:00:00


2019-01-07 08:16:07.627112 UTC | itr #18 | computing loss before
2019-01-07 08:16:07.661597 UTC | itr #18 | performing update
2019-01-07 08:16:07.667417 UTC | itr #18 | computing descent direction
2019-01-07 08:16:07.949872 UTC | itr #18 | descent direction computed
2019-01-07 08:16:08.023532 UTC | itr #18 | backtrack iters: 4
2019-01-07 08:16:08.030444 UTC | itr #18 | computing loss after
2019-01-07 08:16:08.038785 UTC | itr #18 | optimization finished
2019-01-07 08:16:08.068811 UTC | itr #18 | saving snapshot...
2019-01-07 08:16:08.074094 UTC | itr #18 | saved
2019-01-07 08:16:08.082243 UTC | -----------------------  -------------
2019-01-07 08:16:08.087769 UTC | Iteration                 18
2019-01-07 08:16:08.093171 UTC | AverageDiscountedReturn   85.754
2019-01-07 08:16:08.094590 UTC | AverageReturn            195.286
2019-01-07 08:16:08.097770 UTC | ExplainedVariance          0.941902
2019-01-07 08:16:08.099180 UTC | NumTrajs                  21
2019-01-07 08:16:08.100522 UTC | E

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:16:09.039104 UTC | itr #19 | fitting baseline...
2019-01-07 08:16:09.044774 UTC | itr #19 | fitted



Total time elapsed: 00:00:00


2019-01-07 08:16:09.083438 UTC | itr #19 | computing loss before
2019-01-07 08:16:09.102989 UTC | itr #19 | performing update
2019-01-07 08:16:09.108598 UTC | itr #19 | computing descent direction
2019-01-07 08:16:09.396677 UTC | itr #19 | descent direction computed
2019-01-07 08:16:09.477099 UTC | itr #19 | backtrack iters: 4
2019-01-07 08:16:09.485050 UTC | itr #19 | computing loss after
2019-01-07 08:16:09.491510 UTC | itr #19 | optimization finished
2019-01-07 08:16:09.522069 UTC | itr #19 | saving snapshot...
2019-01-07 08:16:09.528179 UTC | itr #19 | saved
2019-01-07 08:16:09.535260 UTC | -----------------------  -------------
2019-01-07 08:16:09.542803 UTC | Iteration                 19
2019-01-07 08:16:09.548175 UTC | AverageDiscountedReturn   85.4443
2019-01-07 08:16:09.549644 UTC | AverageReturn            193.952
2019-01-07 08:16:09.551003 UTC | ExplainedVariance          0.914958
2019-01-07 08:16:09.555063 UTC | NumTrajs                  21
2019-01-07 08:16:09.556384 UTC | 

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:16:10.448161 UTC | itr #20 | fitting baseline...
2019-01-07 08:16:10.454267 UTC | itr #20 | fitted



Total time elapsed: 00:00:00


2019-01-07 08:16:10.489694 UTC | itr #20 | computing loss before
2019-01-07 08:16:10.523676 UTC | itr #20 | performing update
2019-01-07 08:16:10.525841 UTC | itr #20 | computing descent direction
2019-01-07 08:16:10.836483 UTC | itr #20 | descent direction computed
2019-01-07 08:16:10.882709 UTC | itr #20 | backtrack iters: 2
2019-01-07 08:16:10.884363 UTC | itr #20 | computing loss after
2019-01-07 08:16:10.889058 UTC | itr #20 | optimization finished
2019-01-07 08:16:10.924467 UTC | itr #20 | saving snapshot...
2019-01-07 08:16:10.925941 UTC | itr #20 | saved
2019-01-07 08:16:10.934300 UTC | -----------------------  -------------
2019-01-07 08:16:10.935961 UTC | Iteration                 20
2019-01-07 08:16:10.940561 UTC | AverageDiscountedReturn   84.787
2019-01-07 08:16:10.942849 UTC | AverageReturn            193.619
2019-01-07 08:16:10.945019 UTC | ExplainedVariance          0.91409
2019-01-07 08:16:10.947046 UTC | NumTrajs                  21
2019-01-07 08:16:10.948979 UTC | En

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:16:11.826975 UTC | itr #21 | fitting baseline...
2019-01-07 08:16:11.832717 UTC | itr #21 | fitted



Total time elapsed: 00:00:00


2019-01-07 08:16:11.891196 UTC | itr #21 | computing loss before
2019-01-07 08:16:11.916469 UTC | itr #21 | performing update
2019-01-07 08:16:11.920254 UTC | itr #21 | computing descent direction
2019-01-07 08:16:12.220087 UTC | itr #21 | descent direction computed
2019-01-07 08:16:12.254903 UTC | itr #21 | backtrack iters: 1
2019-01-07 08:16:12.261013 UTC | itr #21 | computing loss after
2019-01-07 08:16:12.266742 UTC | itr #21 | optimization finished
2019-01-07 08:16:12.304369 UTC | itr #21 | saving snapshot...
2019-01-07 08:16:12.321093 UTC | itr #21 | saved
2019-01-07 08:16:12.327422 UTC | -----------------------  -------------
2019-01-07 08:16:12.332502 UTC | Iteration                 21
2019-01-07 08:16:12.335925 UTC | AverageDiscountedReturn   86.602
2019-01-07 08:16:12.337487 UTC | AverageReturn            200
2019-01-07 08:16:12.340484 UTC | ExplainedVariance          0.989526
2019-01-07 08:16:12.342015 UTC | NumTrajs                  20
2019-01-07 08:16:12.343439 UTC | Entro

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:16:13.269923 UTC | itr #22 | fitting baseline...
2019-01-07 08:16:13.279644 UTC | itr #22 | fitted



Total time elapsed: 00:00:00


2019-01-07 08:16:13.311347 UTC | itr #22 | computing loss before
2019-01-07 08:16:13.339448 UTC | itr #22 | performing update
2019-01-07 08:16:13.342418 UTC | itr #22 | computing descent direction
2019-01-07 08:16:13.669020 UTC | itr #22 | descent direction computed
2019-01-07 08:16:13.716485 UTC | itr #22 | backtrack iters: 2
2019-01-07 08:16:13.718099 UTC | itr #22 | computing loss after
2019-01-07 08:16:13.719823 UTC | itr #22 | optimization finished
2019-01-07 08:16:13.753243 UTC | itr #22 | saving snapshot...
2019-01-07 08:16:13.754973 UTC | itr #22 | saved
2019-01-07 08:16:13.757505 UTC | -----------------------  -------------
2019-01-07 08:16:13.761814 UTC | Iteration                 22
2019-01-07 08:16:13.764451 UTC | AverageDiscountedReturn   84.1777
2019-01-07 08:16:13.766604 UTC | AverageReturn            192.381
2019-01-07 08:16:13.768811 UTC | ExplainedVariance          0.924083
2019-01-07 08:16:13.770846 UTC | NumTrajs                  21
2019-01-07 08:16:13.772604 UTC | 

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:16:14.803267 UTC | itr #23 | fitting baseline...
2019-01-07 08:16:14.809797 UTC | itr #23 | fitted



Total time elapsed: 00:00:01


2019-01-07 08:16:14.876419 UTC | itr #23 | computing loss before
2019-01-07 08:16:14.905485 UTC | itr #23 | performing update
2019-01-07 08:16:14.907030 UTC | itr #23 | computing descent direction
2019-01-07 08:16:15.217076 UTC | itr #23 | descent direction computed
2019-01-07 08:16:15.267770 UTC | itr #23 | backtrack iters: 2
2019-01-07 08:16:15.269788 UTC | itr #23 | computing loss after
2019-01-07 08:16:15.275997 UTC | itr #23 | optimization finished
2019-01-07 08:16:15.312106 UTC | itr #23 | saving snapshot...
2019-01-07 08:16:15.313816 UTC | itr #23 | saved
2019-01-07 08:16:15.320648 UTC | -----------------------  -------------
2019-01-07 08:16:15.326073 UTC | Iteration                 23
2019-01-07 08:16:15.333757 UTC | AverageDiscountedReturn   86.5891
2019-01-07 08:16:15.335210 UTC | AverageReturn            199.905
2019-01-07 08:16:15.337560 UTC | ExplainedVariance          0.994751
2019-01-07 08:16:15.339476 UTC | NumTrajs                  21
2019-01-07 08:16:15.340643 UTC | 

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:16:16.218097 UTC | itr #24 | fitting baseline...
2019-01-07 08:16:16.224776 UTC | itr #24 | fitted



Total time elapsed: 00:00:00


2019-01-07 08:16:16.257346 UTC | itr #24 | computing loss before
2019-01-07 08:16:16.278893 UTC | itr #24 | performing update
2019-01-07 08:16:16.284082 UTC | itr #24 | computing descent direction
2019-01-07 08:16:16.576555 UTC | itr #24 | descent direction computed
2019-01-07 08:16:16.639017 UTC | itr #24 | backtrack iters: 2
2019-01-07 08:16:16.649062 UTC | itr #24 | computing loss after
2019-01-07 08:16:16.656221 UTC | itr #24 | optimization finished
2019-01-07 08:16:16.687514 UTC | itr #24 | saving snapshot...
2019-01-07 08:16:16.694210 UTC | itr #24 | saved
2019-01-07 08:16:16.700960 UTC | -----------------------  -------------
2019-01-07 08:16:16.706455 UTC | Iteration                 24
2019-01-07 08:16:16.712492 UTC | AverageDiscountedReturn   84.5826
2019-01-07 08:16:16.713890 UTC | AverageReturn            191.667
2019-01-07 08:16:16.717035 UTC | ExplainedVariance          0.890762
2019-01-07 08:16:16.718499 UTC | NumTrajs                  21
2019-01-07 08:16:16.719742 UTC | 

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:16:17.630906 UTC | itr #25 | fitting baseline...



Total time elapsed: 00:00:00


2019-01-07 08:16:17.637446 UTC | itr #25 | fitted
2019-01-07 08:16:17.705224 UTC | itr #25 | computing loss before
2019-01-07 08:16:17.740333 UTC | itr #25 | performing update
2019-01-07 08:16:17.742079 UTC | itr #25 | computing descent direction
2019-01-07 08:16:18.042232 UTC | itr #25 | descent direction computed
2019-01-07 08:16:18.107191 UTC | itr #25 | backtrack iters: 3
2019-01-07 08:16:18.113170 UTC | itr #25 | computing loss after
2019-01-07 08:16:18.119664 UTC | itr #25 | optimization finished
2019-01-07 08:16:18.150508 UTC | itr #25 | saving snapshot...
2019-01-07 08:16:18.156184 UTC | itr #25 | saved
2019-01-07 08:16:18.162651 UTC | -----------------------  -------------
2019-01-07 08:16:18.167346 UTC | Iteration                 25
2019-01-07 08:16:18.171993 UTC | AverageDiscountedReturn   86.5624
2019-01-07 08:16:18.173546 UTC | AverageReturn            199.714
2019-01-07 08:16:18.175026 UTC | ExplainedVariance          0.992123
2019-01-07 08:16:18.178009 UTC | NumTrajs    

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:16:19.087207 UTC | itr #26 | fitting baseline...
2019-01-07 08:16:19.093268 UTC | itr #26 | fitted



Total time elapsed: 00:00:00


2019-01-07 08:16:19.150757 UTC | itr #26 | computing loss before
2019-01-07 08:16:19.179651 UTC | itr #26 | performing update
2019-01-07 08:16:19.182914 UTC | itr #26 | computing descent direction
2019-01-07 08:16:19.477395 UTC | itr #26 | descent direction computed
2019-01-07 08:16:19.509582 UTC | itr #26 | backtrack iters: 1
2019-01-07 08:16:19.511245 UTC | itr #26 | computing loss after
2019-01-07 08:16:19.513002 UTC | itr #26 | optimization finished
2019-01-07 08:16:19.551803 UTC | itr #26 | saving snapshot...
2019-01-07 08:16:19.553993 UTC | itr #26 | saved
2019-01-07 08:16:19.557454 UTC | -----------------------  -------------
2019-01-07 08:16:19.559089 UTC | Iteration                 26
2019-01-07 08:16:19.561429 UTC | AverageDiscountedReturn   86.602
2019-01-07 08:16:19.563601 UTC | AverageReturn            200
2019-01-07 08:16:19.566297 UTC | ExplainedVariance          0.999855
2019-01-07 08:16:19.567906 UTC | NumTrajs                  20
2019-01-07 08:16:19.569854 UTC | Entro

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:16:20.540611 UTC | itr #27 | fitting baseline...
2019-01-07 08:16:20.546063 UTC | itr #27 | fitted



Total time elapsed: 00:00:00


2019-01-07 08:16:20.607006 UTC | itr #27 | computing loss before
2019-01-07 08:16:20.633434 UTC | itr #27 | performing update
2019-01-07 08:16:20.637264 UTC | itr #27 | computing descent direction
2019-01-07 08:16:20.951763 UTC | itr #27 | descent direction computed
2019-01-07 08:16:21.027328 UTC | itr #27 | backtrack iters: 4
2019-01-07 08:16:21.029044 UTC | itr #27 | computing loss after
2019-01-07 08:16:21.035473 UTC | itr #27 | optimization finished
2019-01-07 08:16:21.068891 UTC | itr #27 | saving snapshot...
2019-01-07 08:16:21.070594 UTC | itr #27 | saved
2019-01-07 08:16:21.077081 UTC | -----------------------  -------------
2019-01-07 08:16:21.081848 UTC | Iteration                 27
2019-01-07 08:16:21.085163 UTC | AverageDiscountedReturn   86.602
2019-01-07 08:16:21.087348 UTC | AverageReturn            200
2019-01-07 08:16:21.089576 UTC | ExplainedVariance          0.999973
2019-01-07 08:16:21.092063 UTC | NumTrajs                  20
2019-01-07 08:16:21.093512 UTC | Entro

0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:00


2019-01-07 08:16:21.988146 UTC | itr #28 | fitting baseline...
2019-01-07 08:16:21.998414 UTC | itr #28 | fitted
2019-01-07 08:16:22.034972 UTC | itr #28 | computing loss before
2019-01-07 08:16:22.053451 UTC | itr #28 | performing update
2019-01-07 08:16:22.055113 UTC | itr #28 | computing descent direction
2019-01-07 08:16:22.347662 UTC | itr #28 | descent direction computed
2019-01-07 08:16:22.367918 UTC | itr #28 | backtrack iters: 0
2019-01-07 08:16:22.369466 UTC | itr #28 | computing loss after
2019-01-07 08:16:22.371109 UTC | itr #28 | optimization finished
2019-01-07 08:16:22.404816 UTC | itr #28 | saving snapshot...
2019-01-07 08:16:22.406485 UTC | itr #28 | saved
2019-01-07 08:16:22.410890 UTC | -----------------------  -------------
2019-01-07 08:16:22.413591 UTC | Iteration                 28
2019-01-07 08:16:22.415782 UTC | AverageDiscountedReturn   86.602
2019-01-07 08:16:22.417603 UTC | AverageReturn            200
2019-01-07 08:16:22.420796 UTC | ExplainedVariance      

0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:00


2019-01-07 08:16:23.329832 UTC | itr #29 | fitting baseline...
2019-01-07 08:16:23.335414 UTC | itr #29 | fitted
2019-01-07 08:16:23.393692 UTC | itr #29 | computing loss before
2019-01-07 08:16:23.420650 UTC | itr #29 | performing update
2019-01-07 08:16:23.422131 UTC | itr #29 | computing descent direction
2019-01-07 08:16:23.720507 UTC | itr #29 | descent direction computed
2019-01-07 08:16:23.813116 UTC | itr #29 | backtrack iters: 5
2019-01-07 08:16:23.814662 UTC | itr #29 | computing loss after
2019-01-07 08:16:23.816176 UTC | itr #29 | optimization finished
2019-01-07 08:16:23.855833 UTC | itr #29 | saving snapshot...
2019-01-07 08:16:23.857851 UTC | itr #29 | saved
2019-01-07 08:16:23.860080 UTC | -----------------------  -------------
2019-01-07 08:16:23.863509 UTC | Iteration                 29
2019-01-07 08:16:23.866931 UTC | AverageDiscountedReturn   86.602
2019-01-07 08:16:23.868999 UTC | AverageReturn            200
2019-01-07 08:16:23.871077 UTC | ExplainedVariance      

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:16:24.790240 UTC | itr #30 | fitting baseline...
2019-01-07 08:16:24.795985 UTC | itr #30 | fitted



Total time elapsed: 00:00:00


2019-01-07 08:16:24.831436 UTC | itr #30 | computing loss before
2019-01-07 08:16:24.856036 UTC | itr #30 | performing update
2019-01-07 08:16:24.859132 UTC | itr #30 | computing descent direction
2019-01-07 08:16:25.165051 UTC | itr #30 | descent direction computed
2019-01-07 08:16:25.239881 UTC | itr #30 | backtrack iters: 4
2019-01-07 08:16:25.241668 UTC | itr #30 | computing loss after
2019-01-07 08:16:25.249414 UTC | itr #30 | optimization finished
2019-01-07 08:16:25.277269 UTC | itr #30 | saving snapshot...
2019-01-07 08:16:25.283000 UTC | itr #30 | saved
2019-01-07 08:16:25.291313 UTC | -----------------------  -------------
2019-01-07 08:16:25.295559 UTC | Iteration                 30
2019-01-07 08:16:25.297144 UTC | AverageDiscountedReturn   86.602
2019-01-07 08:16:25.299444 UTC | AverageReturn            200
2019-01-07 08:16:25.304161 UTC | ExplainedVariance          0.999972
2019-01-07 08:16:25.305677 UTC | NumTrajs                  20
2019-01-07 08:16:25.307944 UTC | Entro

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:16:26.195497 UTC | itr #31 | fitting baseline...
2019-01-07 08:16:26.201802 UTC | itr #31 | fitted



Total time elapsed: 00:00:00


2019-01-07 08:16:26.269191 UTC | itr #31 | computing loss before
2019-01-07 08:16:26.301829 UTC | itr #31 | performing update
2019-01-07 08:16:26.303789 UTC | itr #31 | computing descent direction
2019-01-07 08:16:26.597538 UTC | itr #31 | descent direction computed
2019-01-07 08:16:26.685170 UTC | itr #31 | backtrack iters: 5
2019-01-07 08:16:26.693310 UTC | itr #31 | computing loss after
2019-01-07 08:16:26.699751 UTC | itr #31 | optimization finished
2019-01-07 08:16:26.730126 UTC | itr #31 | saving snapshot...
2019-01-07 08:16:26.736341 UTC | itr #31 | saved
2019-01-07 08:16:26.743410 UTC | -----------------------  -------------
2019-01-07 08:16:26.748844 UTC | Iteration                 31
2019-01-07 08:16:26.752056 UTC | AverageDiscountedReturn   86.602
2019-01-07 08:16:26.755563 UTC | AverageReturn            200
2019-01-07 08:16:26.756925 UTC | ExplainedVariance          0.999973
2019-01-07 08:16:26.758224 UTC | NumTrajs                  20
2019-01-07 08:16:26.760333 UTC | Entro

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:16:27.705653 UTC | itr #32 | fitting baseline...
2019-01-07 08:16:27.712051 UTC | itr #32 | fitted



Total time elapsed: 00:00:00


2019-01-07 08:16:27.742982 UTC | itr #32 | computing loss before
2019-01-07 08:16:27.763724 UTC | itr #32 | performing update
2019-01-07 08:16:27.765994 UTC | itr #32 | computing descent direction
2019-01-07 08:16:28.084055 UTC | itr #32 | descent direction computed
2019-01-07 08:16:28.145310 UTC | itr #32 | backtrack iters: 3
2019-01-07 08:16:28.147450 UTC | itr #32 | computing loss after
2019-01-07 08:16:28.158546 UTC | itr #32 | optimization finished
2019-01-07 08:16:28.189330 UTC | itr #32 | saving snapshot...
2019-01-07 08:16:28.190936 UTC | itr #32 | saved
2019-01-07 08:16:28.201602 UTC | -----------------------  -------------
2019-01-07 08:16:28.203040 UTC | Iteration                 32
2019-01-07 08:16:28.205354 UTC | AverageDiscountedReturn   86.602
2019-01-07 08:16:28.207941 UTC | AverageReturn            200
2019-01-07 08:16:28.210502 UTC | ExplainedVariance          0.99997
2019-01-07 08:16:28.212073 UTC | NumTrajs                  20
2019-01-07 08:16:28.213757 UTC | Entrop

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:16:29.135352 UTC | itr #33 | fitting baseline...
2019-01-07 08:16:29.141356 UTC | itr #33 | fitted



Total time elapsed: 00:00:00


2019-01-07 08:16:29.198101 UTC | itr #33 | computing loss before
2019-01-07 08:16:29.226974 UTC | itr #33 | performing update
2019-01-07 08:16:29.230338 UTC | itr #33 | computing descent direction
2019-01-07 08:16:29.535557 UTC | itr #33 | descent direction computed
2019-01-07 08:16:29.596658 UTC | itr #33 | backtrack iters: 3
2019-01-07 08:16:29.598354 UTC | itr #33 | computing loss after
2019-01-07 08:16:29.600072 UTC | itr #33 | optimization finished
2019-01-07 08:16:29.634156 UTC | itr #33 | saving snapshot...
2019-01-07 08:16:29.635848 UTC | itr #33 | saved
2019-01-07 08:16:29.640239 UTC | -----------------------  -------------
2019-01-07 08:16:29.644841 UTC | Iteration                 33
2019-01-07 08:16:29.646424 UTC | AverageDiscountedReturn   86.602
2019-01-07 08:16:29.647979 UTC | AverageReturn            200
2019-01-07 08:16:29.651595 UTC | ExplainedVariance          0.999975
2019-01-07 08:16:29.654040 UTC | NumTrajs                  20
2019-01-07 08:16:29.656567 UTC | Entro

0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:00


2019-01-07 08:16:30.522959 UTC | itr #34 | fitting baseline...
2019-01-07 08:16:30.534735 UTC | itr #34 | fitted
2019-01-07 08:16:30.608632 UTC | itr #34 | computing loss before
2019-01-07 08:16:30.639654 UTC | itr #34 | performing update
2019-01-07 08:16:30.641840 UTC | itr #34 | computing descent direction
2019-01-07 08:16:30.939463 UTC | itr #34 | descent direction computed
2019-01-07 08:16:31.020632 UTC | itr #34 | backtrack iters: 4
2019-01-07 08:16:31.025328 UTC | itr #34 | computing loss after
2019-01-07 08:16:31.029827 UTC | itr #34 | optimization finished
2019-01-07 08:16:31.059748 UTC | itr #34 | saving snapshot...
2019-01-07 08:16:31.064110 UTC | itr #34 | saved
2019-01-07 08:16:31.069553 UTC | -----------------------  -------------
2019-01-07 08:16:31.074111 UTC | Iteration                 34
2019-01-07 08:16:31.078727 UTC | AverageDiscountedReturn   81.5278
2019-01-07 08:16:31.080233 UTC | AverageReturn            176.913
2019-01-07 08:16:31.081742 UTC | ExplainedVariance 

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:16:32.025709 UTC | itr #35 | fitting baseline...



Total time elapsed: 00:00:00


2019-01-07 08:16:32.032091 UTC | itr #35 | fitted
2019-01-07 08:16:32.095034 UTC | itr #35 | computing loss before
2019-01-07 08:16:32.124488 UTC | itr #35 | performing update
2019-01-07 08:16:32.126247 UTC | itr #35 | computing descent direction
2019-01-07 08:16:32.431526 UTC | itr #35 | descent direction computed
2019-01-07 08:16:32.468526 UTC | itr #35 | backtrack iters: 1
2019-01-07 08:16:32.482578 UTC | itr #35 | computing loss after
2019-01-07 08:16:32.487770 UTC | itr #35 | optimization finished
2019-01-07 08:16:32.518843 UTC | itr #35 | saving snapshot...
2019-01-07 08:16:32.523868 UTC | itr #35 | saved
2019-01-07 08:16:32.529687 UTC | -----------------------  -------------
2019-01-07 08:16:32.535032 UTC | Iteration                 35
2019-01-07 08:16:32.539955 UTC | AverageDiscountedReturn   84.2068
2019-01-07 08:16:32.542881 UTC | AverageReturn            188.818
2019-01-07 08:16:32.544336 UTC | ExplainedVariance          0.86729
2019-01-07 08:16:32.545858 UTC | NumTrajs     

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:16:33.464822 UTC | itr #36 | fitting baseline...
2019-01-07 08:16:33.470258 UTC | itr #36 | fitted



Total time elapsed: 00:00:00


2019-01-07 08:16:33.532900 UTC | itr #36 | computing loss before
2019-01-07 08:16:33.557849 UTC | itr #36 | performing update
2019-01-07 08:16:33.559904 UTC | itr #36 | computing descent direction
2019-01-07 08:16:33.861157 UTC | itr #36 | descent direction computed
2019-01-07 08:16:33.924579 UTC | itr #36 | backtrack iters: 3
2019-01-07 08:16:33.926260 UTC | itr #36 | computing loss after
2019-01-07 08:16:33.928038 UTC | itr #36 | optimization finished
2019-01-07 08:16:33.963806 UTC | itr #36 | saving snapshot...
2019-01-07 08:16:33.965460 UTC | itr #36 | saved
2019-01-07 08:16:33.968102 UTC | -----------------------  -------------
2019-01-07 08:16:33.970936 UTC | Iteration                 36
2019-01-07 08:16:33.973744 UTC | AverageDiscountedReturn   86.602
2019-01-07 08:16:33.975577 UTC | AverageReturn            200
2019-01-07 08:16:33.977453 UTC | ExplainedVariance          0.951497
2019-01-07 08:16:33.979202 UTC | NumTrajs                  20
2019-01-07 08:16:33.980837 UTC | Entro

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:16:34.914906 UTC | itr #37 | fitting baseline...
2019-01-07 08:16:34.922848 UTC | itr #37 | fitted



Total time elapsed: 00:00:00


2019-01-07 08:16:34.959707 UTC | itr #37 | computing loss before
2019-01-07 08:16:34.978843 UTC | itr #37 | performing update
2019-01-07 08:16:34.984330 UTC | itr #37 | computing descent direction
2019-01-07 08:16:35.295610 UTC | itr #37 | descent direction computed
2019-01-07 08:16:35.404706 UTC | itr #37 | backtrack iters: 5
2019-01-07 08:16:35.411883 UTC | itr #37 | computing loss after
2019-01-07 08:16:35.417913 UTC | itr #37 | optimization finished
2019-01-07 08:16:35.448562 UTC | itr #37 | saving snapshot...
2019-01-07 08:16:35.453746 UTC | itr #37 | saved
2019-01-07 08:16:35.460401 UTC | -----------------------  -------------
2019-01-07 08:16:35.465805 UTC | Iteration                 37
2019-01-07 08:16:35.471121 UTC | AverageDiscountedReturn   86.602
2019-01-07 08:16:35.474450 UTC | AverageReturn            200
2019-01-07 08:16:35.476026 UTC | ExplainedVariance          0.999976
2019-01-07 08:16:35.477492 UTC | NumTrajs                  20
2019-01-07 08:16:35.480898 UTC | Entro

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:16:36.428068 UTC | itr #38 | fitting baseline...
2019-01-07 08:16:36.435802 UTC | itr #38 | fitted



Total time elapsed: 00:00:00


2019-01-07 08:16:36.471418 UTC | itr #38 | computing loss before
2019-01-07 08:16:36.490534 UTC | itr #38 | performing update
2019-01-07 08:16:36.495860 UTC | itr #38 | computing descent direction
2019-01-07 08:16:36.789408 UTC | itr #38 | descent direction computed
2019-01-07 08:16:36.852196 UTC | itr #38 | backtrack iters: 3
2019-01-07 08:16:36.859336 UTC | itr #38 | computing loss after
2019-01-07 08:16:36.865154 UTC | itr #38 | optimization finished
2019-01-07 08:16:36.897644 UTC | itr #38 | saving snapshot...
2019-01-07 08:16:36.903305 UTC | itr #38 | saved
2019-01-07 08:16:36.909637 UTC | -----------------------  -------------
2019-01-07 08:16:36.913958 UTC | Iteration                 38
2019-01-07 08:16:36.917008 UTC | AverageDiscountedReturn   86.602
2019-01-07 08:16:36.920047 UTC | AverageReturn            200
2019-01-07 08:16:36.921702 UTC | ExplainedVariance          0.999975
2019-01-07 08:16:36.923153 UTC | NumTrajs                  20
2019-01-07 08:16:36.924566 UTC | Entro

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:16:37.880334 UTC | itr #39 | fitting baseline...
2019-01-07 08:16:37.886894 UTC | itr #39 | fitted



Total time elapsed: 00:00:00


2019-01-07 08:16:37.926090 UTC | itr #39 | computing loss before
2019-01-07 08:16:37.948024 UTC | itr #39 | performing update
2019-01-07 08:16:37.949613 UTC | itr #39 | computing descent direction
2019-01-07 08:16:38.280263 UTC | itr #39 | descent direction computed
2019-01-07 08:16:38.344127 UTC | itr #39 | backtrack iters: 3
2019-01-07 08:16:38.345744 UTC | itr #39 | computing loss after
2019-01-07 08:16:38.347314 UTC | itr #39 | optimization finished
2019-01-07 08:16:38.385537 UTC | itr #39 | saving snapshot...
2019-01-07 08:16:38.387450 UTC | itr #39 | saved
2019-01-07 08:16:38.390534 UTC | -----------------------  -------------
2019-01-07 08:16:38.392130 UTC | Iteration                 39
2019-01-07 08:16:38.394068 UTC | AverageDiscountedReturn   84.1949
2019-01-07 08:16:38.395743 UTC | AverageReturn            187.045
2019-01-07 08:16:38.397700 UTC | ExplainedVariance          0.845626
2019-01-07 08:16:38.399501 UTC | NumTrajs                  22
2019-01-07 08:16:38.400927 UTC | 

0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:00

2019-01-07 08:16:39.364640 UTC | itr #40 | fitting baseline...
2019-01-07 08:16:39.372817 UTC | itr #40 | fitted
2019-01-07 08:16:39.405167 UTC | itr #40 | computing loss before





2019-01-07 08:16:39.429565 UTC | itr #40 | performing update
2019-01-07 08:16:39.434722 UTC | itr #40 | computing descent direction
2019-01-07 08:16:39.741655 UTC | itr #40 | descent direction computed
2019-01-07 08:16:39.764428 UTC | itr #40 | backtrack iters: 0
2019-01-07 08:16:39.770743 UTC | itr #40 | computing loss after
2019-01-07 08:16:39.775631 UTC | itr #40 | optimization finished
2019-01-07 08:16:39.807081 UTC | itr #40 | saving snapshot...
2019-01-07 08:16:39.812041 UTC | itr #40 | saved
2019-01-07 08:16:39.818176 UTC | -----------------------  -------------
2019-01-07 08:16:39.822855 UTC | Iteration                 40
2019-01-07 08:16:39.827239 UTC | AverageDiscountedReturn   83.6843
2019-01-07 08:16:39.828591 UTC | AverageReturn            184.636
2019-01-07 08:16:39.831232 UTC | ExplainedVariance          0.894431
2019-01-07 08:16:39.832514 UTC | NumTrajs                  22
2019-01-07 08:16:39.833850 UTC | Entropy                    0.53214
2019-01-07 08:16:39.837343 UTC

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:16:40.770615 UTC | itr #41 | fitting baseline...
2019-01-07 08:16:40.776881 UTC | itr #41 | fitted



Total time elapsed: 00:00:00


2019-01-07 08:16:40.815980 UTC | itr #41 | computing loss before
2019-01-07 08:16:40.840709 UTC | itr #41 | performing update
2019-01-07 08:16:40.846676 UTC | itr #41 | computing descent direction
2019-01-07 08:16:41.175204 UTC | itr #41 | descent direction computed
2019-01-07 08:16:41.241187 UTC | itr #41 | backtrack iters: 3
2019-01-07 08:16:41.245213 UTC | itr #41 | computing loss after
2019-01-07 08:16:41.246717 UTC | itr #41 | optimization finished
2019-01-07 08:16:41.290527 UTC | itr #41 | saving snapshot...
2019-01-07 08:16:41.297086 UTC | itr #41 | saved
2019-01-07 08:16:41.303584 UTC | -----------------------  -------------
2019-01-07 08:16:41.305043 UTC | Iteration                 41
2019-01-07 08:16:41.311849 UTC | AverageDiscountedReturn   85.6534
2019-01-07 08:16:41.315126 UTC | AverageReturn            194.048
2019-01-07 08:16:41.316741 UTC | ExplainedVariance          0.961936
2019-01-07 08:16:41.318013 UTC | NumTrajs                  21
2019-01-07 08:16:41.320544 UTC | 

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:16:42.318866 UTC | itr #42 | fitting baseline...
2019-01-07 08:16:42.325956 UTC | itr #42 | fitted



Total time elapsed: 00:00:00


2019-01-07 08:16:42.383402 UTC | itr #42 | computing loss before
2019-01-07 08:16:42.415812 UTC | itr #42 | performing update
2019-01-07 08:16:42.418867 UTC | itr #42 | computing descent direction
2019-01-07 08:16:42.735087 UTC | itr #42 | descent direction computed
2019-01-07 08:16:42.772245 UTC | itr #42 | backtrack iters: 1
2019-01-07 08:16:42.778630 UTC | itr #42 | computing loss after
2019-01-07 08:16:42.785099 UTC | itr #42 | optimization finished
2019-01-07 08:16:42.817226 UTC | itr #42 | saving snapshot...
2019-01-07 08:16:42.824918 UTC | itr #42 | saved
2019-01-07 08:16:42.832201 UTC | -----------------------  -------------
2019-01-07 08:16:42.837696 UTC | Iteration                 42
2019-01-07 08:16:42.839207 UTC | AverageDiscountedReturn   84.3094
2019-01-07 08:16:42.842725 UTC | AverageReturn            188.773
2019-01-07 08:16:42.844085 UTC | ExplainedVariance          0.885965
2019-01-07 08:16:42.845332 UTC | NumTrajs                  22
2019-01-07 08:16:42.848328 UTC | 

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:16:43.813191 UTC | itr #43 | fitting baseline...
2019-01-07 08:16:43.819943 UTC | itr #43 | fitted



Total time elapsed: 00:00:00


2019-01-07 08:16:43.853578 UTC | itr #43 | computing loss before
2019-01-07 08:16:43.886886 UTC | itr #43 | performing update
2019-01-07 08:16:43.888816 UTC | itr #43 | computing descent direction
2019-01-07 08:16:44.214159 UTC | itr #43 | descent direction computed
2019-01-07 08:16:44.248740 UTC | itr #43 | backtrack iters: 1
2019-01-07 08:16:44.250441 UTC | itr #43 | computing loss after
2019-01-07 08:16:44.252097 UTC | itr #43 | optimization finished
2019-01-07 08:16:44.288062 UTC | itr #43 | saving snapshot...
2019-01-07 08:16:44.289867 UTC | itr #43 | saved
2019-01-07 08:16:44.292553 UTC | -----------------------  -------------
2019-01-07 08:16:44.297073 UTC | Iteration                 43
2019-01-07 08:16:44.299602 UTC | AverageDiscountedReturn   86.1699
2019-01-07 08:16:44.302849 UTC | AverageReturn            197.238
2019-01-07 08:16:44.304557 UTC | ExplainedVariance          0.960816
2019-01-07 08:16:44.307639 UTC | NumTrajs                  21
2019-01-07 08:16:44.309074 UTC | 

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:16:45.203176 UTC | itr #44 | fitting baseline...



Total time elapsed: 00:00:00


2019-01-07 08:16:45.209929 UTC | itr #44 | fitted
2019-01-07 08:16:45.278012 UTC | itr #44 | computing loss before
2019-01-07 08:16:45.307543 UTC | itr #44 | performing update
2019-01-07 08:16:45.309561 UTC | itr #44 | computing descent direction
2019-01-07 08:16:45.628835 UTC | itr #44 | descent direction computed
2019-01-07 08:16:45.678860 UTC | itr #44 | backtrack iters: 2
2019-01-07 08:16:45.680705 UTC | itr #44 | computing loss after
2019-01-07 08:16:45.682339 UTC | itr #44 | optimization finished
2019-01-07 08:16:45.721960 UTC | itr #44 | saving snapshot...
2019-01-07 08:16:45.724063 UTC | itr #44 | saved
2019-01-07 08:16:45.727910 UTC | -----------------------  -------------
2019-01-07 08:16:45.731641 UTC | Iteration                 44
2019-01-07 08:16:45.734606 UTC | AverageDiscountedReturn   86.5416
2019-01-07 08:16:45.736465 UTC | AverageReturn            199.571
2019-01-07 08:16:45.738994 UTC | ExplainedVariance          0.994597
2019-01-07 08:16:45.740490 UTC | NumTrajs    

0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:00

2019-01-07 08:16:46.608797 UTC | itr #45 | fitting baseline...
2019-01-07 08:16:46.617860 UTC | itr #45 | fitted





2019-01-07 08:16:46.657817 UTC | itr #45 | computing loss before
2019-01-07 08:16:46.677532 UTC | itr #45 | performing update
2019-01-07 08:16:46.685099 UTC | itr #45 | computing descent direction
2019-01-07 08:16:46.994668 UTC | itr #45 | descent direction computed
2019-01-07 08:16:47.046024 UTC | itr #45 | backtrack iters: 2
2019-01-07 08:16:47.052097 UTC | itr #45 | computing loss after
2019-01-07 08:16:47.058537 UTC | itr #45 | optimization finished
2019-01-07 08:16:47.093762 UTC | itr #45 | saving snapshot...
2019-01-07 08:16:47.101130 UTC | itr #45 | saved
2019-01-07 08:16:47.107907 UTC | -----------------------  -------------
2019-01-07 08:16:47.112798 UTC | Iteration                 45
2019-01-07 08:16:47.116240 UTC | AverageDiscountedReturn   86.5891
2019-01-07 08:16:47.117782 UTC | AverageReturn            199.905
2019-01-07 08:16:47.120815 UTC | ExplainedVariance          0.999804
2019-01-07 08:16:47.122039 UTC | NumTrajs                  21
2019-01-07 08:16:47.123255 UTC | 

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:16:48.053180 UTC | itr #46 | fitting baseline...



Total time elapsed: 00:00:00


2019-01-07 08:16:48.059323 UTC | itr #46 | fitted
2019-01-07 08:16:48.099460 UTC | itr #46 | computing loss before
2019-01-07 08:16:48.118551 UTC | itr #46 | performing update
2019-01-07 08:16:48.120091 UTC | itr #46 | computing descent direction
2019-01-07 08:16:48.440133 UTC | itr #46 | descent direction computed
2019-01-07 08:16:48.480310 UTC | itr #46 | backtrack iters: 1
2019-01-07 08:16:48.482470 UTC | itr #46 | computing loss after
2019-01-07 08:16:48.486199 UTC | itr #46 | optimization finished
2019-01-07 08:16:48.520376 UTC | itr #46 | saving snapshot...
2019-01-07 08:16:48.522025 UTC | itr #46 | saved
2019-01-07 08:16:48.526572 UTC | -----------------------  -------------
2019-01-07 08:16:48.530015 UTC | Iteration                 46
2019-01-07 08:16:48.533599 UTC | AverageDiscountedReturn   85.9698
2019-01-07 08:16:48.536185 UTC | AverageReturn            196.19
2019-01-07 08:16:48.538623 UTC | ExplainedVariance          0.959879
2019-01-07 08:16:48.540966 UTC | NumTrajs     

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:16:49.412647 UTC | itr #47 | fitting baseline...
2019-01-07 08:16:49.420330 UTC | itr #47 | fitted



Total time elapsed: 00:00:00


2019-01-07 08:16:49.458564 UTC | itr #47 | computing loss before
2019-01-07 08:16:49.488352 UTC | itr #47 | performing update
2019-01-07 08:16:49.490457 UTC | itr #47 | computing descent direction
2019-01-07 08:16:49.802063 UTC | itr #47 | descent direction computed
2019-01-07 08:16:49.838755 UTC | itr #47 | backtrack iters: 1
2019-01-07 08:16:49.841123 UTC | itr #47 | computing loss after
2019-01-07 08:16:49.846091 UTC | itr #47 | optimization finished
2019-01-07 08:16:49.878614 UTC | itr #47 | saving snapshot...
2019-01-07 08:16:49.880149 UTC | itr #47 | saved
2019-01-07 08:16:49.884946 UTC | -----------------------  -------------
2019-01-07 08:16:49.888700 UTC | Iteration                 47
2019-01-07 08:16:49.900595 UTC | AverageDiscountedReturn   86.602
2019-01-07 08:16:49.904137 UTC | AverageReturn            200
2019-01-07 08:16:49.907646 UTC | ExplainedVariance          0.993289
2019-01-07 08:16:49.911002 UTC | NumTrajs                  20
2019-01-07 08:16:49.914469 UTC | Entro

0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:00


2019-01-07 08:16:50.829688 UTC | itr #48 | fitting baseline...
2019-01-07 08:16:50.838094 UTC | itr #48 | fitted
2019-01-07 08:16:50.894016 UTC | itr #48 | computing loss before
2019-01-07 08:16:50.926056 UTC | itr #48 | performing update
2019-01-07 08:16:50.928840 UTC | itr #48 | computing descent direction
2019-01-07 08:16:51.236734 UTC | itr #48 | descent direction computed
2019-01-07 08:16:51.258478 UTC | itr #48 | backtrack iters: 0
2019-01-07 08:16:51.264694 UTC | itr #48 | computing loss after
2019-01-07 08:16:51.270184 UTC | itr #48 | optimization finished
2019-01-07 08:16:51.302245 UTC | itr #48 | saving snapshot...
2019-01-07 08:16:51.308380 UTC | itr #48 | saved
2019-01-07 08:16:51.315534 UTC | -----------------------  -------------
2019-01-07 08:16:51.320682 UTC | Iteration                 48
2019-01-07 08:16:51.325921 UTC | AverageDiscountedReturn   85.2374
2019-01-07 08:16:51.327523 UTC | AverageReturn            192.429
2019-01-07 08:16:51.328998 UTC | ExplainedVariance 

0% [##############################] 100% | ETA: 00:00:00

2019-01-07 08:16:52.296723 UTC | itr #49 | fitting baseline...
2019-01-07 08:16:52.303224 UTC | itr #49 | fitted



Total time elapsed: 00:00:00


2019-01-07 08:16:52.361733 UTC | itr #49 | computing loss before
2019-01-07 08:16:52.391004 UTC | itr #49 | performing update
2019-01-07 08:16:52.392912 UTC | itr #49 | computing descent direction
2019-01-07 08:16:52.706223 UTC | itr #49 | descent direction computed
2019-01-07 08:16:52.740586 UTC | itr #49 | backtrack iters: 1
2019-01-07 08:16:52.742168 UTC | itr #49 | computing loss after
2019-01-07 08:16:52.743895 UTC | itr #49 | optimization finished
2019-01-07 08:16:52.779873 UTC | itr #49 | saving snapshot...
2019-01-07 08:16:52.781511 UTC | itr #49 | saved
2019-01-07 08:16:52.785146 UTC | -----------------------  -------------
2019-01-07 08:16:52.787020 UTC | Iteration                 49
2019-01-07 08:16:52.789240 UTC | AverageDiscountedReturn   86.602
2019-01-07 08:16:52.791479 UTC | AverageReturn            200
2019-01-07 08:16:52.793731 UTC | ExplainedVariance          0.977927
2019-01-07 08:16:52.795736 UTC | NumTrajs                  20
2019-01-07 08:16:52.797126 UTC | Entro

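A better way to conduct experiments is to wrap the training code in a function and launch it with **run_experiment_lite**, which also lets you set the number of sampling workers, the snapshot mode, and the random seed: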

In [4]:
from rllab.algos.trpo import TRPO
from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
from rllab.envs.gym_env import GymEnv
from rllab.envs.normalized_env import normalize
from rllab.misc.instrument import run_experiment_lite
from rllab.policies.categorical_mlp_policy import CategoricalMLPPolicy


def run_task(*_):
    """Build a TRPO learner for CartPole-v0 and train it.

    Any positional arguments are accepted and ignored; nothing is returned.

    The policy class has to suit the environment's action space: a
    CategoricalMLPPolicy works for a Discrete action space such as
    CartPole's, while a Box action space may need a GaussianMLPPolicy
    instead (see the trpo_gym_pendulum.py example).
    """
    cartpole = normalize(GymEnv("CartPole-v0"))

    # Neural network policy with two hidden layers of 32 units each.
    actor = CategoricalMLPPolicy(env_spec=cartpole.spec, hidden_sizes=(32, 32))
    value_baseline = LinearFeatureBaseline(env_spec=cartpole.spec)

    hyperparams = dict(
        batch_size=4000,
        max_path_length=cartpole.horizon,
        n_itr=50,
        discount=0.99,
        step_size=0.01,
        # To enable plotting, uncomment the next line together with the
        # plot argument of the run_experiment_lite call.
        # plot=True,
    )
    TRPO(env=cartpole, policy=actor, baseline=value_baseline, **hyperparams).train()


# Launch run_task through rllab's experiment runner rather than calling it directly.
run_experiment_lite(
    run_task,
    # Hand the current DISPLAY setting over to the launched experiment.
    env=dict(DISPLAY=os.environ["DISPLAY"]),
    # How many parallel workers are used for sampling.
    n_parallel=1,
    # Keep snapshot parameters for the final iteration only.
    snapshot_mode="last",
    # Seed for the experiment; when omitted, a random seed is used.
    seed=1,
    # plot=True,
)

python /content/scripts/run_experiment_lite.py  --n_parallel '1'  --snapshot_mode 'last'  --seed '1'  --exp_name 'experiment_2019_01_07_08_14_37_0001'  --log_dir '/content/data/local/experiment/experiment_2019_01_07_08_14_37_0001'  --use_cloudpickle 'True'  --args_data 'gASVdwMAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwOX2ZpbGxfZnVuY3Rpb26Uk5QoaACMD19tYWtlX3NrZWxfZnVuY5STlGgAjA1fYnVpbHRpbl90eXBllJOUjAhDb2RlVHlwZZSFlFKUKEsASwBLBUsKS0dDTHQAdAFkAYMBgwF9AXQCfAFqA2QKZAONAn0CdAR8AWoDZASNAX0DdAV8AXwCfANkBXwBagZkBmQHZAhkCY0IfQR8BGoHgwABAGQAUwCUKE6MC0NhcnRQb2xlLXYwlEsgjAhlbnZfc3BlY5SMDGhpZGRlbl9zaXplc5SGlGgMhZRNoA9LMkc/764UeuFHrkc/hHrhR64UeyiMA2VudpSMBnBvbGljeZSMCGJhc2VsaW5llIwKYmF0Y2hfc2l6ZZSMD21heF9wYXRoX2xlbmd0aJSMBW5faXRylIwIZGlzY291bnSUjAlzdGVwX3NpemWUdJRLIEsghpR0lCiMCW5vcm1hbGl6ZZSMBkd5bUVudpSMFENhdGVnb3JpY2FsTUxQUG9saWN5lIwEc3BlY5SMFUxpbmVhckZlYXR1cmVCYXNlbGluZZSMBFRSUE+UjAdob3Jpem9ulIwFdHJhaW6UdJQojAFflGgQaBFoEowEYWxnb5R0lIwePGlweXRob24taW5wdXQtNC02ODhiNGEzMTE2OWM+lIwIcnVuX3Rhc2uUSwlDH