# 1. Define the EAGERx graph

In [1]:
import eagerx
from typing import Dict
import numpy as np
from huggingface_sb3 import load_from_hub
import stable_baselines3 as sb3

from double_pendulum.objects import Double_Pendulum

In [2]:
# noinspection JupyterPackage
from double_pendulum.processor import DecomposedAngle, DecomposedAngle_vel

# Rate passed to the object and to the renderer below; later cells reuse it
# for the engine and the environments.
rate = 50.0

# Components of the Double_Pendulum object that are selected for this graph.
sensors = ["theta", "theta_dot", "image"]
actuators = ["u"]
states = ["model_state"]

graph = eagerx.Graph.create()
pendulum = Double_Pendulum.make(
    "double_pendulum",
    rate=rate,
    actuators=actuators,
    sensors=sensors,
    states=states,
    render_fn="double_pendulum_render_fn",
)

# Angle sensor: attach the DecomposedAngle processor and override the sensor
# space to 4 values bounded to [-1, 1].
# (presumably a sin/cos encoding of the two joint angles -- confirm in
# double_pendulum.processor)
pendulum.sensors.theta.processor = DecomposedAngle.make()
pendulum.sensors.theta.space.low = -1
pendulum.sensors.theta.space.high = 1
pendulum.sensors.theta.space.shape = [4]

# Angular-velocity sensor: attach the DecomposedAngle_vel processor and
# declare a 2-value space bounded to [-999, 999].
pendulum.sensors.theta_dot.processor = DecomposedAngle_vel.make()
pendulum.sensors.theta_dot.space.low = -999
pendulum.sensors.theta_dot.space.high = 999
pendulum.sensors.theta_dot.space.shape = [2]

graph.add(pendulum)

# Wire the agent's action into the actuator and expose both sensors as
# observations.
# NOTE(review): the angle observation requests window=60, but the recorded
# observation_space output shows shape (1, 4) -- confirm the intended window.
graph.connect(action="voltage", target=pendulum.actuators.u)
graph.connect(
    source=pendulum.sensors.theta,
    observation="angle",
    window=60,
)
graph.connect(
    source=pendulum.sensors.theta_dot,
    observation="angular_velocity",
)

# Render the image sensor at the same rate as the object.
graph.render(source=pendulum.sensors.image, rate=rate)

# Print the spec summary of the Double_Pendulum entity.
Double_Pendulum.info()

   entity_type: `Double_Pendulum`
   module: `double_pendulum.objects`
   file: `/home/marunyu/study/eagerx_sideproject/double_pendulum/objects.py`

Supported engines:
 - eagerx_ode.engine/OdeEngine

Make this spec with:
   spec = Double_Pendulum.make(name: str, actuators: List[str] = None, sensors: List[str] = None, states: List[str] = None, rate: float = 50.0, render_shape: List[int] = None, render_fn: str = None)

class Double_Pendulum:
   make(name: str, actuators: List[str] = None, sensors: List[str] = None, states: List[str] = None, rate: float = 50.0, render_shape: List[int] = None, render_fn: str = None):
      sensors:
       - theta: Space(-999.0, 999.0, (), float32)
       - theta_dot: Space(-999.0, 999.0, (), float32)
       - image: Space(uint8)
       - u_applied: Space([-4.], [4.], (1,), float32)
      actuators:
       - u: Space([-4.], [4.], (1,), float32)
      engine_states:
       - model_state: Space([-3.14 -3.14 -9.   -9.  ], [3.14 3.14 9.   9.  ], (4,), float32)


Create the ODE engine and the training and evaluation environments.

In [3]:
from eagerx.wrappers import Flatten
from eagerx_ode.engine import OdeEngine
from gym.wrappers.rescale_action import RescaleAction
from stable_baselines3.common.env_checker import check_env

from double_pendulum.double_pendulum_env import Double_PendulumEnv

# A single ODE engine spec, running at the graph rate, is passed to both
# environments.
ode_engine = OdeEngine.make(rate=rate)

# Both environments are built from the same graph and engine; they differ only
# in their name and in the `eval` flag.
shared_env_args = dict(rate=rate, graph=graph, engine=ode_engine)
train_env = Double_PendulumEnv(name="train", eval=False, **shared_env_args)
test_env = Double_PendulumEnv(name="test", eval=True, **shared_env_args)

# Report the spaces of the training environment before it is wrapped.
print("action_space: ", train_env.action_space)
print("observation_space: ", train_env.observation_space)
# ode_render = pendulum.gui(OdeEngine)

# Wrap both environments with eagerx's Flatten wrapper before handing them to
# stable-baselines3.
train_env = Flatten(train_env)
test_env = Flatten(test_env)

[31m[WARN]: Backend 'SINGLE_PROCESS' does not support multiprocessing, so all nodes are launched in the ENVIRONMENT process.[0m
action_space:  Dict(voltage:Space([-4.], [4.], (1,), float32))
observation_space:  Dict(angle:Box([[-1. -1. -1. -1.]], [[1. 1. 1. 1.]], (1, 4), float32), angular_velocity:Box([[-999. -999.]], [[999. 999.]], (1, 2), float32))


# 2. Train

# 2.1 SAC

In [None]:
# Build a SAC agent with an MLP policy on the flattened training environment;
# training metrics are logged to TensorBoard.
sac_model = sb3.SAC(
    "MlpPolicy",
    train_env,
    verbose=1,
    learning_rate=7e-4,
    tensorboard_log="./tensorboard/sac_doupen_tensorboard/",
)

# Turn on human-mode rendering, train for 10k steps, then close the environment.
train_env.render("human")
sac_model.learn(total_timesteps=10000)
train_env.close()

# Persist the trained agent.
sac_model.save("./model/double_pendulum_sac")

In [4]:
from stable_baselines3.common.evaluation import evaluate_policy
import helper

# Load the SAC checkpoint to evaluate.
# NOTE(review): this reads ./double_pendulum_sac.zip, whereas the training cell
# saves to ./model/double_pendulum_sac -- confirm which file is intended.
# Alternative: sac_model = sb3.SAC.load("./model/double_pendulum_sac.zip")
sac_model = sb3.SAC.load("./double_pendulum_sac.zip")

# Alternative evaluation via stable-baselines3:
# mean_reward, std_reward = evaluate_policy(sac_model, test_env, n_eval_episodes=10, render=True)
# print("mean_reward:",mean_reward,"std_reward:",std_reward)

# Evaluate for 5 episodes of 270 steps each using the project helper.
helper.evaluate(
    sac_model,
    test_env,
    n_eval_episodes=5,
    episode_length=270,
    video_rate=rate,
    video_prefix="trained_disc",
)

Start evaluation episode 0 of 5


  1%|          | 3/270 [00:00<00:47,  5.60it/s]QObject::moveToThread: Current thread (0x7f950c001c20) is not the object's thread (0x7f950c1d25f0).
Cannot move to target thread (0x7f950c001c20)

QObject::moveToThread: Current thread (0x7f950c001c20) is not the object's thread (0x7f950c1d25f0).
Cannot move to target thread (0x7f950c001c20)

QObject::moveToThread: Current thread (0x7f950c001c20) is not the object's thread (0x7f950c1d25f0).
Cannot move to target thread (0x7f950c001c20)

QObject::moveToThread: Current thread (0x7f950c001c20) is not the object's thread (0x7f950c1d25f0).
Cannot move to target thread (0x7f950c001c20)

QObject::moveToThread: Current thread (0x7f950c001c20) is not the object's thread (0x7f950c1d25f0).
Cannot move to target thread (0x7f950c001c20)

QObject::moveToThread: Current thread (0x7f950c001c20) is not the object's thread (0x7f950c1d25f0).
Cannot move to target thread (0x7f950c001c20)

QObject::moveToThread: Current thread (0x7f950c001c20) is not the objec

vel_cost:   33.25617057340145 [ 2.5781271  -0.03352065]
vel_reward:   20635.274776156984 [ 2.5781271  -0.03352065]
vel_cost:   25.334301771390248 [ 2.2497315  -0.03731069]
vel_reward:   19128.917354991867 [ 2.2497315  -0.03731069]
vel_cost:   19.14327517836619 [ 1.9558152  -0.02933409]
vel_reward:   22437.4888319981 [ 1.9558152  -0.02933409]
vel_cost:   14.431316132833206 [ 1.6983376  -0.02186765]
vel_reward:   26051.15100469698 [ 1.6983376  -0.02186765]
vel_cost:   10.822271802698564 [ 1.471073   -0.00998119]
vel_reward:   33042.38306327408 [ 1.471073   -0.00998119]
vel_cost:   8.115788770387878 [ 1.2740309e+00 -8.6011167e-04]
vel_reward:   39654.82670964153 [ 1.2740309e+00 -8.6011167e-04]
vel_cost:   6.182516924765747 [ 1.1117887 -0.0103581]
vel_reward:   32794.24005421202 [ 1.1117887 -0.0103581]
vel_cost:   4.744603841474526 [ 0.9731455  -0.02184395]
vel_reward:   26063.498448259685 [ 0.9731455  -0.02184395]
vel_cost:   3.664664373379232 [ 0.8536986  -0.03213859]
vel_reward:   21213

 78%|███████▊  | 210/270 [00:01<00:00, 160.04it/s]

vel_cost:   4.191140636832569 [-0.9028089  -0.07609896]
vel_reward:   8806.020494287723 [-0.9028089  -0.07609896]
vel_cost:   5.645092722828995 [-1.0361751  -0.11764315]
vel_reward:   3836.4672373148273 [-1.0361751  -0.11764315]
vel_cost:   7.633161399409301 [-1.2065902  -0.13301544]
vel_reward:   2821.0414596956834 [-1.2065902  -0.13301544]
vel_cost:   14.195108558390785 [-1.605615    0.25545162]
vel_reward:   243.74870413296867 [-1.605615    0.25545162]
vel_cost:   28.93611514261579 [-2.0277615  0.6471875]
vel_cost:   18.036819264987347 [-1.8246824  -0.26358023]
vel_reward:   207.17534848518147 [-1.8246824  -0.26358023]
vel_cost:   26.13083596086952 [-1.961311  -0.5872449]
vel_cost:   54.40867679001208 [-1.9380966 -1.3346832]
vel_cost:   73.07089167084087 [-2.2514925 -1.5447459]


 97%|█████████▋| 262/270 [00:02<00:00, 163.62it/s]

vel_cost:   35.55346796340909 [ 2.6664565  -0.01326211]
vel_reward:   30943.799954995764 [ 2.6664565  -0.01326211]
vel_cost:   27.313988786588272 [ 2.3366284 -0.0272273]
vel_reward:   23403.107425040125 [ 2.3366284 -0.0272273]
vel_cost:   20.70807150109637 [ 2.0349197  -0.01337869]
vel_reward:   30871.73935925378 [ 2.0349197  -0.01337869]
vel_cost:   15.78108211297607 [ 1.776449  -0.0105534]
vel_reward:   32666.395460390588 [ 1.776449  -0.0105534]
vel_cost:   11.975475447425685 [1.5476072e+00 1.3318408e-03]
vel_reward:   39282.459333188715 [1.5476072e+00 1.3318408e-03]
vel_cost:   9.124702984800047 [1.3507423  0.01043705]
vel_reward:   32742.502418272066 [1.3507423  0.01043705]
vel_cost:   7.1256703395228 [ 1.1937376  -0.00558005]
vel_reward:   36082.711099533575 [ 1.1937376  -0.00558005]
vel_cost:   5.597223791283493 [ 1.0576513  -0.01430494]
vel_reward:   30305.102425838202 [ 1.0576513  -0.01430494]
vel_cost:   4.482586530628861 [ 0.9453017  -0.02702767]
vel_reward:   23496.734730838

100%|██████████| 270/270 [00:02<00:00, 114.40it/s]


Start video writer
Showing episode 0 with episodic reward: 622654.0593725473


Start evaluation episode 1 of 5


 15%|█▌        | 41/270 [00:00<00:01, 131.49it/s]

vel_cost:   40.07337133923543 [2.8249397  0.09272244]
vel_reward:   6315.242410267299 [2.8249397  0.09272244]
vel_cost:   32.62917800589834 [2.5020769  0.25760773]
vel_reward:   233.4611357637387 [2.5020769  0.25760773]
vel_cost:   32.11513096283774 [2.0946724 0.7133326]
vel_cost:   81.97786039821075 [1.3966998 1.9003159]
vel_cost:   180.69169774789998 [0.817122  2.9778621]
vel_cost:   218.25987947297114 [0.676731  3.2861075]
vel_cost:   248.49667483411508 [0.6586933 3.5094678]


 26%|██▌       | 70/270 [00:00<00:01, 135.33it/s]

vel_cost:   44.26056915442068 [2.9749603  0.02076585]
vel_reward:   26631.582568463262 [2.9749603  0.02076585]
vel_cost:   33.705651435054115 [2.595978   0.02251887]
vel_reward:   25714.04858156695 [2.595978   0.02251887]
vel_cost:   25.668925545532257 [2.2655106  0.01765656]
vel_reward:   28340.271066522375 [2.2655106  0.01765656]
vel_cost:   19.434986534744514 [1.9709802  0.02363518]
vel_reward:   25146.314144578657 [1.9709802  0.02363518]
vel_cost:   14.816248648405816 [1.7211287  0.01553802]
vel_reward:   29566.870830207656 [1.7211287  0.01553802]
vel_cost:   11.43772892685113 [ 1.5124011  -0.00686815]
vel_reward:   35165.02071133406 [ 1.5124011  -0.00686815]
vel_cost:   8.903760991831518 [ 1.3332723  -0.02800478]
vel_reward:   23042.014375972434 [ 1.3332723  -0.02800478]
vel_cost:   6.926579875725394 [ 1.1746839  -0.03685663]
vel_reward:   19303.422319086058 [ 1.1746839  -0.03685663]
vel_cost:   5.578030354718959 [ 1.0485983  -0.06333975]
vel_reward:   11365.92459081188 [ 1.048598

 47%|████▋     | 127/270 [00:00<00:01, 136.24it/s]

vel_cost:   34.385667880079026 [ 2.6221473  -0.01921587]
vel_reward:   27470.08019659863 [ 2.6221473  -0.01921587]
vel_cost:   25.83764461647035 [ 2.2726934  -0.02446249]
vel_reward:   24733.66307493548 [ 2.2726934  -0.02446249]
vel_cost:   19.132151326529232 [ 1.9558866  -0.01531228]
vel_reward:   29700.66031119236 [ 1.9558866  -0.01531228]
vel_cost:   14.159509720040798 [ 1.6822937  -0.02115424]
vel_reward:   26425.517400610323 [ 1.6822937  -0.02115424]
vel_cost:   10.250938906676545 [ 1.431714   -0.00978056]
vel_reward:   33175.23416675457 [ 1.431714   -0.00978056]
vel_cost:   7.555641322321184 [ 1.2272605  -0.03521364]
vel_reward:   19948.262973730176 [ 1.2272605  -0.03521364]
vel_cost:   5.485182165968433 [ 1.0422621  -0.05178367]
vel_reward:   14321.206885586855 [ 1.0422621  -0.05178367]
vel_cost:   3.841864776609035 [ 0.8699847 -0.0536181]
vel_reward:   13805.305719530503 [ 0.8699847 -0.0536181]
vel_cost:   2.5894203551615123 [ 0.7123052 -0.0512479]
vel_reward:   14475.489831523

 76%|███████▋  | 206/270 [00:01<00:00, 152.24it/s]

vel_cost:   31.90715055716504 [ 2.525255   -0.03360608]
vel_reward:   20600.046590993257 [ 2.525255   -0.03360608]
vel_cost:   24.18714714990263 [ 2.1988041  -0.02593156]
vel_reward:   24017.522777961036 [ 2.1988041  -0.02593156]
vel_cost:   18.30262716308622 [ 1.9128374  -0.01986559]
vel_reward:   27115.433982654184 [ 1.9128374  -0.01986559]
vel_cost:   13.800708020535177 [ 1.661224  -0.0109133]
vel_reward:   32432.106165815723 [ 1.661224  -0.0109133]
vel_cost:   10.377351771344857 [1.4406493e+00 8.9726571e-05]
vel_reward:   40270.54770492221 [1.4406493e+00 8.9726571e-05]
vel_cost:   7.867796423461159 [ 1.2544155e+00 -5.0192949e-04]
vel_reward:   39939.91968842645 [ 1.2544155e+00 -5.0192949e-04]
vel_cost:   6.060973253019287 [ 1.1005775  -0.01519766]
vel_reward:   29768.82681731289 [ 1.1005775  -0.01519766]
vel_cost:   4.695354621889156 [ 0.9675844  -0.02669929]
vel_reward:   23651.56091550054 [ 0.9675844  -0.02669929]
vel_cost:   3.6868999282731 [ 0.85501486 -0.03977931]
vel_reward: 

 96%|█████████▋| 260/270 [00:01<00:00, 166.26it/s]

vel_cost:   36.063780938149996 [ 2.6855152  -0.01382466]
vel_reward:   30597.606701123288 [ 2.6855152  -0.01382466]
vel_cost:   27.257532577933077 [ 2.3338857  -0.03348207]
vel_reward:   20651.201190789478 [ 2.3338857  -0.03348207]
vel_cost:   20.029919838400804 [ 2.001384   -0.01055903]
vel_reward:   32662.715707445386 [ 2.001384   -0.01055903]
vel_cost:   14.909762227406793 [ 1.7258563  -0.02903645]
vel_reward:   22571.44947046811 [ 1.7258563  -0.02903645]
vel_cost:   10.727553530646144 [ 1.4645685 -0.0117242]
vel_reward:   31910.36643574248 [ 1.4645685 -0.0117242]
vel_cost:   7.823342837674425 [ 1.2493864  -0.03042251]
vel_reward:   21954.33678037369 [ 1.2493864  -0.03042251]
vel_cost:   5.631093509816723 [ 1.057153   -0.04649265]
vel_reward:   15919.77360324175 [ 1.057153   -0.04649265]
vel_cost:   3.904827980279993 [ 0.8783567 -0.0486187]
vel_reward:   15257.037925425715 [ 0.8783567 -0.0486187]
vel_cost:   2.586548371242695 [ 0.71359324 -0.04498432]
vel_reward:   16407.33581622851

100%|██████████| 270/270 [00:01<00:00, 146.68it/s]


Start video writer
Showing episode 1 with episodic reward: 1485925.0168463616


Start evaluation episode 2 of 5
[31m[WARN]: [subscriber][/test/engine][states][model_state]: Message does not match the defined space. Either a mismatch in expected shape (msg.shape=(4,) vs space.shape=(4,)), dtype (msg.dtype=float32 vs space.dtype=float32), and/or the value is out of bounds (low/high).[0m


 14%|█▍        | 38/270 [00:00<00:01, 122.20it/s]

vel_cost:   38.379665009074536 [ 2.770542e+00 -2.763464e-03]
vel_reward:   38173.65525548583 [ 2.770542e+00 -2.763464e-03]
vel_cost:   30.06400172270976 [ 2.4520295  -0.00937869]
vel_reward:   33442.95332843196 [ 2.4520295  -0.00937869]
vel_cost:   23.520691255131343 [2.168888   0.00395902]
vel_reward:   37271.70978751959 [2.168888   0.00395902]
vel_cost:   18.606801531445157 [1.9288886  0.01368605]
vel_reward:   30682.54400788803 [1.9288886  0.01368605]
vel_cost:   14.876160185774904 [1.7238533  0.02983985]
vel_reward:   22211.673017092784 [1.7238533  0.02983985]
vel_cost:   12.13131299269488 [1.555217   0.04348159]
vel_reward:   16907.935454216262 [1.555217   0.04348159]
vel_cost:   10.061227773417727 [1.4105632  0.07509482]
vel_reward:   8984.658624934686 [1.4105632  0.07509482]
vel_cost:   8.781131823510009 [1.226877   0.25049913]
vel_reward:   269.1281338164757 [1.226877   0.25049913]
vel_cost:   8.678634263874674 [1.1070637  0.35711935]


 30%|███       | 81/270 [00:00<00:01, 135.31it/s]

vel_cost:   36.639714310850565 [ 2.705573   -0.04420735]
vel_reward:   16664.285625900782 [ 2.705573   -0.04420735]
vel_cost:   26.878393998583324 [ 2.3182526  -0.01860008]
vel_reward:   27810.491484000115 [ 2.3182526  -0.01860008]
vel_cost:   19.872068515841914 [ 1.992268   -0.03633887]
vel_reward:   19504.349995037253 [ 1.992268   -0.03633887]
vel_cost:   14.196658003853933 [ 1.6845447  -0.02025348]
vel_reward:   26905.893575697122 [ 1.6845447  -0.02025348]
vel_cost:   10.041332061877208 [ 1.416476   -0.02157615]
vel_reward:   26203.469023106198 [ 1.416476   -0.02157615]
vel_cost:   6.932146367488486 [ 1.1763754  -0.02534866]
vel_reward:   24299.16018390137 [ 1.1763754  -0.02534866]
vel_cost:   4.552432642806042 [ 0.953477   -0.01849386]
vel_reward:   27869.6344473502 [ 0.953477   -0.01849386]
vel_cost:   2.75922970328383 [0.742861  0.0009375]
vel_reward:   39593.49760042885 [0.742861  0.0009375]
vel_cost:   1.503225742241396 [0.5465866  0.02172709]
vel_reward:   26124.485556423293 [

 53%|█████▎    | 142/270 [00:01<00:00, 144.17it/s]

vel_cost:   38.235631221459286 [ 2.765324   -0.00521486]
vel_reward:   36347.21828251261 [ 2.765324   -0.00521486]
vel_cost:   29.47300314998048 [ 2.4273794  -0.02464764]
vel_reward:   24642.239694158434 [ 2.4273794  -0.02464764]
vel_cost:   22.3832985282869 [ 2.115718   -0.00996901]
vel_reward:   33050.43490709974 [ 2.115718   -0.00996901]
vel_cost:   17.1013920915618 [ 1.8493323  -0.00787943]
vel_reward:   34460.93082761641 [ 1.8493323  -0.00787943]
vel_cost:   13.03201501272832 [1.6144307  0.00203883]
vel_reward:   38730.92179700968 [1.6144307  0.00203883]
vel_cost:   9.938258601989023 [1.4094918  0.01568956]
vel_reward:   29477.392382592978 [1.4094918  0.01568956]
vel_cost:   7.798879949913589 [ 1.2489002  -0.00246691]
vel_reward:   38400.737100014136 [ 1.2489002  -0.00246691]
vel_cost:   6.151964746639323 [ 1.1090288  -0.01058341]
vel_reward:   32646.796792031266 [ 1.1090288  -0.01058341]
vel_cost:   4.950480955022625 [ 0.9940021  -0.02267162]
vel_reward:   25635.614931357566 [ 0.

 71%|███████   | 191/270 [00:01<00:00, 147.85it/s]

vel_cost:   39.361262346034756 [ 2.805392   -0.02251691]
vel_reward:   25715.058252185794 [ 2.805392   -0.02251691]
vel_cost:   29.658835259303814 [ 2.4344466  -0.03618333]
vel_reward:   19565.11794030473 [ 2.4344466  -0.03618333]
vel_cost:   21.778927614319873 [ 2.086865   -0.01396612]
vel_reward:   30511.156905644708 [ 2.086865   -0.01396612]
vel_cost:   16.197238399738723 [ 1.7984365  -0.03561515]
vel_reward:   19788.718128217213 [ 1.7984365  -0.03561515]
vel_cost:   11.56821927003149 [ 1.5208427  -0.01305204]
vel_reward:   31074.083072352703 [ 1.5208427  -0.01305204]
vel_cost:   8.29121113718062 [ 1.2870718  -0.02054475]
vel_reward:   26749.61002381743 [ 1.2870718  -0.02054475]
vel_cost:   5.8965989864230925 [ 1.0832813  -0.03814927]
vel_reward:   18810.76682371247 [ 1.0832813  -0.03814927]
vel_cost:   4.010898268363761 [ 0.89219666 -0.03925806]
vel_reward:   18398.2150216414 [ 0.89219666 -0.03925806]
vel_cost:   2.5505797390004448 [ 0.7120664  -0.02773704]
vel_reward:   23165.7334

100%|██████████| 270/270 [00:01<00:00, 148.87it/s]

vel_cost:   33.74177874131993 [ 2.5974736 -0.0192778]
vel_reward:   27436.078264733693 [ 2.5974736 -0.0192778]
vel_cost:   25.784948716094913 [ 2.2704933  -0.02150615]
vel_reward:   26240.179606473328 [ 2.2704933  -0.02150615]
vel_cost:   19.53385307475163 [ 1.976497   -0.00758304]
vel_reward:   34665.81606196606 [ 1.976497   -0.00758304]
vel_cost:   14.903014786654463 [ 1.7263778  -0.00745812]
vel_reward:   34752.53385360345 [ 1.7263778  -0.00745812]
vel_cost:   11.291441319698555 [1.5026996  0.00674788]
vel_reward:   35249.71310782444 [1.5026996  0.00674788]
vel_cost:   8.674811800753591 [1.3171504  0.00439614]
vel_reward:   36947.28476606058 [1.3171504  0.00439614]
vel_cost:   6.77224323462883 [ 1.1636816  -0.00856912]
vel_reward:   33988.84703896803 [ 1.1636816  -0.00856912]
vel_cost:   5.340189513749675 [ 1.0327717  -0.01884479]
vel_reward:   27674.71151797613 [ 1.0327717  -0.01884479]
vel_cost:   4.294224490085848 [ 0.92452043 -0.03204242]
vel_reward:   21254.45411876137 [ 0.9245




Showing episode 2 with episodic reward: 1650448.3260526957


Start evaluation episode 3 of 5


 37%|███▋      | 100/270 [00:00<00:01, 141.21it/s]

vel_cost:   33.55080717215344 [2.5903933e+00 2.4474859e-03]
vel_reward:   38415.65998347543 [2.5903933e+00 2.4474859e-03]
vel_cost:   24.956679664799214 [ 2.2339656  -0.01354136]
vel_reward:   30771.464781941315 [ 2.2339656  -0.01354136]
vel_cost:   18.22939422141008 [ 1.9091744  -0.01526264]
vel_reward:   29730.160359986046 [ 1.9091744  -0.01526264]
vel_cost:   13.195501772907727 [ 1.6236714  -0.02641732]
vel_reward:   23785.318780379457 [ 1.6236714  -0.02641732]
vel_cost:   9.397884409548382 [ 1.3688031  -0.03858379]
vel_reward:   18648.002803748594 [ 1.3688031  -0.03858379]
vel_cost:   6.392983980397104 [ 1.1291457  -0.03011101]
vel_reward:   22091.54147142006 [ 1.1291457  -0.03011101]
vel_cost:   4.16906859832909 [ 0.9119417  -0.02332404]
vel_reward:   25303.28438366733 [ 0.9119417  -0.02332404]
vel_cost:   2.547221269898097 [ 0.7129167  -0.01727768]
vel_reward:   28555.83732044278 [ 0.7129167  -0.01727768]
vel_cost:   1.350135040510129 [0.51952153 0.00557636]
vel_reward:   36085.3

 61%|██████    | 165/270 [00:01<00:00, 153.52it/s]

vel_cost:   30.704581381968286 [ 2.477122   -0.03457794]
vel_reward:   20203.5066994772 [ 2.477122   -0.03457794]
vel_cost:   23.287799382866798 [ 2.1576402 -0.0231763]
vel_reward:   25378.159859636085 [ 2.1576402 -0.0231763]
vel_cost:   17.67663327440944 [ 1.8799438  -0.01686553]
vel_reward:   28792.193384333084 [ 1.8799438  -0.01686553]
vel_cost:   13.386062451957532 [ 1.6361519  -0.00740639]
vel_reward:   34788.5092683808 [ 1.6361519  -0.00740639]
vel_cost:   10.11627046942645 [1.4223652  0.00573071]
vel_reward:   35974.15295649191 [1.4223652  0.00573071]
vel_cost:   7.765483652297576 [1.2462329e+00 3.0457677e-04]
vel_reward:   40097.87625474498 [1.2462329e+00 3.0457677e-04]
vel_cost:   6.038932802060661 [ 1.0987469  -0.01163851]
vel_reward:   31965.102539020718 [ 1.0987469  -0.01163851]
vel_cost:   4.743986465215713 [ 0.9730598  -0.02209055]
vel_reward:   25935.270100304064 [ 0.9730598  -0.02209055]
vel_cost:   3.7920364457581757 [ 0.86816216 -0.0342847 ]
vel_reward:   20322.344344

 79%|███████▉  | 214/270 [00:01<00:00, 153.75it/s]

vel_cost:   33.466887375393256 [ 2.5867217  -0.02370936]
vel_reward:   25109.032941658043 [ 2.5867217  -0.02370936]
vel_cost:   25.01959519686041 [ 2.2365575 -0.0207946]
vel_reward:   26616.275831635376 [ 2.2365575 -0.0207946]
vel_cost:   18.59308230546397 [ 1.9278665  -0.02206455]
vel_reward:   25948.761258890994 [ 1.9278665  -0.02206455]
vel_cost:   13.649208276205666 [ 1.6518221  -0.01820328]
vel_reward:   28032.072180504103 [ 1.6518221  -0.01820328]
vel_cost:   9.886943756561283 [ 1.4059733  -0.01252785]
vel_reward:   31401.570195034794 [ 1.4059733  -0.01252785]
vel_cost:   7.255768426724818 [ 1.2023778  -0.03688265]
vel_reward:   19293.376111756937 [ 1.2023778  -0.03688265]
vel_cost:   5.231280642475467 [ 1.0175552  -0.05205158]
vel_reward:   14244.676787593467 [ 1.0175552  -0.05205158]
vel_cost:   3.6114780788838674 [ 0.84388936 -0.0503646 ]
vel_reward:   14733.487061046322 [ 0.84388936 -0.0503646 ]
vel_cost:   2.3996360357264783 [ 0.68609315 -0.04796716]
vel_reward:   15457.1498

100%|██████████| 270/270 [00:01<00:00, 150.34it/s]


vel_cost:   37.40891655250424 [ 2.7352629  -0.00548494]
vel_reward:   36151.41693728456 [ 2.7352629  -0.00548494]
vel_cost:   28.820249328078432 [ 2.400392   -0.02328006]
vel_reward:   25325.546620450317 [ 2.400392   -0.02328006]
vel_cost:   21.880769895753073 [ 2.0918684  -0.00775522]
vel_reward:   34546.64639859447 [ 2.0918684  -0.00775522]
vel_cost:   16.727821197213537 [ 1.8290455  -0.00625846]
vel_reward:   35596.43949386383 [ 1.8290455  -0.00625846]
vel_cost:   12.750985213036525 [1.5969121  0.00414252]
vel_reward:   37135.17235899735 [1.5969121  0.00414252]
vel_cost:   9.759258215959264 [1.3968225  0.01358933]
vel_reward:   30741.95587699928 [1.3968225  0.01358933]
vel_cost:   7.665655924677494 [ 1.2381788  -0.00332875]
vel_reward:   37744.50595292228 [ 1.2381788  -0.00332875]
vel_cost:   6.060308357698507 [ 1.1006956  -0.01152004]
vel_reward:   32040.92675426097 [ 1.1006956  -0.01152004]
vel_cost:   4.89367120599338 [ 0.98812973 -0.02415506]
vel_reward:   24886.205977608945 [ 0

Start evaluation episode 4 of 5


 56%|█████▌    | 150/270 [00:00<00:00, 154.35it/s]

vel_cost:   42.327401592246325 [ 2.9089656  -0.02915274]
vel_reward:   22519.01691369871 [ 2.9089656  -0.02915274]
vel_cost:   30.909106056073384 [2.4863224e+00 2.3515837e-03]
vel_reward:   38489.4136519454 [2.4863224e+00 2.3515837e-03]
vel_cost:   22.895162097434866 [ 2.1388514  -0.03296623]
vel_reward:   20865.357152684035 [ 2.1388514  -0.03296623]
vel_cost:   16.361334742516 [ 1.8080374  -0.02858193]
vel_reward:   22777.568774614138 [ 1.8080374  -0.02858193]
vel_cost:   11.307885225889317 [ 1.5036291  -0.01300586]
vel_reward:   31102.794964314548 [ 1.5036291  -0.01300586]
vel_cost:   7.662708257898017 [ 1.2374446  -0.01783547]
vel_reward:   28239.04046475663 [ 1.2374446  -0.01783547]
vel_cost:   4.820829663738478 [0.9819073  0.00244801]
vel_reward:   38415.25605907632 [0.9819073  0.00244801]
vel_cost:   2.7875408207156553 [0.7451637  0.02366039]
vel_reward:   25133.639987911676 [0.7451637  0.02366039]
vel_cost:   1.4063224692642764 [0.519468   0.05342631]
vel_reward:   13858.3615586

 81%|████████▏ | 220/270 [00:01<00:00, 168.24it/s]

vel_cost:   34.53665611131334 [ 2.6279325  -0.01803984]
vel_reward:   28123.856296011298 [ 2.6279325  -0.01803984]
vel_cost:   26.449646916429298 [ 2.2994025  -0.02587296]
vel_reward:   24045.68993424836 [ 2.2994025  -0.02587296]
vel_cost:   20.031620681092743 [ 2.0014377  -0.01195213]
vel_reward:   31765.233249353638 [ 2.0014377  -0.01195213]
vel_cost:   15.266931875556617 [ 1.7472526  -0.01112188]
vel_reward:   32297.097846876455 [ 1.7472526  -0.01112188]
vel_cost:   11.555458868891893 [1.5202171  0.00282228]
vel_reward:   38128.776120534196 [1.5202171  0.00282228]
vel_cost:   8.81893805041441 [1.3279966  0.00729057]
vel_reward:   34869.185296292344 [1.3279966  0.00729057]
vel_cost:   6.87233808802868 [ 1.1722814  -0.00748287]
vel_reward:   34735.33794201226 [ 1.1722814  -0.00748287]
vel_cost:   5.393408925551448 [ 1.0380374  -0.01703013]
vel_reward:   28697.56634482894 [ 1.0380374  -0.01703013]
vel_cost:   4.312152704255525 [ 0.9267454  -0.02988928]
vel_reward:   22189.722538549468 

100%|██████████| 270/270 [00:01<00:00, 158.02it/s]

vel_cost:   32.620082324815144 [ 2.5539224  -0.01934402]
vel_reward:   27399.767598762508 [ 2.5539224  -0.01934402]
vel_cost:   24.323348310677268 [ 2.2055705  -0.00567034]
vel_reward:   36017.61619661313 [ 2.2055705  -0.00567034]
vel_cost:   18.26759258161178 [ 1.9109695  -0.02070086]
vel_reward:   26666.222553806234 [ 1.9109695  -0.02070086]
vel_cost:   13.41686294938687 [ 1.6380115  -0.00853026]
vel_reward:   34015.274954843844 [ 1.6380115  -0.00853026]
vel_cost:   9.870134394767513 [ 1.4047476  -0.01333271]
vel_reward:   30900.14169475529 [ 1.4047476  -0.01333271]
vel_cost:   7.337193956590903 [ 1.2089657  -0.03821265]
vel_reward:   18786.93941646684 [ 1.2089657  -0.03821265]
vel_cost:   5.4190073818814 [ 1.0344725  -0.05845553]
vel_reward:   12532.238076934957 [ 1.0344725  -0.05845553]
vel_cost:   3.8829154691175205 [ 0.8721359  -0.06317061]
vel_reward:   11404.43904863166 [ 0.8721359  -0.06317061]
vel_cost:   2.7173263003915347 [ 0.7253759  -0.06575537]
vel_reward:   10829.863016




Showing episode 4 with episodic reward: 997514.6766766048


Finished evaluation with mean episodic reward: 1250693.11945621


# 2.2 DDPG

In [None]:
# Build a DDPG agent with an MLP policy on the flattened training environment;
# training metrics are logged to TensorBoard.
ddpg_model = sb3.DDPG(
    "MlpPolicy",
    train_env,
    verbose=1,
    learning_rate=5e-4,
    tensorboard_log="./tensorboard/ddpg_doupen_tensorboard/",
)

# Turn on human-mode rendering, train for 10k steps, then close the environment.
train_env.render("human")
ddpg_model.learn(total_timesteps=10000)
train_env.close()

# Persist the trained agent.
ddpg_model.save("./model/double_pendulum_ddpg")

In [None]:
from stable_baselines3.common.evaluation import evaluate_policy

# Load the saved DDPG checkpoint and evaluate it for 10 rendered episodes on
# the test environment.
ddpg_model = sb3.DDPG.load("./model/double_pendulum_ddpg.zip")
mean_reward, std_reward = evaluate_policy(
    ddpg_model,
    test_env,
    n_eval_episodes=10,
    render=True,
)
print("mean_reward:", mean_reward, "std_reward:", std_reward)

# 2.3 PPO

In [None]:
# Build a PPO agent with an MLP policy on the flattened training environment;
# training metrics are logged to TensorBoard.
ppo_model = sb3.PPO(
    "MlpPolicy",
    train_env,
    verbose=1,
    learning_rate=5e-4,
    tensorboard_log="./tensorboard/ppo_doupen_tensorboard/",
)

# Turn on human-mode rendering, train for 10k steps, then close the environment.
train_env.render("human")
ppo_model.learn(total_timesteps=10000)
train_env.close()

# Save the PPO agent trained in this cell. The previous code called
# ddpg_model.save(...) here, which either raised NameError on a fresh kernel
# or wrote the DDPG weights to the PPO checkpoint path.
ppo_model.save("./model/double_pendulum_ppo")

In [None]:
from stable_baselines3.common.evaluation import evaluate_policy

# Load the PPO checkpoint with the PPO class: a saved model has to be restored
# by the same algorithm class that produced it (the previous code used
# sb3.DDPG.load on the PPO file).
ppo_model = sb3.PPO.load("./model/double_pendulum_ppo.zip")

# Evaluate for 10 rendered episodes on the test environment.
mean_reward, std_reward = evaluate_policy(
    ppo_model,
    test_env,
    n_eval_episodes=10,
    render=True,
)
print("mean_reward:", mean_reward, "std_reward:", std_reward)