# 1. define eager graph

In [1]:
import eagerx
from typing import Dict
import numpy as np
from huggingface_sb3 import load_from_hub
import stable_baselines3 as sb3

from double_pendulum.objects import Double_Pendulum

In [2]:
# noinspection JupyterPackage
rate = 20.0
graph = eagerx.Graph.create()
sensors = ["theta", "theta_dot", "image"]
actuators = ["u"]
states = ["model_state"]
pendulum = Double_Pendulum.make("double_pendulum", rate=rate, actuators=actuators, sensors=sensors, states=states, render_fn="double_pendulum_render_fn")

from double_pendulum.processor import DecomposedAngle,DecomposedAngle_vel
pendulum.sensors.theta.processor = DecomposedAngle.make()
pendulum.sensors.theta.space.low = -1
pendulum.sensors.theta.space.high = 1
pendulum.sensors.theta.space.shape = [4]
pendulum.sensors.theta_dot.processor = DecomposedAngle_vel.make()
pendulum.sensors.theta_dot.space.low = -999
pendulum.sensors.theta_dot.space.high = 999
pendulum.sensors.theta_dot.space.shape = [2]


graph.add(pendulum)

# Connect the pendulum to an action and observations
graph.connect(action="voltage", target=pendulum.actuators.u)
graph.connect(source=pendulum.sensors.theta, observation="angle")
graph.connect(source=pendulum.sensors.theta_dot, observation="angular_velocity")

# Render image
graph.render(source=pendulum.sensors.image, rate=rate)
Double_Pendulum.info()

   entity_type: `Double_Pendulum`
   module: `double_pendulum.objects`
   file: `/home/marunyu/study/eagerx_sideproject/double_pendulum/objects.py`

Supported engines:
 - eagerx_ode.engine/OdeEngine

Make this spec with:
   spec = Double_Pendulum.make(name: str, actuators: List[str] = None, sensors: List[str] = None, states: List[str] = None, rate: float = 30.0, render_shape: List[int] = None, render_fn: str = None)

class Double_Pendulum:
   make(name: str, actuators: List[str] = None, sensors: List[str] = None, states: List[str] = None, rate: float = 30.0, render_shape: List[int] = None, render_fn: str = None):
      sensors:
       - theta: Space(-999.0, 999.0, (), float32)
       - theta_dot: Space(-999.0, 999.0, (), float32)
       - image: Space(uint8)
       - u_applied: Space([-4.], [4.], (1,), float32)
      actuators:
       - u: Space([-4.], [4.], (1,), float32)
      engine_states:
       - model_state: Space([-3.14 -3.14 -9.   -9.  ], [3.14 3.14 9.   9.  ], (4,), float32)


engine and train environment

In [3]:
from eagerx_ode.engine import OdeEngine
from double_pendulum.double_pendulum_env import Double_PendulumEnv
from gym.wrappers.rescale_action import RescaleAction
ode_engine = OdeEngine.make(rate=rate)
train_env = Double_PendulumEnv(name="train", rate=rate, graph=graph, engine=ode_engine, eval=False)
test_env = Double_PendulumEnv(name="test", rate=rate, graph=graph, engine=ode_engine, eval=True)
print("action_space: ", train_env.action_space)
print("observation_space: ", train_env.observation_space)
# ode_render = pendulum.gui(OdeEngine)
from eagerx.wrappers import Flatten
from stable_baselines3.common.env_checker import check_env
train_env = Flatten(train_env)
test_env = Flatten(test_env)

[31m[WARN]: Backend 'SINGLE_PROCESS' does not support multiprocessing, so all nodes are launched in the ENVIRONMENT process.[0m
action_space:  Dict(voltage:Space([-4.], [4.], (1,), float32))
observation_space:  Dict(angle:Box([[-1. -1. -1. -1.]], [[1. 1. 1. 1.]], (1, 4), float32), angular_velocity:Box([[-999. -999.]], [[999. 999.]], (1, 2), float32))


# 2 train

# 2.1 SAC

In [4]:
sac_model = sb3.SAC("MlpPolicy", train_env, verbose=1, learning_rate=7e-4, tensorboard_log="./tensorboard/sac_doupen_tensorboard/")
train_env.render("human")
sac_model.learn(total_timesteps=int(10000))
train_env.close()
sac_model.save("./model/double_pendulum_sac")

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to ./tensorboard/sac_doupen_tensorboard/SAC_12


QObject::moveToThread: Current thread (0x7f24d8001930) is not the object's thread (0x7f24d80d1570).
Cannot move to target thread (0x7f24d8001930)

QObject::moveToThread: Current thread (0x7f24d8001930) is not the object's thread (0x7f24d80d1570).
Cannot move to target thread (0x7f24d8001930)

QObject::moveToThread: Current thread (0x7f24d8001930) is not the object's thread (0x7f24d80d1570).
Cannot move to target thread (0x7f24d8001930)

QObject::moveToThread: Current thread (0x7f24d8001930) is not the object's thread (0x7f24d80d1570).
Cannot move to target thread (0x7f24d8001930)

QObject::moveToThread: Current thread (0x7f24d8001930) is not the object's thread (0x7f24d80d1570).
Cannot move to target thread (0x7f24d8001930)

QObject::moveToThread: Current thread (0x7f24d8001930) is not the object's thread (0x7f24d80d1570).
Cannot move to target thread (0x7f24d8001930)

QObject::moveToThread: Current thread (0x7f24d8001930) is not the object's thread (0x7f24d80d1570).
Cannot move to tar

KeyboardInterrupt: 

In [4]:
from stable_baselines3.common.evaluation import evaluate_policy
# sac_model = sb3.SAC.load("./model/double_pendulum_sac.zip")
sac_model = sb3.SAC.load("./double_pendulum_sac.zip")
mean_reward, std_reward = evaluate_policy(sac_model, test_env, n_eval_episodes=10, render=True)
print("mean_reward:",mean_reward,"std_reward:",std_reward)

QObject::moveToThread: Current thread (0x7f24dc0019c0) is not the object's thread (0x7f24dc100c50).
Cannot move to target thread (0x7f24dc0019c0)

QObject::moveToThread: Current thread (0x7f24dc0019c0) is not the object's thread (0x7f24dc100c50).
Cannot move to target thread (0x7f24dc0019c0)

QObject::moveToThread: Current thread (0x7f24dc0019c0) is not the object's thread (0x7f24dc100c50).
Cannot move to target thread (0x7f24dc0019c0)

QObject::moveToThread: Current thread (0x7f24dc0019c0) is not the object's thread (0x7f24dc100c50).
Cannot move to target thread (0x7f24dc0019c0)

QObject::moveToThread: Current thread (0x7f24dc0019c0) is not the object's thread (0x7f24dc100c50).
Cannot move to target thread (0x7f24dc0019c0)

QObject::moveToThread: Current thread (0x7f24dc0019c0) is not the object's thread (0x7f24dc100c50).
Cannot move to target thread (0x7f24dc0019c0)

QObject::moveToThread: Current thread (0x7f24dc0019c0) is not the object's thread (0x7f24dc100c50).
Cannot move to tar

reward: 97.38063159402375
reward: 95.95907044266751
reward: 93.25270033367876
reward: 90.78136028438419
reward: 93.361631543897
reward: 92.68520005555807
reward: 92.77249300220618
reward: 94.3985243141767
reward: 93.12875050517955
reward: 95.3325056754544
reward: 81.96444041758139
reward: 76.00945084014957
reward: 94.41517933105
reward: 92.15052833761834
reward: 92.200313852986
reward: 92.93450090595188
reward: 92.66680952803739
reward: 94.29514461445157
reward: 92.90853695224784
reward: 98.51368341639066
reward: 97.03153166290186
reward: 93.88636577785967
reward: 91.17595833887212
reward: 92.62289639023625
reward: 92.94377759110391
reward: 92.65619758100206
reward: 94.31076246891752
reward: 92.94470619331098
reward: 96.35078434486596
reward: 76.25136457732317
reward: 94.38307468343584
reward: 91.96682123126355
reward: 92.16779189088308
reward: 92.83517960932309
reward: 92.69570251311096
reward: 94.30735140831386
reward: 92.93654363638983
reward: 95.92714045289813
reward: 79.6642772191

# 2.2 DDPG

In [4]:
ddpg_model = sb3.DDPG("MlpPolicy", train_env, verbose=1, learning_rate=5e-4, tensorboard_log="./tensorboard/ddpg_doupen_tensorboard/")
train_env.render("human")
ddpg_model.learn(total_timesteps=int(10000))
train_env.close()
ddpg_model.save("./model/double_pendulum_ddpg")

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to ./tensorboard/ddpg_doupen_tensorboard/DDPG_3


QObject::moveToThread: Current thread (0x7f46c40019b0) is not the object's thread (0x7f46c41291c0).
Cannot move to target thread (0x7f46c40019b0)

QObject::moveToThread: Current thread (0x7f46c40019b0) is not the object's thread (0x7f46c41291c0).
Cannot move to target thread (0x7f46c40019b0)

QObject::moveToThread: Current thread (0x7f46c40019b0) is not the object's thread (0x7f46c41291c0).
Cannot move to target thread (0x7f46c40019b0)

QObject::moveToThread: Current thread (0x7f46c40019b0) is not the object's thread (0x7f46c41291c0).
Cannot move to target thread (0x7f46c40019b0)

QObject::moveToThread: Current thread (0x7f46c40019b0) is not the object's thread (0x7f46c41291c0).
Cannot move to target thread (0x7f46c40019b0)

QObject::moveToThread: Current thread (0x7f46c40019b0) is not the object's thread (0x7f46c41291c0).
Cannot move to target thread (0x7f46c40019b0)

QObject::moveToThread: Current thread (0x7f46c40019b0) is not the object's thread (0x7f46c41291c0).
Cannot move to tar

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 101       |
|    ep_rew_mean     | -1.04e+04 |
| time/              |           |
|    episodes        | 4         |
|    fps             | 77        |
|    time_elapsed    | 5         |
|    total_timesteps | 404       |
| train/             |           |
|    actor_loss      | 127       |
|    critic_loss     | 1.73e+03  |
|    learning_rate   | 0.0005    |
|    n_updates       | 303       |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 101       |
|    ep_rew_mean     | -1.03e+04 |
| time/              |           |
|    episodes        | 8         |
|    fps             | 89        |
|    time_elapsed    | 9         |
|    total_timesteps | 808       |
| train/             |           |
|    actor_loss      | 288       |
|    critic_loss     | 633       |
|    learning_rate   | 0.0005    |
|    n_updates      

In [5]:
from stable_baselines3.common.evaluation import evaluate_policy
ddpg_model = sb3.DDPG.load("./model/double_pendulum_ddpg.zip")
mean_reward, std_reward = evaluate_policy(ddpg_model, test_env, n_eval_episodes=10, render=True)
print("mean_reward:",mean_reward,"std_reward:",std_reward)

QObject::moveToThread: Current thread (0x7f45f4001770) is not the object's thread (0x7f45f41251a0).
Cannot move to target thread (0x7f45f4001770)

QObject::moveToThread: Current thread (0x7f45f4001770) is not the object's thread (0x7f45f41251a0).
Cannot move to target thread (0x7f45f4001770)

QObject::moveToThread: Current thread (0x7f45f4001770) is not the object's thread (0x7f45f41251a0).
Cannot move to target thread (0x7f45f4001770)

QObject::moveToThread: Current thread (0x7f45f4001770) is not the object's thread (0x7f45f41251a0).
Cannot move to target thread (0x7f45f4001770)

QObject::moveToThread: Current thread (0x7f45f4001770) is not the object's thread (0x7f45f41251a0).
Cannot move to target thread (0x7f45f4001770)

QObject::moveToThread: Current thread (0x7f45f4001770) is not the object's thread (0x7f45f41251a0).
Cannot move to target thread (0x7f45f4001770)

QObject::moveToThread: Current thread (0x7f45f4001770) is not the object's thread (0x7f45f41251a0).
Cannot move to tar

mean_reward: -16366.063647460938 std_reward: 715.1643758213144


# 2.3 PPO

In [24]:
ppo_model = sb3.PPO("MlpPolicy", train_env, verbose=1, learning_rate=5e-4, tensorboard_log="./tensorboard/ppo_doupen_tensorboard/")
train_env.render("human")
ppo_model.learn(total_timesteps=int(10000))
train_env.close()
ddpg_model.save("./model/double_pendulum_ppo")

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to ./tensorboard/ppo_doupen_tensorboard/PPO_1
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 101       |
|    ep_rew_mean     | -1.08e+04 |
| time/              |           |
|    fps             | 129       |
|    iterations      | 1         |
|    time_elapsed    | 15        |
|    total_timesteps | 2048      |
----------------------------------


KeyboardInterrupt: 

In [None]:
from stable_baselines3.common.evaluation import evaluate_policy
ppo_model = sb3.DDPG.load("./model/double_pendulum_ppo.zip")
mean_reward, std_reward = evaluate_policy(ppo_model, test_env, n_eval_episodes=10, render=True)
print("mean_reward:",mean_reward,"std_reward:",std_reward)