# 1. Create an OpenAI Gym-like environment
- This example uses DDPG (Deep Deterministic Policy Gradient) with `pybullet_envs`.
- `pybullet_envs` prerequisites: OpenAI Gym and PyBullet.

```bash
pip install gym
pip install pybullet
```

In [1]:
import gym
import pybullet_envs
import time
# Id of the simulated task. The warning printed below shows it resolves to
# pybullet_envs.gym_pendulum_envs.InvertedPendulumBulletEnv, which is why
# pybullet_envs is imported above even though it is never referenced by name.
ENV_ID = "InvertedPendulumBulletEnv-v0"
env = gym.make(ENV_ID)

# Request the "human" render mode; as the last expression of the cell, its
# return value is what the notebook displays.
# NOTE(review): presumably this must be requested before the first reset for
# the PyBullet GUI window to open - confirm against the pybullet_envs docs.
env.render(mode="human")

[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
[33mWARN: Environment '<class 'pybullet_envs.gym_pendulum_envs.InvertedPendulumBulletEnv'>' has deprecated methods. Compatibility code invoked.[0m


array([], dtype=float64)

In [2]:
# Report each Gym space followed by its upper/lower bounds: first the action
# space, then the observation (state) space.
space_report = (
    ('action space:', 'action space high,low :', env.action_space),
    ('state space:', 'state space high,low :', env.observation_space),
)
for space_label, bounds_label, space in space_report:
    print(space_label, space)
    print(bounds_label, space.high, space.low)

action space: Box(1,)
action space high,low : [1.] [-1.]
state space: Box(5,)
state space high,low : [inf inf inf inf inf] [-inf -inf -inf -inf -inf]


# 2. Import RL Algorithm

The base agent needs a core agent (the RL algorithm) and an environment to interact with.

In [4]:
from rlagent.agents import ExperienceReplayAgent
from rlagent.algorithms import DDPG

In [5]:
# Shapes of the observation and action vectors, read from the Gym spaces.
state_shape, action_shape = env.observation_space.shape, env.action_space.shape

# Training-time DDPG settings.
# NOTE(review): tau is presumably the soft target-network update rate, and
# action_noise / add_memory presumably enable exploration noise and
# replay-buffer writes - confirm against rlagent.algorithms.DDPG.
ddpg_train_options = dict(
    tau=0.01,
    actor_lr=0.0001,
    critic_lr=0.001,
    action_noise=True,
    add_memory=True,
)
ddpg = DDPG(state_shape, action_shape, **ddpg_train_options)

# Wrap the algorithm together with the environment it interacts with.
# NOTE(review): save_steps / model_dir look like the checkpoint interval and
# checkpoint directory - confirm against ExperienceReplayAgent.
tf_agent = ExperienceReplayAgent(
    agent=ddpg,
    env=env,
    save_steps=10000,
    model_dir='model',
)

In [6]:
# Print the network summary of the algorithm object held by the agent
# (presumably the DDPG instance passed as agent=). The table below lists the
# actor, the critic and their target copies with per-network parameter counts.
tf_agent.agent.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
actor (ActorFF)              (None, 1)                 9089      
_________________________________________________________________
critic (QCriticFF)           (None, 1)                 9153      
_________________________________________________________________
target_actor (ActorFF)       (None, 1)                 9089      
_________________________________________________________________
target_critic (QCriticFF)    (None, 1)                 9153      
Total params: 36,484
Trainable params: 36,484
Non-trainable params: 0
_________________________________________________________________


# 3. Train

In [7]:
# Training budget passed to the agent as max_training_steps.
# NOTE(review): the log's "trained steps" counter stays at 0 until the replay
# buffer has been filled, so this presumably counts gradient updates rather
# than environment steps - confirm against rlagent.
MAX_TRAINING_STEPS = 20000
tf_agent.train(max_training_steps=MAX_TRAINING_STEPS)

INFO:tensorflow:Done running initial ops.
INFO:tensorflow: Episode 1: total reward=23.0000, episode steps=23, trained steps=0
INFO:tensorflow: Episode 2: total reward=20.0000, episode steps=20, trained steps=0
INFO:tensorflow: Episode 3: total reward=20.0000, episode steps=20, trained steps=0
INFO:tensorflow: Episode 4: total reward=25.0000, episode steps=25, trained steps=0
INFO:tensorflow: Episode 5: total reward=20.0000, episode steps=20, trained steps=0
INFO:tensorflow:Added 135 to ReplayBuffer. Starting training.
INFO:tensorflow: Episode 6: total reward=27.0000, episode steps=27, trained steps=0
INFO:tensorflow: Episode 7: total reward=21.0000, episode steps=21, trained steps=20
INFO:tensorflow: Episode 8: total reward=25.0000, episode steps=25, trained steps=45
INFO:tensorflow: Episode 9: total reward=28.0000, episode steps=28, trained steps=73
INFO:tensorflow: Episode 10: total reward=25.0000, episode steps=25, trained steps=98
INFO:tensorflow: Episode 11: total reward=26.0000, 

INFO:tensorflow: Episode 94: total reward=115.0000, episode steps=115, trained steps=6011
INFO:tensorflow: Episode 95: total reward=112.0000, episode steps=112, trained steps=6123
INFO:tensorflow: Episode 96: total reward=223.0000, episode steps=223, trained steps=6346
INFO:tensorflow: Episode 97: total reward=135.0000, episode steps=135, trained steps=6481
INFO:tensorflow: Episode 98: total reward=115.0000, episode steps=115, trained steps=6596
INFO:tensorflow: Episode 99: total reward=171.0000, episode steps=171, trained steps=6767
INFO:tensorflow: Episode 100: total reward=186.0000, episode steps=186, trained steps=6953
INFO:tensorflow: Episode 101: total reward=165.0000, episode steps=165, trained steps=7118
INFO:tensorflow: Episode 102: total reward=81.0000, episode steps=81, trained steps=7199
INFO:tensorflow: Episode 103: total reward=107.0000, episode steps=107, trained steps=7306
INFO:tensorflow: Episode 104: total reward=83.0000, episode steps=83, trained steps=7389
INFO:tens

# 4. Check Trained Model

In [1]:
import gym
import pybullet_envs
# This section restarts from execution count 1, i.e. a fresh kernel, so the
# environment is created again rather than reused from the training session.
ENV_ID = "InvertedPendulumBulletEnv-v0"
env = gym.make(ENV_ID)

# Request the "human" render mode; as the last expression of the cell, its
# return value is what the notebook displays.
# NOTE(review): presumably this must be requested before the first reset for
# the PyBullet GUI window to open - confirm against the pybullet_envs docs.
env.render(mode="human")

[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
[33mWARN: Environment '<class 'pybullet_envs.gym_pendulum_envs.InvertedPendulumBulletEnv'>' has deprecated methods. Compatibility code invoked.[0m


array([], dtype=float64)

In [3]:
from rlagent.agents import ExperienceReplayAgent
from rlagent.algorithms import DDPG

# Shapes of the observation and action vectors, read from the Gym spaces.
state_shape, action_shape = env.observation_space.shape, env.action_space.shape

# Same learning hyperparameters as the training run, but with action_noise and
# add_memory switched off for evaluation.
# NOTE(review): presumably this disables exploration noise and replay-buffer
# writes - confirm against rlagent.algorithms.DDPG.
ddpg_eval_options = dict(
    tau=0.01,
    actor_lr=0.0001,
    critic_lr=0.001,
    action_noise=False,
    add_memory=False,
)
ddpg = DDPG(state_shape, action_shape, **ddpg_eval_options)

tf_agent = ExperienceReplayAgent(
    agent=ddpg,
    env=env,
    save_steps=10000,
    model_dir='model',
)

# Restore the saved parameters; the log line below confirms the restore from
# this exact path.
tf_agent.load_model(model_path='model/model-19999')

INFO:tensorflow:Restoring parameters from model/model-19999


In [4]:
# Run the restored agent in the environment. In the recorded run it kept
# producing episodes (each logged with 1000 steps and "trained steps" fixed at
# 20000) until the cell was stopped by hand (KeyboardInterrupt).
# NOTE(review): act() appears to have no default episode limit - confirm
# against rlagent and pass one if the API offers it.
tf_agent.act()

INFO:tensorflow: Episode 1: total reward=1000.0000, episode steps=1000, trained steps=20000
INFO:tensorflow: Episode 2: total reward=1000.0000, episode steps=1000, trained steps=20000
INFO:tensorflow: Episode 3: total reward=1000.0000, episode steps=1000, trained steps=20000


KeyboardInterrupt: 