
# Pendulum-v0

In [1]:
import numpy as np
import gym

from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Flatten, Input, Concatenate
from keras.optimizers import Adam

from rl.agents import DDPGAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess


# Gym environment id; it is also reused further down to build the weights filename.
ENV_NAME = 'Pendulum-v0'

# Create the environment. The number of actions is read from it in a later cell.
env = gym.make(ENV_NAME)


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
## Play a few episodes with random actions, before any training.
# Every printed row is the observation the environment returned for that step.
try:
    for i_episode in range(5):
        observation = env.reset()
        for t in range(100):
            env.render()
            print(observation)
            # No policy involved yet: draw a random action from the action space.
            action = env.action_space.sample()
            observation, reward, done, info = env.step(action)
            if done:
                print("Episode finished after {} timesteps".format(t+1))
                break
finally:
    # Close the environment even when the loop raises or the kernel is
    # interrupted, so rendering resources are not left dangling.
    env.close()

[-0.60522038 -0.79605797  0.77697042]
[-0.59263662 -0.80546995  0.31428745]
[-0.61244299 -0.79051476 -0.49637946]
[-0.6450876  -0.76410863 -0.8398128 ]
[-0.69607641 -0.71796771 -1.37560273]
[-0.76055468 -0.64927388 -1.88497898]
[-0.82418531 -0.56632021 -2.09190227]
[-0.89303211 -0.44999295 -2.70553533]
[-0.95194324 -0.3062745  -3.10960923]
[-0.98761391 -0.15690368 -3.0744453 ]
[-0.99992374 -0.01234967 -2.90409456]
[-0.98952612  0.14435391 -3.14419999]
[-0.95911404  0.2830199  -2.84162552]
[-0.91527399  0.40283187 -2.55334943]
[-0.87078438  0.49166509 -1.98784279]
[-0.8223381   0.56899917 -1.82574775]
[-0.78679444  0.61721512 -1.19819921]
[-0.76271211  0.64673815 -0.76203487]
[-0.76050464  0.6493325  -0.06812829]
[-0.78048135  0.62517907  0.62690914]
[-0.81865855  0.57428057  1.27271715]
[-0.85944477  0.51122861  1.50222727]
[-0.90843353  0.41802933  2.10677585]
[-0.94835808  0.31720175  2.16995068]
[-0.97962168  0.20085162  2.4110045 ]
[-0.99648071  0.08382242  2.36612577]
[-0.99955848

[ 0.20325578  0.97912568 -0.90172849]
[ 0.22562597  0.974214   -0.45807108]
[ 0.22586012  0.97415974 -0.00480712]
[0.19045791 0.98169536 0.72394603]
[0.11670624 0.99316648 1.49311534]
[-0.00818957  0.99996646  2.50324951]
[-0.15767944  0.98749035  3.00301175]
[-0.34671359  0.93797105  3.91449666]
[-0.54492257  0.83848637  4.44463373]
[-0.74135001  0.6711186   5.17565868]
[-0.9052176   0.42494835  5.93623594]
[-0.99239709  0.12307724  6.31029439]
[-0.98132255 -0.19236955  6.33932663]
[-0.86887978 -0.49502316  6.48570523]
[-0.68301185 -0.7304073   6.02113194]
[-0.44815809 -0.89395432  5.74348755]
[-0.21894568 -0.97573705  4.87940123]
[-0.02373308 -0.99971833  3.93996984]
[ 0.14620759 -0.98925393  3.40937759]
[ 0.28944244 -0.95719542  2.93821443]
[ 0.38034084 -0.92484639  1.9304103 ]
[ 0.43053137 -0.90257561  1.09833239]
[ 0.45439693 -0.89079932  0.53227348]
[ 0.43923108 -0.89837412 -0.33905008]
[ 0.38024109 -0.92488741 -1.29371281]
[ 0.27968694 -0.96009125 -2.13177869]
[ 0.14624769 -0.98

[-0.49932012 -0.86641757 -5.92204911]
[-0.74268543 -0.66964046 -6.28515702]
[-0.92810572 -0.37231677 -7.04441126]
[-0.99960628 -0.02805857 -7.06883526]
[-0.94442563  0.32872517 -7.26031315]
[-0.77934762  0.62659181 -6.84437878]
[-0.54696435  0.8371559  -6.29780395]
[-0.28026033  0.95992403 -5.89336982]
[-0.03374269  0.99943055 -5.00632399]
[ 0.18247611  0.98321029 -4.34506695]
[ 0.36310903  0.93174666 -3.76196518]
[ 0.49585545  0.86840507 -2.94434257]
[ 0.59169613  0.80616108 -2.2868308 ]
[ 0.64799253  0.76164669 -1.43569197]
[ 0.68745653  0.72622553 -1.0607026 ]
[ 0.70462752  0.70957738 -0.47834342]
[ 0.71313853  0.70102314 -0.24134114]
[0.69288651 0.72104666 0.56961062]
[0.64070886 0.76778393 1.4012689 ]
[0.55232789 0.83362696 2.20534041]
[0.43247291 0.90164693 2.7584101 ]
[0.27521987 0.96138131 3.3683049 ]
[0.05950147 0.99822822 4.38563515]
[-0.19844855  0.98011131  5.18622745]
[-0.4693991   0.88298612  5.77670773]
[-0.72765184  0.68594664  6.52564696]
[-0.91952853  0.39302327  7.03

In [3]:

# Seed NumPy and the environment with the same fixed value.
np.random.seed(123)
env.seed(123)

# The rest of the notebook sizes the networks from a one-dimensional action
# space; the assertion guards that assumption before nb_actions is read.
action_shape = env.action_space.shape
assert len(action_shape) == 1
nb_actions = action_shape[0]


In [4]:
## ACTOR
# A very simple policy network: observation in, action out.
# Three hidden Dense(16) + ReLU stages followed by a linear output with one
# unit per action dimension.
actor = Sequential([
    # The input carries a leading dimension of 1 in front of the observation
    # shape (presumably the memory window, window_length=1); flatten it away.
    Flatten(input_shape=(1,) + env.observation_space.shape),
    Dense(16),
    Activation('relu'),
    Dense(16),
    Activation('relu'),
    Dense(16),
    Activation('relu'),
    Dense(nb_actions),
    Activation('linear'),
])
# summary() prints the table itself and returns None, so wrapping it in
# print() would only append a stray "None" line to the output.
actor.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 3)                 0         
_________________________________________________________________
dense_1 (Dense)              (None, 16)                64        
_________________________________________________________________
activation_1 (Activation)    (None, 16)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 16)                272       
_________________________________________________________________
activation_2 (Activation)    (None, 16)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 16)                272       
_________________________________________________________________
activation_3 (Activation)    (None, 16)                0         
__________

In [5]:
### CRITIC
# The critic takes an (action, observation) pair and outputs a single scalar.
# Both inputs are named so they can be identified in the model summary, and
# action_input is handed to the agent later as critic_action_input.
action_input = Input(shape=(nb_actions,), name='action_input')
observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')

# Drop the leading dimension of 1 so the observation can be joined with the action.
flattened_observation = Flatten()(observation_input)
x = Concatenate()([action_input, flattened_observation])

# Three hidden Dense(32) + ReLU stages.
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)

# Single linear output unit.
x = Dense(1)(x)
x = Activation('linear')(x)

critic = Model(inputs=[action_input, observation_input], outputs=x)
# summary() prints the table itself and returns None, so wrapping it in
# print() would only append a stray "None" line to the output.
critic.summary()


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
observation_input (InputLayer)  (None, 1, 3)         0                                            
__________________________________________________________________________________________________
action_input (InputLayer)       (None, 1)            0                                            
__________________________________________________________________________________________________
flatten_2 (Flatten)             (None, 3)            0           observation_input[0][0]          
__________________________________________________________________________________________________
concatenate_1 (Concatenate)     (None, 4)            0           action_input[0][0]               
                                                                 flatten_2[0][0]                  
__________

In [6]:

# Last step before training/testing: wire the actor, the critic, a replay
# memory and an exploration-noise process into a DDPG agent, then compile it.
# Any built-in Keras optimizer works here, and Keras metrics can be passed too.
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(
    size=nb_actions,
    theta=.15,
    mu=0.,
    sigma=.3,
)
agent = DDPGAgent(
    nb_actions=nb_actions,
    actor=actor,
    critic=critic,
    critic_action_input=action_input,
    memory=memory,
    nb_steps_warmup_critic=100,
    nb_steps_warmup_actor=100,
    random_process=random_process,
    gamma=.99,
    target_model_update=1e-3,
)
optimizer = Adam(lr=.0001, clipnorm=1.)
agent.compile(optimizer, metrics=['mae'])


In [7]:

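# Okay, now it's time to learn something! Training is commented out below because
# previously saved weights are loaded in the next cell; uncomment both calls to retrain.
# Rendering is switched off (visualize=False) because it slows down training quite a lot.
# You can always safely abort the training prematurely by interrupting the kernel (Ctrl + C).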
# agent.fit(env, nb_steps=100000, visualize=False, verbose=1, nb_max_episode_steps=200)

# After training is done, we save the final weights.
# agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)



In [8]:

# Weights filename derived from the environment id. Training and saving are
# commented out in this notebook, so the weights must already exist on disk
# from an earlier run, otherwise the load below fails.
# NOTE(review): the agent may split this name into separate actor/critic
# files - confirm against the keras-rl version in use.
weights_filename = 'ddpg_{}_weights.h5f'.format(ENV_NAME)

agent.load_weights(weights_filename)


In [11]:
# Finally, evaluate the agent for a single episode (nb_episodes=1), capped at
# 200 steps, with rendering enabled so the behaviour can be watched.
agent.test(env, nb_episodes=1, visualize=True, nb_max_episode_steps=200)

Testing for 1 episodes ...
Episode 1: reward: -126.412, steps: 200


<keras.callbacks.History at 0x7fbeb8238110>

In [12]:
# Close the environment now that the evaluation above has run.
env.close()

## Research Infinite Solutions LLP

by [Research Infinite Solutions](http://www.researchinfinitesolutions.com/)
(https://busreservation.ris-ai.com/)
(https://www.bets-ai.com/)

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.