In [1]:
# Derived from keras-rl
import opensim as osim
import numpy as np
import sys

from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Flatten, Input, concatenate
from keras.optimizers import Adam

import numpy as np

from rl.agents import DDPGAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess

from osim.env.arm import ArmEnv

from keras.optimizers import RMSprop

import argparse
import math

Using TensorFlow backend.


In [2]:
# Load the arm environment (this notebook trains on ArmEnv, not a walking model).
# NOTE(review): the positional True presumably enables visualization -- confirm
# against ArmEnv's constructor signature.
env = ArmEnv(True)
env.reset()

# Number of action outputs the networks must handle. The actor, critic and agent
# cells all reference `nb_actions`, but it was never assigned anywhere in the
# notebook, so "Restart Kernel -> Run All" failed with a NameError. It is taken
# from env.noutput, the same attribute used to size the exploration noise.
nb_actions = env.noutput

# Total number of steps in training
nallsteps = 10000

In [3]:
# Actor network for DDPG: maps an observation window to an action vector.
# The leading (1,) in the input shape is the observation window dimension,
# which Flatten collapses before the fully connected layers.
actor = Sequential()
actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))

# Three identical hidden stages: 32 units followed by ReLU.
actor.add(Dense(32))
actor.add(Activation('relu'))
actor.add(Dense(32))
actor.add(Activation('relu'))
actor.add(Dense(32))
actor.add(Activation('relu'))

# Output stage: one unit per action; sigmoid bounds every output to (0, 1).
actor.add(Dense(nb_actions))
actor.add(Activation('sigmoid'))

# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
actor.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 14)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 32)                480       
_________________________________________________________________
activation_1 (Activation)    (None, 32)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 32)                1056      
_________________________________________________________________
activation_2 (Activation)    (None, 32)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 32)                1056      
_________________________________________________________________
activation_3 (Activation)    (None, 32)                0         
__________

In [4]:
# Critic network for DDPG: takes an (action, observation) pair and produces a
# single scalar output.
action_input = Input(shape=(nb_actions,), name='action_input')
observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')

# Collapse the observation window dimension so it can be joined with the action.
flattened_observation = Flatten()(observation_input)
x = concatenate([action_input, flattened_observation])

# Three identical hidden stages: 64 units followed by ReLU.
x = Dense(64)(x)
x = Activation('relu')(x)
x = Dense(64)(x)
x = Activation('relu')(x)
x = Dense(64)(x)
x = Activation('relu')(x)

# Single unbounded (linear) output unit.
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)

# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
critic.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
observation_input (InputLayer)   (None, 1, 14)         0                                            
____________________________________________________________________________________________________
action_input (InputLayer)        (None, 6)             0                                            
____________________________________________________________________________________________________
flatten_2 (Flatten)              (None, 14)            0                                            
____________________________________________________________________________________________________
concatenate_1 (Concatenate)      (None, 20)            0                                            
___________________________________________________________________________________________

In [5]:
# Build the DDPG agent from the actor/critic pair and compile it.

# Experience storage; window_length=1 lines up with the leading (1,) used in the
# network input shapes.
memory = SequentialMemory(limit=100000, window_length=1)

# Ornstein-Uhlenbeck noise process with one component per environment output.
random_process = OrnsteinUhlenbeckProcess(
    theta=.15,
    mu=0.,
    sigma=.2,
    size=env.noutput,
)

agent = DDPGAgent(
    nb_actions=nb_actions,
    actor=actor,
    critic=critic,
    critic_action_input=action_input,
    memory=memory,
    nb_steps_warmup_critic=100,
    nb_steps_warmup_actor=100,
    random_process=random_process,
    gamma=.99,
    target_model_update=1e-3,
    delta_clip=1.,
)

# Adam with gradient-norm clipping; mean absolute error is tracked as a metric.
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

In [6]:
# Train the agent. Visualization is switched off here because rendering slows
# training down considerably. Training can be aborted early with Ctrl + C
# (or by interrupting the kernel).
#
# The step budget comes from `nallsteps` (set alongside the environment) rather
# than a separate hard-coded literal: the call previously asked for 50000 steps
# while the notebook's configured total -- and its recorded output -- was 10000.
#
# The trained weights are NOT written to disk by this cell; to persist them call
# agent.save_weights(<path>, overwrite=True) with a real file path.
agent.fit(env, nb_steps=nallsteps, visualize=False, verbose=1, nb_max_episode_steps=200, log_interval=10000)

Training for 10000 steps ...
Interval 1 (0 steps performed)

Distance: 1.462456
True positions: (-0.775025,-0.675188)
Reached: (-1.566736,-0.004444)
  100/10000 [..............................] - ETA: 182s - reward: -0.7521
Distance: 0.658063
True positions: (0.149549,-0.228965)
Reached: (-0.406994,-0.330485)
  200/10000 [..............................] - ETA: 194s - reward: -0.7154
Distance: 1.648409
True positions: (-0.436057,-0.518591)
Reached: (-1.567838,-0.001964)
  300/10000 [..............................] - ETA: 240s - reward: -0.7834
Distance: 1.039672
True positions: (0.041094,-0.158712)
Reached: (-0.781505,-0.375786)
  400/10000 [>.............................] - ETA: 227s - reward: -0.7174
Distance: 1.555081
True positions: (-0.456883,-0.446139)
Reached: (-1.567824,-0.001999)
  500/10000 [>.............................] - ETA: 229s - reward: -0.7441
Distance: 0.472841
True positions: (-0.756667,-0.663398)
Reached: (-0.869688,-0.303578)
  600/10000 [>........................

Distance: 1.857073
True positions: (-0.531581,-0.833223)
Reached: (-1.564839,-0.009408)
Distance: 2.968165
True positions: (-0.956974,-0.313639)
Reached: (-0.528744,-2.853575)
Distance: 1.761903
True positions: (-0.115508,-0.311346)
Reached: (-1.567891,-0.001827)
Distance: 1.845603
True positions: (-0.982332,-0.100574)
Reached: (0.112800,-0.851045)
Distance: 0.942085
True positions: (-1.059572,-0.446698)
Reached: (-1.564688,-0.009730)
Distance: 1.821335
True positions: (-0.077805,-0.355466)
Reached: (0.340218,-1.758778)
Distance: 1.848909
True positions: (-0.511823,-0.794789)
Reached: (-1.567859,-0.001916)
Distance: 0.721368
True positions: (0.102164,-0.304120)
Reached: (-0.478851,-0.444473)
Distance: 1.803081
True positions: (0.067109,-0.180366)
Reached: (-1.565021,-0.009416)
Distance: 0.878687
True positions: (0.189819,-0.222802)
Reached: (-0.456033,-0.455637)
Distance: 1.675537
True positions: (-0.020565,-0.141019)
Reached: (-1.564724,-0.009641)
Distance: 2.680756
True positions: (-

done, took 250.614 seconds


<keras.callbacks.History at 0x7fdc5b614310>

In [7]:
# Evaluate the agent using the weights currently held in memory: two episodes,
# each capped at 1000 steps, without rendering.
# (Loading weights from disk is left disabled here: agent.load_weights(args.model))
agent.test(
    env,
    nb_episodes=2,
    nb_max_episode_steps=1000,
    visualize=False,
)

Testing for 5 episodes ...

Distance: 1.116082
True positions: (-0.803720,-0.364419)
Reached: (-1.564910,-0.009527)

Distance: 0.168364
True positions: (-0.610786,-0.272151)
Reached: (-0.471284,-0.243289)

Distance: 0.598890
True positions: (-1.192979,-0.217319)
Reached: (-0.700967,-0.324197)

Distance: 0.647045
True positions: (-0.388098,-0.196909)
Reached: (-0.825401,-0.406651)

Distance: 0.215215
True positions: (-0.556208,-0.379026)
Reached: (-0.399488,-0.320530)

Distance: 0.670461
True positions: (-0.915811,-0.763724)
Reached: (-0.731732,-0.277342)

Distance: 0.515910
True positions: (-0.753540,-0.108279)
Reached: (-0.991832,-0.385896)

Distance: 0.670115
True positions: (-0.180781,-0.773335)
Reached: (-0.473330,-0.395769)

Distance: 0.649070
True positions: (-0.451092,-0.961598)
Reached: (-0.442753,-0.320867)

Distance: 0.796233
True positions: (-0.516566,-0.052818)
Reached: (-1.012637,-0.352980)
Episode 1: reward: -498.678, steps: 1000

Distance: 2.459794
True positions: (0.169

<keras.callbacks.History at 0x7fdc3a262bd0>

In [8]:
# Replace the agent's weights with those stored at ../models/example.h5f, then
# evaluate for two episodes of at most 1000 steps each, without rendering.
agent.load_weights("../models/example.h5f")
agent.test(
    env,
    nb_episodes=2,
    nb_max_episode_steps=1000,
    visualize=False,
)

Testing for 5 episodes ...

Distance: 1.242484
True positions: (-0.387711,-0.071820)
Reached: (-1.565560,-0.007185)

Distance: 0.094699
True positions: (-0.306534,-0.320054)
Reached: (-0.279753,-0.387973)

Distance: 0.342258
True positions: (-0.041413,-0.225383)
Reached: (-0.127826,-0.481228)

Distance: 0.071426
True positions: (0.074357,-0.365959)
Reached: (0.126484,-0.346660)

Distance: 0.273507
True positions: (-0.254339,-0.147204)
Reached: (-0.267362,-0.407688)

Distance: 0.170239
True positions: (-0.658603,-0.784083)
Reached: (-0.772865,-0.728107)

Distance: 0.219391
True positions: (0.178447,-0.411043)
Reached: (0.255614,-0.268819)

Distance: 0.035296
True positions: (-0.170949,-0.637620)
Reached: (-0.139976,-0.641944)

Distance: 0.231731
True positions: (-0.592417,-0.710361)
Reached: (-0.479023,-0.592024)

Distance: 0.379548
True positions: (-0.430683,-0.941759)
Reached: (-0.332039,-0.660855)
Episode 1: reward: -253.070, steps: 1000

Distance: 1.895049
True positions: (-0.574247

<keras.callbacks.History at 0x7fdc3a167c50>