# OpenSim RL Training
#### Imports

In [1]:
# Dependencies
import numpy as np
import tensorflow as tf
# Environment
from osim.env import L2RunEnv as ENV # rename environment to be used for training

  from ._conv import register_converters as _register_converters


### Agent Class
#### Imports

In [2]:
import keras
import keras.backend as K
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Flatten, Input, Concatenate
from keras.optimizers import Adam

from rl.processors import WhiteningNormalizerProcessor
from rl.agents import DDPGAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess

Using TensorFlow backend.


#### Class
Reference: https://github.com/keras-rl/keras-rl/blob/master/examples/ddpg_mujoco.py

In [3]:
class Agent:
    """Thin wrapper around a keras-rl ``DDPGAgent`` bound to one environment.

    Construction builds an actor network, a critic network, a replay memory
    and an Ornstein-Uhlenbeck exploration process, then compiles the DDPG
    agent. ``fit``/``test`` forward to the underlying agent using the stored
    environment; ``save_weights``/``load_weights`` forward to the underlying
    agent with the ``{}`` placeholder in the filename filled with "opensim".
    """

    def __init__(self,env):
        """Assemble and compile the DDPG agent for ``env``.

        ``env`` must expose ``action_space.shape`` and
        ``observation_space.shape``.
        """
        action_dim = env.action_space.shape[0]

        self.env = env
        self.actor = self.build_actor(env)
        self.critic, critic_action_input = self.build_critic(env)
        self.loss = self.build_loss()

        self.memory = SequentialMemory(limit=100000, window_length=1)
        self.random_process = OrnsteinUhlenbeckProcess(
            size=action_dim, theta=.15, mu=0., sigma=.1)

        ddpg_settings = dict(
            nb_actions=action_dim,
            actor=self.actor,
            critic=self.critic,
            critic_action_input=critic_action_input,
            memory=self.memory,
            nb_steps_warmup_critic=1000,
            nb_steps_warmup_actor=1000,
            random_process=self.random_process,
            gamma=.99,
            target_model_update=1e-3,
            processor=WhiteningNormalizerProcessor(),
        )
        self.agent = DDPGAgent(**ddpg_settings)

        # Two optimizers are supplied; presumably (actor, critic) in that
        # order -- confirm against the keras-rl DDPGAgent.compile contract.
        optimizers = [Adam(lr=1e-4), Adam(lr=1e-3)]
        # Note: the value stored in self.loss is passed as *metrics*.
        self.agent.compile(optimizers, metrics=self.loss)

    def build_loss(self):
        """Return the list handed to ``compile`` as ``metrics``."""
        return ['mse']

    def build_actor(self,env):
        """Build the actor: flatten -> 400 relu -> 300 relu -> nb_actions tanh.

        Prints the summary of the inner Sequential model and returns it
        wrapped in a functional ``Model`` with an explicit observation input.
        """
        action_dim = env.action_space.shape[0]
        window_shape = (1,) + env.observation_space.shape

        policy = Sequential()
        policy.add(Flatten(input_shape=window_shape))
        for width in (400, 300):
            policy.add(Dense(width))
            policy.add(Activation('relu'))

        # NOTE(review): Keras documents axis=0 for constraining each unit's
        # incoming weight vector in a Dense layer; axis=1 is used here --
        # confirm this is intended.
        weight_limit = keras.constraints.min_max_norm(
            min_value=0, max_value=action_dim, axis=1)
        # NOTE(review): tanh outputs lie in [-1, 1]; confirm that matches the
        # bounds of env.action_space.
        policy.add(Dense(action_dim, activation='tanh',
                         kernel_constraint=weight_limit))
        policy.summary()

        observation = Input(shape=window_shape)
        return Model(observation, policy(observation))

    def build_critic(self,env):
        """Build the critic and return ``(critic_model, action_input)``.

        The observation is flattened and passed through a 400-unit relu
        layer, concatenated with the action input, then passed through a
        300-unit relu layer and a single linear output unit. The model
        summary is printed.
        """
        action_dim = env.action_space.shape[0]
        action_input = Input(shape=(action_dim,), name='action_input')
        observation_input = Input(shape=(1,) + env.observation_space.shape,
                                  name='observation_input')

        # Observation branch.
        flat_obs = Flatten()(observation_input)
        obs_hidden = Dense(400)(flat_obs)
        obs_hidden = Activation('relu')(obs_hidden)

        # Join with the action, then reduce to one output value.
        joint = Concatenate()([obs_hidden, action_input])
        joint = Dense(300)(joint)
        joint = Activation('relu')(joint)
        q_value = Dense(1)(joint)
        q_value = Activation('linear')(q_value)

        critic = Model(inputs=[action_input, observation_input], outputs=q_value)
        critic.summary()

        return critic, action_input

    def fit(self, **kwargs):
        """Train the wrapped agent on the stored environment; kwargs are forwarded."""
        return self.agent.fit(self.env, **kwargs)

    def test(self, **kwargs):
        """Evaluate the wrapped agent on the stored environment; kwargs are forwarded."""
        return self.agent.test(self.env, **kwargs)

    def save_weights(self,filename='ddpg_{}_weights.h5f'):
        """Save weights via the wrapped agent, overwriting existing files.

        The ``{}`` placeholder in ``filename`` is filled with "opensim".
        """
        target = filename.format("opensim")
        self.agent.save_weights(target, overwrite=True)

    def load_weights(self,filename='ddpg_{}_weights.h5f'):
        """Load weights via the wrapped agent.

        The ``{}`` placeholder in ``filename`` is filled with "opensim".
        """
        source = filename.format("opensim")
        self.agent.load_weights(source)

### Environment Class

In [4]:
class TrainEnv(ENV):
    """Environment used for training; currently adds nothing on top of ENV."""
# TODO: define virtual assistant forces on agent
# TODO: define search through easier environments
# TODO: make environment harder once the agent has trained for challenge

# Run Simulation
#### Environment

In [5]:
# Create the training environment with visualization turned on,
# then reset it and keep the value returned by reset().
env = TrainEnv(visualize=True)
observation = env.reset()

WARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.
WARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.




#### Agent

In [6]:
# Build the DDPG agent for this environment; the constructor prints the
# actor and critic model summaries shown below.
agent = Agent(env)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 41)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 400)               16800     
_________________________________________________________________
activation_1 (Activation)    (None, 400)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 300)               120300    
_________________________________________________________________
activation_2 (Activation)    (None, 300)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 18)                5418      
Total params: 142,518
Trainable params: 142,518
Non-trainable params: 0
_________________________________________________________________
____

### Load previously trained weights

In [7]:
# Restore weights using the default 'ddpg_{}_weights.h5f' pattern
# (the placeholder is filled with "opensim").
# NOTE(review): presumably fails if no weights were saved by an earlier run --
# confirm, and run the training cell first on a fresh checkout.
agent.load_weights()

### Train new weights

In [8]:
# Train in small chunks so the run can be interrupted between them;
# weights are written after every chunk.
# range(0) performs no iterations: raise the count to actually train.
for i in range(0):
    print("\n\niteration:", i)
    agent.fit(nb_steps=2000, visualize=False, verbose=2)
    # Always save the new weights
    agent.save_weights()

### Test Agent

In [9]:
# Evaluate the agent for 5 episodes, each capped at 1000 steps, with rendering on.
# The call is the cell's last expression, so its return value is displayed.
agent.test(
    nb_episodes=5,
    visualize=True,
    nb_max_episode_steps=1000,
)

Testing for 5 episodes ...
Episode 1: reward: 0.468, steps: 85
Episode 2: reward: 0.459, steps: 88
Episode 3: reward: 0.469, steps: 88
Episode 4: reward: 0.468, steps: 88
Episode 5: reward: 0.473, steps: 88


<keras.callbacks.History at 0xb1aa58b38>