In [1]:
%load_ext autoreload
%autoreload 2

import gym
import numpy as np
import warnings

# Silence every Python warning for the remainder of the kernel session.
warnings.simplefilter('ignore')

# Create the environment and report the bounds of its action and observation spaces.
env = gym.make('MountainCarContinuous-v0')

# The space bounds are per-dimension arrays (the observation bounds are indexed
# the same way below). Index the single action dimension explicitly: handing a
# whole array to '%.3f' relies on implicit array-to-scalar conversion, which
# NumPy >= 1.25 deprecates and which raises TypeError for multi-element arrays.
print('Continuous action space: (%.3f to %.3f)'%
      (env.action_space.low[0], env.action_space.high[0]))
print('Reward range: %s'%(str(env.reward_range)))

# One line per observation dimension: index, lower bound, upper bound.
for i in range(len(env.observation_space.low)):
    print('Observation range, dimension %i: (%.3f to %.3f)'%
          (i, env.observation_space.low[i], env.observation_space.high[i]))

Continuous action space: (-1.000 to 1.000)
Reward range: (-inf, inf)
Observation range, dimension 0: (-1.200 to 0.600)
Observation range, dimension 1: (-0.070 to 0.070)


In [2]:
from agent import DDPG
from visualizations import create_animation

Using TensorFlow backend.


In [3]:
# Build the DDPG agent. Hyperparameters are listed one per line and grouped by
# name prefix so that individual values are easy to find and tweak.
agent = DDPG(
    env,
    train_during_episode=True,
    # ou_* settings
    ou_mu=0,
    ou_theta=.05,
    ou_sigma=.25,
    # discounting and replay buffer
    discount_factor=.999,
    replay_buffer_size=10000,
    replay_batch_size=1024,
    # tau_* settings
    tau_actor=.3,
    tau_critic=.1,
    # activations (the actor summary printed below shows LeakyReLU layers)
    relu_alpha_actor=.01,
    relu_alpha_critic=.01,
    activation_fn_actor='tanh',
    # learning rates
    lr_actor=.0001,
    lr_critic=.005,
    # regularisation and batch-norm momentum
    l2_reg_actor=.01,
    l2_reg_critic=.01,
    bn_momentum_actor=0,
    bn_momentum_critic=.7,
    # hidden layer widths; the critic takes two lists of widths
    hidden_layer_sizes_actor=[16, 32, 16],
    hidden_layer_sizes_critic=[[16, 32], [16, 32]],
)

# Print the model summaries (the output starts with "Actor model summary:").
agent.print_summary()

Actor model summary:
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
states (InputLayer)          (None, 2)                 0         
_________________________________________________________________
dense_1 (Dense)              (None, 16)                48        
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 16)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 32)                544       
_________________________________________________________________
leaky_re_lu_2 (LeakyReLU)    (None, 32)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 16)                528       
_________________________________________________________________
leaky_re_lu_3 (LeakyReLU)    (None, 16)                

In [4]:
# Run training via train_n_episodes with a count of 50. Starting from eps=1 with
# eps_decay=1/50, the logged epsilon falls by 0.02 per episode (0.98, 0.96, ...).
agent.train_n_episodes(
    50,
    eps=1,
    eps_decay=1/50,
    action_repeat=5,
    run_tests=True,
    gen_q_a_frames_every_n_steps=10,
)

Episode 1 - epsilon: 0.98, memory size: 39, training score: 94.65, test score: -0.02
Episode 2 - epsilon: 0.96, memory size: 57, training score: 95.78, test score: -0.01
Episode 3 - epsilon: 0.94, memory size: 118, training score: 90.21, test score: -0.01
Episode 4 - epsilon: 0.92, memory size: 147, training score: 94.48, test score: -0.02
Episode 5 - epsilon: 0.90, memory size: 239, training score: 84.68, test score: -0.02
Episode 6 - epsilon: 0.88, memory size: 308, training score: 87.69, test score: -0.01
Episode 7 - epsilon: 0.86, memory size: 498, training score: 69.97, test score: -0.01
Episode 8 - epsilon: 0.84, memory size: 612, training score: 85.64, test score: -0.02
Episode 9 - epsilon: 0.82, memory size: 717, training score: 78.70, test score: -0.01
Episode 10 - epsilon: 0.80, memory size: 866, training score: 78.54, test score: -0.01
Episode 11 - epsilon: 0.78, memory size: 1066, training score: -35.15, test score: -0.23
Episode 12 - epsilon: 0.76, memory size: 1118, train

In [8]:
# Export the training animation for this agent; with display_mode='video_file'
# the run reports "Video saved to training_animation_<timestamp>.mp4".
create_animation(
    agent,
    display_mode='video_file',
    every_n_steps=10,
)

Using ffmpeg at '/Users/sam/mlenv/lib/python3.6/site-packages/imageio_ffmpeg/binaries/ffmpeg-osx64-v4.1'.
Video saved to training_animation_1561157912.mp4.
