In [1]:
%matplotlib tk
import os
import numpy as np
from kusanagi.ghost import control
from kusanagi.ghost import regression
from kusanagi.shell import cartpole
from kusanagi.ghost.algorithms import pilco_, mc_pilco_
from kusanagi.ghost.optimizers import ScipyOptimizer, SGDOptimizer
from kusanagi.base import apply_controller, train_dynamics, ExperienceDataset
from kusanagi import utils
from functools import partial
#np.random.seed(1337)
np.set_printoptions(linewidth=500)

from matplotlib import pyplot as plt
import theano

Using cuDNN version 6021 on context None
Mapped name None to device cuda0: TITAN Xp (0000:01:00.0)


In [2]:
# setup output directory
utils.set_output_dir(os.path.join(utils.get_output_dir(), 'cartpole'))

params = cartpole.default_params()
n_rnd = 1                                                # number of random initial trials
n_opt = 100                                              #learning iterations
n_samples = 100
H = params['max_steps']
gamma = params['discount']
angle_dims = params['angle_dims']

# initial state distribution
p0 = params['state0_dist']
D = p0.mean.size

# init environment
env = cartpole.Cartpole(**params['plant'])

# init policy
pol = control.NNPolicy(p0.mean, **params['policy'])
#pol.build_network(network_spec=pol.get_default_network_spec(hidden_dims=[20, 20],p=0.05))
#pol = control.RBFPolicy(**params['policy'])
randpol = control.RandPolicy(maxU=pol.maxU)

# init dynmodel
#dyn = regression.SSGP_UI(**params['dynamics_model'])
dyn = regression.BNN(**params['dynamics_model'])
dyn.build_network(network_spec=dyn.get_default_network_spec(hidden_dims=[200, 200]))

# init cost model
cost = partial(cartpole.cartpole_loss, **params['cost'])

# create experience dataset
exp = ExperienceDataset()

# init policy optimizer
params['optimizer']['min_method'] = 'adam'
params['optimizer']['max_evals'] = 1000
polopt = SGDOptimizer(**params['optimizer'])

# callback executed after every call to env.step
def step_cb(state, action, cost, info):
    exp.add_sample(state, action, cost, info)
    #env.render()

# function to execute before applying policy
def gTrig(state):
    return utils.gTrig_np(state, angle_dims).flatten()

(<class 'kusanagi.ghost.control.NNPolicy.NNPolicy'>, True)
[2017-07-27 17:06:04.512085] BNN > Building network
('InputLayer', {'shape': (None, 6), 'name': 'BNN_input'})
('DenseLayer', {'name': 'BNN_fc0', 'nonlinearity': <function sigmoid at 0x7f38ddfff9b0>, 'num_units': 200})
('DropoutLayer', {'rescale': False, 'p': 0.05, 'name': 'BNN_drop0', 'dropout_samples': array(25, dtype=int32)})
('DenseLayer', {'name': 'BNN_fc1', 'nonlinearity': <function sigmoid at 0x7f38ddfff9b0>, 'num_units': 200})
('DropoutLayer', {'rescale': False, 'p': 0.05, 'name': 'BNN_drop1', 'dropout_samples': array(25, dtype=int32)})
('DenseLayer', {'name': 'BNN_output', 'nonlinearity': <function linear at 0x7f38de0002a8>, 'num_units': 4})
[2017-07-27 17:06:04.517250] Experience > Initialising new experience dataset


In [3]:
# during first n_rnd trials, apply randomized controls
for i in range(n_rnd):
    exp.new_episode()
    apply_controller(env, randpol, H,
                     preprocess=gTrig,
                     callback=step_cb)

[2017-07-27 17:06:04.590963] apply_controller > Starting run
[2017-07-27 17:06:04.591791] apply_controller > Running for 4.000000 seconds
[2017-07-27 17:06:04.944725] apply_controller > Done. Stopping robot. Value of run [39.983719]
[2017-07-27 17:06:04.945517] Cartpole > Stopping robot


In [11]:
exp.new_episode()
apply_controller(env, pol, H,
                 preprocess=gTrig,
                 callback=step_cb)
print('')

[2017-07-27 17:26:08.719527] apply_controller > Starting run
[2017-07-27 17:26:08.720517] apply_controller > Running for 4.000000 seconds
[2017-07-27 17:26:08.721808] NNPolicy > Initialising expression graph for prediction
[2017-07-27 17:26:08.774554] NNPolicy > Compiling mean and variance of prediction
[2017-07-27 17:26:08.900147] NNPolicy > Done compiling
[2017-07-27 17:26:09.317240] apply_controller > Done. Stopping robot. Value of run [34.225212]
[2017-07-27 17:26:09.318136] Cartpole > Stopping robot



In [12]:
# train dynamics model
train_dynamics(dyn, exp, angle_dims=angle_dims)

[2017-07-27 17:26:11.498438] train_dynamics > Training dynamics model
[0, 1]
[2017-07-27 17:26:11.501018] train_dynamics > Dataset size:: Inputs: [ (78, 6) ], Targets: [ (78, 4) ]  
[2017-07-27 17:26:11.501818] BNN_opt > Optimizing parameters via mini batches
[2017-07-27 17:26:11.503806] BNN_opt > Initial loss [198.492248535]
[2K[2017-07-27 17:26:14.995113] BNN_opt > Curr loss: 6.994233E+00, n_evals: 1000, Avg. time per updt: 0.002838
[2017-07-27 17:26:14.998125] train_dynamics > Done training dynamics model


<kusanagi.ghost.regression.NN.BNN at 0x7f38d4160850>

In [5]:
rollout_fn = mc_pilco_.build_rollout(pol, dyn, cost, D, angle_dims, n_samples=100, resample_particles=True)
#plt.close('all')

[2017-07-27 17:06:23.704481] NNPolicy > Building network
('InputLayer', {'shape': [None, 5], 'name': 'NNPolicy_input'})
('DenseLayer', {'W': <lasagne.init.HeNormal object at 0x7f38c46513d0>, 'name': 'NNPolicy_fc0', 'nonlinearity': <function elu at 0x7f38ddffff50>, 'num_units': 50})
('DropoutLayer', {'rescale': False, 'p': 0.1, 'name': 'NNPolicy_drop0', 'dropout_samples': array(100, dtype=int32)})
('DenseLayer', {'W': <lasagne.init.HeUniform object at 0x7f38c4675d10>, 'name': 'NNPolicy_output', 'nonlinearity': <function linear at 0x7f38de0002a8>, 'num_units': 1})
[2017-07-27 17:06:23.878129] mc_pilco.rollout > Building computation graph for state particles propagation


In [13]:
dyn.update(n_samples)
pol.update(n_samples)
loss, costs, trajectories = rollout_fn(p0.mean, p0.cov, H, 1)
n_samples, T, dims = trajectories.shape

state_fig, state_axarr = plt.subplots(dims, sharex=True)

exp_states = np.array(exp.states)
for d in range(dims):
    st = trajectories[:,:,d]
    #plot predictive distribution
    for i in range(n_samples):
        state_axarr[d].plot(np.arange(T-1), st[i,:-1], color='steelblue', alpha=0.3)
    #for i in range(len(exp.states)):
    #    state_axarr[d].plot(np.arange(T-1), exp_states[i,1:,d], color='orange', alpha=0.3)
    #plot experience
    state_axarr[d].plot(np.arange(T-1), np.array(exp.states[-1])[1:H,d], color='red')
    state_axarr[d].plot(np.arange(T-1), st[:,:-1].mean(0), color='purple')
plt.show()

In [8]:
# init policy optimizer
learning_rate = theano.tensor.scalar('lr')
loss, inps, updts = mc_pilco_.get_loss(pol, dyn, cost, D, angle_dims, n_samples=100, resample_particles=True)
inps.append(learning_rate)
polopt.set_objective(loss, pol.get_params(symbolic=True), inps, updts, clip=10.0, learning_rate=learning_rate)

[2017-07-27 17:07:02.164664] mc_pilco.rollout > Building computation graph for state particles propagation
[2017-07-27 17:07:02.554753] SGDOptimizer > Building computation graph for gradients
[2017-07-27 17:07:03.052644] SGDOptimizer > Computing parameter update rules
[2017-07-27 17:07:03.071048] SGDOptimizer > Compiling function for loss
[2017-07-27 17:07:04.883308] SGDOptimizer > Compiling parameter updates


In [9]:
### initial state distribution
polopt.max_evals = 1000
x0 = np.array([st[0] for st in exp.states])
m0 = x0.mean(0)
S0 = np.cov(x0, rowvar=False, ddof=1) + 1e-7*np.eye(x0.shape[1]) if len(x0) > 2 else p0.cov
polopt.minimize(m0, S0, H, gamma, 5e-3, callback=lambda *args, **kwargs: dyn.update())

[2017-07-27 17:07:17.251487] SGDOptimizer > Optimizing parameters
[2017-07-27 17:07:17.358637] SGDOptimizer > Initial loss [0.988976657391]
[2K[2017-07-27 17:12:44.586076] SGDOptimizer > Curr loss: 7.578629E-01, n_evals: 1000, Avg. time per updt: 0.326532
[2017-07-27 17:12:44.586919] SGDOptimizer > Done training. New loss [0.573032] iter: [942]


In [19]:
polopt.update_params_fn.profile.summary()

Function profiling
  Message: /home/thalassa/gamboa/workspace/kusanagi/kusanagi/ghost/optimizers/sgd_optimizer.py:98
  Time in 99 calls to Function.__call__: 5.939406e+01s
  Time in Function.fn.__call__: 5.938823e+01s (99.990%)
  Time in thunks: 5.936592e+01s (99.953%)
  Total compile time: 2.060179e+01s
    Number of Apply nodes: 208
    Theano Optimizer time: 9.841736e+00s
       Theano validate time: 1.435325e-01s
    Theano Linker time (includes C, CUDA code generation/compiling): 1.069763e+01s
       Import time 4.667449e-02s
       Node make_thunk time 1.068052e+01s
           Node forall_inplace,gpu,grad_of_mc_pilco>rollout_scan}(Elemwise{maximum,no_inplace}.0, InplaceGpuDimShuffle{0,2,1}.0, GpuElemwise{true_div,no_inplace}.0, GpuAlloc<None>{memset_0=True}.0, GpuSubtensor{int64:int64:int64}.0, GpuSubtensor{int64:int64:int64}.0, GpuSubtensor{int64:int64:int64}.0, GpuSubtensor{int64:int64:int64}.0, GpuAlloc<None>.0, GpuAlloc<None>{memset_0=True}.0, GpuAlloc<None>{memset_0=True}.0,

In [14]:
%timeit loss, costs, trajectories = rollout_fn(p0.mean, p0.cov, H, 1)

10 loops, best of 3: 106 ms per loop


In [44]:
r = rollout_fn.maker.fgraph.outputs[0].get_parents()[0].get_parents()[0].get_parents()[0].get_parents()[0].get_parents()[0].get_parents()[0]
r.fgraph.profile.summary()

Function profiling
  Message: /home/thalassa/gamboa/workspace/kusanagi/kusanagi/ghost/algorithms/mc_pilco_.py:169
  Time in 44 calls to Function.__call__: 4.701500e+00s
  Time in Function.fn.__call__: 4.698047e+00s (99.927%)
  Time in thunks: 4.694997e+00s (99.862%)
  Total compile time: 2.285289e+00s
    Number of Apply nodes: 80
    Theano Optimizer time: 1.041116e+00s
       Theano validate time: 1.371765e-02s
    Theano Linker time (includes C, CUDA code generation/compiling): 1.201480e+00s
       Import time 2.764463e-02s
       Node make_thunk time 1.197365e+00s
           Node forall_inplace,gpu,mc_pilco>rollout_scan}(H, GpuIncSubtensor{InplaceSet;:int64:}.0, GpuIncSubtensor{InplaceSet;:int64:}.0, GpuIncSubtensor{InplaceSet;:int64:}.0, H, H, NNPolicy_fc0>W, NNPolicy_drop0>mask, NNPolicy_output>W, BNN_fc0>W, BNN_drop0>mask, BNN_fc1>W, BNN_drop1>mask, BNN_output>W, GpuFromHost<None>.0, GpuAllocDiag{offset=0}.0, InplaceGpuDimShuffle{x,0}.0, InplaceGpuDimShuffle{x,0}.0, InplaceGpuDi

In [45]:
theano.printing.debugprint(polopt.update_params_fn.maker.fgraph.outputs[0])

HostFromGpu(gpuarray) [id A] 'mean'   
 |GpuElemwise{TrueDiv}[(0, 0)]<gpuarray> [id B] ''   
   |GpuDnnReduction{red_op='add', axis=None, acc_dtype='float32', dtype='float32', return_indices=False} [id C] ''   
   | |forall_inplace,gpu,mc_pilco>rollout_scan}.3 [id D] ''   
   |   |H [id E]
   |   |GpuIncSubtensor{InplaceSet;:int64:} [id F] ''   
   |   | |GpuAllocEmpty{dtype='float32', context_name=None} [id G] ''   
   |   | | |Elemwise{Composite{(Switch(LT(maximum(i0, i1), i2), (maximum(i0, i1) + i3), (maximum(i0, i1) - i2)) + i4)}}[(0, 0)] [id H] ''   
   |   | | | |Elemwise{Composite{((i0 - Switch(LT(Composite{Switch(LT(i0, i1), i1, i0)}(Composite{Switch(LT(i0, i1), (i2 - i3), i0)}(Composite{((i0 - (Switch(LT(i1, i2), i2, i1) - i3)) - i3)}((Composite{Switch(GE(Composite{Switch(LT(i0, i1), i2, i0)}(Composite{Switch(LT(i0, i1), (i0 + i2), i0)}(i0, i1, i2), i1, i3), i2), (i2 - i4), Composite{Switch(LT(i0, i1), i2, i0)}(Composite{Switch(LT(i0, i1), (i0 + i2), i0)}(i0, i1, i2), i1, i3))

In [12]:
from IPython.display import SVG

In [46]:
theano.printing.pydotprint(polopt.update_params_fn.maker.fgraph.outputs[0], outfile='/localdata_ssd/juan/forw.svg', scan_graphs=True, format='svg')

The output file is available at /localdata_ssd/juan/forw_mc_pilco>rollout_scan_83.svg
The output file is available at /localdata_ssd/juan/forw.svg


In [11]:
theano.printing.pydotprint(loss, outfile='/localdata_ssd/juan/loss.svg', scan_graphs=True, format='svg')

The output file is available at /localdata_ssd/juan/loss_mc_pilco>rollout_scan_124_RBFPolicy_sat>logL_scan_46.svg
The output file is available at /localdata_ssd/juan/loss_mc_pilco>rollout_scan_124.svg
The output file is available at /localdata_ssd/juan/loss.svg


In [47]:
theano.printing.pydotprint(polopt.update_params_fn.maker.fgraph.outputs[1], outfile='/localdata_ssd/juan/gradsp0.svg', scan_graphs=True, format='svg')

The output file is available at /localdata_ssd/juan/gradsp0_mc_pilco>rollout_scan_120.svg
The output file is available at /localdata_ssd/juan/gradsp0_grad_of_mc_pilco>rollout_scan_156.svg
The output file is available at /localdata_ssd/juan/gradsp0.svg


In [12]:
theano.printing.debugprint(env.loss_func.maker.fgraph.outputs[0])

HostFromGpu(gpuarray) [id A] ''   
 |GpuElemwise{Composite{(i0 - exp((i1 * i2)))}}[(0, 2)]<gpuarray> [id B] ''   
   |GpuArrayConstant{[ 1.]} [id C]
   |GpuArrayConstant{[-0.5]} [id D]
   |GpuDnnReduction{red_op='add', axis=(1,), acc_dtype='float32', dtype='float32', return_indices=False} [id E] ''   
     |GpuElemwise{Mul}[(0, 0)]<gpuarray> [id F] ''   
       |InplaceGpuDimShuffle{x,0} [id G] ''   
       | |GpuGemv{inplace=True} [id H] ''   
       |   |GpuAllocEmpty{dtype='float32', context_name=None} [id I] ''   
       |   | |TensorConstant{5} [id J]
       |   |TensorConstant{1.0} [id K]
       |   |GpuArrayConstant{[[ 16.   0.   0.   8.   0.]
 [  0.   0.   0.   0.   0.]
 [  0.   0.   0.   0.   0.]
 [  8.   0.   0.   4.   0.]
 [  0.   0.   0.   0.   4.]]} [id L]
       |   |InplaceGpuDimShuffle{1} [id M] ''   
       |   | |GpuElemwise{Sub}[(0, 0)]<gpuarray> [id N] ''   
       |   |   |InplaceGpuDimShuffle{x,0} [id O] ''   
       |   |   | |GpuReshape{1} [id P] ''   
       | 