In [1]:
%matplotlib tk
import os
import numpy as np
from kusanagi.ghost import control
from kusanagi.ghost import regression
from kusanagi.shell import cartpole
from kusanagi.ghost.algorithms import pilco_, mc_pilco_
from kusanagi.ghost.optimizers import ScipyOptimizer, SGDOptimizer
from kusanagi.base import apply_controller, train_dynamics, ExperienceDataset
from kusanagi import utils
from functools import partial
#np.random.seed(1337)
np.set_printoptions(linewidth=500)

from matplotlib import pyplot as plt
import theano

Using cuDNN version 6021 on context None
Mapped name None to device cuda0: TITAN Xp (0000:01:00.0)


In [2]:
# setup output directory
utils.set_output_dir(os.path.join(utils.get_output_dir(), 'cartpole'))

params = cartpole.default_params()
n_rnd = 1                                                # number of random initial trials
n_opt = 100                                              #learning iterations
H = params['max_steps']
gamma = params['discount']
angle_dims = params['angle_dims']

# initial state distribution
p0 = params['state0_dist']
D = p0.mean.size

# init environment
env = cartpole.Cartpole(**params['plant'])

# init policy
#pol = control.NNPolicy(p0.mean, **params['policy'])
#pol.build_network(network_spec=pol.get_default_network_spec(hidden_dims=[20, 20],p=0.05))
pol = control.RBFPolicy(**params['policy'])
randpol = control.RandPolicy(maxU=pol.maxU)

# init dynmodel
#dyn = regression.SSGP_UI(**params['dynamics_model'])
dyn = regression.BNN(**params['dynamics_model'])
dyn.build_network(network_spec=dyn.get_default_network_spec(hidden_dims=[200, 200]))

# init cost model
cost = partial(cartpole.cartpole_loss, **params['cost'])

# create experience dataset
exp = ExperienceDataset()

# init policy optimizer
params['optimizer']['min_method'] = 'adam'
params['optimizer']['max_evals'] = 1000
polopt = SGDOptimizer(**params['optimizer'])

# callback executed after every call to env.step
def step_cb(state, action, cost, info):
    exp.add_sample(state, action, cost, info)
    #env.render()

# function to execute before applying policy
def gTrig(state):
    return utils.gTrig_np(state, angle_dims).flatten()

[2017-07-14 15:59:32.792075] RBFPolicy_sat > Initializing parameters
[2017-07-14 15:59:32.812474] RBFPolicy_sat > Building full GP loss
[2017-07-14 15:59:32.868581] RBFPolicy_sat > Initialising expression graph for prediction
[2017-07-14 15:59:32.883670] RBFPolicy_sat > Compiling mean and variance of prediction
local_gpu_cholesky
cusolver available!
float32!
GpuCholesky{lower=True, inplace=False}
[2017-07-14 15:59:34.106024] RBFPolicy_sat > Done compiling
[2017-07-14 15:59:34.111667] BNN > Building network
('InputLayer', {'shape': (None, 6), 'name': 'BNN_input'})
('DenseLayer', {'name': 'BNN_fc0', 'nonlinearity': <function sigmoid at 0x7f0e631b2410>, 'num_units': 200})
('DropoutLayer', {'rescale': False, 'p': 0.05, 'name': 'BNN_drop0', 'dropout_samples': array(25, dtype=int32)})
('DenseLayer', {'name': 'BNN_fc1', 'nonlinearity': <function sigmoid at 0x7f0e631b2410>, 'num_units': 200})
('DropoutLayer', {'rescale': False, 'p': 0.05, 'name': 'BNN_drop1', 'dropout_samples': array(25, dtype

In [3]:
# during first n_rnd trials, apply randomized controls
for i in range(n_rnd):
    exp.new_episode()
    apply_controller(env, randpol, H,
                     preprocess=gTrig,
                     callback=step_cb)

[2017-07-14 15:59:34.190493] apply_controller > Starting run
[2017-07-14 15:59:34.191752] apply_controller > Running for 4.000000 seconds
[2017-07-14 15:59:34.539191] apply_controller > Done. Stopping robot. Value of run [38.476944]
[2017-07-14 15:59:34.539963] Cartpole > Stopping robot


In [4]:
exp.new_episode()
apply_controller(env, pol, H,
                 preprocess=gTrig,
                 callback=step_cb)
print('')

[2017-07-14 00:47:51.695186] apply_controller > Starting run
[2017-07-14 00:47:51.696194] apply_controller > Running for 4.000000 seconds
[2017-07-14 00:47:52.076872] apply_controller > Done. Stopping robot. Value of run [39.985435]
[2017-07-14 00:47:52.077688] Cartpole > Stopping robot



In [4]:
# train dynamics model
train_dynamics(dyn, exp, angle_dims=angle_dims)

[2017-07-14 15:59:35.232216] train_dynamics > Training dynamics model
[0]
[2017-07-14 15:59:35.237101] train_dynamics > Dataset size:: Inputs: [ (39, 6) ], Targets: [ (39, 4) ]  
[2017-07-14 15:59:35.237767] BNN > Building network
('InputLayer', {'shape': (None, 6), 'name': 'BNN_input'})
('DenseLayer', {'name': 'BNN_fc0', 'nonlinearity': <function sigmoid at 0x7f0e631b2410>, 'num_units': 200})
('DropoutLayer', {'rescale': False, 'p': 0.05, 'name': 'BNN_drop0', 'dropout_samples': array(25, dtype=int32)})
('DenseLayer', {'name': 'BNN_fc1', 'nonlinearity': <function sigmoid at 0x7f0e631b2410>, 'num_units': 200})
('DropoutLayer', {'rescale': False, 'p': 0.05, 'name': 'BNN_drop1', 'dropout_samples': array(25, dtype=int32)})
('DenseLayer', {'name': 'BNN_output', 'nonlinearity': <function linear at 0x7f0e631b2c80>, 'num_units': 4})
[2017-07-14 15:59:35.241779] BNN > Initialising loss function
[2017-07-14 15:59:35.377447] BNN_opt > Building computation graph for gradients
[2017-07-14 15:59:35.

<kusanagi.ghost.regression.NN.BNN at 0x7f0e50d2da90>

In [5]:
rollout_fn = mc_pilco_.build_rollout(pol, dyn, cost, D, angle_dims, n_samples=100, resample_particles=True)
#plt.close('all')

[2017-07-14 15:59:42.207122] mc_pilco.rollout > Building computation graph for state particles propagation
local_gpu_cholesky
cusolver available!
float32!
GpuCholesky{lower=True, inplace=False}
local_gpu_cholesky
cusolver available!
float32!
GpuCholesky{lower=True, inplace=False}
local_gpu_cholesky
cusolver available!
float32!
GpuCholesky{lower=True, inplace=False}
local_gpu_cholesky
cusolver available!
float32!
GpuCholesky{lower=True, inplace=False}


In [6]:
dyn.update()
#pol.update()
loss, costs, trajectories = rollout_fn(p0.mean, p0.cov, H, 1)
n_samples, T, dims = trajectories.shape

state_fig, state_axarr = plt.subplots(dims, sharex=True)

exp_states = np.array(exp.states)
for d in range(dims):
    st = trajectories[:,:,d]
    #plot predictive distribution
    for i in range(n_samples):
        state_axarr[d].plot(np.arange(T-1), st[i,:-1], color='steelblue', alpha=0.3)
    #for i in range(len(exp.states)):
    #    state_axarr[d].plot(np.arange(T-1), exp_states[i,1:,d], color='orange', alpha=0.3)
    #plot experience
    state_axarr[d].plot(np.arange(T-1), np.array(exp.states[-1])[1:H,d], color='red')
    state_axarr[d].plot(np.arange(T-1), st[:,:-1].mean(0), color='purple')
plt.show()

In [7]:
# init policy optimizer
learning_rate = theano.tensor.scalar('lr')
loss, inps, updts = mc_pilco_.get_loss(pol, dyn, cost, D, angle_dims, n_samples=100, resample_particles=True)
inps.append(learning_rate)
polopt.set_objective(loss, pol.get_params(symbolic=True), inps, updts, clip=10.0, learning_rate=learning_rate)

[2017-07-14 15:59:49.205051] mc_pilco.rollout > Building computation graph for state particles propagation
[2017-07-14 15:59:49.732656] SGDOptimizer > Building computation graph for gradients
[2017-07-14 15:59:50.480523] SGDOptimizer > Computing parameter update rules
[2017-07-14 15:59:50.494770] SGDOptimizer > Compiling function for loss
local_gpu_cholesky
cusolver available!
float32!
GpuCholesky{lower=True, inplace=False}
local_gpu_cholesky
cusolver available!
float32!
GpuCholesky{lower=True, inplace=False}
local_gpu_cholesky
cusolver available!
float32!
GpuCholesky{lower=True, inplace=False}
[2017-07-14 15:59:54.346904] SGDOptimizer > Compiling parameter updates
local_gpu_cholesky
cusolver available!
float32!
GpuCholesky{lower=True, inplace=False}
local_gpu_cholesky
cusolver available!
float32!
GpuCholesky{lower=True, inplace=False}
local_gpu_cholesky
cusolver available!
float32!
GpuCholesky{lower=True, inplace=False}
local_gpu_cholesky
cusolver available!
float32!
GpuCholesky{lower

In [8]:
### initial state distribution
polopt.max_evals = 1000
x0 = np.array([st[0] for st in exp.states])
m0 = x0.mean(0)
S0 = np.cov(x0, rowvar=False, ddof=1) + 1e-7*np.eye(x0.shape[1]) if len(x0) > 2 else p0.cov
polopt.minimize(m0, S0, H, gamma, 5e-3, callback=lambda *args, **kwargs: dyn.update())

[2017-07-14 16:00:18.583296] SGDOptimizer > Optimizing parameters
[2017-07-14 16:00:18.737104] SGDOptimizer > Initial loss [0.99305832386]
[2K[2017-07-14 16:01:00.697153] SGDOptimizer > Current value: 9.924074E-01, Total evaluations: 59, Avg. time per updt: 0.571990

KeyboardInterrupt: 

In [9]:
polopt.update_params_fn.profile.summary()

Function profiling
  Message: /home/thalassa/gamboa/workspace/kusanagi/kusanagi/ghost/optimizers/sgd_optimizer.py:88
  Time in 58 calls to Function.__call__: 3.313068e+01s
  Time in Function.fn.__call__: 3.312201e+01s (99.974%)
  Time in thunks: 3.310489e+01s (99.922%)
  Total compile time: 2.414831e+01s
    Number of Apply nodes: 241
    Theano Optimizer time: 1.040159e+01s
       Theano validate time: 1.702495e-01s
    Theano Linker time (includes C, CUDA code generation/compiling): 1.368465e+01s
       Import time 5.295134e-02s
       Node make_thunk time 1.366419e+01s
           Node forall_inplace,gpu,grad_of_mc_pilco>rollout_scan}(Elemwise{maximum,no_inplace}.0, InplaceGpuDimShuffle{0,2,1}.0, GpuElemwise{true_div,no_inplace}.0, GpuAlloc<None>{memset_0=True}.0, GpuSubtensor{int64:int64:int64}.0, GpuSubtensor{int64:int64:int64}.0, GpuSubtensor{int64:int64:int64}.0, GpuSubtensor{int64:int64:int64}.0, GpuAlloc<None>.0, GpuAlloc<None>{memset_0=True}.0, GpuAlloc<None>{memset_0=True}.0,

In [23]:
%timeit loss, costs, trajectories = rollout_fn(p0.mean, p0.cov, H, 1)

10 loops, best of 3: 147 ms per loop


In [24]:
rollout_fn.profile.summary()

Function profiling
  Message: /home/thalassa/gamboa/workspace/kusanagi/kusanagi/ghost/algorithms/mc_pilco_.py:196
  Time in 410 calls to Function.__call__: 6.121445e+01s
  Time in Function.fn.__call__: 6.117908e+01s (99.942%)
  Time in thunks: 6.114092e+01s (99.880%)
  Total compile time: 4.033573e+00s
    Number of Apply nodes: 102
    Theano Optimizer time: 2.006438e+00s
       Theano validate time: 1.815152e-02s
    Theano Linker time (includes C, CUDA code generation/compiling): 1.969665e+00s
       Import time 2.098870e-02s
       Node make_thunk time 1.964165e+00s
           Node forall_inplace,gpu,mc_pilco>rollout_scan}(H, GpuIncSubtensor{InplaceSet;:int64:}.0, GpuIncSubtensor{InplaceSet;:int64:}.0, GpuIncSubtensor{InplaceSet;:int64:}.0, GpuIncSubtensor{InplaceSet;:int64:}.0, H, H, RBFPolicy_sat>X, BNN_fc0>W, BNN_drop0>mask, BNN_fc1>W, BNN_drop1>mask, BNN_output>W, GpuFromHost<None>.0, GpuFromHost<None>.0, InplaceGpuDimShuffle{x,0}.0, InplaceGpuDimShuffle{x,0}.0, InplaceGpuDimSh

In [33]:
theano.printing.debugprint(polopt.update_params_fn.maker.fgraph.outputs[1])

HostFromGpu(gpuarray) [id A] ''   
 |GpuElemwise{Mul}[(0, 0)]<gpuarray> [id B] ''   
   |GpuSubtensor{int64} [id C] ''   
   | |forall_inplace,gpu,grad_of_mc_pilco>rollout_scan}.5 [id D] ''   
   | | |Elemwise{maximum,no_inplace} [id E] ''   
   | | | |Elemwise{Composite{minimum(maximum(maximum(((i0 + i1) - i2), ((i3 + i1) - i2)), ((i4 + i1) - i2)), i5)}} [id F] ''   
   | | | | |Elemwise{add,no_inplace} [id G] ''   
   | | | | | |TensorConstant{-1} [id H]
   | | | | | |Elemwise{add,no_inplace} [id I] ''   
   | | | | |   |TensorConstant{1} [id J]
   | | | | |   |Elemwise{Cast{int64}} [id K] ''   
   | | | | |     |H [id L]
   | | | | |TensorConstant{1} [id J]
   | | | | |TensorConstant{1} [id M]
   | | | | |Elemwise{add,no_inplace} [id N] ''   
   | | | | | |TensorConstant{-1} [id H]
   | | | | | |Elemwise{add,no_inplace} [id I] ''   
   | | | | |Elemwise{add,no_inplace} [id O] ''   
   | | | | | |TensorConstant{-1} [id H]
   | | | | | |Elemwise{add,no_inplace} [id I] ''   
   | | | |

 > | | | | | |   |       | |   |   | | | | | | | | | |   | | | |   | |   |     | | | | | | | |       | |GpuAllocEmpty{dtype='float32', context_name=None} [id XD] ''   
 > | | | | | |   |       | |   |   | | | | | | | | | |   | | | |   | |   |     | | | | | | | |       | | |TensorConstant{5} [id XE]
 > | | | | | |   |       | |   |   | | | | | | | | | |   | | | |   | |   |     | | | | | | | |       | |TensorConstant{1.0} [id LY]
 > | | | | | |   |       | |   |   | | | | | | | | | |   | | | |   | |   |     | | | | | | | |       | |GpuCusolverSolve{A_structure='general', trans='N', inplace=False} [id XF] ''   
 > | | | | | |   |       | |   |   | | | | | | | | | |   | | | |   | |   |     | | | | | | | |       | | |GpuContiguous [id XG] ''   
 > | | | | | |   |       | |   |   | | | | | | | | | |   | | | |   | |   |     | | | | | | | |       | | | |InplaceGpuDimShuffle{1,0} [id XH] ''   
 > | | | | | |   |       | |   |   | | | | | | | | | |   | | | |   | |   |     | | | | | | | |       |

 >               |TensorConstant{-1.0} [id BVX]
 >               |GpuElemwise{TrueDiv}[(0, 0)]<gpuarray> [id BNZ] ''   
 >               |GpuElemwise{TrueDiv}[(0, 0)]<gpuarray> [id BNZ] ''   
 >GpuElemwise{mul,no_inplace} [id BVY] ''   
 > |gamma[t-1][Gpua] [id BVZ] -> [id DI]
 > |gamma_copy[Gpua] [id BWA] -> [id DP]
 >GPUA_mrg_uniform{GpuArrayType<None>(float32, vector),no_inplace}.0 [id BQY] ''   
 >GPUA_mrg_uniform{GpuArrayType<None>(float32, vector),no_inplace}.0 [id BVA] ''   
 >GpuElemwise{Composite{(i0 * (i1 + ((-exp((i2 * i3))) / sqrt(i4))))}}[]<gpuarray> [id BWB] ''   
 > |gamma[t-1][Gpua] [id BVZ] -> [id DI]
 > |GpuArrayConstant{1.0} [id BWC]
 > |GpuArrayConstant{-0.5} [id BWD]
 > |InplaceGpuDimShuffle{} [id BWE] ''   
 > | |GpuGemv{inplace=True} [id BWF] ''   
 > |   |GpuAllocEmpty{dtype='float32', context_name=None} [id BWG] ''   
 > |   | |TensorConstant{1} [id BWH]
 > |   |TensorConstant{1.0} [id BQM]
 > |   |InplaceGpuDimShuffle{x,0} [id BWI] ''   
 > |   | |GpuElemwise{

In [12]:
from IPython.display import SVG

In [10]:
theano.printing.pydotprint(polopt.update_params_fn.maker.fgraph.outputs[0], outfile='/localdata_ssd/juan/forw.svg', scan_graphs=True, format='svg')

The output file is available at /localdata_ssd/juan/forw_mc_pilco>rollout_scan_108_RBFPolicy_sat>logL_scan_68.svg
The output file is available at /localdata_ssd/juan/forw_mc_pilco>rollout_scan_108.svg
The output file is available at /localdata_ssd/juan/forw.svg


In [11]:
theano.printing.pydotprint(loss, outfile='/localdata_ssd/juan/loss.svg', scan_graphs=True, format='svg')

The output file is available at /localdata_ssd/juan/loss_mc_pilco>rollout_scan_124_RBFPolicy_sat>logL_scan_46.svg
The output file is available at /localdata_ssd/juan/loss_mc_pilco>rollout_scan_124.svg
The output file is available at /localdata_ssd/juan/loss.svg


In [32]:
theano.printing.pydotprint(polopt.update_params_fn.maker.fgraph.outputs[1], outfile='/localdata_ssd/juan/gradsp0.svg', scan_graphs=True, format='svg')

The output file is available at /localdata_ssd/juan/gradsp0_mc_pilco>rollout_scan_160_RBFPolicy_sat>logL_scan_68.svg
The output file is available at /localdata_ssd/juan/gradsp0_mc_pilco>rollout_scan_160.svg
The output file is available at /localdata_ssd/juan/gradsp0_grad_of_mc_pilco>rollout_scan_198_RBFPolicy_sat>logL_scan_197.svg
The output file is available at /localdata_ssd/juan/gradsp0_grad_of_mc_pilco>rollout_scan_198_grad_of_RBFPolicy_sat>logL_scan_605.svg


KeyboardInterrupt: 

In [None]:
theano.printing.pydotprint(polopt.update_params_fn.maker.fgraph.outputs[2], outfile='/localdata_ssd/juan/gradsp1.svg', scan_graphs=True, format='svg')