In [1]:
%matplotlib tk
import os
import numpy as np
from kusanagi.ghost import control
from kusanagi.ghost import regression
from kusanagi.shell import cartpole
from kusanagi.ghost.algorithms import pilco_, mc_pilco_
from kusanagi.ghost.optimizers import ScipyOptimizer, SGDOptimizer
from kusanagi.base import apply_controller, train_dynamics, ExperienceDataset
from kusanagi import utils
from functools import partial
#np.random.seed(1337)
np.set_printoptions(linewidth=500)

from matplotlib import pyplot as plt
import theano

  "The 'nose-parameterized' package has been renamed 'parameterized'. "
Using cuDNN version 6021 on context None
Mapped name None to device cuda0: GeForce GTX 960M (0000:01:00.0)


In [2]:
# setup output directory
utils.set_output_dir(os.path.join(utils.get_output_dir(), 'cartpole'))

params = cartpole.default_params()
n_rnd = 1                                                # number of random initial trials
n_opt = 100                                              #learning iterations
H = params['max_steps']
gamma = params['discount']
angle_dims = params['angle_dims']

# initial state distribution
p0 = params['state0_dist']
D = p0.mean.size

# init environment
env = cartpole.Cartpole(**params['plant'])

# init policy
#pol = control.NNPolicy(p0.mean, **params['policy'])
#pol.build_network(network_spec=pol.get_default_network_spec(hidden_dims=[20, 20],p=0.05))
pol = control.RBFPolicy(**params['policy'])
randpol = control.RandPolicy(maxU=pol.maxU)

# init dynmodel
#dyn = regression.SSGP_UI(**params['dynamics_model'])
dyn = regression.BNN(**params['dynamics_model'])
dyn.build_network(network_spec=dyn.get_default_network_spec(hidden_dims=[200, 200]))

# init cost model
cost = partial(cartpole.cartpole_loss, **params['cost'])

# create experience dataset
exp = ExperienceDataset()

# init policy optimizer
params['optimizer']['min_method'] = 'adam'
params['optimizer']['max_evals'] = 1000
polopt = SGDOptimizer(**params['optimizer'])

# callback executed after every call to env.step
def step_cb(state, action, cost, info):
    exp.add_sample(state, action, cost, info)
    #env.render()

# function to execute before applying policy
def gTrig(state):
    return utils.gTrig_np(state, angle_dims).flatten()

[2017-07-23 17:23:24.563712] RBFPolicy_sat > Initializing parameters
[2017-07-23 17:23:24.580375] RBFPolicy_sat > Building full GP loss
[2017-07-23 17:23:24.629977] RBFPolicy_sat > Initialising expression graph for prediction
[2017-07-23 17:23:24.645871] RBFPolicy_sat > Compiling mean and variance of prediction
[2017-07-23 17:23:26.407207] RBFPolicy_sat > Done compiling
[2017-07-23 17:23:26.411406] BNN > Building network
InputLayer {'shape': (None, 6), 'name': 'BNN_input'}
DenseLayer {'num_units': 200, 'nonlinearity': <function sigmoid at 0x7f78144cca60>, 'name': 'BNN_fc0'}
DropoutLayer {'p': 0.05, 'rescale': False, 'name': 'BNN_drop0', 'dropout_samples': array(25, dtype=int32)}
DenseLayer {'num_units': 200, 'nonlinearity': <function sigmoid at 0x7f78144cca60>, 'name': 'BNN_fc1'}
DropoutLayer {'p': 0.05, 'rescale': False, 'name': 'BNN_drop1', 'dropout_samples': array(25, dtype=int32)}
DenseLayer {'num_units': 4, 'nonlinearity': <function linear at 0x7f78144d3d90>, 'name': 'BNN_output'}

In [3]:
# during first n_rnd trials, apply randomized controls
for i in range(n_rnd):
    exp.new_episode()
    apply_controller(env, randpol, H,
                     preprocess=gTrig,
                     callback=step_cb)

[2017-07-23 17:23:26.495168] apply_controller > Starting run
[2017-07-23 17:23:26.497162] apply_controller > Running for 4.000000 seconds
[2017-07-23 17:23:26.762595] apply_controller > Done. Stopping robot. Value of run [39.826462]
[2017-07-23 17:23:26.763294] Cartpole > Stopping robot


In [4]:
exp.new_episode()
apply_controller(env, pol, H,
                 preprocess=gTrig,
                 callback=step_cb)
print('')

[2017-07-23 17:23:26.831503] apply_controller > Starting run
[2017-07-23 17:23:26.832904] apply_controller > Running for 4.000000 seconds
[2017-07-23 17:23:27.102914] apply_controller > Done. Stopping robot. Value of run [39.930588]
[2017-07-23 17:23:27.103637] Cartpole > Stopping robot



In [5]:
# train dynamics model
train_dynamics(dyn, exp, angle_dims=angle_dims)

[2017-07-23 17:23:27.169578] train_dynamics > Training dynamics model
[0, 1]
[2017-07-23 17:23:27.173369] train_dynamics > Dataset size:: Inputs: [ (78, 6) ], Targets: [ (78, 4) ]  
[2017-07-23 17:23:27.174015] BNN > Building network
InputLayer {'shape': (None, 6), 'name': 'BNN_input'}
DenseLayer {'num_units': 200, 'nonlinearity': <function sigmoid at 0x7f78144cca60>, 'name': 'BNN_fc0'}
DropoutLayer {'p': 0.05, 'rescale': False, 'name': 'BNN_drop0', 'dropout_samples': array(25, dtype=int32)}
DenseLayer {'num_units': 200, 'nonlinearity': <function sigmoid at 0x7f78144cca60>, 'name': 'BNN_fc1'}
DropoutLayer {'p': 0.05, 'rescale': False, 'name': 'BNN_drop1', 'dropout_samples': array(25, dtype=int32)}
DenseLayer {'num_units': 4, 'nonlinearity': <function linear at 0x7f78144d3d90>, 'name': 'BNN_output'}
[2017-07-23 17:23:27.178842] BNN > Initialising loss function
[2017-07-23 17:23:27.297227] BNN_opt > Building computation graph for gradients
[2017-07-23 17:23:27.365139] BNN_opt > Computing

<kusanagi.ghost.regression.NN.BNN at 0x7f77ff76b748>

In [6]:
rollout_fn = mc_pilco_.build_rollout(pol, dyn, cost, D, angle_dims, n_samples=100, resample_particles=True)
#plt.close('all')

[2017-07-23 17:23:32.738346] mc_pilco.rollout > Building computation graph for state particles propagation


In [7]:
dyn.update()
#pol.update()
loss, costs, trajectories = rollout_fn(p0.mean, p0.cov, H, 1)
n_samples, T, dims = trajectories.shape

state_fig, state_axarr = plt.subplots(dims, sharex=True)

exp_states = np.array(exp.states)
for d in range(dims):
    st = trajectories[:,:,d]
    #plot predictive distribution
    for i in range(n_samples):
        state_axarr[d].plot(np.arange(T-1), st[i,:-1], color='steelblue', alpha=0.3)
    #for i in range(len(exp.states)):
    #    state_axarr[d].plot(np.arange(T-1), exp_states[i,1:,d], color='orange', alpha=0.3)
    #plot experience
    state_axarr[d].plot(np.arange(T-1), np.array(exp.states[-1])[1:H,d], color='red')
    state_axarr[d].plot(np.arange(T-1), st[:,:-1].mean(0), color='purple')
plt.show()

In [8]:
# init policy optimizer
learning_rate = theano.tensor.scalar('lr')
loss, inps, updts = mc_pilco_.get_loss(pol, dyn, cost, D, angle_dims, n_samples=100, resample_particles=True)
inps.append(learning_rate)
polopt.set_objective(loss, pol.get_params(symbolic=True), inps, updts, clip=10.0, learning_rate=learning_rate)

[2017-07-23 17:23:39.809708] mc_pilco.rollout > Building computation graph for state particles propagation
[2017-07-23 17:23:40.292725] SGDOptimizer > Building computation graph for gradients
[2017-07-23 17:23:40.971823] SGDOptimizer > Computing parameter update rules
[2017-07-23 17:23:40.988290] SGDOptimizer > Compiling function for loss
[2017-07-23 17:23:43.935102] SGDOptimizer > Compiling parameter updates


In [9]:
### initial state distribution
polopt.max_evals = 1000
x0 = np.array([st[0] for st in exp.states])
m0 = x0.mean(0)
S0 = np.cov(x0, rowvar=False, ddof=1) + 1e-7*np.eye(x0.shape[1]) if len(x0) > 2 else p0.cov
polopt.minimize(m0, S0, H, gamma, 5e-3, callback=lambda *args, **kwargs: dyn.update())

[2017-07-23 17:24:03.861271] SGDOptimizer > Optimizing parameters
[2017-07-23 17:24:03.980602] SGDOptimizer > Initial loss [0.9996506571769714]
[2K[2017-07-23 17:24:29.356351] SGDOptimizer > Current value: 9.997444E-01, Total evaluations: 50, Avg. time per updt: 0.405802

KeyboardInterrupt: 

In [9]:
polopt.update_params_fn.profile.summary()

Function profiling
  Message: /home/thalassa/gamboa/workspace/kusanagi/kusanagi/ghost/optimizers/sgd_optimizer.py:88
  Time in 58 calls to Function.__call__: 3.313068e+01s
  Time in Function.fn.__call__: 3.312201e+01s (99.974%)
  Time in thunks: 3.310489e+01s (99.922%)
  Total compile time: 2.414831e+01s
    Number of Apply nodes: 241
    Theano Optimizer time: 1.040159e+01s
       Theano validate time: 1.702495e-01s
    Theano Linker time (includes C, CUDA code generation/compiling): 1.368465e+01s
       Import time 5.295134e-02s
       Node make_thunk time 1.366419e+01s
           Node forall_inplace,gpu,grad_of_mc_pilco>rollout_scan}(Elemwise{maximum,no_inplace}.0, InplaceGpuDimShuffle{0,2,1}.0, GpuElemwise{true_div,no_inplace}.0, GpuAlloc<None>{memset_0=True}.0, GpuSubtensor{int64:int64:int64}.0, GpuSubtensor{int64:int64:int64}.0, GpuSubtensor{int64:int64:int64}.0, GpuSubtensor{int64:int64:int64}.0, GpuAlloc<None>.0, GpuAlloc<None>{memset_0=True}.0, GpuAlloc<None>{memset_0=True}.0,

In [11]:
%timeit loss, costs, trajectories = rollout_fn(p0.mean, p0.cov, H, 1)

105 ms ± 482 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [9]:
rollout_fn.profile.summary()

AttributeError: 'NoneType' object has no attribute 'summary'

In [12]:
theano.printing.debugprint(rollout_fn.maker.fgraph.outputs[0])

HostFromGpu(gpuarray) [id A] 'mean'   
 |GpuElemwise{Composite{(i0 / Cast{float32}(i1))}}[(0, 0)]<gpuarray> [id B] ''   
   |GpuDnnReduction{red_op='add', axis=None, acc_dtype='float32', dtype='float32', return_indices=False} [id C] ''   
   | |forall_inplace,gpu,mc_pilco>rollout_scan}.4 [id D] ''   
   |   |H [id E]
   |   |GpuIncSubtensor{InplaceSet;:int64:} [id F] ''   
   |   | |GpuAllocEmpty{dtype='float32', context_name=None} [id G] ''   
   |   | | |Elemwise{Composite{(Switch(LT(maximum(i0, i1), i2), (maximum(i0, i1) + i3), (maximum(i0, i1) - i2)) + i4)}}[(0, 0)] [id H] ''   
   |   | | | |Elemwise{Composite{((i0 - Switch(LT(i1, i2), i1, i2)) + i1)}} [id I] ''   
   |   | | | | |H [id E]
   |   | | | | |TensorConstant{1} [id J]
   |   | | | | |Elemwise{add,no_inplace} [id K] ''   
   |   | | | |   |TensorConstant{1} [id J]
   |   | | | |   |H [id E]
   |   | | | |TensorConstant{2} [id L]
   |   | | | |TensorConstant{1} [id M]
   |   | | | |TensorConstant{1} [id J]
   |   | | | |

In [12]:
from IPython.display import SVG

In [10]:
theano.printing.pydotprint(polopt.update_params_fn.maker.fgraph.outputs[0], outfile='/localdata_ssd/juan/forw.svg', scan_graphs=True, format='svg')

The output file is available at /localdata_ssd/juan/forw_mc_pilco>rollout_scan_108_RBFPolicy_sat>logL_scan_68.svg
The output file is available at /localdata_ssd/juan/forw_mc_pilco>rollout_scan_108.svg
The output file is available at /localdata_ssd/juan/forw.svg


In [11]:
theano.printing.pydotprint(loss, outfile='/localdata_ssd/juan/loss.svg', scan_graphs=True, format='svg')

The output file is available at /localdata_ssd/juan/loss_mc_pilco>rollout_scan_124_RBFPolicy_sat>logL_scan_46.svg
The output file is available at /localdata_ssd/juan/loss_mc_pilco>rollout_scan_124.svg
The output file is available at /localdata_ssd/juan/loss.svg


In [32]:
theano.printing.pydotprint(polopt.update_params_fn.maker.fgraph.outputs[1], outfile='/localdata_ssd/juan/gradsp0.svg', scan_graphs=True, format='svg')

The output file is available at /localdata_ssd/juan/gradsp0_mc_pilco>rollout_scan_160_RBFPolicy_sat>logL_scan_68.svg
The output file is available at /localdata_ssd/juan/gradsp0_mc_pilco>rollout_scan_160.svg
The output file is available at /localdata_ssd/juan/gradsp0_grad_of_mc_pilco>rollout_scan_198_RBFPolicy_sat>logL_scan_197.svg
The output file is available at /localdata_ssd/juan/gradsp0_grad_of_mc_pilco>rollout_scan_198_grad_of_RBFPolicy_sat>logL_scan_605.svg


KeyboardInterrupt: 

In [12]:
theano.printing.debugprint(env.loss_func.maker.fgraph.outputs[0])

HostFromGpu(gpuarray) [id A] ''   
 |GpuElemwise{Composite{(i0 - exp((i1 * i2)))}}[(0, 2)]<gpuarray> [id B] ''   
   |GpuArrayConstant{[ 1.]} [id C]
   |GpuArrayConstant{[-0.5]} [id D]
   |GpuDnnReduction{red_op='add', axis=(1,), acc_dtype='float32', dtype='float32', return_indices=False} [id E] ''   
     |GpuElemwise{Mul}[(0, 0)]<gpuarray> [id F] ''   
       |InplaceGpuDimShuffle{x,0} [id G] ''   
       | |GpuGemv{inplace=True} [id H] ''   
       |   |GpuAllocEmpty{dtype='float32', context_name=None} [id I] ''   
       |   | |TensorConstant{5} [id J]
       |   |TensorConstant{1.0} [id K]
       |   |GpuArrayConstant{[[ 16.   0.   0.   8.   0.]
 [  0.   0.   0.   0.   0.]
 [  0.   0.   0.   0.   0.]
 [  8.   0.   0.   4.   0.]
 [  0.   0.   0.   0.   4.]]} [id L]
       |   |InplaceGpuDimShuffle{1} [id M] ''   
       |   | |GpuElemwise{Sub}[(0, 0)]<gpuarray> [id N] ''   
       |   |   |InplaceGpuDimShuffle{x,0} [id O] ''   
       |   |   | |GpuReshape{1} [id P] ''   
       | 