In [1]:
%matplotlib tk
import os
import numpy as np
from kusanagi.ghost import control
from kusanagi.ghost import regression
from kusanagi.shell import cartpole
from kusanagi.ghost.algorithms import pilco_, mc_pilco_
from kusanagi.ghost.optimizers import ScipyOptimizer, SGDOptimizer
from kusanagi.base import apply_controller, train_dynamics, ExperienceDataset
from kusanagi import utils
from functools import partial
# NOTE(review): NumPy seeding is disabled, so anything drawn from NumPy's global
# RNG differs between runs; re-enable the line below for reproducible experiments.
#np.random.seed(1337)
# allow up to 500 characters per printed line so long arrays are not wrapped
np.set_printoptions(linewidth=500)

from matplotlib import pyplot as plt
import theano

In [2]:
# setup output directory: nest a 'cartpole' folder under the current output dir
utils.set_output_dir(os.path.join(utils.get_output_dir(), 'cartpole'))

# experiment configuration; every component below is constructed from this dict
params = cartpole.default_params()
n_rnd = 1                                                # number of random initial trials
n_opt = 100                                              # learning iterations (NOTE(review): not referenced by any visible cell)
# horizon handed to apply_controller; hard-coded here instead of params['max_steps'].
# NOTE(review): the plotting cell later rebinds H to 40.
H = 26#params['max_steps']
# discount factor, passed to polopt.minimize
gamma = params['discount']
# indices of angular state dimensions, used by gTrig and train_dynamics
angle_dims = params['angle_dims']

# initial state distribution (its .mean and .cov are used for rollouts)
p0 = params['state0_dist']
# number of entries in the initial-state mean
D = p0.mean.size

# init environment
env = cartpole.Cartpole(**params['plant'])

# init policy
# (alternative neural-network policy, currently disabled)
#pol = control.NNPolicy(p0.mean, **params['policy'])
#pol.build_network(network_spec=pol.get_default_network_spec(hidden_dims=[20, 20],p=0.05))
pol = control.RBFPolicy(**params['policy'])
# random exploration policy sharing the learned policy's maxU
randpol = control.RandPolicy(maxU=pol.maxU)

# init dynmodel
# (alternative SSGP model, currently disabled)
#dyn = regression.SSGP_UI(**params['dynamics_model'])
dyn = regression.BNN(**params['dynamics_model'])
# default network spec with two hidden layers of 200 units each
dyn.build_network(network_spec=dyn.get_default_network_spec(hidden_dims=[200, 200]))

# init cost model: cartpole_loss with the configured cost parameters pre-bound
cost = partial(cartpole.cartpole_loss, **params['cost'])

# create experience dataset (filled by step_cb during rollouts)
exp = ExperienceDataset()

# init policy optimizer
# NOTE: the two assignments below modify params['optimizer'] in place before
# it is unpacked into the SGDOptimizer constructor.
params['optimizer']['min_method'] = 'adam'
params['optimizer']['max_evals'] = 1000
polopt = SGDOptimizer(**params['optimizer'])

# callback executed after every call to env.step
def step_cb(state, action, cost, info, render=True):
    """Record one environment step in ``exp`` and optionally redraw ``env``.

    Args:
        state, action, cost, info: values describing the step just taken;
            they are forwarded unchanged to ``exp.add_sample``. Inside this
            function ``cost`` shadows the notebook-level ``cost`` callable.
        render: when True (the default, matching the original behaviour)
            ``env.render()`` is called after the sample is stored. Bind
            ``render=False`` (e.g. ``partial(step_cb, render=False)``) for
            headless runs instead of redefining the callback.
    """
    exp.add_sample(state, action, cost, info)
    if render:
        env.render()

# function to execute before applying policy
def gTrig(state):
    """Preprocess a state for the policy.

    Passes ``state`` and the notebook-level ``angle_dims`` to
    ``utils.gTrig_np`` and returns the result flattened to one dimension.
    (Presumably this swaps angular dimensions for trigonometric features;
    confirm against ``utils.gTrig_np``.)
    """
    augmented = utils.gTrig_np(state, angle_dims)
    return augmented.flatten()

[2017-07-13 09:51:48.787398] RBFPolicy_sat > Initializing parameters
[2017-07-13 09:51:48.801249] RBFPolicy_sat > Building full GP loss
[2017-07-13 09:51:48.847385] RBFPolicy_sat > Initialising expression graph for prediction
[2017-07-13 09:51:48.863013] RBFPolicy_sat > Compiling mean and variance of prediction
[2017-07-13 09:51:49.857707] RBFPolicy_sat > Done compiling
[2017-07-13 09:51:49.873333] BNN > Building network
InputLayer {'shape': (None, 6), 'name': 'BNN_input'}
DenseLayer {'num_units': 200, 'nonlinearity': <function sigmoid at 0x7f01d69b1ae8>, 'name': 'BNN_fc0'}
DropoutLayer {'p': 0.05, 'rescale': False, 'name': 'BNN_drop0', 'dropout_samples': array(25, dtype=int32)}
DenseLayer {'num_units': 200, 'nonlinearity': <function sigmoid at 0x7f01d69b1ae8>, 'name': 'BNN_fc1'}
DropoutLayer {'p': 0.05, 'rescale': False, 'name': 'BNN_drop1', 'dropout_samples': array(25, dtype=int32)}
DenseLayer {'num_units': 4, 'nonlinearity': <function linear at 0x7f01d6950510>, 'name': 'BNN_output'}

In [3]:
# during first n_rnd trials, apply randomized controls
# Each trial starts a fresh episode in exp; samples are appended by step_cb.
for i in range(n_rnd):
    exp.new_episode()
    # run the random policy for horizon H, preprocessing states with gTrig
    apply_controller(env, randpol, H,
                     preprocess=gTrig,
                     callback=step_cb)

[2017-07-13 09:51:49.959822] apply_controller > Starting run
[2017-07-13 09:51:49.968197] apply_controller > Running for 2.600000 seconds
[2017-07-13 09:51:50.162054] apply_controller > Done. Stopping robot. Value of run [25.843404]
[2017-07-13 09:51:50.162859] Cartpole > Stopping robot


In [84]:
# Run the current policy `pol` on the environment and record the result as a
# new episode in exp.
# NOTE(review): the recorded output shows a 4.0 s run, whereas H = 26 produced
# a 2.6 s run earlier, so this cell was executed after H had been rebound to 40
# in the plotting cell. The result depends on execution order.
exp.new_episode()
apply_controller(env, pol, H,
                 preprocess=gTrig,
                 callback=step_cb)
# NOTE(review): presumably present only so the cell does not echo the return
# value of apply_controller; confirm before removing.
print('')

[2017-07-13 12:58:22.466420] apply_controller > Starting run
[2017-07-13 12:58:22.467846] apply_controller > Running for 4.000000 seconds
[2017-07-13 12:58:22.765400] apply_controller > Done. Stopping robot. Value of run [11.003082]
[2017-07-13 12:58:22.767646] Cartpole > Stopping robot



In [85]:
# train dynamics model
# Fits `dyn` on the experience collected in `exp`; angle_dims is forwarded to
# train_dynamics. The call is the cell's last expression, so its return value
# (the BNN object, per the recorded output) is echoed.
train_dynamics(dyn, exp, angle_dims=angle_dims)

[2017-07-13 12:58:29.642724] train_dynamics > Training dynamics model
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
[2017-07-13 12:58:29.646944] train_dynamics > Dataset size:: Inputs: [ (343, 6) ], Targets: [ (343, 4) ]  
[2017-07-13 12:58:29.647608] BNN_opt > Optimizing parameters via mini batches
[2017-07-13 12:58:29.655615] BNN_opt > Initial loss [-3.172745697860457]
[2K[2017-07-13 12:58:42.426185] BNN_opt > Current value: -3.362612E+00, Total evaluations: 1000, Avg. time per updt: 0.019044
[2017-07-13 12:58:42.435197] train_dynamics > Done training dynamics model


<kusanagi.ghost.regression.NN.BNN at 0x7f01b9b28390>

In [88]:
# Build the rollout function from the policy, dynamics model and cost, using
# 100 samples and with particle resampling switched off.
rollout_fn = mc_pilco_.build_rollout(pol, dyn, cost, D, angle_dims, n_samples=100, resample_particles=False)
# close every open matplotlib figure before the plotting cell creates a new one
plt.close('all')


[2017-07-13 13:02:59.233807] mc_pilco.rollout > Building computation graph for state particles propagation


In [90]:
# Visual check of the dynamics model: overlay the trajectories returned by
# rollout_fn on the most recently recorded episode, one subplot per dimension.
dyn.update()
#pol.update()
# NOTE(review): this rebinds the notebook-wide horizon H (26 in the setup
# cell); every cell executed afterwards sees 40.
H=40
loss, costs, trajectories = rollout_fn(p0.mean, p0.cov, H, 1)
n_samples, T, dims = trajectories.shape

# squeeze=False keeps the axes container an array even when dims == 1, so the
# indexing below never hits a bare Axes object.
state_fig, state_axarr = plt.subplots(dims, sharex=True, squeeze=False)
state_axarr = state_axarr.ravel()

# Kept for interactive inspection in later cells. NOTE(review): this is a 1-D
# array of per-episode sequences when episodes differ in length.
exp_states = np.array(exp.states)

steps = np.arange(T-1)
# last recorded episode, skipping its first state, limited to the horizon
last_episode = np.array(exp.states[-1])[1:H]
# The recorded episode can be shorter than the predicted horizon; plot only the
# overlapping part instead of failing with a shape mismatch.
n_real = min(len(last_episode), T-1)

for d in range(dims):
    st = trajectories[:,:,d]
    # plot predictive distribution: a 2-D y draws one line per column, so the
    # transposed (T-1, n_samples) block draws every sample in a single call
    state_axarr[d].plot(steps, st[:,:-1].T, color='steelblue', alpha=0.3)
    # plot experience
    state_axarr[d].plot(steps[:n_real], last_episode[:n_real,d], color='red')
    # mean over samples
    state_axarr[d].plot(steps, st[:,:-1].mean(0), color='purple')
    state_axarr[d].set_ylabel('dim %d' % d)
state_axarr[-1].set_xlabel('step')
plt.show()

In [92]:
# Build the policy-optimization objective and hand it to the optimizer.
# The learning rate is a symbolic scalar so that it can be supplied as an
# input when the compiled functions are called.
learning_rate = theano.tensor.scalar('lr')
# loss, its inputs and updates, built with 50 samples and particle resampling
# enabled
loss, inps, updts = mc_pilco_.get_loss(pol, dyn, cost, D, angle_dims, n_samples=50, resample_particles=True)
# the learning rate becomes the last input (presumably the 1e-2 passed as the
# final positional argument of polopt.minimize; confirm)
inps.append(learning_rate)
# optimise the policy's symbolic parameters; clip=10.0 is forwarded to the
# optimizer (NOTE(review): presumably a gradient clipping threshold)
polopt.set_objective(loss, pol.get_params(symbolic=True), inps, updts, clip=10.0, learning_rate=learning_rate)

[2017-07-13 13:46:02.003239] mc_pilco.rollout > Building computation graph for state particles propagation
[2017-07-13 13:46:02.992456] SGDOptimizer > Building computation graph for gradients
[2017-07-13 13:46:04.942065] SGDOptimizer > Computing parameter update rules
[2017-07-13 13:46:04.982579] SGDOptimizer > Compiling function for loss
[2017-07-13 13:46:11.199063] SGDOptimizer > Compiling parameter updates


In [None]:
### initial state distribution
polopt.max_evals = 1000

# first recorded state of every episode, stacked into one array
x0 = np.array([episode[0] for episode in exp.states])
m0 = np.mean(x0, axis=0)

if len(x0) > 2:
    # empirical covariance of the start states plus a small diagonal term
    S0 = np.cov(x0, rowvar=False, ddof=1) + 1e-7*np.eye(x0.shape[1])
else:
    # two or fewer episodes: fall back to the configured initial covariance
    S0 = p0.cov

# dyn.update() is invoked through the optimizer's callback hook; any arguments
# the optimizer passes to the callback are ignored
polopt.minimize(
    m0, S0, H, gamma, 1e-2,
    callback=lambda *args, **kwargs: dyn.update())

[2017-07-13 13:46:53.048362] SGDOptimizer > Optimizing parameters
[2017-07-13 13:46:53.437657] SGDOptimizer > Initial loss [0.3920362848683339]
[2K[2017-07-13 13:56:02.793611] SGDOptimizer > Current value: 3.988480E-01, Total evaluations: 284, Avg. time per updt: 1.625315

In [56]:
# Scratch check: calls the dynamics optimizer's loss function with no
# arguments. As recorded in the output, this raises TypeError because the
# training inputs are required.
dyn.optimizer.loss_fn()

TypeError: Missing required input: BNN>train_inputs

In [83]:
# callback executed after every call to env.step
def step_cb(state, action, cost, info, render=False):
    """Record one environment step in ``exp``; rendering is off by default.

    Running this cell rebinds ``step_cb``, replacing any definition executed
    earlier in the session.

    Args:
        state, action, cost, info: values describing the step just taken;
            they are forwarded unchanged to ``exp.add_sample``.
        render: set to True to also call ``env.render()`` after recording.
            The default (False) keeps this variant's headless behaviour.
    """
    exp.add_sample(state, action, cost, info)
    if render:
        env.render()

In [12]:
# scratch arithmetic (NOTE(review): presumably 13 episodes x 25 transitions
# each, as a check on the expected dataset size; confirm or delete)
25*13

325

In [21]:
# Scratch check: shape of the first item returned by get_dynmodel_dataset when
# restricted to episodes 2..12 (recorded result: (275, 6)).
# NOTE(review): angle_dims is hard-coded to [3] here instead of using the
# notebook-level angle_dims variable.
exp.get_dynmodel_dataset(filter_episodes=[2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], deltas=True, angle_dims=[3])[0].shape

(275, 6)

In [60]:
# Scratch check: relies on exp_states created in the plotting cell. The
# recorded shape is 1-D, i.e. one entry per episode.
exp_states.shape

(11,)

In [77]:
# Rebuild the dynamics network with two hidden layers of 1000 units each.
dyn.build_network(network_spec=dyn.get_default_network_spec(hidden_dims=[1000,1000]))
# Clear the optimizer's stored loss function.
# NOTE(review): presumably this forces it to be rebuilt for the new network on
# the next training call; confirm against the optimizer implementation.
dyn.optimizer.loss_fn = None

[2017-07-12 22:01:13.252980] BNN > Building network
InputLayer {'shape': (None, 6), 'name': 'BNN_input'}
DenseLayer {'num_units': 1000, 'nonlinearity': <function sigmoid at 0x7fc1cd0beae8>, 'name': 'BNN_fc0'}
DropoutLayer {'p': 0.05, 'rescale': False, 'name': 'BNN_drop0', 'dropout_samples': array(25, dtype=int32)}
DenseLayer {'num_units': 1000, 'nonlinearity': <function sigmoid at 0x7fc1cd0beae8>, 'name': 'BNN_fc1'}
DropoutLayer {'p': 0.05, 'rescale': False, 'name': 'BNN_drop1', 'dropout_samples': array(25, dtype=int32)}
DenseLayer {'num_units': 4, 'nonlinearity': <function linear at 0x7fc1cd05d510>, 'name': 'BNN_output'}
