In [1]:
%load_ext autoreload
%autoreload 2
'''
Example of how to use the library to learn the cartpole task with the MC_PILCO learner and a dropout-network (BNN) dynamics model
'''
# pylint: disable=C0103
import atexit
import sys
import os
import numpy as np
import kusanagi.ghost.regression as kreg

from kusanagi import utils
from kusanagi.shell.cartpole import default_params
from kusanagi.ghost.learners.PILCO import PILCO, MC_PILCO
from kusanagi.ghost.control import NNPolicy
from kusanagi.utils import plot_results

# NOTE: seeding is left disabled, so NumPy's global random state differs
# between runs; uncomment the next line to fix the seed.
#np.random.seed(31337)
# Allow up to 500 characters per line when NumPy prints arrays.
np.set_printoptions(linewidth=500)

In [2]:
if __name__ == '__main__':
    # Keep everything this example writes under a 'cartpole' subdirectory of
    # the current kusanagi output directory.
    utils.set_output_dir(os.path.join(utils.get_output_dir(), 'cartpole'))

    # NOTE(review): use_bnn and N are not read anywhere in this cell; they are
    # kept because they are notebook globals that later cells may rely on.
    use_bnn = True
    J = 1    # number of random initial trials
    N = 100  # learning iterations

    # Start from the cartpole defaults and override a few settings.
    learner_params = default_params()
    learner_params['params']['use_empirical_x0'] = True
    learner_params['params']['realtime'] = False
    # presumably the rollout horizon and the plant time step -- TODO confirm
    learner_params['params']['H'] = 4.0
    learner_params['params']['plant']['dt'] = 0.1
    # The plant parameter 'l' and the cost's pendulum_length are set to the
    # same value (presumably both are the pole length -- TODO confirm).
    learner_params['params']['plant']['params']['l'] = .6
    learner_params['params']['cost']['pendulum_length'] = .6

    # dropout network (BNN) based PILCO
    learner_params['params']['min_method'] = 'ADAM'
    learner_params['params']['learning_rate'] = 1e-3
    learner_params['params']['max_evals'] = 1000
    learner_params['params']['clip'] = 1.0
    learner_params['dynmodel_class'] = kreg.BNN
    #learner_params['policy_class'] = NNPolicy

    # initialize learner
    learner = MC_PILCO(**learner_params)
    try:
        # Best effort: a missing or unreadable saved state is not fatal, we
        # simply start from scratch.
        learner.load()
    except Exception as err:
        utils.print_with_stamp(
            'Unable to load learner state: %s' % err, 'main')
    # register exit hook so the learner is stopped when the interpreter exits
    atexit.register(learner.stop)

    # if we have no prior data, run trials with random controls
    if learner.experience.n_samples() == 0:
        # gather data with random trials
        for _ in range(J):
            learner.plant.reset_state()
            learner.apply_controller(random_controls=True)
    # otherwise, check if we need to collect data with the current policy
    # parameters
    else:
        last_pp = learner.experience.policy_parameters[-1]
        current_pp = learner.policy.get_params(symbolic=False)
        param_pairs = list(zip(last_pp, current_pp))
        # The policy has changed as soon as ANY parameter array differs from
        # the one stored with the last rollout (requiring every array to
        # differ would skip the rollout after a partial update). When there
        # is nothing to compare against, run the rollout.
        should_run = (not param_pairs) or any(
            not np.allclose(lastp, curp) for lastp, curp in param_pairs)

        if should_run:
            learner.plant.reset_state()
            learner.apply_controller()

[2017-05-01 14:56:35.494303] RBFPolicy_sat > Initializing parameters
[2017-05-01 14:56:35.501123] RBFPolicy_sat > Initialising expression graph for full GP training loss function
[2017-05-01 14:56:35.638161] RBFPolicy_sat > Initialising expression graph for prediction
[2017-05-01 14:56:35.654210] RBFPolicy_sat > Compiling mean and variance of prediction
[2017-05-01 14:56:37.032169] RBFPolicy_sat > Done compiling
[2017-05-01 14:56:37.043564] Experience > Initialising new experience dataset
[2017-05-01 14:56:37.044255] MC_PILCO > Initialising new MC_PILCO learner
('float64', dtype('float64'))
[2017-05-01 14:56:37.045854] MC_PILCO > Unable to load state from /home/juancamilog/.kusanagi/output/cartpole/MC_PILCO_BNN_6_4_Cartpole_RBFPolicy_sat.zip
[2017-05-01 14:56:37.046331] MC_PILCO > Cost parameters: {'target': [0, 0, 0, 3.141592653589793], 'width': 0.25, 'expl': 0.0, 'pendulum_length': 0.6, 'angle_dims': [3]}
[2017-05-01 14:56:37.047043] RBFPolicy_sat > Unable to load state from /home/ju

In [3]:
# Train the learner's dynamics model (the kreg.BNN class selected in the setup
# cell). NOTE(review): the meaning of in_steps is not visible here -- confirm
# against MC_PILCO.train_dynamics.
learner.train_dynamics(in_steps=1)

[2017-05-01 14:56:40.546045] MC_PILCO > Training dynamics model
[(41, 4)]
[2017-05-01 14:56:40.548563] MC_PILCO > Dataset size:: Inputs: [ (40, 6) ], Targets: [ (40, 4) ]  
[2017-05-01 14:56:40.549724] BNN > Building network
InputLayer {'shape': (None, 6), 'name': 'BNN_input'}
DropoutLayer {'p': 0.0, 'rescale': False, 'name': 'BNN_drop_input', 'dropout_samples': array(10, dtype=int32)}
DenseLayer {'num_units': 200, 'nonlinearity': <function sigmoid at 0x7f54e2394510>, 'name': 'BNN_fc0'}
DropoutLayer {'p': 0.05, 'rescale': False, 'name': 'BNN_drop0', 'dropout_samples': array(10, dtype=int32)}
DenseLayer {'num_units': 200, 'nonlinearity': <function sigmoid at 0x7f54e2394510>, 'name': 'BNN_fc1'}
DropoutLayer {'p': 0.05, 'rescale': False, 'name': 'BNN_drop1', 'dropout_samples': array(10, dtype=int32)}
DenseLayer {'num_units': 4, 'nonlinearity': <function linear at 0x7f54e23a4d90>, 'name': 'BNN_output'}
[2017-05-01 14:56:40.568956] BNN > Initialising loss function
[2017-05-01 14:56:40.77471

<kusanagi.ghost.regression.NN.BNN at 0x7f54a03a4cc0>

In [None]:
# Plot the learner's results with kusanagi.utils.plot_results.
# NOTE: this cell has no recorded execution (In [None]).
plot_results(learner)

In [4]:
import pybullet

In [4]:
# Open a pybullet connection in GUI mode and keep the returned handle.
# NOTE: needs `import pybullet` to have run in the same kernel; the recorded
# NameError shows this cell was executed when that was not the case.
pclient = pybullet.connect(pybullet.GUI)

NameError: name 'pybullet' is not defined

In [6]:
import theano

In [8]:
# Display the float type Theano is configured to use (recorded: 'float64').
theano.config.floatX

'float64'