In [1]:
import os
from functools import partial

import numpy as np

import kusanagi
from kusanagi.shell.arduino import SerialPlant
from kusanagi.shell.cartpole import cartpole_loss
from kusanagi.shell.cost import build_loss_func
from kusanagi.utils import print_with_stamp

In [2]:
# state layout used in this notebook: [cart_pos, cart_vel, pole_vel, pole_angle]

# control period handed to the plant as dt
dt = 0.1

# candidate cost functions: each targets pole_angle = pi with zero velocities,
# and they differ only in the target cart position (0.0, 0.5, -0.5)
loss_task1 = partial(cartpole_loss, target=[0.0, 0.0, 0.0, np.pi])
loss_task2 = partial(cartpole_loss, target=[0.5, 0.0, 0.0, np.pi])
loss_task3 = partial(cartpole_loss, target=[-0.5, 0.0, 0.0, np.pi])

# only loss_task1 is wired into the environment below
loss_func = build_loss_func(loss_task1, False, 'cartpole_loss')

# plant connected through the serial port
# NOTE(review): state_indices presumably reorders the raw serial readings into
# the state layout listed at the top of this cell -- confirm against SerialPlant
env = SerialPlant(
    port='/dev/ttyACM0',
    dt=dt,
    state_indices=[0, 2, 3, 1],
    maxU=[10],
    loss_func=loss_func,
)

In [3]:
# blocking variant: reset() waits for user input before returning
#cart_pos, cart_vel, pole_vel, pole_angle  = env.reset()

# non-blocking variant: skip the user prompt and unpack the returned state
(cart_pos,
 cart_vel,
 pole_vel,
 pole_angle) = env._reset(wait_for_user=False)

In [4]:
# reset without waiting for the user, then start from the plant's current time
env._reset(False)
t = env.t
# 40 control steps, i.e. 4 seconds when dt = 0.1
for i in range(40):
    # sinusoidal command of amplitude 10, driven by the plant-reported time t
    u = np.array([10]) * np.sin(2 * np.pi * t)
    obs, cost, done, info = env.step(u)
    # advance the phase using the timestamp returned by the plant
    t = info['t']
    # overwrite the same output line with the latest cost
    print_with_stamp('%f        ' % cost, same_line=True)

[2K[2018-05-11 02:22:42.619798] 0.999259        

In [10]:
# loading a policy learned with kusanagi's mc_pilco
from kusanagi.ghost.control import NNPolicy
from kusanagi.base import apply_controller
from kusanagi import utils

# number of control steps to apply the policy for
# (with dt = 0.1 this is a 100 second run)
H = 1000
# indices of the state dimensions that are angles (pole_angle is index 3)
angle_dims = [3]

# loading policy
# pol should define the method pol.evaluate(states)
# which returns a list of [actions, uncertainties]
# uncertainties are actually not required, it's just for compatibility with old code (i.e. uncertainties can be None)
# In this case, the dimensions for the policy input are cart_pos, cart_vel, pole_vel, sin(pole_angle), cos(pole_angle)
#pol = NNPolicy(5,
#    filename=kusanagi.install_path+'/examples/data/serial_plant_policy')
# The learned policy is stored under the current user's home directory.
# expanduser('~') resolves that directory at run time, so the path is not
# tied to one specific user account.
pol = NNPolicy(5,
    filename=os.path.expanduser('~/.kusanagi/output/SerialPlant_8/policy_19'))

# function to execute before applying policy
def gTrig(state):
    """Preprocess a plant state into the flat input vector fed to the policy.

    Passes ``state`` and the module-level ``angle_dims`` to
    ``utils.gTrig_np`` and flattens the result.
    NOTE(review): presumably this replaces each angle dimension with its
    sin/cos pair (the policy above is built with 5 inputs) -- confirm
    against kusanagi.utils.gTrig_np.
    """
    expanded = utils.gTrig_np(state, angle_dims)
    return expanded.flatten()

# run the policy on the plant for H steps, passing gTrig as the
# state preprocessing function
states, actions, costs, infos = apply_controller(
    env, pol, H, gTrig)

[2018-05-11 02:26:20.672511] NNPolicy > Loading state from /home/juancamilog/.kusanagi/output/SerialPlant_8/policy_19.zip
[2018-05-11 02:26:20.691511] NNPolicy > Building network
('InputLayer', {'shape': (None, 5), 'name': 'NNPolicy_input'})
('DenseLayer', {'W': NNPolicy_fc0>W, 'b': NNPolicy_fc0>b, 'name': 'NNPolicy_fc0', 'nonlinearity': <function rectify at 0x7f4442ffa2a8>, 'num_units': 200})
('DenseDropoutLayer', {'b': NNPolicy_fc1>b, 'name': 'NNPolicy_fc1', 'nonlinearity': <function rectify at 0x7f4442ffa2a8>, 'noise_samples': NNPolicy_fc1>noise_samples, 'p': 0.1, 'num_units': 200, 'W': NNPolicy_fc1>W})
('DenseDropoutLayer', {'b': NNPolicy_output>b, 'name': 'NNPolicy_output', 'nonlinearity': <function linear at 0x7f4442ffa6e0>, 'noise_samples': NNPolicy_output>noise_samples, 'p': 0.1, 'num_units': 1, 'W': NNPolicy_output>W})
[2018-05-11 02:26:20.696955] apply_controller > Starting run
[2018-05-11 02:26:20.698281] apply_controller > Running for 100.000000 seconds
[2018-05-11 02:26:20

KeyboardInterrupt: 