In [1]:
 %matplotlib tk
import gym
import os
import sys
import numpy as np
import theano
import lasagne

from kusanagi.ghost import control
from kusanagi.ghost import regression
from kusanagi.ghost.algorithms import pilco, mc_pilco
from kusanagi.ghost.optimizers import ScipyOptimizer, SGDOptimizer
from kusanagi.base import apply_controller, train_dynamics, ExperienceDataset
from kusanagi import utils
from functools import partial
from matplotlib import pyplot as plt

Using cuDNN version 7003 on context None
Mapped name None to device cuda0: TITAN Xp (0000:01:00.0)


In [5]:
# --- Environment -----------------------------------------------------------
# Build the Hopper task and take one reset observation; x0 is reused below to
# size the policy and the regression models.
env = gym.make('Hopper-v1')
x0 = env.reset()
# Per-dimension action magnitude: the larger of |high| and |low| for each
# action dimension (max over the stacked bounds along axis 0).
maxU = np.abs(np.stack([env.action_space.high, env.action_space.low])).max(0)

# --- Policy ----------------------------------------------------------------
# Neural-network policy built from the initial observation and action bounds.
pol = control.NNPolicy(x0, maxU=maxU)
# 4 hidden layers of 50 rectifier units, dropout p=0.05 on hidden layers and
# none on the input; the output goes through the policy's own sat_func.
pol_spec = regression.dropout_mlp(
    input_dims=pol.D,
    output_dims=pol.E,
    hidden_dims=[50]*4,
    p=0.05, p_input=0.0,
    nonlinearities=lasagne.nonlinearities.rectify,
    W_init=lasagne.init.Orthogonal(gain='relu'),
    output_nonlinearity=pol.sat_func,
    dropout_class=regression.layers.DenseDropoutLayer,
    name=pol.name)
pol.network = pol.build_network(pol_spec)

# --- Dynamics model --------------------------------------------------------
# BNN constructed with (state size + action size) and state size as its two
# dimension arguments.
dyn = regression.BNN(x0.size + maxU.size, x0.size)
# A heteroscedastic model gets twice as many network outputs as dyn.E.
odims = 2*dyn.E if dyn.heteroscedastic else dyn.E
# 4 hidden layers of 200 rectifier units with log-normal dropout layers.
# NOTE(review): p=True / p_input=True presumably select the layer's default or
# learned dropout setting -- confirm against regression.dropout_mlp.
dyn_spec = regression.dropout_mlp(
    input_dims=dyn.D,
    output_dims=odims,
    hidden_dims=[200]*4,
    p=True, p_input=True,
    nonlinearities=lasagne.nonlinearities.rectify,
    W_init=lasagne.init.Orthogonal(gain='relu'),
    dropout_class=regression.layers.DenseLogNormalDropoutLayer,
    name=dyn.name)
dyn.network = dyn.build_network(dyn_spec)

# --- Reward model ----------------------------------------------------------
# Regression model for learning the reward function: same input size as the
# dynamics model, a single output dimension.
rew = regression.BNN(x0.size + maxU.size, 1)
odims = 2 if rew.heteroscedastic else 1
# 2 hidden layers of 200 rectifier units. The input width is read from the
# reward model itself (rew.D) rather than from the dynamics model, so the
# spec stays correct even if the two models are given different inputs.
rew_spec = regression.dropout_mlp(
    input_dims=rew.D,
    output_dims=odims,
    hidden_dims=[200]*2,
    p=True, p_input=True,
    nonlinearities=lasagne.nonlinearities.rectify,
    W_init=lasagne.init.Orthogonal(gain='relu'),
    dropout_class=regression.layers.DenseLogNormalDropoutLayer,
    name=rew.name)
rew.network = rew.build_network(rew_spec)

# --- Experience dataset ----------------------------------------------------
# Starts empty here; nothing is added to it in this cell.
exp = ExperienceDataset()

# --- Policy optimizer ------------------------------------------------------
# SGD-based optimizer configured with the 'adam' method and max_evals=1000.
polopt = SGDOptimizer(min_method='adam', max_evals=1000)


[2017-10-12 17:00:23.752730] NNPolicy > Building network
('InputLayer', {'shape': (None, 11), 'name': 'NNPolicy_input'})
('DenseLayer', {'W': <lasagne.init.Orthogonal object at 0x7fddf02c4f50>, 'b': <lasagne.init.Constant object at 0x7fde41f5cd10>, 'name': 'NNPolicy_fc0', 'nonlinearity': <function rectify at 0x7fde306c88c0>, 'num_units': 50})
('DenseDropoutLayer', {'b': <lasagne.init.Constant object at 0x7fde41f5cd10>, 'name': 'NNPolicy_fc1', 'nonlinearity': <function rectify at 0x7fde306c88c0>, 'p': 0.05, 'num_units': 50, 'W': <lasagne.init.Orthogonal object at 0x7fddf02c4f50>})
('DenseDropoutLayer', {'b': <lasagne.init.Constant object at 0x7fde41f5cd10>, 'name': 'NNPolicy_fc2', 'nonlinearity': <function rectify at 0x7fde306c88c0>, 'p': 0.05, 'num_units': 50, 'W': <lasagne.init.Orthogonal object at 0x7fddf02c4f50>})
('DenseDropoutLayer', {'b': <lasagne.init.Constant object at 0x7fde41f5cd10>, 'name': 'NNPolicy_fc3', 'nonlinearity': <function rectify at 0x7fde306c88c0>, 'p': 0.05, 'num