In [2]:
using Revise

In [18]:
using Flux
using JLD2
using FileIO
using MLDataPattern
using CoordinateTransformations
using ProgressMeter
using RigidBodyDynamics
using DrakeVisualizer
# Make sure a DrakeVisualizer window exists: open a new one only when none is open yet.
if !DrakeVisualizer.any_open_windows()
    DrakeVisualizer.new_window()
end
import FluxExtensions
import LearningMPC
import LCPSim
import Hoppers

In [5]:
# Read the grid-search archive and keep only the entry stored under "samples".
samples = let archive = load("2018-02-07-hopper-smaller-grid/grid_search.jld2")
    archive["samples"]
end;

In [6]:
# Prune `samples` in place, keeping only those whose second state entry
# does not exceed the first.
filter!(s -> s.state[2] <= s.state[1], samples);

In [7]:
"""
    features(sample::LearningMPC.Sample)

Return the `(input, target)` training pair for `sample`: its `state` together with
the first column of its `uJ` field.
"""
function features(sample::LearningMPC.Sample)
    return (sample.state, sample.uJ[:, 1])
end

# One (state, target) pair per sample, shuffled and split 85% / 15% into train / test.
data = map(features, samples);
train_data, test_data = splitobs(shuffleobs(data); at=0.85);

In [49]:
"""
    setup_model(; state_dim=4, output_dim=2, ncomponents=16, input_scale=0.2)

Build the attention network and its mean-squared-error loss, returned as `(model, loss)`.

The network is a `FluxExtensions.Attention` made of two sub-networks that both take
a `state_dim`-element input:

- `signals`: one dense layer whose `output_dim * ncomponents` outputs are reshaped
  into an `ncomponents × output_dim` matrix;
- `weights`: the input scaled by `input_scale`, two `elu` dense layers of width
  `ncomponents`, then a `softmax`.

How the two are combined is defined by `FluxExtensions.Attention` and is not visible
here (presumably the softmax weights blend the rows of `signals` — confirm there).

# Keywords
- `state_dim::Integer=4`: length of the network input.
- `output_dim::Integer=2`: number of columns of the reshaped `signals` output.
- `ncomponents::Integer=16`: number of rows of `signals` and width of the `weights` layers.
- `input_scale::Real=0.2`: uniform scaling applied to the input of `weights`.

The defaults reproduce the sizes that were previously hard-coded.

# Returns
- `model`: the attention network.
- `loss`: `(x, y) -> Flux.mse(vec(model(x)), y)`, closing over `model`.
"""
function setup_model(; state_dim::Integer=4, output_dim::Integer=2,
                     ncomponents::Integer=16, input_scale::Real=0.2)
    # The flat dense output is reshaped so that each row is one candidate output.
    signals = Chain(
        Dense(state_dim, output_dim * ncomponents),
        x -> reshape(x, ncomponents, output_dim))
    # The last dense layer must have `ncomponents` outputs so that the softmax has
    # one entry per row of `signals`.
    weights = Chain(
        LinearMap(UniformScaling(input_scale)),
        Dense(state_dim, ncomponents, elu),
        Dense(ncomponents, ncomponents, elu),
        softmax)
    model = FluxExtensions.Attention(signals, weights)

    # `vec` flattens the model output before it is compared with the target `y`.
    loss = (x, y) -> Flux.mse(vec(model(x)), y)
    return model, loss
end

# Instantiate the network with its loss, then create an ADADelta optimiser over
# the model's parameters.
(model, loss) = setup_model()
opt = Flux.ADADelta(Flux.params(model))

(::#71) (generic function with 1 method)

In [50]:
import LCPSim
import Hoppers
using RigidBodyDynamics
using Gurobi
using DrakeVisualizer
# Open a DrakeVisualizer window only when none is open yet: `||` short-circuits, so
# `new_window()` is skipped whenever `any_open_windows()` already returns `true`.
DrakeVisualizer.any_open_windows() || DrakeVisualizer.new_window()

true

In [51]:
# Hopper model and the state returned by `Hoppers.nominal_state`.
robot = Hoppers.Hopper()
xstar = Hoppers.nominal_state(robot)

# Visualizer subtree for the hopper: load the geometry, then pose it at `xstar`.
basevis = Visualizer()[:hopper]
setgeometry!(basevis, robot)
settransform!(basevis[:robot], xstar)

# All-zero vector with one entry per velocity coordinate of `xstar`.
ustar = zeros(num_velocities(xstar))

# Time step handed to `LCPSim.simulate` and to the playback calls.
Δt = 0.05

0.05

In [52]:
# Controller closure: evaluate `model` on the state vector of `x`, unwrap the
# tracked result with `Flux.Tracker.value`, and flatten it with `vec`.
net_controller = function (x)
    prediction = model(state_vector(x))
    return vec(Flux.Tracker.value(prediction))
end

(::#15) (generic function with 1 method)

In [53]:
# Initial state: configuration [1, 1] and zero velocity, then passed through
# `LearningMPC.randomize!`.
x_init = MechanismState{Float64}(robot.mechanism)
set_configuration!(x_init, ones(2))
set_velocity!(x_init, zeros(2))
# NOTE(review): the meaning of 0.5 and 1.0 is defined by `LearningMPC.randomize!` and
# is not visible here — presumably perturbation magnitudes; confirm.
LearningMPC.randomize!(x_init, x_init, 0.5, 1.0)

# Roll out the learned controller. The literal 100 is presumably the number of
# steps of length Δt (confirm against `LCPSim.simulate`); Gurobi output is silenced.
results = LCPSim.simulate(
    x_init,
    net_controller,
    robot.environment,
    Δt,
    100,
    GurobiSolver(Gurobi.Env(), OutputFlag=0));

In [54]:
# Replay the simulated results on the hopper's visualizer node, passing the same Δt
# that was used for the simulation.
LearningMPC.playback(
    basevis[:robot],
    results,
    Δt)

In [59]:
# Run 100 passes of `Flux.train!` over the training pairs, with a progress bar.
@showprogress for epoch in 1:100
    Flux.train!(loss, train_data, opt)
end

Progress: 100%|█████████████████████████████████████████| Time: 0:12:36


In [60]:
# Display the model's parameter arrays after training.
Flux.params(model)

6-element Array{Any,1}:
 param([0.316718 -0.315982 0.0362808 0.379321; 0.0630288 -0.0939456 0.113194 0.00598398; … ; 9.64426 13.3897 -1.58045 3.90986; 1.30677 -6.71482 0.0785534 -1.81041])                                                   
 param([0.00501965, 0.172233, -0.735358, -0.0767995, -0.0495874, -0.0271283, 0.123576, -0.0578249, 0.00466749, -0.136831  …  15.2922, 15.1371, 15.1697, 15.1928, 27.6338, 15.5114, 16.9579, 14.9992, 15.4874, 4.18218])
 param([4.7263 -1.82219 -1.08901 0.428086; -5.53996 2.25817 0.686609 0.221881; … ; 1.31654 3.24633 0.972213 -0.0940963; 5.61796 -1.05542 1.8555 0.53821])                                                              
 param([-1.49376, 0.193408, -0.276425, -0.8048, 0.156168, 0.72824, 3.60415, 0.0296689, 5.11868, 0.672663, -1.59691, -2.54904, -0.791005, 0.00550947, 0.856356, 0.731837])                                              
 param([0.109783 1.87797 … -1.87662 0.865416; -2.11114 1.72057 … -2.11689 -0.88466; … ; -2.14179 -0.129833 … -1.

In [85]:
# Initial state: configuration [1, 1] and zero velocity, shown in the visualizer
# before the rollout starts.
x_init = MechanismState{Float64}(robot.mechanism)
set_configuration!(x_init, ones(2))
set_velocity!(x_init, zeros(2))
settransform!(basevis[:robot], x_init)
# No random perturbation here: the `LearningMPC.randomize!(x_init, x_init, 0.5, 1.0)`
# call is left out for this rollout.

# Roll out the trained controller. The literal 200 is presumably the number of
# steps of length Δt (confirm against `LCPSim.simulate`); Gurobi output is silenced.
results = LCPSim.simulate(
    x_init,
    net_controller,
    robot.environment,
    Δt,
    200,
    GurobiSolver(Gurobi.Env(), OutputFlag=0));

In [86]:
# Replay the rollout on the hopper's visualizer node, passing twice the simulation
# time step (presumably this plays back at half speed — confirm in `playback`).
LearningMPC.playback(
    basevis[:robot],
    results,
    2 * Δt)

In [87]:
# Persist the trained network and, separately, its parameter list as two named
# entries of "2model.jld2".
save(
    "2model.jld2",
    "model", model,
    "parameters", params(model))