In [1]:
using Revise

In [21]:
using MeshCatMechanisms
using MeshCat
using RigidBodyDynamics
using Gurobi
using Flux
using ProgressMeter
using MLDataPattern
using JLD2
using JuMP
using IterTools: subsets
using Plots; gr()

Plots.GRBackend()

In [3]:
import LCPSim
import LearningMPC
import Hoppers
import FluxExtensions

In [4]:
# Build the hopper model provided by the Hoppers package.
robot = Hoppers.Hopper()
# Visualizer for the robot's mechanism, using the visuals from the Hoppers URDF.
mvis = MechanismVisualizer(robot.mechanism, URDFVisuals(Hoppers.urdf))
# Wrap the visualizer in an IJuliaCell as the cell's result
# (presumably this embeds the viewer inline in the notebook -- confirm).
IJuliaCell(mvis)

Listening on 127.0.0.1:7012...
zmq_url=tcp://127.0.0.1:6012
web_url=http://127.0.0.1:7012/static/


In [5]:
"""
    contact_state(x, env, threshold=1e-2)

For every contact in `env.contacts`, report whether the separation between the
contact point (transformed with `transform_to_root` at state `x`) and its
obstacle is below `threshold`. The result has one `Bool` per contact.
"""
function contact_state(x::MechanismState, env::LCPSim.Environment, threshold=1e-2)
    map(env.contacts) do contact
        # Each contact unpacks as (body, point, obstacle); the body is not needed.
        _, point, obstacle = contact
        point_in_root = transform_to_root(x, point.frame) * point
        gap = LCPSim.separation(obstacle, point_in_root)
        gap < threshold
    end
end

contact_state (generic function with 2 methods)

In [6]:
"""
    create_net()

Build a three-layer dense network (4 -> 32 -> 32 -> 1, `elu` on the first two
layers) together with an interval loss.

The returned `loss(x, lb, ub)` evaluates the network at `x` and sums, over the
outputs, how far each one lies outside `[lb, ub]`: `lb - y` when below `lb`,
`y - ub` when above `ub`, and zero otherwise.

Returns the tuple `(net, loss)`.
"""
function create_net()
    width = 32
    net = Chain(
        Dense(4, width, elu),
        Dense(width, width, elu),
        Dense(width, 1),
    )
    loss = function (x, lb, ub)
        y = net(x)
        # `0 .* y` (rather than a literal zero) is kept from the original expression.
        above = ifelse.(y .> ub, y .- ub, 0 .* y)
        violation = ifelse.(y .< lb, lb .- y, above)
        sum(violation)
    end
    return net, loss
end

create_net (generic function with 1 method)

In [7]:
# Instantiate the network and its interval loss, collect the network's
# parameters, and hand them to an ADAM optimizer.
net, loss = create_net()
net_params = params(net)
optimizer = Flux.Optimise.ADAM(net_params)

# MPC settings: time step 0.05 and a horizon of 10.
# The MIP solver is Gurobi with output silenced (OutputFlag=0), a time limit of 5,
# a relative MIP gap of 1e-1 and a feasibility tolerance of 1e-3.
# The LCP solver is a second Gurobi instance with its own environment and output silenced.
mpc_params = LearningMPC.MPCParams(
    Δt=0.05,
    horizon=10,
    mip_solver=GurobiSolver(Gurobi.Env(), OutputFlag=0, 
        TimeLimit=5, 
        MIPGap=1e-1,
        FeasibilityTol=1e-3),
    lcp_solver=GurobiSolver(Gurobi.Env(), OutputFlag=0))

# Nominal state at which the contact dynamics are linearized below.
xstar = Hoppers.nominal_state(robot)
# Every subset of the contact indices, including the empty subset.
contact_index_sets = subsets(1:length(robot.environment.contacts))
# Lookup table keyed by a Bool vector (one entry per contact, `true` for the
# contacts in the subset). Each value is the `B` matrix returned by
# `contact_linearize` at `xstar` for the points of that subset.
B_matrices = Dict(map(contact_index_sets) do idxs
    contacts = robot.environment.contacts[idxs]
    # Named `active` so the local no longer shadows the `contact_state`
    # function defined earlier in this notebook.
    active = fill(false, length(robot.environment.contacts))
    # Broadcast assignment: `active[idxs] = true` with a vector of indices is
    # deprecated in Julia 0.7 and an error from 1.0; `.=` works on 0.6 as well.
    active[idxs] .= true
    # Element 2 of each contact is its point; duplicates are removed.
    pts = unique(Point3D[p[2] for p in contacts])
    ustar = LearningMPC.nominal_input(xstar, pts)
    Jc = LCPSim.ContactLQR.contact_jacobian(xstar, pts)
    A, B, c = LCPSim.ContactLQR.contact_linearize(xstar, ustar, Jc)
    active => B
end)

# Default cost matrices for the hopper (presumably Q weights the state and R the
# input -- confirm against Hoppers.default_costs).
Q, R = Hoppers.default_costs(robot)
# Look up the body named "foot" in the mechanism.
foot = findbody(robot.mechanism, "foot")
# LQR solution about `xstar` with time step `mpc_params.Δt`; the last argument is a
# single point at the origin of the foot's default frame (presumably the contact
# point used for the linearization -- confirm).
lqrsol = LearningMPC.LQRSolution(xstar, Q, R, mpc_params.Δt, [Point3D(default_frame(foot), 0., 0., 0.)])
# Overwrite the solution's `S` in place with `Q` scaled by `1 / Δt`.
lqrsol.S .= 1 ./ mpc_params.Δt .* Q

# Scratch state that the network-based controllers copy the incoming state into.
x_net = MechanismState(robot.mechanism)
# Wrapper around `net`; calling it returns a (value, jacobian) pair as used below.
tangent_net = FluxExtensions.TangentPropagator(net)

# Network-derived feedback policy: u = -R⁻¹ Bᵀ Jᵀ, where J is the Jacobian of the
# network output with respect to the state and B is looked up from `B_matrices`
# using the contacts that `contact_state` reports as active.
net_controller = state -> begin
    set_configuration!(x_net, configuration(state))
    set_velocity!(x_net, velocity(state))
    # Only the Jacobian is needed; the network value itself is discarded.
    _, jac = tangent_net(Vector(state))
    cs = contact_state(x_net, robot.environment)
    B = B_matrices[cs]
    dVdx = Flux.Tracker.data(jac)'
    # Solve R * u = -(Bᵀ * dVdx) by left division instead of forming inv(R).
    vec(-(R \ (B' * dVdx)))
end

# MPC controller over the robot's mechanism and environment. The final argument
# lists `net_controller` and `lqrsol` (presumably policies used to seed or
# warm-start the MPC solve -- confirm against LearningMPC.MPCController).
mpc_controller = LearningMPC.MPCController(robot.mechanism, 
    robot.environment, mpc_params, lqrsol, 
    [net_controller, lqrsol]);

# Sink whose `samples` field is read back by `collect_into!`.
sample_sink = LearningMPC.MPCSampleSink{Float64}(true)
# Playback sink tied to the visualizer; constructed here but not attached below.
playback_sink = LearningMPC.PlaybackSink(mvis, mpc_params.Δt)

# Only the sample sink is registered as the MPC callback; the playback sink is
# commented out.
mpc_controller.callback = LearningMPC.call_each(
    sample_sink,
#     playback_sink,
)

live_viewer = LearningMPC.live_viewer(mvis)


# Controller used for data collection: a DAgger controller built from the MPC and
# network controllers, combined with the live viewer through `call_each`.
# NOTE(review): 0.2 is presumably the mixing probability between the two
# controllers -- confirm against LearningMPC.dagger_controller.
dagger_controller = LearningMPC.call_each(
    LearningMPC.dagger_controller(
        mpc_controller,
        net_controller,
        0.2),
    live_viewer
    )

# Initial dataset; the training cell rebinds `dataset` on every iteration.
dataset = LearningMPC.Dataset(lqrsol)

# Reference state with configuration [1, 1]; `collect_into!` passes it to
# `LearningMPC.randomize!` when drawing initial states.
x_init = MechanismState(robot.mechanism)
set_configuration!(x_init, [1, 1])

# Scratch state that `collect_into!` re-randomizes before every simulation.
x0 = MechanismState{Float64}(robot.mechanism)

"""
    collect_into!(data)

Clear `sample_sink`, randomize the global `x0` from `x_init`, simulate from `x0`
under `dagger_controller`, and append the samples gathered in
`sample_sink.samples` to `data`. Returns `data`.
"""
function collect_into!(data::Vector{<:LearningMPC.Sample})
    empty!(sample_sink)
    LearningMPC.randomize!(x0, x_init, 0.0, 1.5)
    # The simulation result itself is not needed; the samples arrive through
    # `sample_sink`.
    LCPSim.simulate(
        x0, dagger_controller, robot.environment,
        mpc_params.Δt, 50, mpc_params.lcp_solver)
    return append!(data, sample_sink.samples)
end

"""
    features(s)

Reduce a sample to the tuple `(state, objective_bound, objective_value)`, where
the two objective entries are read from `s.mip`.
"""
function features(s::LearningMPC.Sample)
    mip = s.mip
    return (s.state, mip.objective_bound, mip.objective_value)
end

features (generic function with 1 method)

In [8]:
# Accumulators shared by all iterations of the data-collection/training loop.
datasets = LearningMPC.Dataset{Float64}[]
all_training_data = Tuple{Vector{Float64}, Float64, Float64}[]
all_validation_data = Tuple{Vector{Float64}, Float64, Float64}[]
losses = Tuple{Float64, Float64}[]

@showprogress for iteration in 1:20
    # Collect new samples: two calls for training, one each for testing and
    # validation, then fold the training/validation features into the aggregates.
    dataset = LearningMPC.Dataset(lqrsol)
    for rollout in 1:2
        collect_into!(dataset.training_data)
    end
    collect_into!(dataset.testing_data)
    collect_into!(dataset.validation_data)
    append!(all_training_data, features.(dataset.training_data))
    append!(all_validation_data, features.(dataset.validation_data))

    # 100 passes over the shuffled aggregate training set; after each pass record
    # the mean training and validation loss.
    @time for epoch in 1:100
        Flux.train!(loss, shuffleobs(all_training_data), optimizer)
        training_loss = mean(
            sample -> Flux.Tracker.data(loss(sample...)), all_training_data)
        validation_loss = mean(
            sample -> Flux.Tracker.data(loss(sample...)), all_validation_data)
        push!(losses, (training_loss, validation_loss))
    end
    push!(datasets, dataset)

    # Checkpoint: the file is opened in "w" mode, so it is rewritten every iteration.
    jldopen("hopper.jld2", "w") do io
        io["datasets"] = datasets
        io["net"] = net
        io["lqrsol"] = lqrsol
        io["mpc_params"] = Dict("Δt" => mpc_params.Δt, "horizon" => mpc_params.horizon)
        io["all_training_data"] = all_training_data
        io["all_validation_data"] = all_validation_data
        io["losses"] = losses
    end

    # Plot both loss curves with the lower y-limit pinned to zero.
    plt = plot(map(first, losses), label="training")
    plot!(plt, map(last, losses), label="validation")
    upper = ylims(plt)[2]
    ylims!(plt, (0, upper))
    display(plt)
end

  2.314121 seconds (1.71 M allocations: 94.053 MiB, 2.30% gc time)


[32mProgress:   5%|██                                       |  ETA: 0:14:15[39m

  1.313859 seconds (1.66 M allocations: 108.187 MiB, 2.96% gc time)


[32mProgress:  10%|████                                     |  ETA: 0:07:38[39m

  2.343525 seconds (2.97 M allocations: 193.014 MiB, 2.97% gc time)


[32mProgress:  15%|██████                                   |  ETA: 0:05:27[39m

  3.190791 seconds (4.06 M allocations: 263.941 MiB, 2.92% gc time)


[32mProgress:  20%|████████                                 |  ETA: 0:04:20[39m

  3.970248 seconds (5.06 M allocations: 328.325 MiB, 2.92% gc time)


[32mProgress:  25%|██████████                               |  ETA: 0:03:42[39m

  5.108731 seconds (6.45 M allocations: 418.563 MiB, 2.91% gc time)


[32mProgress:  30%|████████████                             |  ETA: 0:03:17[39m

  5.901507 seconds (7.57 M allocations: 491.611 MiB, 2.92% gc time)


[32mProgress:  35%|██████████████                           |  ETA: 0:02:57[39m

  6.941408 seconds (8.96 M allocations: 581.633 MiB, 2.93% gc time)


[32mProgress:  40%|████████████████                         |  ETA: 0:02:40[39m

  7.765685 seconds (9.92 M allocations: 643.515 MiB, 2.97% gc time)


[32mProgress:  45%|██████████████████                       |  ETA: 0:02:25[39m

  8.717504 seconds (11.06 M allocations: 717.928 MiB, 3.02% gc time)


[32mProgress:  50%|████████████████████                     |  ETA: 0:02:13[39m

  9.571503 seconds (12.16 M allocations: 789.874 MiB, 2.97% gc time)


[32mProgress:  55%|███████████████████████                  |  ETA: 0:02:00[39m

 10.268544 seconds (13.14 M allocations: 854.635 MiB, 3.09% gc time)


[32mProgress:  60%|█████████████████████████                |  ETA: 0:01:48[39m

 11.374715 seconds (14.49 M allocations: 942.337 MiB, 3.02% gc time)


[32mProgress:  65%|███████████████████████████              |  ETA: 0:01:36[39m

 12.442877 seconds (15.66 M allocations: 1019.657 MiB, 3.08% gc time)


[32mProgress:  70%|█████████████████████████████            |  ETA: 0:01:24[39m

 13.372542 seconds (17.16 M allocations: 1.091 GiB, 3.06% gc time)


[32mProgress:  75%|███████████████████████████████          |  ETA: 0:01:12[39m

 14.428636 seconds (18.46 M allocations: 1.174 GiB, 3.06% gc time)


[32mProgress:  80%|█████████████████████████████████        |  ETA: 0:00:59[39m

 15.179031 seconds (19.40 M allocations: 1.234 GiB, 3.01% gc time)


[32mProgress:  85%|███████████████████████████████████      |  ETA: 0:00:45[39m

 16.370557 seconds (20.93 M allocations: 1.331 GiB, 3.04% gc time)


[32mProgress:  90%|█████████████████████████████████████    |  ETA: 0:00:31[39m

 17.415178 seconds (22.18 M allocations: 1.411 GiB, 3.04% gc time)


[32mProgress:  95%|███████████████████████████████████████  |  ETA: 0:00:16[39m

 18.269266 seconds (23.23 M allocations: 1.478 GiB, 3.00% gc time)


[32mProgress: 100%|█████████████████████████████████████████| Time: 0:05:23[39m


In [9]:
# optimizer = Flux.ADAM(net_params)
# @time for i in 1:1000
#     Flux.train!(loss, shuffleobs(all_training_data), optimizer)
#     push!(losses, 
#         (mean(xy -> Flux.Tracker.data(loss(xy...)), 
#               all_training_data),
#          mean(xy -> Flux.Tracker.data(loss(xy...)), 
#               all_validation_data)))
# end

# plt = plot(first.(losses), label="training")
# plot!(plt, last.(losses), label="validation")
# ylims!(plt, (0, ylims(plt)[2]))
# plt

In [13]:
# Variant of the network policy that always uses the `B` matrix for "every
# contact active" instead of querying the current contact state:
# u = -R⁻¹ Bᵀ Jᵀ, with J the Jacobian of the network output w.r.t. the state.
net_controller2 = state -> begin
    # `x_net` is not read below; it is still synchronized with `state` so the
    # closure's effect on the shared scratch state is unchanged.
    set_configuration!(x_net, configuration(state))
    set_velocity!(x_net, velocity(state))
    # Only the Jacobian is needed; the network value itself is discarded.
    _, jac = tangent_net(Vector(state))
    all_active = fill(true, length(robot.environment.contacts))
    B = B_matrices[all_active]
    dVdx = Flux.Tracker.data(jac)'
    # Solve R * u = -(Bᵀ * dVdx) by left division instead of forming inv(R).
    vec(-(R \ (B' * dVdx)))
end

(::#25) (generic function with 1 method)

In [14]:
# Simulate `net_controller2` from configuration [1, 1] with termination disabled,
# then replay the result in the visualizer.
x0 = MechanismState{Float64}(robot.mechanism)
# LearningMPC.randomize!(x0, xstar, 0.0, 1.0)
set_configuration!(x0, [1, 1])
results = LCPSim.simulate(x0,
    net_controller2,
    robot.environment, mpc_params.Δt, 100,
    mpc_params.lcp_solver,
    termination=x -> false);
# Replay with the same step the simulation used rather than a hard-coded 0.05,
# so the two stay in agreement if `mpc_params.Δt` is changed.
LearningMPC.playback(mvis, results, mpc_params.Δt)

In [36]:
# Controller that solves a one-step optimization per call. The objective is the
# quadratic cost given by `lqrsol.R` and `lqrsol.Q` on the step's input and state
# variables, plus a linear term on the state weighted by the network's Jacobian.
# It returns the optimized input values.
net_qp_controller = state -> begin
    set_configuration!(x_net, configuration(state))
    set_velocity!(x_net, velocity(state))
    _, jac = tangent_net(Vector(state))
    model = Model(solver=mpc_params.mip_solver)
    # Horizon argument of 1: exactly one step is expected back.
    _, steps = LCPSim.optimize(x_net, robot.environment, mpc_params.Δt, 1, model)
    @assert length(steps) == 1
    only_step = steps[1]
    xvars = only_step.state.state
    uvars = only_step.input
    weights = vec(Flux.Tracker.data(jac))
    @objective(model, Min,
        uvars' * lqrsol.R * uvars + xvars' * lqrsol.Q * xvars + weights' * xvars)
    solve(model)
    getvalue(uvars)
end
    

(::#69) (generic function with 1 method)

In [37]:
# Simulate `net_qp_controller` from configuration [1, 1] with termination
# disabled, then replay the result in the visualizer.
x0 = MechanismState{Float64}(robot.mechanism)
# LearningMPC.randomize!(x0, xstar, 0.0, 1.0)
set_configuration!(x0, [1, 1])
results = LCPSim.simulate(x0,
    net_qp_controller,
    robot.environment, mpc_params.Δt, 100,
    mpc_params.lcp_solver,
    termination=x -> false);
# Replay with the same step the simulation used rather than a hard-coded 0.05,
# so the two stay in agreement if `mpc_params.Δt` is changed.
LearningMPC.playback(mvis, results, mpc_params.Δt)

In [None]:
# (intentionally empty: a stray `k` here was not bound to anything in this notebook)