In [1]:
using Revise

In [4]:
using RigidBodyDynamics
using DrakeVisualizer
DrakeVisualizer.any_open_windows() || DrakeVisualizer.new_window()
using RigidBodyTreeInspector
using Gurobi
import StochasticOptimization
using Plots; gr()
using JLD2
using ProgressMeter
using Optim

In [3]:
import LCPSim
import LearningMPC
import Hoppers
import Nets

[1m[36mINFO: [39m[22m[36mRecompiling stale cache file /home/rdeits/locomotion/explorations/learning-mpc/packages/lib/v0.6/LearningMPC.ji for module LearningMPC.
[39m[1m[36mINFO: [39m[22m[36mPrecompiling module Hoppers.
[39m

In [39]:
# --- Robot model and nominal state -------------------------------------------
robot = Hoppers.Hopper()
mechanism = robot.mechanism
xstar = Hoppers.nominal_state(robot)

# --- Visualization: load the robot geometry and pose it at `xstar` -----------
basevis = Visualizer()[:hopper]
setgeometry!(basevis, robot)
settransform!(basevis[:robot], xstar)

# --- MPC configuration -------------------------------------------------------
# Two separate Gurobi solver instances are created, one for the MIP and one for
# the LCP. `OutputFlag=0` silences Gurobi's log output; the MIP solver also gets
# a 120 s time limit, a 10% relative MIP gap and a loosened feasibility tolerance.
mpc_params = LearningMPC.MPCParams(
    Δt=0.05,
    horizon=10,
    mip_solver=GurobiSolver(Gurobi.Env(), OutputFlag=0, TimeLimit=120, MIPGap=1e-1, FeasibilityTol=1e-3),
    lcp_solver=GurobiSolver(Gurobi.Env(), OutputFlag=0))

# --- LQR solution around `xstar` ---------------------------------------------
Q, R = Hoppers.default_costs(robot)
foot = findbody(mechanism, "foot")
# The last argument is a single point at the origin of the foot's default frame
# (presumably the contact point used by the LQR linearization; confirm in LearningMPC).
lqrsol = LearningMPC.LQRSolution(xstar, Q, R, mpc_params.Δt, [Point3D(default_frame(foot), 0., 0., 0.)])
# Overwrite the computed `S` in place with Q / Δt.
# NOTE(review): `S` is presumably the LQR cost-to-go matrix; confirm why it is replaced.
lqrsol.S .= 1 ./ mpc_params.Δt .* Q

# --- Control network ---------------------------------------------------------
hidden_widths = [10, 10]
activation = Nets.elu
net = LearningMPC.control_net(mechanism, hidden_widths, activation)
# Perturb every network parameter with zero-mean Gaussian noise of standard deviation 0.1.
for I in eachindex(net.params.data)
    net.params.data[I] += 0.1 * randn()
end

# Controller that evaluates the network on the state vector of `x`.
net_controller = x -> Nets.predict(net, state_vector(x))

# --- MPC controller ----------------------------------------------------------
# NOTE(review): the final list [net_controller, lqrsol] is presumably the set of
# warm-start policies for the MIP; confirm against LearningMPC.MPCController.
mpc_controller = LearningMPC.MPCController(mechanism, 
    robot.environment, mpc_params, lqrsol, 
    [net_controller, lqrsol]);

# --- Sinks / callbacks -------------------------------------------------------
sample_sink = LearningMPC.MPCSampleSink{Float64}()
playback_sink = LearningMPC.PlaybackSink(basevis[:robot], 0.25 * mpc_params.Δt)
live_viewer = LearningMPC.live_viewer(mechanism, basevis[:robot])

# Only `sample_sink` is active as an MPC callback; the alternatives below are disabled.
mpc_controller.callback = LearningMPC.call_each(
    sample_sink,
#     playback_sink,
#     (args...) -> println("tick")
#     (x, results) -> live_viewer(x)
)


# Controller used for data collection: the DAgger controller combined with the
# live viewer through `call_each`.
# NOTE(review): 0.5 is presumably the probability/weight of using the MPC action
# versus the net action; confirm against LearningMPC.dagger_controller.
dagger_controller = LearningMPC.call_each(
    LearningMPC.dagger_controller(
        mpc_controller,
        net_controller,
        0.5),
    live_viewer
    )

# Termination predicate that always returns false.
termination = x -> false

dataset = LearningMPC.Dataset(lqrsol)

# Passed to Nets.cost_function when the training objective is built.
gradient_sensitivity = 0.2

# Scratch state that `collect_into!` re-randomizes before each rollout.
x0 = MechanismState{Float64}(mechanism)

# State handed to `randomize!` together with the noise scales:
# configuration [1.0, 1.0], zero velocity.
x_init = MechanismState{Float64}(mechanism)
set_configuration!(x_init, [1.0, 1.0])
set_velocity!(x_init, [0., 0.])
# x_init = xstar

"""
    collect_into!(data, σ_q=0.5, σ_v=1.0)

Run one closed-loop rollout with `dagger_controller` and append the samples it
produced to `data`. Returns `data`.

The rollout starts from the global scratch state `x0`, which is re-drawn with
`LearningMPC.randomize!(x0, x_init, σ_q, σ_v)`. If the first configuration
coordinate ends up below the second, the first is raised to equal the second
before simulating.

Samples are read from the global `sample_sink` after the simulation (it is
emptied first, so only this rollout contributes). A sample is dropped when any
*later* sample in the rollout has a state within `1e-2` (Euclidean norm) of it,
so of each cluster of near-duplicates the most recent one is kept.

# Arguments
- `data`: vector of `LearningMPC.Sample`s to append to (mutated).
- `σ_q`, `σ_v`: noise scales forwarded to `LearningMPC.randomize!`
  (presumably configuration and velocity standard deviations; confirm in LearningMPC).
"""
function collect_into!(data::Vector{<:LearningMPC.Sample}, σ_q=0.5, σ_v=1.0)
    # Start from a clean sink so only this rollout's samples are considered.
    empty!(sample_sink)

    LearningMPC.randomize!(x0, x_init, σ_q, σ_v)
    q = configuration(x0)
    if q[1] - q[2] < 0
        set_configuration!(x0, [q[2], q[2]])
    end

    # The simulation is run for its side effect of filling `sample_sink`;
    # its return value is not needed here.
    LCPSim.simulate(x0,
        dagger_controller,
        robot.environment, mpc_params.Δt, 50,
        mpc_params.lcp_solver;
        termination=termination)

    samples = sample_sink.samples
    indices = eachindex(samples)
    # Keep sample i only if no later sample lies within 1e-2 of its state.
    samples_to_keep = filter(indices) do i
        !any(j -> norm(samples[j].state .- samples[i].state) < 1e-2,
             (i + 1):last(indices))
    end

    # To keep every sample instead, append `sample_sink.samples` directly.
    return append!(data, samples[samples_to_keep])
end

# Seed the training set with one rollout collected with both noise scales set to zero.
collect_into!(dataset.training_data, 0.0, 0.0)
# Build the objective `f` and in-place gradient `g!` that are later passed to Optim.optimize.
# NOTE(review): this assumes the closures hold a reference to `dataset.training_data`
# and therefore see samples appended later; confirm in Nets.cost_function.
f, g! = Nets.cost_function(LearningMPC.features, net, dataset.training_data, gradient_sensitivity)

"""
    all_losses(net, dataset)

Return the 3-tuple `(training, validation, learning)` where the first two are
`LearningMPC.training_loss` and `LearningMPC.validation_loss` of `net` on
`dataset`, and the third is the global objective `f` evaluated at the current
network parameters `net.params.data`.
"""
function all_losses(net, dataset)
    training = LearningMPC.training_loss(net, dataset)
    validation = LearningMPC.validation_loss(net, dataset)
    learning = f(net.params.data)
    return (training, validation, learning)
end

Academic license - for non-commercial use only
Academic license - for non-commercial use only


all_losses (generic function with 1 method)

In [40]:
# LearningMPC.randomize!(x0, x_init, 0.0, 0.0)
# results = LCPSim.simulate(x0, 
#     mpc_controller,
#     robot.environment, mpc_params.Δt, 50, mpc_params.lcp_solver);

In [41]:
# LearningMPC.playback(basevis[:robot], results, mpc_params.Δt)

In [42]:
# History of (training, validation, learning) losses as returned by `all_losses`,
# and one network snapshot per outer iteration.
losses = Tuple{Float64, Float64, Float64}[]
snapshots = LearningMPC.Snapshot{Float64}[]
gr()

# Each of the 10 outer iterations collects new data, re-fits the network with
# L-BFGS, records the losses and a snapshot, and redraws the loss curves.
@showprogress for i in 1:10
    # Two rollouts go into the training set, then one each into testing and validation.
    # NOTE(review): the inner loop index shadows the outer `i`; neither is used in the body.
    for i in 1:2
        collect_into!(dataset.training_data)
    end
    collect_into!(dataset.testing_data)
    collect_into!(dataset.validation_data);
    
    # At most 100 L-BFGS iterations starting from a copy of the current parameters;
    # the minimizer is then written back into the network in place.
    optim_results = Optim.optimize(f, g!, copy(net.params.data), LBFGS(), Optim.Options(allow_f_increases=true, iterations=100))
    @show optim_results
    net.params.data .= optim_results.minimizer
    push!(losses, all_losses(net, dataset))
    
    push!(snapshots, LearningMPC.Snapshot(net.params.data, net))
    
#     jldopen("hopper-$gradient_sensitivity.jld2", "w") do file
#         file["dataset"] = dataset
#         file["snapshots"] = snapshots
#     end
    
    # Plot the three loss histories on a log10 y-axis; the lower y-limit is pinned to 1.
    plt = plot(first.(losses), label="training", yscale=:log10)
    plot!(plt, getindex.(losses, 2), label="validation")
    plot!(plt, getindex.(losses, 3), label="learning")
    ylims!(plt, (1, ylims(plt)[2]))
    display(plt)
end

optim_results = Results of Optimization Algorithm
 * Algorithm: L-BFGS
 * Starting Point: [0.7768450991588259,-1.3795841460841682, ...]
 * Minimizer: [0.6493515188499748,-1.3609865713609495, ...]
 * Minimum: 3.549532e+02
 * Iterations: 100
 * Convergence: false
   * |x - x'| < 1.0e-32: false 
     |x - x'| = 2.88e-05 
   * |f(x) - f(x')| / |f(x)| < 1.0e-32: false
     |f(x) - f(x')| / |f(x)| = 1.00e-06 
   * |g(x)| < 1.0e-08: false 
     |g(x)| = 4.22e+01 
   * stopped by an increasing objective: true
   * Reached Maximum Number of Iterations: true
 * Objective Calls: 4757
 * Gradient Calls: 4757


Progress:  10%|████                                     |  ETA: 0:03:25

optim_results = Results of Optimization Algorithm
 * Algorithm: L-BFGS
 * Starting Point: [0.6493515188499748,-1.3609865713609495, ...]
 * Minimizer: [0.7137108682068372,-1.291400238708888, ...]
 * Minimum: 4.512115e+02
 * Iterations: 100
 * Convergence: false
   * |x - x'| < 1.0e-32: false 
     |x - x'| = 3.16e-05 
   * |f(x) - f(x')| / |f(x)| < 1.0e-32: false
     |f(x) - f(x')| / |f(x)| = 1.00e-06 
   * |g(x)| < 1.0e-08: false 
     |g(x)| = 1.55e+01 
   * stopped by an increasing objective: true
   * Reached Maximum Number of Iterations: true
 * Objective Calls: 4816
 * Gradient Calls: 4816


Progress:  20%|████████                                 |  ETA: 0:02:45

optim_results = Results of Optimization Algorithm
 * Algorithm: L-BFGS
 * Starting Point: [0.7137108682068372,-1.291400238708888, ...]
 * Minimizer: [0.679775628482949,-1.275136742741146, ...]
 * Minimum: 6.129064e+02
 * Iterations: 100
 * Convergence: false
   * |x - x'| < 1.0e-32: false 
     |x - x'| = 1.05e-05 
   * |f(x) - f(x')| / |f(x)| < 1.0e-32: false
     |f(x) - f(x')| / |f(x)| = 1.00e-06 
   * |g(x)| < 1.0e-08: false 
     |g(x)| = 1.10e+01 
   * stopped by an increasing objective: true
   * Reached Maximum Number of Iterations: true
 * Objective Calls: 4444
 * Gradient Calls: 4444


Progress:  30%|████████████                             |  ETA: 0:02:24

optim_results = Results of Optimization Algorithm
 * Algorithm: L-BFGS
 * Starting Point: [0.679775628482949,-1.275136742741146, ...]
 * Minimizer: [0.6782216525985143,-1.0879369560534558, ...]
 * Minimum: 6.213380e+02
 * Iterations: 100
 * Convergence: false
   * |x - x'| < 1.0e-32: false 
     |x - x'| = 1.42e-05 
   * |f(x) - f(x')| / |f(x)| < 1.0e-32: false
     |f(x) - f(x')| / |f(x)| = 1.00e-06 
   * |g(x)| < 1.0e-08: false 
     |g(x)| = 2.09e+01 
   * stopped by an increasing objective: true
   * Reached Maximum Number of Iterations: true
 * Objective Calls: 4847
 * Gradient Calls: 4847


Progress:  40%|████████████████                         |  ETA: 0:02:09

optim_results = Results of Optimization Algorithm
 * Algorithm: L-BFGS
 * Starting Point: [0.6782216525985143,-1.0879369560534558, ...]
 * Minimizer: [0.6112627898485008,-0.5947013845318682, ...]
 * Minimum: 6.913870e+02
 * Iterations: 100
 * Convergence: false
   * |x - x'| < 1.0e-32: false 
     |x - x'| = 1.71e-05 
   * |f(x) - f(x')| / |f(x)| < 1.0e-32: false
     |f(x) - f(x')| / |f(x)| = 1.00e-06 
   * |g(x)| < 1.0e-08: false 
     |g(x)| = 1.26e+01 
   * stopped by an increasing objective: true
   * Reached Maximum Number of Iterations: true
 * Objective Calls: 4502
 * Gradient Calls: 4502


Progress:  50%|████████████████████                     |  ETA: 0:01:49

optim_results = Results of Optimization Algorithm
 * Algorithm: L-BFGS
 * Starting Point: [0.6112627898485008,-0.5947013845318682, ...]
 * Minimizer: [0.610890127176916,-0.5946984441467545, ...]
 * Minimum: 7.370135e+02
 * Iterations: 100
 * Convergence: false
   * |x - x'| < 1.0e-32: false 
     |x - x'| = 1.20e-05 
   * |f(x) - f(x')| / |f(x)| < 1.0e-32: false
     |f(x) - f(x')| / |f(x)| = 1.00e-06 
   * |g(x)| < 1.0e-08: false 
     |g(x)| = 8.03e+00 
   * stopped by an increasing objective: true
   * Reached Maximum Number of Iterations: true
 * Objective Calls: 5185
 * Gradient Calls: 5185


Progress:  60%|█████████████████████████                |  ETA: 0:01:33

optim_results = Results of Optimization Algorithm
 * Algorithm: L-BFGS
 * Starting Point: [0.610890127176916,-0.5946984441467545, ...]
 * Minimizer: [0.6103886612738743,-0.5947060551264451, ...]
 * Minimum: 7.249321e+02
 * Iterations: 100
 * Convergence: false
   * |x - x'| < 1.0e-32: false 
     |x - x'| = 4.10e-06 
   * |f(x) - f(x')| / |f(x)| < 1.0e-32: false
     |f(x) - f(x')| / |f(x)| = 1.00e-06 
   * |g(x)| < 1.0e-08: false 
     |g(x)| = 4.08e+00 
   * stopped by an increasing objective: true
   * Reached Maximum Number of Iterations: true
 * Objective Calls: 5001
 * Gradient Calls: 5001


Progress:  70%|█████████████████████████████            |  ETA: 0:01:13

optim_results = Results of Optimization Algorithm
 * Algorithm: L-BFGS
 * Starting Point: [0.6103886612738743,-0.5947060551264451, ...]
 * Minimizer: [0.6097456785595025,-0.5947542610664387, ...]
 * Minimum: 7.251126e+02
 * Iterations: 100
 * Convergence: false
   * |x - x'| < 1.0e-32: false 
     |x - x'| = 3.36e-06 
   * |f(x) - f(x')| / |f(x)| < 1.0e-32: false
     |f(x) - f(x')| / |f(x)| = 1.00e-06 
   * |g(x)| < 1.0e-08: false 
     |g(x)| = 2.32e+00 
   * stopped by an increasing objective: true
   * Reached Maximum Number of Iterations: true
 * Objective Calls: 5125
 * Gradient Calls: 5125


Progress:  80%|█████████████████████████████████        |  ETA: 0:00:50

captured: InterruptException()


LoadError: [91mUndefVarError: mip_results not defined[39m

In [43]:
# @showprogress for i in 1:100
#     Nets.update!(net.params.data, optimizer, LearningMPC.features.(dataset.training_data))
#     optimizer.opts.learning_rate *= (1 - 3e-2)
#     push!(losses, all_losses(net, dataset))
# end

In [44]:
# Evaluate the trained network on its own: re-randomize the scratch state `x0`
# around `x_init` and simulate with `net_controller` only (no MPC, no DAgger mixing).
LearningMPC.randomize!(x0, x_init, 0.1, 0.5)
results = LCPSim.simulate(x0, 
    net_controller,
    robot.environment, mpc_params.Δt, 200, mpc_params.lcp_solver);

In [45]:
# Replay the simulated `results` in the visualizer, passing the simulation time step Δt.
LearningMPC.playback(basevis[:robot], results, mpc_params.Δt)

In [46]:
# Switch the Plots backend from GR to Plotly for the 3D plot below.
plotly()

Plots.PlotlyBackend()

In [47]:
"""
    slice(data)

Return the elements of `data` whose `state` has its first two entries within
`0.1` of each other and its third and fourth entries within `0.1` of each other
(strict inequality in both cases).
"""
function slice(data)
    tol = 1e-1
    isnear(a, b) = abs(a - b) < tol
    return filter(s -> isnear(s.state[1], s.state[2]) && isnear(s.state[3], s.state[4]), data)
end

slice (generic function with 1 method)

In [48]:
# Compare the collected samples with the network on the slice of state space where
# state[1] ≈ state[2] and state[3] ≈ state[4] (the slice selected by `slice`).
# The filtered sample list is computed once and reused for all three coordinates.
slice_samples = slice(dataset.training_data)

# Scatter: x = state[1], y = state[3], z = uJ[2, 1] of each sliced training sample.
plt = plot([s.state[1] for s in slice_samples], [s.state[3] for s in slice_samples],
     [s.uJ[2, 1] for s in slice_samples], line=nothing, marker=:dot, markersize=0.3)
# Surface: second output of the network evaluated at [x, x, y, y] over the same slice.
surface!(plt, linspace(0, 2), linspace(-4, 4), (x, y) -> net([x, x, y, y])[2])
# plot!(plt, [s.state[1] for s in dataset.training_data], [s.state[3] for s in dataset.training_data],
#      [net(s.state)[2] for s in dataset.training_data], line=nothing, marker=:dot, markersize=0.3, markercolor=:red)
zlims!(plt, -10, 50)
plt