In [1]:
using Revise

In [10]:
using Optim
using ForwardDiff
using Plots; gr()
using ColorTypes
using ReverseDiff
using Base.Test

In [3]:
import Nets
import LearningMPC

In [4]:
# Function the network is trained to reproduce: takes an input whose first
# element is the sample location and returns the 1-element vector
# [sin(x[1])].
# Alternative target (identity): target = x -> x
target = function (x)
    return [sin(first(x))]
end

(::#1) (generic function with 1 method)

In [5]:
# Sample locations used for training.
training_pts = linspace(-2π, π, 5)

# One training pair per sample: (input vector, [value  jacobian]).
# `target` is evaluated on the same 1-element vector that is handed to
# ForwardDiff.jacobian, so the value and the derivative are guaranteed to be
# taken at an identical input (the previous version evaluated the value on
# the bare scalar, which only worked because numbers accept `x[1]`).
training_data = map(training_pts) do x
    input = [x]
    (input, hcat(target(input), ForwardDiff.jacobian(target, input)))
end;

In [34]:
"""
    CompiledGradient(tape)

Callable wrapper that holds a compiled ReverseDiff tape in its `tape` field.
The type parameter records the concrete tape type so the field is concretely
typed.
"""
struct CompiledGradient{T <: ReverseDiff.CompiledTape}
    tape::T
end

# Calling a CompiledGradient as `cg(∇, x)` forwards to ReverseDiff.gradient!
# with the stored tape, passing `∇` as the result and `x` as the input, each
# wrapped in a 1-tuple. The value returned by ReverseDiff.gradient! is
# returned unchanged.
(cg::CompiledGradient)(∇, x) = ReverseDiff.gradient!((∇,), cg.tape, (x,))

In [104]:
"""
    trainable_net(training_data)

Create a `Nets.Net` with randomly perturbed parameters together with a loss
function over `training_data` and a compiled gradient of that loss.

# Arguments
- `training_data`: iterable of `(x, yJ)` pairs. Each `yJ` is compared
  element-wise against `Nets.predict_sensitivity(net, x)`.

# Returns
A 4-tuple `(net, f, g!, loss_tape)`:
- `net`: the network (layer sizes `[1, 10, 10, 1]`, activation `Nets.elu`).
- `f`: `params -> loss`, the weighted sum of squared residuals over all
  training pairs.
- `g!`: a `CompiledGradient`; `g!(∇, params)` evaluates the gradient of `f`
  through the compiled tape.
- `loss_tape`: the compiled ReverseDiff tape backing `g!`.
"""
function trainable_net(training_data)
    net = Nets.Net(zeros(Nets.Params{Float64}, [1, 10, 10, 1]), Nets.elu)
    # Start from small random parameters instead of all zeros.
    for I in eachindex(net.params.data)
        net.params.data[I] += 0.1 * randn()
    end

    f = (params::AbstractVector) -> begin
        # presumably a net with the same structure as `net` but backed by
        # `params` -- confirm against Nets.similar
        n = Nets.similar(net, params)
        # Row of weights broadcast against the residual; both entries are 1.0,
        # so every residual column is weighted equally.
        q = [1.0 1.0]
        sum(training_data) do xyJ
            x, yJ = xyJ
            sum(abs2, q .* (Nets.predict_sensitivity(n, x) .- yJ))
        end
    end

    # Record `f` once at the current parameters and compile the tape.
    # NOTE(review): a compiled tape replays the operations recorded here;
    # confirm that nothing inside `f` branches on parameter values in a way
    # the tape would freeze.
    loss_tape = ReverseDiff.compile(ReverseDiff.GradientTape(f, (net.params.data,)))

    g! = CompiledGradient(loss_tape)
    return net, f, g!, loss_tape
end

# Build the network, the loss and the compiled gradient. trainable_net returns
# four values; the fourth (the tape) is not bound here.
net, f, g! = trainable_net(training_data)
# Check that the return types of the loss and of the compiled gradient call
# are inferred. The gradient is written into a scratch buffer.
@inferred f(net.params.data)
@inferred g!(similar(net.params.data), net.params.data)

([0.0469058, -0.00755694, -0.0274922, -0.00143651, -0.00644097, 0.0181396, -0.00341134, -0.00525958, -0.0120157, 0.0257308  …  -0.0102819, 0.0289733, -0.00214394, -0.00249579, -0.0630438, 0.0117527, -0.0338932, 0.0595366, 0.00252548, -0.559983],)

In [105]:
# Loss before training.
@show f(net.params.data)

solver = LBFGS()
# Swap in `Optim.Options(iterations=10)` for a short run.
options = Optim.Options()

# `g_calls` accumulates gradient evaluations so the three lines below can be
# wrapped in a loop (e.g. `@time @profile for i in 1:100 ... end`) when
# profiling repeated runs.
g_calls = 0
results = optimize(f, g!, copy(net.params.data), solver, options)
# Read the result through Optim's accessor functions rather than the fields
# of the result struct, and copy the minimizer back into the network.
net.params.data .= Optim.minimizer(results)
g_calls += Optim.g_calls(results)

# Loss after training, then display the optimisation summary.
@show f(net.params.data)
results

f(net.params.data) = 4.980463453226413
f(net.params.data) = 2.6393062155931597e-19


Results of Optimization Algorithm
 * Algorithm: L-BFGS
 * Starting Point: [-0.0023835034757604904,-0.11917370440714084, ...]
 * Minimizer: [-0.5532667683527774,-0.2669450325302178, ...]
 * Minimum: 2.639306e-19
 * Iterations: 162
 * Convergence: true
   * |x - x'| < 1.0e-32: false 
     |x - x'| = 1.93e-07 
   * |f(x) - f(x')| / |f(x)| < 1.0e-32: false
     |f(x) - f(x')| / |f(x)| = 9.65e-01 
   * |g(x)| < 1.0e-08: true 
     |g(x)| = 5.03e-09 
   * stopped by an increasing objective: false
   * Reached Maximum Number of Iterations: false
 * Objective Calls: 506
 * Gradient Calls: 506

In [106]:
# Display the number of gradient calls accumulated by the optimisation cell.
g_calls

506

In [107]:
# Training targets: first element of each input against the first element of
# its yJ block.
plt = plot(map(d -> d[1][1], training_data),
    map(d -> d[2][1], training_data),
    line=nothing,
    marker=:dot)

# Overlay a short red segment through every training point whose slope is the
# stored derivative; the half-width is scaled by cos(atan(slope)) so that each
# segment has the same drawn length.
for (pt, yJ) in training_data
    y0, dydx = yJ[1, 1], yJ[1, 2]
    half_run = 0.1 * cos(atan(dydx))
    rise = dydx * half_run
    plot!(plt,
        [pt[1] - half_run, pt[1] + half_run],
        [y0 - rise, y0 + rise],
        color=colorant"red", legend=nothing)
end

# Network prediction on a dense grid that extends past the training range.
x_samples = linspace(-2π, 2π, 101)
plot!(plt, x_samples, map(s -> net([s])[1], x_samples), color=colorant"blue")

plt