In [None]:
using Plots
# select the GR plotting backend; `leg=false` disables the legend
gr(leg=false)

In [None]:
# Objective function we are trying to minimize, in its two-argument form.
# NOTE: this (x, y) method is ONLY used to plot the surface.
function f(x, y)
    return sin(x) + cos(y)
end

In [None]:
# plot a surface with a close-up view: both axes share one 100-point grid over [2, 6]
# NOTE: the trailing `;` suppresses the cell's output when run in a notebook/REPL
x = linspace(2,6,100)
surface(x,x,f);

In [None]:
# plot a surface with a zoomed-out view: both axes share one 200-point grid over [2, 40]
# NOTE: the trailing `;` suppresses the cell's output when run in a notebook/REPL
x = linspace(2,40,200)
surface(x,x,f);

In [None]:
# Learn reexports Transformations and StochasticOptimization, which are what
# we'll use to minimize our function f
using Learn

# Objective function in the form the optimizer uses: `x` is assumed to be a
# length-2 vector, and only x[1] and x[2] are read.
function f(x)
    return sin(x[1]) + cos(x[2])
end

# Gradient of the function to minimize, sin(x[1]) + cos(x[2]), returned as a
# 2-element vector of partial derivatives.
function df(x)
    ∂x₁ = cos(x[1])
    ∂x₂ = -sin(x[2])
    return [∂x₁, ∂x₂]
end

In [None]:
# this is ONLY for the plotting... set up an animation, add a new series
# to track the learning curve, and define the method that will get called
# once per iteration in the call to learn!

# `anim` accumulates the frames captured by `addpt`; it is rendered later with `gif(anim)`
anim = Animation()
# the mutating `path3d!` adds a 3D path series (black line of width 2, circle markers
# of size 5) to the current plot — presumably the last surface drawn above; confirm.
# `addpt` pushes points onto this plot as series 2.
p = path3d!(1, l=(:black,2), m=(:circle,5))
# Per-iteration callback handed to `learn!` (via `oniter`).
#   m — the model being optimized; its parameters are read with `params`
#   i — the iteration number
# Appends the point (params..., f(params)) to series 2 of the plot `p`, and
# captures an animation frame on iterations 1, 21, 41, ...
function addpt(m, i)
    current = params(m)
    push!(p, 2, current..., f(current))
    capture = mod1(i, 20) == 1
    return capture && frame(anim)
end

In [None]:
# !! this is important !!
# tfunc is a convenience constructor to build a Transformations.Differentiable
# In this case we are building a OnceDifferentiable, similar to what's available in Optim.
# We pass in an arbitrary function, the size of the params θ, and a function that returns
# a deriv/gradient at θ.  Since we didn't specify a size for inputs, it is assumed 0.
# So this is a "closed system"... f is not a function of inputs x, only of params θ.
# We are finding a minimum of a function by moving the params θ closer to the argmin of f.
t = tfunc(f, 2, df)

# I wanted to define a very specific initial point θ₀ so the animation looked nice.
# The values are written in place into the parameter array returned by `params(t)`.
params(t)[:] = [2,5]

# At this point we have a transformation t <: OnceDifferentiable, and we're going to call
#   learn!(t, metalearner)
# We build a generic MetaLearner using the `make_learner` convenience.  The constructor accepts zero
# or more LearningStrategy objects, plus there are a few keywords (just for convenience!!) to add
# common strategies.
learn!(t, make_learner(

    # This is a strategy that updates the parameters of the transformation `t` using the Adam method.
    # The learning rate is fixed at 1e-2
    GradientLearner(1e-2,Adam()),

    # This is a strategy that evaluates the function passed to it (here it returns the current
    # parameters, `params(m)`), and compares the result to the last result.
    # When the difference is sufficiently close, we are done.
    # NOTE(review): the comparison is therefore on the parameters θ, not on the value f(θ) — confirm
    # this is the intended convergence test.
    Converged(m -> params(m)),

    # This is a convenience to add a `MaxIter` strategy, which stops us after 1000 iterations
    maxiter = 1000,

    # This is a convenience to add an `IterFunction` strategy, which calls a function every iteration:
    #   f(model, itr_num)
    # In this case: model == t, and the function is the plotting callback `addpt`
    oniter = addpt
))

# NOTE: there are many more strategies and settings... until I fill out the documentation, look at the code

In [None]:
# output the animation: render the frames captured in `anim` as an animated GIF
gif(anim)

In [None]:
# ignore this... old code
# NOTE(review): neither `pts` nor the bare trailing `m` argument is defined anywhere in
# the visible source, so this cell presumably errors if executed — confirm before
# running or reusing it. It also rebinds the global `x` used by the surface cells.
x,y = Plots.unzip(pts)
plot!(x,y,map(f,x,y),l=(:black,2),m)