In [1]:
using ReverseDiff
using IterTools

In [57]:
"""
    Affine(A, b)

Affine map holding a matrix `A` and a vector `b`.

Calling an instance on `x` evaluates `A * x .+ b`.
"""
struct Affine{M<:AbstractMatrix,V<:AbstractVector}
    A::M
    b::V
end

function (layer::Affine)(x)
    projected = layer.A * x
    return projected .+ layer.b
end

In [124]:
# Global instruction tape; it is passed to `param` and `optimize` further down.
tape = ReverseDiff.InstructionTape()

"""
    param(A, tape)

Create a tracked array on `tape` that is shaped like `A` (element type `T` is
used as the derivative type) and load `A` into it with `ReverseDiff.track!`.

Returns the tracked array.
"""
function param(A::AbstractArray{T}, tape::ReverseDiff.InstructionTape) where T
    tracked = ReverseDiff.track(similar(A), T, tape)
    ReverseDiff.track!(tracked, A)
    return tracked
end

# Tracked model parameters on the global tape: a random 1×3 matrix and a random
# length-1 vector.
A0 = param(rand(1, 3), tape)
b0 = param(rand(1), tape)
# Sum of squared differences between `A0 * x .+ b0` and the target `y`.
# The closure reads the globals `A0` and `b0`.
loss = (x, y) -> sum(abs2, A0 * x .+ b0 .- y)
# One training sample: input `x0` and target `y0`.
x0 = [1.0, 1.0, 1.0]
y0 = [1.0]

1-element Array{Float64,1}:
 1.0

In [126]:
"""
    optimize(loss, data, tape; lr=0.01)

Run one gradient-descent step per `(x, y)` sample in `data`, updating the
global tracked parameters `A0` and `b0` in place.

`loss` is recorded once on `tape` using the first sample; every later sample is
written into the same tracked input buffers and the recorded tape is replayed.

# Arguments
- `loss`: callable invoked as `loss(x, y)` with tracked inputs.
- `data`: iterable of `(x, y)` pairs. NOTE(review): every sample presumably has
  to match the shape of the first one, since they share buffers — confirm.
- `tape`: instruction tape used for the input buffers and the gradient tape.
  NOTE(review): presumably must be the tape `A0`/`b0` were created on — confirm.

# Keywords
- `lr`: step size applied to each gradient (default `0.01`).

# Returns
`nothing`. Returns immediately when `data` is empty.
"""
function optimize(loss, data, tape; lr=0.01)
    # Nothing to record or replay without at least one sample.
    isempty(data) && return nothing

    x0, y0 = first(data)
    x0p = param(x0, tape)
    y0p = param(y0, tape)
    tracked_loss = loss(x0p, y0p)

    # All tape inputs; the trainable parameters come first so that their
    # gradients sit at the same indices in `result`.
    inputs = (A0, b0, x0p, y0p)
    trainable = (A0, b0)
    result = similar.(ReverseDiff.value.(inputs))
    gtape = ReverseDiff._GradientTape(loss, inputs, tracked_loss, tape)

    for (x, y) in data
        ReverseDiff.value!(x0p, x)
        ReverseDiff.value!(y0p, y)
        ReverseDiff.forward_pass!(gtape)
        ReverseDiff.seeded_reverse_pass!(result, gtape)
        # Step only the model parameters; the data buffers are inputs, not
        # parameters, and are overwritten at the top of every iteration.
        for i in eachindex(trainable)
            trainable[i].value .-= lr .* result[i]
        end
    end
    return nothing
end

optimize (generic function with 1 method)

In [129]:
# Train on the single (x0, y0) sample, repeated 100 times via IterTools.ncycle.
optimize(loss, IterTools.ncycle([(x0, y0)], 100), tape)

In [130]:
# Evaluate the model on x0 with the current parameters (compare against y0).
A0 * x0 .+ b0 

1-element ReverseDiff.TrackedArray{Float64,Float64,1,Array{Float64,1},Array{Float64,1}}:
 TrackedReal<5ml>(1.0000000404051255, 0.0, Bhx, 1, 1bh)