In [1]:
using ReverseDiff
using ProgressMeter
using MLDataPattern
using Plots
gr()

Plots.GRBackend()

In [2]:
"""
    head(t::Tuple)

Return a one-element tuple that holds only the first entry of `t`.
"""
function head(t::Tuple)
    return (t[1],)
end

"""
    viewblocks(data, shapes)

Split the leading elements of `data` into consecutive blocks, one per entry of
`shapes`, and return them as a vector of reshaped views.

Block `i` covers the next `prod(shapes[i])` elements of `data` (linear indices,
starting at 1) and is reshaped to `shapes[i]`. The blocks are views: they share
memory with `data`, so writing to a block writes through to `data`. Elements of
`data` beyond `sum(prod, shapes)` are not referenced by any block.

Throws an `ArgumentError` when the element type of `shapes` is not a tuple type.
"""
function viewblocks(data::AbstractArray, shapes::AbstractVector)
    # The tuple requirement is checked here instead of with the
    # `viewblocks{T <: NTuple}(...)` method-parameter syntax, which Julia >= 1.0
    # no longer parses.
    eltype(shapes) <: Tuple ||
        throw(ArgumentError("shapes must be a vector of tuples, got eltype $(eltype(shapes))"))
    # starts[i] is the first linear index of block i; starts[end] is one past the last block.
    starts = cumsum(vcat([1], [prod(shape) for shape in shapes]))
    block(i) = reshape(view(data, starts[i]:(starts[i + 1] - 1)), shapes[i])
    return [block(i) for i in 1:length(shapes)]
end

viewblocks (generic function with 1 method)

In [122]:
# predict(flat_weights, shapes, x)
#
# Evaluate a fully connected network with leaky-ReLU hidden layers on the input
# vector `x`, and at the same time propagate the Jacobian of the output with
# respect to `x`.
#
# `flat_weights` stores every weight matrix first (one block per entry of
# `shapes`, each entry being that matrix's size) followed by the bias vectors
# (one per layer, with as many entries as that layer's matrix has rows).
#
# Returns a flat vector: the network output followed by the column-major
# flattened Jacobian.
predict = (flat_weights, shapes, x) -> begin
    weights = viewblocks(flat_weights, shapes)
    num_weights = sum(length, weights)
    # Biases live directly after the weights in the flat parameter vector.
    biases = viewblocks(@view(flat_weights[(num_weights + 1):end]), head.(shapes))
    y = x
    # Identity matrix as the Jacobian seed (dx/dx). Built with a comprehension
    # because `eye` is not available in Julia >= 1.0.
    n = length(x)
    J = [r == c ? 1.0 : 0.0 for r in 1:n, c in 1:n]
    for i in 1:(length(shapes) - 1)
        w = weights[i]
        y = w * y .+ biases[i]
        J = w * J
        # Leaky ReLU: slope 1.0 where the pre-activation is non-negative, 0.1 elsewhere.
        z = [yy >= 0 ? 1.0 : 0.1 for yy in y]
        y = y .* z
        # Broadcasting the vector `z` against the matrix `J` scales row r of J by z[r].
        J = J .* z
    end
    # The output layer is purely linear and applies no bias, so the last bias
    # block in `flat_weights` is allocated but never used.
    w = weights[end]
    vcat(vec(w * y), vec(w * J))
end

# loss(w, shapes, x, y): sum of squared differences between the target vector
# `y` and `predict(w, shapes, x)`.
# `sum(abs2, ...)` is used instead of `sumabs2`, which was deprecated in
# Julia 0.6 and removed afterwards; both compute the same reduction.
loss = (w, shapes, x, y) -> sum(abs2, y - predict(w, shapes, x))

(::#225) (generic function with 1 method)

In [134]:
"""
    train(w, shapes, data, lr=0.1, momentum=0.8)

Run one pass of gradient descent with momentum over `data`, updating the
parameter vector `w` in place, and return `w`.

`data` is iterated in batches of one observation via `batchview(data, 1)`; each
observation is destructured as `(x, y)` and contributes the gradient of
`loss(w, shapes, x, y)` with respect to `w`, computed by `ReverseDiff.gradient`.

For every batch the step is `v = lr * gradient + momentum * previous_step`, and
`w` is decreased by `v`. The momentum buffer starts at zero on every call, so
momentum does not carry over between separate calls to `train`.
"""
function train(w, shapes, data, lr=0.1, momentum=0.8)
    # `zero(w)` yields a zero-filled array shaped like `w`; the `zeros(w)` form
    # used previously does not exist in Julia >= 1.0.
    last_descent = zero(w)
    for batch in batchview(data, 1)
        dw = zero(w)
        for (x, y) in batch
            # The closure parameter is named `p` so it does not shadow `w`.
            dw .+= ReverseDiff.gradient(p -> loss(p, shapes, x, y), w)
        end
        for i in eachindex(w)
            v = lr * dw[i] + momentum * last_descent[i]
            w[i] -= v
            last_descent[i] = v
        end
    end
    return w
end



train (generic function with 3 methods)

In [135]:
x = [1.0]

# Size of each layer's weight matrix, in the order the layers are applied.
shapes = [(4, 1), (4, 4), (1, 4)]

# Flat parameter vector, uniformly random in [0, 1): room for every weight
# matrix plus one bias entry per matrix row.
w = rand(sum(prod, shapes) + sum(first, shapes))
# w = [0.2, 0.3, 0.4, 0.5, 1.0, 2.0, 3.0]

# Ten samples of (input, target). For a standard-normal draw x the target is
# [x, 1] when x >= 0 and [-x, -1] otherwise.
train_data = [
    let x = randn()
        x >= 0 ? ([x], [x, 1]) : ([x], [-x, -1])
    end for _ in 1:10];

In [136]:
# for (x, y) in train_data
#     c = loss(w, shapes, x, y)
#     g = ReverseDiff.gradient(w -> loss(w, shapes, x, y), w)
#     for i in 1:length(w)
#         e = zeros(w)
#         e[i] = 0.0001
#         println((loss(w .+ e, shapes, x, y) - c) / e[i], "\t", g[i])
#     end
# end

In [137]:
# Print the current parameter vector and the result of `predict` for the inputs
# 1 and -1. This cell comes before the training cell in the notebook's
# execution order, so it shows the randomly initialised state.
@show w
@show predict(w, shapes, [1]) predict(w, shapes, [-1])

w = [0.434579,0.344954,0.621312,0.456559,0.538371,0.307844,0.225783,0.0948286,0.174821,0.885005,0.857429,0.240383,0.914718,0.697435,0.458682,0.329734,0.422081,0.898118,0.893225,0.287781,0.526699,0.0237905,0.87646,0.14588,0.594556,0.452657,0.0687968,0.999535,0.27735,0.65723,0.945202,0.80351,0.0107511]
predict(w,shapes,[1]) = [4.55267,1.60594]
predict(w,shapes,[-1]) = [1.81245,1.07556]


2-element Array{Float64,1}:
 1.81245
 1.07556

In [138]:
# 100 passes over the training set with learning rate 0.01 and momentum 0.5.
# `train` modifies `w` in place, so its return value is not needed here.
@showprogress for epoch in 1:100
    train(w, shapes, train_data, 0.01, 0.5)
end

Progress: 100%|█████████████████████████████████████████| Time: 0:00:01


In [139]:
# Print the parameter vector and the result of `predict` for the inputs 1 and
# -1 again, now after the training loop has updated `w` in place.
@show w
@show predict(w, shapes, [1]) predict(w, shapes, [-1])

w = [0.436871,0.269599,1.12573,0.579702,0.266356,0.655231,-0.299986,-0.0506832,-0.0190307,1.12192,0.495532,0.137643,1.15925,0.11688,0.924261,0.553199,0.0483452,1.34398,0.231814,0.0535004,0.743828,-1.11148,1.04524,0.57733,0.686845,0.422013,0.0227893,0.922217,0.437219,0.172256,1.26896,0.896722,0.0107511]
predict(w,shapes,[1]) = [1.07606,1.02446]
predict(w,shapes,[-1]) = [1.03013,-1.01992]


2-element Array{Float64,1}:
  1.03013
 -1.01992

In [140]:
# 50 evenly spaced sample points on [-2, 2]. Written as a comprehension because
# `linspace` is not available in Julia >= 1.0.
xx = [-2 + 4k / 49 for k in 0:49]

# Thick curve: first component of the network output as a function of the input.
plt = plot(xx, x -> predict(w, shapes, [x])[1], ylim=(-1, 1), legend=nothing, linewidth=4)

# Overlay the training samples as markers. Each sample is a pair of vectors
# (input, target), so the scalar input `x[1]` is plotted against the first
# target component `y[1]` rather than passing the vectors themselves.
plot!(plt, [x[1] for (x, y) in train_data], [y[1] for (x, y) in train_data],
    seriestype=:scatter,
    markershape=:circle,
    markersize=1
)
plt