In [1]:
using ReverseDiff
using ProgressMeter
using MLDataPattern
using Plots
gr()

Plots.GRBackend()

In [2]:
"""
    head(t::Tuple)

Return a one-element tuple holding only the first entry of `t`.
"""
function head(t::Tuple)
    leading = t[1]
    return (leading,)
end

"""
    viewblocks(data, shapes)

Carve the leading elements of `data` into consecutive, non-overlapping blocks and
return them as a vector of reshaped views (no copies are made).

Block `k` has `prod(shapes[k])` elements, starts right after block `k - 1`, and is
reshaped to `shapes[k]`. Elements of `data` beyond the last block are ignored.
"""
function viewblocks{T <: NTuple}(data::AbstractArray, shapes::AbstractVector{T})
    counts = map(prod, shapes)
    # Number of elements that precede each block in `data`.
    offsets = cumsum(counts) .- counts
    [reshape(view(data, (offsets[k] + 1):(offsets[k] + counts[k])), shapes[k])
     for k in 1:length(shapes)]
end

viewblocks (generic function with 1 method)

In [145]:
# predict(flat_weights, shapes, x)
#
# Evaluate the network on the input vector `x` and return, concatenated into one
# vector, the flattened output followed by the flattened Jacobian of the output
# with respect to `x`.
#
# `flat_weights` stores one weight block per entry of `shapes` first, followed by
# the bias blocks (one bias per row of each weight block).
predict = (flat_weights, shapes, x) -> begin
    weights = viewblocks(flat_weights, shapes)
    bias_start = sum(length, weights) + 1
    biases = viewblocks(@view(flat_weights[bias_start:end]), head.(shapes))

    activation = x
    jacobian = eye(length(x))
    hidden_layers = length(shapes) - 1
    for layer in 1:hidden_layers
        W = weights[layer]
        activation = W * activation .+ biases[layer]
        jacobian = W * jacobian
        # Piecewise-linear activation: slope 1.0 where the pre-activation is
        # non-negative, 0.1 elsewhere. The same slopes scale the Jacobian rows.
        slope = [a >= 0 ? 1.0 : 0.1 for a in activation]
        activation = activation .* slope
        jacobian = jacobian .* slope
    end

    # The last layer is purely linear; its bias block is sliced above but not applied.
    W_out = weights[end]
    vcat(vec(W_out * activation), vec(W_out * jacobian))
end

# Sum of squared differences between the target `y` and the full `predict` output
# (network value followed by its Jacobian entries).
loss = (w, shapes, x, y) -> begin
    residual = y - predict(w, shapes, x)
    sumabs2(residual)
end

"""
    train(w, shapes, data, lr=0.1, momentum=0.8)

Run one pass of momentum gradient descent over `data`, updating `w` in place and
returning it.

`data` is walked with `batchview(data, 1)`. For every batch the per-sample gradients
of `loss` (obtained with `ReverseDiff.gradient`) are summed, then each parameter moves
by `lr * gradient + momentum * previous_step`. The previous-step buffer starts at zero
on every call.
"""
function train(w, shapes, data, lr=0.1, momentum=0.8)
    velocity = zeros(w)
    for batch in batchview(data, 1)
        grad_sum = zeros(w)
        for (x, y) in batch
            grad_sum .+= ReverseDiff.gradient(p -> loss(p, shapes, x, y), w)
        end
        # Explicit index loop so that `w` itself is mutated for the caller.
        for k in 1:length(w)
            delta = lr * grad_sum[k] + momentum * velocity[k]
            velocity[k] = delta
            w[k] -= delta
        end
    end
    w
end

# Sample input vector (not read again by the rest of this cell).
x = [1.0]
# (rows, cols) of each weight block, first layer to last.
shapes = [(4, 1), (4, 4), (1, 4)]
# Flat parameter vector: all weight entries plus one bias per weight-block row.
w = rand(sum(prod, shapes) + sum(first, shapes))
# w = [0.2, 0.3, 0.4, 0.5, 1.0, 2.0, 3.0]

# Ten (input, target) pairs: the target holds |s| and the matching slope (+1 or -1).
train_data = [
    begin
        s = randn()
        s >= 0 ? ([s], [s, 1]) : ([s], [-s, -1])
    end
    for _ in 1:10];

# 100 passes over the training set.
@showprogress for epoch in 1:100
    train(w, shapes, train_data, 0.01, 0.5)
end

# Plot the first component of the prediction and overlay the training pairs.
xx = linspace(-2, 2)
plt = plot(
    xx,
    t -> predict(w, shapes, [t])[1],
    ylim=(-1, 1),
    legend=nothing,
    linewidth=4,
)
plot!(
    plt,
    map(first, train_data),
    map(last, train_data),
    line=nothing,
    markershape=:circle,
    markersize=1,
)
plt

Progress: 100%|█████████████████████████████████████████| Time: 0:00:01


In [144]:
# predict(flat_weights, shapes, x)
#
# Evaluate the network on the input vector `x`. The result is a single vector: the
# flattened network output followed by the flattened Jacobian of that output with
# respect to `x`.
#
# Layout of `flat_weights`: the weight blocks described by `shapes` come first, then
# the bias blocks (one bias per row of each weight block).
predict = (flat_weights, shapes, x) -> begin
    weights = viewblocks(flat_weights, shapes)
    bias_start = sum(length, weights) + 1
    biases = viewblocks(@view(flat_weights[bias_start:end]), head.(shapes))

    activation = x
    jacobian = eye(length(x))
    hidden_layers = length(shapes) - 1
    for layer in 1:hidden_layers
        W = weights[layer]
        activation = W * activation .+ biases[layer]
        jacobian = W * jacobian
        # Slope 1.0 for non-negative pre-activations and 0.1 otherwise; the same
        # per-row slopes are applied to the Jacobian.
        slope = [a >= 0 ? 1.0 : 0.1 for a in activation]
        activation = activation .* slope
        jacobian = jacobian .* slope
    end

    # The last layer is purely linear; its bias block is sliced above but not applied.
    W_out = weights[end]
    vcat(vec(W_out * activation), vec(W_out * jacobian))
end

# Squared error on the first component only: the first target entry against the
# first entry of the `predict` output. The Jacobian entries are ignored here.
loss = (w, shapes, x, y) -> begin
    predicted_value = predict(w, shapes, x)[1]
    sumabs2(y[1] - predicted_value)
end

"""
    train(w, shapes, data, lr=0.1, momentum=0.8)

Perform one sweep of gradient descent with momentum over `data`. `w` is modified in
place and also returned.

Batches come from `batchview(data, 1)`. Within a batch the gradients of `loss` for
each `(x, y)` sample (computed by `ReverseDiff.gradient`) are added together; every
parameter is then decreased by `lr * gradient + momentum * previous_step`. The
previous-step buffer is zero at the start of each call.
"""
function train(w, shapes, data, lr=0.1, momentum=0.8)
    velocity = zeros(w)
    for batch in batchview(data, 1)
        grad_sum = zeros(w)
        for (x, y) in batch
            grad_sum .+= ReverseDiff.gradient(p -> loss(p, shapes, x, y), w)
        end
        # Explicit index loop so that `w` itself is mutated for the caller.
        for k in 1:length(w)
            delta = lr * grad_sum[k] + momentum * velocity[k]
            velocity[k] = delta
            w[k] -= delta
        end
    end
    w
end

# Sample input vector (not read again by the rest of this cell).
x = [1.0]
# (rows, cols) of each weight block, first layer to last.
shapes = [(4, 1), (4, 4), (1, 4)]
# Flat parameter vector: all weight entries plus one bias per weight-block row.
w = rand(sum(prod, shapes) + sum(first, shapes))
# w = [0.2, 0.3, 0.4, 0.5, 1.0, 2.0, 3.0]

# Ten (input, target) pairs: the target holds |s| and the matching slope (+1 or -1).
train_data = [
    begin
        s = randn()
        s >= 0 ? ([s], [s, 1]) : ([s], [-s, -1])
    end
    for _ in 1:10];

# 100 passes over the training set.
@showprogress for epoch in 1:100
    train(w, shapes, train_data, 0.01, 0.5)
end

# Plot the first component of the prediction and overlay the training pairs.
xx = linspace(-2, 2)
plt = plot(
    xx,
    t -> predict(w, shapes, [t])[1],
    ylim=(-1, 1),
    legend=nothing,
    linewidth=4,
)
plot!(
    plt,
    map(first, train_data),
    map(last, train_data),
    line=nothing,
    markershape=:circle,
    markersize=1,
)
plt

Progress: 100%|█████████████████████████████████████████| Time: 0:00:01
