In [46]:
using ReverseDiff
using ProgressMeter
using Base.Test
using MLDataPattern
using Plots
# Select GR as the active Plots backend for every figure drawn below.
gr()

Plots.GRBackend()

In [93]:
module Nets

import ReverseDiff
using MLDataPattern: batchview, shuffleobs

"""
    head(t::Tuple)

Return a 1-tuple holding only the first entry of `t`, e.g. `(4, 1)` becomes `(4,)`.
"""
head(t::Tuple) = (t[1],)

"""
    viewblocks(data, shapes)

Carve `data` (indexed linearly) into consecutive, non-overlapping blocks, one per
entry of `shapes`, and return them as a vector of reshaped views.

Block `k` covers the next `prod(shapes[k])` elements after block `k - 1` and is
reshaped to `shapes[k]`. The blocks are views, so they alias `data` rather than
copying it.
"""
function viewblocks{T <: NTuple}(data::AbstractArray, shapes::AbstractVector{T})
    sizes = map(prod, shapes)
    # stops[k] is the linear index of the last element belonging to block k.
    stops = cumsum(sizes)
    map(1:length(shapes)) do k
        lo = stops[k] - sizes[k] + 1
        reshape(view(data, lo:stops[k]), shapes[k])
    end
end

"""
    PANet{Sensitive}(shapes)

Description of a feed-forward network whose parameters live in one flat vector.

`shapes[k]` is the `(rows, cols)` size of the weight matrix of layer `k`. The
`Sensitive` type parameter selects the `predict` method: `PANet{true}` returns
the network output followed by a flattened Jacobian, `PANet{false}` returns the
output only.
"""
type PANet{Sensitive}
    shapes::Vector{Tuple{Int, Int}}
end

"Total number of weight-matrix entries over all layers of `net`."
function nweights(net::PANet)
    return sum(prod, net.shapes)
end

"Total number of bias entries: one per weight-matrix row, summed over all layers."
function nbiases(net::PANet)
    return sum(first, net.shapes)
end

"Length of the flat parameter vector (all weights followed by all biases)."
function nparams(net::PANet)
    return nweights(net) + nbiases(net)
end

"Draw a flat parameter vector for `net` with `rand` entries."
function Base.rand(net::PANet)
    return rand(nparams(net))
end

"Draw a flat parameter vector for `net` with `randn` entries."
function Base.randn(net::PANet)
    return randn(nparams(net))
end

"""
    predict(net::PANet{true}, params, x)

Evaluate the network on input `x` and also propagate its Jacobian with respect
to `x`.

`params` is the flat parameter vector: the weight matrices come first (unpacked
with `viewblocks` according to `net.shapes`), followed by the bias vectors.
Every layer except the last computes `w * y .+ b` and then scales each entry by
`1.0` when it is non-negative and by `0.1` otherwise. The last layer applies its
weight matrix only, without a bias.

Returns a single vector: the network output followed by the column-major
flattening of the Jacobian.
"""
function predict(net::PANet{true}, params::AbstractVector, x::AbstractVector)
    nin = length(x)
    bias_params = @view(params[(nweights(net) + 1):end])
    weights = viewblocks(params, net.shapes)
    biases = viewblocks(bias_params, head.(net.shapes))

    # Work on a one-column matrix copy of the input so `w * y` stays a matrix.
    y = similar(x, Base.promote_eltype(params, x), (nin, 1))
    y .= x
    # The Jacobian of the identity map is the identity matrix.
    J = eye(eltype(y), nin)

    for layer in 1:(length(net.shapes) - 1)
        w = weights[layer]
        pre = w * y .+ biases[layer]
        # Per-entry activation slope; it scales both the value and the Jacobian rows.
        slope = [p >= 0 ? 1.0 : 0.1 for p in pre]
        y = pre .* slope
        J = (w * J) .* slope
    end

    wout = weights[end]
    return vcat(vec(wout * y), vec(wout * J))
end

"""
    predict(net::PANet{false}, params, x)

Evaluate the network on input `x` and return the output as a vector.

`params` is the flat parameter vector: the weight matrices come first (unpacked
with `viewblocks` according to `net.shapes`), followed by the bias vectors.
Every layer except the last computes `w * y .+ b` and then scales each entry by
`1.0` when it is non-negative and by `0.1` otherwise. The last layer applies its
weight matrix only, without a bias.
"""
function predict(net::PANet{false}, params::AbstractVector, x::AbstractVector)
    bias_params = @view(params[(nweights(net) + 1):end])
    weights = viewblocks(params, net.shapes)
    biases = viewblocks(bias_params, head.(net.shapes))

    # Work on a one-column matrix copy of the input so `w * y` stays a matrix.
    y = similar(x, Base.promote_eltype(params, x), (length(x), 1))
    y .= x

    for layer in 1:(length(net.shapes) - 1)
        pre = weights[layer] * y .+ biases[layer]
        # Per-entry activation slope: 1.0 for non-negative entries, 0.1 otherwise.
        slope = [p >= 0 ? 1.0 : 0.1 for p in pre]
        y = pre .* slope
    end

    return vec(weights[end] * y)
end

"""
    sgd!(loss, params, data, lr=0.01, momentum=0.8, batchsize=1)

Run one shuffled pass of momentum gradient descent over `data`, updating
`params` in place and returning it.

`data` is shuffled with `shuffleobs` and cut into batches with `batchview`; each
observation is destructured as `(x, y)`. For every observation the gradient of
`w -> loss(w, x, y)` at `params` is computed with `ReverseDiff.gradient!`, and
the gradients of a batch are summed (not averaged). After each batch every
parameter moves by `lr * gradient + momentum * previous_step`. The momentum
state starts at zero on every call.

Raises an error (after printing the offending values) if any gradient entry is
NaN.
"""
function sgd!(loss, params, data, lr=0.01, momentum=0.8, batchsize=1)
    velocity = zeros(params)
    grad_sum = zeros(params)
    dw_sample = zeros(grad_sum)
    for batch in batchview(shuffleobs(data), batchsize)
        fill!(grad_sum, 0)
        for (x, y) in batch
            ReverseDiff.gradient!(dw_sample, w -> loss(w, x, y), params)
            if any(isnan, dw_sample)
                @show params x y dw_sample
                error("nan")
            end
            # Accumulate this sample's gradient into the batch total.
            for k in eachindex(grad_sum)
                grad_sum[k] += dw_sample[k]
            end
        end
        # Explicit index loop so `params` is mutated in place.
        for k in eachindex(params)
            delta = lr * grad_sum[k] + momentum * velocity[k]
            params[k] -= delta
            velocity[k] = delta
        end
    end
    return params
end

end



Nets

In [126]:
# Twenty random samples: input `[x]`, target `[abs(x), +1]` for x >= 0 and `[abs(x), -1]` otherwise.
data = [
    (x = randn(); x >= 0 ? ([x], [x, 1]) : ([x], [-x, -1])) for i in 1:20];
train_data, test_data = splitobs(data, 0.5)

# Three-layer network; the `true` parameter makes `predict` return output and Jacobian.
shapes = [(4, 1), (4, 4), (1, 4)]
net = Nets.PANet{true}(shapes)
params = randn(net)
@inferred Nets.predict(net, params, [0])

predict = (p, input) -> Nets.predict(net, p, input)
loss = (p, input, target) -> sumabs2(target - predict(p, input))

# Twenty passes of momentum SGD over the training half.
@showprogress for i in 1:20
    Nets.sgd!(loss, params, train_data)
end

@show mean(xy -> loss(params, xy[1], xy[2]), test_data)

# Fitted curve, then training samples (green) and held-out samples (red).
xx = linspace(-2, 2)
plt = plot(xx, x -> predict(params, [x])[1], ylim=(-1, 1), legend=nothing, linewidth=4)
for (subset, color) in ((train_data, :green), (test_data, :red))
    plot!(plt, [x for (x, y) in subset], [y for (x, y) in subset],
          line=nothing, markershape=:circle, markercolor=color, markersize=3)
end
plt

Progress:   5%|██                                       |  ETA: 0:00:02Progress:  50%|████████████████████                     |  ETA: 0:00:00

mean((xy->begin  # In[126], line 25:
            loss(params,xy[1],xy[2])
        end),test_data) = 0.07861941889925812


Progress:  95%|███████████████████████████████████████  |  ETA: 0:00:00Progress: 100%|█████████████████████████████████████████| Time: 0:00:00


In [127]:
# Twenty random samples: input `[x]`, target `[abs(x)]`.
data = [
    (x = randn(); x >= 0 ? ([x], [x]) : ([x], [-x])) for i in 1:20];
train_data, test_data = splitobs(data, 0.5)

# Three-layer network; the `false` parameter makes `predict` return the output only.
shapes = [(4, 1), (4, 4), (1, 4)]
net = Nets.PANet{false}(shapes)
params = randn(net)
@inferred Nets.predict(net, params, [0])

predict = (p, input) -> Nets.predict(net, p, input)
loss = (p, input, target) -> sumabs2(target - predict(p, input))

# Ten passes of momentum SGD over the training half.
@showprogress for i in 1:10
    Nets.sgd!(loss, params, train_data)
end

@show mean(xy -> loss(params, xy[1], xy[2]), test_data)

# Fitted curve, then training samples (green) and held-out samples (red).
xx = linspace(-2, 2)
plt = plot(xx, x -> predict(params, [x])[1], ylim=(-1, 1), legend=nothing, linewidth=4)
for (subset, color) in ((train_data, :green), (test_data, :red))
    plot!(plt, [x for (x, y) in subset], [y for (x, y) in subset],
          line=nothing, markershape=:circle, markercolor=color, markersize=3)
end
plt

Progress:  10%|████                                     |  ETA: 0:00:01Progress: 100%|█████████████████████████████████████████| Time: 0:00:00


mean((xy->begin  # In[127], line 25:
            loss(params,xy[1],xy[2])
        end),test_data) = 0.4778154980705157
