In [46]:
# Reverse-mode automatic differentiation; used below for gradients of the loss.
using ReverseDiff
# Supplies the @showprogress macro wrapped around the training loops.
using ProgressMeter
# Supplies @inferred, used to check that the forward pass is type-stable.
using Base.Test
# Data-handling helpers used below (splitobs, batchview, shuffleobs).
using MLDataPattern
using Plots
# Select GR as the Plots backend.
gr()

Plots.GRBackend()

In [93]:
module Nets

import ReverseDiff
using MLDataPattern: batchview, shuffleobs

# Factor applied to negative pre-activations by the hidden layers (leaky ReLU).
const LEAKY_SLOPE = 0.1

"""
    head(t::Tuple)

Return a 1-tuple holding only the first element of `t`.
"""
head(t::Tuple) = tuple(t[1])

"""
    viewblocks(data, shapes)

Carve `data` into consecutive blocks, one per entry of `shapes`, and return each
block as a view of `data` reshaped to the matching shape. Block `i` spans
`prod(shapes[i])` elements and begins right after block `i - 1`.
"""
function viewblocks{T <: NTuple}(data::AbstractArray, shapes::AbstractVector{T})
    # starts[i] is the first linear index of block i; starts[end] is one past the last block.
    starts = cumsum(vcat([1], prod.(shapes)))
    [reshape(view(data, starts[i]:(starts[i+1] - 1)), shapes[i]) for i in 1:length(shapes)]
end

"""
    PANet{Sensitive}(shapes)

Feed-forward network described only by its layer shapes: `shapes[i]` is the
`(outputs, inputs)` size of layer `i`'s weight matrix. The parameters are not
stored here; they are passed to `predict` as one flat vector of length
`nparams(net)`.

The `Sensitive` type parameter selects the `predict` method: `true` returns the
output together with its Jacobian with respect to the input, `false` returns
the output only.
"""
type PANet{Sensitive}
    shapes::Vector{NTuple{2, Int}}
end

# Number of weight-matrix entries over all layers.
nweights(net::PANet) = sum(prod, net.shapes)
# Number of bias entries: one per output of every layer (including the last).
nbiases(net::PANet) = sum(first, net.shapes)
# Length of the flat parameter vector: all weights first, then all biases.
nparams(net::PANet) = nweights(net) + nbiases(net)
# Random flat parameter vectors of the right length for `net`.
Base.rand(net::PANet) = rand(nparams(net))
Base.randn(net::PANet) = randn(nparams(net))

# Per-layer weight matrices, as reshaped views into the leading entries of `params`.
layerweights(net::PANet, params::AbstractVector) = viewblocks(params, net.shapes)

# Per-layer bias vectors, as views into the entries of `params` after the weights.
layerbiases(net::PANet, params::AbstractVector) =
    viewblocks(@view(params[(nweights(net) + 1):end]), head.(net.shapes))

# Elementwise leaky-ReLU slope at `y`: 1 where `y >= 0`, `LEAKY_SLOPE` elsewhere.
leakyslopes(y) = [yy >= 0 ? 1.0 : LEAKY_SLOPE for yy in y]

"""
    predict(net::PANet{true}, params, x)

Evaluate the network at input `x` and return the output followed by the
flattened Jacobian of the output with respect to `x`, as a single vector.

Every layer except the last applies `w * y .+ b` followed by a leaky ReLU; the
Jacobian is pushed through the same weights and activation slopes. The last
layer applies its weight matrix only: the bias block that `nbiases` reserves
for it is never read.
"""
function predict(net::PANet{true}, params::AbstractVector, x::AbstractVector)
    weights = layerweights(net, params)
    biases = layerbiases(net, params)
    # Column-matrix copy of x whose element type can hold both params and x.
    y = similar(x, Base.promote_eltype(params, x), (length(x), 1))
    y .= x
    # d(y)/d(x) starts out as the identity.
    J = eye(eltype(y), length(x))
    for i in 1:(length(net.shapes) - 1)
        w = weights[i]
        y = w * y .+ biases[i]
        J = w * J
        # Scaling each row of J by the activation slope is the chain rule
        # through the elementwise leaky ReLU.
        z = leakyslopes(y)
        y = y .* z
        J = J .* z
    end
    w = weights[end]
    vcat(vec(w * y), vec(w * J))
end

"""
    predict(net::PANet{false}, params, x)

Evaluate the network at input `x` and return the output as a vector.

Every layer except the last applies `w * y .+ b` followed by a leaky ReLU. The
last layer applies its weight matrix only: the bias block that `nbiases`
reserves for it is never read.
"""
function predict(net::PANet{false}, params::AbstractVector, x::AbstractVector)
    weights = layerweights(net, params)
    biases = layerbiases(net, params)
    # Column-matrix copy of x whose element type can hold both params and x.
    y = similar(x, Base.promote_eltype(params, x), (length(x), 1))
    y .= x
    for i in 1:(length(net.shapes) - 1)
        y = weights[i] * y .+ biases[i]
        y = y .* leakyslopes(y)
    end
    vec(weights[end] * y)
end

"""
    sgd!(loss, params, data, lr=0.01, momentum=0.8, batchsize=1)

Make one shuffled pass over `data` in batches of `batchsize`, applying a
gradient step with momentum to `params` after each batch. `params` is updated
in place and returned.

`loss(params, x, y)` is differentiated with respect to `params` using
`ReverseDiff.gradient!`; each batch must iterate as `(x, y)` pairs. The batch
gradient is the sum (not the mean) of the per-sample gradients. Raises an error
after printing the offending values if any gradient entry is NaN.
"""
function sgd!(loss, params, data, lr=0.01, momentum=0.8, batchsize=1)
    last_descent = zeros(params)
    dw = zeros(params)
    dw_sample = zeros(dw)
    for batch in batchview(shuffleobs(data), batchsize)
        dw .= 0
        for (x, y) in batch
            ReverseDiff.gradient!(dw_sample, w -> loss(w, x, y), params)
            # Predicate form avoids allocating a temporary Bool array per sample.
            if any(isnan, dw_sample)
                @show params x y dw_sample
                error("nan")
            end
            dw .+= dw_sample
        end
        for i in eachindex(params)
            # The step is remembered so it can feed the next batch's momentum term.
            v = lr * dw[i] + momentum * last_descent[i]
            params[i] -= v
            last_descent[i] = v
        end
    end
    params
end

end



Nets

In [94]:
# Draw 20 samples of f(x) = |x|. Each observation pairs the input `[x]` with
# the target `[f(x), f'(x)]`; the slope at x = 0 is taken to be +1.
data = map(1:20) do _
    x = randn()
    slope = x >= 0 ? 1 : -1
    ([x], [slope * x, slope])
end;
# Split the observations 50/50 into training and test sets.
train_data, test_data = splitobs(data, 0.5)


# Layer sizes as (outputs, inputs): 1 -> 4 -> 4 -> 1.
shapes = [(4, 1), (4, 4), (1, 4)]
net = Nets.PANet{true}(shapes)
# Random initial flat parameter vector sized for `net`.
params = randn(net)
# Fail here if the forward pass is not type-stable.
@inferred Nets.predict(net, params, [0])
# Convenience closures over the global `net`.
predict = (p, input) -> Nets.predict(net, p, input)
loss = (p, input, target) -> begin
    residual = target - predict(p, input)
    sumabs2(residual)
end

(::#83) (generic function with 1 method)

In [96]:
# Ten passes of SGD over the training set; `params` is updated in place.
@showprogress for epoch in 1:10
    Nets.sgd!(loss, params, train_data)
end

# Plot the first component of the prediction over [-2, 2], then overlay the
# training observations as small markers.
xx = linspace(-2, 2)
plt = plot(xx, x -> first(predict(params, [x])),
           ylim=(-1, 1), legend=nothing, linewidth=4)
plot!(plt, [obs[1] for obs in train_data], [obs[2] for obs in train_data],
      line=nothing, markershape=:circle, markersize=1)
plt

Progress:  80%|█████████████████████████████████        |  ETA: 0:00:00Progress: 100%|█████████████████████████████████████████| Time: 0:00:00


In [28]:
# Evaluate the same parameters at x = 0 with the output-only (`PANet{false}`) network.
let value_net = Nets.PANet{false}(shapes)
    Nets.predict(value_net, params, [0])
end

1-element Array{Float64,1}:
 1.18915

In [None]:


"""
    sgd(net, params, data, lr=0.1, momentum=0.8, batchsize=1)

Make one shuffled pass over `data` in batches of `batchsize`, applying a
gradient step with momentum to `params` after each batch. `params` is updated
in place and returned.

The gradient of the global `loss(params, shapes, x, y)` is taken with
`ReverseDiff.gradient`, and each batch must iterate as `(x, y)` pairs.

NOTE(review): `net` is accepted but never used, and `shapes` is read from
global scope. Presumably `shapes` was meant to come from `net`; confirm before
relying on this function.
"""
function sgd(net, params, data, lr=0.1, momentum=0.8, batchsize=1)
    # Operate on the `params` argument. The previous body read and mutated a
    # global `w`, silently ignoring the vector the caller passed in.
    last_descent = zeros(params)
    for batch in batchview(shuffleobs(data), batchsize)
        dw = zeros(params)
        for (x, y) in batch
            sample_grad = ReverseDiff.gradient(p -> loss(p, shapes, x, y), params)
            dw .+= sample_grad
        end
        for i in eachindex(params)
            # The step is remembered so it can feed the next batch's momentum term.
            v = lr * dw[i] + momentum * last_descent[i]
            params[i] -= v
            last_descent[i] = v
        end
    end
    params
end

"""
    train(w, shapes, data, lr=0.1, momentum=0.8)

Make one pass over `data` in batches of a single observation, applying a
gradient step with momentum to `w` after each batch. `w` is updated in place
and returned.

The gradient of the global `loss(w, shapes, x, y)` is taken with
`ReverseDiff.gradient`, and each batch must iterate as `(x, y)` pairs.
"""
function train(w, shapes, data, lr=0.1, momentum=0.8)
    velocity = zeros(w)
    for batch in batchview(data, 1)
        grad = zeros(w)
        for (x, y) in batch
            grad .+= ReverseDiff.gradient(p -> loss(p, shapes, x, y), w)
        end
        for k in eachindex(w)
            # The step is remembered so it can feed the next batch's momentum term.
            step = lr * grad[k] + momentum * velocity[k]
            w[k] -= step
            velocity[k] = step
        end
    end
    w
end

# Random initial parameters: every weight entry plus one bias per layer output.
w = rand(sum(s -> prod(s) + first(s), shapes))

# 100 passes over the training set with lr = 0.01 and momentum = 0.5.
@showprogress for epoch in 1:100
    train(w, shapes, train_data, 0.01, 0.5)
end

# Plot the first component of the prediction over [-2, 2], then overlay the
# training observations as small markers.
xx = linspace(-2, 2)
plt = plot(xx, x -> first(predict(w, shapes, [x])),
           ylim=(-1, 1), legend=nothing, linewidth=4)
plot!(plt, [obs[1] for obs in train_data], [obs[2] for obs in train_data],
      line=nothing, markershape=:circle, markersize=1)
plt

In [148]:
# Forward pass that also propagates the Jacobian of the output with respect to
# the input `x`.
#
# `flat_weights` holds every layer's weight matrix followed by every layer's
# bias vector; `shapes[i]` is the (outputs, inputs) size of layer i. All layers
# but the last apply `w * y .+ b` followed by a leaky ReLU (slope 0.1 for
# negative values); the last layer applies its weight matrix only. Returns the
# output followed by the flattened Jacobian, as one vector.
predict = (flat_weights, shapes, x) -> begin
    # `viewblocks` and `head` are defined inside the `Nets` module and are not
    # exported, so they have to be qualified at top level.
    weights = Nets.viewblocks(flat_weights, shapes)
    num_weights = sum(length, weights)
    biases = Nets.viewblocks(@view(flat_weights[(num_weights + 1):end]), map(Nets.head, shapes))
    y = x
    # d(y)/d(x) starts out as the identity.
    J = eye(length(x))
    for i in 1:(length(shapes) - 1)
        w = weights[i]
        y = w * y .+ biases[i]
        J = w * J
        # Scaling each row of J by the activation slope is the chain rule
        # through the elementwise leaky ReLU.
        z = [yy >= 0 ? 1.0 : 0.1 for yy in y]
        y = y .* z
        J = J .* z
    end
    w = weights[end]
    vcat(vec(w * y), vec(w * J))
end

# Squared error over the whole prediction vector, so both the output value and
# its input-derivative entries are penalised.
loss = (w, shapes, x, y) -> begin
    residual = y - predict(w, shapes, x)
    sumabs2(residual)
end

"""
    train(w, shapes, data, lr=0.1, momentum=0.8)

Make one pass over `data` in batches of a single observation, applying a
gradient step with momentum to `w` after each batch. `w` is updated in place
and returned.

The gradient of the global `loss(w, shapes, x, y)` is taken with
`ReverseDiff.gradient`, and each batch must iterate as `(x, y)` pairs.
"""
function train(w, shapes, data, lr=0.1, momentum=0.8)
    velocity = zeros(w)
    for batch in batchview(data, 1)
        grad = zeros(w)
        for (x, y) in batch
            grad .+= ReverseDiff.gradient(p -> loss(p, shapes, x, y), w)
        end
        for k in eachindex(w)
            # The step is remembered so it can feed the next batch's momentum term.
            step = lr * grad[k] + momentum * velocity[k]
            w[k] -= step
            velocity[k] = step
        end
    end
    w
end

# Random initial parameters: every weight entry plus one bias per layer output.
w = rand(sum(s -> prod(s) + first(s), shapes))

# 100 passes over the training set with lr = 0.01 and momentum = 0.5.
@showprogress for epoch in 1:100
    train(w, shapes, train_data, 0.01, 0.5)
end

# Plot the first component of the prediction over [-2, 2], then overlay the
# training observations as small markers.
xx = linspace(-2, 2)
plt = plot(xx, x -> first(predict(w, shapes, [x])),
           ylim=(-1, 1), legend=nothing, linewidth=4)
plot!(plt, [obs[1] for obs in train_data], [obs[2] for obs in train_data],
      line=nothing, markershape=:circle, markersize=1)
plt

Progress: 100%|█████████████████████████████████████████| Time: 0:00:01


In [151]:
# Forward pass that also propagates the Jacobian of the output with respect to
# the input `x`.
#
# `flat_weights` holds every layer's weight matrix followed by every layer's
# bias vector; `shapes[i]` is the (outputs, inputs) size of layer i. All layers
# but the last apply `w * y .+ b` followed by a leaky ReLU (slope 0.1 for
# negative values); the last layer applies its weight matrix only. Returns the
# output followed by the flattened Jacobian, as one vector.
predict = (flat_weights, shapes, x) -> begin
    # `viewblocks` and `head` are defined inside the `Nets` module and are not
    # exported, so they have to be qualified at top level.
    weights = Nets.viewblocks(flat_weights, shapes)
    num_weights = sum(length, weights)
    biases = Nets.viewblocks(@view(flat_weights[(num_weights + 1):end]), map(Nets.head, shapes))
    y = x
    # d(y)/d(x) starts out as the identity.
    J = eye(length(x))
    for i in 1:(length(shapes) - 1)
        w = weights[i]
        y = w * y .+ biases[i]
        J = w * J
        # Scaling each row of J by the activation slope is the chain rule
        # through the elementwise leaky ReLU.
        z = [yy >= 0 ? 1.0 : 0.1 for yy in y]
        y = y .* z
        J = J .* z
    end
    w = weights[end]
    vcat(vec(w * y), vec(w * J))
end

# Value-only loss: compares just the first entry of the target and of the
# prediction, so the derivative entries do not contribute.
loss = (w, shapes, x, y) -> begin
    value_error = y[1] - predict(w, shapes, x)[1]
    sumabs2(value_error)
end

"""
    train(w, shapes, data, lr=0.1, momentum=0.8)

Make one pass over `data` in batches of a single observation, applying a
gradient step with momentum to `w` after each batch. `w` is updated in place
and returned.

The gradient of the global `loss(w, shapes, x, y)` is taken with
`ReverseDiff.gradient`, and each batch must iterate as `(x, y)` pairs.
"""
function train(w, shapes, data, lr=0.1, momentum=0.8)
    velocity = zeros(w)
    for batch in batchview(data, 1)
        grad = zeros(w)
        for (x, y) in batch
            grad .+= ReverseDiff.gradient(p -> loss(p, shapes, x, y), w)
        end
        for k in eachindex(w)
            # The step is remembered so it can feed the next batch's momentum term.
            step = lr * grad[k] + momentum * velocity[k]
            w[k] -= step
            velocity[k] = step
        end
    end
    w
end

# Random initial parameters: every weight entry plus one bias per layer output.
w = rand(sum(s -> prod(s) + first(s), shapes))

# 100 passes over the training set with lr = 0.01 and momentum = 0.5.
@showprogress for epoch in 1:100
    train(w, shapes, train_data, 0.01, 0.5)
end

# Plot the first component of the prediction over [-2, 2], then overlay the
# training observations as small markers.
xx = linspace(-2, 2)
plt = plot(xx, x -> first(predict(w, shapes, [x])),
           ylim=(-1, 1), legend=nothing, linewidth=4)
plot!(plt, [obs[1] for obs in train_data], [obs[2] for obs in train_data],
      line=nothing, markershape=:circle, markersize=1)
plt

Progress: 100%|█████████████████████████████████████████| Time: 0:00:01
