In [1]:
# Activations (ϕ)
function ReLU(x::T)::T where T<:Real 
    return max(x, 0)
end

function LeakyReLU(x::T)::T where T<:Real 
    return max(x, 0.01x)
end

function sigmoid(x::T)::T where T<:Real 
    return 1.0 / (1 + exp(-x))
end

# Derivatives
function ∇ReLU(x::T)::T where T<:Real 
    return x > 0
end

function ∇LeakyReLU(x::T)::T where T<:Real 
    return x < 0 ? 0.01 : 1.0
end

function ∇sigmoid(x::T)::T where T<:Real
    y = sigmoid(x)
    return y * (1 - y)
end;

In [2]:
function gradient(f::Function)
    if f == ReLU
        ∇f = ∇ReLU
    elseif f == LeakyReLU
        ∇f = ∇LeakyReLU
    elseif f == sigmoid
        ∇f = ∇sigmoid
    elseif f == mse
        ∇f = ∇mse
    end
    
    return ∇f
end

gradient (generic function with 1 method)

In [3]:
# Error Calculations

# Mean Squared Error
function mse(x::T, target::T) where T<:Real
    return .5(target-x)^2
end

function mse(xs::Vector{T}, target::T) where T<:Real 
    err(x) = target - x
    return sum(err.(xs).^2)/2*length(xs)
end

function mse(xs::Vector{T}, target::Vector{T}) where T<:Real
    sum((xs .- target).^2)/2*length(xs)
end

# Derivatives
function ∇mse(x::T, target::T) where T<:Real
    return x - target
end

function ∇mse(xs::Vector{T}, target::T) where T<:Real
    return xs .- target
end

function ∇mse(xs::Vector{T}, target::Vector{T}) where T<:Real
    return xs .- target
end    

∇mse (generic function with 3 methods)

In [4]:
abstract type Layer end

In [5]:
mutable struct FeedForwardLayer <: Layer
    neurons::Array
    bias::Array
    
    ϕ::Function
    ∇ϕ::Function
    
    input::Array
    net::Array
    output::Array
      
    function FeedForwardLayer(input_dim::Int, output_dim::Int, ϕ::Function)
        neurons = randn(input_dim, output_dim)
        bias = randn(1, output_dim)
        return new(neurons, bias, ϕ, gradient(ϕ))
    end
    
    function FeedForwardLayer(neurons::Array, bias::Vector, ϕ::Function)
        return new(neurons, bias, ϕ, gradient(ϕ))
    end
end

In [6]:
mutable struct FeedForwardOutputLayer <: Layer
    neurons::Array
    bias::Array
    
    ϕ::Function
    ∇ϕ::Function
    
    input::Array
    net::Array
    output::Array
      
    function FeedForwardOutputLayer(input_dim::Int, output_dim::Int, ϕ::Function)
        neurons = randn(input_dim, output_dim)
        bias = zeros(1, output_dim)
        return new(neurons, bias, ϕ, gradient(ϕ))
    end
end

In [7]:
abstract type NeuralNetwork end

In [8]:
mutable struct FeedForwardNeuralNetwork <: NeuralNetwork
    layers::Array{Layer}
    η::Float64
    
    loss::Function
    ∇loss::Function
    
    function FeedForwardNeuralNetwork(input_dim::Int, hidden_dims::Vector, output_dim::Int, ϕ::Vector{Function}, loss::Function, η=0.01)
        layers = []
        
        push!(layers, FeedForwardLayer(input_dim, hidden_dims[1], ϕ[1]))
        
        for i in 1:length(hidden_dims)-1
            push!(layers, FeedForwardLayer(hidden_dims[i], hidden_dims[i+1], ϕ[i+1]))
        end
        
        push!(layers, FeedForwardOutputLayer(hidden_dims[end], output_dim, ϕ[end]))
        
        return new(layers, η, loss, gradient(loss))
    end
end

In [9]:
function predict(model::FeedForwardNeuralNetwork, data)
    for layer in model.layers
        data = data * layer.neurons .+ layer.bias
        data = layer.ϕ.(data)
    end
    return data[:,1]
end

predict (generic function with 1 method)

In [10]:
function train(model::FeedForwardNeuralNetwork, data, target, epochs=1, clear=true)
    for i in 1:epochs
        forwardpass(model, data)
        backprop(model, target)
    end
    if clear
        for i in 1:length(model.layers)
            model.layers[i].input = []
            model.layers[i].net = []
            model.layers[i].output = []
        end
    end
end

train (generic function with 3 methods)

In [11]:
function forwardpass(model::FeedForwardNeuralNetwork, data)
    for layer in model.layers 
        layer.input = data
        layer.net = data * layer.neurons .+ layer.bias
        layer.output = layer.ϕ.(layer.net)
        data = layer.output
    end
    return data
end        

forwardpass (generic function with 1 method)

In [12]:
function backprop(model::FeedForwardNeuralNetwork, target)    
    # Work backwards from output layer
    layer = model.layers[end]
    # Calculate partial derivative of error with respect to each weight
    # ∂E/∂wᵢⱼ = ∂E/∂oⱼ * ∂oⱼ/∂netⱼ * ∂netⱼ/∂wᵢⱼ
    # Partial derivative of loss
    ∂E_∂o = model.∇loss.(layer.output, target)
    # Partial derivative of activated output
    ∂o_∂net = layer.∇ϕ.(layer.net)
    # δ=∂E/∂net
    # Error with respect to net -- the error terms
    δ = ∂E_∂o .* ∂o_∂net
    # ∂net/∂w is equal to the previous layer's output (https://bit.ly/backproperror)
    ∂net_∂w = layer.input'
    # Calculate delta terms for the neurons and adjust by the learning rate
    η = model.η
    Δw = -η * ∂net_∂w * δ
    # Update the weights of the output layer
    layer.neurons += Δw
    # Output layer has no bias, so no need to update it
    # Now do the rest of the layers in reverse order
    for i in length(model.layers)-1:-1:1
        layer = model.layers[i]
        # Error term of this layer is based on the error of the following layer
        ∂E_∂o = δ * model.layers[i+1].neurons'
        # The next part is the same
        ∂o_∂net = layer.∇ϕ.(layer.net)
        δ = ∂E_∂o .* ∂o_∂net
        ∂net_∂w = layer.input' 
        Δw = -η * ∂net_∂w * δ
        # Update the neurons
        layer.neurons += Δw
        # Update the bias
        layer.bias = layer.bias .+ (-η * δ)
    end   
end

backprop (generic function with 1 method)

In [108]:
function fit(model::FeedForwardNeuralNetwork, data::Array{T}, target::Vector{T}, epochs::Int, showtime=false) where T<:Real
    prediction = predict(model, data)
    @show loss(prediction, target)
    print("Training for ", epochs, " epochs.")
    if showtime
        @time train(model, data, target, epochs)
    else
        train(model, data, target, epochs)
    end
    prediction = predict(model, data)
    @show loss(prediction, target)
end

fit (generic function with 2 methods)

In [110]:
inputsize = 32
hidden_layers = [64,64,32,32,16,16,8,4,2]
output_size = 1
activations = vcat([LeakyReLU],repeat([sigmoid], length(hidden_layers)+1))
loss=mse;

m = FeedForwardNeuralNetwork(inputsize, hidden_layers, output_size, activations, loss);

samples = 32
v = randn(samples, inputsize)
t = rand([0.,1.], samples)

fit(m, v, t, 100000, true);

loss(prediction, target) = 115.38295454406902
Training for 100000 epochs. 30.526745 seconds (24.80 M allocations: 68.584 GiB, 7.05% gc time)
loss(prediction, target) = 0.017017430486698907
