In [None]:
import Base.+, Base.*, Base.(.*)

In [None]:
# Node of a reverse-mode automatic-differentiation graph.
#
# Fields:
#   value        - the value computed at this node
#   derivative   - gradient accumulated for this node (same type as `value`)
#   derivativeOp - called as derivativeOp(derivative, parents) to push this
#                  node's gradient into its parents; defaults to ad_constD
#   parents      - the nodes this node was computed from (empty by default)
type AD{T}
    value::T
    derivative::T
    derivativeOp::Function
    parents::Array{AD}

    # Full constructor. When `val` is a scalar (size(val) == ()) the supplied
    # `grad` is ignored and the derivative starts at zero; otherwise `grad`
    # is stored as given.
    function AD(val::T, grad::T)
        if size(val) == ()
            return new(val, zero(val), ad_constD, Array(AD,0))
        else
            return new(val, grad, ad_constD, Array(AD,0))
        end
    end

    # Single-argument constructor so that AD{T}(val) works: the derivative
    # starts as zero(val), which has the same type and shape as `val`.
    function AD(val::T)
        return new(val, zero(val), ad_constD, Array(AD,0))
    end
end
# Outer convenience constructor: AD(val) wraps `val` with a zero derivative.
# zero(val) has the same type and shape as `val`, so it satisfies the
# `grad::T` argument for scalars and for arrays of any element type
# (zeros(size(val)) always produced a Float64 array).
AD{T}(val::T) = AD{T}(val, zero(val))

# Derivative op installed by the AD constructor: it ignores both arguments,
# leaves every node untouched and returns 0.
ad_constD{T}(prevDerivative::T, adNodes::Array{AD}) = 0

In [None]:
# Forward pass of addition: returns a new node holding a.value + b.value whose
# derivative op is ad_addD and whose parents are `a` then `b`.
function ad_add{T}(a::AD{T}, b::AD{T})
    total = a.value + b.value
    node = AD{T}(total)
    node.derivativeOp = ad_addD
    push!(node.parents, a, b)
    return node
end
# Backward pass of addition: adds the incoming gradient, unchanged, to the
# derivative of each of the first two nodes in `adNodes`. Returns nothing.
function ad_addD{T}(prevDerivative::T, adNodes::Array{AD, 1})
    incoming = 1 * prevDerivative
    for idx = 1:2
        adNodes[idx].derivative += incoming
    end
    return
end
# `x + y` on two AD nodes builds an addition node via ad_add.
function +(x::AD, y::AD)
    return ad_add(x, y)
end

In [None]:
# Forward pass of `*`: returns a new node holding a.value * b.value whose
# derivative op is ad_mulD and whose parents are `a` then `b`.
function ad_mul{T}(a::AD{T}, b::AD{T})
    resultValue = a.value * b.value
    # zero(resultValue) keeps the type and shape of the product, so the
    # gradient matches `T` for scalars and for any array element type.
    result = AD{T}(resultValue, zero(resultValue))
    result.derivativeOp = ad_mulD
    push!(result.parents, a)
    push!(result.parents, b)
    return result
end
# Backward pass of `*` for C = A * B, where A = adNodes[1].value and
# B = adNodes[2].value.
#
# With G = prevDerivative (the gradient with respect to C), accumulates
#   A.derivative += G * transpose(B)
#   B.derivative += transpose(A) * G
# which is the matrix form of the element-wise sums
#   dA[i,k] += B[k,j] * G[i,j]   and   dB[k,j] += A[i,k] * G[i,j].
# Returns nothing.
function ad_mulD{T}(prevDerivative::T, adNodes::Array{AD,1})
    left = adNodes[1]
    right = adNodes[2]

    # Compute both contributions before updating either node.
    leftGrad = prevDerivative * transpose(right.value)
    rightGrad = transpose(left.value) * prevDerivative

    left.derivative += leftGrad
    right.derivative += rightGrad
    return
end
# `x * y` on two AD nodes builds a multiplication node via ad_mul.
function *(x::AD, y::AD)
    return ad_mul(x, y)
end

In [None]:
# Forward pass of `.*`: returns a new node holding a.value .* b.value whose
# derivative op is ad_elmulD and whose parents are `a` then `b`.
function ad_elmul{T}(a::AD{T}, b::AD{T})
    product = a.value .* b.value
    node = AD{T}(product)
    node.derivativeOp = ad_elmulD
    for operand in (a, b)
        push!(node.parents, operand)
    end
    return node
end
# Backward pass of `.*` for c = a .* b, where a = adNodes[1] and b = adNodes[2].
# Each parent receives the other operand's value times the incoming gradient,
# element by element (`.*`, not the matrix product `*`). Returns nothing.
function ad_elmulD{T}(prevDerivative::T, adNodes::Array{AD,1})
    adNodes[1].derivative += adNodes[2].value .* prevDerivative
    adNodes[2].derivative += adNodes[1].value .* prevDerivative
    return
end
# `x .* y` on two AD nodes builds an element-wise multiplication node via ad_elmul.
function .*(x::AD, y::AD)
    return ad_elmul(x, y)
end

In [None]:
# tanh for AD nodes: returns a new node holding tanh(a.value) whose derivative
# op is ad_tanhD and whose only parent is `a`.
#
# The method is added to Base.tanh (rather than defining a new `tanh` that
# would shadow it) so that tanh keeps working on plain numbers and arrays.
function Base.tanh{T}(a::AD{T})
    result = AD{T}(Base.tanh(a.value))
    result.derivativeOp = ad_tanhD
    push!(result.parents, a)
    return result
end
# Backward pass of tanh. adNodes[1] is the input node x of y = tanh(x), so the
# activation is recomputed from x.value and the input accumulates
#   (1 - tanh(x)^2) .* prevDerivative
# element by element. Returns nothing.
function ad_tanhD{T}(prevDerivative::T, adNodes::Array{AD,1})
    input = adNodes[1]
    activation = Base.tanh(input.value)
    input.derivative += (1.0 .- activation.^2) .* prevDerivative
    return
end

In [None]:
# Logistic sigmoid for AD nodes: returns a new node holding
# 1 / (1 + exp(-a.value)), computed element by element, whose derivative op
# is ad_sigmoidD and whose only parent is `a`.
function sigmoid{T}(a::AD{T})
    # Dotted operators keep the arithmetic element-wise for array values
    # (a plain `/` with an array on the right is not element-wise division).
    result = AD{T}(1.0 ./ (1.0 .+ exp(-a.value)))
    result.derivativeOp = ad_sigmoidD
    push!(result.parents, a)
    return result
end
# Backward pass of sigmoid. adNodes[1] is the input node x of s = sigmoid(x),
# so the activation is recomputed from x.value and the input accumulates
#   s .* (1 - s) .* prevDerivative
# element by element. Returns nothing.
function ad_sigmoidD{T}(prevDerivative::T, adNodes::Array{AD,1})
    input = adNodes[1]
    activation = 1.0 ./ (1.0 .+ exp(-input.value))
    input.derivative += activation .* (1.0 .- activation) .* prevDerivative
    return
end


In [None]:
# Rectified linear unit for AD nodes: returns a new node holding
# max(0.0, a.value) whose derivative op is ad_reluD and whose only parent is `a`.
function relu{T}(a::AD{T})
    clipped = max(0.0, a.value)
    node = AD{T}(clipped)
    node.derivativeOp = ad_reluD
    push!(node.parents, a)
    return node
end
# Backward pass of relu. adNodes[1] is the input node of the relu; the incoming
# gradient is passed through where the input value is strictly positive and
# blocked (multiplied by zero) elsewhere. Returns nothing.
function ad_reluD{T}(prevDerivative::T, adNodes::Array{AD,1})
    input = adNodes[1]
    # Boolean mask (a single Bool for scalar values) of the active entries.
    active = input.value .> 0
    input.derivative += active .* prevDerivative
    return
end


In [None]:
# Scalar node wrapping the value 2.0.
x = AD{Float64}(2.0)

In [None]:
# 2x2 matrix node filled by randn(2,2).
y = AD{Array{Float64,2}}(randn(2,2))

In [None]:
# Vector node wrapping [1.0, 2.0].
z = AD{Array{Float64,1}}([1.0,2.0])

In [None]:
# Two 2x2 matrix nodes used as inputs to f below: all twos and all ones.
a1 = AD{Array{Float64,2}}(ones(2,2)*2)
a2 = AD{Array{Float64,2}}(ones(2,2))

In [None]:
# Example function built from AD operations: (x + y) * (y + J), where J is a
# constant 2x2 node of ones. Returns the node produced by the final `*`.
function f(x::AD,y::AD)
    lhs = x + y
    onesNode = AD{Array{Float64,2}}(ones(2,2))
    rhs = y + onesNode
    return lhs * rhs
end 

In [None]:
# Forward pass only: build the graph of f on the two 2x2 inputs.
f(a1,a2)

In [None]:
# Append `node` and all of its ancestors to `order` so that every node appears
# after all of its parents (post-order depth-first walk). Nodes already in
# `order` are skipped; the identity check is a linear scan, so this is
# quadratic in the number of nodes. Recursion depth follows graph depth.
function ad_toposort!(order::Array{AD,1}, node::AD)
    for seen in order
        if seen === node
            return order
        end
    end
    for parent in node.parents
        ad_toposort!(order, parent)
    end
    push!(order, node)
    return order
end

# Run reverse-mode differentiation from `graph` (the output node).
#
# The output's derivative is seeded with ones, then each node's derivativeOp
# is called exactly once, in reverse topological order. A node therefore only
# propagates to its parents after every node that consumes it has contributed
# to its derivative, so shared intermediate nodes are not double-counted.
#
# Derivative ops accumulate into the parents with `+=`. Returns `graph`.
function backprop(graph::AD)
    # set the derivative to 1
    graph.derivative = graph.derivative*0 + 1.0

    order = ad_toposort!(Array(AD, 0), graph)
    # `order` lists parents before children; walk it backwards so the output
    # node comes first and the inputs last.
    for idx = length(order):-1:1
        node = order[idx]
        node.derivativeOp(node.derivative, node.parents)
    end
    return graph
end

In [None]:
# Build a fresh graph of f on a1 and a2 and back-propagate through it.
# The derivative ops add into the parents' derivative fields with `+=`, so
# running this again adds to the derivatives already stored in a1 and a2.
backprop(f(a1,a2))

In [None]:
# Evaluate a1 to inspect the node after backprop.
a1

In [None]:
# Evaluate a2 to inspect the node after backprop.
a2