# MLP on MINST Dataset

In [1]:
Pkg.update()
Pkg.add("MNIST")

INFO: Updating METADATA...
INFO: Computing changes...
INFO: No packages to install, update or remove
INFO: Nothing to be done


In [2]:
using MNIST
features = trainfeatures(1)
label = trainlabel(1)

trainX, trainY = traindata()
testX, testY = testdata()

(
[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0],

[7.0,2.0,1.0,0.0,4.0,1.0,4.0,9.0,5.0,9.0  …  7.0,8.0,9.0,0.0,1.0,2.0,3.0,4.0,5.0,6.0])

In [3]:
abstract Layer
abstract Nonlinearity <: Layer
abstract LossCriteria <: Layer

In [4]:
# Define the Fully Connected layers
type FCLayer <: Layer
    W           :: Array{Float64}
    last_input  :: Array{Float64}
    last_output :: Array{Float64}
    last_loss   :: Array{Float64}

    function FCLayer(i, o)
        return new(rand(o,i), zeros(i), zeros(o), zeros(o))
    end
end

function forward(l::FCLayer, x::Array{Float64,1})
    @assert ndims(x) == 1 && size(x) == (size(l.W)[2],)
    l.last_input  = x
    l.last_output = l.W * x # matrix multiplication
    l.last_output
end

function backward(l::FCLayer, loss::Array{Float64,1})
    @assert size(loss) == (size(l.W)[1],)
    l.last_loss = loss
    println("At FC loss is:")
    println(loss)
    l.W'*loss
end

function gradient(l::FCLayer)
    @assert size(l.last_loss) == (size(l.W)[1],)
    l.last_loss * l.last_input'
end

function getParam(l::FCLayer)
    l.W
end

function setParam(l::FCLayer, theta::Array{Float64})
    @assert size(l.W) == size(theta)
    l.W = theta
end

l = FCLayer(10,20)
forward(l, rand(10))

20-element Array{Float64,1}:
 3.00715
 2.20949
 1.87556
 2.18064
 2.56177
 2.95496
 2.66149
 2.28718
 1.37536
 2.92693
 1.62535
 2.49764
 3.1417 
 2.42541
 2.63822
 2.48069
 2.31803
 2.09132
 1.8487 
 2.63835

In [5]:
# Define the ReLu layers
type ReLu <: Nonlinearity
    alpha       :: Float64
    last_input  :: Array{Float64}
    last_output :: Array{Float64}
    last_loss   :: Array{Float64}
    function ReLu(alpha::Float64 = 1.0)
        @assert alpha >= 0.
        return new(alpha, Float64[], Float64[], Float64[])
    end
end

function forward(l::ReLu, x::Array{Float64})
    l.last_input  = x
    l.last_output = map(y -> max(0., y*l.alpha), x)
    l.last_output
end

function backward(l::ReLu, loss::Array{Float64})
    @assert size(l.last_input) == size(loss)
    println("At ReLu loss is:")
    println(loss)
    l.last_loss = loss
    map(idx -> l.last_input[idx]>=0 ? l.last_input[idx]*l.alpha*loss[idx] : 0., 1:length(l.last_input))
end

function gradient(l::ReLu)
    0
end

function getParam(l::ReLu)
    0
end

function setParam(l::ReLu, theta)
    nothing
end

l = ReLu()
#println(forward(l, [1.,0.,-1.,2.]))
#println(backward(l, [3.0,2.0,1.,1.0]))

ReLu(1.0,Float64[],Float64[],Float64[])

In [10]:
type CrossEntropyLoss <: LossCriteria
    last_loss  :: Array{Float64}
    last_input :: Array{Float64}
    function CrossEntropyLoss()
        return new(Float64[], Float64[])
    end
end    

function forward(l::CrossEntropyLoss, y::Array{Float64,1}, label::Array{Float64, 1})
    """
    [label]  label[i] == 1 iff the data is classified to class i
    [y]      final input to the loss layer
    """
    class = convert(Int64,label[1]) + 1
    #println("y is:")
    #println(y)
    #println("y - max : ")
    #println(y-maximum(y))
    #println("after: ")
    #println( -log(e .^ (y-maximum(y)) ./ sum(e .^ (y-maximum(y))))[class])
    return ( -log(e .^ (y-maximum(y)) ./ sum(e .^ (y-maximum(y))))[class])
end

function backward(l::CrossEntropyLoss, x::Array{Float64,1}, label::Array{Float64, 1})
    """
    [label]  label[i] == 1 iff the data is classified to class i
    [y]      final input to the loss layer
    """
    class = convert(Int64,label[1]) + 1
    println("x is:")
    println(x)
    max = maximum(x)
    y = e.^(x-max) / sum(e.^(x-max))
    println("y is:")
    println(y)
    map(j -> class==j ? y[class]*(1-y[class]) : -y[class]*y[j], 1:length(x))
end
l = CrossEntropyLoss()
#println(forward(l, [1.,2.,0.], [2.]))
#println(backward(l, [1.,2.,0.], [2.]))



CrossEntropyLoss(Float64[],Float64[])

In [11]:
abstract NN
type SequentialNet <: NN
    layers :: Array{Layer}
    lossfn :: LossCriteria
    function SequentialNet(layers::Array{Layer}, lossfn::LossCriteria)
        return new(layers, lossfn)
    end
end

function forward(net::SequentialNet, x::Array{Float64}, label::Array)
    local inp = x
    for i = 1:length(net.layers)
        inp = forward(net.layers[i], inp)
    end
    forward(net.lossfn, inp, label)
end

function backward(net::SequentialNet, label)
    dldy = backward(net.lossfn, net.layers[end].last_output, label)
    for i = length(net.layers):-1:1
        dldy = backward(net.layers[i], dldy)
    end
    dldy
end



backward (generic function with 4 methods)

In [12]:
layers = [
    FCLayer(784, 196),
    ReLu(),
    FCLayer(196, 49),
    ReLu(),
    FCLayer(49, 10)
]
criteria = CrossEntropyLoss()
net = SequentialNet(layers, criteria)



SequentialNet(Layer[FCLayer([0.533476 0.303003 … 0.784888 0.302405; 0.349903 0.736807 … 0.681029 0.412359; … ; 0.650014 0.487649 … 0.663823 0.927065; 0.80566 0.328152 … 0.177403 0.376933],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0  …  0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0  …  0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0  …  0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]),ReLu(1.0,Float64[],Float64[],Float64[]),FCLayer([0.632915 0.651604 … 0.958586 0.764563; 0.983465 0.00543561 … 0.60189 0.754944; … ; 0.334489 0.937567 … 0.205088 0.49887; 0.223643 0.438633 … 0.000682089 0.0844673],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0  …  0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0  …  0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0  …  0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]),ReLu(1.0,Float64[],Float64[],Float64[]),FCLayer([0.366336 0.197067 … 0.609101 0.3

In [None]:
function sgd(net::SequentialNet, batch_X, batch_Y, lr::Float64 = 0.0001)
    batch_size = size(batch_X)[1]
    ttl_loss   = 0.
    for b = 1:batch_size
        X, Y = batch_X[b,:], batch_Y[b,:]
        loss = forward(net, X, Y) # Propogate the input and output, calculate the loss
        #println(net.layers[1].last_output)
        backward(net, Y) # Propagate the dldy
        for l = 1:length(net.layers)
            layer = net.layers[l]
            #println(layer.last_loss)
            setParam(layer, getParam(layer) - lr * gradient(layer) / batch_size )
        end
        ttl_loss += loss
    end
    ttl_loss
end

function train(net::SequentialNet, X, Y)
    batch_size, N = 64, size(Y)[1]
    batch=0
    for epo = 1:100
        println("Epo $(epo):")
        for bid = 0:ceil(length(X)/batch_size)-1
            batch += 1
            sidx::Int = convert(Int64, bid*batch_size+1)
            eidx::Int = convert(Int64, min(N, (bid+1)*batch_size))
            println("$(sidx)  $(eidx)")
            batch_X = X[sidx:eidx,:]
            batch_Y = Y[sidx:eidx,:]
            loss = sgd(net, batch_X, batch_Y)
            println("Loss is:")
            println(loss)
            #println("[Epo $(epo) : batch $(batch)]: loss = $(loss)")
        end
    end
end

if size(trainX)[1] != 60000
    trainX = trainX'
end
@assert size(trainX)[1] == size(trainY)[1]
println(size(trainX), size(trainY))

train(net, trainX, trainY)

(60000,784)(60000,)




Epo 1:
1  64
x is:
[2.76072e7,3.00977e7,3.30141e7,3.41204e7,3.59269e7,2.94417e7,3.615e7,3.11444e7,3.42941e7,2.89352e7]
y is:
[0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0]
At FC loss is:
[-0.0,-0.0,-0.0,-0.0,-0.0,0.0,-0.0,-0.0,-0.0,-0.0]
At ReLu loss is:
[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]
At FC loss is:
[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]
At ReLu loss is:
[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0

ErrorException: schedule: Task not runnable

ERROR (unhandled task failure): schedule: Task not runnable
 in enq_work(::Task) at ./event.jl:77
 in uv_writecb_task(::Ptr{Void}, ::Int32) at ./stream.jl:873
 in jlcapi_uv_writecb_task_22134 at /Applications/Julia-0.5.app/Contents/Resources/julia/lib/julia/sys.dylib:?
 in process_events(::Bool) at ./libuv.jl:84
 in wait() at ./event.jl:188
 in wait(::Condition) at ./event.jl:27
 in wait_readnb(::Base.PipeEndpoint, ::Int64) at ./stream.jl:303
 in eof(::Base.PipeEndpoint) at ./stream.jl:58
 in watch_stream(::Base.PipeEndpoint, ::String) at /Users/jennyw/.julia/v0.5/IJulia/src/stdio.jl:45
 in (::IJulia.##19#23)() at ./task.jl:360


In [170]:
minimum(trainX[1,:])

0.0