# MLP on MINST Dataset

In [1]:
Pkg.update()
Pkg.add("MNIST")

INFO: Updating METADATA...
INFO: Computing changes...
INFO: No packages to install, update or remove
INFO: Nothing to be done


In [1]:
using MNIST
features = trainfeatures(1)
label = trainlabel(1)

trainX, trainY = traindata()
testX, testY = testdata()

(
[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0],

[7.0,2.0,1.0,0.0,4.0,1.0,4.0,9.0,5.0,9.0  …  7.0,8.0,9.0,0.0,1.0,2.0,3.0,4.0,5.0,6.0])

In [2]:
abstract Layer
abstract Nonlinearity <: Layer
abstract LossCriteria <: Layer

In [55]:
# Define the Fully Connected layers
type FCLayer <: Layer
    W           :: Array{Float64}
    last_input  :: Array{Float64}
    last_output :: Array{Float64}
    last_loss   :: Array{Float64}

    function FCLayer(i, o)
        return new(rand(o,i), zeros(i), zeros(o), zeros(o))
    end
end

function forward(l::FCLayer, x::Array{Float64,1})
    @assert size(x) == (size(l.W)[2],)
    l.last_input  = x
    l.last_output = l.W * x # matrix multiplication
    l.last_output
end

function backward(l::FCLayer, loss::Array{Float64,1})
    @assert size(loss) == (size(l.W)[1],)
    l.last_loss = loss
    l.W'*loss 
end

function gradient(l::FCLayer)
    @assert size(loss) == (size(l.W)[1],)
    l.W .* l.last_loss
end

function getParam(l::FCLayer)
    l.W
end

function setParam(l::FCLayer, theta::Array{Float64})
    @assert size(l.W) == size(theta)
    l.W = theta
end

FCLayer(10,20)




FCLayer([0.479665 0.0555237 … 0.913258 0.0387839; 0.0890121 0.664964 … 0.31888 0.77842; … ; 0.547448 0.339538 … 0.408708 0.869741; 0.268743 0.395923 … 0.65829 0.431577],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0])

In [56]:
# Define the ReLu layers
type ReLu <: Nonlinearity
    alpha       :: Float64
    last_input  :: Array{Float64}
    last_output :: Array{Float64}
    last_loss   :: Array{Float64}
    function ReLu(alpha::Float64 = 1.0)
        @assert alpha >= 0.
        return new(alpha, Float64[], Float64[], Float64[])
    end
end

function forward(l::ReLu, x::Array{Float64})
    l.last_input  = x
    l.last_output = map(y -> max(0., y*l.alpha), x)
    l.last_output
end

function backward(l::ReLu, loss::Array{Float64})
    @assert size(l.last_input) == size(loss)
    l.last_loss = loss
    map(idx -> l.last_input[idx]>=0 ? l.last_input[idx]*l.alpha*loss[idx] : 0., indices(x))
end

function gradient(l::ReLu)
    0
end

function getParam(l::ReLu)
    0
end

function setParam(l::ReLu, theta::Array{Float64})
    nothing
end

ReLu()



ReLu(1.0,Float64[],Float64[],Float64[])

In [57]:
type CrossEntropyLoss <: LossCriteria
    last_loss  :: Array{Float64}
    last_input :: Array{Float64}
    function CrossEntropyLoss()
        return new(Float64[], Float64[])
    end
end    

function forward(l::CrossEntropyLoss, y::Array{Float64,1}, label::Int32)
    """
    [label]  label[i] == 1 iff the data is classified to class i
    [y]      final input to the loss layer
    """
    return -log(e .^ y ./ sum(e .^ y))[label]
end

function backward(l::CrossEntropyLoss, x::Array{Float64,1}, label::Int32)
    """
    [label]  label[i] == 1 iff the data is classified to class i
    [y]      final input to the loss layer
    """
    y = e.^x / sum(e.^x)
    map(j -> label==j ? y[label]*(1-y[label]) : -y[label]*y[j], indices(x))
end
CrossEntropyLoss()



CrossEntropyLoss(Float64[],Float64[])

In [58]:
abstract NN
type SequentialNet <: NN
    layers :: Array{Layer}
    lossfn :: LossCriteria
    function SequentialNet(layers::Array{Layer}, lossfn::LossCriteria)
        return new(layers, lossfn)
    end
end

function forward(net::SequentialNet, x::Array{Float64}, label::Array)
    local inp = x
    for i = 1:length(net.layers)
        inp = forward(net.layers[i], inp)
    end
    forward(net.lossfn, inp, label)
end

function backward(net::SequentialNet, label::Array)
    @assert size(dldy) == size(net.inputs[end])
    dldy = backward(net.lossfn, net.inputs[end], label)
    for i = length(net.layers):-1:1
        net.losses[i] = backward(net.layers[i], dldy)
    end
    dldy
end



backward (generic function with 5 methods)

In [59]:
layers = [
    FCLayer(784, 196),
    ReLu(),
    FCLayer(196, 49),
    ReLu(),
    FCLayer(49, 10)
]
criteria = CrossEntropyLoss()
net = SequentialNet(layers, criteria)



SequentialNet(Layer[FCLayer([0.608597 0.868805 … 0.344706 0.166482; 0.521523 0.801306 … 0.345923 0.520167; … ; 0.220836 0.366705 … 0.152793 0.399286; 0.272182 0.79883 … 0.727381 0.99018],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0  …  0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0  …  0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0  …  0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]),ReLu(1.0,Float64[],Float64[],Float64[]),FCLayer([0.85175 0.163842 … 0.828514 0.91785; 0.92656 0.354392 … 0.52468 0.893171; … ; 0.548751 0.966283 … 0.736235 0.284618; 0.519856 0.931976 … 0.510095 0.857101],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0  …  0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0  …  0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0  …  0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]),ReLu(1.0,Float64[],Float64[],Float64[]),FCLayer([0.0552645 0.356208 … 0.78092 0.306351; 0.

In [60]:
function sgd(net::SequentialNet, batch_X, batch_Y, lr::Float64 = 0.001)
    batch_size = size(batch_X)[1]
    ttl_loss   = 0.
    for b = 1:batch_size
        X, Y = batch_X[b,:], batch_Y[b,:]
        println("$(size(X)) $(size(Y))")
        loss = forward(net, X, Y) # Propogate the input and output, calculate the loss
        backward(net, Y) # Propagate the dldy
        for l = 1:length(net.layers)
            layer = net.layers[l]
            setParam(layer, getParam(layer) - lr * gradient(layer) / batch_size )
        end
        ttl_loss += loss
    end
    ttl_loss
end

function train(net::SequentialNet, X, Y)
    batch_size, N = 64, size(Y)[1]
    batch=0
    for epo = 1:100
        println("Epo $(epo):")
        for bid = 0:ceil(length(X)/batch_size)-1
            batch += 1
            sidx::Int = convert(Int64, bid*batch_size+1)
            eidx::Int = convert(Int64, min(N, (bid+1)*batch_size))
            println("$(sidx)  $(eidx)")
            batch_X = X[sidx:eidx,:]
            batch_Y = Y[sidx:eidx,:]
            loss = sgd(net, batch_X, batch_Y)
            println("[Epo $(epo) : batch $(batch_id)]: loss = $(loss)")
        end
    end
end

if size(trainX)[1] != 60000
    trainX = trainX'
end
@assert size(trainX)[1] == size(trainY)[1]
println(size(trainX), size(trainY))

train(net, trainX, trainY)

(60000,784)(60000,)




Epo 1:
1  64
(784,) (1,)


LoadError: LoadError: MethodError: no method matching forward(::CrossEntropyLoss, ::Array{Float64,1}, ::Array{Float64,1})
Closest candidates are:
  forward(::CrossEntropyLoss, ::Array{Float64,1}, !Matched::Int32) at In[57]:10
  forward(!Matched::SequentialNet, ::Array{Float64,N}, ::Array{T,N}) at In[58]:11
  forward(!Matched::FCLayer, ::Array{Float64,1}) at In[55]:14
  ...
while loading In[60], in expression starting on line 42

In [35]:
wrong()

LoadError: LoadError: UndefVarError: wrong not defined
while loading In[35], in expression starting on line 1

Epo 1:


LoadError: LoadError: ArgumentError: invalid index: 1.0
while loading In[21], in expression starting on line 1

In [15]:
a = [1,2,3,3,4]
a[1:2], size(a), ndims(a), length(a)

([1,2],(5,),1,5)