# MLP on MINST Dataset

In [1]:
Pkg.update()
Pkg.add("MNIST")

INFO: Updating METADATA...
INFO: Computing changes...
INFO: No packages to install, update or remove
INFO: Nothing to be done


In [2]:
using MNIST
features = trainfeatures(1)
label = trainlabel(1)

trainX, trainY = traindata()
testX, testY = testdata()

(
[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0],

[7.0,2.0,1.0,0.0,4.0,1.0,4.0,9.0,5.0,9.0  …  7.0,8.0,9.0,0.0,1.0,2.0,3.0,4.0,5.0,6.0])

In [3]:
abstract Layer
abstract Nonlinearity <: Layer
abstract LossCriteria <: Layer

In [4]:
# Define the Fully Connected layers
type FCLayer <: Layer
    W           :: Array{Float64}
    last_input  :: Array{Float64}
    last_output :: Array{Float64}
    last_loss   :: Array{Float64}

    function FCLayer(i, o)
        return new(rand(o,i), zeros(i), zeros(o), zeros(o))
    end
end

function forward(l::FCLayer, x::Array{Float64,1})
    @assert ndims(x) == 1 && size(x) == (size(l.W)[2],)
    l.last_input  = x
    l.last_output = l.W * x # matrix multiplication
    l.last_output
end

function backward(l::FCLayer, loss::Array{Float64,1})
    @assert size(loss) == (size(l.W)[1],)
    l.last_loss = loss
    l.W'*loss
end

function gradient(l::FCLayer)
    @assert size(l.last_loss) == (size(l.W)[1],)
    l.last_loss * l.last_input'
end

function getParam(l::FCLayer)
    l.W
end

function setParam(l::FCLayer, theta::Array{Float64})
    @assert size(l.W) == size(theta)
    l.W = theta
end

l = FCLayer(10,20)
forward(l, rand(10))

20-element Array{Float64,1}:
 1.81926
 2.91193
 2.00697
 1.53081
 1.5217 
 2.29203
 1.97438
 2.02591
 2.29147
 2.62403
 2.32256
 2.60383
 1.44785
 2.04666
 1.51117
 2.74444
 2.72597
 2.61056
 2.44449
 1.85639

In [5]:
# Define the ReLu layers
type ReLu <: Nonlinearity
    alpha       :: Float64
    last_input  :: Array{Float64}
    last_output :: Array{Float64}
    last_loss   :: Array{Float64}
    function ReLu(alpha::Float64 = 1.0)
        @assert alpha >= 0.
        return new(alpha, Float64[], Float64[], Float64[])
    end
end

function forward(l::ReLu, x::Array{Float64})
    l.last_input  = x
    l.last_output = map(y -> max(0., y*l.alpha), x)
    l.last_output
end

function backward(l::ReLu, loss::Array{Float64})
    @assert size(l.last_input) == size(loss)
    l.last_loss = loss
    map(idx -> l.last_input[idx]>=0 ? l.last_input[idx]*l.alpha*loss[idx] : 0., 1:length(l.last_input))
end

function gradient(l::ReLu)
    0
end

function getParam(l::ReLu)
    0
end

function setParam(l::ReLu, theta)
    nothing
end

l = ReLu()
println(forward(l, [1.,0.,-1.,2.]))
println(backward(l, [3.0,2.0,1.,1.0]))

[1.0,0.0,0.0,2.0]
[3.0,0.0,0.0,2.0]


In [18]:
type CrossEntropyLoss <: LossCriteria
    last_loss  :: Array{Float64}
    last_input :: Array{Float64}
    function CrossEntropyLoss()
        return new(Float64[], Float64[])
    end
end    

function forward(l::CrossEntropyLoss, y::Array{Float64,1}, label::Array{Float64, 1})
    """
    [label]  label[i] == 1 iff the data is classified to class i
    [y]      final input to the loss layer
    """
    class = convert(Int64,label[1]) + 1
    #println("y - max : ")
    #println(y-maximum(y))
    #println("after: ")
    #println( -log(e .^ (y-maximum(y)) ./ sum(e .^ (y-maximum(y))))[class])
    return -log(e .^ (y-maximum(y)) ./ sum(e .^ (y-maximum(y))))[class]
end

function backward(l::CrossEntropyLoss, x::Array{Float64,1}, label::Array{Float64, 1})
    """
    [label]  label[i] == 1 iff the data is classified to class i
    [y]      final input to the loss layer
    """
    class = convert(Int64,label[1]) + 1
    y = e.^x / sum(e.^x)
    map(j -> class==j ? y[class]*(1-y[class]) : -y[class]*y[j], 1:length(x))
end
l = CrossEntropyLoss()
println(forward(l, [1.,2.,0.], [2.]))
println(backward(l, [1.,2.,0.], [2.]))

2.40760596444438
[-0.022033,-0.059892,0.0819251]




In [19]:
abstract NN
type SequentialNet <: NN
    layers :: Array{Layer}
    lossfn :: LossCriteria
    function SequentialNet(layers::Array{Layer}, lossfn::LossCriteria)
        return new(layers, lossfn)
    end
end

function forward(net::SequentialNet, x::Array{Float64}, label::Array)
    local inp = x
    for i = 1:length(net.layers)
        inp = forward(net.layers[i], inp)
    end
    forward(net.lossfn, inp, label)
end

function backward(net::SequentialNet, label)
    dldy = backward(net.lossfn, net.layers[end].last_output, label)
    for i = length(net.layers):-1:1
        dldy = backward(net.layers[i], dldy)
    end
    dldy
end



backward (generic function with 4 methods)

In [20]:
layers = [
    FCLayer(784, 196),
    ReLu(),
    FCLayer(196, 49),
    ReLu(),
    FCLayer(49, 10)
]
criteria = CrossEntropyLoss()
net = SequentialNet(layers, criteria)



SequentialNet(Layer[FCLayer([0.411501 0.808912 … 0.165549 0.64; 0.534688 0.607825 … 0.324792 0.111297; … ; 0.448119 0.668701 … 0.142683 0.692745; 0.329792 0.95917 … 0.1028 0.417672],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0  …  0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0  …  0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0  …  0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]),ReLu(1.0,Float64[],Float64[],Float64[]),FCLayer([0.499103 0.414951 … 0.288925 0.773726; 0.200904 0.786102 … 0.222144 0.315565; … ; 0.210435 0.411364 … 0.12396 0.892609; 0.765774 0.721375 … 0.862695 0.265068],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0  …  0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0  …  0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0  …  0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]),ReLu(1.0,Float64[],Float64[],Float64[]),FCLayer([0.307165 0.209421 … 0.724684 0.213504; 0.31

In [21]:
function sgd(net::SequentialNet, batch_X, batch_Y, lr::Float64 = 0.0001)
    batch_size = size(batch_X)[1]
    ttl_loss   = 0.
    for b = 1:batch_size
        X, Y = batch_X[b,:], batch_Y[b,:]
        loss = forward(net, X, Y) # Propogate the input and output, calculate the loss
        println(net.layers[1].last_output)
        backward(net, Y) # Propagate the dldy
        for l = 1:length(net.layers)
            layer = net.layers[l]
            println(layer.last_loss)
            setParam(layer, getParam(layer) - lr * gradient(layer) / batch_size )
        end
        ttl_loss += loss
    end
    ttl_loss
end

function train(net::SequentialNet, X, Y)
    batch_size, N = 64, size(Y)[1]
    batch=0
    for epo = 1:100
        println("Epo $(epo):")
        for bid = 0:ceil(length(X)/batch_size)-1
            batch += 1
            sidx::Int = convert(Int64, bid*batch_size+1)
            eidx::Int = convert(Int64, min(N, (bid+1)*batch_size))
            println("$(sidx)  $(eidx)")
            batch_X = X[sidx:eidx,:]
            batch_Y = Y[sidx:eidx,:]
            loss = sgd(net, batch_X, batch_Y)
            println("[Epo $(epo) : batch $(batch)]: loss = $(loss)")
        end
    end
end

if size(trainX)[1] != 60000
    trainX = trainX'
end
@assert size(trainX)[1] == size(trainY)[1]
println(size(trainX), size(trainY))

train(net, trainX, trainY)

(60000,784)(60000,)




Epo 1:
1  64
[13721.9,13447.4,13655.9,13971.9,13265.8,13537.1,13258.3,15460.4,13645.4,13512.5,14253.1,12952.3,14414.6,13638.8,13659.6,13206.2,12684.4,13988.6,12761.8,13559.7,13502.9,13014.5,13594.2,13664.6,14400.2,14891.1,14622.9,13276.6,12912.8,13428.0,14238.5,13750.3,12941.4,14947.5,14793.0,13975.3,13258.9,14746.3,13306.6,12749.2,13395.8,13198.5,14259.0,14454.3,13924.4,14146.5,12718.4,13590.2,14444.8,13691.7,13610.7,13207.2,13729.1,14088.8,13143.3,15138.6,14157.4,13411.0,14069.8,13854.7,13568.0,13535.2,14698.3,12238.7,14354.5,13258.7,14834.9,12630.7,14328.4,14048.5,13118.4,14252.0,13184.2,13636.6,14013.0,14357.2,13487.3,14125.0,14286.6,13668.1,13481.2,13324.5,13429.7,13840.6,13938.1,12005.7,13349.2,12807.5,13440.4,12912.2,14007.5,12540.3,14201.2,14648.3,13330.4,13538.3,13605.4,13198.4,13565.7,12992.9,13348.7,14154.6,12995.6,12792.0,13606.5,13155.7,13530.7,12657.2,14410.2,13829.8,13656.7,13155.3,12765.7,13891.7,14352.8,13333.3,14308.5,14108.6,13671.4,13613.7,13210.2,13611.0,14335.6,13

LoadError: LoadError: InterruptException:
while loading In[21], in expression starting on line 43

In [170]:
minimum(trainX[1,:])

0.0