# This notebook is based on: 
https://github.com/denizyuret/Knet.jl

## Boston housing 
Data: 506 x 13 (samples x features), Val: 506 x 1 (target values)

In [1]:
using Knet

In [2]:
# Linear model: multiply the inputs x by the weight matrix w[1] and add the
# bias w[2] to every entry of the product.
function predict(w, x)
    weights, bias = w[1], w[2]
    return weights * x .+ bias
end

predict (generic function with 1 method)

In [3]:
# Mean squared error between the targets y and the model output predict(w, x).
function loss(w, x, y)
    residual = y - predict(w, x)
    return mean(abs2, residual)
end

loss (generic function with 1 method)

In [4]:
# Gradient function of `loss`, built with `grad` (in scope via `using Knet`).
# `train` calls it as lossgrad(w, x, y) and indexes the result entry by entry,
# in step with `w`.
lossgrad = grad(loss)

(::gradfun) (generic function with 1 method)

In [5]:
# train(w, data; lr=0.05)
#
# Run one pass of plain gradient descent over `data`, updating `w` in place.
#
# Arguments:
#   w    - indexable, mutable collection of parameters; each entry is replaced
#          by its updated value.
#   data - iterable of (x, y) pairs; each pair is passed with `w` to `lossgrad`.
#
# Keywords:
#   lr   - step size that scales every gradient entry (default 0.05).
#
# Returns the same `w` object that was passed in.
function train(w, data; lr=0.05)
    for (x, y) in data
        dw = lossgrad(w, x, y)
        # Walk w's own indices instead of assuming they are 1:length(w).
        for i in eachindex(w)
            # Rebind the entry rather than broadcasting in place: entries may be
            # plain scalars (e.g. a scalar bias), which cannot be mutated.
            w[i] -= lr * dw[i]
        end
    end
    return w
end

train (generic function with 1 method)

In [6]:
include(Knet.dir("data", "housing.jl"))

In [7]:
# Keep the first two values returned by housing() as the inputs x and the
# targets y; any further returned values are ignored by this destructuring.
x, y = housing()

([-0.419367 -0.416927 … -0.407361 -0.41459; 0.284548 -0.48724 … -0.48724 -0.48724; … ; 0.440616 0.440616 … 0.402826 0.440616; -1.0745 -0.491953 … -0.864446 -0.668397], [24.0 21.6 … 22.0 11.9], [-0.419367 -0.416927 … -0.407361 -0.41459; 0.284548 -0.48724 … -0.48724 -0.48724; … ; 0.440616 0.440616 … 0.402826 0.440616; -1.0745 -0.491953 … -0.864446 -0.668397], [24.0 21.6 … 22.0 11.9])

In [17]:
# Parameters of the linear model: a 1x13 weight matrix of Gaussian noise scaled
# by 0.1, and a scalar bias starting at 0.0.
w = Any[ 0.1*randn(1,13), 0.0 ]

2-element Array{Any,1}:
  [0.0175724 0.0725907 … -0.0555173 0.021984]
 0.0                                         

In [18]:
# Take 25 gradient steps, each using the whole dataset as a single batch, and
# print the loss after every step.
for iteration in 1:25
    train(w, [(x, y)])
    println(loss(w, x, y))
end

462.3609491883407
375.414403819394
307.72704730569035
253.4172590986987
209.5779074516341
174.14233756029174
145.48586008495732
122.30423814549633
103.54602498677421
88.36268406076513
76.06914083575553
66.112131093815
58.04472579553214
51.50580822361503
46.203532569439446
41.90198638065298
38.41043256988101
35.57462928148026
33.26982418307588
31.39509859599102
29.86880018770742
28.624853814418746
27.609780992390085
26.78029136692606
26.101336016010507


## Beloved <3 MNIST
Data: 60k x 28x28x1 (training images), Val: 10k x 28x28x1 (test images)

In [19]:
# Linear classifier on flattened images: mat(x) converts the 28x28x1 x N image
# batch into a 784 x N matrix, which is then multiplied by w[1] and shifted by
# the bias w[2].
function predict(w, x)
    flat = mat(x)
    return w[1] * flat .+ w[2]
end

predict (generic function with 1 method)

In [26]:
# Classification loss: pass the class scores from predict(w, x) (one column per
# example) and the labels y to Knet's nll (negative log likelihood).
function loss(w, x, y)
    scores = predict(w, x)
    return nll(scores, y)
end

loss (generic function with 1 method)

In [27]:
# Gradient function of the classification `loss`; `train` calls it as
# lossgrad(w, x, y).
lossgrad = grad(loss)

(::gradfun) (generic function with 1 method)

In [22]:
# Load the MNIST helper script from Knet's data directory (presumably the file
# that defines mnist()), then fetch the training and test images and labels.
include(Knet.dir("data", "mnist.jl"))
X_train, y_train, X_test, y_test = mnist()

[1m[36mINFO: [39m[22m[36mCloning cache of GZip from https://github.com/JuliaIO/GZip.jl.git
[39m[1m[36mINFO: [39m[22m[36mInstalling GZip v0.3.0
[39m[1m[36mINFO: [39m[22m[36mPackage database updated
[39m[1m[36mINFO: [39m[22m[36mMETADATA is out-of-date — you may not have the latest version of GZip
[39m[1m[36mINFO: [39m[22m[36mUse `Pkg.update()` to get the latest versions of your packages
[39m[1m[36mINFO: [39m[22m[36mPrecompiling module GZip.
[39m[1m[36mINFO: [39m[22m[36mLoading MNIST...
[39m  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 9680k  100 9680k    0     0  1950k      0  0:00:04  0:00:04 --:--:-- 2195k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1610k  100 1610k    0     0   353k      0  0:00:04  0:00:04 --:--:

(Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

...

Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], UInt8[0x05, 0x0a, 0x04, 0x01, 0x09, 0x02, 0x01, 0x03, 0x01, 0x04  …  0x09, 0x02, 0x09, 0x05, 0x01, 0x08, 0x03, 0x05, 0x06, 0x08], Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

...

Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 

In [28]:
# Wrap the training and test sets in minibatch iterators with batch size 100.
# `train` iterates them as (x, y) pairs and `accuracy` consumes them directly.
d_train = minibatch(X_train, y_train, 100)
d_test = minibatch(X_test, y_test, 100)

Knet.MB(Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], UInt8[0x07 0x02 … 0x05 0x06], 100, 10000, false, 1:10000, [28, 28, 1, 10000], [10000], Array{Float32,4}, Array{UInt8,1})

In [29]:
# Linear classifier parameters, all Float32: a 10x784 weight matrix of Gaussian
# noise scaled by 0.1 and a 10x1 zero bias.
w = Any[ 0.1f0*randn(Float32, 10, 784), zeros(Float32,10,1) ]
# Report accuracy before any training as the epoch-0 baseline.
println((:epoch, 0, :trn, accuracy(w,d_train,predict), :tst, accuracy(w, d_test, predict)))

(:epoch, 0, :trn, 0.0887, :tst, 0.0914)


In [30]:
# Train for 10 epochs of minibatch gradient descent with step size 0.1 and
# print the train/test accuracy after each epoch.
for epoch in 1:10
    train(w, d_train; lr=0.1)
    println((
        :epoch, epoch,
        :trn, accuracy(w, d_train, predict),
        :tst, accuracy(w, d_test, predict),
    ))
end

(:epoch, 1, :trn, 0.8901, :tst, 0.8976)
(:epoch, 2, :trn, 0.9022666666666667, :tst, 0.9073)
(:epoch, 3, :trn, 0.9083, :tst, 0.9119)
(:epoch, 4, :trn, 0.91125, :tst, 0.914)
(:epoch, 5, :trn, 0.91355, :tst, 0.9163)
(:epoch, 6, :trn, 0.9149666666666667, :tst, 0.9181)
(:epoch, 7, :trn, 0.9166333333333333, :tst, 0.9184)
(:epoch, 8, :trn, 0.9178, :tst, 0.9191)
(:epoch, 9, :trn, 0.9185666666666666, :tst, 0.92)
(:epoch, 10, :trn, 0.91955, :tst, 0.9204)


## Multilayer perceptron

In [31]:
# Multilayer perceptron forward pass. `w` holds alternating weight/bias entries:
# every pair before the last two entries is applied and followed by relu, and
# the final pair w[end-1], w[end] produces the output without an activation.
function predict(w, x)
    h = mat(x)
    i = 1
    # Step through the hidden layers two entries (weight, bias) at a time.
    while i <= length(w) - 2
        h = relu.(w[i] * h .+ w[i+1])
        i += 2
    end
    return w[end-1] * h .+ w[end]
end

predict (generic function with 1 method)

In [32]:
# MLP parameters with one hidden layer: 784 inputs -> 64 hidden units -> 10
# outputs. Weights are Gaussian noise scaled by 0.1; biases start at zero.
w = Any[ 0.1f0*randn(Float32,64,784), zeros(Float32,64,1),
         0.1f0*randn(Float32,10,64),  zeros(Float32,10,1) ]

4-element Array{Any,1}:
 Float32[0.219337 -0.104153 … -0.0799293 0.0184274; 0.0706019 0.0579153 … 0.14288 0.0447306; … ; 0.0528971 0.138414 … 0.160881 0.00744592; -0.112984 -0.079526 … -0.0136026 0.0441804]    
 Float32[0.0; 0.0; … ; 0.0; 0.0]                                                                                                                                                          
 Float32[0.0236533 -0.0320957 … -0.145905 -0.0154045; -0.200496 0.11305 … 0.0355011 0.0879171; … ; -0.0103154 0.093543 … -0.143558 -0.0291289; -0.0186122 -0.0963684 … 0.128764 -0.110733]
 Float32[0.0; 0.0; … ; 0.0; 0.0]                                                                                                                                                          

In [33]:
# train(model, data, optimizer)
#
# Make one pass over `data`: for each (x, y) pair, compute the gradients with
# lossgrad(model, x, y) and pass them to update! together with `model` and
# `optimizer`. Returns nothing.
function train(model, data, optimizer)
    for batch in data
        x, y = batch
        update!(model, lossgrad(model, x, y), optimizer)
    end
    return nothing
end

train (generic function with 2 methods)

In [34]:
# Create Adam optimizer state for the parameters in w, print the untrained
# baseline accuracy, then run 25 training epochs and print train/test accuracy
# after each one.
o = optimizers(w, Adam)
println((
    :epoch, 0,
    :trn, accuracy(w, d_train, predict),
    :tst, accuracy(w, d_test, predict),
))
for epoch in 1:25
    train(w, d_train, o)
    println((
        :epoch, epoch,
        :trn, accuracy(w, d_train, predict),
        :tst, accuracy(w, d_test, predict),
    ))
end

(:epoch, 0, :trn, 0.08376666666666667, :tst, 0.0867)
(:epoch, 1, :trn, 0.9261, :tst, 0.9258)
(:epoch, 2, :trn, 0.9454833333333333, :tst, 0.9448)
(:epoch, 3, :trn, 0.9569166666666666, :tst, 0.955)
(:epoch, 4, :trn, 0.9641333333333333, :tst, 0.9596)
(:epoch, 5, :trn, 0.9691333333333333, :tst, 0.9638)
(:epoch, 6, :trn, 0.97335, :tst, 0.9662)
(:epoch, 7, :trn, 0.97665, :tst, 0.9684)
(:epoch, 8, :trn, 0.9795666666666667, :tst, 0.9704)
(:epoch, 9, :trn, 0.9818666666666667, :tst, 0.9725)
(:epoch, 10, :trn, 0.9837, :tst, 0.9732)
(:epoch, 11, :trn, 0.9854666666666667, :tst, 0.9745)
(:epoch, 12, :trn, 0.9868833333333333, :tst, 0.9743)
(:epoch, 13, :trn, 0.9880166666666667, :tst, 0.9752)
(:epoch, 14, :trn, 0.9888833333333333, :tst, 0.9749)
(:epoch, 15, :trn, 0.9894166666666667, :tst, 0.9755)
(:epoch, 16, :trn, 0.99025, :tst, 0.9753)
(:epoch, 17, :trn, 0.99095, :tst, 0.975)
(:epoch, 18, :trn, 0.9916, :tst, 0.9753)
(:epoch, 19, :trn, 0.9924333333333333, :tst, 0.9757)
(:epoch, 20, :trn, 0.9930666666

## CNN with LeNet model

In [35]:
# LeNet-style forward pass: two stages of convolution + bias -> relu -> pool,
# then a relu fully connected layer and a final linear output layer.
function predict(w, input)
    conv1 = conv4(w[1], input) .+ w[2]
    feat1 = pool(relu.(conv1))
    conv2 = conv4(w[3], feat1) .+ w[4]
    feat2 = pool(relu.(conv2))
    # Flatten the feature maps with mat before the fully connected layers.
    hidden = relu.(w[5] * mat(feat2) .+ w[6])
    return w[7] * hidden .+ w[8]
end

predict (generic function with 1 method)

In [36]:
# LeNet parameters as weight/bias pairs, in the order predict indexes them:
#   w[1], w[2]: first convolution  (5x5 filters; presumably 1 -> 20 channels — confirm against conv4's weight layout)
#   w[3], w[4]: second convolution (5x5 filters; presumably 20 -> 50 channels)
#   w[5], w[6]: fully connected, 800 -> 500
#               (800 presumably = 4*4*50 after the two conv/pool stages on 28x28 inputs — TODO confirm)
#   w[7], w[8]: fully connected, 500 -> 10 outputs
# Weights use xavier initialisation; all biases start at zero.
w = Any[ xavier(Float32,5,5,1,20),  zeros(Float32,1,1,20,1),
         xavier(Float32,5,5,20,50), zeros(Float32,1,1,50,1),
         xavier(Float32,500,800),   zeros(Float32,500,1),
         xavier(Float32,10,500),    zeros(Float32,10,1) ]

8-element Array{Any,1}:
 Float32[-0.14364 -0.153023 … 0.157237 -0.0098967; -0.125013 0.00953279 … -0.157869 0.0244551; … ; -0.114217 -0.126083 … -0.0454054 -0.203539; -0.19696 0.125229 … -0.189031 -0.180972]

Float32[-0.0493266 0.160552 … 0.154774 0.104121; -0.181237 0.110905 … 0.0269676 -0.184835; … ; 0.151213 0.160673 … 0.156837 0.0675961; 0.0296122 0.192673 … 0.191431 0.0679472]

Float32[-0.202591 0.163627 … -0.0694741 0.00246605; -0.0946854 -0.0325526 … -0.157454 0.0560656; … ; -0.0685295 -0.146887 … -0.0374788 -0.101907; 0.0058462 -0.064341 … 0.102944 0.168806]

...

Float32[0.149431 0.12241 … 0.195933 -0.107237; -0.0502396 0.0703593 … 0.131912 -0.0704232; … ; 0.0197356 -0.0585693 … -0.137143 0.108635; -0.174208 -0.115621 … 0.0485667 -0.0334628]

Float32[-0.0782647 0.188425 … -0.117793 -0.177152; 0.196198 -0.0849982 … -0.0551954 0.152766; … ; 0.129804 -0.0388913 … 0.0848088 0.19066; 0.19199 -0.117048 … -0.0207966 0.178987]

Float32[-0.196894 0.0600062 … -0.13098 -0.069929; -0.1614

In [None]:
# Rebuild the minibatch iterators so the inputs are produced as KnetArray, and
# convert every parameter in w to a KnetArray as well.
# NOTE(review): KnetArray is presumably Knet's GPU array type, so this cell
# likely needs a GPU — confirm.
d_train = minibatch(X_train,y_train,100,xtype=KnetArray)
d_test = minibatch(X_test,y_test,100,xtype=KnetArray)
w = map(KnetArray, w)

In [None]:
# Create Adam optimizer state for the CNN parameters in w, print the untrained
# baseline accuracy, then run 10 training epochs and print train/test accuracy
# after each one.
o = optimizers(w, Adam)
println((
    :epoch, 0,
    :trn, accuracy(w, d_train, predict),
    :tst, accuracy(w, d_test, predict),
))
for epoch in 1:10
    train(w, d_train, o)
    println((
        :epoch, epoch,
        :trn, accuracy(w, d_train, predict),
        :tst, accuracy(w, d_test, predict),
    ))
end