# Julia 深度學習：卷積神經網路模型簡介

本範例有可選用套件 CuArrays，請在執行以下範例前先安裝。

```
] add CuArrays
```

In [1]:
# using Pkg
# Pkg.add("CuArrays")

In [2]:
# if error occurred during precompling Flux, close jupyter and re-open as administrator
using Flux
using Flux.Data: DataLoader
using Flux: @epochs, onecold, onehotbatch, throttle, logitcrossentropy
using MLDatasets
using Statistics

## 載入資料

In [3]:
train_X, train_y = MNIST.traindata(Float32)
test_X, test_y = MNIST.testdata(Float32)
println("Training data X: type = $(typeof(train_X)), size = $(size(train_X))")
println("Training data y: type = $(typeof(train_y)), size = $(size(train_y))")
println("Testing data X: type = $(typeof(test_X)), size = $(size(test_X))")
println("Testing data y: type = $(typeof(test_y)), size = $(size(test_y))")

Training data X: type = Array{Float32,3}, size = (28, 28, 60000)
Training data y: type = Array{Int64,1}, size = (60000,)
Testing data X: type = Array{Float32,3}, size = (28, 28, 10000)
Testing data y: type = Array{Int64,1}, size = (10000,)


In [4]:
train_X = reshape(train_X, 28, 28, 1, :)
test_X = reshape(test_X, 28, 28, 1, :)
train_y = onehotbatch(train_y, 0:9)
test_y = onehotbatch(test_y, 0:9)
println("Training data X: type = $(typeof(train_X)), size = $(size(train_X))")
println("Testing data X: type = $(typeof(test_X)), size = $(size(test_X))")
println("Training data y: type = $(typeof(train_y)), size = $(size(train_y))")
println("Testing data y: type = $(typeof(test_y)), size = $(size(test_y))")

Training data X: type = Array{Float32,4}, size = (28, 28, 1, 60000)
Testing data X: type = Array{Float32,4}, size = (28, 28, 1, 10000)
Training data y: type = Flux.OneHotMatrix{Array{Flux.OneHotVector,1}}, size = (10, 60000)
Testing data y: type = Flux.OneHotMatrix{Array{Flux.OneHotVector,1}}, size = (10, 10000)


In [5]:
batchsize = 1024
train = DataLoader(train_X, train_y, batchsize=batchsize, shuffle=true)
test = DataLoader(test_X, test_y, batchsize=batchsize)

DataLoader((Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

...

Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], Bool[0 0 … 0 0; 0 0 … 0 0; … ; 0 0 … 0 0; 0 0 … 0 0]), 1024, 10000, true, 10000, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10  …  9991, 9992, 9993, 9994, 9995, 9996, 9997, 9998, 9999, 10000], false)

## CNN (convolutional neural networks) 模型



#### `Flux.MaxPool`

<img src="maxpoolExplain.png" width=400/>

- `MaxPool(k; pad = 0, stride = k)`
    - Max pooling layer. k is the size of the window for each dimension of the input.
    
- [Max-pooling](https://computersciencewiki.org/index.php/Max-pooling_/_Pooling)
    - a sample-based discretization process
    - to down-sample an input representation
    - to help over-fitting by providing an abstracted form of the representation
    - reduces the computational cost 
    - `MeanPool` works in a similar way but using a `mean` filter instead of the `max`.
 
#### convolutonal layer

<img src="ConvExplain.png" width=400/>

- `Conv((3, 3), 1=>16, pad=(1,1), relu)`
    - Apply a Conv layer to a **1**-channel input using a **3×3** window size, giving us a **16**-channel output. Output is activated with **ReLU**.
- `Conv((3, 3), 16=>32, pad=(1,1), relu)`
    - Apply a Conv layer to a **16**-channel input using a **3×3** window size, giving us a **32**-channel output. Output is activated with **ReLU**.

Feature map

- 一般卷積網路過程中，除了Input image不稱為Feature map外，中間產生的圖我們都稱之為Feature map，原因很簡單就是這些中間產生的圖都是為了「描繪出該任務所應該產生對應的特徵資料」
    
一個卷積計算基本上有幾個部份:

- 輸入的圖: 假設大小是W × W。
- Filter (kernel map)大小是 ks × ks
- Stride: kernel map在移動時的步伐長度 S
- 輸出的圖大小為 new_height × new_width

Padding
- 卷積後的圖內縮的格數。[更多](https://medium.com/@chih.sheng.huang821/%E5%8D%B7%E7%A9%8D%E7%A5%9E%E7%B6%93%E7%B6%B2%E8%B7%AF-convolutional-neural-network-cnn-%E5%8D%B7%E7%A9%8D%E8%A8%88%E7%AE%97%E4%B8%AD%E7%9A%84%E6%AD%A5%E4%BC%90-stride-%E5%92%8C%E5%A1%AB%E5%85%85-padding-94449e638e82)

In [6]:
model = Chain(
    Conv((3, 3), 1=>16, pad=(1,1), relu),
    MaxPool((2,2)),
    Conv((3, 3), 16=>32, pad=(1,1), relu),
    MaxPool((2,2)),
    Conv((3, 3), 32=>32, pad=(1,1), relu),
    MaxPool((2,2)),
    flatten,
    Dense(288, 10),
    softmax)

Chain(Conv((3, 3), 1=>16, relu), MaxPool((2, 2), pad = (0, 0, 0, 0), stride = (2, 2)), Conv((3, 3), 16=>32, relu), MaxPool((2, 2), pad = (0, 0, 0, 0), stride = (2, 2)), Conv((3, 3), 32=>32, relu), MaxPool((2, 2), pad = (0, 0, 0, 0), stride = (2, 2)), flatten, Dense(288, 10), softmax)

## 使用 CUDA

In [7]:
using CuArrays

In [20]:
model = model |> gpu
train_X = train_X |> gpu
train_y = train_y |> gpu
test_X = test_X |> gpu
test_y = test_y |> gpu
# there is a warning, feel free to ignore it (by ZK 2020-05-29)

10×10000 Flux.OneHotMatrix{CuArray{Flux.OneHotVector,1,Nothing}}:
 0  0  0  1  0  0  0  0  0  0  1  0  0  …  0  0  0  0  0  1  0  0  0  0  0  0
 0  0  1  0  0  1  0  0  0  0  0  0  0     0  0  0  0  0  0  1  0  0  0  0  0
 0  1  0  0  0  0  0  0  0  0  0  0  0     0  0  0  0  0  0  0  1  0  0  0  0
 0  0  0  0  0  0  0  0  0  0  0  0  0     0  0  0  0  0  0  0  0  1  0  0  0
 0  0  0  0  1  0  1  0  0  0  0  0  0     0  0  0  0  0  0  0  0  0  1  0  0
 0  0  0  0  0  0  0  0  1  0  0  0  0  …  1  0  0  0  0  0  0  0  0  0  1  0
 0  0  0  0  0  0  0  0  0  0  0  1  0     0  1  0  0  0  0  0  0  0  0  0  1
 1  0  0  0  0  0  0  0  0  0  0  0  0     0  0  1  0  0  0  0  0  0  0  0  0
 0  0  0  0  0  0  0  0  0  0  0  0  0     0  0  0  1  0  0  0  0  0  0  0  0
 0  0  0  0  0  0  0  1  0  1  0  0  1     0  0  0  0  1  0  0  0  0  0  0  0

## 損失函數

In [21]:
loss(x, y) = logitcrossentropy(model(x), y)

loss (generic function with 1 method)

## Callback 函式

In [22]:
function test_loss()
    l = 0f0
    for (x, y) in test
        l += loss(x, y)
    end
    l/length(test)
end
evalcb() = @show(test_loss())

evalcb (generic function with 1 method)

## 模型訓練

#### hyper parameter

In [23]:
# ZK's setting
learining_rate = 0.002;
decay = 0.1;
decay_step = 1;
clip = 1e-4;
optimizer = Flux.Optimiser(ExpDecay(learining_rate, decay, decay_step, clip), ADAM(learining_rate));

In [24]:
epochs = 20
@epochs epochs Flux.train!(loss, params(model), train, ADAM(0.005), cb=throttle(evalcb, 10))
# @epochs epochs Flux.train!(loss, params(model), train, optimizer, cb=throttle(evalcb, 10))

┌ Info: Epoch 1
└ @ Main C:\Users\HSI\.julia\packages\Flux\Fj3bt\src\optimise\train.jl:121


TaskFailedException: TaskFailedException:
MethodError: no method matching gemm!(::Val{false}, ::Val{false}, ::Int64, ::Int64, ::Int64, ::Float32, ::Ptr{Float32}, ::CUDAdrv.CuPtr{Float32}, ::Float32, ::Ptr{Float32})
Closest candidates are:
  gemm!(::Val, ::Val, ::Int64, ::Int64, ::Int64, ::Float32, ::Ptr{Float32}, !Matched::Ptr{Float32}, ::Float32, ::Ptr{Float32}) at C:\Users\HSI\.julia\packages\NNlib\FAI3o\src\gemm.jl:35
  gemm!(::Val, ::Val, ::Int64, ::Int64, ::Int64, !Matched::Float64, !Matched::Ptr{Float64}, !Matched::Ptr{Float64}, !Matched::Float64, !Matched::Ptr{Float64}) at C:\Users\HSI\.julia\packages\NNlib\FAI3o\src\gemm.jl:35
  gemm!(::Val, ::Val, ::Int64, ::Int64, ::Int64, !Matched::Complex{Float64}, !Matched::Ptr{Complex{Float64}}, !Matched::Ptr{Complex{Float64}}, !Matched::Complex{Float64}, !Matched::Ptr{Complex{Float64}}) at C:\Users\HSI\.julia\packages\NNlib\FAI3o\src\gemm.jl:35
  ...
Stacktrace:
 [1] macro expansion at C:\Users\HSI\.julia\packages\NNlib\FAI3o\src\impl\conv_im2col.jl:58 [inlined]
 [2] (::NNlib.var"#343#threadsfor_fun#160"{Array{Float32,3},Float32,Float32,Array{Float32,5},Array{Float32,5},CuArray{Float32,5,CuArray{Float32,4,Nothing}},DenseConvDims{3,(3, 3, 1),1,16,(1, 1, 1),(1, 1, 1, 1, 0, 0),(1, 1, 1),false},Int64,Int64,Int64,UnitRange{Int64}})(::Bool) at .\threadingconstructs.jl:61
 [3] (::NNlib.var"#343#threadsfor_fun#160"{Array{Float32,3},Float32,Float32,Array{Float32,5},Array{Float32,5},CuArray{Float32,5,CuArray{Float32,4,Nothing}},DenseConvDims{3,(3, 3, 1),1,16,(1, 1, 1),(1, 1, 1, 1, 0, 0),(1, 1, 1),false},Int64,Int64,Int64,UnitRange{Int64}})() at .\threadingconstructs.jl:28

## 模型評估

In [13]:
accuracy(x, y) = mean(onecold(model(x)) .== onecold(y))

accuracy (generic function with 1 method)

In [14]:
accuracy(test_X, test_y)

0.1537