# Julia 深度學習：卷積神經網路模型簡介

## 作業 033：訓練 CNN 學習門牌號碼資料集

訓練一個 CNN 模型來學習門牌號碼資料集。

In [None]:
using Flux
using Flux.Data: DataLoader
using Flux: @epochs, onecold, onehotbatch, throttle, logitcrossentropy
using MLDatasets
using Statistics

┌ Info: Precompiling Flux [587475ba-b771-5e3f-ad9e-33799f191a9c]
└ @ Base loading.jl:1260
┌ Info: Precompiling MLDatasets [eb30cadb-4394-5ae3-aed4-317e484a6458]
└ @ Base loading.jl:1260


In [None]:
using CuArrays

## 讀取資料

In [None]:
# Load observations 1:20000 of the SVHN2 training split and 1:2000 of the test split,
# requesting Float32 features. Each call returns a (features, labels) pair.
(train_X, train_y) = SVHN2.traindata(Float32, 1:20_000)
(test_X, test_y) = SVHN2.testdata(Float32, 1:2_000)

In [None]:
# Print the dimensions of every feature and label array, one per line.
for data in (train_X, train_y, test_X, test_y)
    println(size(data))
end

In [None]:
# One-hot encode both label arrays against the class labels 1:10.
train_y, test_y = map(labels -> onehotbatch(labels, 1:10), (train_y, test_y))

## Hyperparameters

In [None]:
# Training schedule: number of passes over the training loader and mini-batch size.
epochs, batchsize = 20, 64;

# Step size shared by ExpDecay and ADAM below (the variable name's spelling is kept
# as-is because other cells may refer to it).
learining_rate = 0.002;

# Remaining ExpDecay arguments, passed positionally after the step size.
decay, decay_step, clip = 0.1, 1, 1e-4;

# NOTE(review): Flux's documentation composes ExpDecay as the *last* element of an
# Optimiser; here it runs before ADAM — confirm this ordering is intended.
optimizer = Flux.Optimiser(
    ExpDecay(learining_rate, decay, decay_step, clip),
    ADAM(learining_rate),
);

## data loader

In [None]:
# Mini-batch iterators over the training and test arrays; only the training loader
# shuffles its observations.
train = DataLoader(train_X, train_y; batchsize=batchsize, shuffle=true);
test = DataLoader(test_X, test_y; batchsize=batchsize);

## loss function

In [None]:
"""
    loss(x, y)

Run the global `model` on the batch `x` and return `logitcrossentropy` of its output
against the targets `y`.
"""
function loss(x, y)
    prediction = model(x)
    return logitcrossentropy(prediction, y)
end;

## callback function

In [None]:
"""
    test_loss()

Return the average of `loss` over all observations yielded by the global `test` loader.

Each batch's loss is weighted by the number of observations in that batch, so a smaller
final batch does not skew the result the way a plain mean of per-batch losses would.
This assumes `loss` returns a per-batch mean and that observations are indexed by the
last dimension of `x` — TODO confirm if either convention changes.
"""
function test_loss()
    total = 0f0
    nobs = 0
    for (x, y) in test
        n = size(x, ndims(x))  # observations in this batch
        total += loss(x, y) * n
        nobs += n
    end
    return total / nobs
end

# Training callback: evaluates `test_loss()` and echoes the expression and its value
# through `@show`.
function evalcb()
    return @show test_loss()
end;

## evaluation function

In [None]:
"""
    accuracy(x, y)

Return the fraction of observations in `x` for which the class decoded from the global
`model`'s output equals the class decoded from the one-hot targets `y`.

Both the model output and `y` are copied to host memory with `cpu` before `onecold`
decodes them, so the decoding and the comparison do not run element-wise on GPU arrays.
`cpu` leaves arrays that are already on the host unchanged.
"""
accuracy(x, y) = mean(onecold(cpu(model(x))) .== onecold(cpu(y)))

## CNN 模型
Chain(Conv((3, 3), 3=>32, relu), MaxPool((2, 2), pad = (0, 0, 0, 0), stride = (2, 2)), Conv((3, 3), 32=>32, relu), MaxPool((2, 2), pad = (0, 0, 0, 0), stride = (2, 2)), Conv((3, 3), 32=>64, relu), MaxPool((2, 2), pad = (0, 0, 0, 0), stride = (2, 2)), flatten, Dense(1024, 256, relu), Dense(256, 10), softmax)

In [None]:
# CNN classifier: three 3x3 convolution + 2x2 max-pooling stages followed by a single
# dense layer producing 10 class scores.
#
# The chain deliberately ends at the Dense layer and emits raw logits: `loss` uses
# `logitcrossentropy`, which applies the log-softmax normalisation itself, so a trailing
# `logsoftmax` layer would only repeat that work. Arg-max predictions are unaffected.
model = Chain(
    Conv((3, 3), 3 => 16, relu; pad=(1, 1)),
    MaxPool((2, 2)),
    Conv((3, 3), 16 => 32, relu; pad=(1, 1)),
    MaxPool((2, 2)),
    Conv((3, 3), 32 => 32, relu; pad=(1, 1)),
    MaxPool((2, 2)),
    flatten,
    # 512 presumably equals 4 * 4 * 32 for 32x32 inputs after three poolings — TODO confirm
    Dense(512, 10),
)

## using CUDA

In [None]:
# Move the model and all data arrays to the GPU.
model = gpu(model);
train_X = gpu(train_X);
train_y = gpu(train_y);
test_X = gpu(test_X);
test_y = gpu(test_y);

# The `train` and `test` loaders were created from the arrays as they were before this
# cell; the assignments above only rebind the names. Rebuild both loaders so the batches
# they yield come from the moved arrays and match the device of `model`.
train = DataLoader(train_X, train_y, batchsize=batchsize, shuffle=true);
test = DataLoader(test_X, test_y, batchsize=batchsize);

## training model

In [None]:
# Run `epochs` passes of `Flux.train!` over the `train` loader. `evalcb` is wrapped in
# `throttle(…, 10)` to limit how often the test loss is reported.
@epochs epochs Flux.train!(
    loss,
    params(model),
    train,
    optimizer;
    cb=throttle(evalcb, 10),
)

## model evaluation

In [None]:
using Printf

# Report accuracy on the whole test set as a percentage with two decimals.
let percent = accuracy(test_X, test_y) * 100
    @printf("Accuracy: %.2f%%\n", percent)
end