In [1]:
# Based on https://github.com/FluxML/model-zoo/blob/3e91af32ebfad628b616618b11bfff2f9f519bec/vision/conv_mnist/conv_mnist.jl
using MLDatasets, Flux
# Load the training and test splits of MNIST through MLDatasets.
train_data = MLDatasets.MNIST(; split=:train)
test_data = MLDatasets.MNIST(; split=:test)

"""
    loader(data; batchsize::Int=1, shuffle::Bool=true)

Wrap `data` in a `Flux.DataLoader` that yields `(x, y)` mini-batches.

`data.features` is reshaped to `28×28×1×N` (a singleton channel dimension is
inserted) and `data.targets` is one-hot encoded over the labels `0:9`.

# Keywords
- `batchsize`: number of samples per mini-batch (default `1`).
- `shuffle`: forwarded to `Flux.DataLoader`. Defaults to `true`, which is what
  this function always did before the keyword existed; pass `false` when the
  sample order does not matter (e.g. evaluation).
"""
function loader(data; batchsize::Int=1, shuffle::Bool=true)
    x4dim = reshape(data.features, 28, 28, 1, :)  # insert trivial channel dim
    yhot = Flux.onehotbatch(data.targets, 0:9)    # 10×N OneHotMatrix, N = number of samples
    return Flux.DataLoader((x4dim, yhot); batchsize, shuffle)
end

# Small convolutional classifier. The sizes in the comments assume 28×28×1 inputs.
net = Chain(
    Conv((3, 3), 1 => 6, relu),     # 28×28×1  -> 26×26×6
    MaxPool((2, 2)),                # 26×26×6  -> 13×13×6
    Conv((3, 3), 6 => 16, relu),    # 13×13×6  -> 11×11×16
    MaxPool((2, 2)),                # 11×11×16 -> 5×5×16
    Flux.flatten,                   # 5×5×16   -> 400 features per sample
    Dense(400 => 84, relu),
    Dense(84 => 10),                # identity activation: raw scores for the 10 classes
)
#= alternatively, a simpler architecture:
net = Chain(
    Conv((3, 3), 1 => 6,  relu, bias=false),
    MaxPool((2, 2)),
    Flux.flatten,
    Dense(13*13*6 => 84, relu, bias=false), 
    Dense(84 => 10, identity, bias=false)
)
=#

Chain(
  Conv((3, 3), 1 => 6, relu),           # 60 parameters
  MaxPool((2, 2)),
  Conv((3, 3), 6 => 16, relu),          # 880 parameters
  MaxPool((2, 2)),
  Flux.flatten,
  Dense(400 => 84, relu),               # 33_684 parameters
  Dense(84 => 10),                      # 850 parameters
)                   # Total: 8 arrays, 35_474 parameters, 139.773 KiB.

In [2]:
# Take the first batch of a shuffled, batchsize-1 loader, i.e. one random training sample.
x1, y1 = first(loader(train_data)); # (28×28×1×1 Array{Float32, 4}, 10×1 OneHotMatrix(::Vector{UInt32}))
y1hat = net(x1)
# Print the predicted label next to the true label, both decoded to digits 0:9.
@show hcat(Flux.onecold(y1hat, 0:9), Flux.onecold(y1, 0:9))

using Statistics: mean  # standard library
"""
    loss_and_accuracy(model, data)

Evaluate `model` on all of `data` in a single batch.

Returns a `NamedTuple` `(; loss, acc, split)` where `loss` is the logit
cross-entropy, `acc` is the percentage of correctly classified samples rounded
to two digits, and `split` is copied from `data.split`.
"""
function loss_and_accuracy(model, data)
    # A batch as large as the dataset means the loader holds exactly one element.
    features, labels = only(loader(data; batchsize=length(data)))
    logits = model(features)
    # The model emits raw scores (no softmax layer), hence the logit variant.
    loss = Flux.logitcrossentropy(logits, labels)
    hits = Flux.onecold(logits) .== Flux.onecold(labels)
    acc = round(100 * mean(hits); digits=2)
    return (; loss, acc, split=data.split)
end

@show loss_and_accuracy(net, test_data);  # accuracy about 10%, before training

# Per-epoch metrics are pushed here as NamedTuples by the training loop.
# An explicit element type avoids the `Vector{Any}` that a bare `[]` creates.
train_log = NamedTuple[]

# Training hyper-parameters.
settings = (;
    eta = 1e-2,       # learning rate handed to `Descent`
    epochs = 3,       # number of passes over the training set
    batchsize = 100,  # mini-batch size used by the training loader
)

# Optimiser state for plain gradient descent on `net`.
opt_state = Flux.setup(Descent(settings.eta), net);

hcat(Flux.onecold(y1hat, 0:9), Flux.onecold(y1, 0:9)) = [9 1]
loss_and_accuracy(net, test_data) = (loss = 2.303206f0, acc = 10.35, split = :test)


In [3]:
for epoch in 1:settings.epochs
    # One pass over the shuffled mini-batches; `@time` reports how long the pass took.
    @time for (xbatch, ybatch) in loader(train_data; batchsize=settings.batchsize)
        ∇ = Flux.gradient(net) do m
            Flux.logitcrossentropy(m(xbatch), ybatch)
        end
        # `gradient` returns one entry per differentiated argument; the model's is the first.
        Flux.update!(opt_state, net, first(∇))
    end

    # Positional destructuring: the first two fields of the result are loss and acc.
    loss, acc, _ = loss_and_accuracy(net, train_data)
    test_loss, test_acc, _ = loss_and_accuracy(net, test_data)
    @info epoch acc test_acc
    push!(train_log, (; epoch, loss, acc, test_loss, test_acc))
end

 31.648198 seconds (24.81 M allocations: 10.135 GiB, 3.15% gc time, 89.92% compilation time)


┌ Info: 1
│   acc = 87.67
│   test_acc = 88.65
└ @ Main c:\Users\jakub\Documents\JW_CNN\AWID-2024-CNN.ipynb:9


  7.464832 seconds (537.34 k allocations: 8.590 GiB, 7.59% gc time)


┌ Info: 2
│   acc = 92.36
│   test_acc = 93.09
└ @ Main c:\Users\jakub\Documents\JW_CNN\AWID-2024-CNN.ipynb:9


  6.928512 seconds (537.33 k allocations: 8.590 GiB, 5.66% gc time)


┌ Info: 3
│   acc = 93.52
│   test_acc = 94.31
└ @ Main c:\Users\jakub\Documents\JW_CNN\AWID-2024-CNN.ipynb:9


In [4]:
# Repeat the single-sample check after training: first batch of a shuffled, batchsize-1 loader.
x1, y1 = first(loader(train_data)); # (28×28×1×1 Array{Float32, 4}, 10×1 OneHotMatrix(::Vector{UInt32}))
y1hat = net(x1)
# Print the predicted label next to the true label, both decoded to digits 0:9.
@show hcat(Flux.onecold(y1hat, 0:9), Flux.onecold(y1, 0:9))

# Loss and accuracy of the trained network on the full training split.
@show loss_and_accuracy(net, train_data);

hcat(Flux.onecold(y1hat, 0:9), Flux.onecold(y1, 0:9)) = [4 6]
loss_and_accuracy(net, train_data) = (loss = 0.20980766f0, acc = 93.52, split = :train)
