In [2]:
# Based on https://minpy.readthedocs.io/en/latest/tutorial/rnn_mnist.html
using MLDatasets, Flux
train_data = MLDatasets.MNIST(split=:train)
test_data  = MLDatasets.MNIST(split=:test)

"""
    loader(data; batchsize::Int=1, shuffle::Bool=true)

Wrap `data` in a `Flux.DataLoader` that yields `(pixels, labels)` mini-batches.

`data.features` is reshaped so that every sample becomes one column of
`28 * 28 = 784` values, and `data.targets` is one-hot encoded against the
classes `0:9`.

# Keywords
- `batchsize`: number of samples (columns) per mini-batch.
- `shuffle`: when `true` (the default) samples are drawn in random order; pass
  `false` to keep the original order, e.g. for reproducible evaluation.
"""
function loader(data; batchsize::Int=1, shuffle::Bool=true)
    pixels = reshape(data.features, 28 * 28, :)   # one 784-element column per sample
    onehot = Flux.onehotbatch(data.targets, 0:9)  # 10×N one-hot matrix, N = number of samples
    return Flux.DataLoader((pixels, onehot); batchsize, shuffle)
end

# Recurrent classifier: every call consumes one 196-row (14 * 14) chunk of the
# flattened image and produces 10 raw class scores from the 64-unit recurrent state.
net = Chain(
    RNN(196 => 64, tanh),  # 14 * 14 = 196 inputs per step
    Dense(64 => 10),       # default identity activation; no softmax in the model
)

Chain(
  Recur(
    RNNCell(196 => 64, tanh),           # 16_768 parameters
  ),
  Dense(64 => 10),                      # 650 parameters
)         # Total: 6 trainable arrays, 17_418 parameters,
          # plus 1 non-trainable, 64 parameters, summarysize 68.406 KiB.

In [3]:
using Statistics: mean  # standard library
"""
    loss_and_accuracy(model, data; steplen::Integer=14 * 14)

Evaluate the recurrent `model` on all of `data` in a single batch.

The flattened samples are fed to `model` in consecutive chunks of `steplen`
rows; only the output produced after the last chunk is scored.

# Keywords
- `steplen`: number of rows passed to `model` per step. It has to match the
  input size of the model's recurrent layer (196 for the network in this file).

# Returns
A `NamedTuple` `(; loss, acc, split)` where `loss` is the logit cross-entropy,
`acc` is the accuracy in percent rounded to two digits, and `split` is
`data.split`.
"""
function loss_and_accuracy(model, data; steplen::Integer=14 * 14)
    (x, y) = only(loader(data; batchsize=length(data)))  # the whole split as one batch
    Flux.reset!(model)  # start from the initial recurrent state
    local ŷ
    for rows in Iterators.partition(axes(x, 1), steplen)
        ŷ = model(x[rows, :])  # each call advances the recurrent state by one step
    end
    loss = Flux.logitcrossentropy(ŷ, y)  # the model emits logits; it contains no softmax
    acc = round(100 * mean(Flux.onecold(ŷ) .== Flux.onecold(y)); digits=2)
    return (; loss, acc, split=data.split)
end

# Baseline on the untrained network: accuracy is expected to be about 10%.
@show loss_and_accuracy(net, test_data);

# Hyperparameters of the training run.
settings = (eta=15e-3, epochs=5, batchsize=100)

# Collects one record per epoch; filled in by the training loop.
train_log = Any[]

# Optimiser state for the `Descent` rule with learning rate `settings.eta`.
opt_state = Flux.setup(Descent(settings.eta), net);

loss_and_accuracy(net, test_data) = (loss = 2.4667773f0, acc = 6.88, split = :test)


In [6]:
using ProgressMeter

# Train for `settings.epochs` passes over the training split and record the
# train/test metrics after every pass.
for epoch in 1:settings.epochs
    # Time one full pass over the shuffled mini-batches.
    @time for (x, y) in loader(train_data; batchsize=settings.batchsize)
        Flux.reset!(net)  # every mini-batch starts from the initial recurrent state
        grads = Flux.gradient(net) do model
            # Feed the four 196-row chunks in order; only the final output is scored.
            logits = model(x[1:196, :])
            logits = model(x[197:392, :])
            logits = model(x[393:588, :])
            logits = model(x[589:end, :])
            Flux.logitcrossentropy(logits, y)
        end
        Flux.update!(opt_state, net, first(grads))
    end

    train_stats = loss_and_accuracy(net, train_data)
    test_stats = loss_and_accuracy(net, test_data)
    @info epoch acc=train_stats.acc test_acc=test_stats.acc
    record = (;
        epoch,
        loss=train_stats.loss,
        acc=train_stats.acc,
        test_loss=test_stats.loss,
        test_acc=test_stats.acc,
    )
    push!(train_log, record)
end

 11.869519 seconds (22.15 M allocations: 4.005 GiB, 1.37% gc time, 94.48% compilation time)


┌ Info: 1
│   acc = 89.18
│   test_acc = 89.73
└ @ Main /home/maciek/Templates/AutomaticDifferention/addons/rnn.ipynb:18


  0.587161 seconds (541.34 k allocations: 2.633 GiB, 19.55% gc time)


┌ Info: 2
│   acc = 91.64
│   test_acc = 92.0
└ @ Main /home/maciek/Templates/AutomaticDifferention/addons/rnn.ipynb:18


  0.760106 seconds (541.34 k allocations: 2.633 GiB, 31.56% gc time)


┌ Info: 3
│   acc = 92.81
│   test_acc = 92.88
└ @ Main /home/maciek/Templates/AutomaticDifferention/addons/rnn.ipynb:18


  0.529331 seconds (541.34 k allocations: 2.633 GiB, 2.89% gc time)


┌ Info: 4
│   acc = 93.59
│   test_acc = 93.61
└ @ Main /home/maciek/Templates/AutomaticDifferention/addons/rnn.ipynb:18


  0.548967 seconds (541.34 k allocations: 2.633 GiB, 2.73% gc time)


┌ Info: 5
│   acc = 94.25
│   test_acc = 94.1
└ @ Main /home/maciek/Templates/AutomaticDifferention/addons/rnn.ipynb:18


In [7]:
# Classify a single training sample and print the prediction next to its true label.
Flux.reset!(net)  # drop the recurrent state left over from earlier calls
x1, y1 = first(loader(train_data));  # default batchsize of 1: 784×1 pixels, 10×1 one-hot label
# Step through the four 196-row chunks in order, keeping only the last output.
y1hat = foldl((1:196, 197:392, 393:588, 589:size(x1, 1)); init=nothing) do _, rows
    net(x1[rows, :])
end
@show hcat(Flux.onecold(y1hat, 0:9), Flux.onecold(y1, 0:9))

@show loss_and_accuracy(net, train_data);

hcat(Flux.onecold(y1hat, 0:9), Flux.onecold(y1, 0:9)) = [6 6]
loss_and_accuracy(net, train_data) = (loss = 0.19922917f0, acc = 94.25, split = :train)
