In [1]:
include("graph.jl")
include("base_operators.jl")

include("Convolution.jl")
include("Dense.jl")
include("Flatten.jl")
include("MaxPool.jl")
include("ReLu.jl")

include("misc.jl")

onecold (generic function with 1 method)

In [2]:
using MLDatasets;
using BenchmarkTools;
using Random;
using ProgressMeter;



In [3]:
train_log = []
settings = (;
    eta = Float32(1e-2),
    epochs = 3,
    batchsize = 100
)

(eta = 0.01f0, epochs = 3, batchsize = 100)

In [4]:
train_dataset = MNIST(:train);
train_samples = size(train_dataset.features)[3];
test_dataset = MNIST(:test);
test_samples = size(test_dataset.features)[3];

In [5]:
# obraz wejściowy i rzeczywista wartość
img = Variable(train_dataset[1].features, name="img")
actual_class = Variable(train_dataset[1].targets, name="actual_class")

var actual_class
 ┣━ value:    Int64
 ┗━ gradient: Nothing

In [6]:
function net_hard(x, w1, w2, w3, w4, b1, b2, b3, b4, y)
    o1 = Conv(x, w1, b1, ReLu)
    o2 = MaxPool(o1, Constant((2,2)))
    o3 = Conv(o2, w2, b2, ReLu)
    o4 = MaxPool(o3, Constant((2,2)))
    o5 = Flatten(o4)
    o6 = Dense(w3, b3, o5, ReLu)
    o7 = Dense(w4, b4, o6, Softmax)
    E = cross_entropy_loss(o7, y)
    return topological_sort(E), o7
end

w1 = Variable(glorot_uniform((3,3,1,6)), name="w1")
b1 = Variable(zeros(Float32, 6), name="b1")

w2 = Variable(glorot_uniform((3,3,6,16)), name="w2")
b2 = Variable(zeros(Float32, 16), name="b2")

w3 = Variable(glorot_uniform((84, 400)), name = "w3")
b3 = Variable(zeros(Float32, 84), name="b3")

w4 = Variable(glorot_uniform((10, 84)), name = "w4")
b4 = Variable(zeros(Float32, 10), name="b4")

graph, y_output = net_hard(img, w1, w2, w3, w4, b1, b2, b3, b4, actual_class)

(Any[var actual_class
 ┣━ value:    Int64
 ┗━ gradient: Nothing, op ?(typeof(-)), var w4
 ┣━ value:    10×84 Matrix{Float32}
 ┗━ gradient: Nothing, var w3
 ┣━ value:    84×400 Matrix{Float32}
 ┗━ gradient: Nothing, var img
 ┣━ value:    28×28 Matrix{Float32}
 ┗━ gradient: Nothing, var w1
 ┣━ value:    3×3×1×6 Array{Float32, 4}
 ┗━ gradient: Nothing, var b1
 ┣━ value:    6-element Vector{Float32}
 ┗━ gradient: Nothing, op.?(typeof(Convolution)), op.?(typeof(ReLu)), const (2, 2)  …  var b3
 ┣━ value:    84-element Vector{Float32}
 ┗━ gradient: Nothing, op.?(typeof(+)), op.?(typeof(ReLu)), op.?(typeof(mul!)), var b4
 ┣━ value:    10-element Vector{Float32}
 ┗━ gradient: Nothing, op.?(typeof(+)), op.?(typeof(Softmax)), op.?(typeof(log)), op.?(typeof(*)), op.?(typeof(sum))], op.?(typeof(Softmax)))

In [7]:
# function net_easy(x, w1, w2, w3, y)
#     o1 = Conv(x, w1, ReLu)
#     o2 = MaxPool(o1, Constant((2,2)))
#     o3 = Flatten(o2)
#     o4 = Dense(w2, o3, ReLu)
#     o5 = Dense(w3, o4, Softmax)
#     E = cross_entropy_loss(o5, y)

#     return topological_sort(E), o5
# end

# w1 = Variable(glorot_uniform((3,3,1,6)), name="w1")
# w2 = Variable(glorot_uniform((84, 1014)), name="w2")   
# w3 = Variable(glorot_uniform((10, 84)), name="w3") 

# graph, y_output = net_easy(img, w1, w2, w3, actual_class)

In [8]:
w1_grad = zeros(Float32, 3,3,1,6)
w2_grad = zeros(Float32, 3,3,6,16)
w3_grad = zeros(Float32, 84,400)
w4_grad = zeros(Float32, 10, 84)

b1_grad = zeros(Float32, 6)
b2_grad = zeros(Float32, 16)
b3_grad = zeros(Float32, 84)
b4_grad = zeros(Float32, 10)

# w1_grad = zeros(Float32, 3,3,1,6)
# w2_grad = zeros(Float32, 84, 1014)
# w3_grad = zeros(Float32, 10, 84)

for epoch in 1:settings.epochs
    train_losses = zeros(Float32, train_samples)
    train_acc = 0
    @time @showprogress dt=1 barglyphs=BarGlyphs('|','█', ['▁' ,'▂' ,'▃' ,'▄' ,'▅' ,'▆', '▇'],' ','|',) desc="Training..." for i in 1:train_samples
    #@time for i in 1:train_samples
            img.output = reshape(train_dataset[i].features, 28,28,1,1)
            actual_class.output = onehot(train_dataset[i].targets, 0:9)
            loss_value = forward!(graph)
            train_losses[i] = loss_value
			backward!(graph)
        
            onecold(y_output.output, 0:9) == train_dataset[i].targets ? train_acc+=1 : nothing
            
            w1_grad .+= w1.gradient
            w2_grad .+= w2.gradient
            w3_grad .+= w3.gradient
            w4_grad .+= w4.gradient

            b1_grad .+= b1.gradient
            b2_grad .+= b2.gradient
            b3_grad .+= b3.gradient
            b4_grad .+= b4.gradient
            
            if i % settings.batchsize == 0
                w1.output .-= ((w1_grad/settings.batchsize)*settings.eta)
                w2.output .-= ((w2_grad/settings.batchsize)*settings.eta)
                w3.output .-= ((w3_grad/settings.batchsize)*settings.eta)
                w4.output .-= ((w4_grad/settings.batchsize)*settings.eta)
                b1.output .-= ((b1_grad/settings.batchsize)*settings.eta)
                b2.output .-= ((b2_grad/settings.batchsize)*settings.eta)
                b3.output .-= ((b3_grad/settings.batchsize)*settings.eta)
                b4.output .-= ((b4_grad/settings.batchsize)*settings.eta)

            
                w1_grad .= 0
                w2_grad .= 0
                w3_grad .= 0
                w4_grad .= 0
            
                b1_grad .= 0
                b2_grad .= 0
                b3_grad .= 0
                b4_grad .= 0
            end
	end
    avg_loss = mean(train_losses)
    train_acc = train_acc/train_samples * 100
    
    @info "Train results" epoch avg_loss train_acc
end

[32mTraining... 100%|████████████████████████████████████████| Time: 0:02:24[39m7:18[39m


145.169938 seconds (2.51 G allocations: 253.935 GiB, 7.49% gc time, 4.01% compilation time)


[36m[1m┌ [22m[39m[36m[1mInfo: [22m[39mTrain results
[36m[1m│ [22m[39m  epoch = 1
[36m[1m│ [22m[39m  avg_loss = 0.6451297f0
[36m[1m└ [22m[39m  train_acc = 80.08666666666666
[32mTraining... 100%|████████████████████████████████████████| Time: 0:02:18[39m


138.600723 seconds (2.50 G allocations: 253.224 GiB, 7.64% gc time)


[36m[1m┌ [22m[39m[36m[1mInfo: [22m[39mTrain results
[36m[1m│ [22m[39m  epoch = 2
[36m[1m│ [22m[39m  avg_loss = 0.25138658f0
[36m[1m└ [22m[39m  train_acc = 92.40333333333334
[32mTraining...  14%|█████▆                                  |  ETA: 0:02:00[39m

LoadError: InterruptException:

In [9]:
test_acc = 0;
test_losses = zeros(test_samples);

@time @showprogress dt=1 barglyphs=BarGlyphs('|','█', ['▁' ,'▂' ,'▃' ,'▄' ,'▅' ,'▆', '▇'],' ','|',) desc="Testing..." for i in 1:test_samples
    img.output = reshape(test_dataset[i].features, 28,28,1,1)
    loss_value = forward!(graph)
    test_losses[i] = loss_value
    onecold(y_output.output, 0:9) == test_dataset[i].targets ? test_acc+=1 : nothing
end
test_acc = test_acc/test_samples * 100
avg_loss = mean(test_losses)

@info "Test results" avg_loss test_acc

[32mTesting... 100%|█████████████████████████████████████████| Time: 0:00:09[39m


  9.215539 seconds (160.34 M allocations: 16.219 GiB, 7.95% gc time, 0.03% compilation time)


[36m[1m┌ [22m[39m[36m[1mInfo: [22m[39mTest results
[36m[1m│ [22m[39m  avg_loss = 7.667727813575308
[36m[1m└ [22m[39m  test_acc = 93.78
