In [1]:
include("graph.jl")
include("base_operators.jl")

include("Convolution.jl")
include("Dense.jl")
include("Flatten.jl")
include("MaxPool.jl")
include("ReLu.jl")

include("misc.jl")
include("DataLoader.jl")

In [2]:
using MLDatasets;
using BenchmarkTools;
using Random;
using ProgressMeter;



In [3]:
# Settings
eta = Float32(1e-2);
epochs = 3;
batchsize = 100;

In [4]:
train_dataset = MNIST(:train);
train_samples = size(train_dataset.features)[3];
test_dataset = MNIST(:test);
test_samples = size(test_dataset.features)[3];

In [5]:
function loader(data, batchsize, shuffle)
    x = reshape(data.features, 28, 28, 1, :)
    yhot = onehotbatch(data.targets, 0:9)
    DataLoader((x, yhot), batchsize, shuffle)
end

loader (generic function with 1 method)

In [6]:
# obraz wejściowy i rzeczywista wartość
img = Variable(reshape(train_dataset[1].features, 28, 28, 1), name="img")
actual_class = Variable(train_dataset[1].targets, name="actual_class")

var actual_class
 ┣━ value:    Int64
 ┗━ gradient: Nothing

In [7]:
w1 = Variable(glorot_uniform((3,3,1,6)), name="w1")
b1 = Variable(zeros(Float32, 6), name="b1")
w2 = Variable(glorot_uniform((3,3,6,16)), name="w2")
b2 = Variable(zeros(Float32, 16), name="b2")
w3 = Variable(glorot_uniform((84, 400)), name = "w3")
b3 = Variable(zeros(Float32, 84), name="b3")
w4 = Variable(glorot_uniform((10, 84)), name = "w4")
b4 = Variable(zeros(Float32, 10), name="b4")

var b4
 ┣━ value:    10-element Vector{Float32}
 ┗━ gradient: Nothing

In [8]:
function net_hard(x, w1, w2, w3, w4, b1, b2, b3, b4, y)
    o1 = Conv(x, w1, b1)
    o2 = ReLu(o1)
    o3 = MaxPool(o2, Constant((2,2)))
    o4 = Conv(o3, w2, b2)
    o5 = ReLu(o4)
    o6 = MaxPool(o5, Constant((2,2)))
    o7 = Flatten(o6)
    o8 = Dense(w3, b3, o7)
    o9 = ReLu(o8)
    o10 = Dense(w4, b4, o9)
    o11 = Softmax(o10)
    E = cross_entropy_loss(o11, y)
    return topological_sort(E), o11
end

graph, y_output = net_hard(img, w1, w2, w3, w4, b1, b2, b3, b4, actual_class);

In [9]:
# function net_easy(x, w1, w2, w3, y)
#     o1 = Conv(x, w1, ReLu)
#     o2 = MaxPool(o1, Constant((2,2)))
#     o3 = Flatten(o2)
#     o4 = Dense(w2, o3, ReLu)
#     o5 = Dense(w3, o4, Softmax)
#     E = cross_entropy_loss(o5, y)

#     return topological_sort(E), o5
# end

# w1 = Variable(glorot_uniform((3,3,1,6)), name="w1")
# w2 = Variable(glorot_uniform((84, 1014)), name="w2")   
# w3 = Variable(glorot_uniform((10, 84)), name="w3") 

# graph, y_output = net_easy(img, w1, w2, w3, actual_class)

In [10]:
w1_grad = zeros(Float32, 3,3,1,6)
w2_grad = zeros(Float32, 3,3,6,16)
w3_grad = zeros(Float32, 84,400)
w4_grad = zeros(Float32, 10, 84)

b1_grad = zeros(Float32, 6)
b2_grad = zeros(Float32, 16)
b3_grad = zeros(Float32, 84)
b4_grad = zeros(Float32, 10)

# w1_grad = zeros(Float32, 3,3,1,6)
# w2_grad = zeros(Float32, 84, 1014)
# w3_grad = zeros(Float32, 10, 84)
i = 0
for epoch in 1:epochs
    train_loss = 0
    train_acc = 0
    #@time @showprogress dt=1 barglyphs=BarGlyphs('|','█', ['▁' ,'▂' ,'▃' ,'▄' ,'▅' ,'▆', '▇'],' ','|',) desc="Training..." for i in 1:train_samples
    @btime for (input, target) in loader(train_dataset, batchsize, true)
            for i in 1:batchsize
                @views img.output = input[i]
                @views actual_class.output = target[i]
                train_loss += forward!(graph)
    			backward!(graph)
        
                onecold(y_output.output, 0:9) == onecold(target[i], 0:9) ? train_acc+=1 : nothing
            
                w1_grad .+= w1.gradient
                w2_grad .+= w2.gradient
                w3_grad .+= w3.gradient
                w4_grad .+= w4.gradient

                b1_grad .+= b1.gradient
                b2_grad .+= b2.gradient
                b3_grad .+= b3.gradient
                b4_grad .+= b4.gradient
            end
            w1.output .-= ((w1_grad/batchsize)*eta)
            w2.output .-= ((w2_grad/batchsize)*eta)
            w3.output .-= ((w3_grad/batchsize)*eta)
            w4.output .-= ((w4_grad/batchsize)*eta)
            b1.output .-= ((b1_grad/batchsize)*eta)
            b2.output .-= ((b2_grad/batchsize)*eta)
            b3.output .-= ((b3_grad/batchsize)*eta)
            b4.output .-= ((b4_grad/batchsize)*eta)

            
            w1_grad .= 0
            w2_grad .= 0
            w3_grad .= 0
            w4_grad .= 0
            
            b1_grad .= 0
            b2_grad .= 0
            b3_grad .= 0
            b4_grad .= 0

        end
    avg_loss = train_loss/train_samples
    train_acc = train_acc/train_samples * 100
    
    @info "Train results" epoch avg_loss train_acc
end

 66.292455 seconds (47.06 M allocations: 15.554 GiB, 1.61% gc time, 8.18% compilation time)


[36m[1m┌ [22m[39m[36m[1mInfo: [22m[39mTrain results
[36m[1m│ [22m[39m  epoch = 1
[36m[1m│ [22m[39m  avg_loss = 2.0583377f0
[36m[1m└ [22m[39m  train_acc = 27.061666666666667


 61.063779 seconds (35.37 M allocations: 14.779 GiB, 1.74% gc time)


[36m[1m┌ [22m[39m[36m[1mInfo: [22m[39mTrain results
[36m[1m│ [22m[39m  epoch = 2
[36m[1m│ [22m[39m  avg_loss = 0.5123401f0
[36m[1m└ [22m[39m  train_acc = 85.1


 61.196470 seconds (35.37 M allocations: 14.779 GiB, 1.93% gc time)


[36m[1m┌ [22m[39m[36m[1mInfo: [22m[39mTrain results
[36m[1m│ [22m[39m  epoch = 3
[36m[1m│ [22m[39m  avg_loss = 0.3107633f0
[36m[1m└ [22m[39m  train_acc = 90.735


In [11]:
test_acc = 0;
test_losses = zeros(test_samples);

@time @showprogress dt=1 barglyphs=BarGlyphs('|','█', ['▁' ,'▂' ,'▃' ,'▄' ,'▅' ,'▆', '▇'],' ','|',) desc="Testing..." for i in 1:test_samples
    img.output = reshape(test_dataset[i].features, 28,28,1)
    actual_class.output = onehot(test_dataset[i].targets, 0:9)
    loss_value = forward!(graph)
    test_losses[i] = loss_value
    onecold(y_output.output, 0:9) == test_dataset[i].targets ? test_acc+=1 : nothing
end
test_acc = test_acc/test_samples * 100
avg_loss = mean(test_losses)

@info "Test results" avg_loss test_acc

[32mTesting... 100%|█████████████████████████████████████████| Time: 0:00:06[39m


  6.328091 seconds (4.77 M allocations: 626.252 MiB, 0.87% gc time, 18.00% compilation time)


[36m[1m┌ [22m[39m[36m[1mInfo: [22m[39mTest results
[36m[1m│ [22m[39m  avg_loss = 0.2553841806242194
[36m[1m└ [22m[39m  test_acc = 92.43


In [12]:
test = 1

1

In [13]:
if test != nothing
    print(test)
end

1

In [19]:
w3.output

84×400 Matrix{Float32}:
 -0.121135   -0.0952782     0.0395386   …   0.110492    -0.106272
  0.185033   -0.0335346    -0.0318355       0.218981     0.0592905
  0.243482    0.0554826     0.113038       -0.0403944    0.186502
  0.0597456   0.119412     -0.019251        0.025647     0.036262
 -0.0879385   0.162497      0.118133       -0.155409    -0.196455
 -0.303614   -0.077527     -0.168254    …   0.00570875   0.0056641
 -0.0271131  -0.0248996     0.0529472      -0.0935155    0.0179046
  0.0783219  -0.106993     -0.0595114       0.0145297    0.0242882
  0.101682   -0.00286223   -0.170311        0.148301     0.129447
  0.018321   -0.0585142     0.0371203      -0.00258623  -0.164209
  0.183526   -0.00460497    0.00352023  …  -0.0646234    0.0449342
  0.017353   -0.0878814     0.291158        0.193833     0.00554797
  0.070461    0.160208      0.201233       -0.00345887  -0.18365
  ⋮                                     ⋱               
 -0.0823844   0.114401      0.0156093      -0.217578   

In [21]:
using Pkg
pkgs = Pkg.installed();

[33m[1m└ [22m[39m[90m@ Pkg /Applications/Julia-1.10.app/Contents/Resources/julia/share/julia/stdlib/v1.10/Pkg/src/Pkg.jl:744[39m


In [24]:
pkgs["ProgressMeter"]

v"1.10.0"