In [67]:
using Pkg
using Flux
using MLDatasets

In [68]:
# Load the samples at indices 1:10000 of the CIFAR-10 training and test sets,
# requesting Float32 pixel values. Each call returns an (images, labels) pair.
train_x, train_y = CIFAR10.traindata(Float32, 1:10000)
test_x, test_y = CIFAR10.testdata(Float32, 1:10000);

In [69]:
println("Size of each image: ", size(train_x))
println("Label of 50th training datapoint: ", train_y[50])
# Note: `size(train_x)` is the size of the whole training array, whose last dimension
# indexes the images. So each training point is a 3D array - a 32x32 image with 3 color channels.

Size of each image: (32, 32, 3, 10000)
Label of 50th training datapoint: 0


In [70]:
# Since this is a multi-class classification problem, we can use one-hot encoding, just like for the MNIST dataset.
# There are 10 classes, just like MNIST, so the labels 0 to 9 are encoded.

train_y, test_y = Flux.onehotbatch(train_y, 0:9), Flux.onehotbatch(test_y, 0:9)
# Each column of the one-hot matrix is one sample, so the number of rows is the number
# of classes. `size(train_y, 1)` reads it directly instead of copying a column.
nclasses = size(train_y, 1)
println("number of classes: ", nclasses)

number of classes: 10


In [71]:
# 1 VGG Block: two 3x3 convolutions followed by 2x2 max pooling, then a dense classifier head.
# Spatial size, assuming the convolutions use no padding: 32 -> 30 -> 28 after the two
# convs and 14 after pooling, so flatten yields 14 * 14 * 32 = 6272 features per image.
# NOTE(review): no activation is given for Dense(6272, 128), so the two Dense layers
# presumably compose to a single affine map - confirm whether relu was intended there.
model_VGG1 = Chain(Conv((3,3), 3=>32, relu),
              Conv((3,3), 32=>32, relu),
              MaxPool((2,2)),
              Flux.flatten,
              Dense(6272, 128),
              Dense(128,10),
              softmax)

Chain(
  Conv((3, 3), 3 => 32, relu),          [90m# 896 parameters[39m
  Conv((3, 3), 32 => 32, relu),         [90m# 9_248 parameters[39m
  MaxPool((2, 2)),
  Flux.flatten,
  Dense(6272, 128),                     [90m# 802_944 parameters[39m
  Dense(128, 10),                       [90m# 1_290 parameters[39m
  NNlib.softmax,
)[90m                   # Total: 8 arrays, [39m814_378 parameters, 3.108 MiB.

In [72]:
# 2 VGG Block: two (conv, conv, max-pool) stages with 32 and 64 channels, then a dense head.
# Spatial size, assuming the convolutions use no padding: 32 -> 30 -> 28 -> 14 after the
# first stage and 14 -> 12 -> 10 -> 5 after the second, so flatten yields
# 5 * 5 * 64 = 1600 features per image.
# NOTE(review): no activation is given for Dense(1600, 128), so the two Dense layers
# presumably compose to a single affine map - confirm whether relu was intended there.
model_VGG2 = Chain(Conv((3,3), 3=>32, relu),
              Conv((3,3), 32=>32, relu),
              MaxPool((2,2)),
              Conv((3,3), 32=>64, relu),
              Conv((3,3), 64=>64, relu),
              MaxPool((2,2)),
              Flux.flatten,
              Dense(1600, 128),
              Dense(128,10),
              softmax)

Chain(
  Conv((3, 3), 3 => 32, relu),          [90m# 896 parameters[39m
  Conv((3, 3), 32 => 32, relu),         [90m# 9_248 parameters[39m
  MaxPool((2, 2)),
  Conv((3, 3), 32 => 64, relu),         [90m# 18_496 parameters[39m
  Conv((3, 3), 64 => 64, relu),         [90m# 36_928 parameters[39m
  MaxPool((2, 2)),
  Flux.flatten,
  Dense(1600, 128),                     [90m# 204_928 parameters[39m
  Dense(128, 10),                       [90m# 1_290 parameters[39m
  NNlib.softmax,
)[90m                   # Total: 12 arrays, [39m271_786 parameters, 1.039 MiB.

In [73]:
# 3 VGG Block: three conv-conv stages with 32, 64 and 128 channels, then a dense head.
# Spatial size, assuming the convolutions use no padding: 32 -> 30 -> 28 -> 14,
# then 14 -> 12 -> 10 -> 5, then 5 -> 3 -> 1 after the third pair of convolutions.
model_VGG3 = Chain(
              Conv((3,3), 3=>32, relu),
              Conv((3,3), 32=>32, relu),
              MaxPool((2,2)),
              Conv((3,3), 32=>64, relu),
              Conv((3,3), 64=>64, relu),
              MaxPool((2,2)),
              Conv((3,3), 64=>128, relu),
              Conv((3,3), 128=>128, relu),
              # No third MaxPool((2,2)) here: by the arithmetic above the feature map is
              # already 1x1, which matches the 128 inputs of the next Dense layer.
              Flux.flatten,
              Dense(128, 128),
              Dense(128,10),
              softmax)
# Forward pass over the whole training array as a shape check; the result has one
# column of 10 class probabilities per image.
model_VGG3(train_x)

10×10000 Matrix{Float32}:
 0.0979358  0.0973411  0.101644   …  0.0985097  0.0961972  0.0992779
 0.0933131  0.0927926  0.0928897     0.0950712  0.0955589  0.0937156
 0.0959559  0.0934367  0.0934987     0.0937468  0.0968018  0.0946712
 0.0989377  0.0990782  0.100036      0.100753   0.099971   0.100841
 0.103142   0.105351   0.103573      0.102786   0.10369    0.104147
 0.101781   0.0997584  0.0994495  …  0.099266   0.10094    0.0999218
 0.101138   0.10113    0.101842      0.102004   0.101279   0.100691
 0.105909   0.105327   0.101732      0.102817   0.104416   0.104005
 0.101249   0.10156    0.103003      0.101102   0.0993258  0.102143
 0.100639   0.104225   0.102333      0.103945   0.101821   0.100586

In [74]:
"""
    loss_and_accuracy(udata, wdata, model)

Evaluate `model` on the inputs `udata` against the one-hot targets `wdata`.

# Arguments
- `udata`: input array whose last dimension indexes the samples (for the image
  arrays used in this notebook that is dimension 4).
- `wdata`: one-hot encoded targets, one column per sample.
- `model`: callable that maps `udata` to a matrix of class scores, one column per sample.

# Returns
A tuple `(loss, accuracy)`: the cross-entropy summed over all samples (`agg=sum`)
and the fraction of samples whose predicted class equals the target class.
"""
function loss_and_accuracy(udata, wdata, model)
    # Take the sample count from the last axis. A hard-coded `size(udata, 4)` returns 1
    # for inputs with fewer than four dimensions, which would make the accuracy wrong.
    ndata = size(udata, ndims(udata))

    ŵ = model(udata)
    loss = Flux.crossentropy(ŵ, wdata; agg=sum)
    # `onecold` turns each column back into a class index; count the matching columns.
    ncorrect = count(Flux.onecold(ŵ) .== Flux.onecold(wdata))
    accuracy = ncorrect / ndata
    return loss, accuracy
end

# Baseline: evaluate model_VGG2 on both splits and print the summed loss and accuracy.
# In the recorded output the accuracy is about 0.10, i.e. chance level for 10 classes.
train_loss, train_acc = loss_and_accuracy(train_x,train_y,model_VGG2)
test_loss, test_acc = loss_and_accuracy(test_x,test_y,model_VGG2)
println("train_loss = $train_loss, train_accuracy = $train_acc")
println("test_loss = $test_loss, test_accuracy = $test_acc")


train_loss = 23043.992, train_accuracy = 0.1037
test_loss = 23038.322, test_accuracy = 0.1005


In [78]:
# Mini-batch iterator over the training set: batches of `batch_size` (image, label)
# samples, with shuffling enabled (`shuffle=true`).
batch_size = 128
train_loader = Flux.Data.DataLoader((train_x, train_y), batchsize=batch_size, shuffle=true);

In [79]:
"""
    train(model, train_loader, optimizer, model_params,
          train_x, train_y, test_x, test_y, model_name;
          stepsize=0.001, epochs=10)

Train `model` on the mini-batches of `train_loader` and, after every epoch, print and
record the loss and accuracy on the full training and test sets.

# Arguments
- `model`: the model to train; called as `model(u)` on each input batch.
- `train_loader`: iterable yielding `(inputs, targets)` batches.
- `optimizer`: optimiser constructor, called as `optimizer(stepsize)` (e.g. `ADAM`).
- `model_params`: the trainable parameters to differentiate and update,
  e.g. `Flux.params(model)`.
- `train_x`, `train_y`, `test_x`, `test_y`: full data sets passed to
  `loss_and_accuracy` after each epoch.
- `model_name`: label used in the progress messages.

# Keywords
- `stepsize`: step size (learning rate) handed to `optimizer`. Defaults to `0.001`.
- `epochs`: number of full passes over `train_loader`. Defaults to `10`.

# Returns
`(train_losses, train_accuracy, test_losses, test_accuracy)`: four vectors with one
entry per epoch.
"""
function train(model, train_loader, optimizer, model_params, train_x, train_y,
               test_x, test_y, model_name; stepsize=0.001, epochs=10)
    # Per-epoch histories. Element types are left open, as in the original code.
    train_losses = []
    train_accuracy = []
    test_losses = []
    test_accuracy = []
    opt = optimizer(stepsize)
    for epoch in 1:epochs
        for (u, w) in train_loader
            # Gradient of the mini-batch cross-entropy with respect to `model_params`.
            gs = gradient(() -> Flux.Losses.crossentropy(model(u), w), model_params)
            Flux.Optimise.update!(opt, model_params, gs)
        end
        println("Epoch $epoch for $model_name architecture.")
        train_loss, train_acc = loss_and_accuracy(train_x, train_y, model)
        push!(train_losses, train_loss)
        push!(train_accuracy, train_acc)
        test_loss, test_acc = loss_and_accuracy(test_x, test_y, model)
        push!(test_losses, test_loss)
        push!(test_accuracy, test_acc)
        println("  train_loss = $train_loss, train_accuracy = $train_acc")
        println("  test_loss = $test_loss, test_accuracy = $test_acc")
    end
    return train_losses, train_accuracy, test_losses, test_accuracy
end

train (generic function with 1 method)

In [77]:
# Train the 1-block model with ADAM and destructure the four per-epoch histories.
# Note: in the recorded run this cell was interrupted after epoch 6, so the four
# variables below were not assigned in that session.
vgg1_train_loss, 
vgg1_train_accuracy, 
vgg1_test_loss, 
vgg1_test_accuracy = train(model_VGG1, train_loader, ADAM, Flux.params(model_VGG1), 
    train_x, train_y,test_x, test_y, "VGG1")

Epoch 1 for VGG1 architecture.
  train_loss = 14975.256, train_accuracy = 0.4731
  test_loss = 15787.694, test_accuracy = 0.437
Epoch 2 for VGG1 architecture.
  train_loss = 12392.323, train_accuracy = 0.5601
  test_loss = 14063.291, test_accuracy = 0.4936
Epoch 3 for VGG1 architecture.
  train_loss = 10892.171, train_accuracy = 0.6234
  test_loss = 13263.6, test_accuracy = 0.5341
Epoch 4 for VGG1 architecture.
  train_loss = 9753.802, train_accuracy = 0.6541
  test_loss = 13111.36, test_accuracy = 0.5349
Epoch 5 for VGG1 architecture.
  train_loss = 9238.127, train_accuracy = 0.6749
  test_loss = 13269.293, test_accuracy = 0.5463
Epoch 6 for VGG1 architecture.
  train_loss = 8078.162, train_accuracy = 0.721
  test_loss = 13089.453, test_accuracy = 0.5617


LoadError: InterruptException:

In [None]:
# Train the 2-block model with ADAM and destructure the four per-epoch histories.
vgg2_train_loss, 
vgg2_train_accuracy, 
vgg2_test_loss, 
vgg2_test_accuracy = train(model_VGG2, train_loader, ADAM ,Flux.params(model_VGG2), train_x, train_y, test_x,test_y, "VGG2")

In [None]:
# Train the 3-block model with ADAM and destructure the four per-epoch histories.
vgg3_train_loss, 
vgg3_train_accuracy, 
vgg3_test_loss, 
vgg3_test_accuracy = train(model_VGG3, train_loader, ADAM ,Flux.params(model_VGG3), train_x, train_y, test_x,test_y,"VGG3")