In [1]:
using Pkg
using Flux
using MLDatasets

In [2]:
train_x, train_y = CIFAR10.traindata(Float32, 1:10000)
test_x, test_y = CIFAR10.testdata(Float32, 1:10000);

In [3]:
println("Size of each image: ", size(train_x))
println("Label of 50th training datapoint: ", train_y[50])
# So here we can see that each training point is a 3D array - a 32x32 image with 3 color channels

Size of each image: (32, 32, 3, 10000)
Label of 50th training datapoint: 0


In [4]:
# Since this is a multi-class classification problem, we can use one hot encoding, just like the MNIST dataset.
# There's 10 classes just like mnist, so we encode from 0 to 9

train_y, test_y = Flux.onehotbatch(train_y, 0:9), Flux.onehotbatch(test_y, 0:9)
nclasses = length(train_y[:,1])
println("number of classes: ", nclasses)

number of classes: 10


In [5]:
# 1 VGG Block
model_VGG1 = Chain(Conv((3,3), 3=>32, relu),
              Conv((3,3), 32=>32, relu),
              MaxPool((2,2)),
              Flux.flatten,
              Dense(6272, 128),
              Dense(128,10),
              softmax)

Chain(
  Conv((3, 3), 3 => 32, relu),          [90m# 896 parameters[39m
  Conv((3, 3), 32 => 32, relu),         [90m# 9_248 parameters[39m
  MaxPool((2, 2)),
  Flux.flatten,
  Dense(6272, 128),                     [90m# 802_944 parameters[39m
  Dense(128, 10),                       [90m# 1_290 parameters[39m
  NNlib.softmax,
)[90m                   # Total: 8 arrays, [39m814_378 parameters, 3.108 MiB.

In [6]:
# 2 VGG Block
model_VGG2 = Chain(Conv((3,3), 3=>32, relu),
              Conv((3,3), 32=>32, relu),
              MaxPool((2,2)),
              Conv((3,3), 32=>64, relu),
              Conv((3,3), 64=>64, relu),
              MaxPool((2,2)),
              Flux.flatten,
              Dense(1600, 128),
              Dense(128,10),
              softmax)

Chain(
  Conv((3, 3), 3 => 32, relu),          [90m# 896 parameters[39m
  Conv((3, 3), 32 => 32, relu),         [90m# 9_248 parameters[39m
  MaxPool((2, 2)),
  Conv((3, 3), 32 => 64, relu),         [90m# 18_496 parameters[39m
  Conv((3, 3), 64 => 64, relu),         [90m# 36_928 parameters[39m
  MaxPool((2, 2)),
  Flux.flatten,
  Dense(1600, 128),                     [90m# 204_928 parameters[39m
  Dense(128, 10),                       [90m# 1_290 parameters[39m
  NNlib.softmax,
)[90m                   # Total: 12 arrays, [39m271_786 parameters, 1.039 MiB.

In [7]:
# 3 VGG Block
model_VGG3 = Chain(
              Conv((3,3), 3=>32, relu),
              Conv((3,3), 32=>32, relu),
              MaxPool((2,2)),
              Conv((3,3), 32=>64, relu),
              Conv((3,3), 64=>64, relu),
              MaxPool((2,2)),
              Conv((3,3), 64=>128, relu),
              Conv((3,3), 128=>128, relu),
              #MaxPool((2,2)),
              Flux.flatten,
              Dense(128, 128),
              Dense(128,10),
              softmax)
model_VGG3(train_x)

10×10000 Matrix{Float32}:
 0.0949119  0.0964669  0.0981602  …  0.0989761  0.0950511  0.0975889
 0.100601   0.0986537  0.098283      0.100724   0.101251   0.0989996
 0.102874   0.103122   0.101029      0.0998586  0.1025     0.100506
 0.101101   0.103294   0.102767      0.104538   0.102708   0.103506
 0.105358   0.107742   0.104874      0.104966   0.106852   0.105607
 0.105599   0.104343   0.103054   …  0.101636   0.103419   0.103327
 0.103383   0.10146    0.0967931     0.0987216  0.100138   0.0995452
 0.094803   0.0954987  0.0983299     0.0966073  0.09752    0.0965896
 0.0929526  0.0921574  0.0968031     0.0959798  0.0936054  0.0944039
 0.0984166  0.0972626  0.0999074     0.0979919  0.0969551  0.099927

In [8]:
function loss_and_accuracy(udata, wdata, model)

    ndata = size(udata,4)

    ŵ = model(udata)
    loss = Flux.crossentropy(ŵ, wdata; agg=sum)
    accuracy = sum(Flux.onecold(ŵ) .== Flux.onecold(wdata)) / ndata
    return loss, accuracy
end

train_loss, train_acc = loss_and_accuracy(train_x,train_y,model_VGG2)
test_loss, test_acc = loss_and_accuracy(test_x,test_y,model_VGG2)
println("train_loss = $train_loss, train_accuracy = $train_acc")
println("test_loss = $test_loss, test_accuracy = $test_acc")


train_loss = 23090.205, train_accuracy = 0.1076
test_loss = 23093.945, test_accuracy = 0.1008


In [9]:
batch_size = 128
train_loader = Flux.Data.DataLoader((train_x, train_y), batchsize=batch_size, shuffle=true);

In [10]:
function train(model, train_loader, optimizer, model_params,train_x,train_y,test_x,test_y,model_name)
    train_losses = []
    train_accuracy = []
    test_losses = []
    test_accuracy = []
    α = 0.001 # <- stepsize; in the ML community, it is often denoted as a `learning rate η`
    opt = optimizer(α) 
    K = 10    # <- Epoch limit; in the ML community, one full iteration over all sub loss functions is often referred to as `epoch`
    for k in 1:K
        for (u, w) in train_loader
            gs = gradient(() -> Flux.Losses.crossentropy(model(u), w), model_params) # compute gradient
            Flux.Optimise.update!(opt, model_params, gs) # update parameters
        end
        println("Epoch $k for $model_name architecture.")
        train_loss, train_acc = loss_and_accuracy(train_x, train_y,  model)
        push!(train_losses, train_loss)
        push!(train_accuracy, train_acc)
        test_loss, test_acc = loss_and_accuracy(test_x, test_y, model)
        push!(test_losses, test_loss)
        push!(test_accuracy, test_acc)
        println("  train_loss = $train_loss, train_accuracy = $train_acc")
        println("  test_loss = $test_loss, test_accuracy = $test_acc")
    end
    return train_losses, train_accuracy, test_losses, test_accuracy
end

train (generic function with 1 method)

In [None]:
vgg1_train_loss, 
vgg1_train_accuracy, 
vgg1_test_loss, 
vgg1_test_accuracy = train(model_VGG1, train_loader, ADAM, Flux.params(model_VGG1), 
    train_x, train_y,test_x, test_y, "VGG1")

Epoch 1 for VGG1 architecture.
  train_loss = 14074.047, train_accuracy = 0.5235
  test_loss = 14921.775, test_accuracy = 0.48
Epoch 2 for VGG1 architecture.
  train_loss = 11766.3125, train_accuracy = 0.5962
  test_loss = 13501.019, test_accuracy = 0.5183
Epoch 3 for VGG1 architecture.
  train_loss = 11832.637, train_accuracy = 0.5724
  test_loss = 14412.457, test_accuracy = 0.4976
Epoch 4 for VGG1 architecture.
  train_loss = 10639.105, train_accuracy = 0.6253
  test_loss = 14192.984, test_accuracy = 0.5242
Epoch 5 for VGG1 architecture.
  train_loss = 8198.338, train_accuracy = 0.7247
  test_loss = 12679.185, test_accuracy = 0.5617
Epoch 6 for VGG1 architecture.
  train_loss = 7708.5674, train_accuracy = 0.7327
  test_loss = 13236.449, test_accuracy = 0.55


In [None]:
vgg2_train_loss, 
vgg2_train_accuracy, 
vgg2_test_loss, 
vgg2_test_accuracy = train(model_VGG2, train_loader, ADAM ,Flux.params(model_VGG2), train_x, train_y, test_x,test_y, "VGG2")

In [None]:
vgg3_train_loss, 
vgg3_train_accuracy, 
vgg3_test_loss, 
vgg3_test_accuracy = train(model_VGG3, train_loader, ADAM ,Flux.params(model_VGG3), train_x, train_y, test_x,test_y,"VGG3")