In [1]:
using Pkg
using Flux
using MLDatasets

In [2]:
train_x, train_y = CIFAR10.traindata(Float32, 1:5000)
test_x, test_y = CIFAR10.testdata(Float32, 1:2000);

In [3]:
println("Size of each image: ", size(train_x))
println("Label of 50th training datapoint: ", train_y[50])
# So here we can see that each training point is a 3D array - a 32x32 image with 3 color channels

Size of each image: (32, 32, 3, 5000)
Label of 50th training datapoint: 0


In [4]:
# Since this is a multi-class classification problem, we can use one hot encoding, just like the MNIST dataset.
# There's 10 classes just like mnist, so we encode from 0 to 9

train_y, test_y = Flux.onehotbatch(train_y, 0:9), Flux.onehotbatch(test_y, 0:9)
nclasses = length(train_y[:,1])
println("number of classes: ", nclasses)

number of classes: 10


In [5]:
# 1 VGG Block
model_VGG1 = Chain(Conv((3,3), 3=>32, relu),
              Conv((3,3), 32=>32, relu),
              MaxPool((2,2)),
              Flux.flatten,
              Dense(6272, 128),
              Dense(128,10),
              softmax)

Chain(
  Conv((3, 3), 3 => 32, relu),          [90m# 896 parameters[39m
  Conv((3, 3), 32 => 32, relu),         [90m# 9_248 parameters[39m
  MaxPool((2, 2)),
  Flux.flatten,
  Dense(6272, 128),                     [90m# 802_944 parameters[39m
  Dense(128, 10),                       [90m# 1_290 parameters[39m
  NNlib.softmax,
)[90m                   # Total: 8 arrays, [39m814_378 parameters, 3.108 MiB.

In [6]:
# 2 VGG Block
model_VGG2 = Chain(Conv((3,3), 3=>32, relu),
              Conv((3,3), 32=>32, relu),
              MaxPool((2,2)),
              Conv((3,3), 32=>64, relu),
              Conv((3,3), 64=>64, relu),
              MaxPool((2,2)),
              Flux.flatten,
              Dense(1600, 128),
              Dense(128,10),
              softmax)

Chain(
  Conv((3, 3), 3 => 32, relu),          [90m# 896 parameters[39m
  Conv((3, 3), 32 => 32, relu),         [90m# 9_248 parameters[39m
  MaxPool((2, 2)),
  Conv((3, 3), 32 => 64, relu),         [90m# 18_496 parameters[39m
  Conv((3, 3), 64 => 64, relu),         [90m# 36_928 parameters[39m
  MaxPool((2, 2)),
  Flux.flatten,
  Dense(1600, 128),                     [90m# 204_928 parameters[39m
  Dense(128, 10),                       [90m# 1_290 parameters[39m
  NNlib.softmax,
)[90m                   # Total: 12 arrays, [39m271_786 parameters, 1.039 MiB.

In [7]:
# 3 VGG Block
model_VGG3 = Chain(
              Conv((3,3), 3=>32, relu),
              Conv((3,3), 32=>32, relu),
              MaxPool((2,2)),
              Conv((3,3), 32=>64, relu),
              Conv((3,3), 64=>64, relu),
              MaxPool((2,2)),
              Conv((3,3), 64=>128, relu),
              Conv((3,3), 128=>128, relu),
              MaxPool((2,2)))
              #Dense(128, 10),
              #Dense(128,10),
              #softmax)

Chain(
  Conv((3, 3), 3 => 32, relu),          [90m# 896 parameters[39m
  Conv((3, 3), 32 => 32, relu),         [90m# 9_248 parameters[39m
  MaxPool((2, 2)),
  Conv((3, 3), 32 => 64, relu),         [90m# 18_496 parameters[39m
  Conv((3, 3), 64 => 64, relu),         [90m# 36_928 parameters[39m
  MaxPool((2, 2)),
  Conv((3, 3), 64 => 128, relu),        [90m# 73_856 parameters[39m
  Conv((3, 3), 128 => 128, relu),       [90m# 147_584 parameters[39m
  MaxPool((2, 2)),
)[90m                   # Total: 12 arrays, [39m287_008 parameters, 1.097 MiB.

In [8]:
function loss_and_accuracy(udata, wdata, model)

    ndata = size(udata,4)

    ŵ = model(udata)
    loss = Flux.crossentropy(ŵ, wdata; agg=sum)
    accuracy = sum(Flux.onecold(ŵ) .== Flux.onecold(wdata)) / ndata
    return loss, accuracy
end

train_loss, train_acc = loss_and_accuracy(train_x,train_y,model_VGG2)
test_loss, test_acc = loss_and_accuracy(test_x,test_y,model_VGG2)
println("train_loss = $train_loss, train_accuracy = $train_acc")
println("test_loss = $test_loss, test_accuracy = $test_acc")


train_loss = 11512.14, train_accuracy = 0.0994
test_loss = 4611.091, test_accuracy = 0.093


In [9]:
batch_size = 128
train_loader = Flux.Data.DataLoader((train_x, train_y), batchsize=batch_size, shuffle=true);

In [11]:
function train(model, train_loader, optimizer, model_params,train_x,train_y,test_x, test_y,model_name)
    train_losses = []
    train_accuracy = []
    test_losses = []
    test_accuracy = []
    α = 0.001     # <- stepsize; in the ML community, it is often denoted as a `learning rate η`
    opt = optimizer(α) 
    K = 10         # <- Epoch limit; in the ML community, one full iteration over all sub loss functions is often referred to as `epoch`
    for k in 1:K
        for (u, w) in train_loader
            gs = gradient(() -> Flux.Losses.crossentropy(model(u), w), model_params) # compute gradient
            Flux.Optimise.update!(opt, model_params, gs) # update parameters
        end
        println("Epoch $k for $model_name architecture.")
        train_loss, train_acc = loss_and_accuracy(train_x, train_y,  model)
        push!(train_losses, train_loss)
        push!(train_accuracy, train_acc)
        test_loss, test_acc = loss_and_accuracy(test_x, test_y, model)
        push!(test_losses, test_loss)
        push!(test_accuracy, test_acc)
        println("  train_loss = $train_loss, train_accuracy = $train_acc")
        println("  test_loss = $test_loss, test_accuracy = $test_acc")
    end
    return train_losses, train_accuracy, test_losses, test_accuracy
end

train (generic function with 1 method)

In [None]:
vgg1_train_loss, 
vgg1_train_accuracy, 
vgg1_test_loss, 
vgg1_test_accuracy = train(model_VGG1, train_loader, ADAM, Flux.params(model_VGG1), 
    train_x, train_y,test_x, test_y, "VGG1")

Epoch 1 for VGG1 architecture.
  train_loss = 7460.476, train_accuracy = 0.4698
  test_loss = 3195.1094, test_accuracy = 0.4365
Epoch 2 for VGG1 architecture.
  train_loss = 6046.1875, train_accuracy = 0.5718
  test_loss = 2961.4414, test_accuracy = 0.4745
Epoch 3 for VGG1 architecture.
  train_loss = 4968.785, train_accuracy = 0.6646
  test_loss = 2864.5508, test_accuracy = 0.487
Epoch 4 for VGG1 architecture.
  train_loss = 4068.1018, train_accuracy = 0.7268
  test_loss = 2866.0874, test_accuracy = 0.509
Epoch 5 for VGG1 architecture.
  train_loss = 3483.1042, train_accuracy = 0.777
  test_loss = 3012.62, test_accuracy = 0.5015
Epoch 6 for VGG1 architecture.
  train_loss = 2892.46, train_accuracy = 0.8096
  test_loss = 3390.0173, test_accuracy = 0.486


In [None]:
vgg2_train_loss, 
vgg2_train_accuracy, 
vgg2_test_loss, 
vgg2_test_accuracy = train(model_VGG2, train_loader, ADAM,Flux.params(model_VGG2), train_x, train_y, "VGG2")

In [None]:
vgg3_train_loss, 
vgg3_train_accuracy, 
vgg3_test_loss, 
vgg3_test_accuracy = train(model_VGG3, train_loader, ADAM,Flux.params(model_VGG3), train_x, train_y, "VGG3")