In [1]:
using Flux
using Flux.Data: DataLoader
using Flux: onehotbatch, onecold, @epochs
using Flux.Losses: logitcrossentropy
using MLDatasets

In [2]:
# Uncomment to let DataDeps download MNIST without asking for interactive confirmation
#ENV["DATADEPS_ALWAYS_ACCEPT"] = "true"

In [3]:
# Complete training split: Float32 images stacked along the last dimension, plus labels
(train_x, train_y) = MLDatasets.MNIST.traindata(Float32)

# Complete test split, loaded the same way
(test_x, test_y) = MLDatasets.MNIST.testdata(Float32)

(Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

...

Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], [7, 2, 1, 0, 4, 1, 4, 9, 5, 9  …  7, 8, 9, 0, 1, 2, 3, 4, 5, 6])

In [4]:
# Sanity check: print the array dimensions of both image sets
println("Size of train set: $(size(train_x))")
println("Size of test set: $(size(test_x))")

Size of train set: (28, 28, 60000)
Size of test set: (28, 28, 10000)


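Before we can feed the MNIST data into our Flux model, we need to transform it: each 28×28 image is flattened into a 784-element vector, the labels are one-hot encoded, and both are wrapped in mini-batch iterators.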

In [5]:
# Collapse every image into a single column, keeping the last (sample) dimension,
# so the arrays become matrices with one column per image
train_x = train_x |> Flux.flatten
test_x = test_x |> Flux.flatten

784×10000 Array{Float32,2}:
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  

In [6]:
# Turn the integer digit labels into one-hot columns over the classes 0 through 9
train_y, test_y = map(labels -> onehotbatch(labels, 0:9), (train_y, test_y))

(Bool[0 1 … 0 0; 0 0 … 0 0; … ; 0 0 … 0 1; 0 0 … 0 0], Bool[0 0 … 0 0; 0 0 … 0 0; … ; 0 0 … 0 0; 0 0 … 0 0])

In [7]:
# Mini-batch iterators over (images, labels) pairs, 256 samples per batch.
# Only the training loader is shuffled; the test loader keeps the original order.
train_data_loader = DataLoader((train_x, train_y); batchsize=256, shuffle=true)
test_data_loader = DataLoader((test_x, test_y); batchsize=256)

DataLoader{Tuple{Array{Float32,2},Flux.OneHotMatrix{Array{Flux.OneHotVector,1}}}}((Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], Bool[0 0 … 0 0; 0 0 … 0 0; … ; 0 0 … 0 0; 0 0 … 0 0]), 256, 10000, true, 10000, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10  …  9991, 9992, 9993, 9994, 9995, 9996, 9997, 9998, 9999, 10000], false)

In [8]:
# Mean `logitcrossentropy` of `model` over every sample produced by `data_loader`.
#
# `data_loader` is iterated as `(x, y)` mini-batches. Each batch contributes the
# *sum* of its per-sample losses (`agg=sum`); dividing by the total sample count
# at the end weights a smaller final batch correctly.
function loss(data_loader, model)
    loss_sum = 0.0f0
    sample_count = 0
    for (inputs, targets) in data_loader
        loss_sum += logitcrossentropy(model(inputs), targets, agg=sum)
        # The size of the last dimension is taken as the number of samples in the batch
        sample_count += size(inputs)[end]
    end
    return loss_sum / sample_count
end


# Fraction of samples from `data_loader` that `model` classifies correctly.
#
# `data_loader` is iterated as `(x, y)` mini-batches. For each batch the predicted
# class (`onecold` of the model output) is compared with the true class (`onecold`
# of `y`). Returns the number of matches divided by the number of samples seen.
function accuracy(data_loader, model)
    # Named `n_correct` rather than `accuracy` so the local does not shadow this function
    n_correct = 0
    n_seen = 0
    for (inputs, targets) in data_loader
        predicted = onecold(model(inputs))
        actual = onecold(targets)
        n_correct += count(predicted .== actual)
        # The size of the last dimension is taken as the number of samples in the batch
        n_seen += size(inputs)[end]
    end

    return n_correct / n_seen
end

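# We track both metrics: the loss is the differentiable quantity the optimizer
# minimizes, while accuracy is the easier-to-interpret fraction of correctly
# classified images (it is not differentiable, so it cannot be optimized directly).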

accuracy (generic function with 1 method)

In [9]:
# Build the classifier
img_size = (28, 28, 1)  # shape of a single MNIST image
n_classes = 10          # one output per digit

# A small fully connected network: the flattened image (prod(img_size) values)
# feeds a hidden layer of 32 neurons with ReLU activation, followed by an output
# layer with n_classes neurons that produces one raw score (logit) per class.
model = Chain(
    Dense(prod(img_size), 32, relu),
    Dense(32, n_classes),
)

# Collect the model's trainable parameters
ps = Flux.params(model)

Params([Float32[-0.004591364 -0.029729255 … 0.07734814 -0.021867776; -0.023276465 -0.042033613 … -0.008883792 -0.011712046; … ; 0.06752823 0.07467249 … 0.06259729 0.010588841; -0.02407101 -0.07415057 … -0.0541573 0.011051903], Float32[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], Float32[-0.18165545 -0.03709334 … -0.21325627 0.22060108; -0.35121942 -0.33347034 … -0.082487874 0.28123042; … ; 0.3670341 0.18998665 … -0.20228285 -0.14513235; 0.12589918 0.2982511 … -0.028587235 0.035729744], Float32[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]])

In [10]:
 ## Optimizer
# Learning rate handed to the ADAM optimizer
η = 3e-4
opt = ADAM(η)

ADAM(0.0003, (0.9, 0.999), IdDict{Any,Any}())

In [11]:
# Number of full passes over the training data
epochs = 20

for epoch in 1:epochs
    # One optimizer step per mini-batch
    for (x, y) in train_data_loader
        # Gradient of the batch loss with respect to the tracked parameters
        gs = gradient(ps) do
            logitcrossentropy(model(x), y)
        end
        # Apply the optimizer's update rule to the parameters
        Flux.Optimise.update!(opt, ps, gs)
    end

    # After each epoch, evaluate loss and accuracy on the full train and test data
    train_loss = loss(train_data_loader, model)
    train_acc = accuracy(train_data_loader, model)
    test_loss = loss(test_data_loader, model)
    test_acc = accuracy(test_data_loader, model)

    println("Epoch=$epoch")
    println("  train_loss = $train_loss, train_accuracy = $train_acc")
    println("  test_loss = $test_loss, test_accuracy = $test_acc")
end

Epoch=1
  train_loss = 0.5584692, train_accuracy = 0.8673666666666666
  test_loss = 0.53656596, test_accuracy = 0.8756
Epoch=2
  train_loss = 0.37753665, train_accuracy = 0.9006833333333333
  test_loss = 0.36366037, test_accuracy = 0.9047
Epoch=3
  train_loss = 0.31881, train_accuracy = 0.9132833333333333
  test_loss = 0.3089058, test_accuracy = 0.9163
Epoch=4
  train_loss = 0.28730088, train_accuracy = 0.9206166666666666
  test_loss = 0.28172365, test_accuracy = 0.922
Epoch=5
  train_loss = 0.26431516, train_accuracy = 0.92645
  test_loss = 0.2608685, test_accuracy = 0.9266
Epoch=6
  train_loss = 0.24699712, train_accuracy = 0.9318833333333333
  test_loss = 0.24530308, test_accuracy = 0.93
Epoch=7
  train_loss = 0.23527534, train_accuracy = 0.9336666666666666
  test_loss = 0.23613752, test_accuracy = 0.9327
Epoch=8
  train_loss = 0.22115913, train_accuracy = 0.9383
  test_loss = 0.22409585, test_accuracy = 0.9349
Epoch=9
  train_loss = 0.21003546, train_accuracy = 0.9408166666666666
 