### Preliminaries

In [2]:
using Flux, Statistics
using Flux: crossentropy, normalise, onecold, onehotbatch
using Statistics: mean

In [5]:
# Read the Iris data using Flux's built-in dataset helpers.
features = Flux.Data.Iris.features()
labels = Flux.Data.Iris.labels();

# Show the dimensions of both arrays as a tuple.
size(features), size(labels)

((4, 150), (150,))

In [60]:
# Normalize data
# Subtract the mean and divide by the standard deviation along dims=2, so every feature
# row ends up with mean 0 and standard deviation 1.
# NOTE(review): the statistics are computed over all 150 columns, including the ones later
# held out as the test set.
normed_features = normalise(features, dims=2)

4×150 Array{Float64,2}:
 -0.900681  -1.14302   -1.38535   …   0.795669  0.432165   0.0686618
  1.03206   -0.124958   0.337848     -0.124958  0.800654  -0.124958 
 -1.34127   -1.34127   -1.39814       0.819624  0.933356   0.762759 
 -1.31298   -1.31298   -1.31298       1.05354   1.44796    0.790591 

In [61]:
# Encode each label as a one-hot column; the sorted distinct labels fix the row order.
klasses = labels |> unique |> sort
onehot_labels = onehotbatch(labels, klasses)

3×150 Flux.OneHotMatrix{Array{Flux.OneHotVector,1}}:
  true   true   true   true   true  …  false  false  false  false  false
 false  false  false  false  false     false  false  false  false  false
 false  false  false  false  false      true   true   true   true   true

In [62]:
# Split the samples 2/3 train, 1/3 test: every third column is held out for testing.
train_indices = vcat(1:3:150, 2:3:150)

X_train, y_train = normed_features[:, train_indices], onehot_labels[:, train_indices]

X_test = normed_features[:, 3:3:150]
y_test = onehot_labels[:, 3:3:150]

3×50 Flux.OneHotMatrix{Array{Flux.OneHotVector,1}}:
  true   true   true   true   true  …  false  false  false  false  false
 false  false  false  false  false     false  false  false  false  false
 false  false  false  false  false      true   true   true   true   true

In [63]:
# Declare a model taking 4 features as inputs and outputting 3 probabilities,
# one for each species of iris: a single dense layer followed by softmax.
model = Chain(
    Dense(4, 3),
    softmax
)

Chain(Dense(4, 3), NNlib.softmax)

In [64]:
# Objective: cross-entropy between the model's predictions for `x` and the targets `y`.
function loss(x, y)
    ŷ = model(x)
    return crossentropy(ŷ, y)
end

loss (generic function with 1 method)

In [65]:
# Plain gradient descent optimiser with learning rate 0.5; passed to `Flux.train!` below.
optimiser = Descent(0.5)


Descent(0.5)

In [66]:
# Present the full training set 110 times, i.e. 110 full-batch passes.
data_iterator = Iterators.repeated((X_train, y_train), 110)

# Fraction of columns whose predicted class index matches the true class index.
function accuracy(x, y)
    predicted = onecold(model(x))
    actual = onecold(y)
    return mean(predicted .== actual)
end

# Training callback: print the current accuracy on the held-out test set.
evalcb() = @show accuracy(X_test, y_test)

evalcb (generic function with 1 method)

In [67]:
println("Starting training.")
# Run one gradient step per element of `data_iterator`, invoking `evalcb` after each step.
# Alternative callbacks:
#   cb = Flux.throttle(evalcb, 10)   # report at most once every 10 seconds
#   cb = () -> println("training")   # plain progress marker
Flux.train!(loss, params(model), data_iterator, optimiser, cb = evalcb)

# Report how well the fitted model does on the data it was trained on.
println("\nTraining accuracy: $(accuracy(X_train, y_train))")

Starting training.
accuracy(X_test, y_test) = 0.48
accuracy(X_test, y_test) = 0.44
accuracy(X_test, y_test) = 0.62
accuracy(X_test, y_test) = 0.66
accuracy(X_test, y_test) = 0.68
accuracy(X_test, y_test) = 0.76
accuracy(X_test, y_test) = 0.78
accuracy(X_test, y_test) = 0.82
accuracy(X_test, y_test) = 0.8
accuracy(X_test, y_test) = 0.8
accuracy(X_test, y_test) = 0.76
accuracy(X_test, y_test) = 0.8
accuracy(X_test, y_test) = 0.8
accuracy(X_test, y_test) = 0.8
accuracy(X_test, y_test) = 0.8
accuracy(X_test, y_test) = 0.8
accuracy(X_test, y_test) = 0.8
accuracy(X_test, y_test) = 0.8
accuracy(X_test, y_test) = 0.82
accuracy(X_test, y_test) = 0.82
accuracy(X_test, y_test) = 0.82
accuracy(X_test, y_test) = 0.82
accuracy(X_test, y_test) = 0.82
accuracy(X_test, y_test) = 0.82
accuracy(X_test, y_test) = 0.82
accuracy(X_test, y_test) = 0.82
accuracy(X_test, y_test) = 0.82
accuracy(X_test, y_test) = 0.82
accuracy(X_test, y_test) = 0.82
accuracy(X_test, y_test) = 0.84
accuracy(X_test, y_test) = 0.8

In [68]:
"""
    confusion_matrix(X, y)

Return the confusion matrix of `model` on inputs `X` against one-hot targets `y`.

Entry `(i, j)` counts the columns whose true class is `i` and whose predicted class is `j`.
The number of classes is taken from the number of rows of `y`.
"""
function confusion_matrix(X, y)
    nclasses = size(y, 1)
    # Re-encode the predicted class indices as one-hot columns so the product below counts
    # co-occurrences of (true class, predicted class).
    ŷ = onehotbatch(onecold(model(X)), 1:nclasses)
    return y * ŷ'
end
display(confusion_matrix(X_test, y_test))

3×3 Array{Int64,2}:
 16   0   0
  0  16   1
  0   2  15