In [2]:
using Flux, Statistics, MLDatasets, DataFrames, OneHotArrays
Iris()

dataset Iris:
  metadata   =>    Dict{String, Any} with 4 entries
  features   =>    150×4 DataFrame
  targets    =>    150×1 DataFrame
  dataframe  =>    150×5 DataFrame

In [3]:
x, y = Iris(as_df=false)[:]

(features = [5.1 4.9 … 6.2 5.9; 3.5 3.0 … 3.4 3.0; 1.4 1.4 … 5.4 5.1; 0.2 0.2 … 2.3 1.8], targets = InlineStrings.String15["Iris-setosa" "Iris-setosa" … "Iris-virginica" "Iris-virginica"])

In [4]:
y, summary(x)

(InlineStrings.String15["Iris-setosa" "Iris-setosa" … "Iris-virginica" "Iris-virginica"], "4×150 Matrix{Float64}")

In [5]:
x = Float32.(x)
y = vec(y)
onehoty = unique(y) .== permutedims(y)

3×150 BitMatrix:
 1  1  1  1  1  1  1  1  1  1  1  1  1  …  0  0  0  0  0  0  0  0  0  0  0  0
 0  0  0  0  0  0  0  0  0  0  0  0  0     0  0  0  0  0  0  0  0  0  0  0  0
 0  0  0  0  0  0  0  0  0  0  0  0  0     1  1  1  1  1  1  1  1  1  1  1  1

In [6]:
const classes =  ["Iris-setosa", "Iris-versicolor", "Iris-virginica"];
fluxonehoty = onehotbatch(y, classes)

3×150 OneHotMatrix(::Vector{UInt32}) with eltype Bool:
 1  1  1  1  1  1  1  1  1  1  1  1  1  …  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅     ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅     1  1  1  1  1  1  1  1  1  1  1  1

In [7]:
W = rand(Float32, 3, 4)
b = [0.0f0, 0.0f0, 0.0f0]
m(W, b, x) = W * x .+ b

softmax(x) = exp.(x) ./ sum(exp.(x), dims=1)
model(W, b, x) = softmax(m(W, b, x))

size(model(W,b,x))

(3, 150)

In [8]:
all(0 .<= model(W, b, x) .<= 1), sum(model(W, b, x), dims=1)

(true, Float32[1.0 1.0 … 1.0 1.0])

In [9]:
fluxmodel = Chain(Dense(4 => 3), softmax)

Chain(
  Dense(4 => 3),                        [90m# 15 parameters[39m
  Main.softmax,
) 

In [10]:
_logitcrossentropy(ŷ, y) = mean(.-sum(y .* logsoftmax(ŷ; dims=1); dims=1))

function lossfunc(weights, biases, features, onehotlabels)
    ŷ = model(weights, biases, features)
    _logitcrossentropy(ŷ, onehotlabels)
end

function fluxloss(fluxmodel, features, onehotlabels)
    ŷ = fluxmodel(features)
    Flux.logitcrossentropy(ŷ, onehotlabels)
end
lossfunc(W, b, x, onehoty), fluxloss(fluxmodel, x, fluxonehoty)

(1.142283f0, 1.2121679f0)

In [11]:
argmax(onehoty, dims=1) # calculate the cartesian index of max element column-wise
maxelement_idx = [x[1] for x in argmax(onehoty; dims=1)]

1×150 Matrix{Int64}:
 1  1  1  1  1  1  1  1  1  1  1  1  1  …  3  3  3  3  3  3  3  3  3  3  3  3

In [12]:
function onecold(onehotlabels)
    maxelement_idx = [x[1] for x in argmax(onehotlabels; dims=1)]
    return vec(classes[maxelement_idx])
end

onecold(onehoty)

150-element Vector{String}:
 "Iris-setosa"
 "Iris-setosa"
 "Iris-setosa"
 "Iris-setosa"
 "Iris-setosa"
 "Iris-setosa"
 "Iris-setosa"
 "Iris-setosa"
 "Iris-setosa"
 "Iris-setosa"
 "Iris-setosa"
 "Iris-setosa"
 "Iris-setosa"
 ⋮
 "Iris-virginica"
 "Iris-virginica"
 "Iris-virginica"
 "Iris-virginica"
 "Iris-virginica"
 "Iris-virginica"
 "Iris-virginica"
 "Iris-virginica"
 "Iris-virginica"
 "Iris-virginica"
 "Iris-virginica"
 "Iris-virginica"

In [13]:
all(Flux.onecold(onehoty, classes) .== onecold(onehoty))

true

In [14]:
accuracy(W, b, x, y) = mean(onecold(model(W, b, x)) .== y)
accuracy(W, b, x, y)

0.32

In [15]:
fluxaccuracy(x, y) = mean(Flux.onecold(fluxmodel(x), classes) .== y)
fluxaccuracy(x, y)

0.3333333333333333

In [16]:

dldW, dldb, _, _ = gradient(lossfunc, W, b, x, onehoty)
W .= W .- 0.1 .* dldW
b .= b .- 0.1 .* dldb
lossfunc(W, b, x, onehoty)

1.1311362f0

In [18]:
function trainmodel!(floss, weights, biases, features, onehotlabels)
    dldW, dldb, _, _ = gradient(floss, weights, biases, features, onehotlabels)
    weights .= weights .- 0.1 .* dldW
    biases .= biases .- 0.1 .* dldb
end

for i in 1:500
    trainmodel!(lossfunc, W, b, x, onehoty)
    accuracy(W, b, x, y) >= 0.98 && break
end

@show accuracy(W, b, x, y)

accuracy(W, b, x, y) = 

0.98


0.98

In [19]:
lossfunc(W, b, x, onehoty)

0.6834657f0

In [20]:
fluxloss(fluxmodel, x, fluxonehoty)

1.2121679f0

In [31]:

function trainfluxmodel(floss, model, features, onehotlabels)
    dldm, _, _ = gradient(floss, model, features, onehotlabels)
    @. model[1].weight = model[1].weight - 0.1 * dldm[:layers][1][:weight]
    @. model[1].bias = model[1].bias - 0.1 * dldm[:layers][1][:bias]
end

for i in 1:500
    trainfluxmodel(fluxloss, fluxmodel, x, fluxonehoty)
    fluxaccuracy(x, y) >= 0.98 && break
end

@show fluxaccuracy(x, y)

fluxaccuracy(x, y) = 0.6666666666666666


0.6666666666666666

In [32]:
fluxloss(fluxmodel, x, fluxonehoty)

0.8667208f0