Here we do an exploration of the Iris data set to see if the bias term is significant...

In [12]:
] activate .

In [13]:
using DelimitedFiles
using Flux,  Statistics
using Flux: onehotbatch, onecold, crossentropy, throttle
using Base.Iterators: repeated

In [14]:
# Work from the directory reported by @__DIR__ so the relative path below resolves there.
cd(@__DIR__)

# Fetch the data file only when no local copy is present.
if !isfile("iris.data")
    download(
        "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data",
        "iris.data",
    )
end

# Parse the file as comma-delimited; the trailing `;` suppresses the cell output.
data = readdlm("iris.data", ',');

In [9]:
# Columns 1-4 of the table are used as the input features.
features = data[:, 1:4];
# The last column is used as the class label of each row.
classes = data[:, end];

In [11]:
# Distinct labels, in order of first appearance.
u_classes = unique(classes)
# Replace every label by its position within `u_classes`.
decoded_classes = map(classes) do label
    findfirst(candidate -> candidate == label, u_classes)
end;

In [4]:
# Transpose so that each column is one sample, and force a concrete Float64 matrix.
X = convert(Matrix{Float64}, features');
# One-hot encode the integer class codes against the label set 1:3.
Y = onehotbatch(decoded_classes, 1:3);

In [5]:
# Train a bias-free two-layer linear network (4 -> 5 -> 3, softmax output) for an
# increasing number of training iterations and print the final loss averaged over
# 30 randomly initialised runs per setting.
for repetitions in 500:500:3500
    println("Repetitions: ", repetitions)
    lossesWithoutBias = []
    for _ in 1:30
        # Fresh random weights for every run.
        W1 = param(rand(5, 4))
        W2 = param(rand(3, 5))
        W = Flux.Params([W1, W2])

        # No bias terms: both layers are plain matrix products.
        function modelWithoutBias(x)
            hidden = W1 * x
            logits = W2 * hidden
            return softmax(logits)
        end
        loss(x, y) = crossentropy(modelWithoutBias(x), y)

        # `repeated` yields the same (X, Y) batch `repetitions` times.
        dataset = repeated((X, Y), repetitions)
        opt = ADAM(W)

        Flux.train!(loss, dataset, opt)
        push!(lossesWithoutBias, loss(X, Y))
    end
    println("Loss = ", mean(lossesWithoutBias))
end


Repetitions: 500
Loss = 0.6872719703507694 (tracked)
Repetitions: 1000
Loss = 0.422628938690908 (tracked)
Repetitions: 1500
Loss = 0.27181547171747245 (tracked)
Repetitions: 2000
Loss = 0.1898477374686207 (tracked)
Repetitions: 2500
Loss = 0.12760106784943112 (tracked)
Repetitions: 3000
Loss = 0.09598870641249985 (tracked)
Repetitions: 3500
Loss = 0.09018862994700202 (tracked)


In [6]:
# Train a two-layer linear network with bias terms (4 -> 5 -> 3, softmax output) for
# an increasing number of training iterations and print the final loss averaged over
# 30 randomly initialised runs per setting.
for repetitions in 500:500:3500
    println("Repetitions: ", repetitions)
    lossesWithBias = []
    for _ in 1:30
        # Fresh random weights and biases for every run.
        W1 = param(rand(5, 4))
        W2 = param(rand(3, 5))

        B1 = param(rand(5))
        B2 = param(rand(3))

        W = Flux.Params([W1, W2, B1, B2])

        # Each layer adds its bias vector, broadcast across the sample columns.
        function modelWithBias(x)
            hidden = W1 * x .+ B1
            logits = W2 * hidden .+ B2
            return softmax(logits)
        end
        loss(x, y) = crossentropy(modelWithBias(x), y)

        # `repeated` yields the same (X, Y) batch `repetitions` times.
        dataset = repeated((X, Y), repetitions)
        opt = ADAM(W)

        Flux.train!(loss, dataset, opt)

        push!(lossesWithBias, loss(X, Y))
    end
    println("Loss = ", mean(lossesWithBias))
end

Repetitions: 500
Loss = 0.6786520849579296 (tracked)
Repetitions: 1000
Loss = 0.41155734902170066 (tracked)
Repetitions: 1500
Loss = 0.2543700982866663 (tracked)
Repetitions: 2000
Loss = 0.17009766037887833 (tracked)
Repetitions: 2500
Loss = 0.12448394492268515 (tracked)
Repetitions: 3000
Loss = 0.08256327975300788 (tracked)
Repetitions: 3500
Loss = 0.07442579291225397 (tracked)
