# Hyperparameter Optimization with PSO

In [1]:
using Flux
using MLDatasets
using Random

In [2]:
# Load CIFAR10 images and labels as Float32, requesting the range 1:5000 from
# both the training and the test split.
# NOTE(review): presumably the second argument selects sample indices — confirm
# against the installed MLDatasets version.
train_x, train_y = CIFAR10.traindata(Float32, 1:5000)
test_x, test_y = CIFAR10.testdata(Float32, 1:5000);

In [3]:
# Replace the label vectors by their one-hot encodings over the classes 0:9.
train_y = Flux.onehotbatch(train_y, 0:9)
test_y = Flux.onehotbatch(test_y, 0:9);

In [4]:
"""
    loss_and_accuracy(udata, wdata, model)

Evaluate `model` on `udata` and compare its output with the targets `wdata`.

Returns a tuple `(loss, accuracy)`: `loss` is the cross-entropy summed over all
entries (`agg=sum`), and `accuracy` is the fraction of samples whose
`Flux.onecold` prediction equals the `Flux.onecold` of the target. The sample
count is taken from the 4th dimension of `udata`.
"""
function loss_and_accuracy(udata, wdata, model)
    predictions = model(udata)
    nsamples = size(udata, 4)
    # Number of samples where the predicted class index equals the target's.
    hits = count(Flux.onecold(predictions) .== Flux.onecold(wdata))
    total_loss = Flux.crossentropy(predictions, wdata; agg=sum)
    return total_loss, hits / nsamples
end

loss_and_accuracy (generic function with 1 method)

In [5]:
# Number of samples per mini-batch.
batch_size = 8
# Shuffled mini-batch iterator over the training images and their one-hot labels.
train_loader = Flux.Data.DataLoader(
    (train_x, train_y);
    batchsize=batch_size,
    shuffle=true,
);

In [6]:
# One member of the swarm used by the hyperparameter search.
# Mutable because position, velocity and the personal best are reassigned
# while the swarm is updated.
mutable struct Particle 
    # Current hyperparameter vector; its entries are read by index when the
    # network is built (filter sizes, channel counts, pool sizes, dense width).
    position::Vector{Int}
    # Position at which this particle recorded its best accuracy.
    best_position::Vector{Int}
    # Best accuracy recorded for this particle so far.
    best_accuracy::Float32
    # Per-dimension velocity that is added to `position` on each update.
    velocity::Vector{Float32}
end

In [10]:
"""
    hyper_parameterized(particle; imgsize=32)

Build the convolutional classifier described by `particle.position`.

The position vector is read as
`[SF1, n1, SF2, n2, SP1, SF3, n3, SF4, n4, SP2, SF5, n5, SF6, n6, SP3, SD1]`:
`SF*` are square filter sizes, `n*` are output channel counts, `SP1`/`SP2` are
the windows of the first two max-pool layers and `SD1` is the width of the
hidden dense layer. The last max-pool uses a fixed 2x2 window, so entry 15
(`SP3`) does not influence the network.

# Keywords
- `imgsize`: side length of the square input images (default 32, the size of
  CIFAR-10 images). It is used to size the first dense layer.

# Throws
- `ArgumentError` if the pooling windows shrink the feature map to nothing.
"""
function hyper_parameterized(particle; imgsize=32)
    pos = particle.position

    # Every convolution uses SamePad with stride 1 and therefore keeps the
    # spatial size; only the three max-pools shrink it. A max-pool with its
    # default stride (equal to the window) and no padding maps a side of
    # length s to s ÷ window.
    side = imgsize ÷ pos[5] ÷ pos[10] ÷ 2
    side >= 1 || throw(ArgumentError(
        "pool sizes $(pos[5]) and $(pos[10]) leave no pixels of a $(imgsize)x$(imgsize) input"))

    return Chain(
        Conv((pos[1], pos[1]), 3 => pos[2], relu, pad=SamePad()),
        Conv((pos[3], pos[3]), pos[2] => pos[4], relu, pad=SamePad()),
        MaxPool((pos[5], pos[5])),
        Dropout(0.2),
        Conv((pos[6], pos[6]), pos[4] => pos[7], relu, pad=SamePad()),
        Conv((pos[8], pos[8]), pos[7] => pos[9], relu, pad=SamePad()),
        MaxPool((pos[10], pos[10])),
        Dropout(0.2),
        Conv((pos[11], pos[11]), pos[9] => pos[12], relu, pad=SamePad()),
        Conv((pos[13], pos[13]), pos[12] => pos[14], relu, pad=SamePad()),
        MaxPool((2, 2)),
        Dropout(0.2),
        Flux.flatten,
        # Flattened feature count: side x side pixels times n6 channels.
        Dense(side^2 * pos[14], pos[16], relu),
        Dropout(0.2),
        Dense(pos[16], 10),
        softmax)
end

hyper_parameterized (generic function with 1 method)

In [11]:
"""
    hyper_parameter_optimization(train_x, train_y, test_x, test_y,
                                 numparticles, ω, c1, c2; iterations=1)

Search the 16-dimensional network hyperparameter space with particle swarm
optimization and return the best position found.

Each particle's fitness is the accuracy that `loss_and_accuracy` reports for
the model built by `hyper_parameterized` on `train_x`/`train_y`. The models are
freshly constructed and are not trained inside this function.

# Arguments
- `train_x`, `train_y`: data used to score every candidate model.
- `test_x`, `test_y`: accepted for interface compatibility; currently unused.
- `numparticles`: swarm size, must be at least 1.
- `ω`: inertia weight, i.e. how much of the previous velocity is kept.
- `c1`: attraction towards the particle's own best position.
- `c2`: attraction towards the swarm's best position.

# Keywords
- `iterations`: number of update sweeps over the swarm (default 1).

# Returns
The best position vector found, laid out as
`[SF1, n1, SF2, n2, SP1, SF3, n3, SF4, n4, SP2, SF5, n5, SF6, n6, SP3, SD1]`.

# Throws
- `ArgumentError` if `numparticles < 1`.
"""
function hyper_parameter_optimization(train_x, train_y, test_x, test_y, numparticles, ω, c1,c2;
                                      iterations=1)
    numparticles >= 1 ||
        throw(ArgumentError("numparticles must be at least 1, got $numparticles"))

    # Per-dimension bounds of the search space; positions are clamped to them
    # after every update.
    lo = [2,1,2,1,2,2,1,2,1,2,2,2,2,2,2,10]
    hi = [10,48,10,100,4,10,100,10,100,4,10,256,10,256,4,1000]
    particles = Particle[]

    # init particles - search space is 16 variables
    #   #1   #2  #3   #4  #5   #6   #7  #8   #9  #10  #11  #12 #13 #14 #15  #16
    # [SF1, n1, SF2, n2, SP1, SF3, n3, SF4, n4, SP2, SF5, n5, SF6, n6, SP3, SD1]
    for _ in 1:numparticles
        SF1 = rand(2:10)
        n1 = rand(3:48)
        SF2 = rand(2:10)
        n2 = rand(n1:100)       # channel counts never decrease with depth
        SP1 = rand(2:4)
        SF3 = rand(2:10)
        n3 = rand(n2:100)
        SF4 = rand(2:10)
        n4 = rand(n3:100)
        SP2 = rand(2:4)
        SF5 = rand(2:10)
        n5 = rand(n4:256)
        SF6 = rand(2:10)
        n6 = rand(n5:256)
        SP3 = rand(2:4)
        SD1 = rand(10:1000)
        start = [SF1,n1,SF2,n2,SP1,SF3,n3,SF4,n4,SP2,SF5,n5,SF6,n6,SP3,SD1]
        push!(particles, Particle(start, copy(start), 0, rand(Float32,16)))
    end

    # Both values are declared here, in function scope, so that the loops
    # below update these variables instead of creating loop-local ones.
    swarm_best_position = copy(first(particles).position)
    best_accuracy = 0.0

    # find best model to work towards
    for p in particles
        m = hyper_parameterized(p)
        _, this_acc = loss_and_accuracy(train_x, train_y, m)
        p.best_accuracy = this_acc
        if this_acc > best_accuracy
            best_accuracy = this_acc
            swarm_best_position = copy(p.position)
            println(swarm_best_position)
        end
    end

    println("Particles initalized")

    for _ in 1:iterations
        for p in particles
            # Random weights in [0, 1) scale the pull towards the personal and
            # the swarm best, so a particle is never pushed away from them.
            r1 = rand(Float32)
            r2 = rand(Float32)
            p.velocity = (ω .* p.velocity) .+
                (r1 * c1 .* (p.best_position .- p.position)) .+
                (r2 * c2 .* (swarm_best_position .- p.position))
            # Keep every coordinate inside its own search range.
            p.position = clamp.(trunc.(Int, p.position .+ p.velocity), lo, hi)
            model = hyper_parameterized(p)
            _, this_acc = loss_and_accuracy(train_x, train_y, model)

            if p.best_accuracy < this_acc
                p.best_accuracy = this_acc
                p.best_position = copy(p.position)
            end

            if best_accuracy < this_acc
                best_accuracy = this_acc
                swarm_best_position = copy(p.position)
                println(best_accuracy)
            end

            println(swarm_best_position)
        end
    end
    return swarm_best_position
end

hyper_parameter_optimization (generic function with 1 method)

In [None]:
# Run the search with 4 particles, ω = 0.5, c1 = 0.3 and c2 = 0.4; `b_p` holds
# the returned best position vector.
b_p = hyper_parameter_optimization(train_x, train_y,test_x, test_y, 4, 0.5, 0.3, 0.4)