In [1]:
module MNIST
using Knet,Compat,GZip

"""
    predict(w, x)

Run the input `x` through the layers stored in `w` and return the output of the
last layer.

`w` is read pairwise: `w[k]` multiplies the running activation and `w[k+1]` is
added to it (broadcast). `relu` is applied after every pair except the last one.
"""
function predict(w, x)
    lastweight = length(w) - 1
    activation = x
    for k in 1:2:length(w)
        activation = w[k] * activation .+ w[k+1]
        # hidden layers only: relu(a) is max(0, a); the final pair stays linear
        k < lastweight && (activation = relu(activation))
    end
    return activation
end

"""
    loss(w, x, ygold)

Return the negative sum of `ygold .* logp(predict(w, x), 1)` divided by the
number of columns of `ygold`.
"""
function loss(w, x, ygold)
    # logp(y, 1) is y .- log(sum(exp(y), 1)), i.e. normalization along dimension 1
    logprob = logp(predict(w, x), 1)
    ninstance = size(ygold, 2)
    return -sum(ygold .* logprob) / ninstance
end

# Gradient function derived from `loss` through `grad`. `train` calls it with the
# same arguments as `loss` and reads one gradient entry per entry of `w`.
lossgradient = grad(loss)

"""
    train(w, dtrn; lr=.5, epochs=10)

Make `epochs` passes over the `(x, y)` pairs in `dtrn`. For every pair, evaluate
`lossgradient(w, x, y)` and replace each `w[i]` by `w[i] - lr * g[i]`.
Returns `w`, whose entries have been replaced.
"""
function train(w, dtrn; lr=.5, epochs=10)
    for epoch in 1:epochs, (x, y) in dtrn
        g = lossgradient(w, x, y)
        for i in eachindex(w)
            w[i] -= lr * g[i]
        end
    end
    return w
end

"""
    err(w, dtrn)

Return the sum of `loss(w, x, ygold)` over all `(x, ygold)` pairs in `dtrn`
(`0.0` when `dtrn` is empty).
"""
function err(w, dtrn)
    total = 0.0
    for batch in dtrn
        x, ygold = batch
        total += loss(w, x, ygold)
    end
    return total
end

"""
    accuracy(w, dtst)

Return the fraction of instances in `dtst` for which the column maximum of
`predict(w, x)` sits where `ygold` is nonzero.

`dtst` yields `(x, ygold)` pairs; instances are the columns of `ygold`.
"""
function accuracy(w, dtst)
    ncorrect = 0
    ninstance = 0
    for (x, ygold) in dtst
        ypred = predict(w, x)
        # true exactly at the entries equal to their column's maximum
        hits = ypred .== maximum(ypred,1)
        ncorrect += sum(ygold .* hits)
        ninstance += size(ygold,2)
    end
    return ncorrect/ninstance
end

"""
    weights(h...; atype=Array{Float32}, winit=0.1, xrows=28*28, yrows=10)

Create the parameter list for a fully connected network.

# Arguments
- `h...`: sizes of the hidden layers, in order (may be empty).

# Keywords
- `atype`: array type every parameter is converted to.
- `winit`: factor applied to the `randn` draws of the weight matrices.
- `xrows`: number of inputs of the first layer (same name and meaning as in
  `minibatch`).
- `yrows`: number of outputs of the last layer.

# Returns
An `Any` vector `[W1, b1, W2, b2, ...]` where each `W` is `nout`-by-`nin` and
each `b` is an `nout`-by-1 array of zeros.
"""
function weights(h...; atype=Array{Float32}, winit=0.1, xrows=28*28, yrows=10)
    w = Any[]
    nin = xrows
    for nout in (h..., yrows)
        push!(w, convert(atype, winit*randn(nout, nin)))
        push!(w, convert(atype, zeros(nout, 1)))
        nin = nout  # the next layer consumes this layer's output
    end
    return w
end

"""
    minibatch(x, y, batchsize; atype=Array{Float32}, xrows=784, yrows=10, xscale=255)

Split the flat input `x` and the label vector `y` into `(xbatch, ybatch)` pairs.

# Arguments
- `x`: values of all instances laid out one after another, `xrows` per instance.
- `y`: one integer label per instance. Label `0` is stored in row `yrows`; any
  other label `k` is stored in row `k`.
- `batchsize`: number of instances per pair.

# Keywords
- `atype`: array type both members of every pair are converted to.
- `xrows`: number of values per instance.
- `yrows`: number of rows of the one-hot label matrix.
- `xscale`: divisor applied to every value of `x`.

# Returns
An `Any` vector of tuples `(xbatch, ybatch)`, `xbatch` being `xrows`-by-`batchsize`
and `ybatch` being `yrows`-by-`batchsize`. Instances left over after the last
full batch are not included.

# Throws
- `DimensionMismatch` if `div(length(x), xrows)` differs from `length(y)`.
"""
function minibatch(x, y, batchsize; atype=Array{Float32}, xrows=784, yrows=10, xscale=255)
    xcols = div(length(x), xrows)
    xcols == length(y) || throw(DimensionMismatch(
        "x holds $xcols instances of $xrows values but y holds $(length(y)) labels"))
    # Scale a flat run of values and shape it into an xrows-by-n matrix.
    xbatch(a) = convert(atype, reshape(a ./ xscale, xrows, div(length(a), xrows)))
    # One-hot encode the labels. Label 0 goes to the last row, so the target row
    # follows `yrows` instead of a fixed 10.
    function ybatch(a)
        rows = Int[v == 0 ? yrows : v for v in a]
        return convert(atype, sparse(rows, 1:length(a), one(eltype(a)), yrows, length(a)))
    end
    data = Any[]
    for i in 1:batchsize:xcols-batchsize+1
        j = i + batchsize - 1
        push!(data, (xbatch(x[1+(i-1)*xrows:j*xrows]), ybatch(y[i:j])))
    end
    return data
end

"""
    loaddata()

Read the four MNIST files through `gzload` and return the tuple
`(xtrn, xtst, ytrn, ytst)` of raw byte vectors. The first 16 bytes of each image
file and the first 8 bytes of each label file are dropped.
"""
function loaddata()
    info("Loading MNIST...")
    xtrn = gzload("train-images-idx3-ubyte.gz")[17:end]
    xtst = gzload("t10k-images-idx3-ubyte.gz")[17:end]
    ytrn = gzload("train-labels-idx1-ubyte.gz")[9:end]
    ytst = gzload("t10k-labels-idx1-ubyte.gz")[9:end]
    return xtrn, xtst, ytrn, ytst
end

"""
    gzload(file; path=..., url=...)

Return the decompressed contents of the gzip file `file`.

# Keywords
- `path`: local location of the file; defaults to `file` inside the `data`
  directory of the Knet package.
- `url`: where to download the file from when `path` does not exist; defaults to
  the file name appended to the MNIST base URL.

The file handle is closed even when reading fails.
"""
function gzload(file; path=Pkg.dir("Knet","data",file), url="http://yann.lecun.com/exdb/mnist/$file")
    isfile(path) || download(url, path)
    f = gzopen(path)
    try
        return @compat read(f)
    finally
        # runs on success and on error, so the handle never leaks
        close(f)
    end
end

# Read the MNIST arrays only when `xtrn` is not defined yet, so evaluating this
# code again in a scope that already holds the data skips `loaddata`.
if !isdefined(:xtrn)
    (xtrn,xtst,ytrn,ytst)=loaddata()
end

"""
    vector2weight(x)

Overwrite every entry of the module-level weight list `w` with a reshaped view of
consecutive elements of `x`.

The entries keep their current sizes and take their elements from `x` in order:
`w[1]` gets the first `length(w[1])` elements, `w[2]` the next ones, and so on.
The new entries are views, so they share memory with `x`. Returns `nothing`.
"""
function vector2weight(x)
    offset = 0  # number of elements of x already handed out
    for i in eachindex(w)
        dims = size(w[i])
        len = length(w[i])
        w[i] = reshape(view(x, offset+1:offset+len), dims)
        offset += len
    end
end
    
"""
    weight2vector(x)

Return a column (an `n`-by-1 `Float64` matrix of `randn` draws) with one element
per parameter stored in the weight list `x`, where `n` is the total number of
elements over all entries of `x`.

Only the sizes of the entries of `x` are used; their values are not copied into
the result.
"""
function weight2vector(x)
    # size the result from the argument, not from the module-level `w`
    return randn(sum(length, x), 1)
end
    
"""
    objective(x)

Load the vector `x` into the module-level weights `w` (via `vector2weight`) and
return `1 - accuracy(w, dtrn)` as a `Float64`.
"""
function objective(x)
    vector2weight(x)
    # alternative objective: err(w,dtrn)
    misclassified = 1 - accuracy(w, dtrn)
    return Float64(misclassified)
end
export objective

# Module-level state read by `objective` and `vector2weight`.
batchsize = 64
# Training and test data as lists of (x, y) batches of `batchsize` instances.
dtrn = minibatch(MNIST.xtrn, MNIST.ytrn, batchsize)
dtst = minibatch(MNIST.xtst, MNIST.ytst, batchsize)

# Network with a single hidden layer of 200 units; `vector2weight` overwrites
# the entries of `w` on every call to `objective`.
w = weights(200)
# Column with one element per network parameter (exported).
x0 = weight2vector(w)
export x0
end # module

[1m[34mINFO: Knet using GPU 1
[0m[1m[34mINFO: Loading MNIST...
[0m

MNIST

In [2]:
module NCS
export Particle,init_pop,optimize
# Factor used by update_parameters to adapt a particle's step sizes:
# σ is divided by r or multiplied by r.
const r = 0.99
# Scale of the random term added to 1 when update_parameters draws a new λ.
const λrange= 0.1
# Period (in iterations) of the step-size adaptation; also the divisor that
# turns a particle's `flag` counter into a rate.
const epoch = 100
# State of one member of the population: its current and trial positions with
# their objective and correlation values, plus its own search parameters.
type Particle{T,N}
    x::Array{T,N} # position
    v::T # value
    vn::T # normalized value
    x′::Array{T,N} # trial position
    v′::T  # trial value
    vn′::T # normalized trial value
    σ::Array{T,N} # per-dimension step sizes; σ.^2 are the diagonal elements of Σ (sampling covariance matrix)
    Corr::T # correlation value
    Corr′::T # trial correlation value
    flag::T # number of accepted trials since the last step-size adaptation
    λ::T # λ>0 is a parameter to balance exploration and exploitation.
end
# Text form of a particle: its position and its objective value. The two-argument
# method is the one used by display, `print` and string interpolation.
Base.show(io::IO, p::Particle) = print(io, "x=", p.x, " f(x)=", p.v)
# One-argument form: write the same text to standard output followed by a newline.
Base.show(p::Particle) = println("x=", p.x, " f(x)=", p.v)
"""
    init_pop(f, l, u; N=10)

Create `N` particles inside the box with lower corner `l` and upper corner `u`
and evaluate `f` at each particle's position.

Position and trial position are drawn independently as `l + (u-l).*rand`, the
step sizes are `(u-l)/N`, the accepted-trial counter is `0.0` and `λ` is `1.0`.
All value fields start at `Inf`; `v` is then set to `f(x)`.
Returns the vector of particles.
"""
function init_pop(f,l,u; N=10)
    span = u - l
    g = [Particle(l + span.*rand(size(l)), Inf, Inf,
                  l + span.*rand(size(l)), Inf, Inf,
                  span/N, Inf, Inf, 0.0, 1.0) for i=1:N]
    for particle in g
        particle.v = f(particle.x)
    end
    return g
end
"""
    BD(p, q)

Return `(Corr, Corr′)`: the distance between the position of `p` and the position
of `q`, and the distance between the trial position of `p` and the position of
`q`. Both use the step sizes `p.σ` and `q.σ`.

Each dimension `d` contributes `1/8*Δ^2/c + 1/2*(log(c) - 0.5*(log(σp^2) + log(σq^2)))`
with `c = (σp^2 + σq^2)/2` and `Δ` the coordinate difference. This is the
Bhattacharyya distance between two normal distributions with diagonal covariance.

`p` needs the fields `x`, `x′` and `σ`; `q` needs `x` and `σ`. All are indexed
with `eachindex(p.x)` under `@inbounds`, so they must have the same shape.
"""
function BD(p,q)
    # Float64 from the start: the literals 1/8 and 1/2 make every term a float,
    # so an integer zero would change type on the first iteration.
    Corr = Corr′ = 0.0
    @inbounds for d in eachindex(p.x)
        Δx = p.x[d] - q.x[d]
        Δx′ = p.x′[d] - q.x[d]
        σ2 = p.σ[d]^2; σ2′ = q.σ[d]^2
        c = (σ2+σ2′)/2
        # this term is identical for both distances: compute it once
        logterm = 1/2*(log(c) - 0.5*(log(σ2) + log(σ2′)))
        Corr += 1/8*Δx^2/c + logterm
        Corr′ += 1/8*Δx′^2/c + logterm
    end
    return Corr, Corr′
end
"""
    fitness_values(f, g)

For every particle in `g`, draw a trial position `x′ = x + σ .* randn` and store
`f(x′)` in `v′`. Returns `nothing`.
"""
function fitness_values(f,g)
    for particle in g
        step = particle.σ .* randn(size(particle.x))
        particle.x′ = particle.x + step
        particle.v′ = f(particle.x′)
    end
end

"""
    normalize_fitness_values(g)

Fill `vn` and `vn′` of every particle in `g`.

The smallest value found among all `v` and `v′` is subtracted from both, then
each is divided by their sum, so `vn + vn′` is 1 (up to rounding) whenever the
sum is positive. Returns `nothing`.
"""
function normalize_fitness_values(g)
    v_min = minimum(min(p.v, p.v′) for p in g)
    for p in g
        shifted = p.v - v_min
        shifted′ = p.v′ - v_min
        # Same guard as in normalize_correlation_values: without it a particle
        # whose v and v′ both equal v_min would get 0/0 = NaN.
        total = shifted + shifted′ + 1e-20
        p.vn = shifted/total
        p.vn′ = shifted′/total
    end
end
"""
    correlation_values(g)

Set `Corr` and `Corr′` of every particle `p` in `g` to the smallest distance
(see `BD`) from its position, respectively its trial position, to the positions
of the *other* particles in `g`.

Comparing a particle with itself is skipped. Its distance to itself is 0, which
would otherwise make `Corr` zero for every particle. A population of one has no
other particle, so in that case the particle is compared with itself to keep the
values finite. Returns `nothing`.
"""
function correlation_values(g)
    alone = length(g) == 1
    for p in g
        p.Corr = p.Corr′ = Inf
        for q in g
            if q === p && !alone
                continue
            end
            c,c′ = BD(p,q)
            if c < p.Corr
                p.Corr = c
            end
            if c′ < p.Corr′
                p.Corr′ = c′
            end
        end
    end
end
"""
    normalize_correlation_values(g)

Divide `Corr` and `Corr′` of every particle in `g` by `Corr + Corr′ + 1e-20`,
using the values they had before the update. Returns `nothing`.
"""
function normalize_correlation_values(g)
    for particle in g
        # the small constant keeps the division defined when both values are zero
        total = particle.Corr + particle.Corr′ + 1e-20
        particle.Corr, particle.Corr′ = particle.Corr/total, particle.Corr′/total
    end
end
"""
    selection(g)

For every particle in `g` with `λ*Corr′ > vn′`, replace the position by a copy of
the trial position, the value by the trial value, and add one to `flag`.
Returns `nothing`.
"""
function selection(g)
    for particle in g
        accepted = particle.λ*particle.Corr′ > particle.vn′
        accepted || continue
        particle.x = copy(particle.x′)
        particle.v = particle.v′
        particle.flag += 1.0
    end
end

"""
    update_parameters(g, t, T)

Refresh the search parameters of every particle in `g` at the end of iteration `t`.

- `λ` is redrawn as `1 + λrange*(1 - t/T)*randn()`.
- Once every `epoch` iterations the step sizes are adapted: `σ` is divided by `r`
  when `flag/epoch > 0.2`, multiplied by `r` when `flag/epoch < 0.2`, and left
  alone otherwise; `flag` is then reset to `0.0`.

`t` is the zero-based iteration index (`optimize` passes `0:T`), so the number of
completed iterations is `t + 1`. That count is what is tested against `epoch`.
Testing `t` itself would adapt the step sizes right after the first iteration,
when `flag` has counted a single trial. Returns `nothing`.
"""
function update_parameters(g,t,T)
    for p in g
        p.λ = 1 + λrange*(1-t/T)*randn()
    end
    if mod(t + 1, epoch) == 0
        for p in g
            rate = p.flag/epoch
            if rate > 0.2
                p.σ /= r
            elseif rate < 0.2
                p.σ *= r
            end
            p.flag = 0.0
        end
    end
end
"""
    best(p0, g)

Copy into `p0` the position and value of every particle of `g` whose value is
lower than the one `p0` currently holds. After the call, `p0` holds the lowest
value among its previous value and all `p.v`. Returns `nothing`.
"""
function best(p0,g)
    for candidate in g
        candidate.v < p0.v || continue
        p0.x = copy(candidate.x)
        p0.v = candidate.v
    end
end
"""
    optimize(f, g; T=10, disp=false)

Run the search on the population `g` for iterations `t = 0:T` and return the
tuple `(x, v)` of the best position and value recorded.

Each iteration draws and evaluates trial positions, normalizes the fitness and
correlation values, accepts or rejects the trials, updates the record of the
best particle and refreshes the search parameters. With `disp == true` the best
value is printed after every iteration. The record starts as a deep copy of
`g[1]`; the particles in `g` are modified in place.
"""
function optimize(f,g; T=10,disp=false)
    p0 = deepcopy(g[1])
    verbose = disp == true
    for t in 0:T
        fitness_values(f, g)
        normalize_fitness_values(g)
        correlation_values(g)
        normalize_correlation_values(g)
        selection(g)
        best(p0, g)
        update_parameters(g, t, T)
        verbose && println("loss=",p0.v)
    end
    return p0.x, p0.v
end
end

NCS

In [3]:
@everywhere using MNIST,NCS
# @everywhere include("mnist.jl");include("NCS.jl")
# using NCS,MNIST

In [None]:
# Population of 10 particles inside the box [-2, 2], one coordinate per element
# of x0; each particle is evaluated once with `objective`.
g = init_pop(objective,-2ones(x0),2ones(x0); N=10)
# Search for iterations 0:100, printing the best value found after each one.
best_x,best_loss  = optimize(objective,g; T=100,disp=true)