In [1]:
# Start 10 additional worker processes, then display the ids of the available workers.
addprocs(10)
workers()

in readline at /home/ubuntu/.julia/v0.5/IJulia/src/stdio.jl:61


10-element Array{Int64,1}:
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11

In [2]:
using Iterators
using Pipe
using Compat

# Debug helper: `@printval expr` prints the source text of `expr`, then " = ",
# then the value of `expr`. The whole generated call is escaped, so every name
# in it is resolved in the caller's scope.
macro printval(ee)
    label = string(ee)
    call = :(println($label, " = ", $ee))
    esc(call)
end

# Debug helper: `@pz expr` prints the source text of `expr` followed by the type
# and the size of its value, tab separated. `expr` appears twice in the generated
# call (once for `typeof`, once for `size`), so it is evaluated twice. The whole
# call is escaped and therefore resolved in the caller's scope.
macro pz(ee)
    label = string(ee)
    call = :(println($label, "\t\t", typeof($ee), "\t", size($ee)))
    esc(call)
end

# Add the current directory and ../util/ to the module search path.
push!(LOAD_PATH, ".")
push!(LOAD_PATH, "../util/")

4-element Array{ByteString,1}:
 "/home/ubuntu/build/julia-master/usr/local/share/julia/site/v0.5"
 "/home/ubuntu/build/julia-master/usr/share/julia/site/v0.5"      
 "."                                                              
 "../util/"                                                       

In [3]:
@everywhere using Distances

, use AbstractFloat instead.
, use AbstractFloat instead.
, use AbstractFloat instead.
, use AbstractFloat instead.
AbstractFloat instead.
AbstractFloat instead.
.


In [4]:
# Marker tokens; elsewhere in this file their word indexes serve as the start
# and end nodes of a path.
const START_MARKER = "**START**"
const END_MARKER = "**END**"

# Load the previously serialized data set from disk. The do-block form of `open`
# closes the file handle once deserialization is finished.
atis_data = open("atis_data.jsz","r") do fh
    deserialize(fh)    
end

# Sanity checks: both markers must be present in the "indexed_words" collection
# and must be keys of the "word_indexes" lookup.
@assert START_MARKER in atis_data["indexed_words"]
@assert START_MARKER in keys(atis_data["word_indexes"])
@assert END_MARKER in atis_data["indexed_words"]
@assert END_MARKER in keys(atis_data["word_indexes"])

In [5]:
# τ = Pheromone matrix
# η = Prior matrix
# α = pheromone weight (exponent applied to τ)
# β = prior weight (exponent applied to η)
# ρ = pheromone evaporation rate
# Turn pheromone (τ) and prior (η) matrices into column-normalised transition
# probabilities: each entry is τ^α * η^β divided by the sum of its column.
# Columns whose sum is zero (e.g. the terminal node) produce NaN on division;
# those entries are reset to 0.0.
function get_edge_probs(τ::AbstractMatrix,η::AbstractMatrix, α, β)
    weights = (τ.^α) .* (η.^β)
    col_totals = sum(weights,1)
    probs = weights ./ col_totals
    probs[isnan(probs)] = 0.0
    return probs
end
    
# Sparse specialisation of `get_edge_probs`: returns a sparse matrix with the
# same dimensions as the inputs, in which every stored entry of τ^α * η^β has
# been divided by the sum of its column.
#
# τ, η : sparse pheromone and prior matrices
# α, β : exponents applied to τ and η respectively
function get_edge_probs(τ::AbstractSparseMatrix,η::AbstractSparseMatrix, α, β)
    edges = τ.^α .* η.^β
    n_rows, n_cols = size(edges)
    I,J,V = findnz(edges)

    # Result element type of a division, so integer-valued inputs work too.
    P = typeof(one(eltype(V)) / one(eltype(V)))

    # Column totals, accumulated directly from the stored entries.
    sums = zeros(P, n_cols)
    for idx in eachindex(V)
        sums[J[idx]] += V[idx]
    end

    probs = zeros(P, length(V))
    for idx in eachindex(V)
        p = V[idx] / sums[J[idx]]
        # Same convention as the dense method: NaN (0/0) becomes probability 0.
        probs[idx] = isnan(p) ? zero(P) : p
    end

    # Pass the dimensions explicitly. Without them `sparse` infers the size from
    # the largest stored row/column index, which silently shrinks the matrix
    # whenever trailing rows or columns have no stored entries.
    sparse(I,J,probs,n_rows,n_cols)
end



get_edge_probs (generic function with 2 methods)

In [6]:
# Randomly pick the successor of node `x`.
#
# Column `x` of `edge_probs` holds the transition probabilities out of `x`; the
# row index of each non-zero entry is a candidate successor. A uniform random
# cutoff is drawn and the first candidate at which the cumulative probability
# reaches the cutoff is returned.
#
# Fails with an AssertionError if `x` has no outgoing edges, or if the
# probabilities in the column do not add up to (approximately) one.
@everywhere function select_node(x, edge_probs)
    cutoff = rand()
    total = 0.0
    # Indexing with the range x:x yields an n-by-1 matrix, for which `findnz`
    # returns the (rows, cols, values) triple.
    ys, _, probs = findnz(edge_probs[:,x:x])
    @assert(length(ys)>0, "No options from point $x")
    for (y,prob) in zip(ys,probs)
        total+=prob
        if total>=cutoff
            return y
        end
    end

    # Floating-point rounding can leave the cumulative sum of a correctly
    # normalised column marginally below a cutoff that is very close to 1.
    # In that case the draw belongs to the last candidate.
    if isapprox(total, 1.0)
        return ys[end]
    end

    # The column was not a probability distribution.
    throw(AssertionError("x=$x, total= $total, cutoff=$cutoff"))
end


# Walk a single ant through the graph: begin at `start_index` and keep appending
# the node chosen by `select_node` until the last node equals `end_index`.
# Returns the visited node indexes, start and end included.
@everywhere function run_ant(edge_probs, start_index, end_index)
    trail::Vector{Int} = [start_index]
    while true
        current = trail[end]
        current == end_index && break
        push!(trail, select_node(current, edge_probs))
    end
    return trail
end



# Ant-colony search for the path from `start_index` to `end_index` that
# maximises `fit_fun`.
#
# fit_fun     : maps a path (Vector{Int} of node indexes) to a fitness; larger is better
# η           : prior matrix; entry [next, current] weights the edge current -> next
# α, β        : exponents for pheromone and prior (each must be >= 1)
# ρ           : fraction of pheromone that evaporates per generation (0 <= ρ < 1)
# n_ants      : number of paths sampled per generation
# n_gens      : number of generations
#
# Returns `(fittest_path, max_fit)`. Stops early as soon as a path with infinite
# fitness is found. Progress is printed for generations 1-9 and every tenth
# generation after that.
function optimise(fit_fun, η, α, β,ρ, n_ants, n_gens, start_index, end_index)
    @assert α>=1.0
    @assert β>=1.0
    @assert 0.0<=ρ<1.0

    max_fit::Float64 = -Inf
    fittest_path::Vector{Int} = Int[]
    # Pheromone is multiplied element-wise with η, so there is no point in
    # initialising entries that are zero in η.
    τ = ceil(η)

    for gen_ii in 1:n_gens
        if gen_ii%10==1 || gen_ii<10
            print("Gen: $gen_ii - fit: $max_fit\n")
        end

        edge_probs = get_edge_probs(τ,η, α, β)

        path_fits::Vector{Tuple{Vector{Int},Float64}} = map(1:n_ants) do ant_id
            path::Vector{Int} = run_ant(edge_probs, start_index, end_index)
            fit::Float64 = fit_fun(path)
            (path, fit)
        end

        τ.*=(1.0-ρ) #Evaporation
        for (path, fit) in path_fits
            if fit>max_fit
                max_fit = fit
                fittest_path = path

                if fit==Inf #Perfect Fit
                    return fittest_path, max_fit
                end
            end

            # Deposit pheromone along every edge of the path. No @simd here: a
            # path may traverse the same edge more than once, so iterations can
            # update the same entry of τ and are not independent.
            for node_ii in 1:length(path)-1
                node = path[node_ii]
                next_node = path[node_ii+1]
                τ[next_node, node] += fit
            end
        end
    end #Next Generation

    fittest_path, max_fit
end

optimise (generic function with 1 method)

In [7]:
# Sum of word embeddings for a sentence: split `sent` on whitespace, take the
# column of data["LL"] selected by each word's entry in data["word_indexes"],
# and add the columns together.
function lookup_sowe(data, sent)
    embeddings = data["LL"]
    index_of = data["word_indexes"]
    columns = [embeddings[:, index_of[token]] for token in split(sent)]
    return sum(columns)
end

lookup_sowe (generic function with 1 method)

In [8]:
# Convenience method: take the target as a sentence, convert it to its summed
# word-embedding vector with `lookup_sowe`, and delegate to the vector-target
# method of `optimise`.
function optimise(data::Dict, target_sent::AbstractString, α, β, ρ, n_ants, n_gens)
    target_vec = convert(Vector{Float64}, lookup_sowe(data, target_sent))
    return optimise(data, target_vec, α, β, ρ, n_ants, n_gens)
end

# Search for the word sequence whose summed embedding best matches `target`.
#
# data   : Dict providing "LL" (embedding matrix, one column per word),
#          "word_indexes" (word -> index), "indexed_words" (index -> word)
#          and "bigrams" (prior matrix passed to the core optimiser)
# target : embedding vector to approximate
# α, β, ρ, n_ants, n_gens : forwarded unchanged to the core `optimise`
#
# Returns `(score, words)`, where `words` is the best path translated back
# into words, start and end markers included.
@inline function optimise(data::Dict, target::AbstractVector, α, β,ρ, n_ants, n_gens)
    # Use the embeddings of the `data` argument, not of a global data set, so
    # that the fitness is consistent with the bigrams and indexes used below.
    embeddings = data["LL"]
    LL = SharedArray(Float64, size(embeddings))
    LL[:]=embeddings[:]
    @inline function fitness(path)
        #Fitter is larger
        @inbounds path_end = sum([LL[:,ii] for ii in path]) 
        # Inverse cosine distance to the target, divided by the path length.
        (1.0/cosine_dist(path_end, target))/length(path)
    end
    
    start_index = data["word_indexes"][START_MARKER]
    end_index = data["word_indexes"][END_MARKER]
    
    path, score = optimise(fitness, data["bigrams"], α, β,ρ, n_ants, n_gens, start_index, end_index)
    score,[data["indexed_words"][ii] for ii in path]
end

optimise (generic function with 3 methods)

In [9]:
# Timed run: α=1, β=1, ρ=0.3, 10000 ants per generation, 20 generations.
@time optimise(atis_data,"what is the longest flight", 1, 1, 0.3, 10000,20)
#code_warntype(optimise, map(typeof,params))

Gen: 1 - fit: -Inf
Gen: 2 - fit: 2.841247204959871
Gen: 3 - fit: 2.841247204959871
Gen: 4 - fit: 2.841247204959871
Gen: 5 - fit: 2.841247204959871
Gen: 6 - fit: 2.841247204959871
Gen: 7 - fit: 2.841247204959871
Gen: 8 - fit: 2.841247204959871
Gen: 9 - fit: 2.841247204959871
Gen: 11 - fit: 2.841247204959871
 69

(2.841247204959871,Any["**START**","what","is","the","flight","**END**"])

In [15]:
# Run with α=1, β=1, ρ=0.1, 10000 ants per generation, 20 generations.
optimise(atis_data,"what is the shortest flight", 1, 1, 0.1, 10000,20)

Gen: 1 - fit: -Inf
Gen: 2 - fit: 2.9412664090410554
Gen: 3 - fit: 2.9412664090410554
Gen: 4 - fit: 2.9412664090410554
Gen: 5 - fit: 2.9412664090410554
Gen: 6 - fit: 2.9412664090410554
Gen: 7 - fit: 2.9412664090410554
Gen: 8 - fit: 2.9412664090410554
Gen: 9 - fit: 2.9412664090410554
Gen: 11 - fit: 2.9412664090410554


LoadError: LoadError: InterruptException:
while loading In[15], in expression starting on line 1

In [None]:
# Run on a longer sentence with α=1, β=1, ρ=0.1, 10000 ants per generation, 20 generations.
optimise(atis_data,"what is the first flight from washington to washington", 1, 1, 0.1, 10000,20)

Gen: 1 - fit: -Inf
Gen: 2 - fit: 1.1713349727898572
Gen: 3 - fit: 1.9561430275898823
Gen: 4 - fit: 1.9561430275898823
Gen: 5 - fit: 1.9561430275898823
Gen: 6 - fit: 1.9561430275898823
Gen: 7 - fit: 1.9561430275898823
Gen: 8 - fit: 1.9561430275898823
Gen: 9 - fit: 1.9561430275898823
Gen: 11 - fit: 1.9561430275898823


In [11]:
# Run with a smaller colony: α=1, β=1, ρ=0.1, 1000 ants per generation, 20 generations.
optimise(atis_data,"what are the ground transport options", 1, 1, 0.1, 1000,20)

Gen: 1 - fit: -Inf
Gen: 2 - fit: 0.9565541331530678
Gen: 3 - fit: 1.183091133033715
Gen: 4 - fit: 1.183091133033715
Gen: 5 - fit: 1.183091133033715
Gen: 6 - fit: 1.183091133033715
Gen: 7 - fit: 1.183091133033715
Gen: 8 - fit: 1.183091133033715
Gen: 9 - fit: 1.183091133033715
Gen: 11 - fit: 1.183091133033715


(1.183091133033715,Any["**START**","what","is","the","ground","transportation","**END**"])

In [12]:
# Run with α=1, β=1, ρ=0.1, 1000 ants per generation, 20 generations.
optimise(atis_data,"where can i go", 1, 1, 0.1, 1000,20)

Gen: 1 - fit: -Inf
Gen: 2 - fit: 0.9867084973755195
Gen: 3 - fit: 0.9867084973755195
Gen: 4 - fit: 0.9867084973755195
Gen: 5 - fit: 0.9867084973755195
Gen: 6 - fit: 0.9867084973755195
Gen: 7 - fit: 0.9867084973755195
Gen: 8 - fit: 0.9867084973755195
Gen: 9 - fit: 0.9867084973755195
Gen: 11 - fit: 0.9867084973755195


(0.9867084973755195,Any["**START**","can","you","**END**"])

In [13]:
# Show the word index assigned to the start marker.
atis_data["word_indexes"][START_MARKER]
    

1120