In [1]:
using Iterators
using Pipe
using Compat

macro printval(ee)
    ee_expr = @sprintf "%s" string(ee)
    esc(:(println($ee_expr," = ", $ee)))
end

macro pz(ee)
    ee_expr = @sprintf "%s" string(ee)
    esc(:(println($ee_expr,"\t\t",typeof($ee), "\t", size($ee))))
end

push!(LOAD_PATH, ".")
push!(LOAD_PATH, "../util/")

4-element Array{ByteString,1}:
 "/home/ubuntu/build/julia-master/usr/local/share/julia/site/v0.5"
 "/home/ubuntu/build/julia-master/usr/share/julia/site/v0.5"      
 "."                                                              
 "../util/"                                                       

In [2]:
#addprocs(11)

In [3]:
@everywhere using Distances

In [4]:
const START_MARKER = "**START**"
const END_MARKER = "**END**"

atis_data = open("atis_data.jsz","r") do fh
    deserialize(fh)    
end

@assert START_MARKER in atis_data["indexed_words"]
@assert START_MARKER in keys(atis_data["word_indexes"])
@assert END_MARKER in atis_data["indexed_words"]
@assert END_MARKER in keys(atis_data["word_indexes"])

In [5]:
# τ = Pheromone
# η = Prior
# α = pheremone weight
# β = prior weight
# ρ = pheremone evaporation

@everywhere @inline function select_index{V<:AbstractVector}(edge_probs::V)
    cutoff = rand()
    total = 0.0
    @assert(length(edge_probs)>0, "No probs given")
    for (ii,prob) in enumerate(edge_probs)
        total+=prob
        if total>=cutoff
            return ii
        end
    end
    
    @assert(total>=cutoff, "total= $total, cutoff=$cutoff, $edge_probs") #Should Never Reach this point
end


@everywhere function evaporate!{M<:AbstractArray}(τ::M, ρ)
    τ.*=(1.0-ρ) #Evaporation
end

    

@everywhere function deposit!{M<:AbstractVector}(τ::M, path, fit)
    #Are You In or Out?  Double Crosser or Devout
    #Order actually means nothing in this problem
    #Cooccurance doesn't really many anything either
    #Cooccurance is a problem for the prior that need 
    #to be conerned with all answers not just this one
    for node in path
        @inbounds τ[node] += fit./length(path)
    end
    τ
end

@everywhere function get_edge_probs(τ,η::Function, α, β)
    τ_scaled = (τ).^α
    function get_prob(state)
        η_scaled = (η(state)).^β
        tot = τ_scaled .* τ_scaled
        tot./sum(tot)
    end
end

@everywhere @inline function select_node(edge_probs::Function, path=[])
    select_index(edge_probs(path))
end


In [21]:
#If you pass in a start index of greater than zero, then atre using a starting index, otherswise not
@everywhere function run_ant(edge_probs, end_prob_dist)
    path = Int[]
    len = select_index(end_prob_dist)
    for _ in 1:len
        push!(path, select_node(edge_probs, path))
    end
    path    
end


function optimise(fit_fun, τ, η, end_prob_dist;  α=1, β=1,ρ=0, n_ants=100, n_gens=10, callback=Union{})
    assert(α>=1.0)
    assert(β>=1.0)
    assert(0.0<=ρ<1.0)
    
    max_fit::Float64 = -Inf;
    fittest_path::Vector{Int} = []
    
    for gen_ii in 1:n_gens

        edge_probs = get_edge_probs(τ,η, α, β)

        path_fits::Vector{Tuple{Vector{Int},Float64}} = map(1:n_ants) do ant_id
            path::Vector{Int} = run_ant(edge_probs, end_prob_dist)
            fit::Float64 = fit_fun(path)
            (path, fit)
        end
        
        ranked_path_fits = select!(path_fits,1:10, by=pf->-pf[2])
        scales = [10,5,5,5,5,2,2,2,1,1]
        
        evaporate!(τ,ρ)
        for ((path, fit),scale) in zip(ranked_path_fits,scales)
        #for (path, fit) in path_fits
            if fit>max_fit
                max_fit = fit
                fittest_path = path
                
                if fit==Inf #Perfect Fit
                    return fittest_path, max_fit
                end
            end
            deposit!(τ,path, scale)
        end
        
        if callback |> typeof <:Function
            callback(gen_ii, τ,fittest_path,max_fit,path_fits)
        end
        
    end #Next Generation
    
    fittest_path, max_fit, τ
end


function optimise(fit_fun, n_dims::Int64, η::Function, end_prob_dist; kwargs...)
    τ = [rand() for nn in 1:n_dims]
    
    optimise(fit_fun, τ, η, end_prob_dist;kwargs...)
end


optimise (generic function with 2 methods)

In [22]:
@fastmath function get_prior_fun(uni_occur,bi_occur)
    n_edges=length(uni_occur)
    function prior(ws)
        if length(ws)==0
            uni_occur
        elseif length(ws)==1
            bi_occur[:,ws[1]]
        else
            (uni_occur + sum([bi_occur[:,w] for w in ws]))/(length(ws)+1) #Just average them cos its cheap
        end
    end    
end


get_prior_fun (generic function with 1 method)

In [33]:
[10*(2^(1/x)-1) for x in 1:100]

100-element Array{Float64,1}:
 10.0      
  4.14214  
  2.59921  
  1.89207  
  1.48698  
  1.22462  
  1.0409   
  0.905077 
  0.800597 
  0.717735 
  0.650411 
  0.594631 
  0.547661 
  ⋮        
  0.0781858
  0.0773137
  0.0764609
  0.0756266
  0.0748104
  0.0740116
  0.0732297
  0.0724641
  0.0717144
  0.07098  
  0.0702605
  0.0695555

In [24]:
function lookup_sowe(data, sent)
    sum([data["LL"][:,data["word_indexes"][word]] for word in split(sent)]) 
end

function lookup_words(data, path)
    [data["indexed_words"][ii] for ii in path]
end

lookup_words (generic function with 1 method)

In [25]:
function built_optimise(data::Dict, target_sent::AbstractString, log=False; kwargs...)
    target::Vector{Float64} = lookup_sowe(data,target_sent)
    built_optimise(data, target, log; kwargs...)
end

function built_optimise(data::Dict, target::AbstractVector, log=False; kwargs...)
    LL =data["LL"]
    indexed_words=data["indexed_words"]
    @inline function fitness(path)
        #Fitter is larger
        @inbounds path_end = sum([LL[:,ii] for ii in path]) 
        (1.0/euclidean(path_end, target))/length(path)
    end
    
    τlog=[]
    if log
        log = (gen_ii, τ,fittest_path,max_fit, path_fits) -> begin
            print("oGen: $gen_ii - fit: $max_fit ")
            #print(lookup_words(data, fittest_path))
            path_lens = [length(pf[1]) for pf in path_fits]
            print("mean_len = $(mean(path_lens))")
            print(" max_len = $(maximum(path_lens))")
            println()
            push!(τlog,copy(τ))
            
        end
    end
    
    length_dist = data["length_prob"]
    #length_dist = ws->0.5
    prior = get_prior_fun(data["unioccur"], data["bioccur"])
    
    n_dims = size(LL,2)
    path, score, τ = optimise(fitness,n_dims, prior, length_dist; callback=log, kwargs...)
    push!(τlog,τ)
    score,lookup_words(data, path), τlog
end

built_optimise (generic function with 4 methods)

In [26]:
@time score, words, τlog = built_optimise(atis_data,"what is the shortest flight", true;
α=3, β=1, ρ=0.3, n_ants=100, n_gens=10);
words

oGen: 1 - fit: 0.18625147694876484 mean_len = 10.23 max_len = 24
oGen: 2 - fit: 0.37058687583307776 mean_len = 9.42 max_len = 22
oGen: 3 - fit: 0.37058687583307776 mean_len = 10.16 max_len = 26
oGen: 4 - fit: 0.37058687583307776 mean_len = 9.37 max_len = 22
oGen: 5 - fit: 0.37058687583307776 mean_len = 9.76 max_len = 27
oGen: 6 - fit: 0.37058687583307776 mean_len = 9.23 max_len = 29
oGen: 7 - fit: 0.37058687583307776 mean_len = 10.19 max_len = 48
oGen: 8 - fit: 0.37058687583307776 mean_len = 10.08 max_len = 29
oGen: 9 - fit: 0.37058687583307776 mean_len = 10.06 max_len = 28
oGen: 10 - fit: 0.37058687583307776 mean_len = 9.31

1-element Array{Any,1}:
 "but"

 max_len = 32
  1.934872 seconds (911.71 k allocations: 1.301 GB, 14.15% gc time)


In [27]:
using UnicodePlots
using IJulia

for τ in τlog
    nw = zip(τ,atis_data["indexed_words"]) |> collect
    
    freq,name = zip(select!(nw, 1:min(20,length(nw)), by=fw->-fw[1])...) |> collect
    IJulia.display(barplot(name|> collect,freq|> collect))
end


             ┌────────────────────────────────────────┐ 
         but │▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪ 5.859659553518819 │ 
     section │▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪ 5.823603291624304 │ 
      period │▪▪▪▪▪▪▪▪▪▪▪▪ 3.487016453541801          │ 
    computer │▪▪▪▪▪▪▪▪▪▪▪▪ 3.4860504219368402         │ 
        rate │▪▪▪▪▪▪▪▪▪▪▪▪ 3.4814091970935426         │ 
       being │▪▪▪▪▪▪▪▪▪▪▪▪ 3.4424183267444017         │ 
         got │▪▪▪▪▪▪▪▪▪▪▪▪ 3.4391509836051632         │ 
     renting │▪▪▪▪▪▪▪▪▪▪▪▪ 3.2320867462969405         │ 
    stopover │▪▪▪▪▪▪▪▪▪ 2.521801723349646             │ 
    provided │▪▪▪▪▪▪▪▪▪ 2.5198436205674444            │ 
      answer │▪▪▪▪▪▪▪▪▪ 2.4304628565774564            │ 
    breaking │▪▪▪▪▪▪▪ 1.8362689725628474              │ 
     hundred │▪▪▪▪▪▪ 1.611735739055153                │ 
   frankfurt │▪▪▪▪▪▪ 1.5646103300598875               │ 
    tomorrow │▪▪▪▪▪ 1.5260654506508966                │ 
    economic │▪▪▪▪▪ 1.5143283396081455                │ 
         and │▪▪▪▪▪ 1.506848315

             ┌────────────────────────────────────────┐ 
         but │▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪ 28.359659553518817 │ 
     section │▪▪▪▪▪▪▪▪▪▪▪▪▪ 18.323603291624302        │ 
        rate │▪▪▪ 4.731409197093543                   │ 
    computer │▪▪▪ 3.9860504219368402                  │ 
    provided │▪▪▪ 3.7698436205674444                  │ 
      period │▪▪ 3.487016453541801                    │ 
       being │▪▪ 3.4424183267444017                   │ 
         got │▪▪ 3.4391509836051632                   │ 
     renting │▪▪ 3.2320867462969405                   │ 
    stopover │▪▪ 2.521801723349646                    │ 
      answer │▪▪ 2.4304628565774564                   │ 
    breaking │▪ 1.8362689725628474                    │ 
     hundred │▪ 1.611735739055153                     │ 
   frankfurt │▪ 1.5646103300598875                    │ 
    tomorrow │▪ 1.5260654506508966                    │ 
    economic │▪ 1.5143283396081455                    │ 
         and │▪ 1.5068483152237

             ┌────────────────────────────────────────┐ 
         but │▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪ 63.059659553518834 │ 
     section │▪▪▪▪▪▪▪ 21.6236032916243                │ 
        rate │▪▪ 4.731409197093543                    │ 
    computer │▪ 3.9860504219368402                    │ 
    provided │▪ 3.7698436205674444                    │ 
      period │▪ 3.487016453541801                     │ 
       being │▪ 3.4424183267444017                    │ 
         got │▪ 3.4391509836051632                    │ 
     renting │▪ 3.2320867462969405                    │ 
    stopover │▪ 2.521801723349646                     │ 
      answer │▪ 2.4304628565774564                    │ 
    breaking │▪ 1.8362689725628474                    │ 
     hundred │▪ 1.611735739055153                     │ 
   frankfurt │ 1.5646103300598875                     │ 
    tomorrow │ 1.5260654506508966                     │ 
    economic │ 1.5143283396081455                     │ 
         and │ 1.50684831522371

             ┌────────────────────────────────────────┐ 
         but │▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪ 101.0596595535189 │ 
     section │▪▪▪▪ 21.6236032916243                   │ 
        rate │▪ 4.731409197093543                     │ 
    computer │▪ 3.9860504219368402                    │ 
    provided │▪ 3.7698436205674444                    │ 
      period │▪ 3.487016453541801                     │ 
       being │▪ 3.4424183267444017                    │ 
         got │▪ 3.4391509836051632                    │ 
     renting │▪ 3.2320867462969405                    │ 
    stopover │▪ 2.521801723349646                     │ 
      answer │▪ 2.4304628565774564                    │ 
    breaking │ 1.8362689725628474                     │ 
     hundred │ 1.611735739055153                      │ 
   frankfurt │ 1.5646103300598875                     │ 
    tomorrow │ 1.5260654506508966                     │ 
    economic │ 1.5143283396081455                     │ 
         and │ 1.50684831522371

             ┌────────────────────────────────────────┐ 
         but │▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪ 139.0596595535189 │ 
     section │▪▪▪ 21.6236032916243                    │ 
        rate │▪ 4.731409197093543                     │ 
    computer │▪ 3.9860504219368402                    │ 
    provided │▪ 3.7698436205674444                    │ 
      period │▪ 3.487016453541801                     │ 
       being │▪ 3.4424183267444017                    │ 
         got │▪ 3.4391509836051632                    │ 
     renting │ 3.2320867462969405                     │ 
    stopover │ 2.521801723349646                      │ 
      answer │ 2.4304628565774564                     │ 
    breaking │ 1.8362689725628474                     │ 
     hundred │ 1.611735739055153                      │ 
   frankfurt │ 1.5646103300598875                     │ 
    tomorrow │ 1.5260654506508966                     │ 
    economic │ 1.5143283396081455                     │ 
         and │ 1.50684831522371

             ┌────────────────────────────────────────┐ 
         but │▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪ 177.05965955351877 │ 
     section │▪▪ 21.6236032916243                     │ 
        rate │▪ 4.731409197093543                     │ 
    computer │ 3.9860504219368402                     │ 
    provided │ 3.7698436205674444                     │ 
      period │ 3.487016453541801                      │ 
       being │ 3.4424183267444017                     │ 
         got │ 3.4391509836051632                     │ 
     renting │ 3.2320867462969405                     │ 
    stopover │ 2.521801723349646                      │ 
      answer │ 2.4304628565774564                     │ 
    breaking │ 1.8362689725628474                     │ 
     hundred │ 1.611735739055153                      │ 
   frankfurt │ 1.5646103300598875                     │ 
    tomorrow │ 1.5260654506508966                     │ 
    economic │ 1.5143283396081455                     │ 
         and │ 1.50684831522371

             ┌────────────────────────────────────────┐ 
         but │▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪ 215.05965955351866 │ 
     section │▪▪ 21.6236032916243                     │ 
        rate │ 4.731409197093543                      │ 
    computer │ 3.9860504219368402                     │ 
    provided │ 3.7698436205674444                     │ 
      period │ 3.487016453541801                      │ 
       being │ 3.4424183267444017                     │ 
         got │ 3.4391509836051632                     │ 
     renting │ 3.2320867462969405                     │ 
    stopover │ 2.521801723349646                      │ 
      answer │ 2.4304628565774564                     │ 
    breaking │ 1.8362689725628474                     │ 
     hundred │ 1.611735739055153                      │ 
   frankfurt │ 1.5646103300598875                     │ 
    tomorrow │ 1.5260654506508966                     │ 
    economic │ 1.5143283396081455                     │ 
         and │ 1.50684831522371

             ┌────────────────────────────────────────┐ 
         but │▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪ 253.05965955351863 │ 
     section │▪▪ 21.6236032916243                     │ 
        rate │ 4.731409197093543                      │ 
    computer │ 3.9860504219368402                     │ 
    provided │ 3.7698436205674444                     │ 
      period │ 3.487016453541801                      │ 
       being │ 3.4424183267444017                     │ 
         got │ 3.4391509836051632                     │ 
     renting │ 3.2320867462969405                     │ 
    stopover │ 2.521801723349646                      │ 
      answer │ 2.4304628565774564                     │ 
    breaking │ 1.8362689725628474                     │ 
     hundred │ 1.611735739055153                      │ 
   frankfurt │ 1.5646103300598875                     │ 
    tomorrow │ 1.5260654506508966                     │ 
    economic │ 1.5143283396081455                     │ 
         and │ 1.50684831522371

             ┌────────────────────────────────────────┐ 
         but │▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪ 291.0596595535188 │ 
     section │▪▪ 21.6236032916243                     │ 
        rate │ 4.731409197093543                      │ 
    computer │ 3.9860504219368402                     │ 
    provided │ 3.7698436205674444                     │ 
      period │ 3.487016453541801                      │ 
       being │ 3.4424183267444017                     │ 
         got │ 3.4391509836051632                     │ 
     renting │ 3.2320867462969405                     │ 
    stopover │ 2.521801723349646                      │ 
      answer │ 2.4304628565774564                     │ 
    breaking │ 1.8362689725628474                     │ 
     hundred │ 1.611735739055153                      │ 
   frankfurt │ 1.5646103300598875                     │ 
    tomorrow │ 1.5260654506508966                     │ 
    economic │ 1.5143283396081455                     │ 
         and │ 1.50684831522371

             ┌────────────────────────────────────────┐ 
         but │▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪ 329.0596595535191 │ 
     section │▪ 21.6236032916243                      │ 
        rate │ 4.731409197093543                      │ 
    computer │ 3.9860504219368402                     │ 
    provided │ 3.7698436205674444                     │ 
      period │ 3.487016453541801                      │ 
       being │ 3.4424183267444017                     │ 
         got │ 3.4391509836051632                     │ 
     renting │ 3.2320867462969405                     │ 
    stopover │ 2.521801723349646                      │ 
      answer │ 2.4304628565774564                     │ 
    breaking │ 1.8362689725628474                     │ 
     hundred │ 1.611735739055153                      │ 
   frankfurt │ 1.5646103300598875                     │ 
    tomorrow │ 1.5260654506508966                     │ 
    economic │ 1.5143283396081455                     │ 
         and │ 1.50684831522371

             ┌────────────────────────────────────────┐ 
         but │▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪ 329.0596595535191 │ 
     section │▪ 21.6236032916243                      │ 
        rate │ 4.731409197093543                      │ 
    computer │ 3.9860504219368402                     │ 
    provided │ 3.7698436205674444                     │ 
      period │ 3.487016453541801                      │ 
       being │ 3.4424183267444017                     │ 
         got │ 3.4391509836051632                     │ 
     renting │ 3.2320867462969405                     │ 
    stopover │ 2.521801723349646                      │ 
      answer │ 2.4304628565774564                     │ 
    breaking │ 1.8362689725628474                     │ 
     hundred │ 1.611735739055153                      │ 
   frankfurt │ 1.5646103300598875                     │ 
    tomorrow │ 1.5260654506508966                     │ 
    economic │ 1.5143283396081455                     │ 
         and │ 1.50684831522371

In [None]:
using UnicodePlots

In [None]:
[lookup_sowe(atis_data, "washington washington") lookup_sowe(atis_data, "washington")]


In [None]:
atis_data["unioccur"][atis_data["word_indexes"]["show"]]

In [None]:
atis_data["unioccur"][atis_data["word_indexes"]["showed"]]

In [None]:
#Fast Splitting
subset_indexes_for_len = Dict{Int,Vector{Vector{Bool}}}()
function nonempty_subset_splits{T}(xs::Vector{T})
    K = length(xs)
    ss ::Vector{Vector{Bool}} = if haskey(subset_indexes_for_len,K)
            subset_indexes_for_len[K]
         else 
            subset_indexes_for_len[K] = Vector{Bool}[collect(pr) for (_,pr) in  zip(1:(K^2)/2, product([(true,false) for k in 1:K]...))][2:end]
            subset_indexes_for_len[K]
         end

    Tuple{Vector{T},Vector{T}}[(xs[bb],xs[~bb]) for bb in ss]
end

function nonempty_subset_splits(xs)
    nonempty_subset_splits(collect(xs))
end



function pairwise(pp)
    Task() do 
        for ii in 2:2:length(pp)
            produce((pp[ii-1],pp[ii]))
        end
        if isodd(length(pp))
            produce(pp[end]) 
        end
    end
end

_partions_of_pairs_indexes=Dict{Vector{Int64},Vector{Vector{Int64}}}()
function partions_of_pairs_indexes{Int64}(xs::Vector{Int64})
    if !haskey(_partions_of_pairs_indexes, xs)
        _partions_of_pairs_indexes[xs] = 
            if length(xs)<=2
                Vector{Int64}[xs]
            else
                rets = Vector{Vector{Int64}}()
                sizehint!(rets, length(xs)^div(length(xs),2))
                for i1 in 1:length(xs-1)
                    for i2 in i1+1:length(xs)
                        head = (xs[[i1,i2]])
                        tails = partions_of_pairs_indexes(xs[[1:i1-1;i1+1:i2-1;i2+1:end]])

                        for tail in tails
                                push!(rets, [head; tail])
                            end
                        end
                end
                rets
            end
    end
    _partions_of_pairs_indexes[xs] 
end

function partions_of_pairs(xs::Vector{Int64})
    partions_of_pairs_indexes(xs)
end
#Fast Splitting
subset_indexes_for_len = Dict{Int,Vector{Vector{Bool}}}()
function nonempty_subset_splits{T}(xs::Vector{T})
    K = length(xs)
    ss ::Vector{Vector{Bool}} = if haskey(subset_indexes_for_len,K)
            subset_indexes_for_len[K]
         else 
            subset_indexes_for_len[K] = Vector{Bool}[collect(pr) for (_,pr) in  zip(1:(K^2)/2, product([(true,false) for k in 1:K]...))][2:end]
            subset_indexes_for_len[K]
         end

    Tuple{Vector{T},Vector{T}}[(xs[bb],xs[~bb]) for bb in ss]
end

function nonempty_subset_splits(xs)
    nonempty_subset_splits(collect(xs))
end



function pairwise(pp)
    Task() do 
        for ii in 2:2:length(pp)
            produce((pp[ii-1],pp[ii]))
        end
        if isodd(length(pp))
            produce(pp[end]) 
        end
    end
end

_partions_of_pairs_indexes=Dict{Vector{Int64},Vector{Vector{Int64}}}()
function partions_of_pairs_indexes{Int64}(xs::Vector{Int64})
    if !haskey(_partions_of_pairs_indexes, xs)
        _partions_of_pairs_indexes[xs] = 
            if length(xs)<=2
                Vector{Int64}[xs]
            else
                rets = Vector{Vector{Int64}}()
                sizehint!(rets, length(xs)^div(length(xs),2))
                for i1 in 1:length(xs-1)
                    for i2 in i1+1:length(xs)
                        head = (xs[[i1,i2]])
                        tails = partions_of_pairs_indexes(xs[[1:i1-1;i1+1:i2-1;i2+1:end]])

                        for tail in tails
                                push!(rets, [head; tail])
                            end
                        end
                end
                rets
            end
    end
    _partions_of_pairs_indexes[xs] 
end

function partions_of_pairs(xs::Vector{Int64})
    partions_of_pairs_indexes(xs)
end


In [None]:
using DataStructures
using DataStructuresExtended


@fastmath function get_prior_fun(uni_occur, bi_joint, prob_length, end_index)
    n_edges=length(uni_occur)
    bi_marginal = sum(bi_joint,1)
    bi_conditional = bi_joint./bi_marginal
  
    
    @inline function joint(aa::Int64)
        @inbounds ret = bi_marginal[aa]
        ret
    end
    
    @inline function joint(aa::Int64,bb::Int64)
        @inbounds ret = bi_joint[aa, bb]
        ret
    end
    
    joint_cache_stats = Dict([("hits",0), ("misses",0)])
    joint_cache = Dict{Accumulator{Int64,Int64}, Float64}() #TODO Should Use a multiset instead to index
    @inline function joint(xxs...) 
        @assert(length(xxs)>=3)
        xxs_bag = counter(Int64, xxs) 
        if !(haskey(joint_cache,xxs_bag))
            joint_cache_stats["misses"]+=1
            #Full calc
            joint_cache[xxs_bag] = mean(Float64[(joint(aas...)*joint(bbs...)) for (aas,bbs) in  nonempty_subset_splits(xxs)])
            
            
        else
            joint_cache_stats["hits"]+=1
        end
        joint_cache[xxs_bag]
    end
    
    conditional_occurance::Function
    @inbounds function conditional_occurance(ws::Vector{Int64})
        if length(ws) == 0
            uni_occur
        elseif length(ws)==1
            bi_conditional[:,ws[1]]
        else
            Float64[joint(aa,ws...) for aa in 1:n_edges]./joint(ws...)
        end
    end
    
    function prior(ws)
        if  length(ws) > length(prob_length) #If it is longer than the tail of length distro, just force temrination
            force_end_pmf = zeros(n_edges) #This PMF forces the sentence to end, if it is returned
            @inbounds force_end_pmf[end_index]=1.0
            force_end_pmf
        else 
            #Either do not end (P=1-P_end) or end (P=P_end)
            #prob_end = prob_length[length(ws)]   #Chance to end at this length
            prob_end=0.0
            prob_occurance::Vector{Float64} = conditional_occurance(ws)
            
            choices_prob = (1.0-prob_end).*prob_occurance
            #@inbounds choices_prob[end_index]=prob_end
            choices_prob
        end 
    end
    prior,joint_cache_stats
end

function get_prior_fun(data::Dict)
    get_prior_fun(data["unioccur"],data["bioccur"], data["length_prob"], data["word_indexes"][END_MARKER])
end

In [None]:
atis_prior,atis_prior_cache_stats = get_prior_fun(atis_data)
r = @time atis_prior(Int64[1,2,5,4,5,6,7]) #9.335965 seconds
println(atis_prior_cache_stats)
r

In [None]:
optimise(atis_data,"what is the shortest flight", 1, 1, 0.1, 10000,20)

In [None]:
optimise(atis_data,"what is the first flight from washington to washington", 1, 1, 0.1, 10000,20)

In [None]:
optimise(atis_data,"what are the ground transport options", 1, 1, 0.1, 1000,20)

In [None]:
optimise(atis_data,"where can i go", 1, 1, 0.1, 1000,20)

In [None]:
atis_data["word_indexes"][START_MARKER]
    

In [None]:
`git push""` |> run

In [None]:
`git commit -m="" -a` |> run