In [None]:
# Launch the worker processes used by the rest of the notebook.

# Four separate addprocs calls against "heathred".
for _ in 1:4
    addprocs(["heathred"])
end

# One addprocs call per remaining machine, each started with dir="".
for host in ["amon", "zeus", "jove", "ares"]
    addprocs([host], dir="")
end

# Currently disabled: the loop body is commented out, so this adds nothing.
for ii in 1:10
    #addprocs(["uggp"], dir="")
end

# Two more workers requested by count, then show the ids of all workers.
addprocs(2)
workers()

In [None]:
using Iterators
using Pipe
# Print the type of the array `x` followed by its size, as "<type>: <size>".
pz(x::AbstractArray) = println(typeof(x), ": ", size(x))
# `@printval expr` prints the source text of `expr`, then " = ", then its value.
# The generated call is escaped as a whole, so `expr` (and `println`) are
# resolved in the caller's scope.
macro printval(ee)
    label = string(ee)
    esc(Expr(:call, :println, label, " = ", ee))
end

# `@pz expr` prints the source text of `expr`, then its type and its size,
# separated by tabs. `expr` appears twice in the expansion, so it is
# evaluated twice. The generated call is escaped into the caller's scope.
macro pz(ee)
    label = string(ee)
    type_call = Expr(:call, :typeof, ee)
    size_call = Expr(:call, :size, ee)
    esc(Expr(:call, :println, label, "\t\t", type_call, "\t", size_call))
end

In [None]:
@everywhere using RecursiveAutoencoders
@everywhere  using UnfoldingRAE

using ClusterSoup

In [None]:
using WordEmbeddings
# Load the embedding data and print a quick summary of what came back:
# the size of LL and the container types of the two lookup structures.
LL, word_indexes, indexed_words = load_embeddings("embeddings-scaled.EMBEDDING_SIZE=50.txt");
println(size(LL))
println(typeof(word_indexes))
println(typeof(indexed_words))

In [None]:
# Deserialize the training trees; `open` closes the stream once `deserialize` returns.
training_trees = open(deserialize, "training_sents.jsz", "r");
# Sanity check: no element of the collection is a bare UTF8String.
@assert length(@pipe training_trees |> filter(x->typeof(x)===UTF8String,_))==0
@pz training_trees

In [None]:
# Chunk the training trees with `r_chunk_data`; the result is what
# `loss_and_loss_grad!` later passes to `prechunked_mapreduce`.
# NOTE(review): presumably this places one chunk per worker behind remote
# references — confirm against the ClusterSoup implementation.
r_training_trees = r_chunk_data(training_trees)

In [None]:
# Overwrite the parameters of `rae` with the values stored in the flat vector `θ`.
#
# `θ` is read in the order W_e, b_e, W_d, b_d (the order in which `pack`
# concatenates them). The current sizes of `rae.W_e` and `rae.W_d` determine
# how the two matrix segments are reshaped; the segments are copies of `θ`,
# not views.
#
# Throws DimensionMismatch when `length(θ)` is not exactly the total number of
# parameters. Without this check a too-long `θ` would silently produce an
# oversized `b_d`, because that segment was taken as "everything up to end".
#
# Returns the mutated `rae`.
@everywhere function unpack!(rae::RAE, θ::Vector)
    W_e_shape = size(rae.W_e)
    W_d_shape = size(rae.W_d)

    # Index of the last element of each segment inside θ. All of them are
    # computed before any field is reassigned.
    W_e_end = length(rae.W_e)
    b_e_end = W_e_end + length(rae.b_e)
    W_d_end = b_e_end + length(rae.W_d)
    b_d_end = W_d_end + length(rae.b_d)

    if length(θ) != b_d_end
        throw(DimensionMismatch(string("θ has ", length(θ),
                                       " elements, expected ", b_d_end)))
    end

    rae.W_e = reshape(θ[1:W_e_end], W_e_shape)
    rae.b_e = θ[W_e_end+1:b_e_end]
    rae.W_d = reshape(θ[b_e_end+1:W_d_end], W_d_shape)
    rae.b_d = θ[W_d_end+1:b_d_end]

    rae
end

# Flatten the parameters of `rae` into one vector by forwarding its four
# parameter arrays (W_e, b_e, W_d, b_d) to the array method of `pack`.
@everywhere pack(rae::RAE) = pack(rae.W_e, rae.b_e, rae.W_d, rae.b_d)

# Concatenate two matrices and two vectors into a single flat Vector{Float64},
# in the order ∇W_e, ∇b_e, ∇W_d, ∇b_d. Matrices are flattened column by column.
#
# Uses an explicit `vcat` instead of the bracket form `[a, b, c, d]`: implicit
# concatenation inside brackets was deprecated after Julia 0.3 and, on later
# versions, builds a vector of arrays rather than one flat vector.
# The result is freshly allocated and does not alias any argument.
@everywhere function pack(∇W_e::Matrix{Float64}, ∇b_e::Vector{Float64}, ∇W_d::Matrix{Float64}, ∇b_d::Vector{Float64})
    vcat(vec(∇W_e), ∇b_e, vec(∇W_d), ∇b_d)
end


In [None]:

# Objective value only: evaluates the cached loss/gradient routine at `θ`,
# writing the gradient into a throw-away buffer shaped like `θ`, and returns
# whatever `cached_loss_and_loss_grad!` returns.
function loss!(θ::Vector)
    scratch_grad = similar(θ)
    return cached_loss_and_loss_grad!(θ, scratch_grad)
end

# Gradient callback in the (θ, storage) form: writes the gradient of the loss
# at `θ` into `storage` via the cached loss/gradient routine.
#
# Fix: the original passed an undefined name `grad` instead of the `storage`
# parameter, so the call either failed or filled an unrelated global while
# leaving `storage` untouched.
#
# Returns the value of `cached_loss_and_loss_grad!` (the loss).
function loss_grad!(θ::Vector, storage::Vector)
    cached_loss_and_loss_grad!(θ, storage)
end


#rae_outer = RAE(LL,word_indexes,indexed_words);
#r_rae_outers = put!(workers(), rae_outer)
# Evaluate the mean loss over `training_trees` at parameters `θ` and write the
# mean gradient into `grad`. Returns the mean loss.
#
# Reads the globals `r_rae_outers`, `r_training_trees` and `training_trees`.
# NOTE(review): `r_rae_outers` is only mentioned in a commented-out line in the
# visible cells — confirm it is defined before this is called.
#
# Changes relative to the previous version:
#   * the per-tree result is built with an explicit `vcat` rather than the
#     bracket form `[v, s]`, whose implicit concatenation was deprecated after
#     Julia 0.3;
#   * `grad` is zeroed with `fill!` instead of a scalar slice assignment;
#   * `@inbounds` was dropped from the update loop, which performs no array
#     indexing;
#   * the tree count is read once and applied to both outputs.
function loss_and_loss_grad!(θ::Vector, grad::Vector)
    fill!(grad, 0)

    # Install the new parameters in every remote RAE.
    # NOTE(review): assumes `update_remote(ref, f)` applies `f` to the value
    # held by `ref` on its owning process — confirm against ClusterSoup.
    for r_rae_outer in r_rae_outers
        update_remote(r_rae_outer, rae->unpack!(rae, θ) )
    end

    # Runs on the process that owns `r_rae_outer` (asserted below). Builds a
    # closure over the local RAE that maps one tree to a flat vector holding
    # the packed gradient followed by the loss as its last element.
    function get_remote_loss_grad_function(r_rae_outer::RemoteRef)
        @assert r_rae_outer.where == myid()
        rae = fetch(r_rae_outer)
        function loss_and_loss_grad(tree::(Any,Any))
            Δs, err = UnfoldingRAE.loss_and_loss_grad(rae, tree)
            vcat(pack(Δs...), err)
        end
    end

    # One remote call per RAE reference, issued on the process that owns it.
    loss_and_loss_grads = map(r_rae_outers) do r_raeouter
        remotecall(r_raeouter.where, get_remote_loss_grad_function, r_raeouter)
    end

    # NOTE(review): presumably applies each per-worker function to its chunk
    # of trees and sums the resulting vectors — confirm against ClusterSoup.
    ret = prechunked_mapreduce(r_training_trees, loss_and_loss_grads, (+))

    # Split the summed vector into gradient and loss, then average over trees.
    n_trees = length(training_trees)
    grad[:] = ret[1:end-1] / n_trees
    ret[end] / n_trees
end

In [None]:

# Memoisation table: parameter vector -> (loss, gradient), with hit/miss counters.
# The table is never pruned, so it grows by one entry per distinct θ evaluated.
loss_and_loss_grad_cache = Dict{Vector{Float64},(Float64, Vector{Float64})}()
loss_and_loss_grad_cache_hits = 0
loss_and_loss_grad_cache_misses = 0

# Cached front end to `loss_and_loss_grad!`.
#
# Writes the gradient at `θ` into `grad` and returns the loss. On a cache hit
# the stored gradient is copied into `grad`; on a miss the values are computed
# and stored.
#
# Fix: the cache stores copies of `θ` and `grad`. The original stored the
# caller's arrays themselves, so any caller that reuses or mutates those
# buffers afterwards would change a Dict key in place (breaking lookups) and
# overwrite the cached gradient.
function cached_loss_and_loss_grad!(θ::Vector, grad::Vector)
    global loss_and_loss_grad_cache
    global loss_and_loss_grad_cache_hits
    global loss_and_loss_grad_cache_misses
    if haskey(loss_and_loss_grad_cache,θ)
        loss_and_loss_grad_cache_hits+=1
        err, cached_grad = loss_and_loss_grad_cache[θ]
        grad[:] = cached_grad
        err
    else
        loss_and_loss_grad_cache_misses+=1
        err = loss_and_loss_grad!(θ, grad)
        # Snapshot both arrays so later mutation by the caller cannot reach the cache.
        loss_and_loss_grad_cache[copy(θ)] = (err, copy(grad))
        err
    end
end

In [None]:
using NLopt

# Number of objective evaluations made through `tracking_loss_and_loss_grad!`.
f_call_count = 0

# Objective wrapper that logs progress: bumps the global call counter,
# evaluates the cached loss/gradient (filling `grad`), prints
# "<call count>\t<loss>\t<gradient norm>" and returns the loss.
function tracking_loss_and_loss_grad!(θ::Vector, grad::Vector)
    global f_call_count
    f_call_count = f_call_count + 1
    loss = cached_loss_and_loss_grad!(θ, grad)
    grad_norm = norm(grad)
    println(f_call_count, '\t', loss, '\t', grad_norm)
    return loss
end
# Algorithm symbols kept here as alternatives to try in place of :LD_MMA:
#:LD_MMA, :LD_CCSAQ, :LD_LBFGS, :LD_SLSQP, :LD_VAR2, :LD_VAR1, :LD_TNEWTON_RESTART
# Create an NLopt optimizer whose dimension is the number of packed RAE parameters.
# NOTE(review): `rae_outer` is not defined in the visible cells (only a
# commented-out constructor call appears earlier) — confirm it exists before running.
opt = Opt(:LD_MMA, length(pack(rae_outer)))

# Absolute function-tolerance stopping criterion, currently disabled.
#ftol_abs!(opt,1e-9)
# Stop after a time budget of 30 (seconds, per the NLopt documentation).
maxtime!(opt, 30)
# Minimise the logging objective, which also fills in the gradient.
min_objective!(opt, tracking_loss_and_loss_grad!)
# Start from the current packed parameters; yields the best objective value,
# the corresponding parameter vector and NLopt's return code.
(optf,optx,ret) = optimize!(opt,pack(rae_outer))


In [None]:
# Display the objective value returned by the NLopt `optimize!` call.
optf

In [None]:
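# Recorded last progress line of an MMA run. The layout matches the output of
# tracking_loss_and_loss_grad!: call count, loss, gradient norm (tab-separated).
#167	623.6986598947503	3.472825423124687e12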

In [None]:
using Optim #https://github.com/JuliaOpt/Optim.jl
# Bundle the three callbacks for Optim: value only, gradient only (writes into
# its second argument), and the combined value-and-gradient routine.
f=DifferentiableFunction(loss!,loss_grad!,cached_loss_and_loss_grad!)

# Run at most 30 L-BFGS iterations, printing and storing the trace.
# NOTE(review): the starting point is `res.minimum`, i.e. `res` is read before
# this statement assigns it. This only works when `res` already exists from an
# earlier run of the cell (continuing a previous optimisation); on a fresh
# session it fails — confirm the intended initial point.
res = optimize(f, res.minimum, method=:l_bfgs, show_trace = true, store_trace = true, iterations = 30);
# Report the optimizer's bookkeeping fields.
@printval res.f_calls 
@printval res.g_calls 
@printval res.iterations
@printval res.f_minimum
@printval res.gr_converged
@printval res.x_converged 

# Full stored trace, followed by the loss/gradient cache statistics.
@printval res.trace
@printval loss_and_loss_grad_cache_hits
@printval loss_and_loss_grad_cache_misses

In [None]:
# Display the result object returned by Optim's `optimize`.
res

In [None]:
# Time one uncached loss/gradient evaluation at the current packed parameters,
# writing the gradient into a fresh zero vector shaped like θ.
# NOTE(review): `rae_outer` is not defined in the visible cells — confirm.
θ=pack(rae_outer)
@time loss_and_loss_grad!(θ, zeros(θ))

In [None]:
# Discard any previously collected profiling samples.
Profile.clear()