In [1]:
ENV["LINES"] = 30
ENV["COLUMNS"] = 300

300

In [104]:
using Pipe
function pz(x :: AbstractArray)
    println(typeof(x), ": ", size(x))
end
macro printval(ee)
    ee_expr = @sprintf "%s" string(ee)
    esc(:(println($ee_expr," = ", $ee)))
end

In [3]:
using PyCall
@pyimport nltk
function tokenize(sentence::String)
    convert(Array{String,1},nltk.word_tokenize(sentence))
end



tokenize (generic function with 1 method)

In [4]:
include("load_embeddings.jl")

load_embeddings (generic function with 1 method)

In [5]:
LL,word_indexes, indexed_words =  load_embeddings("embeddings-scaled.EMBEDDING_SIZE=50.txt");
size(LL) |> println
word_indexes |> typeof |> println
indexed_words |> typeof |> println

(50,268810)
Dict{String,Int64}
Array{String,1}


In [6]:
typealias Embedding Vector{Float64}
typealias Embeddings Matrix{Float64}
typealias Words Union(AbstractArray{ASCIIString,1},AbstractArray{String,1})
type RAE
    L::Matrix{Float64}
    word_index::Dict{String,Int}
    indexed_words::Vector{String}
    
    W_e::Matrix{Float64}
    b_e::Vector{Float64}
    W_d::Matrix{Float64}
    b_d::Vector{Float64}
   
end


function RAE(L::Matrix{Float64},word_index::Dict{String,Int}, indexed_words::Vector{String})
    emb_width = size(L,1)
    
    W_e =0.01*randn(emb_width,emb_width*2) 
    b_e = 0.01*randn(emb_width) 
    #W_d = 0.01*randn(emb_width*2,emb_width)
    W_d = pinv(W_e) #Cheat (Actually why can't I always do this to initialize?);
    b_d = 0.01*randn(emb_width*2)
    
    RAE(L,word_index, indexed_words, W_e, b_e, W_d, b_d)
end


function get_word_index(rae::RAE, input::String, show_warn=true)
    if haskey(rae.word_index, input)
        ii = rae.word_index[input]
    elseif haskey(rae.word_index, lowercase(input))
        ii = rae.word_index[lowercase(input)]
    else
        ii = rae.word_index["*UNKNOWN*"]
        if show_warn
            println("$input not found. Defaulting.")
        end
    end
    ii
end


function eval_word_embedding(rae::RAE, input::String, show_warn=true)
    k=get_word_index(rae, input, show_warn)
    rae.L[:,k]
end

function eval_word_embeddings(rae::RAE, inputs::Words, show_warn=false)
    ks = @pipe inputs |> map(ii -> get_word_index(rae,ii, show_warn), _)
    rae.L[:,ks]
end


function eval_merges(rae::RAE, c_ijs::Embeddings)
    tanh(rae.W_e*c_ijs.+rae.b_e)
end

function eval_merges(rae::RAE, c_is::Embeddings, c_js::Embeddings)
    @assert size(c_is)==size(c_js)
    eval_merges(rae,[c_is;c_js])
end

function eval_scores(rae::RAE, c_is::Embeddings, c_js::Embeddings,
                      pps=eval_merges(rae, c_is, c_js)::Embeddings,
                      ĉ_ijs = unfold_merges(rae,pps)::Embeddings)
     c_ijs = [c_is;c_js]
     
     1/2*sum((c_ijs-ĉ_ijs).^2,1)
end

function reconstruct(rae::RAE, pp::Embedding)
    ĉ_ij = tanh(rae.W_d*pp+rae.b_d)
    ĉ_i = ĉ_ij[1:end/2]
    ĉ_j = ĉ_ij[end/2+1:end]
    ĉ_i, ĉ_j
end

function unfold_merges(rae::RAE, pps::Embeddings)
    ĉ_ijs = tanh(rae.W_d*pps .+ rae.b_d)
end

unfold_merges (generic function with 1 method)

In [8]:
type ActData
    c_ij::Embedding
    pp::Embedding
    ĉ_ij::Embedding
end


#data_tree(left::Embedding, data::ActData, right)

In [9]:
function eval_to_tree(rr::RAE,sentence::String)
    eval_to_tree(rr, tokenize(sentence))
end

function eval_to_tree(rr::RAE, sentence::Words)
    tree = tuple(sentence...)
    cs = eval_word_embeddings(rr, sentence)
    act_tree = tuple_of_cols(cs)
    score_total = 0.0
    while(size(cs,2)>1)
        c_is = cs[:, 1:end-1]
        c_js = cs[:, 2:end]
        
        pps = eval_merges(rr, c_is, c_js)
        ĉ_ijs = unfold_merges(rr,pps)
        scores = eval_scores(rr, c_is, c_js, pps,ĉ_ijs)
        im = indmax(scores)
        
        score_total+=scores[im]
        c_ij=[c_is; c_js][:,im]
        pp = pps[:,im]
        ĉ_ij = ĉ_ijs[:,im]
        act = ActData(c_ij, pp, ĉ_ij)
        act_node = (act_tree[im], act, act_tree[im+1])
        
        cs = [cs[:,1:im-1] pp cs[:,im+2:end]]
        tree = tuple(tree[1:im-1]..., (tree[im], tree[im+1]), tree[im+2:end]...)
        act_tree = tuple(act_tree[1:im-1]..., act_node, act_tree[im+2:end]...)
    end
    
    #Note The final step in tree creates a tuple containing one element, as first and last parts are empty
    tree[1], act_tree[1], cs[:], score_total
end


eval_to_tree (generic function with 2 methods)

In [18]:
function zero_col(W::Matrix)
    zeros(size(W,1),1)
end

function tuple_of_cols(a::Matrix)
    @pipe [a[:,col_ii] for col_ii in 1:size(a,2)] |> tuple(_...)
end

function BPTS(rae::RAE, nontree::Embedding, δ_above::Matrix)
    #Note a tree. but a terminal state
    (0,0,0,0)
end

function BPTS(rae::RAE, tree::(Any,ActData, Any), δ_above=zero_col(rae.W_e))
    act=tree[2]
    ∇s, δ_input = eval_scores_gradient(rae,act,δ_above)
    δ_left  = δ_input[1:end/2,:]
    δ_right = δ_input[end/2+1:end,:]

    ∇s_left = BPTS(rae, tree[1], δ_left)
    ∇s_right = BPTS(rae, tree[3], δ_right)
    tuple([l+c+r for (c,l,r) in zip(∇s_left,∇s, ∇s_right)]...)
end



function eval_scores_gradient(rae::RAE, 
                              act::ActData,
                              δ_parent=zero_col(rae.W_e))
    #Notice: While this is good to go for multiple concurrent, 
    #It does't actually do so, as a tree is the 
    
    
    c_ijs::Embeddings = act.c_ij''
    pps::Embeddings = act.pp''
    ĉ_ijs::Embeddings = act.ĉ_ij''
    
    #http://neuralnetworksanddeeplearning.com/chap2.h)tml
    N = size(c_ijs,2)
    
    da = (ĉ_ijs - c_ijs)
    dz_d = (1-ĉ_ijs.^2)
    δ_d = da.*dz_d #Output Error

    ∇W_d = 1/N*δ_d*pps'
    ∇b_d = 1/N*sum(δ_d,2)[:]
    
    
    dz_e = (1-pps.^2)
    δ_e = (rae.W_d'*δ_d).*(dz_e .+ δ_parent) #Hidden layer error
        

    ∇W_e = 1/N*δ_e*c_ijs'
    ∇b_e = 1/N*sum(δ_e,2)[:]
    
    ∇s = (∇W_e, ∇b_e, ∇W_d, ∇b_d)
    
    #input error, ie parent error for layer below
    dz_p = (1-c_ijs.^2)
    δ_input = (rae.W_e'*δ_e - da).*dz_p
    
    ∇s, δ_input
end


eval_scores_gradient (generic function with 2 methods)

In [19]:
rr = RAE(LL,word_indexes,indexed_words);

sent = "the boy destroyed the house"
sent_toks = tokenize(sent)

tree, act_tree, pp, score_total = eval_to_tree(rr,sent_toks);

In [20]:
using Iterators
@pyimport nltk.corpus as nltk_corpus
n_training = 10
#training_sents = @pipe nltk_corpus.brown[:sents]() |> take(_,n_training)  |> collect |> convert(Vector{Vector{String}},_);
training_sents = @pipe nltk_corpus.brown[:sents]() |> filter(s->length(s)<=15, _) |> take(_,n_training)  |> collect |> convert(Vector{Vector{String}},_);


In [119]:
using Optim #https://github.com/JuliaOpt/Optim.jl

rae_outer = RAE(LL,word_indexes,indexed_words);

function unpack!(rae::RAE, θ::Vector)
    W_e_len = length(rae.W_e)
    b_e_len = length(rae.b_e)
    W_d_len = length(rae.W_d)
    b_d_len = length(rae.b_d)
    W_e_shape = size(rae.W_e)
    W_d_shape = size(rae.W_d)
    
    rae.W_e = reshape(θ[1: W_e_len],W_e_shape)
    rae.b_e = θ[W_e_len+1: W_e_len+b_e_len]
    rae.W_d = reshape(θ[W_e_len+b_e_len+1: W_e_len+b_e_len+W_d_len],W_d_shape)
    rae.b_d = θ[W_e_len+b_e_len+W_d_len+1: end]
    
    rae
end

function pack(rae::RAE)
    [rae.W_e[:],rae.b_e, rae.W_d[:],rae.b_d[:]] 
end

function pack(∇W_e::Matrix{Float64}, ∇b_e::Vector{Float64}, ∇W_d::Matrix{Float64}, ∇b_d::Vector{Float64})
    [∇W_e[:], ∇b_e, ∇W_d[:], ∇b_d] 
end

#--------------------------------------------------------

function loss!(θ::Vector)  
    rae = unpack!(rae_outer, θ)
    @pipe training_sents |> map( ss-> eval_to_tree(rae, ss)[end], _) |> mean
end

function loss_grad!(θ::Vector, storage::Vector) 
    error("loss_grad! CALLED")
    storage[:] = 0
    storage = zeros(storage)
    rae = unpack!(rae_outer, θ)
    for ss in training_sents
        tree, act_tree, pp, err_total = eval_to_tree(rae, ss)
        ∇s=BPTS(rae,act_tree)
        storage+=pack(∇s...)
    end
    storage/=length(training_sents)
end

function loss_and_loss_grad!(θ::Vector, storage::Vector)   
    storage[:] = 0
    rae = unpack!(rae_outer, θ)
    err = 0.0
    for ss in training_sents
        tree, act_tree, pp, err_total = eval_to_tree(rae, ss)
        ∇s=BPTS(rae,act_tree)
        storage+=pack(∇s...)
        err+=err_total
    end
    storage/=length(training_sents)
    @printval(norm(storage))
    err/=length(training_sents)
    err
end

f=DifferentiableFunction(loss!,loss_grad!,loss_and_loss_grad!)
#Must provide Graident as finite difference requires ~length(θ) calls to f
res = optimize(f, pack(rae_outer), method=:l_bfgs, show_trace = true,iterations = 20)
rae_outer = unpack!(rae_outer, res.minimum);
print("---------------------------")

Iter     Function value   Gradient norm 
norm(storage) = 200.03786878409898
     0     1.593671e+01     0.000000e+00
norm(storage) = 200.03786878409898


LoadError: Error not defined
while loading In[119], in expression starting on line 67

In [127]:
Pkg.rm("Optim")
Pkg.add("Optim",v"0.3.1")

INFO: No packages to install, update or remove
INFO: Package database updated
INFO: No packages to install, update or remove
INFO: Package database updated
INFO: METADATA is out-of-date — you may not have the latest version of Optim
INFO: Use `Pkg.update()` to get the latest versions of your packages


In [51]:
#==
    method::ASCIIString
    initial_x::Array{T,N}
    minimum::Array{T,N}
    f_minimum::Float64
    iterations::Int
    iteration_converged::Bool
    x_converged::Bool
    xtol::Float64
    f_converged::Bool
    ftol::Float64
    gr_converged::Bool
    grtol::Float64
    trace::OptimizationTrace
    f_calls::Int
    g_calls::Int
=#
@printval res.f_calls 
@printval res.g_calls 
@printval res.x_converged 
@printval res.iterations
@printval res.f_minimum
@printval res.gr_converged


res.f_calls = 73
res.g_calls = 73
res.x_converged = false
res.iterations = 1
res.f_minimum = 16.210608181836093
res.gr_converged = false


https://github.com/JuliaLang/julia/blob/master/doc/manual/profile.rst Actual instructions on profiling



In [None]:
Profile.clear()
@profile f(pack(rae_outer))


In [None]:
using ProfileView
ProfileView.view()

In [None]:
#tree data in tree is not use, other than it's structure.
#((("the","house"),("destroyed",("the","boy")))  is equivalent to ((("",""),("",("",""))) 



function unfold(rae::RAE, tree::(String,String), pp::Embedding)
    ĉ_is, ĉ_js = reconstruct(rae, pp)
    [ĉ_is ĉ_js]
end


function unfold(rae::RAE, tree::(Any,String), pp::Embedding)
    p̂_is, ĉ_js = reconstruct(rae, pp)
    ĉ_is = unfold(rae, tree[1], p̂_is)
    [ĉ_is ĉ_js]
end

function unfold(rae::RAE, tree::(String,Any), pp::Embedding)
    ĉ_is, p̂_js = reconstruct(rae, pp)
    ĉ_js = unfold(rae, tree[2], p̂_js)
    [ĉ_is ĉ_js]
    
end

function unfold(rae::RAE, tree::(Any,Any), pp::Embedding)
    p̂_is, p̂_js = reconstruct(rae, pp)
    ĉ_is = unfold(rae, tree[1], p̂_is)
    ĉ_js = unfold(rae, tree[2], p̂_js)
    [ĉ_is ĉ_js]
end

    

In [None]:
function cosine_dist(a,b)
    (a⋅b)/(norm(a)*norm(b))
end

function neighbour_dists(cc::Vector{Float64}, globe::Matrix{Float64})
    [cosine_dist(cc, globe[:,ii]) for ii in 1:size(globe,2)]
end


function show_best(rae::RAE,ĉ::Embedding, nbest=20)
    candidates=neighbour_dists(ĉ,rae.L)   
    best_cands = [ (findfirst(candidates,score), score)
                    for score in select(candidates,1:nbest, rev=true)[1:nbest]]
    vcat([[rae.indexed_words[ii] score] for (ii,score) in best_cands]...)
end

function show_bests(rae::RAE,ĉs::Embeddings, nbest=20)
    hcat([show_best(rae,ĉs[:,ii],nbest) for ii in 1:size(ĉs,2)]...)
end


In [None]:
tree, pp, score_total = eval_to_tree(rae_outer,"easy holdings")
ĉs = unfold(rae_outer,tree,pp)

show_bests(rae_outer, ĉs)


In [None]:
function depth_inc(ele::(Int,String))
    (ele[1]+1,ele[2])
end

function unfold_struct(tree::(Any,Any))
    left_tree = unfold_struct(tree[1]) 
    left = @pipe left_tree |> map(depth_inc,_)
    right_tree = unfold_struct(tree[2]) 
    right = @pipe right_tree |> map(depth_inc,_)
    [left, right, (0,"")]
end

function unfold_struct(tree::(Any,String))
    left_tree = unfold_struct(tree[1]) 
    left = @pipe left_tree |> map(depth_inc,_)
    [left, (0,tree[2]), (0,"")]
end
function unfold_struct(tree::(String,Any))
    right_tree = unfold_struct(tree[2]) 
    right = @pipe right_tree |> map(depth_inc,_)
    [(0,tree[1]),right, (0,"")]
end
function unfold_struct(tree::(String,String))
    [(0,tree[1]), (0, tree[2]), (0,"")]
end

function print_tree(tree::(Any,Any))
    
    for (depth,word ) in unfold_struct(tree)
        println("\t"^depth, word)
    end
end