In [1]:
using Iterators
using Pipe
# Debug helper: print an array's type followed by its dimensions, e.g. for
# quickly checking what a loading step produced.
function pz(x :: AbstractArray)
    dims = size(x)
    kind = typeof(x)
    println(kind, ": ", dims)
end
# Print the source text of an expression, then " = ", then the expression's
# value as evaluated in the caller's scope.
macro printval(ee)
    label = string(ee)
    # The whole call is escaped so that `ee` resolves in the caller's scope.
    esc(Expr(:call, :println, label, " = ", ee))
end

In [2]:
using RecursiveAutoencoders

In [3]:
# Load the small test embedding file and report what came back: the size of
# `LL` and the types of the two word lookups.
include("load_embeddings.jl")
# Alternative (larger) input file: "embeddings-scaled.EMBEDDING_SIZE=50.txt"
LL, word_indexes, indexed_words = load_embeddings("embeddings-test.txt")
println(size(LL))
println(typeof(word_indexes))
println(typeof(indexed_words))

(50,14)
Dict{AbstractString,Int64}
Array{AbstractString,1}


In [4]:
# Deserialize the stored training data (presumably parse trees -- confirm
# against whatever wrote the file); `open` closes the stream afterwards.
training_trees = open(deserialize, "training_sents.jsz", "r");

In [5]:
# One merged node of a folded tree, as built by `fold`.
# `Union{...}` is the non-deprecated spelling of the union type; it matches
# the `Tuple{...}` syntax `fold` already uses.
type FoldData
    p::Embedding                       # result of merging the two children
    left::Union{FoldData,Embedding}    # left child: another merged node, or a leaf embedding
    right::Union{FoldData,Embedding}   # right child: another merged node, or a leaf embedding
end

# Recursively fold a binary tree into a tree of `FoldData` nodes.
#
# Each element of the 2-tuple `tree` may be a word (looked up through
# `eval_word_embedding`), an `Embedding` (used as-is), or anything else, which
# is folded recursively and so must itself be a 2-tuple.
# Returns the `FoldData` for the root, holding the merged embedding and both
# evaluated children.
function fold(rae::RAE, tree::Tuple{Any,Any})
    # `AbstractString` instead of the deprecated `String` alias; this matches
    # the `Dict{AbstractString,Int64}` word index the notebook loads.
    # NOTE(review): the meaning of the trailing `false` flag is not visible
    # here -- check `eval_word_embedding` in RecursiveAutoencoders.
    function eval_child(child::AbstractString)
        eval_word_embedding(rae,child,false)
    end
    function eval_child(c::Embedding)
        c
    end
    function eval_child(child::Any)
        fold(rae,child)
    end

    # Embedding to feed into the merge: a merged node contributes its `p`,
    # a leaf embedding contributes itself.
    function emb(data::FoldData)
        data.p
    end
    function emb(data::Embedding)
        data
    end

    left = eval_child(tree[1])
    right = eval_child(tree[2])
    p=eval_merge(rae, emb(left), emb(right))
    FoldData(p, left, right)
end

fold (generic function with 1 method)

In [6]:
# Inner node of the unfolding tree, created by `unfold` for every `FoldData`
# node it reconstructs.
# `Union{...}` is the non-deprecated spelling of the union type.
type UnfoldData
    p::Embedding                          # embedding that was reconstructed at this node
    parent::Union{FoldData,UnfoldData}    # unfolding node above; the root is given its own FoldData
    ĉ_i::Embedding                        # first output of `reconstruct(rae, p)`
    ĉ_j::Embedding                        # second output of `reconstruct(rae, p)`
end

# Leaf of the unfolding tree, built by the leaf method of `unfold`.
type UnfoldLeaf
    ĉ::Embedding          # reconstructed embedding handed down to this leaf
    parent::UnfoldData    # unfolding node whose reconstruction produced ĉ
    c::Embedding          # embedding stored at this leaf position in the folded tree
end


# Leaf case of unfolding: pair the embedding `c` from the folded tree with its
# reconstruction `ĉ`, and record which unfolding node produced it.
# `rae` is unused here; it keeps the signature parallel to the other methods.
function unfold(rae::RAE, c::Embedding, ĉ::Embedding, parent)
    leaf = UnfoldLeaf(ĉ, parent, c)
    return leaf
end

# Inner-node case of unfolding: reconstruct the two children of `p`, then
# unfold each child of the folded node `act` with its reconstruction.
# Returns a flat vector of the `UnfoldLeaf`s below this node, left to right.
function unfold(rae::RAE, act::FoldData, p::Embedding, parent)
    ĉ_i, ĉ_j = reconstruct(rae,p)
    data = UnfoldData(p, parent, ĉ_i, ĉ_j)

    left = unfold(rae, act.left, ĉ_i, data)
    right= unfold(rae, act.right, ĉ_j, data)
    # Explicit vertical concatenation: each side is either a single leaf or a
    # vector of leaves, and callers index the result as one flat list.
    # `[left, right]` only flattens through deprecated implicit concatenation.
    [left; right]
end

# Entry point: unfold a whole folded tree starting from its root.
# The root has no unfolding node above it, so the folded root itself is passed
# as the parent and its own embedding is the one to reconstruct.
function unfold(rae::RAE, act::FoldData)
    root_embedding = act.p
    return unfold(rae, act, root_embedding, act)
end

unfold (generic function with 3 methods)

In [7]:
# Build the autoencoder from the values returned by `load_embeddings`
# (output suppressed by the trailing `;`).
rae = RAE(LL,word_indexes,indexed_words);

In [16]:
# tests
# Unfolding ("the", ("killer", "cows")) must give a flat list of three leaves:
# the first hangs directly off the root unfolding node, the other two hang off
# an inner node whose parent is that same root node.
a=fold(rae,("the",("killer", "cows")))
b=unfold(rae,a);
@assert b[1].parent==b[2].parent.parent==b[3].parent.parent


In [None]:
# Collect the word embeddings of every leaf of a binary tree, left to right,
# by horizontally concatenating the results for the two children.
#
# Uses `Tuple{Any,Any}` (the same tuple-type syntax as `fold`) and
# `AbstractString` rather than the old `(Any,Any)` / `String` spellings.
function RecursiveAutoencoders.eval_word_embeddings(rae::RAE, tree::Tuple{Any,Any})
    # NOTE(review): the meaning of the trailing `false` flag is not visible
    # here -- check `eval_word_embedding` in RecursiveAutoencoders.
    function eval_child(child::AbstractString)
        eval_word_embedding(rae,child,false)
    end
    # Anything that is not a word is treated as a subtree.
    function eval_child(child::Any)
        eval_word_embeddings(rae,child)
    end
    c_i = eval_child(tree[1])
    c_j = eval_child(tree[2])
    [c_i c_j]
end

In [None]:
# Unfold a parent embedding whose children are both words: the two
# reconstructions are returned side by side. `tree` is used only for dispatch.
# Tuple type written as `Tuple{...}`, consistent with `fold`.
function unfold(rae::RAE, tree::Tuple{AbstractString,AbstractString}, pp::Embedding)
    ĉ_is, ĉ_js = reconstruct(rae, pp)
    [ĉ_is ĉ_js]
end


# Unfold a parent embedding whose left child is a subtree and whose right
# child is a word: the left reconstruction is unfolded further, the right one
# is kept as-is.
# Tuple type written as `Tuple{...}`, consistent with `fold`.
function unfold(rae::RAE, tree::Tuple{Any,AbstractString}, pp::Embedding)
    p̂_is, ĉ_js = reconstruct(rae, pp)
    ĉ_is = unfold(rae, tree[1], p̂_is)
    [ĉ_is ĉ_js]
end

# Unfold a parent embedding whose left child is a word and whose right child
# is a subtree: the left reconstruction is kept as-is, the right one is
# unfolded further.
# Tuple type written as `Tuple{...}`, consistent with `fold`.
function unfold(rae::RAE, tree::Tuple{AbstractString,Any}, pp::Embedding)
    ĉ_is, p̂_js = reconstruct(rae, pp)
    ĉ_js = unfold(rae, tree[2], p̂_js)
    [ĉ_is ĉ_js]
end

# Unfold a parent embedding whose children are both subtrees: each
# reconstruction is unfolded recursively and the results are placed side by
# side.
# Tuple type written as `Tuple{...}`, consistent with `fold`.
function unfold(rae::RAE, tree::Tuple{Any,Any}, pp::Embedding)
    p̂_is, p̂_js = reconstruct(rae, pp)
    ĉ_is = unfold(rae, tree[1], p̂_is)
    ĉ_js = unfold(rae, tree[2], p̂_js)
    [ĉ_is ĉ_js]
end


In [None]:
# Error terms for a two-word tree: first the output-layer delta between the
# reconstruction `ĉ_ij` and the target `c_ij`, then that delta pushed back
# through `rae.W_d` to the layer whose output is `pp`.
# Returns the back-propagated delta. `tree` is used only for dispatch.
# NOTE(review): `rae.W_d` is presumably the decoder weight matrix -- confirm
# against the RAE type definition.
function UBPTS(rae::RAE, pp::Embedding, ĉ_ij::Embedding,c_ij::Embedding, tree::Tuple{AbstractString, AbstractString})
    δ_0 = δ(ĉ_ij,c_ij)
    δ_1 = δ(pp, δ_0, rae.W_d)
    return δ_1
end



# Back-propagate an error term through one tanh layer.
#   a        -- output of this layer: a = tanh(z), where z is the input from the layer below
#   δ_above  -- error term of the layer above
#   W        -- weight matrix that maps this layer to the layer above
# Returns (W' * δ_above) scaled elementwise by the tanh derivative.
function δ(a::Embedding, δ_above::Vector{Float64}, W::Matrix{Float64})
    # Derivative of a = tanh(z) is 1 - a^2. The broadcast form `1 .- ...`
    # avoids relying on scalar-minus-array `-`.
    dz = 1 .- a.^2
    (W'*δ_above).*dz
end

# Error term at the output layer: the difference between the reconstruction
# `ĉ_ij` and the target `c_ij`, scaled by the tanh derivative at `ĉ_ij`.
function δ(ĉ_ij::Embedding,c_ij::Embedding)
    # Output layer.
    # The note below must not start with `#=`: that opens a block comment,
    # which was never closed and commented out the rest of the function.
    M = length(c_ij)  # equal to length(ĉ_ij)
    δ_above = ĉ_ij-c_ij
    # An identity weight matrix lets the hidden-layer method be reused as-is.
    δ(ĉ_ij,δ_above, eye(M))
end


In [None]:
# Scratch cell: run the pipeline on a two-word tree.
tree= ("killer", "cows")
c = eval_word_embeddings(rae, tree)
# NOTE(review): whether `[a, b]` concatenates the two columns or builds a
# vector of two vectors depends on the Julia version -- confirm which is meant.
c= [c[:,1], c[:,2]]
p = fold(rae,tree)
# NOTE(review): the `fold` defined in this notebook returns a `FoldData`, but
# the 3-argument `unfold` methods declare `pp::Embedding` -- confirm whether
# `p.p` was intended here.
ĉ = unfold(rae,tree,p)
ĉ= [ĉ[:,1], ĉ[:,2]]
# NOTE(review): `grad_top` is not defined anywhere in the visible notebook --
# verify that it exists before running this cell.
grad_top(rae, ĉ, c, tree)

In [None]:
# Display the current value of `c` as the cell output.
c

In [None]:
# Cosine of the angle between `a` and `b`: their dot product divided by the
# product of their norms. Despite the name this is a similarity, so larger
# values mean closer vectors.
function cosine_dist(a,b)
    scale = norm(a) * norm(b)
    return dot(a, b) / scale
end

# Score the vector `cc` against every column of `globe` with `cosine_dist`.
# Returns one score per column, in column order.
function neighbour_dists(cc::Vector{Float64}, globe::Matrix{Float64})
    ncols = size(globe, 2)
    scores = [cosine_dist(cc, globe[:, col]) for col in 1:ncols]
    return scores
end


# List the words whose embeddings score highest against `ĉ`.
#   rae    -- supplies the embedding matrix `rae.L` and the `rae.indexed_words` lookup
#   ĉ      -- embedding to look up
#   nbest  -- maximum number of rows to return (default 20)
# Returns a two-column table, best match first: the word, then its score
# rounded to 2 decimals.
function show_best(rae::RAE,ĉ::Embedding, nbest=20)
    candidates = neighbour_dists(ĉ, rae.L)
    # Never ask for more rows than there are words; the small test vocabulary
    # has fewer entries than the default `nbest`.
    nshown = min(nbest, length(candidates))
    # Rank by index rather than looking each score up again with `findfirst`,
    # so that words with equal scores are each listed once.
    best_idxs = sortperm(candidates, rev=true)[1:nshown]
    vcat([[rae.indexed_words[ii] round(candidates[ii],2)] for ii in best_idxs]...)
end

# Build the `show_best` table for every column of `ĉs` and place the tables
# side by side, one pair of columns per embedding.
function show_bests(rae::RAE,ĉs::Embeddings, nbest=20)
    columns = 1:size(ĉs,2)
    tables = [show_best(rae, ĉs[:,col], nbest) for col in columns]
    hcat(tables...)
end


In [None]:
# NOTE(review): `ĉ_ij` is not assigned anywhere in the visible notebook --
# confirm where it comes from before running this cell.
bs = show_bests(rae, ĉ_ij)
# Take every other entry of the first row. `show_best` rows are
# [word score], so this presumably picks the top word for each embedding.
bs[1,:][1:2:end]

In [None]:
# Display the current value of `ĉ_ij` as the cell output.
ĉ_ij