In [1]:
using DataStructures
using Iterators
using Pipe
function pz(x :: AbstractArray)
    println(typeof(x), ": ", size(x))
end
macro printval(ee)
    ee_expr = @sprintf "%s" string(ee)
    esc(:(println($ee_expr," = ", $ee)))
end

In [2]:
using RecursiveAutoencoders

In [3]:
include("load_embeddings.jl")
LL,word_indexes, indexed_words =  load_embeddings("embeddings-test.txt")
#("embeddings-scaled.EMBEDDING_SIZE=50.txt");
size(LL) |> println
word_indexes |> typeof |> println
indexed_words |> typeof |> println

(50,14)
Dict{String,Int64}
Array{String,1}


In [4]:
training_trees = open("training_sents.jsz","r") do fs
    deserialize(fs)
end;

In [5]:
abstract Side
abstract Left<:Side
abstract Right<:Side
abstract NoSide<:Side

immutable FoldData
    p::Embedding
    left::Union(FoldData,Embedding)
    right::Union(FoldData,Embedding)
end

immutable UnfoldData{T<:Side}
    p::Embedding
    parent::Union(FoldData,UnfoldData)
    ĉ_i::Embedding
    ĉ_j::Embedding
end

immutable UnfoldLeaf{T<:Side}
    ĉ::Embedding
    parent::UnfoldData
    c::Embedding
end

In [9]:
function fold(rae::RAE, tree::(Any,Any))
    function eval_child(child::String)
        c=eval_word_embedding(rae,child,false)
        c
    end
    function eval_child(c::Embedding)
        c
    end
    function eval_child(child::Any)
        fold(rae,child)
    end
    
    function emb(data::FoldData)
        data.p
    end
    function emb(data::Embedding)
        data
    end
    
    left = eval_child(tree[1])
    right = eval_child(tree[2])
    p=eval_merge(rae, emb(left), emb(right))
    FoldData(p, left, right)   
end

fold (generic function with 1 method)

In [11]:
function unfold{T}(rae::RAE, c::Embedding, ĉ::Embedding, parent, ::Type{T})
    UnfoldLeaf{T}(ĉ, parent, c)
end


function unfold{T}(rae::RAE, act::FoldData, p::Embedding, parent, ::Type{T})
    #Side is a ignored argument. This could be replaced with a generated function
    ĉ_i, ĉ_j = reconstruct(rae,p)
    data = UnfoldData{T}(p, parent, ĉ_i, ĉ_j)
    
    left = unfold(rae, act.left, ĉ_i, data, Left)
    right= unfold(rae, act.right, ĉ_j, data, Right)
    [left; right]
end

function unfold(rae::RAE, act::FoldData)
    #Handle the top case
    unfold(rae, act,act.p,act, NoSide)
end

unfold (generic function with 3 methods)

In [12]:
# tests
rae = RAE(LL,word_indexes,indexed_words);

a=fold(rae,("killer", "cows"))
b=unfold(rae,a);
@assert b[1].parent==b[2].parent


a=fold(rae,("the",("killer", "cows")))
b=unfold(rae,a);
@assert b[1].parent==b[2].parent.parent==b[3].parent.parent

In [13]:
function δ(a::Embedding, δ_above::Vector{Float64}, W::Matrix{Float64})
    #a is the ouput of this layer: a=tanh(z) where z is the input from layer below
    #W is matrix to move to above layer, from this one
    dz = 1-a.^2 #Derivitive of a=tanh(z)
    (W'*δ_above).*dz
end

function δ(ĉ_ij::Embedding,c_ij::Embedding) 
    #Output Layer
    M = length(c_ij)# ==length(ĉ_ij)
    δ_above = ĉ_ij-c_ij
    δ(ĉ_ij,δ_above, eye(M))     
end


δ (generic function with 2 methods)

In [14]:
function sidepad(d::Vector{Float64}, ::Left)
    padding=zeros(size(d))
    [padding;d]
end
function sidepad(d::Vector{Float64}, ::Right)
    padding=zeros(size(d))
    [d, padding]
end

function UBPTS{T}(rae::RAE, leaf::UnfoldLeaf{T})
    δ_half = δ(leaf.ĉ,leaf.c)
    sidepad(δ_half,T)
end

function UBPTS(rae::RAE, node::UnfoldData, δ_above::Vector{Float64})
    δ([node.ĉ_i;node.ĉ_6], δ_above, rae.W_d)
end


UBPTS (generic function with 2 methods)

In [15]:
function UBPTS(rae::RAE, nodes::Vector)#nodes::Vector{UnfoldLeaf} )
    delta_len = 2*nodes[1].ĉ |> length
    parent_deltas = DefaultDict(UnfoldData, Vector{Float64},zeros(delta_len))
    for node in nodes
        parent_deltas[node.parent]+=UBPTS(rae,node)
    end
    UBPTS(rae,parent_deltas)
end

function UBPTS(rae::RAE, nodes::DefaultDict{UnfoldData,Vector{Float64}})
    delta_len = 2*first(nodes)[1].ĉ_i |> length
    parent_deltas = DefaultDict(Union(UnfoldData,FoldData), Vector{Float64},zeros(delta_len))
    for (node, δ_above) in nodes
        parent_deltas[node.parent]+= UBPTS(rae, node, δ_above)
    end
    UBPTS(rae,parent_deltas)
end

function UBPTS(rae::RAE, nodes::DefaultDict{FoldData,Vector{Float64}})
    @assert(length(x)==1)
    _,δ=first(x)
    δ
end



UBPTS (generic function with 5 methods)

In [16]:
methods(UBPTS)

In [17]:
UBPTS(rae, b)

LoadError: `sidepad` has no method matching sidepad(::Array{Float64,1}, ::Type{Left})
while loading In[17], in expression starting on line 1

In [None]:
function RecursiveAutoencoders.eval_word_embeddings(rae::RAE, tree::(Any,Any))
    function eval_child(child::String)
        eval_word_embedding(rae,child,false)
    end
    function eval_child(child::Any)
        eval_word_embeddings(rae,child)
    end
    c_i = eval_child(tree[1])
    c_j = eval_child(tree[2])
    [c_i c_j]
end

In [None]:
tree= ("killer", "cows")
c = eval_word_embeddings(rae, tree)
c= [c[:,1], c[:,2]]
p = fold(rae,tree)
ĉ = unfold(rae,tree,p)
ĉ= [ĉ[:,1], ĉ[:,2]]
grad_top(rae, ĉ, c, tree)

In [None]:
x = Any[2,3]

In [None]:
x

In [None]:
function cosine_dist(a,b)
    (a⋅b)/(norm(a)*norm(b))
end

function neighbour_dists(cc::Vector{Float64}, globe::Matrix{Float64})
    [cosine_dist(cc, globe[:,ii]) for ii in 1:size(globe,2)]
end


function show_best(rae::RAE,ĉ::Embedding, nbest=20)
    candidates=neighbour_dists(ĉ,rae.L)   
    best_cands = [ (findfirst(candidates,score), score)
                    for score in select(candidates,1:nbest, rev=true)[1:nbest]]
    vcat([[rae.indexed_words[ii] round(score,2)] for (ii,score) in best_cands]...)
end

function show_bests(rae::RAE,ĉs::Embeddings, nbest=20)
    hcat([show_best(rae,ĉs[:,ii],nbest) for ii in 1:size(ĉs,2)]...)
end


In [None]:
bs = show_bests(rae, ĉ_ij)
bs[1,:][1:2:end]

In [None]:
ĉ_ij