In [1]:
using DataStructures
using Base.Collections
using Iterators
using Pipe
# Debug helper: print the type of the array `x`, then ": ", then its size tuple.
pz(x :: AbstractArray) = println(typeof(x), ": ", size(x))
# Debug macro: print the source text of the expression `ee`, then " = ", then the value
# the expression evaluates to. The whole generated call is escaped, so `ee` is evaluated
# in the caller's scope.
macro printval(ee)
    label = string(ee)
    esc(:(println($label, " = ", $ee)))
end

# Debug macro: print the source text of the expression `ee`, followed (tab separated) by
# the type and the size of the value it evaluates to in the caller's scope.
macro pz(ee)
    label = string(ee)
    esc(:(println($label, "\t\t", typeof($ee), "\t", size($ee))))
end

In [2]:
using RecursiveAutoencoders

In [3]:
# Load the embedding matrix and the two vocabulary lookups from the small test file,
# then print the matrix size and the types of both lookups.
include("load_embeddings.jl")
LL, word_indexes, indexed_words = load_embeddings("embeddings-test.txt")
# Alternative input file: "embeddings-scaled.EMBEDDING_SIZE=50.txt"
println(size(LL))
println(typeof(word_indexes))
println(typeof(indexed_words))

(50,14)
Dict{String,Int64}
Array{String,1}


In [4]:
# Read the serialized training trees; `open` closes the file once `deserialize` returns.
training_trees = open(deserialize, "training_sents.jsz", "r");

In [5]:
# Tag types recording which child slot of its parent an unfold node occupies.
# They carry no data; they are used as type parameters and for dispatch
# (see get_side, sidepad and unsidepad).
abstract Side
# Tag passed by `unfold` for the node built from `act.left`.
immutable Left<:Side
end

# Tag passed by `unfold` for the node built from `act.right`.
immutable Right<:Side
end

# Tag passed by the two-argument `unfold` for the top node, which has no sibling.
immutable NoSide<:Side
end

# Record of one merge performed by `fold`.
immutable FoldData
    p_out::Embedding                  # result of eval_merge on the two children's embeddings
    left::Union(FoldData,Embedding)   # first child: a nested merge or a plain embedding
    right::Union(FoldData,Embedding)  # second child: a nested merge or a plain embedding
end

# Record of one reconstruction step performed by `unfold`.
# The type parameter T is the Side tag of this node relative to its parent.
immutable UnfoldData{T<:Side}
    p_in::Embedding                      # embedding that was handed to `reconstruct`
    parent::Union(FoldData,UnfoldData)   # enclosing unfold node; for the top node, the FoldData being unfolded
    ĉ_i::Embedding                       # first value returned by `reconstruct`
    ĉ_j::Embedding                       # second value returned by `reconstruct`
    depth::Int64                         # 0 at the top node, +1 per level below it
end

# Terminal node of an unfolding: pairs a reconstructed embedding with the embedding
# stored at the same position in the folded tree.
# The type parameter T is the Side tag of this leaf relative to its parent.
immutable UnfoldLeaf{T<:Side}
    ĉ::Embedding         # reconstructed embedding
    parent::UnfoldData   # unfold node whose reconstruction produced ĉ
    c::Embedding         # embedding from the fold pass at this position
    depth::Int64         # parent's depth + 1
end



In [6]:
# Return an instance of the Side tag `T` that parameterises the given unfold node or leaf.
get_side{T}(::Union(UnfoldLeaf{T}, UnfoldData{T})) = T()

get_side (generic function with 1 method)

In [7]:
# emb(x): the embedding that represents `x`.
# A merge record is represented by its merged output; an embedding represents itself.
emb(data::FoldData) = data.p_out
emb(data::Embedding) = data

# Recursively merge the two-element `tree` bottom-up and return the FoldData of its root.
# Each element of the pair may be a word (looked up with eval_word_embedding), an
# Embedding (used as is), or anything else (treated as a subtree and folded recursively).
function fold(rae::RAE, tree::(Any,Any))
    eval_child(word::String) = eval_word_embedding(rae, word, false)
    eval_child(embedding::Embedding) = embedding
    eval_child(subtree::Any) = fold(rae, subtree)

    # The first element is evaluated before the second, as in a plain sequence of statements.
    left, right = eval_child(tree[1]), eval_child(tree[2])
    merged = eval_merge(rae, emb(left), emb(right))
    FoldData(merged, left, right)
end

fold (generic function with 1 method)

In [8]:
# Leaf case of the unfolding: the folded tree holds a plain embedding `c` here, so record
# its reconstruction `ĉ` together with the parent node, the side tag `T` and the depth.
unfold{T}(rae::RAE, c::Embedding, ĉ::Embedding, parent, ::Type{T}, depth) =
    UnfoldLeaf{T}(ĉ, parent, c, depth)


# Inner-node case of the unfolding: reconstruct two child embeddings from `p_in`, record
# the step as an UnfoldData tagged with side `T`, then unfold both children of `act` one
# level deeper. Returns the vertical concatenation of the two recursive results.
function unfold{T}(rae::RAE, act::FoldData, p_in::Embedding, parent, ::Type{T}, depth::Int)
    ĉ_first, ĉ_second = reconstruct(rae, p_in)
    node = UnfoldData{T}(p_in, parent, ĉ_first, ĉ_second, depth)

    child_depth = depth + 1
    vcat(unfold(rae, act.left, ĉ_first, node, Left, child_depth),
         unfold(rae, act.right, ĉ_second, node, Right, child_depth))
end

# Entry point: unfold a folded tree from its root. The root's own output is the
# embedding to reconstruct from, the FoldData itself serves as the parent, and the
# node is tagged NoSide at depth 0.
unfold(rae::RAE, act::FoldData) = unfold(rae, act, act.p_out, act, NoSide, 0)

unfold (generic function with 3 methods)

In [9]:
# tests
# Build the autoencoder from the loaded embeddings and vocabulary lookups.
rae = RAE(LL,word_indexes,indexed_words);

# Two-word tree: both leaves must hang off the same unfold node.
a=fold(rae,("killer", "cows"))
b=unfold(rae,a);
@assert b[1].parent==b[2].parent


# Three-word tree: the first leaf hangs directly off the top node; the other two
# leaves sit one level deeper, so their grandparent must be that same top node.
a=fold(rae,("the",("killer", "cows")))
b=unfold(rae,a);
@assert b[1].parent==b[2].parent.parent==b[3].parent.parent

In [10]:
# Delta for a hidden layer: propagate the delta of the layer above back through W and
# scale it elementwise by the tanh derivative at this layer's output.
# Also prints the name, type and size of W', δ_above and dz on every call (debug output).
function δ(a::Embedding, δ_above::Vector{Float64}, W::Matrix{Float64})
    #a is the output of this layer: a=tanh(z) where z is the input from layer below
    #W is matrix to move to above layer, from this one
    dz = 1-a.^2 #Derivative of a=tanh(z)
    @pz W'
    @pz δ_above
    @pz dz
    
    (W'*δ_above).*dz
end

# Delta for the output layer: the reconstruction error (ĉ_ij - c_ij) scaled elementwise
# by the tanh derivative at the reconstructed output.
#   ĉ_ij : reconstructed embedding (output of the layer)
#   c_ij : target embedding
# Uses the same derivative, 1 - a.^2, as the hidden-layer method, so the result equals
# the hidden-layer δ called with an identity weight matrix and δ_above = ĉ_ij - c_ij.
# NOTE(review): assumes the reconstruction is a tanh output, as the hidden-layer method
# does - confirm against `reconstruct`.
function δ(ĉ_ij::Embedding,c_ij::Embedding)
    dz = 1-ĉ_ij.^2 #Derivative of ĉ=tanh(z)
    δ_above = ĉ_ij-c_ij
    δ_above.*dz
end


δ (generic function with 2 methods)

In [11]:
# sidepad(d, side): widen the delta `d` of a child to twice its length by adding a
# block of zeros the same length as `d`, choosing the half according to the child's side.
#   Left   -> zeros first, then d
#   Right  -> d first, then zeros
#   NoSide -> d unchanged (the top node has no sibling)
# Both padded variants use explicit vertical concatenation (`;`); `[d, padding]` only
# concatenates on old Julia versions and builds a vector of vectors on newer ones.
# NOTE(review): Left lands in the SECOND half here. Verify this matches the order in
# which `reconstruct` lays out its two outputs.
function sidepad(d::Vector{Float64}, ::Left)
    [zeros(size(d)); d]
end
function sidepad(d::Vector{Float64}, ::Right)
    [d; zeros(size(d))]
end

function sidepad(d::Vector{Float64}, ::NoSide)
    d
end

# unsidepad(d, side): inverse of sidepad - pick out the half of `d` belonging to `side`.
#   Left   -> second half
#   Right  -> first half
#   NoSide -> d unchanged
# The midpoint is computed with integer division so the ranges are integer ranges
# (`end/2` is a Float64 and is not a valid index on newer Julia versions).
function unsidepad(d::Vector{Float64}, ::Left)
    half = div(length(d), 2)
    d[half+1:end]
end
function unsidepad(d::Vector{Float64}, ::Right)
    half = div(length(d), 2)
    d[1:half]
end

function unsidepad(d::Vector{Float64}, ::NoSide)
    d
end

unsidepad (generic function with 3 methods)

In [38]:
# Backpropagation entry point for the unfolded (reconstruction) half of the network.
#   nodes : the leaves returned by `unfold`
# For every leaf, compute the output-layer delta, pad it to its parent's width according
# to the leaf's side, and sum the padded deltas per parent. The per-parent sums are then
# handed to the Dict method of UBPTS, whose result is returned.
# An empty `nodes` simply yields an empty Dict (no leaf is indexed up front).
function UBPTS(rae::RAE, nodes::Vector{UnfoldLeaf} )
    parent_deltas = Dict{UnfoldData, Vector{Float64}}()

    for leaf in nodes
        δ_padded = sidepad(δ(leaf.ĉ, leaf.c), get_side(leaf))
        if haskey(parent_deltas, leaf.parent)
            # `+` allocates a new vector, so previously stored deltas are never mutated
            parent_deltas[leaf.parent] += δ_padded
        else
            parent_deltas[leaf.parent] = δ_padded
        end
    end

    UBPTS(rae, parent_deltas)
end

# Backpropagate through the unfold nodes, deepest first.
#   parent_deltas : for each unfold node, the summed (side-padded) deltas of its children,
#                   i.e. the delta at that node's reconstruction output. The Dict is
#                   extended in place as shallower nodes are reached.
# Returns (δ_above_fold, ΔW_d, Δb_d):
#   δ_above_fold : delta handed to the FoldData at the top of the unfolding
#   ΔW_d, Δb_d   : gradients accumulated over every unfold node
#
# For a node with input p_in and output delta δ_above, the gradient of the matrix that
# maps p_in to that output is δ_above*p_in', and the bias gradient is δ_above. Every node,
# including the NoSide root, contributes. (Pairing p_in with the delta that is passed on
# to the parent, and skipping the root, drops the root's use of W_d entirely - a
# two-word tree would get a zero gradient.)
# NOTE(review): assumes `reconstruct` applies rae.W_d / rae.b_d followed by tanh - confirm.
function UBPTS(rae::RAE, parent_deltas::Dict{UnfoldData,Vector{Float64}})
    δ_above_fold = 0

    # Reverse ordering on depth: the deepest pending node is dequeued first, so all of a
    # node's children have contributed before the node itself is processed.
    pending_nodes = PriorityQueue{UnfoldData, Int64}(Base.Order.Reverse)
    enqueue!(node::UnfoldData) = pending_nodes[node] = node.depth #Priority of node.depth (syntax on julia Priority queues is weird)
    map(enqueue!, keys(parent_deltas)) #Add all that were passed, as none have been processed

    function pend!(parent_node::UnfoldData, δ_node::Vector{Float64})
        if !haskey(parent_deltas,parent_node)
            enqueue!(parent_node) #first contribution: not queued yet either
            parent_deltas[parent_node]=δ_node
        else
            parent_deltas[parent_node]+=δ_node
        end
    end

    function pend!(node::FoldData, δ_node::Vector{Float64})
        # Reached the top of the unfolding: collect what flows into the fold half
        δ_above_fold+=δ_node
    end

    ΔW_d=0 #will broadcast
    Δb_d=0
    while !isempty(pending_nodes)
        node = dequeue!(pending_nodes)
        δ_above =  parent_deltas[node]

        ΔW_d += δ_above*node.p_in'
        Δb_d += δ_above

        #Note: node.p_in is one half of the parent's reconstruction (or, for the root,
        #      the fold output), so its delta is padded to the parent's width by side
        δ_node = δ(node.p_in, δ_above, rae.W_d)
        δ_padded = sidepad(δ_node, get_side(node))

        pend!(node.parent,δ_padded)
    end

    (δ_above_fold, ΔW_d, Δb_d)
end


UBPTS (generic function with 4 methods)

In [39]:
# Backpropagate through the fold (encoding) half of the network.
#   node    : merge record to differentiate through
#   δ_above : delta at the output of this merge (node.p_out)
# Returns (ΔW_e, Δb_e) summed over this node and everything below it.
#
# The input of the merge is the concatenation of the two child embeddings (FoldData has
# no field `p`). Its gradient contribution is δ_above*a' for the weights and δ_above for
# the bias. The delta at the input is split into halves - first half for the left child,
# second for the right, matching the concatenation order - and passed down.
# NOTE(review): assumes `eval_merge` applies rae.W_e / rae.b_e to [left; right] followed
# by tanh - confirm.
function UBPTS(rae::RAE, node::FoldData, δ_above::Vector{Float64})
    a = [emb(node.left); emb(node.right)]

    ΔW_e=δ_above*a'
    Δb_e=δ_above

    δ_children = δ(a, δ_above, rae.W_e)
    half = div(length(δ_children), 2)   # integer midpoint; `end/2` would be a Float64
    δ_left = δ_children[1:half]
    δ_right = δ_children[half+1:end]

    # The leaf method returns three values; only the first two are used here
    ΔW_e_left, Δb_e_left = UBPTS(rae, node.left, δ_left)
    ΔW_e_right, Δb_e_right = UBPTS(rae, node.right, δ_right)
    (ΔW_e+ΔW_e_left+ΔW_e_right, Δb_e+Δb_e_left+Δb_e_right)
end

# Leaf of the fold pass: a plain embedding contributes no gradient, so return three
# zeros (nothing to learn here, at least until rae.L is being learned).
UBPTS(rae::RAE, node::Embedding, δ_above::Vector{Float64}) = (0, 0, 0)


UBPTS (generic function with 4 methods)

In [40]:
# Fold and unfold a four-word tree, then run the backward pass over the unfolded half.
a=fold(rae,("the",("bad",("killer", "cows"))))
b=unfold(rae,a);

# δd is the delta handed to the fold half; ΔW_d / Δb_d are the accumulated gradients.
δd,ΔW_d, Δb_d = UBPTS(rae, b)
println("-"^54)
# Print each parameter next to its gradient so their types and sizes can be compared.
@pz rae.W_d
@pz ΔW_d
@pz rae.b_d
@pz Δb_d

#ΔW_e,Δb_e = UBPTS(rae, a, δd)
#println("+"^54)
#@pz rae.W_e
#@pz ΔW_e
#@pz rae.b_e
#@pz Δb_e



W'		Array{Float64,2}	(50,100)
δ_above		Array{Float64,1}	(100,)
dz		Array{Float64,1}	(50,)
W'		Array{Float64,2}	(50,100)
δ_above		Array{Float64,1}	(100,)
dz		Array{Float64,1}	(50,)
W'		Array{Float64,2}	(50,100)
δ_above		Array{Float64,1}	(100,)
dz		Array{Float64,1}	(50,)
------------------------------------------------------
rae.W_d		Array{Float64,2}	(100,50)
ΔW_d		Array{Float64,2}	(100,50)
rae.b_d		Array{Float64,1}	(100,)
Δb_d		Array{Float64,1}	(100,)


In [23]:
# Display rae.W_d, the matrix used as W in the backward pass over the unfold nodes.
rae.W_d

100x50 Array{Float64,2}:
  1.36042   -1.13715   -0.378774   …  -1.83839     0.332648  -0.921842  
  0.158677   2.0471    -0.130769      -0.767753   -0.533264   1.0391    
 -0.129824   0.871048  -1.86346       -3.23341    -0.998438   1.85973   
 -0.213552  -0.83365   -5.08658       -0.22246     1.08437    1.66004   
  0.562324  -0.821276  -4.99127        0.459804   -0.753386   0.487699  
 -0.33136    0.179639  -1.98763    …  -0.570384   -0.475553   1.05452   
 -1.21167   -0.825139   0.0462765      0.917748    1.18376   -0.848934  
 -2.68447    0.774506  -0.920876      -0.136794    0.479223   0.774488  
 -1.08257   -1.00241    1.21925       -0.611068    1.0177     2.47648   
  0.430553   0.61749   -1.07897       -0.241328   -1.21044   -0.424625  
 -2.32432    1.51338    0.401951   …   0.103728   -1.13518    0.848895  
 -0.137243  -2.46091    1.7864        -0.480136    1.24385   -0.291449  
  0.865728   0.39537    1.76111        1.6618      0.215561  -1.41962   
  ⋮                       

In [None]:
# Collect the word embeddings of every word in the two-element `tree`, side by side.
# A String element is looked up with eval_word_embedding; any other element is treated
# as a subtree and expanded recursively. The results for the first and second element
# are concatenated horizontally.
function RecursiveAutoencoders.eval_word_embeddings(rae::RAE, tree::(Any,Any))
    eval_child(word::String) = eval_word_embedding(rae, word, false)
    eval_child(subtree::Any) = eval_word_embeddings(rae, subtree)

    hcat(eval_child(tree[1]), eval_child(tree[2]))
end

In [None]:
# Scratch cell: gather target and reconstructed embeddings for a two-word tree and pass
# them to grad_top.
# NOTE(review): this cell has no execution count, and neither a three-argument `unfold`
# nor `grad_top` is defined in the visible part of this file - presumably left over from
# an earlier API; confirm before running.
tree= ("killer", "cows")
c = eval_word_embeddings(rae, tree)
# Stack the two columns into one vector (relies on `[a, b]` concatenating arrays)
c= [c[:,1], c[:,2]]
p = fold(rae,tree)
ĉ = unfold(rae,tree,p)
ĉ= [ĉ[:,1], ĉ[:,2]]
grad_top(rae, ĉ, c, tree)

In [None]:
# Scratch: build a two-element Any-typed array ...
x = Any[2,3]

In [None]:
# ... and display it.
x

In [None]:
# Cosine of the angle between `a` and `b`: their dot product divided by the product of
# their norms. Despite the name this is a similarity - it is 1 for parallel vectors and
# 0 for orthogonal ones - so larger means closer.
cosine_dist(a,b) = dot(a, b) / (norm(a) * norm(b))

# Score `cc` against every column of `globe` with cosine_dist.
# Returns one score per column, in column order.
function neighbour_dists(cc::Vector{Float64}, globe::Matrix{Float64})
    ncols = size(globe, 2)
    [cosine_dist(cc, globe[:, col]) for col in 1:ncols]
end


# List the words whose embeddings (columns of rae.L) score highest against `ĉ`.
#   nbest : maximum number of words to list (default 20); clamped to the vocabulary
#           size, so a small embedding table no longer raises a bounds error.
# Returns a matrix with one row per word: the word and its score rounded to 2 decimals,
# best score first.
# Column indexes come straight from `sortperm`, so words with identical scores are each
# listed once (looking the index up again from the score would repeat the first match).
function show_best(rae::RAE,ĉ::Embedding, nbest=20)
    candidates = neighbour_dists(ĉ, rae.L)
    nshown = min(nbest, length(candidates))
    best_indexes = sortperm(candidates, rev=true)[1:nshown]
    vcat([[rae.indexed_words[ii] round(candidates[ii],2)] for ii in best_indexes]...)
end

# Run show_best on every column of `ĉs` and place the resulting tables side by side,
# one word/score column pair per embedding.
function show_bests(rae::RAE,ĉs::Embeddings, nbest=20)
    tables = [show_best(rae, ĉs[:, col], nbest) for col in 1:size(ĉs, 2)]
    hcat(tables...)
end


In [None]:
# Scratch: tabulate the nearest words for each column of ĉ_ij, then take every second
# entry of the first row (the word entries; the entries in between are the scores).
# NOTE(review): ĉ_ij is not assigned anywhere in the visible part of this file - confirm
# where it comes from.
bs = show_bests(rae, ĉ_ij)
bs[1,:][1:2:end]

In [None]:
# Display ĉ_ij itself.
ĉ_ij