In [2]:
using Iterators
using Pipe
# Print the type of the array `x` and its dimensions on one line, formatted as
# "<type>: <size>".
function pz(x :: AbstractArray)
    kind = typeof(x)
    dims = size(x)
    println(kind, ": ", dims)
end
# Print the source text of an expression followed by its value.
# For example `@printval 1 + 2` prints "1 + 2 = 3".
#
# `ee` is the unevaluated expression; it is evaluated exactly once, in the
# caller's scope.
macro printval(ee)
    # `string` already renders the expression as text, so no formatting
    # macro is needed on top of it.
    label = string(ee)
    # Escape only the user's expression. `println` stays hygienic, so a
    # caller-local variable that happens to be named `println` cannot hijack it.
    :(println($label, " = ", $(esc(ee))))
end

# Print the source text of an expression, then the type and size of its value,
# separated by tabs: "<expr>\t\t<type>\t<size>".
#
# `ee` is the unevaluated expression. It is evaluated exactly once, in the
# caller's scope, so expressions with side effects (e.g. `@pz pop!(stack)`)
# behave as written.
macro pz(ee)
    label = string(ee)
    quote
        # Bind the value to a hygienic local so `typeof` and `size` inspect the
        # same object instead of re-running the user's expression.
        let val = $(esc(ee))
            println($label, "\t\t", typeof(val), "\t", size(val))
        end
    end
end

In [3]:
using RecursiveAutoencoders
using UnfoldingRAE
using Base.Collections


In [4]:
# Bring `load_embeddings` into scope, read the embedding file, and keep the
# three values it returns.
include("load_embeddings.jl")
(LL, word_indexes, indexed_words) = load_embeddings("embeddings-scaled.EMBEDDING_SIZE=50.txt");

# Report the dimensions of LL and the container types of the two lookups.
println(size(LL))
println(typeof(word_indexes))
println(typeof(indexed_words))

(50,268810)
Dict{String,Int64}
Array{String,1}


In [5]:
# Deserialize the training trees from disk; `open(f, ...)` closes the file once
# `deserialize` has returned. Then report the container's type and size.
training_trees = open(deserialize, "training_sents.jsz", "r");
@pz training_trees

training_trees		Array{Any,1}	(3913,)


In [10]:
# Keep only the first 10 trees (presumably to keep the experiments below fast).
# This rebinds the global, so the file must be re-read to get the full set back.
training_trees = training_trees[1:10]

10-element Array{Any,1}:
 ((("Pierre","Vinken"),(",",((("61","years"),"old"),","))),(("will",("join",(("the","board"),(("as",("a",("nonexecutive","director"))),("Nov.","29"))))),"."))                                                                                                                                                                                                                   
 (("Mr.","Vinken"),(("is",("chairman",("of",(("Elsevier","N.V."),(",",("the",("Dutch",("publishing","group")))))))),"."))                                                                                                                                                                                                                                                        
 ((("Rudolph","Agnew"),(",",(((("55","years"),"old"),("and",(("former","chairman"),("of",("Consolidated",("Gold",("Fields","PLC"))))))),","))),(("was",("named",("*-1",(("a",("nonexecutive","director")),("of",("this",("British",("indust

In [7]:
# Smoke test for fold/unfold: folds a phrase, unfolds the result, and checks the
# `parent` links of the entries returned by `unfold`.
function test()
    model = RAE(LL,word_indexes,indexed_words)

    # Two-word phrase: the first two unfolded entries must have equal parents.
    pair_code = fold(model, ("killer", "cows"))
    b = unfold(model, pair_code)
    @assert b[1].parent==b[2].parent

    # Nested phrase: entry 1's parent must equal the grandparent of entries 2
    # and 3 (presumably the root of the unfolded tree).
    nested_code = fold(model, ("the",("killer", "cows")))
    b = unfold(model, nested_code)
    @assert b[1].parent==b[2].parent.parent==b[3].parent.parent
end
test()


In [8]:

# Shape check for the UBPTS gradients: each parameter is printed next to its
# gradient so their types and sizes can be compared by eye.
#
# The names `rr`, `ΔW_d`, `Δb_d`, `ΔW_e`, `Δb_e` are printed verbatim by `@pz`,
# so they double as the labels in the output.
function test()
    rr = RAE(LL,word_indexes,indexed_words)
    folded = fold(rr, ("the",("bad",("killer", "cows"))))
    unfolded = unfold(rr, folded)

    # First pass over the unfolded result: yields W_d/b_d gradients plus a value
    # that is fed into the second pass.
    δ_dec, ΔW_d, Δb_d = UBPTS(rr, unfolded)
    println(repeat("-", 54))
    @pz rr.W_d
    @pz ΔW_d
    @pz rr.b_d
    @pz Δb_d

    # Second pass over the folded result: yields W_e/b_e gradients.
    ΔW_e, Δb_e = UBPTS(rr, folded, δ_dec)
    println(repeat("+", 54))
    @pz rr.W_e
    @pz ΔW_e
    @pz rr.b_e
    @pz Δb_e

end
test()

------------------------------------------------------
rr.W_d		Array{Float64,2}	(100,50)
ΔW_d		Array{Float64,2}	(100,50)
rr.b_d		Array{Float64,1}	(100,)
Δb_d		Array{Float64,1}	(100,)
++++++++++++++++++++++++++++++++++++++++++++++++++++++
rr.W_e		Array{Float64,2}	(50,100)
ΔW_e		Array{Float64,2}	(50,100)
rr.b_e		Array{Float64,1}	(50,)
Δb_e		Array{Float64,1}	(50,)


In [9]:
# Overwrite the parameters of `rae` with the values stored in the flat vector θ.
#
# θ is read in the order W_e, b_e, W_d, b_d (the order `pack` writes them);
# the two matrices are restored column-major to their current shapes.
# Each field receives its own copy of the data, so later changes to θ do not
# affect `rae`.
#
# Throws DimensionMismatch when length(θ) differs from the total number of
# parameters. The check runs before any field is assigned, so a bad θ never
# leaves `rae` half-updated.
#
# Returns `rae`.
function unpack!(rae::RAE, θ::Vector)
    W_e_shape = size(rae.W_e)
    W_d_shape = size(rae.W_d)

    # End offset of each parameter's segment inside θ.
    W_e_end = length(rae.W_e)
    b_e_end = W_e_end + length(rae.b_e)
    W_d_end = b_e_end + length(rae.W_d)
    b_d_end = W_d_end + length(rae.b_d)

    if length(θ) != b_d_end
        throw(DimensionMismatch(
            "θ has $(length(θ)) elements, but the RAE has $b_d_end parameters"))
    end

    rae.W_e = reshape(θ[1:W_e_end], W_e_shape)
    rae.b_e = θ[W_e_end+1:b_e_end]
    rae.W_d = reshape(θ[b_e_end+1:W_d_end], W_d_shape)
    rae.b_d = θ[W_d_end+1:b_d_end]

    rae
end

# Flatten the four parameter arrays of `rae` (W_e, b_e, W_d, b_d, in that order)
# by delegating to the four-argument `pack` method.
pack(rae::RAE) = pack(rae.W_e, rae.b_e, rae.W_d, rae.b_d)

# Flatten two matrix/vector pairs into a single new Vector{Float64} laid out as
# [∇W_e (column-major); ∇b_e; ∇W_d (column-major); ∇b_d].
# The inputs are not modified and the result does not share memory with them.
function pack(∇W_e::Matrix{Float64}, ∇b_e::Vector{Float64}, ∇W_d::Matrix{Float64}, ∇b_d::Vector{Float64})
    # `;` is explicit vertical concatenation and means the same thing in every
    # Julia version, whereas comma-separated arrays only concatenate on old
    # releases. `vec` reshapes without the intermediate copy that `A[:]` makes.
    [vec(∇W_e); ∇b_e; vec(∇W_d); ∇b_d]
end


In [17]:
# Gradient check: compare the analytic gradient with a central-difference estimate.

# Return the gradients computed by `loss_and_loss_grad` for `tree`, as a tuple.
# The second value returned by `loss_and_loss_grad` is discarded.
function analytic_grad(rae::RAE, tree::(Any,Any))
    grads, loss_value = loss_and_loss_grad(rae, tree)
    tuple(grads...)
end
 
# Estimate the gradient of `loss(rae, tree)` with respect to every parameter of
# `rae` by central differences: (J(θ + ϵ·eᵢ) - J(θ - ϵ·eᵢ)) / 2ϵ.
#
# rae  - model whose parameters are differentiated; it is deep-copied first, so
#        the caller's model is never modified.
# tree - the tree passed through to `loss`.
# ϵ    - perturbation applied to one parameter at a time.
#
# Returns a tuple (W_e, b_e, W_d, b_d) of gradient estimates shaped like the
# corresponding parameters.
function numeric_grad(rae::RAE, tree::(Any,Any), ϵ=10.0^-4)
    rae_inner = deepcopy(rae)
    θ = pack(rae_inner)
    Δθ = zeros(size(θ))
    for ii in 1:length(θ)
        # Perturb one coordinate in place and restore it afterwards, instead of
        # building full-length offset vectors for every parameter. This is safe
        # because `unpack!` copies out of θ (it slices), so `rae_inner` never
        # aliases θ.
        θ_ii = θ[ii]

        θ[ii] = θ_ii + ϵ
        unpack!(rae_inner, θ)
        J_plus = loss(rae_inner, tree)

        θ[ii] = θ_ii - ϵ
        unpack!(rae_inner, θ)
        J_minus = loss(rae_inner, tree)

        θ[ii] = θ_ii
        Δθ[ii] = (J_plus - J_minus)/(2.0*ϵ)
    end
    # Reuse `unpack!` purely as a reshaper: after this call the scratch model's
    # fields hold the gradient estimates, not parameters.
    unpack!(rae_inner, Δθ)

    (rae_inner.W_e, rae_inner.b_e, rae_inner.W_d, rae_inner.b_d)
end

numeric_grad (generic function with 2 methods)

In [23]:
# Run both gradient computations on the same tree and the same freshly
# constructed model; the results are compared in the next cell.
eg_tree = training_trees[2]
rae_outer = RAE(LL,word_indexes,indexed_words);
# Numeric estimate, with the perturbation ϵ given explicitly instead of the default.
nW_e, nb_e, nW_d, nb_d = numeric_grad(rae_outer, eg_tree, 10.0^-7);
# Analytic gradients for the same model and tree.
aW_e, ab_e, aW_d, ab_d = analytic_grad(rae_outer, eg_tree);



In [26]:
# Flatten the numeric (n*) and analytic (a*) gradients into comparable vectors.
ngs = pack(nW_e, nb_e, nW_d, nb_d)
ags = pack(aW_e, ab_e, aW_d, ab_d)

# Summary statistics of the disagreement. The expression text is what
# `@printval` prints as the label, so it is written out in full each time.
@printval mean(abs(ngs-ags))
@printval median(abs(ngs-ags))
@printval maximum(abs(ngs-ags))
@printval norm(ngs-ags)
println(repeat("-", 54))


# One line per parameter: numeric value, analytic value, absolute difference.
for k in 1:length(ngs)
    numeric = ngs[k]
    analytic = ags[k]
    println(numeric, "\t", analytic, "\t", abs(numeric - analytic))
end




mean(abs(ngs - ags)) = 0.000978848665488066
median(abs(ngs - ags)) = 1.402950460915875e-6
maximum(abs(ngs - ags)) = 1.0739166540988663
norm(ngs - ags) = 2.4445928528720047
------------------------------------------------------
-162.24467941583498	-162.24467501932332	4.396511656068469e-6
-374.5191017401339	-374.5191044889514	2.7488175078360655e-6
194.17458162251933	194.17458378801933	2.16550000686766e-6
387.58110832759485	387.5811104022824	2.0746875293298217e-6
16.64479327700974	16.64479237745386	8.995558786750735e-7
-422.67902870207763	-422.67902786842865	8.33648982734303e-7
136.45297144648794	136.45297045819902	9.88288917369573e-7
217.64054423556445	217.6405456479646	1.412400138178782e-6
96.86452386858946	96.8645296266806	5.7580911487775666e-6
-103.88377461367782	-103.88377421866947	3.9500834247974126e-7
16.100123332307703	16.100121811976802	1.5203309011724286e-6
684.4245766046697	684.4245866721612	1.0067491530207917e-5
279.7696618017653	279.76966582901446	4.027249133287114e-6
-194.72