In [1]:
using Iterators
using DataStructures
macro printval(ee)
    ee_expr = @sprintf "%s" string(ee)
    esc(:(println($ee_expr," = ", $ee)))
end

macro pz(ee)
    ee_expr = @sprintf "%s" string(ee)
    esc(:(println($ee_expr,"\t\t",typeof($ee), "\t", size($ee))))
end

In [None]:
using Pipe
push!(LOAD_PATH, "../word-embeddings2")
using WordEmbeddings
we = @pipe load_word2vec_embeddings("../../Resources/example_code/word2vec/GoogleNews-vectors-negative300.bin") |> WE(_...);

In [2]:
function load_data(filepath)
    lines = open(filepath) do filehandle
        map(eachline(filehandle)) do line
            fields = split(line)
            (fields[1], fields[2:end])
        end
    end

    data = String[]
    labels = String[]
    
    for (hyper, hypos) in lines
        if haskey(we.word_index,hyper) # Skip ones we don't have 
            hypos = @pipe hypos |> filter(w->haskey(we.word_index,w), _)
            append!(data,hypos)
            @pipe hyper |> fill(_,length(hypos)) |> append!(labels, _)
        end
    end 
    data, labels
end
data_str, labels_str = load_data("HyponymGen/hyponym-generation-noun-train.txt")

LoadError: @pipe not defined
while loading In[2], in expression starting on line 21

In [3]:
data = eval_word_embeddings(we,data_str)
labels =  eval_word_embeddings(we,labels_str)

LoadError: eval_word_embeddings not defined
while loading In[3], in expression starting on line 1

In [142]:
type NN 
    Ws:: Vector{Matrix{Float64}} 
    bs:: Vector{Vector{Float64}} 
end

function NN(layer_sizes::Vector{Int}, var=0.01)
    Ws = [var*randn(layer_sizes[ii], layer_sizes[ii-1]) for ii in 2:length(layer_sizes)]
    bs = [var*randn(layer_sizes[ii]) for ii in 2:length(layer_sizes)]
    NN(Ws, bs)
end


NN (constructor with 4 methods)

In [143]:
function feedfoward(nn::NN, xs::Matrix{Float64})
    as = Matrix{Float64}[ [NaN]' for _ in 1:length(nn.Ws)+1 ] 
   
    as[1] =  xs
    for ii in 1:length(nn.Ws)
        as[ii+1] = tanh(nn.Ws[ii]*as[ii] .+ nn.bs[ii])
    end 
    as[end], as
end

function backprop(nn::NN, ys::Matrix{Float64}, as::Vector{Matrix{Float64}}, loss_diff= (ŷs,ys)->(ŷs-ys))
    function dZ(z)
        1.0-z.^2 
    end
    Δbs = Vector{Float64}[ [NaN] for _ in 1:length(nn.Ws) ] 
    ΔWs = Matrix{Float64}[ [NaN]' for _ in 1:length(nn.Ws) ] 
    ŷs = as[end]
    δ_above = loss_diff(ŷs,ys).*dZ(ŷs)
    for ii in length(nn.Ws):-1:1
        Δbs[ii] = mean(δ_above,2)[:]
        ΔWs[ii] = (δ_above * as[ii]')./size(ys,2)
        δ_above = (nn.Ws[ii]'*δ_above) .*dZ(as[ii])
    end
   
    ΔWs,Δbs
end

function loss(ŷs, ys)
    sum(0.5*(ys-ŷs).^2,2) |> mean
end

loss (generic function with 1 method)

In [144]:
colnorm(A) = [norm(A[:,ii]) for ii in 1:size(A,2)]
coldot(A,B) = [A[:,ii]⋅B[:,ii] for ii in 1:size(A,2)]

function cosine_sim(ys,ts)
    coldot(ys,ts)./(colnorm(ys).*colnorm(ts))
end

function loss_cosine(ŷs, ys)
    0.5(1.0-cosine_sim(ŷs, ys)).^2 |> mean
end

function loss_diff_cosine(ys, ts)
    df = similar(ys)
    for jj in size(df,2)
        tjs = ts[:,jj]
        yjs = ys[:,jj]
        
        normprod = norm(tjs)*norm(yjs)
        df[:,jj] = tjs./normprod + abs(yjs).*norm(tjs)./(normprod.^3)
    end
    
    df.*(1.0-cosine_sim(ys,ts))'
end

loss_diff_cosine (generic function with 1 method)

In [145]:
function unpack!(nn::NN, θ::Vector)
    endpoint=0
    for Wi in 1:length(nn.Ws)
        startpoint, endpoint =endpoint+1, endpoint+length(nn.Ws[Wi])
#        @printval startpoint
#        @printval endpoint
        
        nn.Ws[Wi][:] = θ[startpoint:endpoint]
    end
    for bi in 1:length(nn.bs)
        startpoint, endpoint =endpoint+1, endpoint+length(nn.bs[bi])
#       @printval startpoint
#       @printval endpoint
        
        nn.bs[bi][:] = θ[startpoint:endpoint]
    end
    nn
end

function pack(nn::NN)
    pack(nn.Ws, nn.bs)
end

function pack(Ws::Vector{Matrix{Float64}}, bs::Vector{Vector{Float64}})
    vcat([W[:] for W in Ws]..., [b[:] for b in bs]...)
end


pack (generic function with 2 methods)

In [146]:
xs = data
ys = labels
nn_outer = NN([size(xs,1), 1000, size(ys,1)])

function loss_and_loss_grad!(θ::Vector, grad::Vector)   
    unpack!(nn_outer, θ)
    ŷs,as = feedfoward(nn_outer, xs)
    
    grad[:] = pack(backprop(nn_outer,ys,as)...)
    loss(ŷs,ys)
end

function loss!(θ::Vector)  
    error("loss! not defined")
end

function loss_grad!(θ::Vector, storage::Vector) 
    error("loss_grad not defined")
end



#---------------------
loss_and_loss_grad_cache = Dict{Vector{Float64},(Float64, Vector{Float64})}()
loss_and_loss_grad_cache_hits = 0
loss_and_loss_grad_cache_misses = 0
function cached_loss_and_loss_grad!(θ::Vector, grad::Vector)
    global loss_and_loss_grad_cache
    global loss_and_loss_grad_cache_hits
    global loss_and_loss_grad_cache_misses
    if haskey(loss_and_loss_grad_cache,θ)
        loss_and_loss_grad_cache_hits+=1
        err, grad[:] = loss_and_loss_grad_cache[θ]
        err
    else
        loss_and_loss_grad_cache_misses+=1
        err = loss_and_loss_grad!(θ, grad)
        loss_and_loss_grad_cache[θ] = (err, grad)
        err
    end
end


cached_loss_and_loss_grad! (generic function with 1 method)

In [None]:
using Optim #https://github.com/JuliaOpt/Optim.jl
f=DifferentiableFunction(loss!,loss_grad!,cached_loss_and_loss_grad!)
θ = pack(nn_outer)
#θ=res.minimum
res = optimize(f, θ, method=:l_bfgs, show_trace = true, store_trace = true, iterations = 100);
@printval res.f_calls 
@printval res.g_calls 
@printval res.f_minimum
@printval res.gr_converged
@printval res.iterations
@printval res.x_converged 

@printval res.trace
@printval loss_and_loss_grad_cache_hits
@printval loss_and_loss_grad_cache_misses

Iter     Function value   Gradient norm 
     0     8.359377e+02     2.865633e-03
     1     5.729351e+02     3.234360e-03
     2     5.729351e+02     1.499074e-02
     3     5.706442e+02     4.146992e-03
     4     5.659163e+02     5.516405e-03
     5     5.655452e+02     2.663534e-03
     6     5.653437e+02     1.284744e-03
     7     5.635660e+02     3.933295e-03
     8     5.635601e+02     3.886766e-03
     9     5.635601e+02     5.011067e-04
    10     5.632409e+02     2.514877e-04
    11     5.631966e+02     5.213797e-04
    12     5.631146e+02     1.303561e-03
    13     5.630115e+02     1.784823e-03
    14     5.627335e+02     2.511690e-03
    15     5.625576e+02     9.455766e-04
    16     5.624943e+02     6.813472e-04
    17     5.624817e+02     3.523755e-04
    18     5.624662e+02     5.420270e-04
    19     5.623887e+02     1.272075e-03
    20     5.623511e+02     3.060274e-04
    21     5.623469e+02     2.656318e-04
    22     5.623410e+02     1.462295e-04
 

In [10]:
using NLopt

f_call_count = 0
function tracking_loss_and_loss_grad!(θ::Vector, grad::Vector)
    global f_call_count
    f_call_count+=1
    f_val = cached_loss_and_loss_grad!(θ, grad)   
    println(f_call_count, '\t',f_val,'\t',norm(grad))
    f_val
end
#:LD_MMA, :LD_CCSAQ, :LD_LBFGS, :LD_SLSQP, :LD_VAR2, :LD_VAR1, :LD_TNEWTON_RESTART
opt = Opt(:LD_LBFGS, length(pack(nn_outer)))

#ftol_abs!(opt,1e-9)
maxtime!(opt, 60*60*8)
min_objective!(opt, tracking_loss_and_loss_grad!)

θ = pack(nn_outer)


(optf,optx,ret) = optimize!(opt,θ)


1	29.54911331691178	0.1409027352625564
2	18.964675769427814	0.028419355880973256
3	18.519777410449567	0.00029673337647217064
4	18.51968846209619	0.00027067885898119325
5	18.519680472890492	0.0002709751727728131
6	18.5196718749467	0.0002719342875089056
7	18.51901295060592	0.0006561389995733447
8	18.51759783990582	0.0013718422654776835
9	18.51968846209619	0.0013718422654776835
10	18.51754852167106	0.0013844987203620648
11	29.54911331691178	0.0013844987203620648
12	18.519680472890492	0.0013844987203620648
13	18.517546527833552	0.0013848112496424945
14	18.51968846209619	0.0013848112496424945
15	18.519777410449567	0.0013848112496424945
16	18.519680472890492	0.0013848112496424945
17	18.517546484790998	0.0013848160417688582
18	18.474128506859707	0.007673183646266187
19	18.455765235178475	0.008849470746923343
20	18.364550783939734	0.010713835009363144
21	18.321668018436913	0.012773069658331328
22	18.263219363901154	0.005167691442656909
23	18.248608788733634	0.00253435202715734
24	18.2452689324

(18.184618799025195,[-0.00690174,-0.00199913,0.00864181,0.0152608,-0.012329,0.00434504,-0.0115269,0.017859,0.00977552,-0.00260883  …  0.00596257,0.0133326,-0.000748242,-0.00690143,0.00703847,-0.00447624,0.000454072,-0.00328175,-0.00111402,0.0155761],:SUCCESS)

In [12]:
unpack!(nn_outer, optx);

In [148]:
subset =  100:120
xos = data[:,subset]
yos = labels[:,subset]

ŷos,_ = feedfoward(nn_outer,xos)

(
50x21 Array{Float64,2}:
 -0.0970008  -0.0975187  -0.0957146  …  -0.099154   -0.0972537  -0.097736 
  0.0119272   0.0117871   0.0148987      0.0137214   0.010768    0.0128468
  0.0294915   0.0302096   0.0318853      0.0299919   0.0304994   0.0295272
  0.0500032   0.0506914   0.0503306      0.0508832   0.0508426   0.0512729
  0.0816521   0.0813992   0.0810905      0.0798582   0.0818979   0.0792668
 -0.0468442  -0.0481921  -0.0467234  …  -0.0453762  -0.0455229  -0.0453223
 -0.0341508  -0.035265   -0.0305738     -0.0358552  -0.0358422  -0.0348147
 -0.114272   -0.11484    -0.111599      -0.114455   -0.114552   -0.113808 
 -0.0586581  -0.0577075  -0.059309      -0.0588052  -0.0549251  -0.0595228
 -0.0284405  -0.0279953  -0.0222363     -0.0269618  -0.0242827  -0.0253828
 -0.0223799  -0.0226841  -0.02299    …  -0.0224484  -0.0240068  -0.0225635
  0.0329318   0.0329298   0.0321481      0.0337752   0.0330276   0.0346081
 -0.117781   -0.118534   -0.118035      -0.118167   -0.117037   -0.117657 

In [149]:
show_bests(we, ŷos)

20x42 Array{Any,2}:
 "package"         0.83  "package"         …  0.83  "package"         0.82
 "operation"       0.81  "operation"          0.81  "operation"       0.81
 "inspection"      0.8   "inspection"         0.8   "inspection"      0.8 
 "structure"       0.79  "structure"          0.79  "structure"       0.79
 "language"        0.79  "carriage"           0.79  "language"        0.79
 "carriage"        0.79  "language"        …  0.78  "carriage"        0.78
 "appraisal"       0.78  "appraisal"          0.78  "administration"  0.78
 "concession"      0.78  "concession"         0.78  "scale"           0.78
 "scale"           0.78  "environment"        0.78  "environment"     0.78
 "environment"     0.78  "scale"              0.78  "appraisal"       0.78
 "administration"  0.78  "administration"  …  0.78  "concession"      0.78
 "network"         0.78  "study"              0.78  "position"        0.78
 "contract"        0.78  "network"            0.77  "choice"          0.78
 "stu

In [150]:
show_bests(we, yos)

20x42 Array{Any,2}:
 "recess"         1.0   "recess"         …  1.0   "engagement"         1.0 
 "handover"       0.74  "handover"          0.75  "integrity"          0.75
 "holiday."       0.73  "holiday."          0.74  "dissolution"        0.74
 "peacemaking"    0.72  "peacemaking"       0.71  "co-operation"       0.71
 "revolt"         0.71  "revolt"            0.71  "justice"            0.71
 "conclave"       0.7   "conclave"       …  0.71  "apology"            0.71
 "triumph"        0.69  "triumph"           0.7   "execution"          0.7 
 "stoppage."      0.69  "stoppage."         0.7   "industrialisation"  0.7 
 "date"           0.69  "date"              0.7   "condemnation"       0.7 
 "rout"           0.68  "rout"              0.69  "challenge"          0.69
 "replay"         0.68  "replay"         …  0.69  "journalism"         0.69
 "blitz"          0.68  "blitz"             0.69  "participation"      0.69
 "congress"       0.68  "congress"          0.69  "isolation"       