In [1]:
using Iterators
using DataStructures
macro printval(ee)
    ee_expr = @sprintf "%s" string(ee)
    esc(:(println($ee_expr," = ", $ee)))
end

macro pz(ee)
    ee_expr = @sprintf "%s" string(ee)
    esc(:(println($ee_expr,"\t\t",typeof($ee), "\t", size($ee))))
end

In [2]:
using Pipe
push!(LOAD_PATH, "../word-embeddings2")
using WordEmbeddings
we = @pipe load_embeddings("../word-embeddings2/word_emb_data/hlbl-embeddings-scaled.EMBEDDING_SIZE=100.txt") |> WE(_...);

In [3]:
we.L=(we.L .- mean(we.L,2))./std(we.L,2)
we.L = we.L./(maximum(abs(we.L)))
@assert  map(ii->indexed_words[ind2sub(size(we.L),ii)[2]], find(abs(we.L).>1)) |> unique |> length == 0

In [4]:
function load_data(filepath)
    lines = open(filepath) do filehandle
        map(eachline(filehandle)) do line
            fields = split(line)
            (fields[1], fields[2:end])
        end
    end

    data = String[]
    labels = String[]
    
    for (hyper, hypos) in lines
        if haskey(we.word_index,hyper) # Skip ones we don't have 
            @pipe hypos |> append!(data,_)
            @pipe hyper |> fill(_,length(hypos)) |> append!(labels, _)
        end
    end 
    data, labels
end
data_str, labels_str = load_data("HyponymGen/hyponym-generation-noun-train.txt")

(String["A-bomb","ABM","BAR","BB","Bren","Colt","Dragunov","Excalibur","Exocet","GA"  …  "show","showing","sight","spectacle","splurge","staging","touchscreen","unveiling","viewing","window"],String["weapon","weapon","weapon","weapon","weapon","weapon","weapon","weapon","weapon","weapon"  …  "display","display","display","display","display","display","display","display","display","display"])

In [5]:
data = eval_word_embeddings(we,data_str, false)
labels =  eval_word_embeddings(we,labels_str)

100x271931 Array{Float64,2}:
  0.0103499     0.0103499     0.0103499    …  -0.0031903   -0.0031903 
 -0.00351679   -0.00351679   -0.00351679      -0.013945    -0.013945  
  0.0208102     0.0208102     0.0208102        0.00204613   0.00204613
 -0.0281272    -0.0281272    -0.0281272        0.0337074    0.0337074 
  0.00164561    0.00164561    0.00164561      -0.00313717  -0.00313717
  0.0149153     0.0149153     0.0149153    …  -0.0144046   -0.0144046 
  0.010475      0.010475      0.010475        -0.00364352  -0.00364352
  0.0248551     0.0248551     0.0248551       -0.0224566   -0.0224566 
  0.0117544     0.0117544     0.0117544       -0.00870084  -0.00870084
  0.0146903     0.0146903     0.0146903       -0.0104328   -0.0104328 
 -0.0335936    -0.0335936    -0.0335936    …  -0.0230342   -0.0230342 
  0.0171186     0.0171186     0.0171186        0.0180106    0.0180106 
 -0.0263888    -0.0263888    -0.0263888        0.0416328    0.0416328 
  ⋮                                        ⋱    

In [6]:
type NN 
    Ws:: Vector{Matrix{Float64}} 
    bs:: Vector{Vector{Float64}} 
end

function NN(layer_sizes::Vector{Int}, var=0.01)
    Ws = [var*randn(layer_sizes[ii], layer_sizes[ii-1]) for ii in 2:length(layer_sizes)]
    bs = [var*randn(layer_sizes[ii]) for ii in 2:length(layer_sizes)]
    NN(Ws, bs)
end


NN (constructor with 4 methods)

In [60]:
function feedfoward(nn::NN, xs::Matrix{Float64})
    as = Matrix{Float64}[ [NaN]' for _ in 1:length(nn.Ws)+1 ] 
   
    as[1] =  xs
    for ii in 1:length(nn.Ws)
        as[ii+1] = tanh(nn.Ws[ii]*as[ii] .+ nn.bs[ii])
    end 
    as[end], as
end

function backprop(nn::NN, ys::Matrix{Float64}, as::Vector{Matrix{Float64}})
    function dZ(z)
        1.0-z.^2 
    end
    Δbs = Vector{Float64}[ [NaN] for _ in 1:length(nn.Ws) ] 
    ΔWs = Matrix{Float64}[ [NaN]' for _ in 1:length(nn.Ws) ] 
    ŷs = as[end]
    δ_above = (ŷs-ys).*dZ(ŷs)
    for ii in length(nn.Ws):-1:1
        Δbs[ii] = mean(δ_above,2)[:]
        ΔWs[ii] = (δ_above * as[ii]')./size(ys,2)
        δ_above = (nn.Ws[ii]'*δ_above) .*dZ(as[ii])
    end
   
    ΔWs,Δbs
end

function loss(ŷs, ys)
    sum(0.5*(ys-ŷs).^2,2) |> mean
end

loss (generic function with 2 methods)

271931

In [49]:
# tests
function analytic_grad(nn, xs, ys)
    _,as = feedfoward(nn, xs)
    pack(backprop(nn,ys,as)...)
end
 
function numeric_grad(nn, xs, ys, ϵ=10.0^-7)
    nn_inner = deepcopy(nn)
    θ = pack(nn_inner)
    Δθ = zeros(θ)
    for ii in 1:length(θ)
        ϵᵢ = zeros(θ)
        ϵᵢ[ii]=ϵ
        θⁱ⁺ = θ + ϵᵢ
        θⁱ⁻ = θ - ϵᵢ
        
        unpack!(nn, θⁱ⁺)
        ŷs,as = feedfoward(nn, xs)
        Jⁱ⁺ = loss(nn, ŷs,ys)
        
        unpack!(nn, θⁱ⁻)
        ŷs,as = feedfoward(nn, xs)
        Jⁱ⁻=loss(nn, ŷs,ys)
        Δθ[ii] = (Jⁱ⁺-Jⁱ⁻)/(2.0*ϵ)
    end
    Δθ
    
   
end

numeric_grad (generic function with 2 methods)

In [50]:
xs = (rand(30,5) -0.5)*2
ys = xs[1:3,:]


nn = NN([size(xs,1), 15, size(ys,1)],0.1)
nanNN1 = deepcopy(nn)
nanNN2 = deepcopy(nn)

ng = numeric_grad(nn,xs,ys, 1e-10)
ag = analytic_grad(nn,xs,ys)


[ng ag ng-ag ng./ag]

513x4 Array{Float64,2}:
 -0.346558     -0.346559      6.95142e-7  0.999998
 -0.136824     -0.136824     -1.10774e-7  1.0     
 -0.184714     -0.18471      -4.73034e-6  1.00003 
  0.0635936     0.0635889     4.63259e-6  1.00007 
 -0.152409     -0.152406     -3.31025e-6  1.00002 
 -0.200755     -0.200754     -1.43047e-6  1.00001 
  0.0815037     0.0815023     1.35594e-6  1.00002 
  0.222946      0.222947     -1.22879e-6  0.999994
 -0.112887     -0.112891      3.0617e-6   0.999973
 -0.370002     -0.370007      5.07779e-6  0.999986
 -0.0275158    -0.0275138    -1.94774e-6  1.00007 
 -0.0929079    -0.0929103     2.41366e-6  0.999974
  0.236666      0.236665      1.41476e-6  1.00001 
  0.0520983     0.0520966     1.72177e-6  1.00003 
 -0.14676      -0.146756     -4.5412e-6   1.00003 
 -0.459486     -0.459485     -8.73511e-7  1.0     
 -0.145739     -0.145741      1.63319e-6  0.999989
 -0.344034     -0.344037      3.40424e-6  0.99999 
  0.0341616     0.034161      6.04995e-7  1.00002 
 -0.210

In [45]:
function unpack!(nn::NN, θ::Vector)
    endpoint=0
    for Wi in 1:length(nn.Ws)
        startpoint, endpoint =endpoint+1, endpoint+length(nn.Ws[Wi])
#        @printval startpoint
#        @printval endpoint
        
        nn.Ws[Wi][:] = θ[startpoint:endpoint]
    end
    for bi in 1:length(nn.bs)
        startpoint, endpoint =endpoint+1, endpoint+length(nn.bs[bi])
#       @printval startpoint
#       @printval endpoint
        
        nn.bs[bi][:] = θ[startpoint:endpoint]
    end
    nn
end

function pack(nn::NN)
    pack(nn.Ws, nn.bs)
end

function pack(Ws::Vector{Matrix{Float64}}, bs::Vector{Vector{Float64}})
    vcat([W[:] for W in Ws]..., [b[:] for b in bs]...)
end


pack (generic function with 2 methods)

In [63]:

xs = data
ys = labels
nn_outer = NN([size(xs,1), 400,400, size(ys,1)])
function loss_and_loss_grad!(θ::Vector, grad::Vector)   
    unpack!(nn_outer, θ)
    ŷs,as = feedfoward(nn_outer, xs)
    
    grad[:] = pack(backprop(nn_outer,ys,as)...)
    loss(ŷs,ys)
end

function loss!(θ::Vector)  
    error("loss! not defined")
end

function loss_grad!(θ::Vector, storage::Vector) 
    error("loss_grad not defined")
end



#---------------------
loss_and_loss_grad_cache = Dict{Vector{Float64},(Float64, Vector{Float64})}()
loss_and_loss_grad_cache_hits = 0
loss_and_loss_grad_cache_misses = 0
function cached_loss_and_loss_grad!(θ::Vector, grad::Vector)
    global loss_and_loss_grad_cache
    global loss_and_loss_grad_cache_hits
    global loss_and_loss_grad_cache_misses
    if haskey(loss_and_loss_grad_cache,θ)
        loss_and_loss_grad_cache_hits+=1
        err, grad[:] = loss_and_loss_grad_cache[θ]
        err
    else
        loss_and_loss_grad_cache_misses+=1
        err = loss_and_loss_grad!(θ, grad)
        loss_and_loss_grad_cache[θ] = (err, grad)
        err
    end
end


cached_loss_and_loss_grad! (generic function with 1 method)

In [64]:
using Optim #https://github.com/JuliaOpt/Optim.jl
f=DifferentiableFunction(loss!,loss_grad!,cached_loss_and_loss_grad!)
θ = pack(nn_outer)
#θ=res.minimum
res = optimize(f, θ, method=:l_bfgs, show_trace = true, store_trace = true, iterations = 300);
@printval res.f_calls 
@printval res.g_calls 
@printval res.iterations
@printval res.f_minimum
@printval res.gr_converged
@printval res.x_converged 

@printval res.trace
@printval loss_and_loss_grad_cache_hits
@printval loss_and_loss_grad_cache_misses

Iter     Function value   Gradient norm 


LoadError: interrupt
while loading In[64], in expression starting on line 5

In [None]:
using NLopt

f_call_count = 0
function tracking_loss_and_loss_grad!(θ::Vector, grad::Vector)
    global f_call_count
    f_call_count+=1
    f_val = cached_loss_and_loss_grad!(θ, grad)   
    println(f_call_count, '\t',f_val,'\t',norm(grad))
    f_val
end
#:LD_MMA, :LD_CCSAQ, :LD_LBFGS, :LD_SLSQP, :LD_VAR2, :LD_VAR1, :LD_TNEWTON_RESTART
opt = Opt(:LD_MMA, length(pack(nn_outer)))

#ftol_abs!(opt,1e-9)
maxtime!(opt, 60)
min_objective!(opt, tracking_loss_and_loss_grad!)

θ = pack(nn_outer)


(optf,optx,ret) = optimize!(opt,θ)


In [None]:
ENV["LINES"]=1000
using PyCall
@pyimport warnings
warnings.filterwarnings("ignore")

In [2]:
@pyimport nltk.corpus as nltk_corpus

In [None]:
#wordnet = pywrap(nltk_corpus.wordnet)

In [6]:
wordnet = nltk_corpus.wordnet

PyObject <WordNetCorpusReader in u'/home/wheel/oxinabox/nltk_data/corpora/wordnet'>

In [113]:
function get_all_hyponyms(all_words::Set{String})
    hyponyms = DefaultDict(String, Set{String}, ()->Set{String}())
    
    for word in all_words      
        names = Set{String}()
        for synset in wordnet[:synsets](word)
            union!(names, synset[:lemma_names]())
        end
        name = names ∩ all_words
        if length(names)==0
            continue 
        end

        hypo_names = Set{String}()
        for synset in wordnet[:synsets](word)
            for hypo_syn in synset[:hyponyms]()
                union!(hypo_names, hypo_syn[:lemma_names]())
            end
        end
        
        hypo_names = hypo_names ∩ all_words
        if length(hypo_names)==0
            continue 
        end
        
        for name in names
            union!(hyponyms[name], hypo_names)
        end
    end
    hyponyms
end
all_words = @pipe we.indexed_words |> filter(word->islower(word[1]),_) |> Set@time (@pipe all_words |> take(_,1000) |> Set |> get_all_hyponyms(_) )
all_hyponyms = @pipe all_words |> get_all_hyponyms(_) )

get_all_hyponyms (generic function with 1 method)

elapsed time: 4.320050115 seconds (18702324 bytes allocated)


DefaultDict{String,Set{String},Function} with 165 entries:
  "issue"              => Set{String}({"gore"})
  "disregard"          => Set{String}({"gore"})
  "turn_out"           => Set{String}({"gore"})
  "pull_together"      => Set{String}({"clam","mobilize"})
  "cut"                => Set{String}({"gore"})
  "hack"               => Set{String}({"gore"})
  "clothes"            => Set{String}({"activewear"})
  "assign"             => Set{String}({"impute"})
  "soiled"             => Set{String}({"mire"})
  "geld"               => Set{String}({"gore"})
  "live"               => Set{String}({"flash","flashing","experience"})
  "garment"            => Set{String}({"activewear"})
  "habilitate"         => Set{String}({"activewear"})
  "edit"               => Set{String}({"gore"})
  "fit"                => Set{String}({"stave"})
  "clothe"             => Set{String}({"activewear"})
  "dirty"              => Set{String}({"mire"})
  "bemire"             => Set{String}({"mire"})
  "supporter" 

In [110]:
methods(union!)