In [40]:
using Compat
using Docile
using Iterators
using Pipe
using Devectorize

macro printval(ee)
    ee_expr = @sprintf "%s" string(ee)
    esc(:(println($ee_expr," = ", $ee)))
end

macro pz(ee)
    ee_expr = @sprintf "%s" string(ee)
    esc(:(println($ee_expr,"\t\t",typeof($ee), "\t", size($ee))))
end

In [2]:
function unzip(xs)
    [zip(xs...)...]
end

unzip (generic function with 1 method)

In [3]:
push!(LOAD_PATH, map(x->"../"*x, filter(fn-> !(contains(fn,".")),readdir("..")))...)


11-element Array{Union(ASCIIString,UTF8String),1}:
 "/root/buildFromSource/julia/usr/local/share/julia/site/v0.3"
 "/root/buildFromSource/julia/usr/share/julia/site/v0.3"      
 "../Corpus"                                                  
 "../doc2vec"                                                 
 "../Models"                                                  
 "../Optimisation"                                            
 "../recursive_embeddings"                                    
 "../summaristation"                                          
 "../tools"                                                   
 "../util"                                                    
 "../word-embedding3"                                         

In [4]:
using WordEmbeddings

In [5]:
training = open("../Corpus/serialised/opinosis_train_dev_plain.jsz","r") do fs
    deserialize(fs)
end
@pz training

training		Array{Array{String,1},1}	(6097,)


In [6]:
training=training[1:1000]

1000-element Array{Array{String,1},1}:
 String["being","able","to","change","the","*UNKNOWN*","sizes","is","awesome","!"]                                                                                            
 String["for","whatever","reason",",","*UNKNOWN*","decided","to","make","the","*UNKNOWN*","on","the","home","screen","than","on","the","*UNKNOWN*","."]                       
 String["i","found","myself","constantly","changing","the","angle","of","the","body"  …  "up","and","down","and","the","distance","away","from","me","."]                     
 String["i","was","an","avid","reader","but","increasing","age","has","made"  …  "and","very","light","weight","has","made","reading","fun","again","."]                      
 String["what","'s","more",",","it","'s","easy","to","change","*UNKNOWN*","size","."]                                                                                         
 String["the","*UNKNOWN*","does","not","recognize","page","numbers",",","since","they"

In [7]:

function pad{S<:String}(sent::Vector{S}, padded_length, pad_word="*STARTPAD*")
    if length(sent) <= padded_length
        ret =  fill(pad_word,padded_length)
        ret[end-length(sent)+1:end] = sent
        ret
    else
        sent
    end
end

pad (generic function with 2 methods)

In [8]:

import WordEmbeddings.WE
function WE(N::DataType,S::DataType, embedding_width::Int)
    L=Array(N,(embedding_width,0))
    word_index=Dict{S,Int}()
    indexed_words=S[]
    WE(L,word_index,indexed_words)
end



@doc "Gets the word index, or creates one if it doesn't already exist" ->
function get_word_index!{N,S, S2}(we::WE{N,S}, word::S2, word_varience = 0.01)
    if (word in keys(we.word_index))
        we.word_index[word]
    else
        index = length(we.indexed_words)+1
        we.word_index[word]=index
        push!(we.indexed_words,word)
        
        embedding = convert(Vector{N},word_varience.*randn(size(we.L,1)))
        we.L = hcat(we.L,embedding)
        index
    end
end

function add_all_words!{N,S}(we::WE{N,S}, words::Vector{S}, word_varience=0.01)
    for word in words
        get_word_index!(we, word, word_varience)
    end
    we
end
function add_all_words!{N,S}(we::WE{N,S}, paras::Vector{Vector{S}}, word_varience=0.01)
    for para in paras
        add_all_words!(we, para, word_varience)
    end
    we
end

add_all_words! (generic function with 4 methods)

In [9]:
type PVDM{N<:Number, S<:AbstractString}
    we::WE
    pe::WE #use a word embedder for Paragraphs too
    
    W::AbstractMatrix{N}
    b::AbstractVector{N}

    window_length::Int
    varience::N
end

function PVDM{N,S}(we::WE{N,S}, window_length::Int, varience=0.001)
    
    emb_width,n_words = size(we.L)
    concat_layer_width = emb_width*(window_length+1)
    const W = convert(Matrix{N}, varience*randn(n_words,concat_layer_width))
    const b = convert(Vector{N}, varience*randn(n_words))
    
    pe = WE(N,Vector{S},emb_width)
    
    PVDM{N,S}( we, pe, W,b, window_length, varience)
end
    
    

PVDM{N<:Number,S<:String} (constructor with 2 methods)

In [109]:
@doc "This assumes the number of works and paragraphs known remains constant" ->
function unpack!(pvdm::PVDM, θ::Vector)
    start=0
    item=pvdm.we.L
    len_total=length(item)
    pvdm.we.L = @pipe θ[1+start:start+len_total]|>reshape(_,size(item)...)
    
    start+=length(item)
    item=pvdm.pe.L
    len_total+=length(item)
    @printval start
    @printval len_total
    @pz item
    pvdm.pe.L = @pipe θ[1+start:start+len_total]|>reshape(_,size(item)...)
    
    start+=length(item)
    item=pvdm.W 
    len_total+=length(item)
    pvdm.W = @pipe θ[1+start:start+len_total]|>reshape(_,size(item)...)
    
    start+=length(item)
    item=pvdm.b 
    len_total+=length(item)
    pvdm.b = @pipe θ[1+start:start+len_total]
    
    pvdm
end


@doc "This assumes the number of works and paragraphs known remains constant" ->
function pack{N}(L::AbstractMatrix{N}, D::AbstractMatrix{N}, W::AbstractMatrix{N},b::AbstractVector{N})
    vcat(vec(L),vec(D), vec(W),b)
end

@doc "This assumes the number of works and paragraphs known remains constant" ->
function pack(pvdm::PVDM)
    pack(pvdm.we.L, pvdm.pe.L, pvdm.W, pvdm.b)
end


pack (generic function with 2 methods)

In [114]:
@pz pvdm2.pe.L
length(pvdm2.pe.L)

pvdm2.pe.L		Array{Float32,2}	(200,971)


194200

In [113]:
#pvdm2= deepcopy(pvdm)
θ=pack(pvdm)
unpack!(pvdm,θ)

start = 449800
len_total = 644000
item		Array{Float32,2}	(200,971)


LoadError: DimensionMismatch("new dimensions (200,971) must be consistent with array size 644000")
while loading In[113], in expression starting on line 3

In [10]:
const WINDOW_LEN = 8 
training = Vector{String}[pad(para, WINDOW_LEN+1) for para in training]

we_outer = WE(Float32,String, 200)
add_all_words!(we_outer, training)
pvdm = PVDM(we_outer, WINDOW_LEN);
we_outer=0

0

In [11]:
@doc """gets the training cases as vector of (paraIndex, [word_indexes], label_word_index),
cycling by the window length.
Adds the paragraph if it does not already have an index
""" ->
function get_para_training_cases!{S<:String}(pvdm::PVDM, para::Vector{S})
    para_ind = get_word_index!(pvdm.pe, para)
    
    Task() do 
        @assert length(para)>=pvdm.window_length+1
        for offset in 0:length(para)-(pvdm.window_length+1)
            window_iis = [1:pvdm.window_length;]+offset
            label_ii = pvdm.window_length+1+offset
            
            window_words = para[window_iis]
            label_word = para[label_ii]
                        
            windows_indexes = map(word->get_word_index(pvdm.we, word), window_words)
            label_index = get_word_index(pvdm.we, label_word)
            
            produce(Int64[para_ind, windows_indexes..., label_index])
        end
    end
    
end

get_para_training_cases! (generic function with 1 method)

In [12]:
training_indexes = @pipe chain(map(para -> get_para_training_cases!(pvdm, para), training)...) |> hcat(_...)
para_indexes_o = training_indexes[1,:] |> vec
window_indexes_o = training_indexes[2:end-1,:] 
label_indexes_o = training_indexes[end,:] |> vec;

In [13]:
function onehot{I<:Int}(indexes::Vector{I}, dim::I, N::DataType)
    ys = zeros(N,(dim,length(indexes)))
    for ii in indexes
        @inbounds ys[ii,indexes[ii]]=one(N)
    end
    ys
end

function onehot{I,N,S}(indexes::Vector{I}, pvdm::PVDM{N,S})
    onehot(label_indexes_o, length(pvdm.we.indexed_words), N)
end

onehot (generic function with 2 methods)

In [14]:
function get_input_layer(pvdm::PVDM, para_index::Int, window_indexes::Vector{Int})
    @inbounds [pvdm.pe.L[:,para_index], vec(pvdm.we.L[:,window_indexes])]
end 

function get_input_layers{N,S, I<:Int}(pvdm::PVDM{N,S}, para_indexes::Vector{I}, window_indexeses::Matrix{I})
    const emb_width = size(pvdm.we.L,1)
    const n_training = length(para_indexes)
    
    xs = Array(N,(emb_width * (pvdm.window_length+1),n_training))
    @inbounds xs[1:emb_width,:] = pvdm.pe.L[:,para_indexes]
    for training_case in 1:n_training
        @inbounds const window_indexes = window_indexeses[:,training_case]
        @inbounds xs[emb_width+1:end,training_case] = vec(pvdm.we.L[:,window_indexes])
    end
    xs
end

get_input_layers (generic function with 1 method)

In [83]:
function softmax(zs)
    (1./sum(exp(zs),1)).*exp(zs)
end

function feedforward{N,S, I<:Int}(pvdm::PVDM{N,S}, para_indexes::Vector{I}, window_indexeses::Matrix{I})
    xs = get_input_layers(pvdm, para_indexes, window_indexeses)
    
    
    #Speed optimised version of `zs = pvdm.W*xs .+ pvdm.b`
    zs = pvdm.W*xs 
    const n_training = length(para_indexes)
    for ii in 1:n_training
        @inbounds zs[:,ii]+= pvdm.b
    end
    ŷs = softmax(zs)
    ŷs, xs
end

feedforward (generic function with 1 method)

In [79]:
function backprop{N,S,I}(pvdm::PVDM{N,S}, ys::Matrix{N}, ŷs::Matrix{N}, xs::Matrix{N} , para_indexes::Vector{I}, window_indexeses::Matrix{I} )
    const emb_width = size(pvdm.we.L,1)
    const n_training = length(para_indexes)
    const window_len = pvdm.window_length 
    #Δb = zeros(pvdm.b)
    #ΔW = zeros(pvdm.W)
    ΔL = zeros(pvdm.we.L) #Word Vector Changes
    ΔD = zeros(pvdm.pe.L) #Paragraph Vector Changes
     
        
    δ_top_s = ŷs.-ys

    Δb = sum(δ_top_s,2) |> vec
    ΔW = (δ_top_s * xs')
    δ_input_s= (pvdm.W'*δ_top_s) #the activation function of the layer below dxs=d(1*D[ii];L[iis]) =1
    
    #Paragraph vector Error
    for ii in 1:n_training #Add sequentially, reather than via in a += as that would only allow one add for repreased index
        @inbounds ΔD[:,para_indexes[ii]] += δ_input_s[1:emb_width,ii]
    end
    
    #word vectors
    for ii in 1:n_training
        for ww in 1:window_len
            const offset=ww*emb_width
            @inbounds ΔL[:,window_indexeses[ww,ii]]+=δ_input_s[offset+1:offset+emb_width, ii]
        end
    end
    
   
    ΔL, ΔD, ΔW,Δb
end

backprop (generic function with 1 method)

In [80]:
ŷs,xs = feedforward(pvdm, para_indexes_o, window_indexes_o)
ys=onehot(label_indexes_o, pvdm)
ΔW,Δb, ΔL, ΔD = backprop(pvdm, ys, ŷs,xs, para_indexes_o, window_indexes_o)


test (generic function with 1 method)

In [19]:
@pz δ_input_s
emb_width = size(pvdm.we.L,1)
@pz δ_input_s[1:emb_width,1]


LoadError: δ_input_s not defined
while loading In[19], in expression starting on line 1