In [1]:
# Hyper-parameters of the experiment

# Training schedule
const EPOCHS = 3
const BATCHSIZE = 64

# Network configuration
const EMBEDSIZE = 125
const NUMHIDDEN = 100
const DROPOUT = 0.2

# Optimizer settings: learning rate, the two beta coefficients and epsilon
const LR = 0.001
const BETA_1 = 0.9
const BETA_2 = 0.999
const EPS = 1e-8

# Data settings
const MAXLEN = 150         # maximum size of the word sequence
const MAXFEATURES = 30000  # vocabulary size
const outlabel = 2         # output label in sentiment analysis

2

In [2]:
using Knet, JLD, ArgParse

[1m[36mINFO: [39m[22m[36mPrecompiling module ArgParse.
[39m

In [3]:
# Helper methods for preprocessing the data files

# Read the delimited file `x` located in directory `y` and convert every
# entry of the resulting array to Int.
function f(x::AbstractString, y::AbstractString)
    raw = readdlm("$y/$x")
    return map(entry -> convert(Int, entry), raw)
end

# Count the entries of `t` that are equal to zero.
nz(t) = count(v -> v == 0, t)

# Map the value 0 to 2 and leave every other integer unchanged.
function f(x::Int)
    return x == 0 ? 2 : x
end

f (generic function with 2 methods)

In [4]:
# Shift the contents of every row of `data` to the right-hand end of that row.
# For each row the zeros are counted with `nz`; that many leftmost columns of
# the result stay 0 and the first `ncols - nzeros` entries of the original row
# are copied into the remaining columns.
# NOTE(review): this turns right-padding into left-padding only if all zeros
# of a row are trailing -- confirm against the data files.
function correct_data(data::Array)
    shifted = fill!(similar(data), 0)
    nrows = size(data)[1]
    for row in 1:nrows
        pad = nz(data[row, :])
        shifted[row, (pad + 1):end] = data[row, 1:(end - pad)]
    end
    return shifted
end

# Retrieve training and test data.
# Reads `x_train.txt`, `y_train.txt`, `x_test.txt` and `y_test.txt` from
# `datadir` through the file-reading helper `f`; the two feature matrices are
# additionally passed through `correct_data`.
# Returns the tuple (x_train, x_test, y_train, y_test).
function imdb_for_library(;datadir="RNN")
    x_train = correct_data(f("x_train.txt", datadir))
    y_train = f("y_train.txt", datadir)
    info("Train data loaded")
    x_test = correct_data(f("x_test.txt", datadir))
    y_test = f("y_test.txt", datadir)
    # Report the test set only after it has actually been read.
    info("Test data loaded")
    return (x_train, x_test, y_train, y_test)
end

imdb_for_library (generic function with 1 method)

In [5]:
# Split `corpus` (one sample per row) and `labels` into shuffled minibatches.
# Returns (data, ygolds):
#   data[k]   - for every column of the k-th batch, the vector of that
#               column's entries mapped through `f`
#   ygolds[k] - the labels of the samples in the k-th batch
# The final batch may hold fewer than `batchsize` samples.
function minibatch(corpus, labels, batchsize)
    order = randperm(length(labels))
    nsamples = size(corpus)[1]
    data = Any[]
    ygolds = Any[]
    for lo in 1:batchsize:nsamples
        hi = min(lo + batchsize - 1, nsamples)  # clip the last, possibly short, batch
        picked = order[lo:hi]
        batch = corpus[picked, :]
        push!(ygolds, labels[picked])
        push!(data, Any[map(f, batch[:, col]) for col in 1:size(batch)[2]])
    end
    return data, ygolds
end

minibatch (generic function with 1 method)

In [6]:
# Build the list of model parameters, in this order:
#   1. embedding   (embed_size x vocab_size)
#   2. soft_w      (outsize x hidden_size)
#   3. soft_b      (outsize x 1), all zeros
#   4. gru_w1      (2hidden_size x (hidden_size + embed_size))
#   5. gru_w2      (hidden_size x (hidden_size + embed_size))
# Randomly drawn parameters come from `init`, scaled by 0.1 when `init` is
# `randn` and by 1 otherwise. Every entry is converted to KnetArray{ftype}
# when gpu() >= 0 and to Array{ftype} otherwise.
function initmodel(embed_size, hidden_size, vocab_size, outsize; init=randn, ftype=Float32)
    atype = gpu() >= 0 ? KnetArray{ftype} : Array{ftype}
    scale = init == randn ? 0.1 : 1
    draw(dims...) = atype(scale * init(dims...))
    joint_size = hidden_size + embed_size
    return Any[
        draw(embed_size, vocab_size),    # embedding
        draw(outsize, hidden_size),      # soft_w
        atype(zeros(outsize, 1)),        # soft_b
        draw(2hidden_size, joint_size),  # gru_w1
        draw(hidden_size, joint_size)    # gru_w2
    ]
end

initmodel (generic function with 1 method)

In [7]:
# Accessors for the entries of the model parameter list

# Entry 1: embedding matrix
function embed(model)
    return model[1]
end

# Entry 2: output-layer weights
function soft_w(model)
    return model[2]
end

# Entry 3: output-layer bias
function soft_b(model)
    return model[3]
end

# Entry 4: first GRU weight matrix
function GRU_w1(model)
    return model[4]
end

# Entry 5: second GRU weight matrix
function GRU_w2(model)
    return model[5]
end

GRU_w2 (generic function with 1 method)

In [8]:
# Column-based GRU step.
# `weight1` yields both gates from vcat(input, hidden): with H the number of
# rows of `hidden`, rows 1:H are the gate `z` and rows H+1:2H the gate `r`.
# `weight2` yields the candidate state from vcat(r .* hidden, input).
# Returns the new hidden state (1 - z) .* hidden + z .* candidate.
function gru(weight1, weight2, hidden, input)
    nh = size(hidden, 1)
    gates = sigm.(weight1 * vcat(input, hidden))
    zgate = gates[1:nh, :]
    rgate = gates[(1 + nh):(2nh), :]
    candidate = tanh.(weight2 * vcat(rgate .* hidden, input))
    return (1 .- zgate) .* hidden + zgate .* candidate
end

gru (generic function with 1 method)

In [9]:
# Run the GRU over `sequence` and return the output-layer scores.
# Each element of `sequence` selects columns of the embedding matrix, which
# are fed to `gru` together with the running hidden state; the final hidden
# state is multiplied by soft_w and the bias soft_b is added.
function forward(model, hidden, sequence)
    emb = embed(model)
    w1, w2 = GRU_w1(model), GRU_w2(model)
    out_w, out_b = soft_w(model), soft_b(model)
    for idx in sequence
        hidden = gru(w1, w2, hidden, emb[:, idx])
    end
    return out_w * hidden .+ out_b
end

forward (generic function with 1 method)

In [10]:
# Sum of the log-probabilities assigned to the given outputs.
# `logp(ypred, 1)` is evaluated on the scores; for column i the entry in
# row `outputs[i] + 1` is selected through its linear index, and the selected
# entries are summed.
function logprob(outputs, ypred)
    nrows = size(ypred)[1]
    index = similar(outputs)
    @inbounds for (col, label) in enumerate(outputs)
        # linear index of row (label + 1) in column `col`
        index[col] = (label + 1) + (col - 1) * nrows
    end
    return sum(logp(ypred, 1)[index])
end

logprob (generic function with 1 method)

In [11]:
# Loss of one minibatch: the negated `logprob` of the gold labels divided by
# the number of labels.
# `state[1]` only serves as a template: the initial hidden state is a zero
# array of the same kind with as many rows as `state[1]` and one column per
# label in `ygold`.
function gruloss(model, state, sequence, ygold)
    nsamples = length(ygold)
    template = state[1]
    hidden = zeros(similar(template, size(template)[1], nsamples))
    scores = forward(model, hidden, sequence)
    return -logprob(ygold, scores) / nsamples
end

gruloss (generic function with 1 method)

In [12]:
# Gradient function derived from `gruloss` with `grad`. It is called with the
# same arguments as `gruloss`; the training loop hands its result to `update!`
# together with the model.
grugrad = grad(gruloss) # Knet takes care of grads :)

(::gradfun) (generic function with 1 method)

In [13]:
# Optimization parameter initializer for each parameter of the model.
# The result mirrors the structure of its first argument:
#   - a KnetArray or Array of numbers -> one `otype(; o...)` object
#   - an Associative                  -> a Dict with the same keys
#   - anything else                   -> `map` over its elements
function oparams(::KnetArray{T}, otype; o...) where {T<:Number}
    return otype(; o...)
end

function oparams(::Array{T}, otype; o...) where {T<:Number}
    return otype(; o...)
end

function oparams(a::Associative, otype; o...)
    return Dict([key => oparams(val, otype; o...) for (key, val) in a])
end

function oparams(a, otype; o...)
    return map(item -> oparams(item, otype; o...), a)
end

oparams (generic function with 4 methods)

In [14]:
# To calculate the accuracy for a given data set.
# The samples are split with `minibatch`, each batch is run through `forward`
# starting from a zero hidden state, and the row with the largest
# log-probability in every column is compared with the gold label.
# Arguments:
#   model     - parameter list as built by `initmodel`
#   xtest     - sample matrix, one sample per row
#   ytest     - gold labels
#   batchsize - evaluation batch size (keyword, defaults to BATCHSIZE)
# Returns the fraction of correctly classified samples.
function accuracy(model, xtest, ytest; batchsize=BATCHSIZE)
    atype = (gpu()>=0 ? KnetArray{Float32} : Array{Float32})
    # Read the hidden size off the model (gru_w2 has one row per hidden unit)
    # instead of the global default, so that models created with a different
    # hidden size are evaluated without a dimension mismatch.
    hidden_size = size(GRU_w2(model), 1)
    ntot = 0; ncorrect = 0
    data, ygolds = minibatch(xtest, ytest, batchsize)
    for (sequence, ygold) in zip(data, ygolds)
        hidden = atype(zeros(Float32, hidden_size, length(ygold)))
        ypred = forward(model, hidden, sequence)
        nrows, _ = size(ypred)
        # Linear index of the gold label's row in each column of ypred.
        index = similar(ygold)
        @inbounds for i in 1:length(ygold)
            index[i] = (ygold[i]+1) + (i-1)*nrows
        end
        ntot += length(index)
        # findmax along dimension 1 yields the index of the best row per column;
        # a prediction is correct when that index equals the gold index.
        ncorrect += (sum(reshape(findmax(Array((logp(ypred, 1))), 1)[2], length(ygold)) .== index))
    end
    return ncorrect/ntot
end

accuracy (generic function with 1 method)

In [15]:
# Entry point: parse the command-line options, load the data, build the model
# and train it, printing train/test accuracy before training and the loss and
# accuracies after every epoch.
# `args` may be a vector of argument strings (defaults to ARGS) or a single
# string, which is split on whitespace.
function main(args=ARGS)
    s = ArgParseSettings()
    s.description = "Knet GRU based sentiment analysis model"
    s.exc_handler = ArgParse.debug_handler
    @add_arg_table s begin
        ("--epochs"; arg_type=Int; default=EPOCHS; help="Number of training epochs")
        ("--optimization"; default="Adam"; help="Optimization algorithm")
        ("--hidden"; arg_type=Int; default=NUMHIDDEN; help="Number of GRU hidden units")
        ("--embed"; arg_type=Int; default=EMBEDSIZE; help="Number of embedding units")
        ("--batchsize"; arg_type=Int; default=BATCHSIZE; help="Batchsize of model")
        ("--lr"; arg_type=Float64; default=LR; help="Learning rate")
        # arg_type makes values given on the command line numeric instead of strings
        ("--betas"; nargs='+'; arg_type=Float64; default=[BETA_1, BETA_2]; help="Beta parameters of ADAM")
        ("--eps"; arg_type=Float64; default=EPS; help="Epsilon parameter of ADAM")
        ("--maxfeatures"; arg_type=Int; default=MAXFEATURES; help="Vocabulary size")
    end
    isa(args, AbstractString) && (args=split(args))
    o = parse_args(args, s; as_symbols=true)
    println(s.description)
    # NOTE(review): this evaluates user-supplied text as Julia code. Consider
    # restricting --optimization to a fixed set of optimizer names.
    inoptim = eval(parse(o[:optimization]))
    println("opts=", [(k,v) for (k,v) in o]...)

    atype = (gpu()>=0 ? KnetArray : Array)

    info("reading the data...")
    x_train, x_test, y_train, y_test = imdb_for_library();
    odata, ygolds = minibatch(x_train, y_train, o[:batchsize])

    info("Initializing the model")
    # --maxfeatures is the vocabulary size of the embedding
    model = initmodel(o[:embed], o[:hidden], o[:maxfeatures], outlabel)
    # gruloss only uses state[1] as a template for the zero initial hidden state
    state = Any[ atype(zeros(Float32, o[:hidden], o[:batchsize])) ]

    opts = oparams(model, inoptim;lr=o[:lr], beta1=o[:betas][1], beta2=o[:betas][2], eps=o[:eps])
    info("Calculating the accuracies before train start")
    testacc = accuracy(model, x_test, y_test)
    trainacc = accuracy(model, x_train, y_train)
    println("Before training...Accuracies: train: $trainacc test: $testacc")

    info("Training started")
    for epoch=1:o[:epochs]
        @time for (sequence, ygold) in zip(odata, ygolds)
            grads = grugrad(model, state, sequence, ygold)
            update!(model, grads, opts)
        end
        # Mean training loss per sample after this epoch's updates. It is
        # computed outside the timed loop so the reported epoch time covers
        # only the gradient/update steps.
        lsum = 0.0; lcount = 0
        for (sequence, ygold) in zip(odata, ygolds)
            lsum += gruloss(model, state, sequence, ygold) * length(ygold)
            lcount += length(ygold)
        end
        lval = lsum / lcount
        testacc = accuracy(model, x_test, y_test)
        trainacc = accuracy(model, x_train, y_train)
        println("Epoch: $epoch Loss: $lval Train acc: $trainacc Test acc: $testacc")
    end
end

main (generic function with 2 methods)