In [1]:
using Knet, JLD

In [2]:
# Training hyper-parameters and the location of the serialized dataset.
const LR        = 0.01   # learning rate passed to the optimizer in `main`
const MOMENTUM  = 0.9    # passed to the optimizer as `gamma` in `main`
const BATCHSIZE = 64     # default number of samples per minibatch
const ddir      = "/KUFS/scratch/okirnap/benchmark/data/CNN/CNN_data.jld"

"/KUFS/scratch/okirnap/benchmark/data/CNN/CNN_data.jld"

In [3]:
# Create optimizer state that mirrors the structure of a parameter collection.
#
# For a numeric `KnetArray` or `Array` leaf, return `otype(; o...)`, i.e. one
# fresh optimizer object built with the given keyword options. For a
# dictionary, return a `Dict` with the same keys whose values are built
# recursively. For any other container, `map` the construction over its
# elements.
#
# The method type parameters use `where`, which replaces the `f{T}(...)` form
# deprecated in Julia 0.6. `Associative` is the Julia 0.6 name of the abstract
# dictionary type.
oparams(::KnetArray{T}, otype; o...) where {T<:Number} = otype(; o...)
oparams(::Array{T}, otype; o...) where {T<:Number} = otype(; o...)
oparams(a::Associative, otype; o...) =
    Dict([k => oparams(v, otype; o...) for (k, v) in a])
oparams(a, otype; o...) = map(x -> oparams(x, otype; o...), a)

oparams (generic function with 4 methods)

In [4]:
# Convolve `input` with `weight` (forwarding `padding` to `conv4`) and add
# `bias` to the result by broadcasting.
function conv_bias(input, weight, bias; padding=1)
    convolved = conv4(weight, input; padding=padding)
    return convolved .+ bias
end

conv_bias (generic function with 1 method)

In [5]:
# Run the network on `input` and return the unnormalized class scores.
#
# `model` is indexed 1..12 as weight/bias pairs: four convolution layers
# (entries 1-8), one hidden fully connected layer (9-10) and the output
# layer (11-12).
# `ygold` is accepted for signature compatibility but is not used here.
# `odrop` holds the three values handed to `dropout`, in order of use.
function forward(model, input, ygold; odrop=(0.25, 0.25, 0.5))
    # Stage 1: conv -> relu -> conv -> pool -> relu -> dropout
    act = relu.(conv_bias(input, model[1], model[2]))
    act = relu.(pool(conv_bias(act, model[3], model[4])))
    act = dropout(act, odrop[1])

    # Stage 2: same layout as stage 1, using the next two conv layers
    act = relu.(conv_bias(act, model[5], model[6]))
    act = relu.(pool(conv_bias(act, model[7], model[8])))
    act = dropout(act, odrop[2])

    # Classifier head: flatten with `mat`, hidden layer, dropout, output layer
    hidden = relu.(model[9] * mat(act) .+ model[10])
    hidden = dropout(hidden, odrop[3])
    return model[11] * hidden .+ model[12]
end

forward (generic function with 1 method)

In [6]:
# Sum the log-probabilities that `ypred` assigns to the correct classes.
#
# `outputs` holds one 0-based class label per column of `ypred` (the code adds
# 1 to obtain the row). `ypred` is the score matrix returned by `forward`,
# with one column per sample; it is normalized along dimension 1 with `logp`.
#
# Returns the sum of the selected entries (not averaged).
function logprob(outputs, ypred)
    nrows = first(size(ypred))
    # Column-major linear index of entry (label + 1, i). The indices are built
    # as `Int` rather than with `similar(outputs)`, so narrow label types such
    # as UInt8 cannot overflow once an index exceeds the label type's range.
    index = Int[(Int(outputs[i]) + 1) + (i - 1) * nrows for i in 1:length(outputs)]
    return sum(logp(ypred, 1)[index])
end

logprob (generic function with 1 method)

In [7]:
# Average negative log-likelihood of the labels `ygold` for the inputs `x`.
#
# `odrop` is forwarded to `forward`. Previously the keyword was accepted but
# ignored, and a hard-coded (0.25, 0.25, 0.5) was always used; the default is
# that same tuple, so calls that do not pass `odrop` behave as before.
function cnnloss(model, x, ygold; odrop=(0.25, 0.25, 0.5))
    ypred = forward(model, x, ygold; odrop=odrop)
    total = logprob(ygold, ypred)
    return -total / length(ygold)
end

cnnloss (generic function with 1 method)

In [8]:
# Gradient function derived from `cnnloss`; it is called elsewhere in this
# file as `cnngrad(model, x, y)` and its result is handed to `update!`.
# NOTE(review): presumably the gradient is taken with respect to the first
# argument (`model`) -- confirm against the Knet/AutoGrad `grad` docs.
cnngrad = grad(cnnloss) # Knet takes care of grads :)

(::gradfun) (generic function with 1 method)

In [9]:
# Split `(X, y)` into randomly permuted minibatches.
#
# `X` is sliced along its 4th dimension, so that dimension indexes samples;
# `y` holds one label per sample. Each batch has `batchsize` samples except
# possibly the last, which holds the remainder.
#
# Returns a vector of `(inputs, labels)` tuples. Inputs are converted to
# Float32 on both the GPU path (`KnetArray`) and the CPU path (`Array`), so
# their element type matches the Float32 default used by `initmodel`
# regardless of how the data was stored.
#
# Throws `DimensionMismatch` if `X` and `y` disagree on the sample count.
function minibatch(X, y; batchsize=BATCHSIZE)
    nsamples = size(X, 4)
    length(y) == nsamples || throw(DimensionMismatch(
        "X has $nsamples samples along dimension 4 but y has $(length(y)) labels"))
    atype = gpu() >= 0 ? KnetArray{Float32} : Array{Float32}
    order = randperm(nsamples)
    data = Any[]
    for i in 1:batchsize:nsamples
        j = min(i + batchsize - 1, nsamples)
        ids = order[i:j]
        push!(data, (atype(X[:, :, :, ids]), y[ids]))
    end
    return data
end

minibatch (generic function with 1 method)

In [10]:
# Fraction of samples in `(X, y)` whose highest-scoring class equals the label.
#
# The data is re-batched with `minibatch`, and the model is evaluated with all
# three `odrop` values set to 0. Labels are 0-based (1 is added to get the
# row). Returns `ncorrect / ntot`.
function accuracy(model, X, y)
    ntot = ncorrect = 0
    for (x, ygold) in minibatch(X, y)
        ypred = forward(model, x, ygold; odrop=(0, 0, 0))
        nrows = first(size(ypred))
        nbatch = length(ygold)
        # Linear index of the correct entry in each column. Built as `Int`
        # rather than with `similar(ygold)`, so narrow label types such as
        # UInt8 cannot overflow.
        expected = Int[(Int(ygold[i]) + 1) + (i - 1) * nrows for i in 1:nbatch]
        # `findmax(A, 1)[2]` gives the linear index of each column's maximum
        # (Julia 0.6 semantics), directly comparable with `expected`.
        scores = Array(logp(ypred, 1))
        predicted = reshape(findmax(scores, 1)[2], nbatch)
        ntot += nbatch
        ncorrect += sum(predicted .== expected)
    end
    return ncorrect / ntot
end

accuracy (generic function with 1 method)

In [11]:
# Run one pass over `data`, updating `model` in place after every minibatch.
# `opts` is the optimizer state passed to `update!`. The whole pass is timed
# with `@time`, which prints the elapsed time and allocation statistics.
function train!(model, data, opts)
    @time for batch in data
        inputs, labels = batch
        update!(model, cnngrad(model, inputs, labels), opts)
    end
end

train! (generic function with 1 method)

In [12]:
# Build the parameter list consumed by `forward`.
#
# Keywords:
#   init  -- function producing an initial weight array for given dimensions
#   ftype -- element type of every parameter array
#   data  -- optional `(x, y)` minibatch; when given, one gradient call is
#            made on it so compilation happens before training starts
#
# Returns a 12-element `Any[]` of weight/bias pairs, stored as `KnetArray`
# when `gpu() >= 0` and as `Array` otherwise.
function initmodel(; init=xavier, ftype=Float32, data=nothing)
    f = (gpu() >= 0 ? KnetArray{ftype} : Array{ftype})
    model = Any[]
    push!(model, f(init(3, 3, 3, 50)))     # conv1
    push!(model, f(zeros(1, 1, 50, 1)))    # bias1

    push!(model, f(init(3, 3, 50, 50)))    # conv2
    push!(model, f(zeros(1, 1, 50, 1)))    # bias2

    push!(model, f(init(3, 3, 50, 100)))   # conv3
    push!(model, f(zeros(1, 1, 100, 1)))   # bias3

    push!(model, f(init(3, 3, 100, 100)))  # conv4
    push!(model, f(zeros(1, 1, 100, 1)))   # bias4

    # fc1 is 512x6400, so the flattened conv output must have 6400 rows.
    push!(model, f(init(512, 6400)))       # fc1
    push!(model, f(zeros(512, 1)))         # bias

    push!(model, f(init(10, 512)))         # soft_w
    push!(model, f(zeros(10, 1)))          # soft_b

    # Identity comparison is the idiomatic check against `nothing`; `!=`
    # dispatches to a generic `==` method.
    if data !== nothing
        (x, y) = data
        cnngrad(model, x, y) # To help julia for compilation
    end
    return model
end

initmodel (generic function with 1 method)

In [13]:
# Load the dataset from `ddir`, build the model and optimizer state, then
# train, printing train and test accuracy after every epoch.
#
# Keywords:
#   epochs -- number of passes over the training minibatches (default 10,
#             the value that was previously hard-coded)
function main(; epochs=10)
    x_train2, x_test2, y_train, y_test =
        JLD.load(ddir, "x_train", "x_test", "y_train", "y_test")
    # Reorder the stored axes with permutation [3,4,2,1]; `minibatch` slices
    # samples along the 4th dimension of the result.
    x_train = permutedims(x_train2, [3, 4, 2, 1])
    x_test  = permutedims(x_test2, [3, 4, 2, 1])
    # The minibatches are built once; only their order changes between epochs.
    data = minibatch(x_train, y_train)
    model = initmodel(; data=data[1])
    opts = oparams(model, Momentum; lr=LR, gclip=0, gamma=MOMENTUM)
    for epoch in 1:epochs
        shuffle!(data)
        train!(model, data, opts)
        acc1 = accuracy(model, x_train, y_train)
        acc2 = accuracy(model, x_test, y_test)
        println(":epoch $epoch :Train $acc1 :Test $acc2")
    end
end

main (generic function with 1 method)

In [None]:
# Run the full experiment: load data, train, and print per-epoch accuracy.
main()

 17.842771 seconds (2.40 M allocations: 102.175 MiB, 10.45% gc time)
:epoch 1 :Train 0.44912 :Test 0.451
 16.849041 seconds (2.22 M allocations: 91.725 MiB, 6.56% gc time)
:epoch 2 :Train 0.60604 :Test 0.5898
 17.478889 seconds (2.20 M allocations: 91.103 MiB, 11.51% gc time)
:epoch 3 :Train 0.69682 :Test 0.6695
 20.479341 seconds (2.21 M allocations: 91.235 MiB, 22.87% gc time)
:epoch 4 :Train 0.74088 :Test 0.7137
 21.005613 seconds (2.21 M allocations: 91.217 MiB, 24.85% gc time)
:epoch 5 :Train 0.78672 :Test 0.7345
 19.847084 seconds (2.21 M allocations: 91.232 MiB, 21.01% gc time)
:epoch 6 :Train 0.81318 :Test 0.7425
 23.739767 seconds (2.21 M allocations: 91.185 MiB, 32.54% gc time)
:epoch 7 :Train 0.84502 :Test 0.7558