## Import Labels

In [None]:
using Colors
using Images
using DataFrames
using HDF5
using MLBase

In [None]:
# Every image is 20 x 20 pixels with a single colour channel.
imageSize = (20, 20, 1)

# Directory that holds the CSV files and the image folders.
path = "data"

# Metadata of the training images; later cells use its :ID and :Class columns.
labelsInfo = readtable(string(path, "/trainLabels.csv"))
println("Size of data set: ", size(labelsInfo))

# Metadata of the test images, taken from the sample submission file.
labelsInfoTest = readtable(string(path, "/sampleSubmission.csv"))
println("Size of test data set: ", size(labelsInfoTest))

In [None]:
# Mocha needs labels in 0..N-1, so every distinct class gets a 0-based index.
labs = unique(labelsInfo[:Class])
dic = Dict(zip(collect(labs), 0:(length(labs) - 1)))

# Translate a collection of class names into their numeric labels via `dic`.
function create_labs(classes)
    return map(classes) do k
        dic[k]
    end
end

labelsInfo[:Labels] = create_labs(labelsInfo[:Class])
labelsInfoTest[:Labels] = create_labs(labelsInfoTest[:Class])
head(labelsInfo)

## Split into train, validation and holdout sets

In [None]:
# Boolean row mask: roughly three quarters of the rows, picked at random,
# are flagged `true` and become the training set.
srand(12345)
n = size(labelsInfo, 1)
trainSet = shuffle((1:n) .> 0.25 * n)
labelsInfoTrain = labelsInfo[trainSet, :]

# The rows left out of the training set are split in half at random:
# one half is the validation set, the other half is kept as a holdout set.
srand(12345)
n = sum(!trainSet)
validationSet = shuffle((1:n) .> 0.5 * n)
labelsInfoValid = labelsInfo[!trainSet, :][validationSet, :]
labelsInfoHoldout = labelsInfo[!trainSet, :][!validationSet, :]

println("Size of the train data set: ", size(labelsInfoTrain))
println("Size of the validation data set: ", size(labelsInfoValid))
println("Size of the holdout data set: ", size(labelsInfoHoldout))

## Read Images from the filesystem

In [None]:
# Load every image listed in `labelsInfo[:ID]` into one Float32 tensor.
#
# Arguments:
#   typeData   - prefix of the image folder; files are read from
#                "<path>/<typeData>Resized/<id>.Bmp"
#   labelsInfo - table whose :ID column names the images to read
#   imageSize  - (width, height, channels) every image is reshaped to
#   path       - base data directory
#
# Returns a w x h x c x n Float32 array, where n is the number of IDs.
# Each image is converted to grayscale and standardised on its own
# (mean subtracted, divided by its standard deviation).
function readImages(typeData, labelsInfo, imageSize, path)
    w, h, c = imageSize
    ids = labelsInfo[:ID]
    n = length(ids)
    x = zeros(Float32, w, h, c, n)

    for (index, idImage) in enumerate(ids)
        nameFile = "$(path)/$(typeData)Resized/$(idImage).Bmp"
        raw = imread(nameFile)

        # Collapse to grayscale first, then to plain Float32 pixel values.
        gray = convert(Array{Gray}, raw)
        pixels = convert(Array{Float32}, gray)

        # Standardise the image. A uniform image has zero standard deviation;
        # dividing by it would fill the tensor with NaN, so such an image is
        # only centred (which leaves it all zeros).
        centered = pixels - mean(pixels)
        spread = std(pixels)
        normalized = spread > 0 ? centered / spread : centered

        x[:, :, :, index] = reshape(normalized, w, h, c)
    end

    return x
end

In [None]:
# Train, validation and holdout images all live in the "train" image folder;
# they differ only in which rows of the label table select them.
trainImages = readImages("train", labelsInfoTrain, imageSize, path)
println("Size of the train images set: ", size(trainImages))

validImages = readImages("train", labelsInfoValid, imageSize, path)
println("Size of the validation images set: ", size(validImages))

holdoutImages = readImages("train", labelsInfoHoldout, imageSize, path)
# Report the holdout tensor itself (the validation size used to be printed here).
println("Size of the holdout images set: ", size(holdoutImages))

testImages = readImages("test", labelsInfoTest, imageSize, path)
println("Size of the test images set: ", size(testImages))

## Convert images into HDF5 files

In [None]:
# Write an image tensor and its labels to "<path>/<typeData>.hdf5".
#
# Arguments:
#   typeData   - base name of the output file
#   imageSet   - w x h x c x n array of images
#   labelsInfo - table whose :Labels column holds one label per image
#   path       - output directory
#
# The file gets two Float32 datasets: "data" (w x h x c x n) and
# "label" (1 x n). An error is raised when the number of labels differs from
# the number of images, so a mismatched images/labels pair is never written.
function convert_to_HDF5(typeData, imageSet, labelsInfo, path)
    w, h, c, n = size(imageSet)
    labels = labelsInfo[:Labels]

    if length(labels) != n
        error("convert_to_HDF5($(typeData)): got $(length(labels)) labels for $(n) images")
    end

    HDF5.h5open("$(path)/$(typeData).hdf5", "w") do h5
        dset_data = d_create(h5, "data", datatype(Float32), dataspace(w, h, c, n))
        dset_data[:,:,:,:] = imageSet

        dset_label = d_create(h5, "label", datatype(Float32), dataspace(1, n))
        dset_label[1,:] = labels
    end
end

# Export every data set together with its own labels.
convert_to_HDF5("train", trainImages, labelsInfoTrain, path)
convert_to_HDF5("validation", validImages, labelsInfoValid, path)
# The holdout images must be paired with the holdout labels
# (the validation labels were written here before).
convert_to_HDF5("holdout", holdoutImages, labelsInfoHoldout, path)
convert_to_HDF5("test", testImages, labelsInfoTest, path)

# For every data set write a one-line text file containing the path of its
# HDF5 file; these .txt files are what the data layers use as `source`.
# Written directly instead of shelling out to `echo`: same file content,
# no dependency on an external command.
for setName in ("train", "validation", "holdout", "test")
    open("$(path)/$(setName).txt", "w") do io
        println(io, "$(path)/$(setName).hdf5")
    end
end

## Neural Network main params

In [None]:
# Backend switches: `use_gpu` takes precedence over `use_native`; with both
# false the last branch below is taken.
use_gpu = false
use_native = false

# NOTE(review): these settings are made before `using Mocha`; presumably Mocha
# reads the MOCHA_* environment variables while it loads, so this block has to
# stay above the `using` line - confirm against the Mocha documentation.
if use_gpu
    ENV["MOCHA_USE_CUDA"] = "true"
elseif use_native
    ENV["MOCHA_USE_NATIVE_EXT"] = "true"
    ENV["OMP_NUM_THREADS"] = 6
else 
    # Neither extension requested: set the BLAS thread count to 6.
    blas_set_num_threads(6) 
end

using Mocha
# Fixed seed for the random number generator used from here on.
srand(333)

In [None]:
# One EPOCH is the (rounded) number of mini-batches needed to see the whole
# training set once; the solver runs for 180 such epochs.
TRAIN_BATCH = 100
EPOCH = int(round(size(trainImages, 4) / TRAIN_BATCH))
MAXITER = 180 * EPOCH

NCLASSES = length(unique(labelsInfoTrain[:Class]))

# Grid-search axes: each entry is (parameter name, list of candidate values).
nunits_fc1 = ("nunits_fc1", [2400])
nunits_fc2 = ("nunits_fc2", [1200])
base_mom = ("base_mom", [0.95])
base_lr = ("base_lr", [0.01])
regu_coef = ("regu_coef", [0.0])
conv1_nfilt = ("conv1_nfilt", [96])
conv2_nfilt = ("conv2_nfilt", [128])

# Number of grid points = product of the candidate counts of all axes.
n = prod(Int[length(axis[2]) for axis in (nunits_fc1, nunits_fc2, base_mom, base_lr,
                                          regu_coef, conv1_nfilt, conv2_nfilt)])
println("Grid search will run ", n, " times")
println("Solver will run for ", MAXITER, " iterations")

### Setup Neural Network configuration

In [None]:
# Build the training net and two evaluation nets that reuse its layers.
#
# Arguments:
#   nunits_fc1, nunits_fc2   - output sizes of the first two inner-product layers
#   conv1_nfilt, conv2_nfilt - filter counts of the two convolution layers
#
# Reads the globals `path`, `TRAIN_BATCH`, `NCLASSES`, `use_gpu`,
# `trainImages` and `validImages`.
#
# Returns the tuple (net, train_net, valid_net, common_layers):
#   net           - training net (data, common layers, dropout, softmax loss)
#   train_net     - accuracy net fed from the training file list
#   valid_net     - accuracy net fed from the validation file list
#   common_layers - the layer objects used by all three nets
function configure_training(nunits_fc1, nunits_fc2, conv1_nfilt, conv2_nfilt)
    data_layer  = AsyncHDF5DataLayer(name="train-data", source="$(path)/train.txt",
                                    batch_size=TRAIN_BATCH)
    conv_layer  = ConvolutionLayer(name="conv1", n_filter=conv1_nfilt, kernel=(5,5), bottoms=[:data], tops=[:conv])
    pool_layer  = PoolingLayer(name="pool1", kernel=(2,2), stride=(2,2), bottoms=[:conv], tops=[:pool])
    conv2_layer = ConvolutionLayer(name="conv2", n_filter=conv2_nfilt, kernel=(5,5), bottoms=[:pool], tops=[:conv2])
    pool2_layer = PoolingLayer(name="pool2", kernel=(2,2), stride=(2,2), bottoms=[:conv2], tops=[:pool2])
    fc1_layer   = InnerProductLayer(name="ip1", output_dim=nunits_fc1, neuron=Neurons.ReLU(), bottoms=[:pool2], tops=[:ip1])
    fc2_layer   = InnerProductLayer(name="ip2", output_dim=nunits_fc2, neuron=Neurons.ReLU(), bottoms=[:ip1], tops=[:ip2])
    fc3_layer   = InnerProductLayer(name="ip3", output_dim=NCLASSES, bottoms=[:ip2], tops=[:out])
    loss_layer  = SoftmaxLossLayer(name="loss", bottoms=[:out,:label])

    backend = use_gpu ? GPUBackend() : CPUBackend()
    init(backend)

    # Dropout is only part of the training net: 10% on the input data,
    # 20% after each pooling layer and 50% on the two hidden
    # inner-product layers.
    drop_input = DropoutLayer(name="drop_in", bottoms=[:data], ratio=0.1)
    drop_conv1 = DropoutLayer(name="drop_conv1", bottoms=[:pool], ratio=0.2)
    drop_conv2 = DropoutLayer(name="drop_conv2", bottoms=[:pool2], ratio=0.2)
    drop_ip1 = DropoutLayer(name="drop_ip1", bottoms=[:ip1], ratio=0.5)
    drop_ip2 = DropoutLayer(name="drop_ip2", bottoms=[:ip2], ratio=0.5)

    common_layers = [conv_layer, pool_layer, conv2_layer, pool2_layer, fc1_layer, fc2_layer, fc3_layer]
    drop_layers = [drop_input, drop_conv1, drop_conv2, drop_ip1, drop_ip2]
    # put training net together, note that the correct ordering will automatically be established by the constructor
    net = Net("SVHN-train", backend, [data_layer, common_layers..., drop_layers..., loss_layer])

    # Accuracy check on the full training set during the training process.
    # The batch covers the training set exactly; the test-set size used here
    # before does not match the data this layer reads.
    full_train_batch = size(trainImages)[4]
    full_data_layer = AsyncHDF5DataLayer(
        name="train-full-data",
        source="$(path)/train.txt",
        batch_size=full_train_batch)
    full_acc_layer = AccuracyLayer(name="full_train", bottoms=[:out, :label], report_error=true)
    train_net = Net("SVHN-train-prediction", backend, [full_data_layer, common_layers..., full_acc_layer])

    # Accuracy check on the validation set during the training process.
    valid_batch = size(validImages)[4]
    valid_data_layer = AsyncHDF5DataLayer(
        name="validation-data",
        source="$(path)/validation.txt",
        batch_size=valid_batch)
    valid_acc_layer = AccuracyLayer(name="validation", bottoms=[:out, :label], report_error=true)
    valid_net = Net("SVHN-validation-prediction", backend, [valid_data_layer, common_layers..., valid_acc_layer])

    println(net)
    return (net, train_net, valid_net, common_layers)
end

### Configure solver

In [None]:
# Create an SGD solver.
#
# Arguments:
#   niter     - maximum number of iterations
#   base_mom  - momentum, kept fixed during training
#   base_lr   - base learning rate of the "Inv" policy (remaining policy
#               arguments are 0.0001 and 0.75)
#   epoch     - accepted for the callers' convenience; not used here
#   base_dir  - passed to the solver as `load_from`
#   regu_coef - regularisation coefficient (default 0.0)
#
# Returns the configured solver.
function configure_solver(niter, base_mom, base_lr, epoch, base_dir, regu_coef=0.0)
    momentum_policy = MomPolicy.Fixed(base_mom)
    learning_policy = LRPolicy.Inv(base_lr, 0.0001, 0.75)

    return SGD(SolverParameters(max_iter=niter,
                                regu_coef=regu_coef,
                                mom_policy=momentum_policy,
                                lr_policy=learning_policy,
                                load_from=base_dir))
end

### Setup coffee breaks for statistics

In [None]:
# Register the statistics store and the periodic reports on `solver`.
#
# Arguments:
#   solver    - solver the coffee breaks are attached to
#   train_net - net whose performance is reported for the training data
#   valid_net - net whose performance is reported for the validation data
#   base_dir  - directory for "statistics.jld" and for the snapshots
function configure_coffebreaks(solver, train_net, valid_net, base_dir)
    # Statistics are saved into the coffee lounge file every 5000 iterations.
    setup_coffee_lounge(solver, save_into="$(base_dir)/statistics.jld", every_n_iter=5000)

    # Training progress (with learning rate and momentum) every 100 iterations.
    progress_report = TrainingSummary(show_lr=true, show_mom=true)
    add_coffee_break(solver, progress_report, every_n_iter=100)

    # Performance on the train net first, then on the validation net,
    # both every 500 iterations.
    for eval_net in (train_net, valid_net)
        add_coffee_break(solver, ValidationPerformance(eval_net), every_n_iter=500)
    end

    # Snapshots every 1000 iterations.
    add_coffee_break(solver, Snapshot(base_dir), every_n_iter=1000)
end

### Configure grid search params

In [None]:
# Run `predict_net` forward with the snapshot stored in `base_dir` and return
# one 0-based class index per column of the output blob.
#
# The output blob is the first blob of the last layer state, or of the one
# before it when the net ends in an AccuracyLayer.
# Returns a Float64 vector of predicted labels.
function predict(predict_net::Net, base_dir::String)
    load_snapshot(predict_net, base_dir)

    init(predict_net)
    forward_epoch(predict_net)

    ends_with_accuracy = isa(predict_net.states[end].layer, AccuracyLayer)
    output_state = ends_with_accuracy ? predict_net.states[end-1] : predict_net.states[end]
    scores = to_array(output_state.blobs[1])

    # indmax is 1-based, the labels are 0-based.
    return Float64[indmax(scores[:, col]) - 1 for col in 1:size(scores, 2)]
end

In [None]:
# Score a trained model: the fraction of validation samples whose predicted
# label equals the label stored under `:validLabels`.
# `netInfo` is the dictionary returned by `estfun`.
function evalfun(netInfo)
    predicted = predict(netInfo[:valid_net], netInfo[:base_dir])
    hits = predicted .== netInfo[:validLabels]

    return mean(hits)
end

In [None]:
# Train one model for a single point of the hyper-parameter grid.
#
# Snapshots and statistics go to a directory whose name encodes all
# hyper-parameter values. Returns a dictionary with the trained net (:net),
# the validation net (:valid_net), the snapshot directory (:base_dir), the
# validation labels (:validLabels) and the shared layers (:common_layers).
function estfun(nunits_fc1, nunits_fc2, conv1_nfilt, conv2_nfilt, base_mom, base_lr, regu_coef)
    snapshot_dir = "snapshot_drop_conv_$(conv1_nfilt)_$(conv2_nfilt)_$(nunits_fc1)_$(nunits_fc2)_$(base_mom)_$(base_lr)_$(regu_coef)"

    net, train_net, valid_net, common_layers =
        configure_training(nunits_fc1, nunits_fc2, conv1_nfilt, conv2_nfilt)
    solver = configure_solver(MAXITER, base_mom, base_lr, EPOCH, snapshot_dir, regu_coef)
    configure_coffebreaks(solver, train_net, valid_net, snapshot_dir)
    solve(solver, net)

    model = Dict()
    model[:net] = net
    model[:valid_net] = valid_net
    model[:base_dir] = snapshot_dir
    model[:validLabels] = labelsInfoValid[:Labels]
    model[:common_layers] = common_layers

    return model
end

### Run grid search

In [None]:
# Train one model per grid point with `estfun`, score it with `evalfun`
# and keep the best model, its configuration and its score.
best_model, best_cfg, best_score = gridtune(
    estfun, evalfun,
    nunits_fc1, nunits_fc2, conv1_nfilt, conv2_nfilt, base_mom, base_lr, regu_coef;
    verbose=true)

## Predict using the test set

In [None]:
# Reuse the backend and the trained layers of the best model for the test net.
backend = best_model[:net].backend
common_layers = best_model[:common_layers]

# A single batch covers the whole test set; shuffling is off so that the
# predictions stay in the row order of `labelsInfoTest`.
test_batch = size(testImages)[4]

# The file list is read from `path`, the directory it was written to,
# instead of a hard-coded "data" folder.
test_data_layer = AsyncHDF5DataLayer(source="$(path)/test.txt", batch_size=test_batch, shuffle=false)
softmax_layer = SoftmaxLayer(name="prob", tops=[:prob], bottoms=[:out])
test_net = Net("SVHN-Test", backend, [test_data_layer, common_layers..., softmax_layer])

println(test_net)
pred = predict(test_net, best_model[:base_dir])

## Save predictions 

In [None]:
# Map the numeric predictions back to class names with the inverse of `dic`.
dic_rev = Dict(zip(values(dic), keys(dic)))
labelsInfoTest[:Class] = map(pred) do k
    dic_rev[k]
end

# Write the ID and Class columns as CSV, then remove every double quote
# from the written file in place with sed.
filename = joinpath(path, "julia-Gray-Conv-Dropout-Submission.csv")
writetable(filename, labelsInfoTest[:, [:ID, :Class]], separator=',', header=true)
run(`sed --in-place='' 's/"//g' $(filename)`)