## Import Labels

In [92]:
using Colors
using Images
using DataFrames
using HDF5
using MLBase

use_gpu = false
if use_gpu
    ENV["MOCHA_USE_CUDA"] = "true"
else
    ENV["MOCHA_USE_NATIVE_EXT"] = "true"
    blas_set_num_threads(2)
end

using Mocha
srand(333)

In [76]:
imageSize = (20,20,1) # 20 x 20 pixel x 1 color

#Set location of data files, folders
path = "data"

#Read information about training data , IDs.
labelsInfo = readtable("$(path)/trainLabels.csv")

println("Size of data set: ", size(labelsInfo))

labelsInfoTest = readtable("$(path)/sampleSubmission.csv")

println("Size of test data set: ", size(labelsInfoTest))

Size of data set: (6283,2)
Size of test data set: (6220,2)


In [77]:
# We need labels from 0 to N-1 for Mocha
labs = unique(labelsInfo[:Class])
dic = Dict(zip(collect(labs), 0:length(labs)-1))
create_labs(classes) = map(k -> dic[k], classes)
labelsInfo[:Labels] = create_labs(labelsInfo[:Class])
labelsInfoTest[:Labels] = create_labs(labelsInfoTest[:Class])
head(labelsInfo)

Unnamed: 0,ID,Class,Labels
1,1,n,0
2,2,8,1
3,3,T,2
4,4,I,3
5,5,R,4
6,6,W,5


## Split on train and validation sets 

In [78]:
srand(12345)
n = length(labelsInfo[:ID])
trainSet = shuffle(1:n .> n*0.25)
labelsInfoTrain = labelsInfo[trainSet,:]

# Hold out some data from validation set
srand(12345)
n = length(labelsInfo[!trainSet,:ID])
validationSet = shuffle(1:n .> n*0.5)
labelsInfoValid = labelsInfo[!trainSet,:][validationSet,:]
labelsInfoHoldout = labelsInfo[!trainSet,:][!validationSet,:]

println("Size of the train data set: ", size(labelsInfoTrain))
println("Size of the validation data set: ", size(labelsInfoValid))
println("Size of the holdout data set: ", size(labelsInfoHoldout))

Size of the train data set: (4713,3)
Size of the validation data set: (785,3)
Size of the holdout data set: (785,3)


## Read Images from the filesystem

In [79]:
function readImages(typeData, labelsInfo, imageSize, path)
    w, h, c = imageSize
    n = length(labelsInfo[:ID])
    x = float32(zeros(w,h,c,n))
    for (index, idImage) in enumerate(labelsInfo[:ID]) 
        #Read image file 
        nameFile = "$(path)/$(typeData)Resized/$(idImage).Bmp"
        img = imread(nameFile)

        #Convert img to float values 
        img = convert(Array{Gray}, img)
        img = convert(Array{Float32}, img)
        #img = separate(convert(Image{RGB}, img))
        #img = convert(Array, img)
        #img = img[:,:,1:3]
        
        # Normalize data
        img = (img - mean(img)) / std(img)
        x[:,:,:,index] = reshape(img, w, h, c)
        
    end 
    
    return x
end

readImages (generic function with 1 method)

In [80]:
trainImages = readImages("train", labelsInfoTrain, imageSize, path)
println("Size of the train images set: ", size(trainImages))

validImages = readImages("train", labelsInfoValid, imageSize, path)
println("Size of the validation images set: ", size(validImages))

holdoutImages = readImages("train", labelsInfoHoldout, imageSize, path)
println("Size of the holdout images set: ", size(validImages))

testImages = readImages("test", labelsInfoTest, imageSize, path)
println("Size of the test images set: ", size(testImages))



Size of the train images set: (20,20,1,4713)
Size of the validation images set: (20,20,1,785)
Size of the holdout images set: (20,20,1,785)




Size of the test images set: (20,20,1,6220)


## Convert images into HDF5 files

In [81]:
# Import data to HDF5 format
function convert_to_HDF5(typeData, imageSet, labelsInfo, path)
    w, h, c, n = size(imageSet)
    
    HDF5.h5open("$(path)/$(typeData).hdf5", "w") do h5
        dset_data = d_create(h5, "data", datatype(Float32), dataspace(w, h, c, n))
        dset_data[:,:,:,:] =  imageSet
        
        dset_label = d_create(h5, "label", datatype(Float32), dataspace(1,n))
        dset_label[1,:] = labelsInfo[:Labels]
    end

end

convert_to_HDF5("train", trainImages, labelsInfoTrain, path)
convert_to_HDF5("validation", validImages, labelsInfoValid, path)
convert_to_HDF5("holdout", holdoutImages, labelsInfoValid, path)
convert_to_HDF5("test", testImages, labelsInfoTest, path)

run(`echo $(path)/train.hdf5` |> "$(path)/train.txt")
run(`echo $(path)/validation.hdf5` |> "$(path)/validation.txt")
run(`echo $(path)/holdout.hdf5` |> "$(path)/holdout.txt")
run(`echo $(path)/test.hdf5` |> "$(path)/test.txt")

## Neural Network main params

In [82]:
TRAIN_BATCH = 100
EPOCH = int(round(size(trainImages)[4] / TRAIN_BATCH))
MAXITER = 90*EPOCH

NCLASSES = length(unique(labelsInfoTrain[:Class]))
nunits_fc1 = ("nunits_fc1", [2400])
nunits_fc2 = ("nunits_fc2", [1200])
base_mom = ("base_mom", [0.95])
base_lr = ("base_lr", [0.01])
regu_coef = ("regu_coef", [0.0])
conv1_nfilt = ("conv1_nfilt", [96])
conv2_nfilt = ("conv2_nfilt", [128])

n = length(nunits_fc1[2]) * length(nunits_fc2[2]) * length(base_mom[2]) * length(base_lr[2]) * length(regu_coef[2]) * length(conv1_nfilt[2]) * length(conv2_nfilt[2])
println("Grid search will run ", n, " times")
println("Solver will run for ", MAXITER, " iterations")

Grid search will run 1 times
Solver will run for 4230 iterations


### Setup Neural Network configuration

In [83]:
function configure_training(nunits_fc1, nunits_fc2, conv1_nfilt, conv2_nfilt)
    data_layer  = AsyncHDF5DataLayer(name="train-data", source="$(path)/train.txt", 
                                    batch_size=TRAIN_BATCH)
    conv_layer  = ConvolutionLayer(name="conv1", n_filter=conv1_nfilt, kernel=(5,5), bottoms=[:data], tops=[:conv])
    pool_layer  = PoolingLayer(name="pool1", kernel=(2,2), stride=(2,2), bottoms=[:conv], tops=[:pool])
    conv2_layer = ConvolutionLayer(name="conv2", n_filter=conv2_nfilt, kernel=(5,5), bottoms=[:pool], tops=[:conv2])
    pool2_layer = PoolingLayer(name="pool2", kernel=(2,2), stride=(2,2), bottoms=[:conv2], tops=[:pool2])
    fc1_layer   = InnerProductLayer(name="ip1", output_dim=nunits_fc1, neuron=Neurons.ReLU(), bottoms=[:pool2], tops=[:ip1])
    fc2_layer   = InnerProductLayer(name="ip2", output_dim=nunits_fc2, neuron=Neurons.ReLU(), bottoms=[:ip1], tops=[:ip2])
    fc3_layer   = InnerProductLayer(name="ip3", output_dim=NCLASSES, bottoms=[:ip2], tops=[:out])
    loss_layer  = SoftmaxLossLayer(name="loss", bottoms=[:out,:label])
    
    backend = use_gpu ? GPUBackend() : CPUBackend()
    init(backend)

    # setup dropout for the different layers
    # we use 20% dropout on the inputs and 50% dropout in the hidden layers
    # as these values were previously found to be good defaults
    drop_input = DropoutLayer(name="drop_in", bottoms=[:data], ratio=0.1)
    drop_conv1 = DropoutLayer(name="drop_conv1", bottoms=[:pool], ratio=0.2)
    drop_conv2 = DropoutLayer(name="drop_conv2", bottoms=[:pool2], ratio=0.2)
    drop_ip1 = DropoutLayer(name="drop_ip1", bottoms=[:ip1], ratio=0.5)
    drop_ip2= DropoutLayer(name="drop_ip2", bottoms=[:ip2], ratio=0.5)

    common_layers = [conv_layer, pool_layer, conv2_layer, pool2_layer, fc1_layer, fc2_layer, fc3_layer]
    drop_layers = [drop_input, drop_conv1, drop_conv2, drop_ip1, drop_ip2]
    # put training net together, note that the correct ordering will automatically be established by the constructor
    net = Net("SVHN-train", backend, [data_layer, common_layers..., drop_layers..., loss_layer])
    
    # Configure accuracy check on validation set during training process
    full_data_layer = AsyncHDF5DataLayer(
        name="train-full-data", 
        source="$(path)/train.txt", 
        batch_size=size(testImages)[4])
    full_acc_layer = AccuracyLayer(name="full_train", bottoms=[:out, :label], report_error=true)
    train_net = Net("SVHN-train-prediction", backend, [full_data_layer, common_layers..., full_acc_layer])
    
    # Configure accuracy check on validation set during training process
    valid_batch = size(validImages)[4]
    valid_data_layer = AsyncHDF5DataLayer(
        name="validation-data", 
        source="$(path)/validation.txt", 
        batch_size=valid_batch)
    valid_acc_layer = AccuracyLayer(name="validation", bottoms=[:out, :label], report_error=true)
    valid_net = Net("SVHN-validation-prediction", backend, [valid_data_layer, common_layers..., valid_acc_layer])
    
    println(net)
    #println(valid_net)
    return(net, train_net, valid_net) 
end

configure_training (generic function with 1 method)

### Configure solver

In [84]:
function configure_solver(niter, base_mom, base_lr, epoch, base_dir, regu_coef=0.0)
    params = SolverParameters(
                max_iter=niter,
                regu_coef=regu_coef,
                mom_policy=MomPolicy.Fixed(base_mom),
                lr_policy=LRPolicy.Inv(base_lr, 0.0001, 0.75), 
                load_from=base_dir)

    solver = SGD(params)
    
    return(solver)
end

configure_solver (generic function with 2 methods)

### Setup coffee breaks for statistics

In [85]:
function configure_coffebreaks(solver, train_net, valid_net, base_dir)
    setup_coffee_lounge(solver, save_into="$(base_dir)/statistics.jld", every_n_iter=5000)

    # report training progress every 100 iterations
    add_coffee_break(solver, TrainingSummary(show_lr=true, show_mom=true), every_n_iter=100)

    # Report train perfomance every 500 iterations
    add_coffee_break(solver, ValidationPerformance(train_net), every_n_iter=500)
    
    # Report validation perfomance every 500 iterations
    add_coffee_break(solver, ValidationPerformance(valid_net), every_n_iter=500)
    
    # save snapshots every 1000 iterations
    add_coffee_break(solver, Snapshot(base_dir), every_n_iter=1000) 
end

configure_coffebreaks (generic function with 1 method)

### Configure grid serach params

In [86]:
function predict(predict_net::Net, base_dir::String) 
    load_snapshot(predict_net, base_dir)
    
    init(predict_net)
    forward_epoch(predict_net)

    batch = []
    if isa(predict_net.states[end].layer, AccuracyLayer)
        batch = to_array(predict_net.states[end-1].blobs[1])
    else 
        batch = to_array(predict_net.states[end].blobs[1])
    end
    
    n = size(batch)[2]
    pred = zeros(n)
    for i in 1:n
        pred[i] = indmax(batch[:,i]) - 1
    end
    
    return(pred)
end

predict (generic function with 1 method)

In [87]:
function evalfun(netInfo)
    pred = predict(netInfo[:valid_net], netInfo[:base_dir])
    model_perfomance = mean(pred .== netInfo[:validLabels])
    
    return(model_perfomance)
end

evalfun (generic function with 1 method)

In [88]:
function estfun(nunits_fc1, nunits_fc2, conv1_nfilt, conv2_nfilt, base_mom, base_lr, regu_coef)
    snapshot_dir = "snapshot_drop_conv_$(conv1_nfilt)_$(conv2_nfilt)_$(nunits_fc1)_$(nunits_fc2)_$(base_mom)_$(base_lr)_$(regu_coef)"
    net, train_net, valid_net = configure_training(nunits_fc1, nunits_fc2, conv1_nfilt, conv2_nfilt)
    solver = configure_solver(MAXITER, base_mom, base_lr, EPOCH, snapshot_dir, regu_coef)
    configure_coffebreaks(solver, train_net, valid_net, snapshot_dir)
    solve(solver, net) 
    model = {:net => net, 
             :valid_net => valid_net,
             :base_dir => snapshot_dir, 
             :validLabels => labelsInfoValid[:Labels]}
    
    return(model)
end

estfun (generic function with 1 method)

### Run grid search

In [89]:
best_model, best_cfg, best_score = gridtune(estfun, evalfun, nunits_fc1, nunits_fc2, conv1_nfilt, conv2_nfilt, base_mom, base_lr, regu_coef; verbose=true)

25-Aug 21:50:36:INFO:root:Constructing net SVHN-train on CPUBackend...
25-Aug 21:50:36:INFO:root:Topological sorting 14 layers...
25-Aug 21:50:36:INFO:root:Setup layers...
25-Aug 21:50:36:INFO:root:Network constructed!
25-Aug 21:50:36:INFO:root:Constructing net SVHN-train-prediction on CPUBackend...
25-Aug 21:50:36:INFO:root:Topological sorting 9 layers...
25-Aug 21:50:36:INFO:root:Setup layers...
25-Aug 21:50:36:DEBUG:root:ConvolutionLayer(conv1): sharing filters and bias
25-Aug 21:50:36:DEBUG:root:ConvolutionLayer(conv2): sharing filters and bias
25-Aug 21:50:36:DEBUG:root:InnerProductLayer(ip1): sharing weights and bias
25-Aug 21:50:36:DEBUG:root:InnerProductLayer(ip2): sharing weights and bias
25-Aug 21:50:36:DEBUG:root:InnerProductLayer(ip3): sharing weights and bias
25-Aug 21:50:36:INFO:root:Network constructed!
25-Aug 21:50:36:INFO:root:Constructing net SVHN-validation-prediction on CPUBackend...
25-Aug 21:50:36:INFO:root:Topological sorting 9 layers...
25-Aug 21:50:36:INFO:root



25-Aug 21:50:39:INFO:root:Merging existing coffee lounge statistics in snapshot_drop_conv_96_128_2400_1200_0.95_0.01_0.0/statistics.jld
25-Aug 21:50:40:DEBUG:root:Init network SVHN-train-prediction
25-Aug 21:50:40:DEBUG:root:Init network SVHN-validation-prediction
25-Aug 21:50:40:INFO:root:Snapshot directory snapshot_drop_conv_96_128_2400_1200_0.95_0.01_0.0 already exists
25-Aug 21:50:40:INFO:root:ITER = 004000:: TRAIN obj-val = 0.65958869:: LR = 0.00776970:: MOM = 0.95000000
25-Aug 21:50:58:INFO:root:
25-Aug 21:50:58:INFO:root:## Performance on Validation Set after 4000 iterations
25-Aug 21:50:58:INFO:root:---------------------------------------------------------
25-Aug 21:50:58:INFO:root:  Accuracy (avg over 6220) = 99.0675%
25-Aug 21:50:58:INFO:root:---------------------------------------------------------
25-Aug 21:50:58:INFO:root:
25-Aug 21:51:01:INFO:root:
25-Aug 21:51:01:INFO:root:## Performance on Validation Set after 4000 iterations
25-Aug 21:51:01:INFO:root:------------------



25-Aug 21:54:10:INFO:root:ITER = 004200:: TRAIN obj-val = 0.23871490:: LR = 0.00768748:: MOM = 0.95000000
25-Aug 21:54:45:INFO:root:Loading existing model from snapshot_drop_conv_96_128_2400_1200_0.95_0.01_0.0/snapshot-004000.jld
25-Aug 21:54:46:DEBUG:root:Loading parameters for layer conv1
25-Aug 21:54:46:DEBUG:root:Loading parameters for layer conv2
25-Aug 21:54:46:DEBUG:root:Loading parameters for layer ip1
25-Aug 21:54:46:DEBUG:root:Loading parameters for layer ip2
25-Aug 21:54:46:DEBUG:root:Loading parameters for layer ip3
25-Aug 21:54:46:DEBUG:root:Init network SVHN-validation-prediction
[nunits_fc1=2400, nunits_fc2=1200, conv1_nfilt=96, conv2_nfilt=128, base_mom=0.95, base_lr=0.01, regu_coef=0.0] => 0.7070063694267515


({:net=>************************************************************
          NAME: SVHN-train
       BACKEND: CPUBackend
  ARCHITECTURE: 14 layers
............................................................
 *** AsyncHDF5DataLayer(train-data)
    Outputs ---------------------------
          data: Blob(20 x 20 x 1 x 100)
         label: Blob(1 x 100)
............................................................
 *** DropoutLayer(drop_in)
    Inputs ----------------------------
          data: Blob(20 x 20 x 1 x 100)
............................................................
 *** ConvolutionLayer(conv1)
    Inputs ----------------------------
          data: Blob(20 x 20 x 1 x 100)
    Outputs ---------------------------
          conv: Blob(16 x 16 x 96 x 100)
............................................................
 *** PoolingLayer(pool1)
    Inputs ----------------------------
          conv: Blob(16 x 16 x 96 x 100)
    Outputs ---------------------------
          pool: Bl

## Predict using the test set

In [90]:
backend = best_model[:net].backend
common_layers = best_model[:net].layers[2:end-1]
test_batch = size(testImages)[4]

test_data_layer = AsyncHDF5DataLayer(source="data/test.txt", batch_size=test_batch, shuffle=false)
softmax_layer = SoftmaxLayer(name="prob", tops=[:prob], bottoms=[:out])
test_net = Net("SVHN-Test", backend, [test_data_layer, common_layers..., softmax_layer])

println(test_net)
pred = predict(test_net, best_model[:base_dir])

25-Aug 21:54:51:INFO:root:Constructing net SVHN-Test on CPUBackend...
25-Aug 21:54:51:INFO:root:Topological sorting 14 layers...
25-Aug 21:54:51:INFO:root:Setup layers...
25-Aug 21:54:51:DEBUG:root:ConvolutionLayer(conv1): sharing filters and bias
25-Aug 21:54:51:DEBUG:root:ConvolutionLayer(conv2): sharing filters and bias
25-Aug 21:54:51:DEBUG:root:InnerProductLayer(ip1): sharing weights and bias
25-Aug 21:54:51:DEBUG:root:InnerProductLayer(ip2): sharing weights and bias
25-Aug 21:54:51:DEBUG:root:InnerProductLayer(ip3): sharing weights and bias
25-Aug 21:54:52:INFO:root:Network constructed!
************************************************************
          NAME: SVHN-Test
       BACKEND: CPUBackend
  ARCHITECTURE: 14 layers
............................................................
 *** AsyncHDF5DataLayer(hdf5-data)
    Outputs ---------------------------
          data: Blob(20 x 20 x 1 x 6220)
         label: Blob(1 x 6220)
....................................................

6220-element Array{Float64,1}:
  2.0
 19.0
 46.0
 17.0
  2.0
 53.0
 25.0
 10.0
 56.0
 20.0
 14.0
  5.0
 57.0
  ⋮  
 17.0
 10.0
  2.0
 11.0
 19.0
  3.0
 14.0
 25.0
 15.0
  4.0
 14.0
 36.0

## Save predictions 

In [91]:
filename = joinpath(path, "julia-Gray-Conv-Dropout-Submission.csv")
dic_rev = Dict(zip(values(dic), keys(dic)))
labelsInfoTest[:Class] = map(k -> dic_rev[k], pred)
writetable(filename, labelsInfoTest[:,[:ID, :Class]], separator=',', header=true)
run(`sed -i '' 's/"//g' $(filename)`)