## Import Labels

In [1]:
using Images
using DataFrames
using HDF5
using MLBase

# Choose the Mocha backend. Mocha reports detecting these environment
# variables while it loads, so they are set ahead of `using Mocha`.
use_gpu = false
if !use_gpu
    # CPU path: enable the native extension and run BLAS on two threads.
    ENV["MOCHA_USE_NATIVE_EXT"] = "true"
    blas_set_num_threads(2)
else
    ENV["MOCHA_USE_CUDA"] = "true"
end

using Mocha
srand(333)

    +(AbstractArray{T,N},DataArray{T,N}) at /Users/quetzal/.julia/v0.3/DataArrays/src/operators.jl:326
is ambiguous with: 
    +(AbstractImageDirect{T,N},AbstractArray{T,N}) at /Users/quetzal/.julia/v0.3/Images/src/algorithms.jl:17.
To fix, define 
    +(AbstractImageDirect{T,N},DataArray{T,N})
before the new definition.
    +(AbstractArray{T,N},AbstractDataArray{T,N}) at /Users/quetzal/.julia/v0.3/DataArrays/src/operators.jl:349
is ambiguous with: 
    +(AbstractImageDirect{T,N},AbstractArray{T,N}) at /Users/quetzal/.julia/v0.3/Images/src/algorithms.jl:17.
To fix, define 
    +(AbstractImageDirect{T,N},AbstractDataArray{T,N})
before the new definition.
    -(AbstractArray{T,N},DataArray{T,N}) at /Users/quetzal/.julia/v0.3/DataArrays/src/operators.jl:326
is ambiguous with: 
    -(AbstractImageDirect{T,N},AbstractArray{T,N}) at /Users/quetzal/.julia/v0.3/Images/src/algorithms.jl:31.
To fix, define 
    -(AbstractImageDirect{T,N},DataArray{T,N})
before the new definition.
    -(AbstractA

Configuring Mocha...
 * CUDA       disabled by default
 * Native Ext enabled (MOCHA_USE_NATIVE_EXT environment variable detected)
Mocha configured, continue loading module...
Loading native extension libmochaext.so...
Native extension loaded


In [2]:
# Each image is 20 x 20 pixels with 1 colour channel.
imageSize = (20, 20, 1)

# Folder that holds the data files and sub-folders.
path = "data"

# Training labels: image IDs together with their classes.
labelsInfo = readtable("$(path)/trainLabels.csv")
trainDims = size(labelsInfo)
println("Size of data set: ", trainDims)

# The sample submission; its rows are later used as the list of test images.
labelsInfoTest = readtable("$(path)/sampleSubmission.csv")
testDims = size(labelsInfoTest)
println("Size of test data set: ", testDims)

Size of data set: (6283,2)
Size of test data set: (6220,2)


In [3]:
# We need labels from 0 to N-1 for Mocha
labs = unique(labelsInfo[:Class])
dic = Dict(zip(collect(labs), 0:length(labs)-1))
create_labs(classes) = map(k -> dic[k], classes)
labelsInfo[:Labels] = create_labs(labelsInfo[:Class])
labelsInfoTest[:Labels] = create_labs(labelsInfoTest[:Class])
head(labelsInfo)

Unnamed: 0,ID,Class,Labels
1,1,n,0
2,2,8,1
3,3,T,2
4,4,I,3
5,5,R,4
6,6,W,5


## Split into train and validation sets

In [4]:
# Shuffle a Boolean mask whose entries are true for positions above 25% of
# the row count; the rows it selects form the training set.
srand(12345)
n = length(labelsInfo[:ID])
trainSet = shuffle(1:n .> n*0.25)
labelsInfoTrain = labelsInfo[trainSet,:]

# The rows left over are split in half the same way: the selected half is
# used for validation and the other half is held out.
labelsInfoRest = labelsInfo[!trainSet,:]
srand(12345)
n = length(labelsInfoRest[:ID])
validationSet = shuffle(1:n .> n*0.5)
labelsInfoValid = labelsInfoRest[validationSet,:]
labelsInfoHoldout = labelsInfoRest[!validationSet,:]

trainDims = size(labelsInfoTrain)
validDims = size(labelsInfoValid)
holdoutDims = size(labelsInfoHoldout)
println("Size of the train data set: ", trainDims)
println("Size of the validation data set: ", validDims)
println("Size of the holdout data set: ", holdoutDims)

Size of the train data set: (4713,3)
Size of the validation data set: (785,3)
Size of the holdout data set: (785,3)


## Read Images from the filesystem

In [5]:
# Load every image listed in `labelsInfo[:ID]` into one Float32 array.
#
# Arguments:
#   typeData   - prefix of the image folder; files are read from
#                "$(path)/$(typeData)Resized/<ID>.Bmp"
#   labelsInfo - table whose :ID column lists the images to read
#   imageSize  - (w, h, c) tuple that every image is reshaped to
#   path       - root data folder
#
# Returns a w x h x c x n array holding one standardized image per slice of
# the last dimension, where n is the number of IDs.
function readImages(typeData, labelsInfo, imageSize, path)
    w, h, c = imageSize
    ids = labelsInfo[:ID]
    # Allocate Float32 storage directly instead of converting a Float64 array.
    x = zeros(Float32, w, h, c, length(ids))
    for (index, idImage) in enumerate(ids)
        nameFile = "$(path)/$(typeData)Resized/$(idImage).Bmp"
        img = imread(nameFile)

        # Grayscale first, then plain Float32 pixel values.
        img = convert(Array{Gray}, img)
        img = convert(Array{Float32}, img)

        # Standardize to zero mean and unit standard deviation. A constant
        # image has zero standard deviation; dividing by it would fill the
        # slice with NaN, so such an image is only centred.
        centre = mean(img)
        spread = std(img)
        img = spread > 0 ? (img - centre) / spread : img - centre

        x[:,:,:,index] = reshape(img, w, h, c)
    end

    return x
end

readImages (generic function with 1 method)

In [6]:
# Load the pixel data for every split. The train, validation and holdout rows
# are all read from the "train" image folder; only the test split reads "test".
trainImages = readImages("train", labelsInfoTrain, imageSize, path)
println("Size of the train images set: ", size(trainImages))

validImages = readImages("train", labelsInfoValid, imageSize, path)
println("Size of the validation images set: ", size(validImages))

holdoutImages = readImages("train", labelsInfoHoldout, imageSize, path)
# Report the holdout array itself (this line used to print size(validImages)).
println("Size of the holdout images set: ", size(holdoutImages))

testImages = readImages("test", labelsInfoTest, imageSize, path)
println("Size of the test images set: ", size(testImages))



Size of the train images set: (20,20,1,4713)
Size of the validation images set: (20,20,1,785)
Size of the holdout images set: (20,20,1,785)




Size of the test images set: (20,20,1,6220)


## Convert images into HDF5 files

In [7]:
# Write an image set and its labels to "$(path)/$(typeData).hdf5".
#
# The file receives two Float32 datasets: "data", filled with `imageSet`, and
# "label", a 1 x n row filled from `labelsInfo[:Labels]`, where n is the size
# of the 4th dimension of `imageSet`.
function convert_to_HDF5(typeData, imageSet, labelsInfo, path)
    dims = size(imageSet)
    width, height, channels, nimages = dims[1], dims[2], dims[3], dims[4]
    target = "$(path)/$(typeData).hdf5"

    HDF5.h5open(target, "w") do h5
        image_space = dataspace(width, height, channels, nimages)
        images = d_create(h5, "data", datatype(Float32), image_space)
        images[:,:,:,:] = imageSet

        label_space = dataspace(1, nimages)
        labels = d_create(h5, "label", datatype(Float32), label_space)
        labels[1,:] = labelsInfo[:Labels]
    end

end

# Export every split as "$(path)/<split>.hdf5", each paired with its own labels.
convert_to_HDF5("train", trainImages, labelsInfoTrain, path)
convert_to_HDF5("validation", validImages, labelsInfoValid, path)
# The holdout images must be written with the holdout labels; they used to be
# written with the validation labels.
convert_to_HDF5("holdout", holdoutImages, labelsInfoHoldout, path)
convert_to_HDF5("test", testImages, labelsInfoTest, path)

# Each "<split>.txt" holds a single line naming the matching HDF5 file; these
# list files are what the HDF5 data layers receive as `source`. They are
# written from Julia rather than by spawning `echo` with a shell-style redirect.
for setname in ["train", "validation", "holdout", "test"]
    open("$(path)/$(setname).txt", "w") do io
        println(io, "$(path)/$(setname).hdf5")
    end
end

## Neural Network main params

In [102]:
# Mini-batch size. EPOCH is the number of training images divided by the
# batch size, rounded to the nearest integer.
TRAIN_BATCH = 100
ntrain = size(trainImages)[4]
EPOCH = int(round(ntrain / TRAIN_BATCH))
MAXITER = 100*EPOCH    # i.e. 100 epochs

# Number of distinct classes in the training split.
nclasses = length(unique(labelsInfoTrain[:Class]))

# Grid-search axes: one (name, candidate values) pair per hyper-parameter.
nunits = ("nunits", [2400])
base_mom = ("base_mom", [0.1])
base_lr = ("base_lr", [0.5])

println("Solver will run for ", MAXITER, " iterations")

Solver will run for 4700 iterations


In [93]:
# Run `predict_net` forward once, after calling `load_snapshot` with
# `base_dir`, and return the predicted label of every sample.
#
# The scores are read from the first blob of the last layer, or of the layer
# before it when the net ends in an AccuracyLayer. Each prediction is the
# zero-based index of the largest score in a column, returned as Float64.
function predict(predict_net::Net, base_dir::String)
    load_snapshot(predict_net, base_dir)

    init(predict_net)
    forward_epoch(predict_net)

    states = predict_net.states
    source = isa(states[end].layer, AccuracyLayer) ? states[end-1] : states[end]
    scores = to_array(source.blobs[1])

    ncols = size(scores)[2]
    # indmax is 1-based, while the labels start at 0.
    return Float64[indmax(scores[:,col]) - 1 for col in 1:ncols]
end

predict (generic function with 1 method)

### Setup Neural Network configuration

In [96]:
# Build the training net and a validation net over the same fully connected
# layers.
#
# Arguments:
#   nunits      - output dimension of each of the two hidden layers
#   use_dropout - when true, three dropout layers are added to the training
#                 net only; the default (false) builds exactly the nets this
#                 function built before the option existed
#
# Reads the globals `path`, `TRAIN_BATCH`, `nclasses`, `use_gpu` and
# `validImages`. Returns the tuple (net, valid_net).
function configure_training(nunits; use_dropout=false)
    data_layer  = AsyncHDF5DataLayer(name="train-data", source="$(path)/train.txt",
                                    batch_size=TRAIN_BATCH)
    fc1_layer   = InnerProductLayer(name="fc1", output_dim=nunits, neuron=Neurons.ReLU(),
                                    weight_init = GaussianInitializer(std=0.01),
                                    bottoms=[:data], tops=[:fc1])
    fc2_layer   = InnerProductLayer(name="fc2", output_dim=nunits, neuron=Neurons.ReLU(),
                                    weight_init = GaussianInitializer(std=0.01),
                                    weight_cons = L2Cons(4.5),
                                    bottoms=[:fc1], tops=[:fc2])
    fc3_layer   = InnerProductLayer(name="out", output_dim=nclasses, bottoms=[:fc2],
                                    weight_init = ConstantInitializer(0),
                                    weight_cons = L2Cons(4.5),
                                    tops=[:out])
    loss_layer  = SoftmaxLossLayer(name="loss", bottoms=[:out,:label])

    backend = use_gpu ? GPUBackend() : CPUBackend()
    init(backend)

    # Dropout is opt-in. Previously these layers were always constructed but
    # never added to any net, although the comment claimed otherwise. The
    # ratios are the ones the code already used: 10% on the inputs and 50%
    # after each hidden layer.
    drop_layers = DropoutLayer[]
    if use_dropout
        push!(drop_layers, DropoutLayer(name="drop_in", bottoms=[:data], ratio=0.1))
        push!(drop_layers, DropoutLayer(name="drop_fc1", bottoms=[:fc1], ratio=0.5))
        push!(drop_layers, DropoutLayer(name="drop_fc2", bottoms=[:fc2], ratio=0.5))
    end

    common_layers = [fc1_layer, fc2_layer, fc3_layer]
    # Put the training net together; the constructor sorts the layers
    # topologically, so the order in this list does not matter.
    net = Net("SVHN-train", backend, [data_layer, common_layers..., drop_layers..., loss_layer])

    # Accuracy check on the validation set during training. The whole
    # validation set is fed as one batch, and dropout is never part of this net.
    valid_batch = size(validImages)[4]
    valid_data_layer = AsyncHDF5DataLayer(
        name="validation-data",
        source="$(path)/validation.txt",
        batch_size=valid_batch)
    acc_layer = AccuracyLayer(name="validation-accuracy", bottoms=[:out, :label], report_error=true)
    valid_net = Net("SVHN-validation", backend, [valid_data_layer, common_layers..., acc_layer])

    return(net, valid_net)
end

configure_training (generic function with 1 method)

### Configure solver

In [97]:
# Create the SGD solver used for training.
#
# Arguments:
#   niter     - maximum number of solver iterations
#   base_mom  - first argument of the linear momentum policy
#   base_lr   - first argument of the step learning-rate policy
#   epoch     - passed as the third argument of both policies
#   base_dir  - directory passed as `load_from`
#   regu_coef - regularization coefficient (default 0.00)
#
# Returns the configured SGD solver.
function configure_solver(niter, base_mom, base_lr, epoch, base_dir, regu_coef=0.00)
    # regu_coef is forwarded from the argument; it used to be overridden by a
    # hard-coded 0.00, so callers could not change it.
    params = SolverParameters(
                max_iter=niter,
                regu_coef=regu_coef,
                mom_policy=MomPolicy.Linear(base_mom, 0.0008, epoch, 0.99),
                lr_policy=LRPolicy.Step(base_lr, 0.998, epoch),
                load_from=base_dir)

    solver = SGD(params)

    return(solver)
end

configure_solver (generic function with 2 methods)

### Setup coffee breaks for statistics

In [98]:
# Attach statistics collection, progress reports, validation checks and
# snapshots to `solver`.
#
# Arguments:
#   solver    - solver to attach the coffee breaks to
#   valid_net - net evaluated by the validation coffee break
#   base_dir  - directory for the statistics file and the snapshots
function configure_coffebreaks(solver, valid_net, base_dir)
    stats_file = "$(base_dir)/statistics.jld"
    setup_coffee_lounge(solver, save_into=stats_file, every_n_iter=5000)

    # Training summary, including learning rate and momentum, every 100 iterations.
    progress = TrainingSummary(show_lr=true, show_mom=true)
    add_coffee_break(solver, progress, every_n_iter=100)

    # Performance on the validation net every 500 iterations.
    validation = ValidationPerformance(valid_net)
    add_coffee_break(solver, validation, every_n_iter=500)

    # Snapshots into base_dir, also every 500 iterations.
    snapshots = Snapshot(base_dir)
    add_coffee_break(solver, snapshots, every_n_iter=500)
end

configure_coffebreaks (generic function with 1 method)

### Configure grid search params

In [13]:
# Score a trained model: the fraction of validation samples whose predicted
# label equals the entry of `netInfo[:validLabels]` at the same position.
function evalfun(netInfo)
    predicted = predict(netInfo[:valid_net], netInfo[:base_dir])
    hits = predicted .== netInfo[:validLabels]

    return mean(hits)
end

evalfun (generic function with 1 method)

In [101]:
# Train one network for the given hyper-parameters.
#
# Snapshots and statistics go to a directory named after the parameters.
# Returns a dictionary with the training net (:net), the validation net
# (:valid_net), the snapshot directory (:base_dir) and the validation labels
# (:validLabels).
function estfun(nunits, base_mom, base_lr)
    snapshot_dir = "snapshot_mlp_$(nunits)_$(nunits)_$(base_mom)_$(base_lr)"

    net, valid_net = configure_training(nunits)
    solver = configure_solver(MAXITER, base_mom, base_lr, EPOCH, snapshot_dir)
    configure_coffebreaks(solver, valid_net, snapshot_dir)
    solve(solver, net)

    model = Dict{Any,Any}()
    model[:net] = net
    model[:valid_net] = valid_net
    model[:base_dir] = snapshot_dir
    model[:validLabels] = labelsInfoValid[:Labels]

    return model
end

estfun (generic function with 1 method)

### Run grid search

In [100]:
# Grid search: `estfun` trains a model for a parameter combination and
# `evalfun` scores it; the call yields the best model, its configuration
# and its score.
best_model, best_cfg, best_score = gridtune(
    estfun, evalfun,
    nunits, base_mom, base_lr;
    verbose=true)

19-Aug 16:31:27:INFO:root:Constructing net SVHN-train on CPUBackend...
19-Aug 16:31:27:INFO:root:Topological sorting 5 layers...
19-Aug 16:31:27:INFO:root:Setup layers...
19-Aug 16:31:27:INFO:root:Network constructed!
19-Aug 16:31:27:INFO:root:Constructing net SVHN-validation on CPUBackend...
19-Aug 16:31:27:INFO:root:Topological sorting 5 layers...
19-Aug 16:31:27:INFO:root:Setup layers...
19-Aug 16:31:27:DEBUG:root:InnerProductLayer(fc1): sharing weights and bias
19-Aug 16:31:27:DEBUG:root:InnerProductLayer(fc2): sharing weights and bias
19-Aug 16:31:27:DEBUG:root:InnerProductLayer(out): sharing weights and bias
19-Aug 16:31:27:INFO:root:Network constructed!
19-Aug 16:31:27:DEBUG:root:Checking network topology for back-propagation
19-Aug 16:31:27:DEBUG:root:Init network SVHN-train
19-Aug 16:31:27:DEBUG:root:Init parameter weight for layer fc1
19-Aug 16:31:27:DEBUG:root:Init parameter bias for layer fc1
19-Aug 16:31:27:DEBUG:root:Init parameter weight for layer fc2
19-Aug 16:31:27:DEB

({:net=>************************************************************
          NAME: SVHN-train
       BACKEND: CPUBackend
  ARCHITECTURE: 5 layers
............................................................
 *** AsyncHDF5DataLayer(train-data)
    Outputs ---------------------------
          data: Blob(20 x 20 x 1 x 100)
         label: Blob(1 x 100)
............................................................
 *** InnerProductLayer(fc1)
    Inputs ----------------------------
          data: Blob(20 x 20 x 1 x 100)
    Outputs ---------------------------
           fc1: Blob(1200 x 100)
............................................................
 *** InnerProductLayer(fc2)
    Inputs ----------------------------
           fc1: Blob(1200 x 100)
    Outputs ---------------------------
           fc2: Blob(1200 x 100)
............................................................
 *** InnerProductLayer(out)
    Inputs ----------------------------
           fc2: Blob(1200 x 100)
    Ou

## Predict using the test set

In [90]:
# Assemble a prediction net for the test set. It reuses the backend of the
# best training net and all of its layers except the first and the last, and
# ends in a softmax over the :out blob.
backend = best_model[:net].backend

common_layers = best_model[:net].layers[2:end-1]
# Feed the whole test set as a single batch.
test_batch = size(testImages)[4]
# The source is built from `path`, like every other data layer, instead of
# the hard-coded "data/test.txt".
test_data_layer = AsyncHDF5DataLayer(source="$(path)/test.txt", batch_size=test_batch, shuffle=false)
softmax_layer = SoftmaxLayer(name="prob", tops=[:prob], bottoms=[:out])
test_net = Net("SVHN-Test", backend, [test_data_layer, common_layers..., softmax_layer])
println(test_net)
pred = predict(test_net, best_model[:base_dir])

19-Aug 16:19:05:INFO:root:Constructing net SVHN-Test on CPUBackend...
19-Aug 16:19:05:INFO:root:Topological sorting 5 layers...
19-Aug 16:19:05:INFO:root:Setup layers...
19-Aug 16:19:05:DEBUG:root:InnerProductLayer(fc1): sharing weights and bias
19-Aug 16:19:05:DEBUG:root:InnerProductLayer(fc2): sharing weights and bias
19-Aug 16:19:05:DEBUG:root:InnerProductLayer(out): sharing weights and bias
19-Aug 16:19:05:INFO:root:Network constructed!
************************************************************
          NAME: SVHN-Test
       BACKEND: CPUBackend
  ARCHITECTURE: 5 layers
............................................................
 *** AsyncHDF5DataLayer(hdf5-data)
    Outputs ---------------------------
          data: Blob(20 x 20 x 1 x 6220)
         label: Blob(1 x 6220)
............................................................
 *** InnerProductLayer(fc1)
    Inputs ----------------------------
          data: Blob(20 x 20 x 1 x 6220)
    Outputs --------------------------

6220-element Array{Float64,1}:
 17.0
 19.0
  3.0
  2.0
 30.0
  6.0
 25.0
 24.0
 56.0
 20.0
 14.0
  5.0
 20.0
  ⋮  
  4.0
 10.0
 19.0
 11.0
 19.0
  3.0
  8.0
 15.0
 28.0
 36.0
 14.0
 36.0

## Save predictions 

In [91]:
# Map the numeric predictions back to class names and write the submission.
filename = joinpath(path, "juliaNN-Dropout-Submission.csv")
# Inverse of `dic`: numeric label -> class name.
dic_rev = Dict(zip(values(dic), keys(dic)))
labelsInfoTest[:Class] = map(k -> dic_rev[k], pred)
writetable(filename, labelsInfoTest[:,[:ID, :Class]], separator=',', header=true)

# Strip every double quote from the written file. This is done in Julia
# because the former `sed -i ''` call is BSD/macOS syntax and fails with
# GNU sed.
csv_text = readall(filename)
open(filename, "w") do io
    write(io, replace(csv_text, "\"", ""))
end