## Import Labels

## Configure Neural Net

In [9]:
using HDF5

# Directory holding the HDF5 data files. It must be assigned before the label
# scan below, which interpolates it into the file name.
path = "data"
# Directory used for solver snapshots and statistics.
base_dir = "snapshots_dropout_fc"
use_gpu = false

# Read the first row of the "label" dataset. The do-block form of h5open closes
# the file even when the read throws.
labels = h5open("$(path)/train.hdf5") do f
  f["label"][1,:]
end

# Number of distinct label values; used as the output dimension of the last layer.
N = length(unique(labels))

# NOTE(review): Mocha's classification loss presumably expects class labels
# numbered 0 .. N-1 -- confirm against the Mocha documentation. Labels outside
# that range are a plausible source of a BoundsError during training, so flag
# them here without aborting.
if !isempty(labels) && (minimum(labels) < 0 || maximum(labels) > N - 1)
  warn("labels span $(minimum(labels))..$(maximum(labels)) but only $(N) distinct classes were found; expected values in 0..$(N - 1)")
end

# Select the Mocha backend through environment variables; these are set before
# `using Mocha` is executed.
if use_gpu
  ENV["MOCHA_USE_CUDA"] = "true"
else
  ENV["MOCHA_USE_NATIVE_EXT"] = "true"
  blas_set_num_threads(1)
end

In [10]:
using Mocha
# Fixed seed so that weight initialisation is reproducible.
srand(333)

# Training data is read from the HDF5 files listed in train.txt, one sample per batch.
data_layer  = HDF5DataLayer(name="train-data", source="$(path)/train.txt", batch_size=1)

# each fully connected layer uses a ReLU activation and a constraint on the L2 norm of the weights
# (fc1 previously lacked the constraint that fc2 and the output layer carry; it is
# applied here so that all three layers agree with this description)
fc1_layer   = InnerProductLayer(name="fc1", output_dim=1200, neuron=Neurons.ReLU(),
                                weight_init = GaussianInitializer(std=0.01),
                                weight_cons = L2Cons(4.5),
                                bottoms=[:data], tops=[:fc1])
fc2_layer   = InnerProductLayer(name="fc2", output_dim=1200, neuron=Neurons.ReLU(),
                                weight_init = GaussianInitializer(std=0.01),
                                weight_cons = L2Cons(4.5),
                                bottoms=[:fc1], tops=[:fc2])
# Output layer: one unit per class (N), no neuron given, weights start at zero.
fc3_layer   = InnerProductLayer(name="out", output_dim=N, bottoms=[:fc2],
                                weight_init = ConstantInitializer(0),
                                weight_cons = L2Cons(4.5),
                                tops=[:out])
loss_layer  = SoftmaxLossLayer(name="loss", bottoms=[:out,:label])

# setup dropout for the different layers
# we use 20% dropout on the inputs and 50% dropout in the hidden layers
# as these values were previously found to be good defaults
# (the dropout layers name only bottoms, no tops)
drop_input  = DropoutLayer(name="drop_in", bottoms=[:data], ratio=0.2)
drop_fc1 = DropoutLayer(name="drop_fc1", bottoms=[:fc1], ratio=0.5)
drop_fc2  = DropoutLayer(name="drop_fc2", bottoms=[:fc2], ratio=0.5)

backend = use_gpu ? GPUBackend() : CPUBackend()
init(backend)

# common_layers is kept separate from the dropout layers so the same
# fully connected layers can be reused in another net.
common_layers = [fc1_layer, fc2_layer, fc3_layer]
drop_layers = [drop_input, drop_fc1, drop_fc2]
# put training net together, note that the correct ordering will automatically be established by the constructor
net = Net("SV-train", backend, [data_layer, common_layers..., drop_layers..., loss_layer])
println(net)

13-Aug 18:56:01:INFO:root:Constructing net SV-train on CPUBackend...
13-Aug 18:56:01:INFO:root:Topological sorting 8 layers...
13-Aug 18:56:01:INFO:root:Setup layers...
13-Aug 18:56:03:INFO:root:Network constructed!
************************************************************
          NAME: SV-train
       BACKEND: CPUBackend
  ARCHITECTURE: 8 layers
............................................................
 *** HDF5DataLayer(train-data)
    Outputs ---------------------------
          data: Blob(20 x 20 x 1 x 1)
         label: Blob(1 x 1)
............................................................
 *** DropoutLayer(drop_in)
    Inputs ----------------------------
          data: Blob(20 x 20 x 1 x 1)
............................................................
 *** InnerProductLayer(fc1)
    Inputs ----------------------------
          data: Blob(20 x 20 x 1 x 1)
    Outputs ---------------------------
           fc1: Blob(1200 x 1)
............................................

## Configure solver

In [11]:
# Learning rate: Step policy with base rate 0.1, factor 0.998 and step size 100.
learning_rate_schedule = LRPolicy.Step(0.1, 0.998, 100)

# Momentum: Linear policy starting at 0.5, increment 0.0008, step size 100,
# upper bound 0.9 (0.0008 * 500 steps covers the 0.5 -> 0.9 range).
momentum_schedule = MomPolicy.Linear(0.5, 0.0008, 100, 0.9)

# NOTE(review): the step size of 100 and max_iter of 60*2000 were presumably
# chosen relative to the epoch length of this data set -- confirm they match
# the number of batches per epoch for the batch size in use.
params = SolverParameters(
  max_iter   = 60*2000,
  regu_coef  = 0.0,
  mom_policy = momentum_schedule,
  lr_policy  = learning_rate_schedule,
  load_from  = base_dir,
)
solver = SGD(params)

# Statistics and snapshots share one interval and both go to base_dir.
snapshot_interval = 5000
setup_coffee_lounge(solver, save_into="$(base_dir)/statistics.jld", every_n_iter=snapshot_interval)

# Training summary every 100 iterations.
add_coffee_break(solver, TrainingSummary(), every_n_iter=100)

# Snapshot every snapshot_interval iterations.
add_coffee_break(solver, Snapshot(base_dir), every_n_iter=snapshot_interval)

2-element Array{CoffeeBreak,1}:
 CoffeeBreak(TrainingSummary(true,true,false,false),100,0)
 CoffeeBreak(Snapshot("snapshots_dropout_fc"),5000,0)     

## Configure accuracy check

In [12]:
# Validation net: its own data layer reading validation.txt, the fully
# connected layers from common_layers (no dropout layers are included), and an
# accuracy layer comparing :out with :label.
data_layer_valid = HDF5DataLayer(name="validation-data", source="$(path)/validation.txt", batch_size=1)
acc_layer = AccuracyLayer(name="validation-accuracy", bottoms=[:out, :label], report_error=true)

validation_layers = [data_layer_valid, common_layers..., acc_layer]
valid_net = Net("SV-test", backend, validation_layers)

# Evaluate the validation net every 600 solver iterations.
add_coffee_break(solver, ValidationPerformance(valid_net), every_n_iter=600)

println(valid_net)

13-Aug 18:56:09:INFO:root:Constructing net SV-test on CPUBackend...
13-Aug 18:56:09:INFO:root:Topological sorting 5 layers...
13-Aug 18:56:09:INFO:root:Setup layers...
13-Aug 18:56:09:DEBUG:root:InnerProductLayer(fc1): sharing weights and bias
13-Aug 18:56:09:DEBUG:root:InnerProductLayer(fc2): sharing weights and bias
13-Aug 18:56:09:DEBUG:root:InnerProductLayer(out): sharing weights and bias
13-Aug 18:56:09:INFO:root:Network constructed!
************************************************************
          NAME: SV-test
       BACKEND: CPUBackend
  ARCHITECTURE: 5 layers
............................................................
 *** HDF5DataLayer(validation-data)
    Outputs ---------------------------
          data: Blob(20 x 20 x 1 x 1)
         label: Blob(1 x 1)
............................................................
 *** InnerProductLayer(fc1)
    Inputs ----------------------------
          data: Blob(20 x 20 x 1 x 1)
    Outputs ---------------------------
          

## Train Neural Network

In [13]:
# Run the configured SGD solver on the training net.
# NOTE(review): the recorded output of this cell ends with
# `LoadError: BoundsError()` shortly after iteration 0; the failing index is not
# visible in the log -- checking that the label values fit the N output units
# is a reasonable first step.
solve(solver, net)

13-Aug 18:56:15:DEBUG:root:Checking network topology for back-propagation
13-Aug 18:56:15:DEBUG:root:Init network SV-train
13-Aug 18:56:15:DEBUG:root:Init parameter weight for layer fc1
13-Aug 18:56:15:DEBUG:root:Init parameter bias for layer fc1
13-Aug 18:56:15:DEBUG:root:Init parameter weight for layer fc2
13-Aug 18:56:15:DEBUG:root:Init parameter bias for layer fc2
13-Aug 18:56:15:DEBUG:root:Init parameter weight for layer out
13-Aug 18:56:15:DEBUG:root:Init parameter bias for layer out
13-Aug 18:56:16:DEBUG:root:Initializing coffee breaks
13-Aug 18:56:16:INFO:root:Snapshot directory snapshots_dropout_fc already exists
13-Aug 18:56:16:DEBUG:root:Init network SV-test
13-Aug 18:56:17:INFO:root:ITER = 000000:: TRAIN obj-val = 4.12713432
13-Aug 18:56:17:INFO:root:Saving snapshot to snapshot-000000.jld...
13-Aug 18:56:17:DEBUG:root:Saving parameters for layer fc1
13-Aug 18:56:17:DEBUG:root:Saving parameters for layer fc2
13-Aug 18:56:17:DEBUG:root:Saving parameters for layer out
13-Aug 1

LoadError: BoundsError()
while loading In[13], in expression starting on line 1

In [60]:
# Tear down both nets (training first, then validation) before shutting down
# the backend they were built on.
for network in (net, valid_net)
  destroy(network)
end
shutdown(backend)

13-Aug 18:15:05:DEBUG:root:Destroying network SV-train
13-Aug 18:15:05:DEBUG:root:Destroying network SV-test


Dict{String,Array{AbstractParameter,1}} with 0 entries