## Neural Network main params

In [1]:
# Global settings for the GPU training run. The names assigned here
# (use_gpu, EPOCH, MAXITER, base_dir, path) are read by the network
# configuration cell that follows.

# Selects GPUBackend() over CPUBackend() when the backend is created later.
use_gpu = true
# Must be set before `using Mocha`: Mocha reports detecting this variable
# while the module is being configured/loaded.
ENV["MOCHA_USE_CUDA"] = "true"
# Restrict OpenMP and BLAS to a single thread each.
ENV["OMP_NUM_THREADS"] = 1
blas_set_num_threads(1)

using HDF5
using Mocha
# Fixed seed for the global random number generator.
srand(333)

# The solver's iteration cap is 90 * EPOCH.
# NOTE(review): 90 is presumably the number of mini-batches per epoch
# (training set size / batch size) -- confirm against the data set.
EPOCH = 47
MAXITER = 90*EPOCH
# Directory passed to the solver as `load_from`.
base_dir = "snapshots"
# Directory containing the HDF5 list files (train.txt, validation.txt).
path = "hdf5-data"

Configuring Mocha...
 * CUDA       enabled [DEV=0] (MOCHA_USE_CUDA environment variable detected)
 * Native Ext disabled by default
Mocha configured, continue loading module...


"hdf5-data"

### Setup Neural Network configuration

In [2]:
# Build the training network plus two evaluation networks (full training set
# and validation set), configure an SGD solver, train, and release all
# resources. Relies on the globals `use_gpu`, `path`, `base_dir` and `MAXITER`
# defined in the parameter cell.

# Layers of the classifier: two convolution + pooling stages followed by three
# fully connected layers; the last one produces the 62 outputs fed to the loss.
data_layer  = AsyncHDF5DataLayer(name="train-data", source="$(path)/train.txt", batch_size=100)
conv_layer  = ConvolutionLayer(name="conv1", n_filter=96, kernel=(5,5), bottoms=[:data], tops=[:conv])
pool_layer  = PoolingLayer(name="pool1", kernel=(2,2), stride=(2,2), bottoms=[:conv], tops=[:pool])
conv2_layer = ConvolutionLayer(name="conv2", n_filter=128, kernel=(5,5), bottoms=[:pool], tops=[:conv2])
pool2_layer = PoolingLayer(name="pool2", kernel=(2,2), stride=(2,2), bottoms=[:conv2], tops=[:pool2])
fc1_layer   = InnerProductLayer(name="ip1", output_dim=2400, neuron=Neurons.ReLU(), bottoms=[:pool2], tops=[:ip1])
fc2_layer   = InnerProductLayer(name="ip2", output_dim=1200, neuron=Neurons.ReLU(), bottoms=[:ip1], tops=[:ip2])
fc3_layer   = InnerProductLayer(name="ip3", output_dim=62, bottoms=[:ip2], tops=[:out])
loss_layer  = SoftmaxLossLayer(name="loss", bottoms=[:out,:label])

backend = use_gpu ? GPUBackend() : CPUBackend()
init(backend)

# Dropout is only part of the training net. The ratios are 10% on the input
# data, 20% after each pooling stage and 50% on the two hidden fully connected
# layers. Each dropout layer declares only `bottoms` (no `tops`).
drop_input = DropoutLayer(name="drop_in", bottoms=[:data], ratio=0.1)
drop_conv1 = DropoutLayer(name="drop_conv1", bottoms=[:pool], ratio=0.2)
drop_conv2 = DropoutLayer(name="drop_conv2", bottoms=[:pool2], ratio=0.2)
drop_ip1 = DropoutLayer(name="drop_ip1", bottoms=[:ip1], ratio=0.5)
drop_ip2 = DropoutLayer(name="drop_ip2", bottoms=[:ip2], ratio=0.5)

# `common_layers` is reused by all three nets below.
common_layers = [conv_layer, pool_layer, conv2_layer, pool2_layer, fc1_layer, fc2_layer, fc3_layer]
drop_layers = [drop_input, drop_conv1, drop_conv2, drop_ip1, drop_ip2]
# Put the training net together; the correct layer ordering is established
# automatically by the constructor.
net = Net("SVHN-train", backend, [data_layer, common_layers..., drop_layers..., loss_layer])

# Accuracy check on the training set during training (no dropout layers)
full_data_layer = AsyncHDF5DataLayer(name="train-full-data", source="$(path)/train.txt", batch_size=4713)
full_acc_layer = AccuracyLayer(name="full_train", bottoms=[:out, :label], report_error=true)
train_net = Net("SVHN-train-prediction", backend, [full_data_layer, common_layers..., full_acc_layer])

# Accuracy check on the validation set during training (no dropout layers)
valid_data_layer = AsyncHDF5DataLayer(name="validation-data", source="$(path)/validation.txt", batch_size=785)
valid_acc_layer = AccuracyLayer(name="validation", bottoms=[:out, :label], report_error=true)
valid_net = Net("SVHN-validation-prediction", backend, [valid_data_layer, common_layers..., valid_acc_layer])

println(net)

# SGD with fixed momentum 0.95, no weight regularization and an "Inv" learning
# rate policy. `load_from` points the solver at `base_dir`; no snapshot coffee
# break is registered here, so this cell itself does not write snapshots.
params = SolverParameters(max_iter=MAXITER,
                          regu_coef=0.0,
                          mom_policy=MomPolicy.Fixed(0.95),
                          lr_policy=LRPolicy.Inv(0.01, 0.0001, 0.75),
                          load_from=base_dir)

solver = SGD(params)

#setup_coffee_lounge(solver, save_into="$(base_dir)/statistics.jld", every_n_iter=5000)

# Report training progress every 100 iterations
add_coffee_break(solver, TrainingSummary(show_lr=true, show_mom=true), every_n_iter=100)

# Report performance on the training set every 500 iterations
add_coffee_break(solver, ValidationPerformance(train_net), every_n_iter=500)

# Report performance on the validation set every 500 iterations
add_coffee_break(solver, ValidationPerformance(valid_net), every_n_iter=500)

# Release every net (not just the training net) and shut the backend down,
# even when training fails or is interrupted; the error is still propagated.
try
    solve(solver, net)
finally
    destroy(net)
    destroy(train_net)
    destroy(valid_net)
    shutdown(backend)
end

31-Aug 07:02:09:INFO:root:Initializing CuDNN backend...


INFO: CuDNN backend initialized!


31-Aug 07:02:10:INFO:root:Constructing net SVHN-train on GPUBackend...
31-Aug 07:02:10:INFO:root:Topological sorting 14 layers...
31-Aug 07:02:10:INFO:root:Setup layers...
31-Aug 07:02:13:INFO:root:Network constructed!
31-Aug 07:02:13:INFO:root:Constructing net SVHN-train-prediction on GPUBackend...
31-Aug 07:02:13:INFO:root:Topological sorting 9 layers...
31-Aug 07:02:13:INFO:root:Setup layers...
31-Aug 07:02:13:DEBUG:root:ConvolutionLayer(conv1): sharing filters and bias
31-Aug 07:02:13:DEBUG:root:ConvolutionLayer(conv2): sharing filters and bias
31-Aug 07:02:13:DEBUG:root:InnerProductLayer(ip1): sharing weights and bias
31-Aug 07:02:13:DEBUG:root:InnerProductLayer(ip2): sharing weights and bias
31-Aug 07:02:13:DEBUG:root:InnerProductLayer(ip3): sharing weights and bias
31-Aug 07:02:13:INFO:root:Network constructed!
31-Aug 07:02:13:INFO:root:Constructing net SVHN-validation-prediction on GPUBackend...
31-Aug 07:02:13:INFO:root:Topological sorting 9 layers...
31-Aug 07:02:13:INFO:root

## Retrain on CPU backend

In [3]:
# Reset the top-level workspace before the CPU run is configured below.
# NOTE(review): presumably meant to discard the globals left over from the
# GPU run -- confirm that already-loaded modules behave as expected afterwards.
workspace()

In [None]:
# Global settings for the CPU training run. The names assigned here
# (use_gpu, EPOCH, MAXITER, base_dir, path) are read by the network
# configuration cell that follows.

# Selects CPUBackend() over GPUBackend() when the backend is created later.
use_gpu = false
# NOTE(review): the Mocha start-up log of the GPU run says CUDA is enabled
# because the variable was "detected" -- confirm that the value "false"
# (rather than unsetting the variable) really disables CUDA.
ENV["MOCHA_USE_CUDA"] = "false"
# Request Mocha's native extension for this run.
ENV["MOCHA_USE_NATIVE_EXT"] = "true"

using HDF5
using Mocha
# Fixed seed for the global random number generator (same as the GPU run).
srand(333)

# The solver's iteration cap is 90 * EPOCH.
# NOTE(review): 90 is presumably the number of mini-batches per epoch
# (training set size / batch size) -- confirm against the data set.
EPOCH = 47
MAXITER = 90*EPOCH
# Directory passed to the solver as `load_from`.
base_dir = "snapshots"
# Directory containing the HDF5 list files (train.txt, validation.txt).
path = "hdf5-data"

### Setup Neural Network configuration

In [5]:
# Build the training network plus two evaluation networks (full training set
# and validation set), configure an SGD solver, train, and release all
# resources. Relies on the globals `use_gpu`, `path`, `base_dir` and `MAXITER`
# defined in the parameter cell.

# Layers of the classifier: two convolution + pooling stages followed by three
# fully connected layers; the last one produces the 62 outputs fed to the loss.
data_layer  = AsyncHDF5DataLayer(name="train-data", source="$(path)/train.txt", batch_size=100)
conv_layer  = ConvolutionLayer(name="conv1", n_filter=96, kernel=(5,5), bottoms=[:data], tops=[:conv])
pool_layer  = PoolingLayer(name="pool1", kernel=(2,2), stride=(2,2), bottoms=[:conv], tops=[:pool])
conv2_layer = ConvolutionLayer(name="conv2", n_filter=128, kernel=(5,5), bottoms=[:pool], tops=[:conv2])
pool2_layer = PoolingLayer(name="pool2", kernel=(2,2), stride=(2,2), bottoms=[:conv2], tops=[:pool2])
fc1_layer   = InnerProductLayer(name="ip1", output_dim=2400, neuron=Neurons.ReLU(), bottoms=[:pool2], tops=[:ip1])
fc2_layer   = InnerProductLayer(name="ip2", output_dim=1200, neuron=Neurons.ReLU(), bottoms=[:ip1], tops=[:ip2])
fc3_layer   = InnerProductLayer(name="ip3", output_dim=62, bottoms=[:ip2], tops=[:out])
loss_layer  = SoftmaxLossLayer(name="loss", bottoms=[:out,:label])

backend = use_gpu ? GPUBackend() : CPUBackend()
init(backend)

# Dropout is only part of the training net. The ratios are 10% on the input
# data, 20% after each pooling stage and 50% on the two hidden fully connected
# layers. Each dropout layer declares only `bottoms` (no `tops`).
drop_input = DropoutLayer(name="drop_in", bottoms=[:data], ratio=0.1)
drop_conv1 = DropoutLayer(name="drop_conv1", bottoms=[:pool], ratio=0.2)
drop_conv2 = DropoutLayer(name="drop_conv2", bottoms=[:pool2], ratio=0.2)
drop_ip1 = DropoutLayer(name="drop_ip1", bottoms=[:ip1], ratio=0.5)
drop_ip2 = DropoutLayer(name="drop_ip2", bottoms=[:ip2], ratio=0.5)

# `common_layers` is reused by all three nets below.
common_layers = [conv_layer, pool_layer, conv2_layer, pool2_layer, fc1_layer, fc2_layer, fc3_layer]
drop_layers = [drop_input, drop_conv1, drop_conv2, drop_ip1, drop_ip2]
# Put the training net together; the correct layer ordering is established
# automatically by the constructor.
net = Net("SVHN-train", backend, [data_layer, common_layers..., drop_layers..., loss_layer])

# Accuracy check on the training set during training (no dropout layers)
full_data_layer = AsyncHDF5DataLayer(name="train-full-data", source="$(path)/train.txt", batch_size=4713)
full_acc_layer = AccuracyLayer(name="full_train", bottoms=[:out, :label], report_error=true)
train_net = Net("SVHN-train-prediction", backend, [full_data_layer, common_layers..., full_acc_layer])

# Accuracy check on the validation set during training (no dropout layers)
valid_data_layer = AsyncHDF5DataLayer(name="validation-data", source="$(path)/validation.txt", batch_size=785)
valid_acc_layer = AccuracyLayer(name="validation", bottoms=[:out, :label], report_error=true)
valid_net = Net("SVHN-validation-prediction", backend, [valid_data_layer, common_layers..., valid_acc_layer])

println(net)

# SGD with fixed momentum 0.95, no weight regularization and an "Inv" learning
# rate policy. `load_from` points the solver at `base_dir`; no snapshot coffee
# break is registered here, so this cell itself does not write snapshots.
params = SolverParameters(max_iter=MAXITER,
                          regu_coef=0.0,
                          mom_policy=MomPolicy.Fixed(0.95),
                          lr_policy=LRPolicy.Inv(0.01, 0.0001, 0.75),
                          load_from=base_dir)

solver = SGD(params)

#setup_coffee_lounge(solver, save_into="$(base_dir)/statistics.jld", every_n_iter=5000)

# Report training progress every 100 iterations
add_coffee_break(solver, TrainingSummary(show_lr=true, show_mom=true), every_n_iter=100)

# Report performance on the training set every 500 iterations
add_coffee_break(solver, ValidationPerformance(train_net), every_n_iter=500)

# Report performance on the validation set every 500 iterations
add_coffee_break(solver, ValidationPerformance(valid_net), every_n_iter=500)

# Release every net (not just the training net) and shut the backend down,
# even when training fails or is interrupted; the error is still propagated.
try
    solve(solver, net)
finally
    destroy(net)
    destroy(train_net)
    destroy(valid_net)
    shutdown(backend)
end

31-Aug 07:02:51:INFO:root:Constructing net SVHN-train on CPUBackend...
31-Aug 07:02:51:INFO:root:Topological sorting 14 layers...
31-Aug 07:02:51:INFO:root:Setup layers...
31-Aug 07:02:52:INFO:root:Network constructed!
31-Aug 07:02:52:INFO:root:Constructing net SVHN-train-prediction on CPUBackend...
31-Aug 07:02:52:INFO:root:Topological sorting 9 layers...
31-Aug 07:02:52:INFO:root:Setup layers...
31-Aug 07:02:52:DEBUG:root:ConvolutionLayer(conv1): sharing filters and bias
31-Aug 07:02:52:DEBUG:root:ConvolutionLayer(conv2): sharing filters and bias
31-Aug 07:02:52:DEBUG:root:InnerProductLayer(ip1): sharing weights and bias
31-Aug 07:02:52:DEBUG:root:InnerProductLayer(ip2): sharing weights and bias
31-Aug 07:02:52:DEBUG:root:InnerProductLayer(ip3): sharing weights and bias
31-Aug 07:02:52:INFO:root:Network constructed!
31-Aug 07:02:52:INFO:root:Constructing net SVHN-validation-prediction on CPUBackend...
31-Aug 07:02:52:INFO:root:Topological sorting 9 layers...
31-Aug 07:02:52:INFO:root

Dict{String,Array{AbstractParameter,1}} with 0 entries