Merge 73d9915 into 334750d
pluskid committed Dec 29, 2014
2 parents 334750d + 73d9915 commit b4911d8
Showing 17 changed files with 279 additions and 15 deletions.
14 changes: 14 additions & 0 deletions docs/user-guide/layers/util-layer.rst
@@ -49,6 +49,20 @@ Utility Layers
file. If this attribute is given, the corresponding symbol in this list is
used as the dataset name instead of the original blob's name.
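
For illustration, a minimal sketch of this attribute in use (the property
names, ``filename`` in particular, are written from memory here and should be
treated as an assumption rather than the definitive API):

HDF5OutputLayer(filename="predictions.hdf5", bottoms=[:pred], datasets=[:final_pred])

This would store the contents of the :pred blob under the dataset name
"final_pred" instead of "pred".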

.. class:: IdentityLayer

The identity layer maps inputs to outputs without changing anything. It can
be useful as a glue layer for renaming blobs. No data is copied by this
layer.

.. attribute::
tops
bottoms

Blob names for the outputs and inputs. This layer can take multiple input
blobs and produces the corresponding number of output blobs. The input blobs
do not need to have the same shape.
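
For example, a minimal usage sketch (blob names taken from the stacked
denoising-autoencoder example added in this commit): an identity layer that
renames the :data blob produced by a data layer to :ip0, so that the hidden
layers can follow a uniform naming scheme:

IdentityLayer(bottoms=[:data], tops=[:ip0])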

.. class:: ReshapeLayer

Reshape a blob. Can be useful if, for example, you want to make the *flat*
@@ -0,0 +1 @@
../../mnist/data/test.hdf5
@@ -0,0 +1 @@
../../mnist/data/train.hdf5
@@ -0,0 +1,129 @@
################################################################################
# Configuration
################################################################################
ENV["MOCHA_USE_CUDA"] = "true"
using Mocha

n_hidden_layer = 3
n_hidden_unit = 1000
neuron = Neurons.Sigmoid()
param_key_prefix = "ip-layer"
corruption_rates = [0.1,0.2,0.3]
pretrain_epoch = 15
finetune_epoch = 1000
batch_size = 100
momentum = 0.0
pretrain_lr = 0.001
finetune_lr = 0.1

param_keys = ["$param_key_prefix-$i" for i = 1:n_hidden_layer]
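# these keys are passed as param_key / tied_param_key below, so the weights
# pre-trained layer by layer can be reused when the fine-tuning net is built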

################################################################################
# Construct the Net
################################################################################
srand(12345678)

backend = GPUBackend()
init(backend)

data_layer = HDF5DataLayer(name="train-data", source="data/train.txt",
batch_size=batch_size, shuffle=@windows ? false : true)
rename_layer = IdentityLayer(bottoms=[:data], tops=[:ip0])
hidden_layers = [
InnerProductLayer(name="ip-$i", param_key=param_keys[i],
output_dim=n_hidden_unit, neuron=neuron,
bottoms=[symbol("ip$(i-1)")], tops=[symbol("ip$i")])
for i = 1:n_hidden_layer
]

################################################################################
# Layerwise pre-training for hidden layers
################################################################################
for i = 1:n_hidden_layer
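# split the output of the previous layer into a clean copy (:orig_data) and a
# copy that will be corrupted (:corrupt_data) for denoising training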
ae_data_layer = SplitLayer(bottoms=[symbol("ip$(i-1)")], tops=[:orig_data, :corrupt_data])
corrupt_layer = RandomMaskLayer(ratio=corruption_rates[i], bottoms=[:corrupt_data])

encode_layer = copy(hidden_layers[i], bottoms=[:corrupt_data])
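# the decoder ties its weights to the encoder above via tied_param_key and
# reconstructs the clean input under a square loss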
recon_layer = TiedInnerProductLayer(name="tied-ip-$i", tied_param_key=param_keys[i],
tops=[:recon], bottoms=[symbol("ip$i")])
recon_loss_layer = SquareLossLayer(bottoms=[:recon, :orig_data])

da_layers = [data_layer, rename_layer, ae_data_layer, corrupt_layer, hidden_layers[1:i-1]...,
encode_layer, recon_layer, recon_loss_layer]
da = Net("Denoising-Autoencoder-$i", backend, da_layers)
println(da)

# freeze all but the layers for auto-encoder
freeze_all!(da)
unfreeze!(da, "ip-$i", "tied-ip-$i")

base_dir = "pretrain-$i"
pretrain_params = SolverParameters(max_iter=div(pretrain_epoch*60000,batch_size),
regu_coef=0.0, mom_policy=MomPolicy.Fixed(momentum),
lr_policy=LRPolicy.Fixed(pretrain_lr), load_from=base_dir)
solver = SGD(pretrain_params)

add_coffee_break(solver, TrainingSummary(), every_n_iter=1000)
add_coffee_break(solver, Snapshot(base_dir), every_n_iter=3000)
solve(solver, da)

destroy(da)
end

################################################################################
# Fine-tuning
################################################################################

pred_layer = InnerProductLayer(name="pred", output_dim=10,
bottoms=[symbol("ip$n_hidden_layer")], tops=[:pred])
loss_layer = SoftmaxLossLayer(bottoms=[:pred, :label])

net = Net("MNIST-finetune", backend, [data_layer, rename_layer, hidden_layers..., pred_layer, loss_layer])

base_dir = "finetune"
params = SolverParameters(max_iter=div(finetune_epoch*60000,batch_size),
regu_coef=0.0, mom_policy=MomPolicy.Fixed(momentum),
lr_policy=LRPolicy.Fixed(finetune_lr), load_from=base_dir)
solver = SGD(params)

setup_coffee_lounge(solver, save_into="$base_dir/statistics.jld", every_n_iter=10000)

add_coffee_break(solver, TrainingSummary(), every_n_iter=1000)
add_coffee_break(solver, Snapshot(base_dir), every_n_iter=10000)

data_layer_test = HDF5DataLayer(name="test-data", source="data/test.txt", batch_size=100)
acc_layer = AccuracyLayer(name="test-accuracy", bottoms=[:pred, :label])
test_net = Net("MNIST-finetune-test", backend, [data_layer_test, rename_layer,
hidden_layers..., pred_layer, acc_layer])
add_coffee_break(solver, ValidationPerformance(test_net), every_n_iter=5000)

solve(solver, net)

destroy(net)
destroy(test_net)

################################################################################
# Random-initialization, for comparison
################################################################################
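# clear the parameter registry so this run does not silently reuse the
# pre-trained weights registered under the same param_keys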
registry_reset(backend)
net = Net("MNIST-rnd", backend, [data_layer, rename_layer, hidden_layers..., pred_layer, loss_layer])
base_dir = "randinit"

params = copy(params, load_from=base_dir)
solver = SGD(params)

setup_coffee_lounge(solver, save_into="$base_dir/statistics.jld", every_n_iter=10000)

add_coffee_break(solver, TrainingSummary(), every_n_iter=1000)
add_coffee_break(solver, Snapshot(base_dir), every_n_iter=10000)
test_net = Net("MNIST-randinit-test", backend, [data_layer_test, rename_layer,
hidden_layers..., pred_layer, acc_layer])
add_coffee_break(solver, ValidationPerformance(test_net), every_n_iter=5000)

solve(solver, net)

destroy(net)
destroy(test_net)


shutdown(backend)
13 changes: 11 additions & 2 deletions src/backend.jl
@@ -11,13 +11,22 @@ show(io::IO, backend::Backend) = show(io, typeof(backend))
function init(backend::Backend)
end
function shutdown(backend::Backend)
registry_reset(backend)
end
function registry_reset(backend::Backend)
for (k,params) in backend.param_registry
map(destroy, params)
end
backend.param_registry = ParameterRegistry()
end
function registry_put(backend::Backend, key::String, params::Vector)
# convert Vector{Parameter} to Vector{AbstractParameter}
backend.param_registry[key] = convert(Vector{AbstractParameter}, params)
if haskey(backend.param_registry, key)
map(destroy, backend.param_registry[key])
end

# we keep a reference to the parameters so that, even after the original
# network is destroyed, we still have valid access to the trained parameters
backend.param_registry[key] = AbstractParameter[share_parameter(backend, p) for p in params]
end
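# Illustration (editor's sketch, names borrowed from the bundled example): a
# layer constructed with param_key="ip-1" has its trained parameters registered
# here; after the owning net is destroyed, a new net whose layer uses the same
# param_key obtains those parameters again through registry_get.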
function registry_get(backend::Backend, key::String)
return get(backend.param_registry, key, nothing)
9 changes: 9 additions & 0 deletions src/cuda/layers/square-loss.jl
@@ -5,6 +5,15 @@ function forward(backend::GPUBackend, state::SquareLossLayerState, inputs::Vecto
data_type = eltype(pred)
n = length(pred)

pred_arr = to_array(pred)
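# sanity check: to_array copies the GPU blob to a host Array so we can abort
# early if NaNs have crept into the predictions or labels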
if any(isnan(pred_arr))
error("NaN in pred")
end
label_arr = to_array(label)
if any(isnan(label_arr))
error("NaN in label")
end

copy!(state.pred_copy, pred)
CuBLAS.axpy(backend.cublas_ctx, n, convert(data_type, -1), label.ptr, 1, state.pred_copy.ptr, 1)
state.loss = 0.5/get_num(pred)*CuBLAS.dot(backend.cublas_ctx, data_type, n, state.pred_copy.ptr, 1, state.pred_copy.ptr, 1)
6 changes: 4 additions & 2 deletions src/layers.jl
@@ -5,6 +5,7 @@ export InnerProductLayer, ConvolutionLayer, PoolingLayer, SoftmaxLayer
export PowerLayer, SplitLayer, ElementWiseLayer, ChannelPoolingLayer
export LRNLayer, DropoutLayer, ReshapeLayer, ArgmaxLayer, HDF5OutputLayer
export CropLayer, ConcatLayer, RandomMaskLayer, TiedInnerProductLayer
export IdentityLayer
export SquareLossLayer, SoftmaxLossLayer, MultinomialLogisticLossLayer
export AccuracyLayer

@@ -166,13 +167,13 @@ function show_layer(io::IO, state::LayerState, inputs::Vector{Blob})
if !is_source(state.layer)
println(io, " Inputs ----------------------------")
for i = 1:length(inputs)
println(io, " $(@sprintf("%6s", state.layer.bottoms[i])): $(inputs[i])")
println(io, " $(@sprintf("%9s", state.layer.bottoms[i])): $(inputs[i])")
end
end
if !is_sink(state.layer) && !is_inplace(state.layer)
println(io, " Outputs ---------------------------")
for i = 1:length(state.blobs)
println(io, " $(@sprintf("%6s", state.layer.tops[i])): $(state.blobs[i])")
println(io, " $(@sprintf("%9s", state.layer.tops[i])): $(state.blobs[i])")
end
end
end
@@ -201,6 +202,7 @@ include("layers/crop.jl")
include("layers/concat.jl")
include("layers/random-mask.jl")
include("layers/tied-inner-product.jl")
include("layers/identity.jl")

#############################################################
# Utility layers
2 changes: 1 addition & 1 deletion src/layers/convolution.jl
@@ -101,7 +101,7 @@ type ConvolutionLayerState <: LayerState
@assert shared_params[1].name == "filter" && shared_params[2].name == "bias"
@assert size(shared_params[1].blob) == tuple(layer.kernel...,div(channels,layer.n_group),layer.n_filter)
@assert eltype(shared_params[1].blob) == dtype
@debug("ConvolutionLayer: sharing filters and bias")
@debug("ConvolutionLayer($(layer.name)): sharing filters and bias")

param_filter, param_bias = [share_parameter(backend, param) for param in shared_params]
else
34 changes: 34 additions & 0 deletions src/layers/identity.jl
@@ -0,0 +1,34 @@
############################################################
# Identity Layer
############################################################
@defstruct IdentityLayer Layer (
name :: String = "identity",
(tops :: Vector{Symbol} = Symbol[], length(tops) > 0),
(bottoms :: Vector{Symbol} = Symbol[], length(bottoms) == length(tops)),
)
@characterize_layer(IdentityLayer,
can_do_bp => true
)

type IdentityLayerState <: LayerState
layer :: IdentityLayer
blobs :: Vector{Blob}
blobs_diff :: Vector{Blob}
end

function setup(backend::Backend, layer::IdentityLayer, inputs::Vector{Blob}, diffs::Vector{Blob})
blobs = inputs[:] # shallow copy
blobs_diff = diffs[:] # shallow copy

IdentityLayerState(layer, blobs, blobs_diff)
end
function shutdown(backend::Backend, state::IdentityLayerState)
end

function forward(backend::Backend, state::IdentityLayerState, inputs::Vector{Blob})
# do nothing
end

function backward(backend::Backend, state::IdentityLayerState, inputs::Vector{Blob}, diffs::Vector{Blob})
# do nothing
end
2 changes: 1 addition & 1 deletion src/layers/inner-product.jl
@@ -68,7 +68,7 @@ type InnerProductLayerState <: LayerState
@assert size(shared_params[1].blob) == (fea_size, out_dim)
@assert eltype(shared_params[1].blob) == data_type
@assert size(shared_params[2].blob) == (out_dim, 1)
@debug("InnerProductLayer: sharing weights and bias")
@debug("InnerProductLayer($(layer.name)): sharing weights and bias")

param_weight, param_bias = [share_parameter(backend, param) for param in shared_params]
else
2 changes: 1 addition & 1 deletion src/layers/tied-inner-product.jl
@@ -66,7 +66,7 @@ type TiedInnerProductLayerState <: LayerState
@assert length(shared_params) == 1
@assert shared_params[1].name == "bias"
@assert size(shared_params[1].blob) == (out_dim, 1)
@debug("TiedInnerProductLayer: sharing bias")
@debug("TiedInnerProductLayer($(layer.name)): sharing bias")

params = [share_parameter(backend, shared_params[1])]
else
38 changes: 35 additions & 3 deletions src/macros.jl
@@ -19,11 +19,21 @@
# could be used to force user to provide a
# valid value when no meaningful default value
# is available.
#
# The macro will define a constructor that accepts
# keyword arguments. Since the defined type is
# immutable, a "copy" function is also defined; it
# takes a prototype object and accepts extra keyword
# parameters that can be used to construct a new
# object with the specified fields changed.
#############################################################
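# Illustrative sketch (editor's note; MyLayer is a hypothetical type, not part
# of this file):
#
#   @defstruct MyLayer Layer (
#     name :: String = "my-layer",
#     (dim :: Int = 0, dim > 0),
#   )
#   layer  = MyLayer(dim=10)        # keyword constructor, checks dim > 0
#   layer2 = copy(layer, dim=20)    # new object with dim changed, name kept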
import Base.copy
export copy
macro defstruct(name, super_name, fields)
@assert fields.head == :tuple
fields = fields.args
@assert length(fields) > 0
name = esc(name)

field_defs = Array(Expr, length(fields)) # :(field2 :: Int)
field_names = Array(Symbol, length(fields)) # :field2
@@ -48,17 +58,39 @@ macro defstruct(name, super_name, fields)
asserts = map(filter(i -> isdefined(field_asserts,i), 1:length(fields))) do i
:(@assert($(field_asserts[i])))
end
construct = Expr(:call, esc(name), field_names...)
construct = Expr(:call, name, field_names...)
ctor_body = Expr(:block, asserts..., construct)
ctor_def = Expr(:call, esc(name), Expr(:parameters, field_defaults...))
ctor_def = Expr(:call, name, Expr(:parameters, field_defaults...))
ctor = Expr(:(=), ctor_def, ctor_body)

# for copy constructor
field_assigns = Expr(:block, [:(params[symbol($(esc(string(fname))))] = proto.$fname) for fname in field_names]...)
field_expose = Expr(:block, [:($(esc(fname)) = params[symbol($(esc(string(fname))))]) for fname in field_names]...)
assert_block = Expr(:block, asserts...)
obj_construct = Expr(:call, name, field_names...)
copy_fname = esc(:copy)

quote
immutable $(esc(name)) <: $super_name
immutable $(name) <: $super_name
$type_body
end

$ctor

function $copy_fname(proto::$name; kw...)
params = Dict()
$field_assigns

for (k,v) in kw
@assert haskey(params, k) "Unrecognized field " * string(k) * " for " * $(string(name.args[1]))
params[k] = v
end

$field_expose
$assert_block

$obj_construct
end
end
end

4 changes: 2 additions & 2 deletions src/net.jl
@@ -92,7 +92,7 @@ function init(net::Net)
@debug("Init network $(net.name)")
for i = 1:length(net.layers)
state = net.states[i]
if has_param(net.layers[i])
if has_param(net.layers[i]) && !is_frozen(net.states[i])
for param in state.parameters
if !isa(param.initializer, NullInitializer)
@debug("Init parameter $(param.name) for layer $(net.layers[i].name)")
@@ -173,7 +173,7 @@ function backward(net::Net, regu_coef :: FloatingPoint = 0.0)
backward(net.backend, net.states[i], net.blobs_forward[i], net.blobs_backward[i])

# handle regularization
if has_param(net.layers[i])
if has_param(net.layers[i]) && !is_frozen(net.states[i])
for param in net.states[i].parameters
backward(net.backend, param.regularizer, regu_coef, param.blob, param.gradient)
end
2 changes: 1 addition & 1 deletion src/solvers.jl
@@ -230,7 +230,7 @@ function solve(solver::Solver, net::Net)
i_state = setup(solver, net, solver_state)

@debug("Entering solver loop")
trainable_layers = filter(i -> has_param(net.layers[i]), 1:length(net.layers))
trainable_layers = filter(i -> has_param(net.layers[i]) && !is_frozen(net.states[i]), 1:length(net.layers))
while true
solver_state.iter += 1

3 changes: 2 additions & 1 deletion src/solvers/nesterov.jl
@@ -18,7 +18,8 @@ type NesterovInternalState <: SolverInternelState
end

function setup(nag::Nesterov, net::Net, solver_state::SolverState)
param_states = filter(x -> :parameters in names(x), net.states)
param_states = map(i -> net.states[i],
filter(i -> has_param(net.layers[i]) && !is_frozen(net.states[i]), 1:length(net.layers)))
param_history = Array(Vector{Blob}, length(param_states))
for i = 1:length(param_states)
state = param_states[i]
