Commit 8dd63e8

Merge pull request #31 from pluskid/dA
Freeze Layer Parameters
pluskid committed Dec 23, 2014
2 parents 9b4bd0c + 18e5f6d commit 8dd63e8
Showing 9 changed files with 191 additions and 53 deletions.
22 changes: 12 additions & 10 deletions src/cuda/layers/convolution.jl
@@ -104,17 +104,19 @@ function backward(backend::GPUBackend, state::ConvolutionLayerState, inputs::Vec
top_diff = state.blobs_diff[i]

for g = 1:state.layer.n_group
# gradient w.r.t. bias
CuDNN.convolution_backward_bias(backend.cudnn_ctx, alpha,
state.etc.outputs_desc[i], CuPtr(top_diff.ptr.p + state.etc.top_offset * (g-1)),
beta_accumulate, state.etc.bias_desc, CuPtr(state.∇bias.ptr.p + state.etc.bias_offset * (g-1)))
if !state.frozen
# gradient w.r.t. bias
CuDNN.convolution_backward_bias(backend.cudnn_ctx, alpha,
state.etc.outputs_desc[i], CuPtr(top_diff.ptr.p + state.etc.top_offset * (g-1)),
beta_accumulate, state.etc.bias_desc, CuPtr(state.∇bias.ptr.p + state.etc.bias_offset * (g-1)))

# gradient w.r.t. weights
CuDNN.convolution_backward_filter(backend.cudnn_ctx, alpha,
state.etc.inputs_desc[i], CuPtr(bottom.ptr.p + state.etc.bottom_offset * (g-1)),
state.etc.outputs_desc[i], CuPtr(top_diff.ptr.p + state.etc.top_offset * (g-1)),
state.etc.conv_desc[i],
beta_accumulate, state.etc.filter_desc, CuPtr(state.∇filter.ptr.p + state.etc.weight_offset * (g-1)))
# gradient w.r.t. weights
CuDNN.convolution_backward_filter(backend.cudnn_ctx, alpha,
state.etc.inputs_desc[i], CuPtr(bottom.ptr.p + state.etc.bottom_offset * (g-1)),
state.etc.outputs_desc[i], CuPtr(top_diff.ptr.p + state.etc.top_offset * (g-1)),
state.etc.conv_desc[i],
beta_accumulate, state.etc.filter_desc, CuPtr(state.∇filter.ptr.p + state.etc.weight_offset * (g-1)))
end

# gradient w.r.t. bottom data
if isa(diffs[i], CuTensorBlob)
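Note that only the parameter gradients (∇bias and ∇filter above) are gated by state.frozen; the gradient with respect to the bottom data is still computed, so a frozen layer keeps propagating errors to the layers below it.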
13 changes: 8 additions & 5 deletions src/cuda/layers/inner-product.jl
@@ -29,12 +29,15 @@ function backward(backend::GPUBackend, state::InnerProductLayerState, inputs::Ve
input = inputs[i]
batch_size = get_num(input)
∂f_∂o = state.blobs_diff[i]
CuBLAS.gemm(backend.cublas_ctx, CuBLAS.OP_N, CuBLAS.OP_T, source_dim, target_dim, batch_size,
convert(data_type, 1), input.ptr, source_dim, ∂f_∂o.ptr, target_dim, zero_and_then_one, state.∇W.ptr, source_dim)

# ∂f/∂b = sum(∂f/∂o, 2)
CuBLAS.gemm(backend.cublas_ctx, CuBLAS.OP_N, CuBLAS.OP_N, target_dim, 1, batch_size,
convert(data_type, 1), ∂f_∂o.ptr, target_dim, state.bias_multipliers[i].ptr, batch_size, zero_and_then_one, state.∇b.ptr, target_dim)
if !state.frozen
CuBLAS.gemm(backend.cublas_ctx, CuBLAS.OP_N, CuBLAS.OP_T, source_dim, target_dim, batch_size,
one(data_type), input.ptr, source_dim, ∂f_∂o.ptr, target_dim, zero_and_then_one, state.∇W.ptr, source_dim)

# ∂f/∂b = sum(∂f/∂o, 2)
CuBLAS.gemm(backend.cublas_ctx, CuBLAS.OP_N, CuBLAS.OP_N, target_dim, 1, batch_size,
one(data_type), ∂f_∂o.ptr, target_dim, state.bias_multipliers[i].ptr, batch_size, zero_and_then_one, state.∇b.ptr, target_dim)
end

zero_and_then_one = convert(data_type, 1)

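For reference, the "∂f/∂b = sum(∂f/∂o, 2)" comment above refers to computing the batch sum of the output gradients with a gemm against the all-ones bias_multipliers vector. A minimal standalone sketch of that identity (plain Julia in the 0.3-era style of this codebase; the names are illustrative and not part of the diff):

    target_dim, batch_size = 5, 8
    data_type = Float32
    df_do = rand(data_type, target_dim, batch_size)  # ∂f/∂o, one column per example
    bias_multiplier = ones(data_type, batch_size)    # the all-ones helper vector
    grad_b = df_do * bias_multiplier                 # gemm with a ones vector...
    @assert all(abs(grad_b - vec(sum(df_do, 2))) .< 1f-5)  # ...equals summing over the batch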
8 changes: 5 additions & 3 deletions src/cuda/layers/tied-inner-product.jl
@@ -29,9 +29,11 @@ function backward(backend::GPUBackend, state::TiedInnerProductLayerState, inputs
batch_size = get_num(input)
∂f_∂o = state.blobs_diff[i]

# ∂f/∂b = sum(∂f/∂o, 2)
CuBLAS.gemm(backend.cublas_ctx, CuBLAS.OP_N, CuBLAS.OP_N, recon_dim, 1, batch_size,
convert(data_type, 1), ∂f_∂o.ptr, recon_dim, state.bias_multipliers[i].ptr, batch_size, zero_and_then_one, state.∇b.ptr, recon_dim)
if !state.frozen
# ∂f/∂b = sum(∂f/∂o, 2)
CuBLAS.gemm(backend.cublas_ctx, CuBLAS.OP_N, CuBLAS.OP_N, recon_dim, 1, batch_size,
convert(data_type, 1), ∂f_∂o.ptr, recon_dim, state.bias_multipliers[i].ptr, batch_size, zero_and_then_one, state.∇b.ptr, recon_dim)
end

zero_and_then_one = one(data_type)

15 changes: 15 additions & 0 deletions src/layers.jl
@@ -12,6 +12,7 @@ export setup, forward, backward, shutdown

export get_param_key
export reset_statistics, show_statistics
export freeze!, unfreeze!, is_frozen

############################################################
# Implementing a Layer
@@ -125,6 +126,20 @@ function param_key(layer::Layer)
return isempty(key) ? layer.name : key
end

# If a layer state is frozen, its parameters will not be trained, and there is
# no need to compute gradients for those parameters
function freeze!(state::LayerState)
@assert !has_param(state.layer) "Layers with parameters should implement their own freeze function"
# freeze! has no effect on layers without parameters
end
function unfreeze!(state::LayerState)
@assert !has_param(state.layer) "Layers with parameters should implement their own unfreeze function"
end
function is_frozen(state::LayerState)
@assert !has_param(state.layer) "Layers with parameters should implement their own is_frozen function"
false # layers without parameters are never frozen
end

#############################################################
# Display layers
#############################################################
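For layers that do carry parameters, the expected pattern is to store a frozen flag in the layer state and override all three functions, as the convolution and inner-product layers below do. A minimal sketch of that pattern (MyLayerState is a hypothetical illustration, not part of this commit):

    type MyLayerState <: LayerState
      layer  :: Layer
      frozen :: Bool
      # ... blobs, parameters, etc.
    end
    freeze!(state::MyLayerState)   = (state.frozen = true)
    unfreeze!(state::MyLayerState) = (state.frozen = false)
    is_frozen(state::MyLayerState) = state.frozen

In the layer's backward pass, the parameter-gradient computation is then guarded with "if !state.frozen", exactly as the changes below do.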
58 changes: 37 additions & 21 deletions src/layers/convolution.jl
@@ -130,6 +130,8 @@ type ConvolutionLayerState <: LayerState

state.etc = etc

state.frozen = false

return state
end

@@ -138,6 +140,18 @@ type ConvolutionLayerState <: LayerState
width_out :: Int

etc :: Any # whatever status a computation backend needs to maintain

frozen :: Bool
end

function freeze!(state::ConvolutionLayerState)
state.frozen = true
end
function unfreeze!(state::ConvolutionLayerState)
state.frozen = false
end
function is_frozen(state::ConvolutionLayerState)
state.frozen
end

function setup(backend::Backend, layer::ConvolutionLayer, shared_state, inputs::Vector{Blob}, diffs::Vector{Blob})
@@ -206,29 +220,31 @@ function backward(backend::CPUBackend, state::ConvolutionLayerState, inputs::Vec
top_offset = state.etc.M * state.etc.N * sizeof(dtype)
top_img_offset = state.height_out * state.width_out * state.layer.n_filter * sizeof(dtype)

for n = 1:num
top_diff_ptr = convert(Ptr{dtype}, top_diff.data) + top_img_offset * (n-1)
if !state.frozen
for n = 1:num
top_diff_ptr = convert(Ptr{dtype}, top_diff.data) + top_img_offset * (n-1)

#----------------------------------------------
# bias gradient
RawBLAS.gemv!('T', state.etc.M, state.layer.n_filter, convert(dtype, 1), top_diff_ptr,
state.etc.bias_multiplier.data, convert(dtype, 1), pointer(state.∇bias.data))
#----------------------------------------------
# bias gradient
RawBLAS.gemv!('T', state.etc.M, state.layer.n_filter, convert(dtype, 1), top_diff_ptr,
state.etc.bias_multiplier.data, convert(dtype, 1), pointer(state.∇bias.data))

#----------------------------------------------
# filter gradient
if isa(state.etc.col_buffer, NullBlob)
col_buffer = convert(Ptr{dtype}, input.data) + img_offset * (n-1)
else
col_buffer = state.etc.col_buffer.data
im2col(input.data, n, col_buffer,
width, height, channels, state.layer.kernel, state.layer.pad, state.layer.stride)
col_buffer = convert(Ptr{dtype}, col_buffer)
end
for g = 1:state.layer.n_group
RawBLAS.gemm!('T', 'N', state.etc.K, state.etc.N, state.etc.M, convert(dtype, 1),
col_buffer + col_offset * (g-1),
top_diff_ptr + top_offset * (g-1), convert(dtype, 1),
convert(Ptr{dtype}, pointer(state.∇filter.data)) + weight_offset * (g-1))
#----------------------------------------------
# filter gradient
if isa(state.etc.col_buffer, NullBlob)
col_buffer = convert(Ptr{dtype}, input.data) + img_offset * (n-1)
else
col_buffer = state.etc.col_buffer.data
im2col(input.data, n, col_buffer,
width, height, channels, state.layer.kernel, state.layer.pad, state.layer.stride)
col_buffer = convert(Ptr{dtype}, col_buffer)
end
for g = 1:state.layer.n_group
RawBLAS.gemm!('T', 'N', state.etc.K, state.etc.N, state.etc.M, convert(dtype, 1),
col_buffer + col_offset * (g-1),
top_diff_ptr + top_offset * (g-1), convert(dtype, 1),
convert(Ptr{dtype}, pointer(state.∇filter.data)) + weight_offset * (g-1))
end
end
end

28 changes: 22 additions & 6 deletions src/layers/inner-product.jl
@@ -35,6 +35,8 @@ type InnerProductLayerState <: LayerState
# an all-ones vector used in gemm to help with the bias calculation
bias_multipliers :: Vector{Blob}

frozen :: Bool

InnerProductLayerState(backend::Backend, layer::InnerProductLayer, shared_params, inputs::Vector{Blob}) = begin
fea_size = get_fea_size(inputs[1])
data_type = eltype(inputs[1])
@@ -81,11 +83,22 @@ type InnerProductLayerState <: LayerState
state.b = param_bias.blob
state.∇b = param_bias.gradient
state.parameters = [param_weight, param_bias]
state.frozen = false

return state
end
end

function freeze!(state::InnerProductLayerState)
state.frozen = true
end
function unfreeze!(state::InnerProductLayerState)
state.frozen = false
end
function is_frozen(state::InnerProductLayerState)
state.frozen
end

function setup(backend::Backend, layer::InnerProductLayer, shared_state, inputs::Vector{Blob}, diffs::Vector{Blob})
state = InnerProductLayerState(backend, layer, shared_state, inputs)
return state
@@ -131,13 +144,16 @@ function backward(backend::CPUBackend, state::InnerProductLayerState, inputs::Ve
input = inputs[i]
batch_size = get_num(input)
∂f_∂o = state.blobs_diff[i]
BLAS.gemm!('N', 'T', one(data_type), reshape(input.data, (source_dim, batch_size)),
∂f_∂o.data, zero_and_then_one, state.∇W.data)

# ∂f/∂b = sum(∂f/∂o, 2)
BLAS.gemm!('N', 'N', one(data_type), ∂f_∂o.data,
reshape(state.bias_multipliers[i].data, (batch_size, 1)),
zero_and_then_one, state.∇b.data)
if !state.frozen
BLAS.gemm!('N', 'T', one(data_type), reshape(input.data, (source_dim, batch_size)),
∂f_∂o.data, zero_and_then_one, state.∇W.data)

# ∂f/∂b = sum(∂f/∂o, 2)
BLAS.gemm!('N', 'N', one(data_type), ∂f_∂o.data,
reshape(state.bias_multipliers[i].data, (batch_size, 1)),
zero_and_then_one, state.∇b.data)
end

zero_and_then_one = one(data_type)

23 changes: 19 additions & 4 deletions src/layers/tied-inner-product.jl
@@ -31,6 +31,8 @@ type TiedInnerProductLayerState <: LayerState
# an all-ones vector used in gemm to help with the bias calculation
bias_multipliers :: Vector{Blob}

frozen :: Bool

TiedInnerProductLayerState(backend::Backend, layer::TiedInnerProductLayer, shared_params, inputs::Vector{Blob}) = begin
fea_size = get_fea_size(inputs[1])
data_type = eltype(inputs[1])
@@ -76,11 +78,22 @@ type TiedInnerProductLayerState <: LayerState
state.b = params[1].blob
state.∇b = params[1].gradient
state.parameters = params
state.frozen = false

return state
end
end

function freeze!(state::TiedInnerProductLayerState)
state.frozen = true
end
function unfreeze!(state::TiedInnerProductLayerState)
state.frozen = false
end
function is_frozen(state::TiedInnerProductLayerState)
state.frozen
end

function setup(backend::Backend, layer::TiedInnerProductLayer, shared_state, inputs::Vector{Blob}, diffs::Vector{Blob})
TiedInnerProductLayerState(backend, layer, shared_state, inputs)
end
@@ -129,10 +142,12 @@ function backward(backend::CPUBackend, state::TiedInnerProductLayerState, inputs
batch_size = get_num(input)
∂f_∂o = state.blobs_diff[i]

# ∂f/∂b = sum(∂f/∂o, 2)
BLAS.gemm!('N', 'N', one(data_type), ∂f_∂o.data,
reshape(state.bias_multipliers[i].data, (batch_size, 1)), zero_and_then_one,
state.∇b.data)
if !state.frozen
# ∂f/∂b = sum(∂f/∂o, 2)
BLAS.gemm!('N', 'N', one(data_type), ∂f_∂o.data,
reshape(state.bias_multipliers[i].data, (batch_size, 1)), zero_and_then_one,
state.∇b.data)
end

zero_and_then_one = one(data_type)

50 changes: 50 additions & 0 deletions src/net.jl
@@ -1,5 +1,6 @@
export Net
export init, destroy, forward, backward, forward_backward, get_epoch, check_bp_topology
export get_layer, get_layer_state, freeze!, unfreeze!, freeze_all!, unfreeze_all!
export show_statistics, reset_statistics

type Net{T <: Backend}
@@ -38,6 +39,55 @@ function get_epoch(net::Net)
return net.states[net.data_layers[1]].epoch
end

function get_layer(net::Net, idx::Int)
net.layers[idx]
end
function get_layer_index(net::Net, name::String)
index = filter(i -> net.layers[i].name == name, 1:length(net.layers))
@assert length(index) == 1
index[1]
end
function get_layer(net::Net, name::String)
net.layers[get_layer_index(net, name)]
end

function get_layer_state(net::Net, idx::Int)
net.states[idx]
end
function get_layer_state(net::Net, name::String)
net.states[get_layer_index(net, name)]
end

function freeze!(net::Net) end
function freeze!(net::Net, idx::Int...)
for i in idx
freeze!(get_layer_state(net, i))
end
end
function freeze!(net::Net, names::String...)
for name in names
freeze!(get_layer_state(net, name))
end
end

function unfreeze!(net::Net) end
function unfreeze!(net::Net, idx::Int...)
for i in idx
unfreeze!(get_layer_state(net, i))
end
end
function unfreeze!(net::Net, names::String...)
for name in names
unfreeze!(get_layer_state(net, name))
end
end
function freeze_all!(net::Net)
map(freeze!, net.states)
end
function unfreeze_all!(net::Net)
map(unfreeze!, net.states)
end

function init(net::Net)
@debug("Init network $(net.name)")
for i = 1:length(net.layers)
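A typical fine-tuning use of the Net-level helpers added above (a hedged sketch: net is assumed to be an already-constructed Net, and the layer name "output" is hypothetical):

    freeze_all!(net)              # stop computing parameter gradients everywhere
    unfreeze!(net, "output")      # re-enable training for one layer, by name
    @assert !is_frozen(get_layer_state(net, "output"))
    freeze!(net, 2, 3)            # layers can also be addressed by index

Calling freeze! or unfreeze! on a layer without parameters is simply a no-op via the fallbacks in src/layers.jl.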
27 changes: 23 additions & 4 deletions test/layers/convolution.jl
@@ -1,5 +1,5 @@
function test_convolution_layer(backend::Backend, n_group, filter_w, filter_h, pad_w, pad_h, stride_w, stride_h, n_input, T, eps)
println("-- Testing Convolution on $(typeof(backend)){$T} filter=$((filter_w,filter_h))...")
function test_convolution_layer(backend::Backend, n_group, filter_w, filter_h, pad_w, pad_h, stride_w, stride_h, n_input, freeze, T, eps)
println("-- Testing Convolution(frozen=$freeze) on $(typeof(backend)){$T} filter=$((filter_w,filter_h))...")
println(" > Setup")
input_w = 16
input_h = 10
@@ -43,6 +43,9 @@ function test_convolution_layer(backend::Backend, n_group, filter_w, filter_h, p
copy!(state.blobs_diff[i], top_diff[i])
end

if freeze
freeze!(state)
end
backward(backend, state, inputs, data_diffs)

grad_filter_exp = zeros(T, filter_dims)
@@ -61,8 +64,20 @@ function test_convolution_layer(backend::Backend, n_group, filter_w, filter_h, p
grad_bias_got = similar(grad_bias_exp)
copy!(grad_filter_got, state.∇filter)
copy!(grad_bias_got, state.∇bias)
@test all(abs(grad_filter_exp - grad_filter_got) .< eps)
@test all(abs(grad_bias_exp - grad_bias_got) .< eps)

is_grad_filter_match = all(abs(grad_filter_exp - grad_filter_got) .< eps)
is_grad_bias_match = all(abs(grad_bias_exp - grad_bias_got) .< eps)

if freeze
# when frozen, the gradients are not computed, so the values read back
# are uninitialized memory and should not match the expected gradients
# (with very high probability)
@test !is_grad_bias_match
@test !is_grad_filter_match
else
@test is_grad_filter_match
@test is_grad_bias_match
end

shutdown(backend, state)
end
Expand Down Expand Up @@ -163,6 +178,10 @@ function convolution_backward(state, filter::Array, bias::Array, input::Array, t
return (∇filter, ∇bias, ∇input)
end

function test_convolution_layer(backend::Backend, n_group, filter_w, filter_h, pad_w, pad_h, stride_w, stride_h, n_input, T, eps)
test_convolution_layer(backend, n_group, filter_w, filter_h, pad_w, pad_h, stride_w, stride_h, n_input, true, T, eps)
test_convolution_layer(backend, n_group, filter_w, filter_h, pad_w, pad_h, stride_w, stride_h, n_input, false, T, eps)
end
function test_convolution_layer(backend::Backend, n_input, T, eps)
test_convolution_layer(backend, 2, 3, 4, 2, 2, 1, 2, n_input, T, eps)
test_convolution_layer(backend, 1, 1, 1, 0, 0, 1, 1, n_input, T, eps)
