Merge pull request #25 from pluskid/nd-tensor
Customizable "channel" dimension for ND-tensor
pluskid committed Dec 19, 2014
2 parents 44f5311 + 822dabc commit 44fca43
Showing 21 changed files with 462 additions and 330 deletions.
1 change: 1 addition & 0 deletions src/Mocha.jl
@@ -15,6 +15,7 @@ include("exception.jl")
 include("utils/blas.jl")
 include("utils/math.jl")
 include("utils/io.jl")
+include("utils/tensor.jl")
 
 if Config.use_native_extension
   include("utils/im2col-native.jl")
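
The new src/utils/tensor.jl is not shown in this view, but every call site in the diff uses it as pre, mid, post = split_dims(blob, dim). A minimal sketch of such a helper, inferred from those call sites rather than copied from the file (hypothetical reconstruction):

# Hypothetical sketch, not the actual file contents: collapse an N-D shape into
# (product of dims before `dim`, size along `dim`, product of dims after `dim`).
function split_dims(tensor, dim::Int)
  dims = size(tensor)
  dim_pre  = prod(dims[1:dim-1])
  dim_mid  = dims[dim]
  dim_post = prod(dims[dim+1:end])
  (dim_pre, dim_mid, dim_post)
end
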
12 changes: 6 additions & 6 deletions src/cuda/layers/accuracy.jl
@@ -1,6 +1,7 @@
-function setup_etc(backend::GPUBackend, layer::AccuracyLayer, inputs)
-  width, height, channels, num = get_whcn(inputs[1])
-  etc = make_blob(backend, eltype(inputs[1]), (width,height,1,num))
+function setup_etc(backend::GPUBackend, layer::AccuracyLayer, op_dim::Int, inputs)
+  dims = [size(inputs[1])...]
+  dims[op_dim] = 1
+  etc = make_blob(backend, eltype(inputs[1]), dims...)
   return etc
 end
 function shutdown(backend::GPUBackend, state::AccuracyLayerState)
@@ -11,8 +12,7 @@ function forward(backend::GPUBackend, state::AccuracyLayerState, inputs::Vector{
   pred = inputs[1]
   label = inputs[2]
 
-  width, height, channels, num = get_whcn(pred)
-  spatial_dim = width*height
+  spatial_dim, pred_dim, num = split_dims(pred, state.op_dim)
   data_type = eltype(pred)
 
   x_block = int(ceil(float64(num)/CUDA.THREADS_PER_BLOCK_X));
@@ -26,7 +26,7 @@ function forward(backend::GPUBackend, state::AccuracyLayerState, inputs::Vector{
     error("Unsupported data type $data_type")
   end
   CUDA.launch(kernel, (x_block,y_block),(CUDA.THREADS_PER_BLOCK_X,CUDA.THREADS_PER_BLOCK_Y),
-      (pred.ptr.p, label.ptr.p, state.etc.ptr.p, num, channels, spatial_dim));
+      (pred.ptr.p, label.ptr.p, state.etc.ptr.p, num, pred_dim, spatial_dim));
 
   N = num * spatial_dim
   accuracy = CuBLAS.dot(backend.cublas_ctx, data_type, N, state.etc.ptr, 1, state.etc.ptr, 1)
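
With the default operating dimension (the second-to-last axis of a 4-D WHCN blob), the new split_dims call yields exactly the values the old get_whcn-based code computed, so the kernel launch is unchanged in that case. A quick illustrative check with arbitrary sizes, assuming the split_dims semantics sketched above:

# Illustrative only: for a (width, height, channels, num) prediction blob
# with op_dim = 3, split_dims reproduces the old get_whcn-derived values.
pred_size = (28, 28, 10, 100)
spatial_dim = prod(pred_size[1:2])   # 784, previously width*height
pred_dim    = pred_size[3]           # 10, previously channels
num         = pred_size[4]           # 100
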
3 changes: 1 addition & 2 deletions src/cuda/layers/argmax.jl
@@ -3,8 +3,7 @@ function forward(backend::GPUBackend, state::ArgmaxLayerState, inputs::Vector{Bl
     input = inputs[i]
     output = state.blobs[i]
 
-    width, height, channels, num = get_whcn(input)
-    spatial_dim = width*height
+    spatial_dim, channels, num = split_dims(input, state.dims[i])
     data_type = eltype(input)
 
     x_block = int(ceil(float64(num)/CUDA.THREADS_PER_BLOCK_X));
44 changes: 19 additions & 25 deletions src/cuda/layers/channel-pooling.jl
@@ -1,16 +1,14 @@
-function setup_etc(backend::GPUBackend, layer::ChannelPoolingLayer, inputs, pooled_chann)
+function setup_etc(backend::GPUBackend, layer::ChannelPoolingLayer, inputs, blobs)
   if isa(layer.pooling, Pooling.Max)
     masks = Array(CuPtr, length(inputs))
     for i = 1:length(inputs)
-      masks[i] = CUDA.cualloc(Csize_t, get_width(inputs[i]) * get_height(inputs[i]) *
-          pooled_chann[i] * get_num(inputs[i]))
+      masks[i] = CUDA.cualloc(Csize_t, length(blobs[i]))
     end
     etc = masks
   elseif isa(layer.pooling, Pooling.Mean)
     integrals = Array(CuPtr, length(inputs))
     for i = 1:length(inputs)
-      integrals[i] = CUDA.cualloc(eltype(inputs[i]), get_width(inputs[i]) * get_height(inputs[i]) *
-          get_chann(inputs[i]))
+      integrals[i] = CUDA.cualloc(eltype(inputs[i]), prod(size(inputs[i])[1:end-1]))
     end
     etc = integrals
   else
@@ -39,9 +37,9 @@ function forward(backend::GPUBackend, pool::StdPoolingFunction,
     output = state.blobs[i]
 
     if isa(pool, Pooling.Max)
-      cuda_max_channel_pooling_forward(backend, input, output, state.etc[i], state.layer)
+      cuda_max_channel_pooling_forward(backend, input, output, state.etc[i], state.layer, state.op_dims[i])
     elseif isa(pool, Pooling.Mean)
-      cuda_mean_channel_pooling_forward(backend, input, output, state.etc[i], state.layer)
+      cuda_mean_channel_pooling_forward(backend, input, output, state.etc[i], state.layer, state.op_dims[i])
     else
       error("Pooling for $pool not implemented yet")
     end
@@ -59,9 +57,9 @@ function backward(backend::GPUBackend, pool::StdPoolingFunction, state::ChannelP
     diff = diffs[i]
     if !isa(diff, NullBlob)
       if isa(pool, Pooling.Max)
-        cuda_max_channel_pooling_backward(backend, diff, state.blobs_diff[i], state.etc[i], state.layer)
+        cuda_max_channel_pooling_backward(backend, diff, state.blobs_diff[i], state.etc[i], state.layer, state.op_dims[i])
       elseif isa(pool, Pooling.Mean)
-        cuda_mean_channel_pooling_backward(backend, diff, state.blobs_diff[i], state.layer)
+        cuda_mean_channel_pooling_backward(backend, diff, state.blobs_diff[i], state.layer, state.op_dims[i])
       else
         error("Pooling for $pool not implemented yet")
       end
@@ -72,15 +70,14 @@ function backward(backend::GPUBackend, pool::StdPoolingFunction, state::ChannelP
 end
 
 function cuda_mean_channel_pooling_forward{T}(backend::GPUBackend, input::CuTensorBlob{T},
-    output::CuTensorBlob{T}, integral::CuPtr, layer)
+    output::CuTensorBlob{T}, integral::CuPtr, layer, op_dim)
 
-  width, height, channels, num = size(input)
-  pooled_chann = size(output, 3)
+  spatial_dim_T, channels, num = split_dims(input, op_dim)
+  pooled_chann = size(output, op_dim)
   one = convert(T, 1)
   neg_one = convert(T, -1)
   scale = convert(T, 1.0/layer.kernel)
 
-  spatial_dim_T = width*height
   spatial_dim = spatial_dim_T * sizeof(T)
   fea_dim = spatial_dim * channels
   output_fea_dim = spatial_dim * pooled_chann
@@ -118,15 +115,14 @@ function cuda_mean_channel_pooling_forward{T}(backend::GPUBackend, input::CuTens
 end
 
 function cuda_mean_channel_pooling_backward{T}(backend::GPUBackend, input::CuTensorBlob{T},
-    output::CuTensorBlob{T}, layer)
+    output::CuTensorBlob{T}, layer, op_dim)
 
-  width, height, channels, num = size(input)
-  pooled_chann = size(output, 3)
+  spatial_dim_T, channels, num = split_dims(input, op_dim)
+  pooled_chann = size(output, op_dim)
   scale = 1/convert(T, layer.kernel)
 
   fill!(input, 0)
 
-  spatial_dim_T = width*height
   spatial_dim = spatial_dim_T * sizeof(T)
   fea_dim = spatial_dim * channels
   output_fea_dim = spatial_dim * pooled_chann
@@ -159,11 +155,10 @@ function cuda_geometry_max_chann_pool(sp_dim::Int, num::Int)
 
 end
 function cuda_max_channel_pooling_forward{T}(backend::GPUBackend, input::CuTensorBlob{T},
-    output::CuTensorBlob{T}, mask::CuPtr, layer)
+    output::CuTensorBlob{T}, mask::CuPtr, layer, op_dim)
 
-  width, height, channels, num = size(input)
-  sp_dim = width*height
-  pooled_chann = get_chann(output)
+  sp_dim, channels, num = split_dims(input, op_dim)
+  pooled_chann = size(output, op_dim)
 
   cuda_dim = cuda_geometry_max_chann_pool(sp_dim, num);
   if T == Float32
@@ -179,11 +174,10 @@ function cuda_max_channel_pooling_forward{T}(backend::GPUBackend, input::CuTenso
 end
 
 function cuda_max_channel_pooling_backward{T}(backend::GPUBackend, input::CuTensorBlob{T},
-    output::CuTensorBlob{T}, mask::CuPtr, layer)
+    output::CuTensorBlob{T}, mask::CuPtr, layer, op_dim)
 
-  width, height, channels, num = size(input)
-  sp_dim = width*height
-  pooled_chann = get_chann(output)
+  sp_dim, channels, num = split_dims(input, op_dim)
+  pooled_chann = size(output, op_dim)
 
   cuda_dim = cuda_geometry_max_chann_pool(sp_dim, num);
   if T == Float32
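
The mean-pooling helpers advance raw device pointers, so spatial_dim, fea_dim, and output_fea_dim above are byte offsets rather than element counts. A worked example with made-up sizes, assuming Float32 data and pooling along op_dim = 3:

# Worked example (hypothetical sizes): byte strides used by the mean-pooling path.
T = Float32
spatial_dim_T  = 12 * 12                     # elements per channel slice
spatial_dim    = spatial_dim_T * sizeof(T)   # 576 bytes per channel slice
channels       = 20
pooled_chann   = 10                          # e.g. kernel = 2, stride = 2
fea_dim        = spatial_dim * channels      # bytes per input sample
output_fea_dim = spatial_dim * pooled_chann  # bytes per output sample
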
4 changes: 1 addition & 3 deletions src/cuda/layers/multinomial-logistic-loss.jl
@@ -3,9 +3,7 @@ function forward(backend::GPUBackend, state::MultinomialLogisticLossLayerState,
   label = inputs[2]
   data_type = eltype(pred)
 
-  width, height, channels, num = get_whcn(pred)
-
-  spatial_dim = height*width
+  spatial_dim, channels, num = split_dims(pred, state.op_dim)
   prob_dim = channels
 
   x_block = int(ceil(float64(num)/CUDA.THREADS_PER_BLOCK_X))
4 changes: 1 addition & 3 deletions src/cuda/layers/softmax-loss.jl
@@ -4,9 +4,7 @@ function backward(backend::GPUBackend, state::SoftmaxLossLayerState, inputs::Vec
   copy!(diff, state.softmax.blobs[1])
 
   data_type = eltype(diff)
-  height, width, channels, num = get_whcn(diff)
-
-  spatial_dim = height*width
+  spatial_dim, channels, num = split_dims(diff, state.logistic.op_dim)
   prob_dim = channels
 
   x_block = int(ceil(float64(num)/CUDA.THREADS_PER_BLOCK_X))
7 changes: 4 additions & 3 deletions src/cuda/layers/softmax.jl
@@ -3,12 +3,13 @@ type CuDNNSoftmaxState
   outputs_desc :: Vector{CuDNN.Tensor4dDescriptor}
 end
 
-function setup_etc(backend::GPUBackend, layer::SoftmaxLayer, data_type, inputs)
+function setup_etc(backend::GPUBackend, layer::SoftmaxLayer, dims::Vector{Int}, data_type, inputs)
   inputs_desc = Array(CuDNN.Tensor4dDescriptor, length(inputs))
   outputs_desc = Array(CuDNN.Tensor4dDescriptor, length(inputs))
   for i = 1:length(inputs)
-    inputs_desc[i] = CuDNN.create_tensor4d_descriptor(data_type, get_whcn(inputs[i]))
-    outputs_desc[i] = CuDNN.create_tensor4d_descriptor(data_type, get_whcn(inputs[i]))
+    dim_sp, dim_prob, dim_num = split_dims(inputs[i], dims[i])
+    inputs_desc[i] = CuDNN.create_tensor4d_descriptor(data_type, (1,dim_sp,dim_prob,dim_num))
+    outputs_desc[i] = CuDNN.create_tensor4d_descriptor(data_type, (1,dim_sp,dim_prob,dim_num))
   end
   etc = CuDNNSoftmaxState(inputs_desc, outputs_desc)
   return etc
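
Because the CuDNN wrapper used here only exposes 4-D tensor descriptors, an N-D blob is folded into a (1, spatial, prob, num) shape so that softmax still normalizes over the chosen dimension. An illustration with a made-up 5-D blob, again assuming the split_dims semantics sketched earlier:

# Illustration (hypothetical shape): a 5-D blob of size (7, 7, 3, 10, 32) with
# the softmax dimension dims[i] = 4 is described to cuDNN as a 4-D tensor.
dim_sp, dim_prob, dim_num = 7*7*3, 10, 32    # what split_dims(input, 4) would give
desc_shape = (1, dim_sp, dim_prob, dim_num)  # (1, 147, 10, 32)
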
29 changes: 17 additions & 12 deletions src/layers/accuracy.jl
@@ -1,6 +1,7 @@
 @defstruct AccuracyLayer Layer (
   name :: String = "accuracy",
   report_error :: Bool = false,
+  (dim :: Int = -2, dim != 0),
   (bottoms :: Vector{Symbol} = Symbol[], length(bottoms) == 2),
 )
 @characterize_layer(AccuracyLayer,
@@ -11,18 +12,24 @@
 type AccuracyLayerState <: LayerState
   layer :: AccuracyLayer
 
+  op_dim :: Int
   accuracy :: Float64
   n_accum :: Int
   etc :: Any
 end
 
-function setup_etc(backend::CPUBackend, layer::AccuracyLayer, inputs)
+function setup_etc(backend::CPUBackend, layer::AccuracyLayer, op_dim::Int, inputs)
   nothing
 end
 
 function setup(backend::Backend, layer::AccuracyLayer, inputs::Vector{Blob}, diffs::Vector{Blob})
-  etc = setup_etc(backend, layer, inputs)
-  return AccuracyLayerState(layer, 0.0, 0, etc)
+  total_dim = ndims(inputs[1])
+  dim = layer.dim < 0 ? layer.dim + total_dim + 1 : layer.dim
+  @assert 1 <= dim <= total_dim
+  @assert dim != total_dim
+
+  etc = setup_etc(backend, layer, dim, inputs)
+  return AccuracyLayerState(layer, dim, 0.0, 0, etc)
 end
 function shutdown(backend::CPUBackend, state::AccuracyLayerState)
 end
@@ -48,20 +55,18 @@ function forward(backend::CPUBackend, state::AccuracyLayerState, inputs::Vector{
   pred = inputs[1].data
   label = inputs[2].data
 
-  width, height, channels, num = get_whcn(pred)
-  canonical_pred = reshape(pred, (width,height,channels,num))
-  canonical_label = reshape(label, (width,height,1,num))
+  dim_pre, dim_prob, dim_post = split_dims(pred, state.op_dim)
 
   accuracy = 0.0
-  for w = 1:width
-    for h = 1:height
-      for n = 1:num
-        if int(canonical_label[w,h,1,n])+1 == indmax(canonical_pred[w,h,:,n])
-          accuracy += 1.0
-        end
+  for i = 0:dim_pre-1
+    for j = 0:dim_post-1
+      idx = Int[i + dim_pre*(k + dim_prob*j) for k=0:dim_prob-1] + 1
+      @inbounds if int(label[i + dim_pre*j + 1])+1 == indmax(pred[idx])
+        accuracy += 1.0
+      end
     end
   end
 
   state.accuracy = float64(state.accuracy * state.n_accum + accuracy) / (state.n_accum + length(label))
   state.n_accum += length(label)
 end
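
The rewritten CPU forward pass walks the flat array directly instead of reshaping to WHCN: with pre/prob/post sizes from split_dims, the element at zero-based offsets (i, k, j) of the conceptually reshaped array sits at linear index i + dim_pre*(k + dim_prob*j) + 1, and the label blob, whose operating dimension has size 1, is read at i + dim_pre*j + 1. A small equivalence check with arbitrary sizes (illustrative, not part of the commit):

# Equivalence check: the linear indices built in forward() pick out the same
# "probability column" as indexing a reshaped view of the array.
pred = rand(4, 5, 3, 2)                   # e.g. (width, height, channels, num)
dim_pre, dim_prob, dim_post = 4*5, 3, 2   # what split_dims(pred, 3) would give
flat = reshape(pred, dim_pre, dim_prob, dim_post)
i, j = 7, 1                               # arbitrary zero-based pre/post offsets
idx = Int[i + dim_pre*(k + dim_prob*j) for k = 0:dim_prob-1] + 1
@assert vec(pred)[idx] == vec(flat[i+1, :, j+1])
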
47 changes: 27 additions & 20 deletions src/layers/argmax.jl
@@ -1,45 +1,52 @@
 @defstruct ArgmaxLayer Layer (
   name :: String = "argmax",
+  (dim :: Int = -2, dim != 0),
   (tops :: Vector{Symbol} = Symbol[], length(tops) > 0),
   (bottoms :: Vector{Symbol} = Symbol[], length(bottoms) == length(tops)),
 )
 
 type ArgmaxLayerState <: LayerState
   layer :: ArgmaxLayer
   blobs :: Vector{Blob}
 
+  dims :: Vector{Int}
 end
 
 function setup(backend::Backend, layer::ArgmaxLayer, inputs::Vector{Blob}, diffs::Vector{Blob})
-  blobs = map(inputs) do input
-    width, height, channels, num = get_whcn(input)
-    data_type = eltype(input)
-
-    blob = make_blob(backend, data_type, width, height, 1, num)
-    blob
+  dims = Array(Int, length(inputs))
+  blobs = Array(Blob, length(inputs))
+  for i = 1:length(inputs)
+    total_dim = ndims(inputs[i])
+    dim = layer.dim < 0 ? layer.dim + total_dim + 1 : layer.dim
+    @assert 1 <= dim <= total_dim
+    @assert dim != total_dim
+    dims[i] = dim
+    shape = [size(inputs[i])...]
+    shape[dim] = 1
+    blobs[i] = make_blob(backend, eltype(inputs[i]), shape...)
   end
 
-  return ArgmaxLayerState(layer, blobs)
+  return ArgmaxLayerState(layer, blobs, dims)
 end
 
 function forward(backend::CPUBackend, state::ArgmaxLayerState, inputs::Vector{Blob})
   for i = 1:length(inputs)
     input = inputs[i].data
     output = state.blobs[i].data
-    width, height, channels, num = get_whcn(input)
-    canonical_input = reshape(input, (width,height,channels,num))
-    for n = 1:num
-      for w = 1:width
-        for h = 1:height
-          maxc = 1; maxval = canonical_input[w,h,maxc,n]
-          for c = 2:channels
-            @inbounds val = canonical_input[w,h,c,n]
-            if val > maxval
-              maxval = val
-              maxc = c
-            end
+    pre_dim, mid_dim, post_dim = split_dims(input, state.dims[i])
+    for x = 0:pre_dim-1
+      for z = 0:post_dim-1
+        idx = Int[x + pre_dim*(y + mid_dim*z) for y=0:mid_dim-1] + 1
+        maxc = 1
+        @inbounds maxval = input[idx[1]]
+        for y = 2:length(idx)
+          @inbounds val = input[idx[y]]
+          if val > maxval
+            maxval = val
+            maxc = y
+          end
-          @inbounds output[w,h,1,n] = maxc-1
         end
+        @inbounds output[x + pre_dim*z + 1] = maxc-1
       end
     end
   end
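
Both AccuracyLayer and ArgmaxLayer now take a dim parameter that may be negative, counted from the end, with the default -2 selecting the second-to-last dimension (the channel dimension of a traditional WHCN blob). The resolution rule used by both setup functions, shown on a small example:

# Illustration: resolving a possibly negative `dim`, as done in setup() above.
total_dim = 4                             # e.g. a (W, H, C, N) input
layer_dim = -2                            # the default
dim = layer_dim < 0 ? layer_dim + total_dim + 1 : layer_dim
# dim == 3, the channel dimension. setup() additionally asserts
# 1 <= dim <= total_dim and dim != total_dim, so the last (num) axis
# can never be chosen as the operating dimension.
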
