Skip to content

Commit

Permalink
ported SpatialUpSamplingNearest from cunn
Browse files Browse the repository at this point in the history
  • Loading branch information
pawni committed Feb 11, 2016
1 parent 19ab3b5 commit 091882d
Show file tree
Hide file tree
Showing 8 changed files with 388 additions and 5 deletions.
5 changes: 3 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ SET(CMAKE_CXX_FLAGS "-std=c++0x -Wall")

execute_process( COMMAND git log -n 1 --pretty=%h OUTPUT_VARIABLE git_commit OUTPUT_STRIP_TRAILING_WHITESPACE)
#execute_process( COMMAND echo string commit="${git_commit}" > ${CMAKE_CURRENT_SOURCE_DIR}/commit.h )
file(GENERATE OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/clnn_commit_generated.h
file(GENERATE OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/clnn_commit_generated.h
CONTENT "const char *clnn_commit=\"${git_commit}\";\n"
)

Expand All @@ -29,9 +29,10 @@ message("Torch_INSTALL_LIB ${Torch_INSTALL_LIB}")

set(src init.cpp utils.cpp
)

set(luasrc init.lua MSECriterion.lua Pointwise.lua Threshold.lua
LogSoftMax.lua ClassNLLCriterion.lua StatefulTimer.lua THCLNN.lua
Narrow.lua CMulTable.lua test.lua test/testSpatialMaxPooling.lua test/testSpatialConvolutionMM.lua
SpatialUpSamplingNearest.lua Narrow.lua CMulTable.lua test.lua test/testSpatialMaxPooling.lua test/testSpatialConvolutionMM.lua
test/testMSECriterion.lua test/testSoftMax.lua test/testLogSoftMax.lua test/testSpatialAveragePooling.lua)
ADD_TORCH_PACKAGE(clnn "${src}" "${luasrc}" )

Expand Down
41 changes: 41 additions & 0 deletions SpatialUpSamplingNearest.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
require 'nn'

-- Keep handles to the stock nn implementations so that inputs which are not
-- torch.ClTensor can be delegated to them by the overrides in this file.
-- The `rawget(...) or` guard makes the capture idempotent: if this file is
-- evaluated a second time, the already-saved originals are kept instead of
-- being replaced by the overrides (which would make the fallback path call
-- itself without end).
nn.SpatialUpSamplingNearest.baseUpdateOutput =
   rawget(nn.SpatialUpSamplingNearest, 'baseUpdateOutput')
   or nn.SpatialUpSamplingNearest.updateOutput
nn.SpatialUpSamplingNearest.baseUpdateGradInput =
   rawget(nn.SpatialUpSamplingNearest, 'baseUpdateGradInput')
   or nn.SpatialUpSamplingNearest.updateGradInput

--- Forward pass of nearest-neighbour upsampling with an OpenCL path.
-- Inputs that are not `torch.ClTensor` are delegated to the implementation
-- saved in `baseUpdateOutput`; ClTensor inputs are handled through the THNN
-- binding reachable from the tensor (`input.THNN`).
-- @param input 3D or 4D tensor; its last two dimensions are multiplied by
--   `self.scale_factor` in the output
-- @return `self.output`
-- Raises an error when a ClTensor input is neither 3D nor 4D.
function nn.SpatialUpSamplingNearest:updateOutput(input)
   if torch.type(input) ~= 'torch.ClTensor' then
      -- Forward only `input`: the former second argument `target` was an
      -- undeclared global (nil in practice, and an error under
      -- strict-globals checking).
      return self:baseUpdateOutput(input)
   end

   local ndim = input:dim()
   if ndim ~= 4 and ndim ~= 3 then
      error('SpatialUpSamplingNearest only support 3D or 4D tensors')
   end

   -- Copy the input size, then scale the two trailing dimensions.
   local xdim = ndim
   local ydim = ndim - 1
   for i = 1, ndim do
      self.inputSize[i] = input:size(i)
      self.outputSize[i] = input:size(i)
   end
   self.outputSize[ydim] = self.outputSize[ydim] * self.scale_factor
   self.outputSize[xdim] = self.outputSize[xdim] * self.scale_factor

   -- Resize the output if needed. The 3D case passes the three sizes
   -- explicitly rather than the whole size object (presumably because
   -- self.outputSize holds more than three entries -- confirm against nn).
   if ndim == 3 then
      self.output:resize(self.outputSize[1], self.outputSize[2],
                         self.outputSize[3])
   else
      self.output:resize(self.outputSize)
   end

   input.THNN.SpatialUpSamplingNearest_updateOutput(
      input:cdata(), self.output:cdata(), self.scale_factor)

   return self.output
end

--- Backward pass of nearest-neighbour upsampling with an OpenCL path.
-- Non-ClTensor inputs are handed to the implementation saved in
-- `baseUpdateGradInput`; otherwise `self.gradInput` is resized to match
-- `input` and filled by the THNN binding.
-- @param input the tensor that was given to the forward pass
-- @param gradOutput gradient with respect to the module output
-- @return `self.gradInput`
function nn.SpatialUpSamplingNearest:updateGradInput(input, gradOutput)
   local is_cl = torch.type(input) == 'torch.ClTensor'
   if not is_cl then
      return self:baseUpdateGradInput(input, gradOutput)
   end

   self.gradInput:resizeAs(input)
   input.THNN.SpatialUpSamplingNearest_updateGradInput(
      input:cdata(),
      gradOutput:cdata(),
      self.gradInput:cdata(),
      self.scale_factor
   )
   return self.gradInput
end
13 changes: 12 additions & 1 deletion THCLNN.lua
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,18 @@ TH_API void THNN_ClSoftMax_updateGradInput(
THClTensor *gradOutput,
THClTensor *gradInput,
THClTensor *output);
TH_API void THNN_ClSpatialUpSamplingNearest_updateOutput(
THClState *state,
THClTensor *input,
THClTensor *output,
int scale_factor);
TH_API void THNN_ClSpatialUpSamplingNearest_updateGradInput(
THClState *state,
THClTensor *input,
THClTensor *gradOutput,
THClTensor *gradInput,
int scale_factor);
]]

local preprocessed = string.gsub(THCLNN_h, 'TH_API ', '')
Expand All @@ -154,4 +166,3 @@ THNN.kernels['torch.ClTensor'] = THNN.bind(THCLNN.C, function_names, 'Cl', THCLN
torch.getmetatable('torch.ClTensor').THNN = THNN.kernels['torch.ClTensor']

return THCLNN

1 change: 1 addition & 0 deletions init.lua
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,4 @@ include 'CMulTable.lua'

include 'test.lua'

include 'SpatialUpSamplingNearest.lua'
3 changes: 1 addition & 2 deletions lib/THCLNN/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ ENDIF()

#FILE(GLOB src-cl *.cpp)
set(src-cl Abs.cpp SpatialConvolutionMM.cpp ELU.cpp SpatialAveragePooling.cpp SpatialMaxPooling.cpp
SoftMax.cpp Tanh.cpp common.cpp
SoftMax.cpp Tanh.cpp common.cpp SpatialUpSamplingNearest.cpp
)

add_library(THCLNN MODULE ${src-cl})
Expand All @@ -21,4 +21,3 @@ TARGET_LINK_LIBRARIES(THCLNN THCl TH EasyCL)

INSTALL(TARGETS THCLNN
LIBRARY DESTINATION ${THCLNN_INSTALL_LIB_SUBDIR})

69 changes: 69 additions & 0 deletions lib/THCLNN/SpatialUpSamplingNearest.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// from SpatialUpSamplingNearest.cu:

/*
 * Map a flat index into a 4-D array whose three inner extents are
 * (d1, d2, d3) onto the flat index of the corresponding element in an array
 * whose two innermost extents are d2/scale_factor and d3/scale_factor.
 * The two innermost coordinates are divided by scale_factor (integer
 * division); the two outer coordinates are carried over unchanged.
 */
/*__device__*/ int translate_idx(int ii, int d1, int d2, int d3, int scale_factor)
{
  /* Peel the coordinates off the flat index, innermost first. */
  const int w = ii % d3;
  int rest = ii / d3;
  const int z = rest % d2;
  rest = rest / d2;
  const int y = rest % d1;
  const int x = rest / d1;

  /* Coordinates and extents in the smaller array. */
  const int small_w = w / scale_factor;
  const int small_z = z / scale_factor;
  const int small_d2 = d2 / scale_factor;
  const int small_d3 = d3 / scale_factor;

  return (((x * d1 + y) * small_d2) + small_z) * small_d3 + small_w;
}
/*
 * Inverse-direction mapping: take a flat index into a 4-D array whose three
 * inner extents are (d1, d2, d3) and return the flat index of one element in
 * an array whose two innermost extents are d2*scale_factor and
 * d3*scale_factor. The innermost coordinate becomes w*scale_factor + off_x
 * and the next one z*scale_factor + off_y; the outer two are unchanged.
 */
/*__device__*/ int translate_idx_inv(int ii, int d1, int d2, int d3, int scale_factor, int off_x, int off_y)
{
  /* Peel the coordinates off the flat index, innermost first. */
  const int w = ii % d3;
  int rest = ii / d3;
  const int z = rest % d2;
  rest = rest / d2;
  const int y = rest % d1;
  const int x = rest / d1;

  /* Coordinates and extents in the larger array. */
  const int big_w = w * scale_factor + off_x;
  const int big_z = z * scale_factor + off_y;
  const int big_d2 = d2 * scale_factor;
  const int big_d3 = d3 * scale_factor;

  return (((x * d1 + y) * big_d2) + big_z) * big_d3 + big_w;
}

/*
 * Nearest-neighbour upscale: each work-item whose flat index is below
 * no_elements writes one output element, copied from the input element
 * that translate_idx() selects for it. d1, d2, d3 are forwarded unchanged
 * to translate_idx().
 */
kernel void upscale(global float *input_data, int input_offset, global float *output_data, int output_offset, long no_elements,
    int scale_factor, int d1, int d2, int d3)
{
  global float *src = input_data + input_offset;
  global float *dst = output_data + output_offset;

  /* Flat index of the output element handled by this work-item. */
  long out_idx = get_local_id(0) + get_local_size(0) * get_group_id(0);
  out_idx += get_local_id(1) + get_local_size(1) * (get_local_size(0) * get_num_groups(0)) * get_group_id(1);

  /* Work-items past the end of the data have nothing to do. */
  if (out_idx >= no_elements) {
    return;
  }

  const int in_idx = translate_idx(out_idx, d1, d2, d3, scale_factor);
  dst[out_idx] = src[in_idx];
}

/*
 * Description:
 *   Gradient of the nearest-neighbour upscale. Each work-item whose flat
 *   index is below no_elements adds, into one gradInput element, the
 *   scale_factor x scale_factor gradOutput elements that translate_idx_inv()
 *   selects for it. The kernel only accumulates (+=); it never clears
 *   gradInput (presumably the host zeroes it before launch -- confirm).
 */
kernel void downscale(global float *gradInput_data_data, int gradInput_data_offset, global float *gradOutput_data_data, int gradOutput_data_offset, long no_elements,
    int scale_factor, int d1, int d2, int d3)
{
  global float *grad_in = gradInput_data_data + gradInput_data_offset;
  global float *grad_out = gradOutput_data_data + gradOutput_data_offset;

  /* Flat index of the gradInput element handled by this work-item. */
  long in_idx = get_local_id(0) + get_local_size(0) * get_group_id(0);
  in_idx += get_local_id(1) + get_local_size(1) * (get_local_size(0) * get_num_groups(0)) * get_group_id(1);

  /* Work-items past the end of the data have nothing to do. */
  if (in_idx >= no_elements) {
    return;
  }

  /* Visit every offset pair inside the scale_factor x scale_factor patch. */
  for (int off_a = 0; off_a < scale_factor; off_a++) {
    for (int off_b = 0; off_b < scale_factor; off_b++) {
      const int out_idx = translate_idx_inv(in_idx, d1, d2, d3, scale_factor, off_a, off_b);
      grad_in[in_idx] += grad_out[out_idx];
    }
  }
}
Loading

0 comments on commit 091882d

Please sign in to comment.