Merge branch 'cudaconv'
clementfarabet committed Sep 6, 2012
2 parents e35a228 + 4086394 commit 9143bb0
Showing 8 changed files with 700 additions and 1 deletion.
417 changes: 417 additions & 0 deletions extra/cuda/lib/THC/THCTensorConv.cu

(Large diff not rendered.)

4 changes: 4 additions & 0 deletions extra/cuda/lib/THC/THCTensorConv.h
@@ -15,4 +15,8 @@ TH_API void THCudaTensor_conv2DRevgerm(THCudaTensor *output, float beta, float a
                                       THCudaTensor *input, THCudaTensor *kernel,
                                       long srow, long scol);

TH_API void THCudaTensor_conv2Dmap(THCudaTensor *output, THCudaTensor *input,
                                   THCudaTensor *kernel, long stride_x, long stride_y,
                                   THCudaTensor *table, long fanin);

#endif
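
A note on the new entry point: conv2Dmap is a convolution routed through a connection table with fixed fan-in, so each output plane is computed from exactly fanin input planes rather than all of them. The table argument that eventually reaches it is built on the Lua side; a minimal sketch of what such a table looks like (sizes are illustrative, not from this commit):

-- Illustrative sketch: a fixed-fanin random connection table, as built by
-- nn.tables.random(nin, nout, fanin). Each row is one (input plane,
-- output plane) pair; with 4 output planes and fanin 2 there are 8 rows.
require 'nn'
local tbl = nn.tables.random(8, 4, 2)
print(tbl:size())  -- 8x2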
57 changes: 57 additions & 0 deletions extra/cuda/pkg/cunn/SpatialConvolutionMap.cu
@@ -0,0 +1,57 @@
static int cunn_SpatialConvolutionMap_updateOutput(lua_State *L)
{
  THCudaTensor *input = (THCudaTensor*)luaT_checkudata(L, 2, torch_CudaTensor_id);
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  THCudaTensor *weight = (THCudaTensor*)luaT_getfieldcheckudata(L, 1, "weight", torch_CudaTensor_id);
  THCudaTensor *bias = (THCudaTensor*)luaT_getfieldcheckudata(L, 1, "bias", torch_CudaTensor_id);
  THCudaTensor *output = (THCudaTensor*)luaT_getfieldcheckudata(L, 1, "output", torch_CudaTensor_id);
  THCudaTensor *connTableRev = (THCudaTensor*)luaT_getfieldcheckudata(L, 1, "connTableRev", torch_CudaTensor_id);
  luaL_argcheck(L, connTableRev->nDimension == 3, 2,
                "Reverse table not generated (is table fixed fanin?)");
  luaL_argcheck(L, input->nDimension == 3, 2, "3D tensor is expected");

  int dimw = 2;
  int dimh = 1;

  // long nOutputPlane = weight->size[0];
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
  long kW = weight->size[2];
  long kH = weight->size[1];
  long inputWidth = input->size[dimw];
  long inputHeight = input->size[dimh];
  long outputWidth = (inputWidth - kW) / dW + 1;
  long outputHeight = (inputHeight - kH) / dH + 1;
  long fanin = weight->size[0] / nOutputPlane;

  THCudaTensor_resize3d(output, nOutputPlane, outputHeight, outputWidth);

  /* add bias first */
  long k;
  THCudaTensor *outputPlane = THCudaTensor_new();
  for (k = 0; k < nOutputPlane; k++) {
    THCudaTensor_select(outputPlane, output, 0, k);
    THCudaTensor_fill(outputPlane, THCudaTensor_get1d(bias, k));
  }
  THCudaTensor_free(outputPlane);

  /* do convolutions */
  THCudaTensor_conv2Dmap(output, input, weight, dW, dH, connTableRev, fanin);
  return 1;
}

static const struct luaL_Reg cunn_SpatialConvolutionMap__ [] = {
  {"SpatialConvolutionMap_updateOutput", cunn_SpatialConvolutionMap_updateOutput},
  {NULL, NULL}
};

static void cunn_SpatialConvolutionMap_init(lua_State *L)
{
  luaT_pushmetaclass(L, torch_CudaTensor_id);
  luaT_registeratname(L, cunn_SpatialConvolutionMap__, "nn");
  lua_pop(L, 1);
}
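
For context, the binding above is what makes the usual Lua-side call path run on the GPU. A minimal sketch, assuming cutorch/cunn are installed (sizes illustrative; the test later in this commit additionally copies connTable/connTableRev between modules so CPU and GPU use the same random table):

-- Sketch: CUDA-backed SpatialConvolutionMap forward. Module:cuda()
-- converts the module's tensor members, including the connection tables.
require 'cunn'
local conv = nn.SpatialConvolutionMap(nn.tables.random(16, 32, 4), 5, 5, 1, 1):cuda()
local input = torch.randn(16, 64, 64):cuda()
local output = conv:forward(input)  -- 32 x 60 x 60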
75 changes: 75 additions & 0 deletions extra/cuda/pkg/cunn/Sqrt.cu
@@ -0,0 +1,75 @@
struct sqrtupdateOutput_functor
{
  const double bias;

  sqrtupdateOutput_functor(double bias_) : bias(bias_) {}

  __host__ __device__ float operator()(const float& input) const
  {
    return sqrt(input + bias);
  }
};

static int cunn_Sqrt_updateOutput(lua_State *L)
{
  double bias = luaT_getfieldchecknumber(L, 1, "eps");
  THCudaTensor *input = (THCudaTensor*)luaT_checkudata(L, 2, torch_CudaTensor_id);
  THCudaTensor *output = (THCudaTensor*)luaT_getfieldcheckudata(L, 1, "output", torch_CudaTensor_id);
  long size = THCudaTensor_nElement(input);

  input = THCudaTensor_newContiguous(input);
  THCudaTensor_resizeAs(output, input);

  thrust::device_ptr<float> output_data(THCudaTensor_data(output));
  thrust::device_ptr<float> input_data(THCudaTensor_data(input));
  thrust::transform(input_data, input_data+size, output_data,
                    sqrtupdateOutput_functor(bias));

  THCudaTensor_free(input);
  return 1;
}

struct sqrtupdateGradInput_functor
{
  const double bias;

  sqrtupdateGradInput_functor(double bias_) : bias(bias_) {}

  __host__ __device__ float operator()(const float& output, const float& gradOutput) const
  {
    return 0.5 * gradOutput / output;
  }
};

static int cunn_Sqrt_updateGradInput(lua_State *L)
{
  double bias = luaT_getfieldchecknumber(L, 1, "eps");
  THCudaTensor *output = (THCudaTensor*)luaT_getfieldcheckudata(L, 1, "output", torch_CudaTensor_id);
  THCudaTensor *gradOutput = (THCudaTensor*)luaT_checkudata(L, 3, torch_CudaTensor_id);
  THCudaTensor *gradInput = (THCudaTensor*)luaT_getfieldcheckudata(L, 1, "gradInput", torch_CudaTensor_id);
  long size = THCudaTensor_nElement(output);

  gradOutput = THCudaTensor_newContiguous(gradOutput);
  THCudaTensor_resizeAs(gradInput, output);

  thrust::device_ptr<float> output_data(THCudaTensor_data(output));
  thrust::device_ptr<float> gradOutput_data(THCudaTensor_data(gradOutput));
  thrust::device_ptr<float> gradInput_data(THCudaTensor_data(gradInput));
  thrust::transform(output_data, output_data+size, gradOutput_data, gradInput_data,
                    sqrtupdateGradInput_functor(bias));

  THCudaTensor_free(gradOutput);
  return 1;
}

static const struct luaL_Reg cunn_Sqrt__ [] = {
  {"Sqrt_updateOutput", cunn_Sqrt_updateOutput},
  {"Sqrt_updateGradInput", cunn_Sqrt_updateGradInput},
  {NULL, NULL}
};

static void cunn_Sqrt_init(lua_State *L)
{
  luaT_pushmetaclass(L, torch_CudaTensor_id);
  luaT_registeratname(L, cunn_Sqrt__, "nn");
  lua_pop(L, 1);
}
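
The two functors implement y = sqrt(x + eps) on the forward pass and 0.5 * gradOutput / y on the backward pass (the eps captured by the gradient functor is unused, since the derivative is expressed through the saved output). A quick numeric sketch of that relationship, assuming cunn is loaded and the module exposes the eps field this kernel reads:

-- Sketch: forward/backward of Sqrt on the GPU.
require 'cunn'
local m = nn.Sqrt():cuda()
local x = torch.rand(10):add(0.1):cuda()  -- keep inputs positive
local y = m:forward(x)                    -- elementwise sqrt
local g = m:updateGradInput(x, x:clone():fill(1))
-- g should match 0.5 / y, elementwise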
65 changes: 65 additions & 0 deletions extra/cuda/pkg/cunn/Square.cu
@@ -0,0 +1,65 @@
struct squareupdateOutput_functor
{
  __host__ __device__ float operator()(const float& input) const
  {
    return input*input;
  }
};

static int cunn_Square_updateOutput(lua_State *L)
{
  THCudaTensor *input = (THCudaTensor*)luaT_checkudata(L, 2, torch_CudaTensor_id);
  THCudaTensor *output = (THCudaTensor*)luaT_getfieldcheckudata(L, 1, "output", torch_CudaTensor_id);
  long size = THCudaTensor_nElement(input);

  input = THCudaTensor_newContiguous(input);
  THCudaTensor_resizeAs(output, input);

  thrust::device_ptr<float> output_data(THCudaTensor_data(output));
  thrust::device_ptr<float> input_data(THCudaTensor_data(input));
  thrust::transform(input_data, input_data+size, output_data, squareupdateOutput_functor());

  THCudaTensor_free(input);
  return 1;
}

struct squareupdateGradInput_functor
{
  __host__ __device__ float operator()(const float& input, const float& gradOutput) const
  {
    return 2.0 * gradOutput * input;
  }
};

static int cunn_Square_updateGradInput(lua_State *L)
{
  /* input is argument 2 on the Lua stack, gradOutput argument 3 */
  THCudaTensor *input = (THCudaTensor*)luaT_checkudata(L, 2, torch_CudaTensor_id);
  THCudaTensor *gradOutput = (THCudaTensor*)luaT_checkudata(L, 3, torch_CudaTensor_id);
  THCudaTensor *gradInput = (THCudaTensor*)luaT_getfieldcheckudata(L, 1, "gradInput", torch_CudaTensor_id);
  long size = THCudaTensor_nElement(input);

  gradOutput = THCudaTensor_newContiguous(gradOutput);
  THCudaTensor_resizeAs(gradInput, input);

  thrust::device_ptr<float> input_data(THCudaTensor_data(input));
  thrust::device_ptr<float> gradOutput_data(THCudaTensor_data(gradOutput));
  thrust::device_ptr<float> gradInput_data(THCudaTensor_data(gradInput));
  thrust::transform(input_data, input_data+size, gradOutput_data, gradInput_data,
                    squareupdateGradInput_functor());

  THCudaTensor_free(gradOutput);
  return 1;
}

static const struct luaL_Reg cunn_Square__ [] = {
  {"Square_updateOutput", cunn_Square_updateOutput},
  {"Square_updateGradInput", cunn_Square_updateGradInput},
  {NULL, NULL}
};

static void cunn_Square_init(lua_State *L)
{
  luaT_pushmetaclass(L, torch_CudaTensor_id);
  luaT_registeratname(L, cunn_Square__, "nn");
  lua_pop(L, 1);
}
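
Same structure as Sqrt: forward computes x^2, backward computes 2 * x * gradOutput. A sketch, assuming cunn is loaded:

-- Sketch: Square on the GPU. With x filled with 3 and a unit gradOutput,
-- forward gives 9 everywhere and updateGradInput gives 6 everywhere.
require 'cunn'
local m = nn.Square():cuda()
local x = torch.CudaTensor(4):fill(3)
print(m:forward(x))                            -- 9 9 9 9
print(m:updateGradInput(x, x:clone():fill(1))) -- 6 6 6 6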
6 changes: 6 additions & 0 deletions extra/cuda/pkg/cunn/init.cu
@@ -14,7 +14,10 @@ const void *torch_CudaTensor_id = NULL;
#include "LogSoftMax.cu"
#include "TemporalConvolution.cu"
#include "SpatialConvolution.cu"
#include "SpatialConvolutionMap.cu"
#include "SpatialSubSampling.cu"
#include "Square.cu"
#include "Sqrt.cu"
#include "MultiMarginCriterion.cu"
#include "MSECriterion.cu"

@@ -29,8 +32,11 @@ DLL_EXPORT TH_API int luaopen_libcunn(lua_State *L)
  cunn_LogSoftMax_init(L);
  cunn_TemporalConvolution_init(L);
  cunn_SpatialConvolution_init(L);
  cunn_SpatialConvolutionMap_init(L);
  cunn_SpatialSubSampling_init(L);
  cunn_MultiMarginCriterion_init(L);
  cunn_Square_init(L);
  cunn_Sqrt_init(L);
  cunn_MSECriterion_init(L);

  return 1;
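
The init functions above all follow the same registration pattern: push the CudaTensor metaclass and attach the C functions under its "nn" table. That is what lets the generic nn modules dispatch on tensor type. A sketch of the call path (the actual wrapper lives in nn's Sqrt.lua; shown here only to illustrate the pattern):

function nn.Sqrt:updateOutput(input)
   -- for a CudaTensor input, input.nn.Sqrt_updateOutput resolves to the
   -- C function registered by cunn_Sqrt_init above
   input.nn.Sqrt_updateOutput(self, input)
   return self.output
end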
44 changes: 44 additions & 0 deletions extra/cuda/pkg/cunn/test/test.lua
@@ -426,6 +426,50 @@ function cunntest.SpatialSubSampling_backward_batch()
   mytester:assertlt(berror:abs():max(), precision_backward, 'error on bias (backward) ')
end

function cunntest.SpatialConvolutionMap_forward()
   local from = math.random(1,64)
   local to = math.random(1,64)
   local ki = math.random(3,15)
   local kj = math.random(3,15)
   local si = math.random(1,2)
   local sj = math.random(1,2)
   local outi = math.random(1,256)
   local outj = math.random(1,256)
   local ini = (outi-1)*si+ki
   local inj = (outj-1)*sj+kj
   local fanin = math.random(1,from)

   local tm = {}
   local title = string.format('SpatialConvolutionMap.forward %dx%dx%d o %dx%d -> %dx%dx%d [s: %dx%d]',
                               from, inj, ini, kj, ki, to, outj, outi, sj, si)
   times[title] = tm

   local input = torch.randn(from,inj,ini)
   local sconv = nn.SpatialConvolutionMap(nn.tables.random(from,to,fanin),ki,kj,si,sj)
   local groundtruth = sconv:forward(input)
   local a = torch.Timer()
   for i = 1,nloop do
      groundtruth = sconv:forward(input)
   end
   tm.cpu = a:time().real

   input = input:cuda()
   local gconv = nn.SpatialConvolutionMap(nn.tables.random(from,to,fanin),ki,kj,si,sj):cuda()
   gconv.weight = sconv.weight:cuda()
   gconv.bias = sconv.bias:cuda()
   gconv.connTableRev = sconv.connTableRev:cuda()
   gconv.connTable = sconv.connTable:cuda()
   local rescuda = gconv:forward(input)
   a:reset()
   for i = 1,nloop do
      rescuda = gconv:forward(input)
   end
   tm.gpu = a:time().real

   local error = rescuda:float() - groundtruth
   mytester:assertlt(error:abs():max(), precision_forward, 'error on state (forward) ')
end


function cunntest.mse()
   local size = math.random(3000,5000)
   local input = torch.randn(size,1,1)
33 changes: 32 additions & 1 deletion extra/nn/SpatialConvolutionMap.lua
@@ -54,6 +54,37 @@ function nn.tables.random(nin, nout, nto)
   return tbl
end

function constructTableRev(conMatrix)
   local conMatrixL = conMatrix:type('torch.LongTensor')
   -- construct reverse lookup connection table
   local thickness = conMatrixL:select(2,2):max()
   -- approximate fanin check
   if (#conMatrixL)[1] % thickness == 0 then
      -- do a proper fanin check and set revTable
      local fanin = (#conMatrixL)[1] / thickness
      local revTable = torch.Tensor(thickness, fanin, 2)
      for ii=1,thickness do
         local tempf = fanin
         for jj=1,(#conMatrixL)[1] do
            if conMatrixL[jj][2] == ii then
               if tempf <= 0 then break end
               revTable[ii][tempf][1] = conMatrixL[jj][1]
               revTable[ii][tempf][2] = jj
               tempf = tempf - 1
            end
         end
         if tempf ~= 0 then
            fanin = -1
            break
         end
      end
      if fanin ~= -1 then
         return revTable
      end
   end
   return {}
end
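
A small worked example of what constructTableRev produces, under the fixed-fanin assumption (the table values here are hypothetical):

-- 2 output planes, fanin 2, drawn from 3 input planes. Row jj of the
-- forward table maps input conMatrix[jj][1] to output conMatrix[jj][2].
require 'nn'
local t = torch.Tensor{{1,1},{2,1},{2,2},{3,2}}
local rev = constructTableRev(t)
-- rev is 2x2x2: rev[o][k][1] is an input plane feeding output o, and
-- rev[o][k][2] is the row index into the forward table (hence which
-- weight slice to use). Entries are filled from the highest k down.
print(rev)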

function SpatialConvolutionMap:__init(conMatrix, kW, kH, dW, dH)
   parent.__init(self)

@@ -65,9 +96,9 @@ function SpatialConvolutionMap:__init(conMatrix, kW, kH, dW, dH)
   self.dW = dW
   self.dH = dH
   self.connTable = conMatrix
   self.connTableRev = constructTableRev(conMatrix)
   self.nInputPlane = self.connTable:select(2,1):max()
   self.nOutputPlane = self.connTable:select(2,2):max()

   self.weight = torch.Tensor(self.connTable:size(1), kH, kW)
   self.bias = torch.Tensor(self.nOutputPlane)
   self.gradWeight = torch.Tensor(self.connTable:size(1), kH, kW)
