
Commit

Add new CPU and GPU kernels - CHI^2,
ksopyla committed Nov 28, 2012
1 parent f344306 commit 4252029
Showing 14 changed files with 739 additions and 74 deletions.
163 changes: 162 additions & 1 deletion KMLib.CUDA/CUDAmodules/cudaSVMKernels.cu
@@ -1,4 +1,9 @@

/*
author: Krzysztof Sopyła
*/

//texture for the main vector, used for matrix-vector multiplication
//in SVM, when we have to compute many dot products (one vector against all others)
texture<float,1,cudaReadModeElementType> mainVectorTexRef;
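/*
  [Editorial sketch - not part of this commit] The comment above explains that the dense
  main vector is kept in a texture so that one vector can be dotted against many others.
  Below is a minimal, illustrative way to bind a device array to mainVectorTexRef with the
  CUDA runtime API; the names bindMainVector, d_mainVec and dim are hypothetical, and the
  library's managed host code may bind the texture differently (e.g. via the driver API).
*/
extern "C" void bindMainVector(const float* d_mainVec, int dim)
{
    //bind 'dim' floats starting at d_mainVec to the 1D float texture declared above
    cudaBindTexture(0, mainVectorTexRef, d_mainVec, dim * sizeof(float));
}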
@@ -19,6 +24,8 @@ texture<float,1,cudaReadModeElementType> svTexRef;

#define VECDIM 597

#define maxNNZ 100 //capacity of the shared memory cache holding the main vector's nonzero elements

/******************************************************************
*
* Cuda Kernels for SVM kernels
@@ -570,6 +577,160 @@ extern "C" __global__ void rbfEllpackFormatKernel(const float * vals,
}


//cuda kernel function for computing the SVM Chi-Square kernel,
// K(x,y) = 1 - 2*Sum( (xi-yi)^2/(xi+yi) )
//(see the dense reference sketch after this kernel body)
//
//uses the Ellpack-R format for storing the sparse matrix, labels are in the texture cache
//
//Params:
//vals - array of vector values
//colIdx - array of column indexes in Ellpack-R format
//rowLength - array with the number of nonzero elements in each row
//results - array of computed kernel values
//numRows - number of vectors
//mainVecIndex - main vector index, needed for retrieving its label
extern "C" __global__ void chiSquaredEllpackKernel(const float * vals,
const int * colIdx,
const int * rowLength,
float * results,
const int numRows,
const int mainVecIndex)
{


__shared__ int shMainVecIdx;
__shared__ float shLabel;
__shared__ int shMainCols[maxNNZ];
__shared__ float shMainVals[maxNNZ];
__shared__ int shMainNNZ;

if(threadIdx.x==0)
{
shMainVecIdx=mainVecIndex;
shLabel = tex1Dfetch(labelsTexRef,shMainVecIdx);
shMainNNZ= rowLength[mainVecIndex];
}

__syncthreads();

for(int s=threadIdx.x;s<shMainNNZ;s+=blockDim.x){
shMainCols[s] = colIdx[numRows*s+shMainVecIdx];
shMainVals[s]=tex1Dfetch(mainVectorTexRef,shMainCols[s]);
}

__syncthreads();

const int row = blockDim.x * blockIdx.x + threadIdx.x; // global thread index
const int num_rows =numRows;
if(row<num_rows)
{
int maxEl = rowLength[row];
int labelProd = tex1Dfetch(labelsTexRef,row)*shLabel;
float chi=0;

int col1=-1;
float val1=0;
float val2=0;
int i=0;
int k=0;
int prevCol=shMainCols[k];

for(i=0; i<maxEl;i++)
{
col1=colIdx[num_rows*i+row];
val1= vals[num_rows*i+row];
val2 = tex1Dfetch(mainVectorTexRef,col1);

chi+= (val1-val2)*(val1-val2)/(val1+val2);

//the vector in Ellpack format might skip some earlier columns which are nonzero in the dense main vector,
//so we have to "catch up" with the main dense vector

while(k<shMainNNZ && prevCol<col1) //prevCol=cols[numRows*k+shMainVecIdx];
{
//it is sufficient to add only the value of the dense vector,
//because the sparse vector value at this position is zero ((0-y)^2/(0+y) = y)
chi+=tex1Dfetch(mainVectorTexRef,prevCol);
k++;
if(k<shMainNNZ)
prevCol=shMainCols[k];

}
if(prevCol==col1){
k++;//increase k to move to the first index greater than col1
if(k<shMainNNZ)
prevCol=shMainCols[k];
}
}

//add the remaining main vector values which were not added before
while(k<shMainNNZ){
chi+=shMainVals[k++];

//prevCol=cols[numRows*k+shMainVecIdx];
//chi+=tex1Dfetch(mainVectorTexRef,prevCol);
}
results[row]=labelProd*(1-2*chi);
}

}
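/*
  [Editorial sketch - not part of this commit] Dense host-side reference for the kernel above,
  kept only to illustrate the formula K(x,y) = 1 - 2*Sum_i (xi-yi)^2/(xi+yi). It assumes the
  two sparse rows have been expanded into dense, non-negative float arrays of length dim; the
  sparse "catch up" merge above produces the same sum, since a column missing from the sparse
  row contributes (0-y)^2/(0+y) = y. The names chiSquaredDenseRef, x, y and dim are
  hypothetical. The GPU kernel additionally multiplies the result by the label product.
*/
float chiSquaredDenseRef(const float* x, const float* y, int dim)
{
    float chi = 0.0f;
    for (int i = 0; i < dim; i++)
    {
        float sum = x[i] + y[i];
        if (sum > 0.0f) //skip coordinates where both entries are zero
        {
            float diff = x[i] - y[i];
            chi += diff * diff / sum;
        }
    }
    return 1.0f - 2.0f * chi;
}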

//cuda kernel function for computing the SVM Chi-Square kernel in its normalized version,
// K(x,y) = Sum( (xi*yi)/(xi+yi) )
//uses the Ellpack-R format for storing the sparse matrix, labels are in the texture cache
//(an illustrative sketch of the Ellpack-R array layout follows this kernel body)
//Params:
//vals - array of vector values
//colIdx - array of column indexes in Ellpack-R format
//rowLength - array with the number of nonzero elements in each row
//results - array of computed kernel values
//numRows - number of vectors
//mainVecIndex - main vector index, needed for retrieving its label
extern "C" __global__ void chiSquaredNormEllpackKernel(const float * vals,
const int * colIdx,
const int * rowLength,
float * results,
const int numRows,
const int mainVecIndex)
{

__shared__ float shLabel;

if(threadIdx.x==0)
{
shLabel = tex1Dfetch(labelsTexRef,mainVecIndex);
}

__syncthreads();

const int row = blockDim.x * blockIdx.x + threadIdx.x; // global thread index
const int num_rows =numRows;
if(row<num_rows)
{
int maxEl = rowLength[row];
int labelProd = tex1Dfetch(labelsTexRef,row)*shLabel;
float chi=0;

int col1=-1;
float val1=0;
float val2=0;
int i=0;

for(i=0; i<maxEl;i++)
{
col1=colIdx[num_rows*i+row];
val1= vals[num_rows*i+row];
val2 = tex1Dfetch(mainVectorTexRef,col1);

chi+= (val1*val2)/(val1+val2);

}
results[row]=labelProd*chi;
}

}
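/*
  [Editorial sketch - not part of this commit] Both chi-square kernels above read
  vals[numRows*i + row] and colIdx[numRows*i + row], i.e. the Ellpack-R arrays are stored
  column-major so that consecutive threads access consecutive addresses. The host-side helper
  below shows one possible way to build such arrays from a row-wise sparse representation;
  the names buildEllpackR, rowCols and rowVals are hypothetical, and this is not necessarily
  how the library itself performs the conversion.
*/
#include <vector>
#include <algorithm>

void buildEllpackR(const std::vector<std::vector<int> >& rowCols,   //column indexes per row
                   const std::vector<std::vector<float> >& rowVals, //values per row
                   std::vector<float>& vals,
                   std::vector<int>& colIdx,
                   std::vector<int>& rowLength)
{
    int numRows = (int)rowCols.size();
    int maxEl = 0;
    for (int r = 0; r < numRows; r++)
        maxEl = std::max(maxEl, (int)rowCols[r].size());

    //zero-padded, column-major storage: element i of row r sits at position numRows*i + r
    vals.assign((size_t)numRows * maxEl, 0.0f);
    colIdx.assign((size_t)numRows * maxEl, 0);
    rowLength.resize(numRows);

    for (int r = 0; r < numRows; r++)
    {
        rowLength[r] = (int)rowCols[r].size();
        for (int i = 0; i < rowLength[r]; i++)
        {
            vals[(size_t)numRows * i + r] = rowVals[r][i];
            colIdx[(size_t)numRows * i + r] = rowCols[r][i];
        }
    }
}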


/*******************************************************************************************/
/*
7 changes: 6 additions & 1 deletion KMLib.CUDA/CUDAmodules/rbfSlicedEllpackKernel.cu
@@ -1,6 +1,11 @@

/*
author: Krzysztof Sopyła
mail: krzysztofsopyla@gmail.com
Licence: contact the author
web page: http://wmii.uwm.edu.pl/wydzial/kadra/get/143
*/

texture<float,1,cudaReadModeElementType> mainVecTexRef;

@@ -14,12 +14,13 @@ namespace KMLib.GPU

/// <summary>
/// Represents linear evaluation class which uses CUDA,
/// data in CSR format
/// </summary>
public class CudaLinearEvaluator : CudaVectorEvaluator, IDisposable
public class CudaLinearCSREvaluator : CudaVectorEvaluator, IDisposable
{


public CudaLinearEvaluator()
public CudaLinearCSREvaluator()
{
cudaEvaluatorKernelName = "linearCSREvaluatorDenseVector";
}
@@ -149,10 +150,6 @@ public override float[] Predict(SparseVec[] elements)
//gridDimX is valid for this function
cuda.LaunchAsync(cuFuncSign, gridDimX, 1, stream);





//wait for all computation
cuda.SynchronizeContext();

@@ -13,17 +13,17 @@ namespace KMLib.GPU
{

/// <summary>
/// Represents RBF evaluator, it is used for predicting unseen elements. For prediction it uses a CUDA kernel.
/// Represents RBF evaluator, it is used for predicting unseen elements. For prediction it uses CUDA, data is in CSR format.
/// </summary>
public class CudaRBFEvaluator : CudaVectorEvaluator, IDisposable
public class CudaRBFCSREvaluator : CudaVectorEvaluator, IDisposable
{


CUdeviceptr elSelf;
CUdeviceptr svSelf;

float gamma;
public CudaRBFEvaluator(float gamma)
public CudaRBFCSREvaluator(float gamma)
{
this.gamma = gamma;
cudaEvaluatorKernelName = "rbfCSREvaluatorDenseVector";
3 changes: 1 addition & 2 deletions KMLib.CUDA/GPUKernels/CUDAVectorKernel.cs
@@ -260,8 +260,7 @@ protected void InitCudaModule()
throw new ArgumentException("Failed access to cuda module" + modluePath);

cuModule = cuda.LoadModule(modluePath);
cuFunc = cuda.GetModuleFunction(cudaProductKernelName);
//var cuModule2 = cuda.LoadModule(Path.Combine(Environment.CurrentDirectory, "rbfSlicedEllpackKernel.cubin"));
cuFunc = cuda.GetModuleFunction(cudaProductKernelName);
}


