You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Because the KITTI Semantic dataset's points are 4D (xyz + brightness), I tried to write a 4D version of the Chamfer distance by imitation. Can you help me check whether my code is correct?
#18
Open
LeopoldACC opened this issue
Sep 20, 2021
· 1 comment
Hi @ThibaultGROUEIX, thanks for your work! My 4D version, written by imitating the existing kernels, is below.
#include<stdio.h>
#include<ATen/ATen.h>
#include<cuda.h>
#include<cuda_runtime.h>
#include <vector>

// Nearest-neighbour kernel for the 4D Chamfer distance (x, y, z, brightness).
// For each of the n points of `xyz` (layout [b, n, 4], contiguous float32) it
// finds the closest of the m points of `xyz2` ([b, m, 4]) and writes the
// squared L2 distance into result[b, n] and the winning point index into
// result_i[b, n].
// Launch layout: gridDim.x strides over batches, gridDim.y * blockDim.x
// strides over the n query points. `xyz2` is staged through shared memory in
// tiles of `batch` points, with a 4x-unrolled inner scan over each tile.
__global__ void NmDistanceKernel(int b, int n, const float* xyz, int m,
                                 const float* xyz2, float* result,
                                 int* result_i) {
    const int batch = 2048;          // tile size: points of xyz2 per shared-mem load
    __shared__ float buf[batch * 4]; // 2048 points * 4 floats = 32 KB
    for (int i = blockIdx.x; i < b; i += gridDim.x) {
        for (int k2 = 0; k2 < m; k2 += batch) {
            int end_k = min(m, k2 + batch) - k2; // points in this tile
            // Cooperatively stage the tile of xyz2 into shared memory.
            for (int j = threadIdx.x; j < end_k * 4; j += blockDim.x) {
                buf[j] = xyz2[(i * m + k2) * 4 + j];
            }
            __syncthreads();
            for (int j = threadIdx.x + blockIdx.y * blockDim.x; j < n;
                 j += blockDim.x * gridDim.y) {
                float x1 = xyz[(i * n + j) * 4 + 0];
                float y1 = xyz[(i * n + j) * 4 + 1];
                float r1 = xyz[(i * n + j) * 4 + 2];
                float g1 = xyz[(i * n + j) * 4 + 3];
                int best_i = 0;
                float best = 0;
                // BUGFIX: round end_k down to a multiple of 4 with `& 3`;
                // the original used `end_k & 4`, which is not a mod-4 mask.
                int end_ka = end_k - (end_k & 3);
                if (end_ka == batch) {
                    // Full tile: trip count is a compile-time constant.
                    for (int k = 0; k < batch; k += 4) {
                        {
                            float x2 = buf[k * 4 + 0] - x1;
                            float y2 = buf[k * 4 + 1] - y1;
                            float r2 = buf[k * 4 + 2] - r1;
                            float g2 = buf[k * 4 + 3] - g1;
                            float d = x2 * x2 + y2 * y2 + r2 * r2 + g2 * g2;
                            if (k == 0 || d < best) {
                                best = d;
                                best_i = k + k2;
                            }
                        }
                        {
                            float x2 = buf[k * 4 + 4] - x1;
                            float y2 = buf[k * 4 + 5] - y1;
                            float r2 = buf[k * 4 + 6] - r1;
                            float g2 = buf[k * 4 + 7] - g1;
                            float d = x2 * x2 + y2 * y2 + r2 * r2 + g2 * g2;
                            if (d < best) {
                                best = d;
                                best_i = k + k2 + 1;
                            }
                        }
                        {
                            float x2 = buf[k * 4 + 8] - x1;
                            float y2 = buf[k * 4 + 9] - y1;
                            float r2 = buf[k * 4 + 10] - r1;
                            float g2 = buf[k * 4 + 11] - g1;
                            float d = x2 * x2 + y2 * y2 + r2 * r2 + g2 * g2;
                            if (d < best) {
                                best = d;
                                best_i = k + k2 + 2;
                            }
                        }
                        {
                            float x2 = buf[k * 4 + 12] - x1;
                            float y2 = buf[k * 4 + 13] - y1;
                            float r2 = buf[k * 4 + 14] - r1;
                            float g2 = buf[k * 4 + 15] - g1;
                            float d = x2 * x2 + y2 * y2 + r2 * r2 + g2 * g2;
                            if (d < best) {
                                best = d;
                                best_i = k + k2 + 3;
                            }
                        }
                    }
                } else {
                    // Partial tile: same 4x-unrolled scan over end_ka points.
                    for (int k = 0; k < end_ka; k += 4) {
                        {
                            float x2 = buf[k * 4 + 0] - x1;
                            float y2 = buf[k * 4 + 1] - y1;
                            float r2 = buf[k * 4 + 2] - r1;
                            float g2 = buf[k * 4 + 3] - g1;
                            float d = x2 * x2 + y2 * y2 + r2 * r2 + g2 * g2;
                            if (k == 0 || d < best) {
                                best = d;
                                best_i = k + k2;
                            }
                        }
                        {
                            float x2 = buf[k * 4 + 4] - x1;
                            float y2 = buf[k * 4 + 5] - y1;
                            float r2 = buf[k * 4 + 6] - r1;
                            float g2 = buf[k * 4 + 7] - g1;
                            float d = x2 * x2 + y2 * y2 + r2 * r2 + g2 * g2;
                            if (d < best) {
                                best = d;
                                best_i = k + k2 + 1;
                            }
                        }
                        {
                            float x2 = buf[k * 4 + 8] - x1;
                            float y2 = buf[k * 4 + 9] - y1;
                            float r2 = buf[k * 4 + 10] - r1;
                            float g2 = buf[k * 4 + 11] - g1;
                            float d = x2 * x2 + y2 * y2 + r2 * r2 + g2 * g2;
                            if (d < best) {
                                best = d;
                                best_i = k + k2 + 2;
                            }
                        }
                        {
                            // BUGFIX: the original indexed buf[k*5+12..15]
                            // here (leftover from a 5D variant); the packed
                            // stride of a 4D point is 4.
                            float x2 = buf[k * 4 + 12] - x1;
                            float y2 = buf[k * 4 + 13] - y1;
                            float r2 = buf[k * 4 + 14] - r1;
                            float g2 = buf[k * 4 + 15] - g1;
                            float d = x2 * x2 + y2 * y2 + r2 * r2 + g2 * g2;
                            if (d < best) {
                                best = d;
                                best_i = k + k2 + 3;
                            }
                        }
                    }
                }
                // Remainder loop for the last (end_k mod 4) points.
                // BUGFIX: the original indexed buf[k*5+0..3]; stride is 4.
                for (int k = end_ka; k < end_k; k++) {
                    float x2 = buf[k * 4 + 0] - x1;
                    float y2 = buf[k * 4 + 1] - y1;
                    float r2 = buf[k * 4 + 2] - r1;
                    float g2 = buf[k * 4 + 3] - g1;
                    float d = x2 * x2 + y2 * y2 + r2 * r2 + g2 * g2;
                    if (k == 0 || d < best) {
                        best = d;
                        best_i = k + k2;
                    }
                }
                // Merge this tile's best against previous tiles' results.
                if (k2 == 0 || result[(i * n + j)] > best) {
                    result[(i * n + j)] = best;
                    result_i[(i * n + j)] = best_i;
                }
            }
            __syncthreads();
        }
    }
}
// int chamfer_cuda_forward(int b, int n, const float* xyz, int m,
//                          const float* xyz2, float* result, int* result_i,
//                          float* result2, int* result2_i, cudaStream_t stream)
//
// Host entry point for the forward pass of the 4D Chamfer distance.
// xyz1: [b, n, 4], xyz2: [b, m, 4] (float32, contiguous, on the GPU).
// dist1/idx1 receive, for each point of xyz1, the squared distance to and
// index of its nearest neighbour in xyz2; dist2/idx2 the symmetric result.
// Returns 1 on success, 0 if a CUDA error was reported after the launches.
int chamfer_cuda_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist1,
                         at::Tensor dist2, at::Tensor idx1, at::Tensor idx2) {
    const auto batch_size = xyz1.size(0);
    const auto n = xyz1.size(1); // num points in point cloud A
    const auto m = xyz2.size(1); // num points in point cloud B

    NmDistanceKernel<<<dim3(32, 16, 1), 512>>>(
        batch_size, n, xyz1.data<float>(), m, xyz2.data<float>(),
        dist1.data<float>(), idx1.data<int>());
    NmDistanceKernel<<<dim3(32, 16, 1), 512>>>(
        batch_size, m, xyz2.data<float>(), n, xyz1.data<float>(),
        dist2.data<float>(), idx2.data<int>());

    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        printf("error in nnd updateOutput: %s\n", cudaGetErrorString(err));
        // BUGFIX: report the failure to the caller instead of returning
        // success (the original printed the error but still returned 1).
        return 0;
    }
    return 1;
}
// Backward kernel for the 4D Chamfer distance.
// For each query point j of xyz1 ([b, n, 4]) and its matched neighbour
// j2 = idx1[i, j] in xyz2 ([b, m, 4]), accumulates the gradient of the
// squared distance, d/dp ||p1 - p2||^2 = 2 * (p1 - p2) * grad_dist1, into
// grad_xyz1 and (negated) into grad_xyz2. atomicAdd is required because
// several points of xyz1 can share the same nearest neighbour in xyz2.
__global__ void NmDistanceGradKernel(int b, int n, const float* xyz1, int m,
                                     const float* xyz2,
                                     const float* grad_dist1, const int* idx1,
                                     float* grad_xyz1, float* grad_xyz2) {
    for (int i = blockIdx.x; i < b; i += gridDim.x) {
        for (int j = threadIdx.x + blockIdx.y * blockDim.x; j < n;
             j += blockDim.x * gridDim.y) {
            // BUGFIX: 4D points are packed with stride 4 (x, y, z,
            // brightness); the original read xyz1/xyz2 with stride 5, which
            // reads the wrong coordinates and runs past the end of the
            // buffers, while writing the gradients with stride 4.
            float x1 = xyz1[(i * n + j) * 4 + 0];
            float y1 = xyz1[(i * n + j) * 4 + 1];
            float r1 = xyz1[(i * n + j) * 4 + 2];
            float g1 = xyz1[(i * n + j) * 4 + 3];
            int j2 = idx1[i * n + j];
            float x2 = xyz2[(i * m + j2) * 4 + 0];
            float y2 = xyz2[(i * m + j2) * 4 + 1];
            float r2 = xyz2[(i * m + j2) * 4 + 2];
            float g2 = xyz2[(i * m + j2) * 4 + 3];
            float g = grad_dist1[i * n + j] * 2;
            atomicAdd(&(grad_xyz1[(i * n + j) * 4 + 0]), g * (x1 - x2));
            atomicAdd(&(grad_xyz1[(i * n + j) * 4 + 1]), g * (y1 - y2));
            atomicAdd(&(grad_xyz1[(i * n + j) * 4 + 2]), g * (r1 - r2));
            atomicAdd(&(grad_xyz1[(i * n + j) * 4 + 3]), g * (g1 - g2));
            atomicAdd(&(grad_xyz2[(i * m + j2) * 4 + 0]), -(g * (x1 - x2)));
            atomicAdd(&(grad_xyz2[(i * m + j2) * 4 + 1]), -(g * (y1 - y2)));
            atomicAdd(&(grad_xyz2[(i * m + j2) * 4 + 2]), -(g * (r1 - r2)));
            atomicAdd(&(grad_xyz2[(i * m + j2) * 4 + 3]), -(g * (g1 - g2)));
        }
    }
}
// int chamfer_cuda_backward(int b, int n, const float* xyz1, int m,
//                           const float* xyz2, const float* grad_dist1,
//                           const int* idx1, const float* grad_dist2,
//                           const int* idx2, float* grad_xyz1,
//                           float* grad_xyz2, cudaStream_t stream)
//
// Host entry point for the backward pass of the 4D Chamfer distance.
// Launches one gradient kernel per direction (xyz1 -> xyz2 and xyz2 -> xyz1);
// gradxyz1/gradxyz2 are assumed pre-zeroed by the caller (the kernels only
// atomicAdd into them). Returns 1 on success, 0 on a reported CUDA error.
int chamfer_cuda_backward(at::Tensor xyz1, at::Tensor xyz2,
                          at::Tensor gradxyz1, at::Tensor gradxyz2,
                          at::Tensor graddist1, at::Tensor graddist2,
                          at::Tensor idx1, at::Tensor idx2) {
    // cudaMemset(grad_xyz1, 0, b * n * 3 * 4);
    // cudaMemset(grad_xyz2, 0, b * m * 3 * 4);
    const auto batch_size = xyz1.size(0);
    const auto n = xyz1.size(1); // num points in point cloud A
    const auto m = xyz2.size(1); // num points in point cloud B

    NmDistanceGradKernel<<<dim3(1, 16, 1), 256>>>(
        batch_size, n, xyz1.data<float>(), m, xyz2.data<float>(),
        graddist1.data<float>(), idx1.data<int>(), gradxyz1.data<float>(),
        gradxyz2.data<float>());
    NmDistanceGradKernel<<<dim3(1, 16, 1), 256>>>(
        batch_size, m, xyz2.data<float>(), n, xyz1.data<float>(),
        graddist2.data<float>(), idx2.data<int>(), gradxyz2.data<float>(),
        gradxyz1.data<float>());

    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        printf("error in nnd get grad: %s\n", cudaGetErrorString(err));
        // BUGFIX: report the failure to the caller instead of returning
        // success (the original printed the error but still returned 1).
        return 0;
    }
    return 1;
}
Another question: is there any good tutorial or book for learning how to write CUDA ops? Thanks a lot!
The text was updated successfully, but these errors were encountered:
LeopoldACC
changed the title
Hi,@ ThibaultGROUEIX Thanks for your work!Due to KITTI Semantic datasets' dimension is 4D(xyz+brightness),I tried to imitate a 4d version of chamfer,can you help me to inspect whether my code is correct?
Due to KITTI Semantic datasets' dimension is 4D(xyz+brightness),I tried to imitate a 4d version of chamfer,can you help me to inspect whether my code is correct?
Sep 20, 2021
Looks good! Sorry, I don't know of any good teaching resource. You could also have used Chamfer5D or 6D and set a few dimensions to 0 in your tensors.
Best regards,
Hi,@ThibaultGROUEIX Thanks for your work!My 4D version is as below by imitation.
Another question is there any good tutorial or book to learn how to write cuda op?Thanks a lot!
The text was updated successfully, but these errors were encountered: