From 005f1c2c6c12b117190483a694936895b2af092e Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Tue, 31 Oct 2023 11:25:37 +0100 Subject: [PATCH 1/3] Fix error when building for newer GPU architectures --- lib/CUDAKernels/kernels.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/CUDAKernels/kernels.cu b/lib/CUDAKernels/kernels.cu index ebb271c..66822d7 100644 --- a/lib/CUDAKernels/kernels.cu +++ b/lib/CUDAKernels/kernels.cu @@ -206,7 +206,7 @@ __device__ __forceinline__ double RSQRT(double val) { return rsqrt(val); } // template<> __device__ __forceinline__ double RSQRT(double val) { return 1.0/sqrt(val); } - +#if __CUDA_ARCH__ < 600 __device__ double atomicAdd(double* address, double val) { unsigned long long int* address_as_ull = @@ -220,6 +220,7 @@ __device__ double atomicAdd(double* address, double val) } while (assumed != old); return __longlong_as_double(old); } +#endif __device__ __forceinline__ double atomicMin(double *address, double val) From 0263a6d25e228c5193a0ac7810e6cbaae20f705c Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Mon, 30 Oct 2023 11:59:33 +0100 Subject: [PATCH 2/3] Fix build for newer CUDA --- lib/Makefile | 2 +- lib/include/cudadev.h | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/Makefile b/lib/Makefile index f70fd40..b36cc64 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -41,7 +41,7 @@ NVCCVERSION=$(shell "${NVCC}" --version | grep ^Cuda | sed 's/^.* //g') ifeq "${NVCCVERSION}" "V5.5.22" NVCCFLAGS ?= -arch sm_20 else - NVCCFLAGS ?= -arch sm_30 + NVCCFLAGS ?= -arch sm_50 endif #NVCCFLAGS = -arch sm_35 diff --git a/lib/include/cudadev.h b/lib/include/cudadev.h index b2af740..042c13c 100644 --- a/lib/include/cudadev.h +++ b/lib/include/cudadev.h @@ -710,12 +710,14 @@ namespace dev { // jitOptionCount++; // } - + +#if CUDA_VERSION < 6000 if(computeMode < CU_TARGET_COMPUTE_20) { fprintf(stderr,"Sapporo2 requires at least a Fermi or newer NVIDIA architecture.\n"); exit(-1); } +#endif 
//Set the architecture // { From 2cc55c3829115cdf86f766b47a778f847ea6969f Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Tue, 31 Oct 2023 14:13:17 +0100 Subject: [PATCH 3/3] Fix errors in OpenCL compilation --- lib/include/defines.h | 9 ++++++++- lib/include/ocldev.h | 4 ++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/lib/include/defines.h b/lib/include/defines.h index 16e01f7..e6f3216 100644 --- a/lib/include/defines.h +++ b/lib/include/defines.h @@ -55,8 +55,15 @@ inline const char* get_kernelName(const int integrator, case SIXTH: if(precision == DOUBLESINGLE) { +#ifdef _OCL_ + fprintf(stderr, "ERROR: Sixth order integrator with double single precision\n"); + fprintf(stderr, "ERROR: is not implemented in OpenCL, only in CUDA. Please\n"); + fprintf(stderr, "ERROR: file an issue on GitHub if you need this combination.\n"); + exit(1); +#else perThreadSM = sizeof(float4)*2 + sizeof(float4) + sizeof(float3); - return "dev_evaluate_gravity_sixth_DS"; +#endif + return "dev_evaluate_gravity_sixth_DS"; } else if(precision == DOUBLE){ #ifdef _OCL_ diff --git a/lib/include/ocldev.h b/lib/include/ocldev.h index 453e67b..e621348 100644 --- a/lib/include/ocldev.h +++ b/lib/include/ocldev.h @@ -574,8 +574,8 @@ namespace dev { void copy(const memory &src, const cl_bool OCL_BLOCKING = CL_TRUE) { assert(ContextFlag); if (n != src.n) { - ocl_free(); - cmalloc(src.n, DeviceMemFlags); + ocl_free(); + allocate(src.n, DeviceMemFlags); } oclSafeCall(clEnqueueCopyBuffer(CommandQueue, src.DeviceMem,