Permalink
Browse files

Added runtime selection of gravity kernel based on arch, so that it w…

…orks on Fermi again
  • Loading branch information...
1 parent 652e391 commit 94dab38784f4e67014fbbdffef5529ddc105f548 @harrism harrism committed Apr 26, 2012
@@ -75,28 +75,28 @@ set (HFILES
set (CUFILES
CUDAkernels/build_tree.cu
CUDAkernels/compute_propertiesD.cu
- CUDAkernels/dev_approximate_gravity.cu
- CUDAkernels/dev_approximate_gravity_let.cu
CUDAkernels/parallel.cu
CUDAkernels/sortKernels.cu
CUDAkernels/timestep.cu
+ CUDAkernels/dev_approximate_gravity_fermi.cu
)
+ if (COMPILE_SM30)
+ set (CUFILES
+ ${CUFILES}
+ CUDAkernels/dev_approximate_gravity.cu
+ )
+endif (COMPILE_SM30)
+
set (CUHFILES
CUDAkernels/support_kernels.cu
CUDAkernels/scanKernels.cu
)
-source_group("CUDA Source Files" FILES ${CUFILES})
-source_group("CUDA Include Files" FILES ${CUHFILES})
+source_group("CUDA Source Files" FILES ${CUFILES} ${CUHFILES})
-set (GENCODE)
-if (COMPILE_SM20)
- set(GENCODE -gencode=arch=compute_20,code=sm_20 -gencode=arch=compute_20,code=compute_20)
-endif(COMPILE_SM20)
-if (COMPILE_SM30)
- set(GENCODE ${GENCODE} -gencode=arch=compute_30,code=sm_30 -gencode=arch=compute_30,code=compute_30)
-endif(COMPILE_SM30)
+set(GENCODE_SM20 -gencode=arch=compute_20,code=sm_20 -gencode=arch=compute_20,code=compute_20)
+set(GENCODE_SM30 -gencode=arch=compute_30,code=sm_30 -gencode=arch=compute_30,code=compute_30)
if (CUDA_VERBOSE_PTXAS)
set(VERBOSE_PTXAS --ptxas-options=-v)
@@ -146,11 +146,18 @@ if (USE_OPENGL)
endif (USE_OPENGL)
+set(GENCODE)
+if (COMPILE_SM20)
+ set(GENCODE ${GENCODE} ${GENCODE_SM20})
+endif(COMPILE_SM20)
+if (COMPILE_SM30)
+ set(GENCODE ${GENCODE} ${GENCODE_SM_30})
+endif(COMPILE_SM30)
cuda_add_executable(bonsai2
${CCFILES}
${HFILES}
- ${CUFILES}
+ ${CUFILES}
OPTIONS ${GENCODE} ${VERBOSE_PTXAS} ${DEVICE_DEBUGGING}
)
@@ -1,4 +1,7 @@
- // #include "support_kernels.cu"
+// MJH: I put the whole file in this because I couldn't get CMake to treat .cu files differently
+#if __CUDA_ARCH__ >= 300 // This file now contains kepler-only code
+
+// #include "support_kernels.cu"
#include <stdio.h>
#include "node_specs.h"
@@ -1013,7 +1016,7 @@ __device__ float4 approximate_gravity(int DIM2x, int DIM2y,
extern "C" __global__ void
__launch_bounds__(NTHREAD)
- dev_approximate_gravity(const int n_active_groups,
+ dev_approximate_gravity_kepler(const int n_active_groups,
int n_bodies,
float eps2,
uint2 node_begend,
@@ -1202,4 +1205,4 @@ __launch_bounds__(NTHREAD)
} //end while
}
-
+#endif // __CUDA_ARCH__ >= 300
@@ -1,4 +1,7 @@
- // #include "support_kernels.cu"
+// MJH: I put the whole file in this because I couldn't get CMake to treat .cu files differently
+#if __CUDA_ARCH__ < 300 // This file contains fermi-only code
+
+// #include "support_kernels.cu"
#include <stdio.h>
#include "node_specs.h"
@@ -1055,3 +1058,4 @@ __launch_bounds__(NTHREAD)
}
+#endif // __CUDA_ARCH__ < 300
@@ -118,6 +118,7 @@ namespace my_dev {
int ccMinor;
int defaultComputeMode;
+
public:
@@ -137,6 +138,8 @@ namespace my_dev {
{
}
}
+
+ int getComputeCapability() const { return 100 * ccMajor + 10 * ccMinor; }
int create(std::ofstream &log, bool disableTiming = false)
@@ -190,41 +193,43 @@ namespace my_dev {
int res = cudaSetDevice((int)dev);
if(res != cudaSuccess)
{
- printf("failed (error #: %d), now trying all devices starting at 0 \n", res);
-
- for(int i=0; i < ciDeviceCount; i++)
- {
- printf("Trying device: %d ...", i);
- if(cudaSetDevice(i) != cudaSuccess)
- {
- printf("failed!\n");
- if(i+1 == ciDeviceCount)
- {
- printf("All devices failed, exit! \n");
- exit(0);
- }
- }
- else
- {
- printf("success! \n");
- this->dev = i;
- break;
- }
- }
+ printf("failed (error #: %d), now trying all devices starting at 0 \n", res);
+
+ for(int i=0; i < ciDeviceCount; i++)
+ {
+ printf("Trying device: %d ...", i);
+ if(cudaSetDevice(i) != cudaSuccess)
+ {
+ printf("failed!\n");
+ if(i+1 == ciDeviceCount)
+ {
+ printf("All devices failed, exit! \n");
+ exit(0);
+ }
+ }
+ else
+ {
+ printf("success! \n");
+ this->dev = i;
+ break;
+ }
+ }
}
else
{
- printf("success!\n");
+ printf("success!\n");
}
-
+
cudaDeviceProp deviceProp;
CU_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, (int)dev));
//Get the number of multiprocessors of the device
multiProcessorCount = deviceProp.multiProcessorCount;
-
+ ccMajor = deviceProp.major;
+ ccMinor = deviceProp.minor;
+
hContext_flag = true;
}
-
+
void startTiming(cudaStream_t stream=0)
{
@@ -275,7 +275,12 @@ void octree::load_kernels() {
getTNext.create("get_Tnext");
predictParticles.create("predict_particles");
getNActive.create("get_nactive");
- approxGrav.create("dev_approximate_gravity");
+
+ if (devContext.getComputeCapability() >= 300)
+ approxGrav.create("dev_approximate_gravity_kepler");
+ else
+ approxGrav.create("dev_approximate_gravity");
+
correctParticles.create("correct_particles");
computeDt.create("compute_dt");
setActiveGrps.create("setActiveGroups");

0 comments on commit 94dab38

Please sign in to comment.