Skip to content

Commit

Permalink
Added runtime selection of gravity kernel based on arch, so that it w…
Browse files Browse the repository at this point in the history
…orks on Fermi again
  • Loading branch information
harrism committed Apr 26, 2012
1 parent 652e391 commit 94dab38
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 42 deletions.
31 changes: 19 additions & 12 deletions runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -75,28 +75,28 @@ set (HFILES
set (CUFILES
CUDAkernels/build_tree.cu
CUDAkernels/compute_propertiesD.cu
CUDAkernels/dev_approximate_gravity.cu
CUDAkernels/dev_approximate_gravity_let.cu
CUDAkernels/parallel.cu
CUDAkernels/sortKernels.cu
CUDAkernels/timestep.cu
CUDAkernels/dev_approximate_gravity_fermi.cu
)

if (COMPILE_SM30)
set (CUFILES
${CUFILES}
CUDAkernels/dev_approximate_gravity.cu
)
endif (COMPILE_SM30)

set (CUHFILES
CUDAkernels/support_kernels.cu
CUDAkernels/scanKernels.cu
)

source_group("CUDA Source Files" FILES ${CUFILES})
source_group("CUDA Include Files" FILES ${CUHFILES})
source_group("CUDA Source Files" FILES ${CUFILES} ${CUHFILES})

set (GENCODE)
if (COMPILE_SM20)
set(GENCODE -gencode=arch=compute_20,code=sm_20 -gencode=arch=compute_20,code=compute_20)
endif(COMPILE_SM20)
if (COMPILE_SM30)
set(GENCODE ${GENCODE} -gencode=arch=compute_30,code=sm_30 -gencode=arch=compute_30,code=compute_30)
endif(COMPILE_SM30)
set(GENCODE_SM20 -gencode=arch=compute_20,code=sm_20 -gencode=arch=compute_20,code=compute_20)
set(GENCODE_SM30 -gencode=arch=compute_30,code=sm_30 -gencode=arch=compute_30,code=compute_30)

if (CUDA_VERBOSE_PTXAS)
set(VERBOSE_PTXAS --ptxas-options=-v)
Expand Down Expand Up @@ -146,11 +146,18 @@ if (USE_OPENGL)

endif (USE_OPENGL)

set(GENCODE)
if (COMPILE_SM20)
set(GENCODE ${GENCODE} ${GENCODE_SM20})
endif(COMPILE_SM20)
if (COMPILE_SM30)
set(GENCODE ${GENCODE} ${GENCODE_SM_30})
endif(COMPILE_SM30)

cuda_add_executable(bonsai2
${CCFILES}
${HFILES}
${CUFILES}
${CUFILES}
OPTIONS ${GENCODE} ${VERBOSE_PTXAS} ${DEVICE_DEBUGGING}
)

Expand Down
9 changes: 6 additions & 3 deletions runtime/CUDAkernels/dev_approximate_gravity.cu
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
// #include "support_kernels.cu"
// MJH: I put the whole file in this because I couldn't get CMake to treat .cu files differently
#if __CUDA_ARCH__ >= 300 // This file now contains kepler-only code

// #include "support_kernels.cu"
#include <stdio.h>

#include "node_specs.h"
Expand Down Expand Up @@ -1013,7 +1016,7 @@ __device__ float4 approximate_gravity(int DIM2x, int DIM2y,

extern "C" __global__ void
__launch_bounds__(NTHREAD)
dev_approximate_gravity(const int n_active_groups,
dev_approximate_gravity_kepler(const int n_active_groups,
int n_bodies,
float eps2,
uint2 node_begend,
Expand Down Expand Up @@ -1202,4 +1205,4 @@ __launch_bounds__(NTHREAD)
} //end while
}


#endif // __CUDA_ARCH__ >= 300
6 changes: 5 additions & 1 deletion runtime/CUDAkernels/dev_approximate_gravity_fermi.cu
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
// #include "support_kernels.cu"
// MJH: I put the whole file in this because I couldn't get CMake to treat .cu files differently
#if __CUDA_ARCH__ < 300 // This file contains fermi-only code

// #include "support_kernels.cu"
#include <stdio.h>

#include "node_specs.h"
Expand Down Expand Up @@ -1055,3 +1058,4 @@ __launch_bounds__(NTHREAD)
}


#endif // __CUDA_ARCH__ < 300
55 changes: 30 additions & 25 deletions runtime/include/my_cuda_rt.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ namespace my_dev {
int ccMinor;
int defaultComputeMode;




public:
Expand All @@ -137,6 +138,8 @@ namespace my_dev {
{
}
}

int getComputeCapability() const { return 100 * ccMajor + 10 * ccMinor; }


int create(std::ofstream &log, bool disableTiming = false)
Expand Down Expand Up @@ -190,41 +193,43 @@ namespace my_dev {
int res = cudaSetDevice((int)dev);
if(res != cudaSuccess)
{
printf("failed (error #: %d), now trying all devices starting at 0 \n", res);

for(int i=0; i < ciDeviceCount; i++)
{
printf("Trying device: %d ...", i);
if(cudaSetDevice(i) != cudaSuccess)
{
printf("failed!\n");
if(i+1 == ciDeviceCount)
{
printf("All devices failed, exit! \n");
exit(0);
}
}
else
{
printf("success! \n");
this->dev = i;
break;
}
}
printf("failed (error #: %d), now trying all devices starting at 0 \n", res);

for(int i=0; i < ciDeviceCount; i++)
{
printf("Trying device: %d ...", i);
if(cudaSetDevice(i) != cudaSuccess)
{
printf("failed!\n");
if(i+1 == ciDeviceCount)
{
printf("All devices failed, exit! \n");
exit(0);
}
}
else
{
printf("success! \n");
this->dev = i;
break;
}
}
}
else
{
printf("success!\n");
printf("success!\n");
}

cudaDeviceProp deviceProp;
CU_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, (int)dev));
//Get the number of multiprocessors of the device
multiProcessorCount = deviceProp.multiProcessorCount;

ccMajor = deviceProp.major;
ccMinor = deviceProp.minor;

hContext_flag = true;
}


void startTiming(cudaStream_t stream=0)
{
Expand Down
7 changes: 6 additions & 1 deletion runtime/src/load_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,12 @@ void octree::load_kernels() {
getTNext.create("get_Tnext");
predictParticles.create("predict_particles");
getNActive.create("get_nactive");
approxGrav.create("dev_approximate_gravity");

if (devContext.getComputeCapability() >= 300)
approxGrav.create("dev_approximate_gravity_kepler");
else
approxGrav.create("dev_approximate_gravity");

correctParticles.create("correct_particles");
computeDt.create("compute_dt");
setActiveGrps.create("setActiveGroups");
Expand Down

0 comments on commit 94dab38

Please sign in to comment.