Skip to content

Commit

Permalink
lyra2 +3.5-4% on the 750ti
Browse files Browse the repository at this point in the history
  • Loading branch information
runestensland committed Jul 7, 2015
1 parent 21e20a9 commit 384d4cc
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions lyra2/cuda_lyra2.cu
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ __device__ __forceinline__ void reduceDuplex(vectype state[4], uint32_t thread)
uint32_t ps1 = (256 * thread);
uint32_t ps2 = (memshift * 7 + memshift * 8 + 256 * thread);

-#pragma unroll 4
+//#pragma unroll 4
for (int i = 0; i < 8; i++)
{
uint32_t s1 = ps1 + i*memshift;
Expand Down Expand Up @@ -136,7 +136,7 @@ __device__ __forceinline__ void reduceDuplexRowSetupV2(const int rowIn, const in
uint32_t ps3 = (memshift*7 + memshift * 8 * rowOut + 256 * thread);


-#pragma unroll 1
+//#pragma unroll 1
for (int i = 0; i < 8; i++)
{
uint32_t s1 = ps1 + i*memshift;
Expand Down Expand Up @@ -235,7 +235,7 @@ __device__ __forceinline__ void reduceDuplexRowtV2(const int rowIn, const int ro
uint32_t ps2 = (memshift * 8 * rowInOut + 256 * thread);
uint32_t ps3 = (memshift * 8 * rowOut + 256 * thread);

-#pragma unroll 1
+//#pragma unroll 1
for (int i = 0; i < 8; i++)
{
uint32_t s1 = ps1 + i*memshift;
Expand Down Expand Up @@ -357,7 +357,7 @@ __device__ __forceinline__ void reduceDuplexRowtV3(const int rowIn, const int ro
#if __CUDA_ARCH__ < 500
__global__ __launch_bounds__(48, 1)
#elif __CUDA_ARCH__ == 500
-__global__ __launch_bounds__(16, 1)
+__global__ __launch_bounds__(16,1)
#else
__global__ __launch_bounds__(TPB, 1)
#endif
Expand Down Expand Up @@ -589,7 +589,7 @@ uint32_t tpb;
if (device_sm[device_map[thr_id]]<500)
tpb = 48;
else if (device_sm[device_map[thr_id]]==500)
-tpb = 16;
+tpb = 8;
else
tpb = TPB;
dim3 grid((threads + tpb - 1) / tpb);
Expand Down

0 comments on commit 384d4cc

Please sign in to comment.