Skip to content

Commit

Permalink
skein: merge the double implementations in one
Browse files Browse the repository at this point in the history
based on alexis skein kernels, tested ok on SM 2.1 and 3.0

The code is a bit hard to read, but... well... users don't care :p
  • Loading branch information
tpruvot committed Jan 28, 2017
1 parent 16ac9b6 commit feb99d0
Show file tree
Hide file tree
Showing 3 changed files with 761 additions and 3,276 deletions.
34 changes: 34 additions & 0 deletions cuda_vectors.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#include "cuda_helper.h"

/* Inline helpers for uint2 operations (used by skein) */

/**
 * Rotate a 64-bit value, held as a uint2, right by 8 bits.
 *
 * Treating a.x as the low and a.y as the high 32-bit word, each output word
 * is its own input word shifted right by one byte, with the low byte of the
 * other word placed in the top byte.
 *
 * @param a  input value as a pair of 32-bit words
 * @return   the pair rotated right by one byte
 */
__device__ __forceinline__
uint2 ROR8(const uint2 a) {
	// Selector 0x4321: bytes 1..3 of the first operand, then byte 0 of the
	// second operand as the most significant byte.
	return make_uint2(__byte_perm(a.x, a.y, 0x4321),
	                  __byte_perm(a.y, a.x, 0x4321));
}

/**
 * Rotate a 64-bit value, held as a uint2, left by 24 bits.
 *
 * Treating a.x as the low and a.y as the high 32-bit word, each output word
 * is the upper three bytes of the *other* input word moved down by one byte,
 * with the low byte of its own input word placed in the top byte.
 *
 * @param a  input value as a pair of 32-bit words
 * @return   the pair rotated left by three bytes
 */
__device__ __forceinline__
uint2 ROL24(const uint2 a) {
	// Selector 0x0765: bytes 1..3 of the second operand, then byte 0 of the
	// first operand as the most significant byte.
	return make_uint2(__byte_perm(a.x, a.y, 0x0765),
	                  __byte_perm(a.y, a.x, 0x0765));
}

/**
 * Add a 32-bit unsigned scalar to a value held as a uint2.
 *
 * The sum is computed on the result of devectorize(a) and converted back
 * with vectorize(); both helpers come from cuda_helper.h.
 * NOTE(review): presumably they pack/unpack the pair as one 64-bit integer
 * (x = low word, y = high word), so that a carry out of a.x propagates
 * into a.y - confirm against cuda_helper.h.
 *
 * A hand-written PTX variant (add.cc.u32 / addc.u32) used to sit here behind
 * an `#if 0` guard and was never compiled; it has been dropped.
 *
 * @param a  left operand as a pair of 32-bit words
 * @param b  32-bit value to add
 * @return   the sum, converted back to uint2
 */
static __device__ __forceinline__ uint2 operator+ (const uint2 a, const uint32_t b)
{
	return vectorize(devectorize(a) + b);
}

0 comments on commit feb99d0

Please sign in to comment.