Skip to content

Commit

Permalink
Faster AES part one.
Browse files Browse the repository at this point in the history
Pointer hacking. More is coming.
  • Loading branch information
runestensland committed May 20, 2015
1 parent d8c380e commit 6313154
Showing 1 changed file with 20 additions and 7 deletions.
27 changes: 20 additions & 7 deletions x11/cuda_x11_aes.cu
Original file line number Diff line number Diff line change
Expand Up @@ -84,31 +84,42 @@ uint32_t bfe(uint32_t x, uint32_t bit, uint32_t numBits) {
asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(ret) : "r"(x), "r"(bit), "r"(numBits));
return ret;
}

/**
 * Bit-field insert, implemented with the PTX `bfi.b32` instruction.
 *
 * @param x        source of the bits to insert (its low `numBits` bits are used)
 * @param a        base value that receives the field
 * @param bit      starting bit position of the field inside `a`
 * @param numBits  width of the field in bits
 * @return `a` with bits [bit, bit + numBits) replaced by the low bits of `x`;
 *         all other bits of `a` are passed through unchanged.
 */
__device__ __forceinline__
uint32_t bfi(uint32_t x, uint32_t a, uint32_t bit, uint32_t numBits) {
    uint32_t merged;
    // Operand order follows PTX: destination, inserted value, base, position, length.
    asm("bfi.b32 %0, %1, %2, %3,%4;"
        : "=r"(merged)
        : "r"(x), "r"(a), "r"(bit), "r"(numBits));
    return merged;
}

/**
 * One table-driven AES round on a 4-word state, with a round-key word mixed
 * into the first output.
 *
 * Each output word is the XOR of four table look-ups, one per 256-entry
 * quarter of `sharedMemory` (offsets 0, 256, 512 and 768). The index into
 * quarter j is byte j of an input word, and the input word rotates by one
 * position for every successive quarter (presumably the usual AES T-table
 * layout -- confirm against the code that fills the table).
 *
 * The first look-up deliberately uses plain indexing. Building the address
 * with `bfi` on `(uint32_t)&sharedMemory[0]` truncates the pointer on 64-bit
 * builds and is only correct when the table base is 1 KiB aligned, because
 * `bfi` overwrites bits 2..9 of the base instead of adding to them.
 *
 * @param sharedMemory  look-up table; entries 0..1023 are read
 * @param x0,x1,x2,x3   input state words
 * @param k0            key word XORed into `y0` only
 * @param y0,y1,y2,y3   output state words (written, never read)
 */
__device__ __forceinline__
static void aes_round(
    const uint32_t *const __restrict__ sharedMemory,
    const uint32_t x0, const uint32_t x1, const uint32_t x2, const uint32_t x3, const uint32_t k0,
    uint32_t &y0, uint32_t &y1, uint32_t &y2, uint32_t &y3)
{
    y0 = sharedMemory[x0 & 0xff]
        ^ sharedMemory[bfe(x1, 8, 8) + 256]
        ^ sharedMemory[bfe(x2, 16, 8) + 512]
        ^ sharedMemory[(x3 >> 24) + 768]
        ^ k0;

    y1 = sharedMemory[x1 & 0xff]
        ^ sharedMemory[bfe(x2, 8, 8) + 256]
        ^ sharedMemory[bfe(x3, 16, 8) + 512]
        ^ sharedMemory[(x0 >> 24) + 768];

    y2 = sharedMemory[x2 & 0xff]
        ^ sharedMemory[bfe(x3, 8, 8) + 256]
        ^ sharedMemory[bfe(x0, 16, 8) + 512]
        ^ sharedMemory[(x1 >> 24) + 768];

    y3 = sharedMemory[x3 & 0xff]
        ^ sharedMemory[bfe(x0, 8, 8) + 256]
        ^ sharedMemory[bfe(x1, 16, 8) + 512]
        ^ sharedMemory[(x2 >> 24) + 768];
}

__device__ __forceinline__
Expand All @@ -117,6 +128,8 @@ const uint32_t *const __restrict__ sharedMemory,
const uint32_t x0, const uint32_t x1, const uint32_t x2, const uint32_t x3,
uint32_t &y0, uint32_t &y1, uint32_t &y2, uint32_t &y3)
{
// uint32_t s0 = (uint32_t)&sharedMemory[0];

y0 = sharedMemory[x0 & 0xff]
^ sharedMemory[bfe(x1, 8, 8) + 256]
^ sharedMemory[bfe(x2, 16, 8) + 512]
Expand Down

0 comments on commit 6313154

Please sign in to comment.