Skip to content

Commit

Permalink
LBRY Credits kernel
Browse files Browse the repository at this point in the history
  • Loading branch information
runestensland committed Jul 16, 2016
1 parent 5626509 commit bf91f65
Showing 1 changed file with 236 additions and 1 deletion.
237 changes: 236 additions & 1 deletion x17/cuda_x17_sha512.cu
Original file line number Diff line number Diff line change
Expand Up @@ -762,7 +762,7 @@ void __device__ __forceinline__ RIPEMD160_ROUND_BODY(uint32_t *in, uint32_t *h)

#define ROL32(x, y) ROTL32(x,y) //rotate(x, y ## U)
#define SHR(x, y) (x >> y)
#define SWAP32(a) (as_uint(as_uchar4(a).wzyx))
//#define SWAP32(a) (as_uint(as_uchar4(a).wzyx))

#define S0(x) (ROL32(x, 25) ^ ROL32(x, 14) ^ SHR(x, 3))
#define S1(x) (ROL32(x, 15) ^ ROL32(x, 13) ^ SHR(x, 10))
Expand Down Expand Up @@ -943,3 +943,238 @@ __device__ __forceinline__ uint8 sha256_round(uint16 data, uint8 buf)
res.s7 = (v7 + buf.s7);
return (res);
}

/*
 * LBRY stage 1: double SHA256 of the 112-byte block header.
 *
 * Every thread hashes the same 27 header words from `input` followed by its
 * own nonce (the 28th word), runs a second SHA256 over the resulting 32-byte
 * digest, and stores the byte-swapped result in ctx[thread index] for the
 * next stage.
 *
 * threads      number of nonces to process; threads at or past this index
 *              do nothing
 * startNounce  nonce assigned to global thread 0
 * input        header words; input[0..26] are read, the nonce word is
 *              generated per thread
 * ctx          per-thread digest buffer, indexed by (nonce - startNounce);
 *              must hold at least `threads` entries
 */
__device__ void search(uint32_t threads, uint32_t startNounce, const uint32_t *input, uint8 *ctx)
{
	const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread >= threads)
		return;

	// The nonce this thread is testing. It must be the value that gets hashed;
	// hashing the thread index instead would ignore startNounce entirely.
	const uint32_t nounce = startNounce + thread;

	// The message block is handed to sha256_round() through a uint16 cast.
	// Over-align it so that cast is safe whatever alignment uint16 requires
	// (NOTE(review): the uint16 definition is not visible in this chunk).
	__align__(64) uint32_t SHA256Buf[16];

	// SHA256 initialization constants
	uint8 outbuf;
	outbuf.s0 = 0x6A09E667;
	outbuf.s1 = 0xBB67AE85;
	outbuf.s2 = 0x3C6EF372;
	outbuf.s3 = 0xA54FF53A;
	outbuf.s4 = 0x510E527F;
	outbuf.s5 = 0x9B05688C;
	outbuf.s6 = 0x1F83D9AB;
	outbuf.s7 = 0x5BE0CD19;

	// --- First SHA256, block 1: header words 0..15 ---
	#pragma unroll
	for (int i = 0; i < 16; ++i)
		SHA256Buf[i] = cuda_swab32(input[i]);

	outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf);

	// --- First SHA256, block 2: header words 16..26, nonce, padding ---
	// 108 header bytes + 4 nonce bytes = 112 bytes = 0x380 bits.
	#pragma unroll
	for (int i = 0; i < 11; ++i)
		SHA256Buf[i] = cuda_swab32(input[i + 16]);

	SHA256Buf[11] = cuda_swab32(nounce);
	SHA256Buf[12] = 0x80000000;   // padding: single 1 bit
	SHA256Buf[13] = 0x00000000;
	SHA256Buf[14] = 0x00000000;
	SHA256Buf[15] = 0x00000380;   // message length in bits

	outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf);

	// --- Second SHA256 over the 32-byte digest ---
	// The digest words are fed back as-is: the byte swap that would normally
	// be applied to the output cancels with the swap needed on the input.
	SHA256Buf[0] = outbuf.s0;
	SHA256Buf[1] = outbuf.s1;
	SHA256Buf[2] = outbuf.s2;
	SHA256Buf[3] = outbuf.s3;
	SHA256Buf[4] = outbuf.s4;
	SHA256Buf[5] = outbuf.s5;
	SHA256Buf[6] = outbuf.s6;
	SHA256Buf[7] = outbuf.s7;
	SHA256Buf[8] = 0x80000000;    // padding: single 1 bit
	#pragma unroll
	for (int i = 9; i < 15; ++i)
		SHA256Buf[i] = 0x00000000;
	SHA256Buf[15] = 0x00000100;   // 32 bytes = 256 bits

	outbuf.s0 = 0x6A09E667;
	outbuf.s1 = 0xBB67AE85;
	outbuf.s2 = 0x3C6EF372;
	outbuf.s3 = 0xA54FF53A;
	outbuf.s4 = 0x510E527F;
	outbuf.s5 = 0x9B05688C;
	outbuf.s6 = 0x1F83D9AB;
	outbuf.s7 = 0x5BE0CD19;

	outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf);

	// Byte-swap every digest word before handing it to the next stage.
	outbuf.s0 = cuda_swab32(outbuf.s0);
	outbuf.s1 = cuda_swab32(outbuf.s1);
	outbuf.s2 = cuda_swab32(outbuf.s2);
	outbuf.s3 = cuda_swab32(outbuf.s3);
	outbuf.s4 = cuda_swab32(outbuf.s4);
	outbuf.s5 = cuda_swab32(outbuf.s5);
	outbuf.s6 = cuda_swab32(outbuf.s6);
	outbuf.s7 = cuda_swab32(outbuf.s7);

	// (nonce - startNounce) is simply the global thread index.
	ctx[thread] = outbuf;
}

/*
 * LBRY stage 2: SHA512 -> 2 x RIPEMD160 -> SHA256.
 *
 * Reads the 32-byte digest stored in ctx[thread index], hashes it with
 * SHA512, hashes each 32-byte half of the SHA512 output with RIPEMD160,
 * then runs one SHA256 block over the two concatenated 20-byte RIPEMD160
 * digests. The (un-swapped) SHA256 state is written back to ctx[thread index].
 *
 * threads      number of entries to process; threads at or past this index
 *              do nothing
 * startNounce  kept for signature parity with the other stages; the ctx
 *              index (nonce - startNounce) reduces to the thread index
 * ctx          per-thread digest buffer, read and overwritten in place
 */
__device__ void search1(uint32_t threads, uint32_t startNounce, uint8 *ctx)
{
	const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread >= threads)
		return;

	const uint8 prev = ctx[thread];

	// --- SHA512 over the 32-byte digest ---
	// Pack pairs of 32-bit words into 64-bit words (low word first, matching
	// the in-memory layout on the little-endian device) and byte-swap them.
	// Done with shifts rather than pointer casts to avoid alignment
	// assumptions about the uint8 vector type.
	uint64_t W[16] = { 0ULL };
	W[0] = ((uint64_t)prev.s1 << 32) | (uint64_t)prev.s0;
	W[1] = ((uint64_t)prev.s3 << 32) | (uint64_t)prev.s2;
	W[2] = ((uint64_t)prev.s5 << 32) | (uint64_t)prev.s4;
	W[3] = ((uint64_t)prev.s7 << 32) | (uint64_t)prev.s6;

	for (int i = 0; i < 4; ++i)
		W[i] = SWAP64(W[i]);

	W[4] = 0x8000000000000000ULL;    // padding: single 1 bit
	W[15] = 0x0000000000000100ULL;   // 32 bytes = 256 bits

	uint64_t SHA512Out[8];
	for (int i = 0; i < 8; ++i)
		SHA512Out[i] = SHA512_INIT[i];

	SHA512Block(W, SHA512Out);

	for (int i = 0; i < 8; ++i)
		SHA512Out[i] = SWAP64(SHA512Out[i]);

	// --- RIPEMD160 over each 32-byte half of the SHA512 output ---
	uint32_t RMD160_0[16] = { 0U };
	uint32_t RMD160_1[16] = { 0U };
	uint32_t RMD160_0_Out[5], RMD160_1_Out[5];

	for (int i = 0; i < 4; ++i)
	{
		// Split each 64-bit word into low/high 32-bit words, low word first.
		RMD160_0[2 * i]     = (uint32_t)(SHA512Out[i]);
		RMD160_0[2 * i + 1] = (uint32_t)(SHA512Out[i] >> 32);
		RMD160_1[2 * i]     = (uint32_t)(SHA512Out[i + 4]);
		RMD160_1[2 * i + 1] = (uint32_t)(SHA512Out[i + 4] >> 32);
	}

	RMD160_0[8] = RMD160_1[8] = 0x00000080;     // padding byte 0x80
	RMD160_0[14] = RMD160_1[14] = 0x00000100;   // 32 bytes = 256 bits

	for (int i = 0; i < 5; ++i)
	{
		RMD160_0_Out[i] = RMD160_IV[i];
		RMD160_1_Out[i] = RMD160_IV[i];
	}

	RIPEMD160_ROUND_BODY(RMD160_0, RMD160_0_Out);
	RIPEMD160_ROUND_BODY(RMD160_1, RMD160_1_Out);

	// --- SHA256 over the two concatenated RIPEMD160 digests (40 bytes) ---
	// The block is passed to sha256_round() through a uint16 cast, so
	// over-align it (NOTE(review): uint16 definition not visible here).
	__align__(64) uint32_t SHA256Buf[16];

	for (int i = 0; i < 5; ++i)
		SHA256Buf[i] = cuda_swab32(RMD160_0_Out[i]);
	for (int i = 5; i < 10; ++i)
		SHA256Buf[i] = cuda_swab32(RMD160_1_Out[i - 5]);

	SHA256Buf[10] = 0x80000000;   // padding: single 1 bit
	for (int i = 11; i < 15; ++i)
		SHA256Buf[i] = 0x00000000U;
	SHA256Buf[15] = 0x00000140;   // 40 bytes = 320 bits

	// SHA256 initialization constants
	uint8 outbuf;
	outbuf.s0 = 0x6A09E667;
	outbuf.s1 = 0xBB67AE85;
	outbuf.s2 = 0x3C6EF372;
	outbuf.s3 = 0xA54FF53A;
	outbuf.s4 = 0x510E527F;
	outbuf.s5 = 0x9B05688C;
	outbuf.s6 = 0x1F83D9AB;
	outbuf.s7 = 0x5BE0CD19;

	outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf);

	// Stored without a byte swap: the next stage feeds these words straight
	// back into SHA256 as message words.
	ctx[thread] = outbuf;
}

/*
 * LBRY stage 3: final SHA256 and target test.
 *
 * Runs one SHA256 block over the 32-byte state stored in ctx[thread index],
 * builds a 64-bit value from the byte-swapped last two digest words and, if
 * it is <= target, records this thread's nonce in `output`.
 *
 * Result protocol (restored from the commented-out reference line
 * `output[atomic_inc(output + 0xFF)] = SWAP32(gid)`):
 *   output[0xFF]      running count of hits; the caller must zero it before
 *                     the launch and should clamp it to 0xFF when reading
 *   output[0..0xFE]   byte-swapped nonces of the hits
 * NOTE(review): confirm the host side expects the byte-swapped nonce.
 *
 * threads      number of entries to process; threads at or past this index
 *              do nothing
 * startNounce  nonce assigned to global thread 0
 * ctx          per-thread digest buffer written by the previous stage
 * output       result buffer of at least 0x100 words (see above)
 * target       64-bit threshold the test value is compared against
 */
__device__ void search2(uint32_t threads, uint32_t startNounce, uint8 *ctx, uint32_t *output, uint64_t target)
{
	const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread >= threads)
		return;

	const uint32_t nounce = startNounce + thread;

	// (nonce - startNounce) is simply the global thread index.
	uint8 outbuf = ctx[thread];

	// The block is passed to sha256_round() through a uint16 cast, so
	// over-align it (NOTE(review): uint16 definition not visible here).
	__align__(64) uint32_t SHA256Buf[16];

	SHA256Buf[0] = outbuf.s0;
	SHA256Buf[1] = outbuf.s1;
	SHA256Buf[2] = outbuf.s2;
	SHA256Buf[3] = outbuf.s3;
	SHA256Buf[4] = outbuf.s4;
	SHA256Buf[5] = outbuf.s5;
	SHA256Buf[6] = outbuf.s6;
	SHA256Buf[7] = outbuf.s7;
	SHA256Buf[8] = 0x80000000;    // padding: single 1 bit
	for (int i = 9; i < 15; ++i)
		SHA256Buf[i] = 0x00000000;
	SHA256Buf[15] = 0x00000100;   // 32 bytes = 256 bits

	// SHA256 initialization constants
	outbuf.s0 = 0x6A09E667;
	outbuf.s1 = 0xBB67AE85;
	outbuf.s2 = 0x3C6EF372;
	outbuf.s3 = 0xA54FF53A;
	outbuf.s4 = 0x510E527F;
	outbuf.s5 = 0x9B05688C;
	outbuf.s6 = 0x1F83D9AB;
	outbuf.s7 = 0x5BE0CD19;

	outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf);

	// Only the last two digest words take part in the target test.
	outbuf.s6 = cuda_swab32(outbuf.s6);
	outbuf.s7 = cuda_swab32(outbuf.s7);

	const uint64_t test = MAKE_ULONGLONG(outbuf.s7, outbuf.s6);
	if (test <= target)
	{
		// atomicAdd returns the previous count, which is this hit's slot.
		const uint32_t slot = atomicAdd(output + 0xFF, 1U);

		// Never write into (or past) the counter word itself.
		if (slot < 0xFF)
			output[slot] = cuda_swab32(nounce);
	}
}

0 comments on commit bf91f65

Please sign in to comment.