Skip to content

Commit

Permalink
decred: return to a per-GPU (2D) host array so the buffers can be freed
Browse files Browse the repository at this point in the history
  • Loading branch information
tpruvot committed Sep 27, 2016
1 parent 34e264c commit 92b7d72
Showing 1 changed file with 17 additions and 16 deletions.
33 changes: 17 additions & 16 deletions Algo256/decred.cu
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ __constant__ uint32_t _ALIGN(16) c_xors[215];

/* Buffers of candidate nonce(s) */
static uint32_t *d_resNonce[MAX_GPUS];
static __thread uint32_t *h_resNonce;
static uint32_t *h_resNonce[MAX_GPUS];

/* Rotate a 32-bit word right by 8 bits: with the second operand 0, selector
 * 0x0321 places source bytes 1,2,3,0 of `a` into result bytes 0,1,2,3. */
#define ROR8(a) __byte_perm(a, 0, 0x0321)
/* Rotate a 32-bit word by 16 bits (swap the two 16-bit halves): selector
 * 0x1032 places source bytes 2,3,0,1 of `a` into result bytes 0,1,2,3. */
#define ROL16(a) __byte_perm(a, 0, 0x1032)
Expand Down Expand Up @@ -375,47 +375,48 @@ extern "C" int scanhash_decred(int thr_id, struct work* work, uint32_t max_nonce
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);

CUDA_CALL_OR_RET_X(cudaMalloc(&d_resNonce[thr_id], MAX_RESULTS*sizeof(uint32_t)), -1);
CUDA_CALL_OR_RET_X(cudaMallocHost(&h_resNonce, MAX_RESULTS*sizeof(uint32_t)), -1);
CUDA_CALL_OR_RET_X(cudaMallocHost(&h_resNonce[thr_id], MAX_RESULTS*sizeof(uint32_t)), -1);
init[thr_id] = true;
}
memcpy(endiandata, pdata, 180);

decred_cpu_setBlock_52(endiandata);
h_resNonce[0] = 1;
cudaMemset(d_resNonce[thr_id], 0x00, sizeof(uint32_t));

do {
if (h_resNonce[0])
cudaMemset(d_resNonce[thr_id], 0x00, sizeof(uint32_t));
uint32_t* resNonces = h_resNonce[thr_id];

if (resNonces[0]) cudaMemset(d_resNonce[thr_id], 0x00, sizeof(uint32_t));

// GPU HASH
decred_gpu_hash_nonce <<<grid, block>>> (throughput, (*pnonce), d_resNonce[thr_id], targetHigh);

// first cell contains the valid nonces count
cudaMemcpy(h_resNonce, d_resNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
cudaMemcpy(resNonces, d_resNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);

if (h_resNonce[0])
if (resNonces[0])
{
uint32_t _ALIGN(64) vhash[8];

cudaMemcpy(h_resNonce, d_resNonce[thr_id], (h_resNonce[0]+1)*sizeof(uint32_t), cudaMemcpyDeviceToHost);
cudaMemcpy(resNonces, d_resNonce[thr_id], (resNonces[0]+1)*sizeof(uint32_t), cudaMemcpyDeviceToHost);

be32enc(&endiandata[DCR_NONCE_OFT32], h_resNonce[1]);
be32enc(&endiandata[DCR_NONCE_OFT32], resNonces[1]);
decred_hash(vhash, endiandata);
if (vhash[6] <= ptarget[6] && fulltest(vhash, ptarget))
{
int rc = work->valid_nonces = 1;
work_set_target_ratio(work, vhash);
*hashes_done = (*pnonce) - first_nonce + throughput;
work->nonces[0] = swab32(h_resNonce[1]);
work->nonces[0] = swab32(resNonces[1]);
*pnonce = work->nonces[0];

// search for another nonce
for(uint32_t n=2; n <= h_resNonce[0]; n++)
for(uint32_t n=2; n <= resNonces[0]; n++)
{
be32enc(&endiandata[DCR_NONCE_OFT32], h_resNonce[n]);
be32enc(&endiandata[DCR_NONCE_OFT32], resNonces[n]);
decred_hash(vhash, endiandata);
if (vhash[6] <= ptarget[6] && fulltest(vhash, ptarget)) {
work->nonces[1] = swab32(h_resNonce[n]);
work->nonces[1] = swab32(resNonces[n]);

if (bn_hash_target_ratio(vhash, ptarget) > work->shareratio[0]) {
// we really want the best first ? depends...
Expand All @@ -434,13 +435,13 @@ extern "C" int scanhash_decred(int thr_id, struct work* work, uint32_t max_nonce
work->nonces[0], work->sharediff[0], n, work->nonces[1], work->sharediff[1]);

} else if (vhash[6] > ptarget[6]) {
gpulog(LOG_WARNING, thr_id, "result %u for %08x does not validate on CPU!", n, h_resNonce[n]);
gpulog(LOG_WARNING, thr_id, "result %u for %08x does not validate on CPU!", n, resNonces[n]);
}
}
return rc;

} else if (vhash[6] > ptarget[6]) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", h_resNonce[1]);
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", resNonces[1]);
}
}
*pnonce += throughput;
Expand All @@ -459,7 +460,7 @@ extern "C" void free_decred(int thr_id)
return;

cudaDeviceSynchronize();
cudaFreeHost(h_resNonce);
cudaFreeHost(h_resNonce[thr_id]);
cudaFree(d_resNonce[thr_id]);

init[thr_id] = false;
Expand Down

0 comments on commit 92b7d72

Please sign in to comment.