Skip to content

Commit

Permalink
Better default intensities for quark on the 980 Ti
Browse files Browse the repository at this point in the history
  • Loading branch information
runestensland committed Apr 2, 2016
1 parent 6848402 commit 3a726d9
Show file tree
Hide file tree
Showing 13 changed files with 1,172 additions and 175 deletions.
851 changes: 851 additions & 0 deletions Algo256/decred_ok.cu

Large diffs are not rendered by default.

6 changes: 2 additions & 4 deletions bitslice_transformations_quad.cu
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,7 @@ void from_bitslice_quad_final(const uint32_t *const __restrict__ input, uint32_t
output[10] = __byte_perm(output[2], output[10], 0x7632);
SWAP4_final(output[6], output[10]);
output[6] = __byte_perm(output[6], 0, 0x3232);
}
else
} else
{
output[0] = __byte_perm(input[0], input[4], perm);
output[2] = __byte_perm(input[1], input[5], perm);
Expand Down Expand Up @@ -212,8 +211,7 @@ void from_bitslice_quad_final(const uint32_t *const __restrict__ input, uint32_t
output[2] = __byte_perm(output[2], 0, 0x1032);
output[8] = __byte_perm(output[8], 0, 0x1032);
output[10] = __byte_perm(output[10], 0, 0x1032);
}
else
}else
{
output[4] = output[0];
output[6] = output[2];
Expand Down
2 changes: 2 additions & 0 deletions ccminer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2705,6 +2705,8 @@ int main(int argc, char *argv[])
opt_syslog_pfx = strdup(PROGRAM_NAME);
opt_api_allow = strdup("127.0.0.1"); /* 0.0.0.0 for all ips */

printf("\t\t\tSP-Mod Private #6 \n");

#ifdef _MSC_VER
printf("Compiled with Visual C++ %d ", _MSC_VER / 100);
#else
Expand Down
Binary file modified ccminer.v12.suo
Binary file not shown.
357 changes: 244 additions & 113 deletions keccak/cuda_keccak256.cu

Large diffs are not rendered by default.

84 changes: 48 additions & 36 deletions keccak/keccak256.cu
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@ extern "C"

#include "cuda_helper.h"

static uint32_t *d_hash[8];
static uint32_t h_nounce[MAX_GPUS][2];

extern void keccak256_cpu_init(int thr_id, int threads);
extern void keccak256_setBlock_80(void *pdata,const void *ptarget);
extern uint32_t keccak256_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
extern void keccak256_cpu_init(int thr_id, uint32_t threads);
extern void keccak256_setBlock_80(void *pdata,const uint64_t *ptarget);
extern void keccak256_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *h_nounce);

// CPU Hash
extern "C" void keccak256_hash(void *state, const void *input)
Expand All @@ -34,25 +34,29 @@ extern "C" void keccak256_hash(void *state, const void *input)
memcpy(state, hash, 32);
}

static bool init[MAX_GPUS] = { 0 };

extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done)
{
const uint32_t first_nonce = pdata[19];
uint32_t intensity = (device_sm[device_map[thr_id]] > 500) ? 1 << 28 : 1 << 27;;
uint32_t throughput = device_intensity(device_map[thr_id], __func__, intensity); // 256*4096
throughput = min(throughput, max_nonce - first_nonce);

if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x0005;

uint32_t throughput = opt_work_size ? opt_work_size : (1 << 21); // 256*256*8*4
throughput = min(throughput, (int)(max_nonce - first_nonce));
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x01;

static bool init[8] = {0,0,0,0,0,0,0,0};
if (!init[thr_id]) {
cudaSetDevice(device_map[thr_id]);

CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
keccak256_cpu_init(thr_id, throughput);

if (!opt_cpumining) cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
if (opt_n_gputhreads == 1)
{
cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
}
keccak256_cpu_init(thr_id, (int)throughput);
init[thr_id] = true;
}

Expand All @@ -61,38 +65,46 @@ extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata,
be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);
}

keccak256_setBlock_80((void*)endiandata, ptarget);
keccak256_setBlock_80((void*)endiandata, (uint64_t *)ptarget);

do {
int order = 0;

uint32_t foundNonce = keccak256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
if (foundNonce != 0xffffffff)
keccak256_cpu_hash_80(thr_id, (int) throughput, pdata[19], h_nounce[thr_id]);
if (h_nounce[thr_id][0] != UINT32_MAX)
{
uint32_t vhash64[8];
uint32_t Htarg = ptarget[7];
be32enc(&endiandata[19], foundNonce);
uint32_t vhash64[8];
be32enc(&endiandata[19], h_nounce[thr_id][0]);
keccak256_hash(vhash64, endiandata);

if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {

*hashes_done = pdata[19] + throughput - first_nonce;
pdata[19] = foundNonce;
return 1;

} else {
applog(LOG_DEBUG, "GPU #%d: result for nounce %08x does not validate on CPU!", thr_id, foundNonce);
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget))
{
int res = 1;
// check if there was some other ones...
*hashes_done = pdata[19] - first_nonce + throughput;
if (h_nounce[thr_id][1] != 0xffffffff)
{
pdata[21] = h_nounce[thr_id][1];
res++;
if (opt_benchmark)
applog(LOG_INFO, "GPU #%d Found second nounce %08x", thr_id, h_nounce[thr_id][1], vhash64[7], Htarg);
}
pdata[19] = h_nounce[thr_id][0];
if (opt_benchmark)
applog(LOG_INFO, "GPU #%d Found nounce %08x", thr_id, h_nounce[thr_id][0], vhash64[7], Htarg);
return res;
}
else
{
if (vhash64[7] != Htarg)
{
applog(LOG_INFO, "GPU #%d: result for %08x does not validate on CPU!", thr_id, h_nounce[thr_id][0]);
}
}
}

if ((uint64_t) pdata[19] + throughput > (uint64_t) max_nonce) {
pdata[19] = max_nonce;
break;
}

pdata[19] += throughput;

} while (!scan_abort_flag && !work_restart[thr_id].restart);

*hashes_done = pdata[19] - first_nonce + 1;
} while (!scan_abort_flag && !work_restart[thr_id].restart && ((uint64_t)max_nonce > ((uint64_t)(pdata[19]) + (uint64_t)throughput)));
*hashes_done = pdata[19] - first_nonce;
return 0;
}
1 change: 0 additions & 1 deletion lyra2/cuda_lyra2v2.cu
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,6 @@ __device__ void reduceDuplexRowtV2(const int rowIn, const int rowInOut, const in
#pragma unroll
for (j = 0; j < 11; j++)
((uint2*)state2)[j + 1] ^= ((uint2*)state)[j];

#if __CUDA_ARCH__ == 500
if (rowInOut != rowOut)
{
Expand Down
15 changes: 8 additions & 7 deletions lyra2/lyra2REv2.cu
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ extern "C" int scanhash_lyra2v2(int thr_id, uint32_t *pdata,
const uint32_t first_nonce = pdata[19];
uint32_t intensity = 256 * 256 * 8;
uint32_t tpb = 8;
bool mergeblakekeccak = false;
// bool mergeblakekeccak = false;
cudaDeviceProp props;
cudaGetDeviceProperties(&props, device_map[thr_id]);
if (strstr(props.name, "970"))
Expand All @@ -108,13 +108,13 @@ extern "C" int scanhash_lyra2v2(int thr_id, uint32_t *pdata,
{
intensity = 256 * 256 * 10;
tpb = 16;
mergeblakekeccak = true;
// mergeblakekeccak = true;
}
else if (strstr(props.name, "750"))
{
intensity = 256 * 256 * 5;
tpb = 16;
mergeblakekeccak = true;
// mergeblakekeccak = true;
}
else if (strstr(props.name, "960"))
{
Expand Down Expand Up @@ -158,16 +158,17 @@ extern "C" int scanhash_lyra2v2(int thr_id, uint32_t *pdata,
do {
uint32_t foundNonce[2] = { 0, 0 };

if (mergeblakekeccak)
{
// if (mergeblakekeccak)
// {
blakeKeccak256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]);
}

/* }
else
{
blake256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]);
keccak256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id]);
}

*/
cubehash256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id]);
lyra2v2_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], tpb);
skein256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id]);
Expand Down
9 changes: 8 additions & 1 deletion quark/quarkcoin.cu
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,15 @@ extern "C" int scanhash_quark(int thr_id, uint32_t *pdata,

uint32_t intensity = 256*256*57;
intensity = intensity + ((1 << 22));
if (device_sm[device_map[thr_id]] > 500) intensity= 1 << 24;
cudaDeviceProp props;
cudaGetDeviceProperties(&props, device_map[thr_id]);

if (device_sm[device_map[thr_id]] > 500) intensity= 1 << 25;

if (strstr(props.name, "980 Ti"))
{
intensity = 1 << 25;
}
uint32_t throughput = device_intensity(device_map[thr_id], __func__, intensity); // 256*4096
throughput = min(throughput, max_nonce - first_nonce);

Expand Down
5 changes: 2 additions & 3 deletions x13/x13.cu
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ extern void cuda_jh512Keccak512_cpu_hash_64( uint32_t threads, uint32_t startNou



extern void x11_luffaCubehash512_cpu_hash_64(uint32_t threads, uint32_t startNounce, uint32_t *d_hash, uint32_t luffacubehashthreads);
extern void x11_luffaCubehash512_cpu_hash_64(uint32_t threads, uint32_t startNounce, uint32_t *d_hash);

extern void x11_shavite512_cpu_hash_64(uint32_t threads, uint32_t startNounce, uint32_t *d_hash, uint32_t shavitethreads);

Expand Down Expand Up @@ -160,7 +160,6 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata,
uint32_t throughput = device_intensity(device_map[thr_id], __func__, intensity);
uint32_t simdthreads = (device_sm[device_map[thr_id]] > 500) ? 64 : 32;
uint32_t shavitethreads = (device_sm[device_map[thr_id]] == 500) ? 256 : 320;
uint32_t luffacubehashthreads = (device_sm[device_map[thr_id]] == 500) ? 512 : 256;

throughput = min(throughput, (max_nonce - first_nonce));

Expand Down Expand Up @@ -209,7 +208,7 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata,
quark_groestl512_cpu_hash_64(throughput, pdata[19], NULL, d_hash[thr_id]);
quark_skein512_cpu_hash_64(throughput, pdata[19], NULL, d_hash[thr_id]);
cuda_jh512Keccak512_cpu_hash_64(throughput, pdata[19], d_hash[thr_id]);
x11_luffaCubehash512_cpu_hash_64(throughput, pdata[19], d_hash[thr_id], luffacubehashthreads);
x11_luffaCubehash512_cpu_hash_64(throughput, pdata[19], d_hash[thr_id]);
x11_shavite512_cpu_hash_64(throughput, pdata[19], d_hash[thr_id], shavitethreads);
x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id],simdthreads);
x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id]);
Expand Down
5 changes: 2 additions & 3 deletions x15/x14.cu
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ extern void quark_skein512_cpu_hash_64(uint32_t threads, uint32_t startNounce, u

extern void cuda_jh512Keccak512_cpu_hash_64(uint32_t threads, uint32_t startNounce, uint32_t *d_hash);

extern void x11_luffaCubehash512_cpu_hash_64( uint32_t threads, uint32_t startNounce, uint32_t *d_hash,uint32_t luffacubehashthreads);
extern void x11_luffaCubehash512_cpu_hash_64( uint32_t threads, uint32_t startNounce, uint32_t *d_hash);

extern void x11_shavite512_cpu_hash_64(uint32_t threads, uint32_t startNounce, uint32_t *d_hash, uint32_t shavitethreads);

Expand Down Expand Up @@ -159,7 +159,6 @@ extern "C" int scanhash_x14(int thr_id, uint32_t *pdata,
int intensity = (device_sm[device_map[thr_id]] > 500) ? 256 * 256 * 20 : 256 * 256 * 10;
uint32_t simdthreads = (device_sm[device_map[thr_id]] > 500) ? 64 : 32;
uint32_t shavitethreads = (device_sm[device_map[thr_id]] == 500) ? 256 : 320;
uint32_t luffacubehashthreads = (device_sm[device_map[thr_id]] == 500) ? 512 : 256;

uint32_t throughput = device_intensity(device_map[thr_id], __func__, intensity); // 19=256*256*8;
throughput = min(throughput, max_nonce - first_nonce);
Expand Down Expand Up @@ -209,7 +208,7 @@ extern "C" int scanhash_x14(int thr_id, uint32_t *pdata,
quark_groestl512_cpu_hash_64(throughput, pdata[19], NULL, d_hash[thr_id]);
quark_skein512_cpu_hash_64( throughput, pdata[19], NULL, d_hash[thr_id]);
cuda_jh512Keccak512_cpu_hash_64(throughput, pdata[19], d_hash[thr_id]);
x11_luffaCubehash512_cpu_hash_64(throughput, pdata[19], d_hash[thr_id], luffacubehashthreads);
x11_luffaCubehash512_cpu_hash_64(throughput, pdata[19], d_hash[thr_id]);
x11_shavite512_cpu_hash_64(throughput, pdata[19], d_hash[thr_id], shavitethreads);
x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id],simdthreads);
x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id]);
Expand Down
7 changes: 3 additions & 4 deletions x15/x15.cu
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ extern void quark_skein512_cpu_hash_64(uint32_t threads, uint32_t startNounce, u
//extern void quark_jh512_cpu_hash_64( uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash);
extern void cuda_jh512Keccak512_cpu_hash_64(uint32_t threads, uint32_t startNounce, uint32_t *d_hash);

extern void x11_luffaCubehash512_cpu_hash_64(uint32_t threads, uint32_t startNounce, uint32_t *d_hash, uint32_t luffacubehashthreads);
extern void x11_luffaCubehash512_cpu_hash_64(uint32_t threads, uint32_t startNounce, uint32_t *d_hash);
extern void x11_shavite512_cpu_hash_64(uint32_t threads, uint32_t startNounce, uint32_t *d_hash, uint32_t shavitethreads);

extern int x11_simd512_cpu_init(int thr_id, uint32_t threads);
Expand Down Expand Up @@ -173,10 +173,9 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
throughput = min(throughput, (max_nonce - first_nonce));
uint32_t simdthreads = (device_sm[device_map[thr_id]] > 500) ? 64 : 32;
uint32_t shavitethreads = (device_sm[device_map[thr_id]] == 500) ? 256 : 320;
uint32_t luffacubehashthreads = (device_sm[device_map[thr_id]] == 500) ? 512 : 256;

if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0xff;
((uint32_t*)ptarget)[7] = 0xf;

if (!init[thr_id])
{
Expand Down Expand Up @@ -223,7 +222,7 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
quark_skein512_cpu_hash_64(throughput, pdata[19], NULL, d_hash[thr_id]);
cuda_jh512Keccak512_cpu_hash_64(throughput, pdata[19], d_hash[thr_id]);

x11_luffaCubehash512_cpu_hash_64(throughput, pdata[19], d_hash[thr_id], luffacubehashthreads);
x11_luffaCubehash512_cpu_hash_64(throughput, pdata[19], d_hash[thr_id]);
x11_shavite512_cpu_hash_64(throughput, pdata[19], d_hash[thr_id],shavitethreads);
x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id],simdthreads);
x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id]);
Expand Down
5 changes: 2 additions & 3 deletions x17/x17.cu
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ extern void quark_skein512_cpu_hash_64(uint32_t threads, uint32_t startNounce, u

extern void cuda_jh512Keccak512_cpu_hash_64(uint32_t threads, uint32_t startNounce, uint32_t *d_hash);

extern void x11_luffaCubehash512_cpu_hash_64(uint32_t threads, uint32_t startNounce, uint32_t *d_hash, uint32_t luffacubehashthreads);
extern void x11_luffaCubehash512_cpu_hash_64(uint32_t threads, uint32_t startNounce, uint32_t *d_hash);

extern void x11_shavite512_cpu_hash_64(uint32_t threads, uint32_t startNounce, uint32_t *d_hash, uint32_t shavitethreads);

Expand Down Expand Up @@ -186,7 +186,6 @@ extern "C" int scanhash_x17(int thr_id, uint32_t *pdata,
int intensity = 256 * 256 * 9;
uint32_t simdthreads = (device_sm[device_map[thr_id]] > 500) ? 64 : 32;
uint32_t shavitethreads = (device_sm[device_map[thr_id]] == 500) ? 256 : 320;
uint32_t luffacubehashthreads = (device_sm[device_map[thr_id]] == 500) ? 512 : 256;

if (device_sm[device_map[thr_id]] == 520) intensity = 256 * 256 * 15;
uint32_t throughput = device_intensity(device_map[thr_id], __func__, intensity); // 19=256*256*8;
Expand Down Expand Up @@ -238,7 +237,7 @@ extern "C" int scanhash_x17(int thr_id, uint32_t *pdata,
quark_groestl512_cpu_hash_64(throughput, pdata[19], NULL, d_hash[thr_id]);
quark_skein512_cpu_hash_64(throughput, pdata[19], NULL, d_hash[thr_id]);
cuda_jh512Keccak512_cpu_hash_64(throughput, pdata[19], d_hash[thr_id]);
x11_luffaCubehash512_cpu_hash_64(throughput, pdata[19], d_hash[thr_id], luffacubehashthreads);
x11_luffaCubehash512_cpu_hash_64(throughput, pdata[19], d_hash[thr_id]);
x11_shavite512_cpu_hash_64(throughput, pdata[19], d_hash[thr_id],shavitethreads);
x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id], simdthreads);
x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id]);
Expand Down

0 comments on commit 3a726d9

Please sign in to comment.