Skip to content

Commit

Permalink
Show intensity on init for all algos
Browse files Browse the repository at this point in the history
  • Loading branch information
tpruvot committed Sep 26, 2016
1 parent 65cd430 commit 34e97bf
Show file tree
Hide file tree
Showing 38 changed files with 93 additions and 36 deletions.
1 change: 1 addition & 0 deletions Algo256/blake256.cu
Expand Up @@ -504,6 +504,7 @@ extern "C" int scanhash_blake256(int thr_id, struct work* work, uint32_t max_non
cudaDeviceSetCacheConfig(cudaFuncCachePreferL1); cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
CUDA_LOG_ERROR(); CUDA_LOG_ERROR();
} }
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);


CUDA_CALL_OR_RET_X(cudaMalloc(&d_resNonce[thr_id], NBN * sizeof(uint32_t)), -1); CUDA_CALL_OR_RET_X(cudaMalloc(&d_resNonce[thr_id], NBN * sizeof(uint32_t)), -1);
CUDA_CALL_OR_RET_X(cudaMallocHost(&h_resNonce[thr_id], NBN * sizeof(uint32_t)), -1); CUDA_CALL_OR_RET_X(cudaMallocHost(&h_resNonce[thr_id], NBN * sizeof(uint32_t)), -1);
Expand Down
6 changes: 6 additions & 0 deletions Algo256/bmw.cu
Expand Up @@ -52,6 +52,12 @@ extern "C" int scanhash_bmw(int thr_id, struct work* work, uint32_t max_nonce, u


if (!init[thr_id]) { if (!init[thr_id]) {
cudaSetDevice(device_map[thr_id]); cudaSetDevice(device_map[thr_id]);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
}
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);


cuda_check_cpu_init(thr_id, throughput); cuda_check_cpu_init(thr_id, throughput);
bmw256_midstate_init(thr_id, throughput); bmw256_midstate_init(thr_id, throughput);
Expand Down
1 change: 1 addition & 0 deletions Algo256/decred.cu
Expand Up @@ -376,6 +376,7 @@ extern "C" int scanhash_decred(int thr_id, struct work* work, uint32_t max_nonce
cudaDeviceSetCacheConfig(cudaFuncCachePreferL1); cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
CUDA_LOG_ERROR(); CUDA_LOG_ERROR();
} }
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);


CUDA_CALL_OR_RET_X(cudaMalloc(&d_resNonce[thr_id], maxResults*sizeof(uint32_t)), -1); CUDA_CALL_OR_RET_X(cudaMalloc(&d_resNonce[thr_id], maxResults*sizeof(uint32_t)), -1);
CUDA_CALL_OR_RET_X(cudaMallocHost(&h_resNonce[thr_id], maxResults*sizeof(uint32_t)), -1); CUDA_CALL_OR_RET_X(cudaMallocHost(&h_resNonce[thr_id], maxResults*sizeof(uint32_t)), -1);
Expand Down
10 changes: 9 additions & 1 deletion Algo256/keccak256.cu
Expand Up @@ -48,8 +48,16 @@ extern "C" int scanhash_keccak256(int thr_id, struct work* work, uint32_t max_no
if (opt_benchmark) if (opt_benchmark)
ptarget[7] = 0x000f; ptarget[7] = 0x000f;


if (!init[thr_id]) { if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]); cudaSetDevice(device_map[thr_id]);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);


CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], throughput * 64)); CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], throughput * 64));
keccak256_cpu_init(thr_id, throughput); keccak256_cpu_init(thr_id, throughput);
Expand Down
12 changes: 7 additions & 5 deletions Algo256/vanilla.cu
Expand Up @@ -378,6 +378,11 @@ extern "C" int scanhash_vanilla(int thr_id, struct work* work, uint32_t max_nonc
const uint32_t targetHigh = ptarget[6]; const uint32_t targetHigh = ptarget[6];
int dev_id = device_map[thr_id]; int dev_id = device_map[thr_id];


int intensity = (device_sm[dev_id] > 500 && !is_windows()) ? 30 : 24;
if (device_sm[dev_id] < 350) intensity = 22;
uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity);
if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);

if (!init[thr_id]) { if (!init[thr_id]) {
cudaSetDevice(dev_id); cudaSetDevice(dev_id);
if (opt_cudaschedule == -1 && gpu_threads == 1) { if (opt_cudaschedule == -1 && gpu_threads == 1) {
Expand All @@ -387,6 +392,8 @@ extern "C" int scanhash_vanilla(int thr_id, struct work* work, uint32_t max_nonc
cudaDeviceSetCacheConfig(cudaFuncCachePreferL1); cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
CUDA_LOG_ERROR(); CUDA_LOG_ERROR();
} }
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);

CUDA_CALL_OR_RET_X(cudaMalloc(&d_resNonce[thr_id], NBN * sizeof(uint32_t)), -1); CUDA_CALL_OR_RET_X(cudaMalloc(&d_resNonce[thr_id], NBN * sizeof(uint32_t)), -1);
CUDA_CALL_OR_RET_X(cudaMallocHost(&h_resNonce[thr_id], NBN * sizeof(uint32_t)), -1); CUDA_CALL_OR_RET_X(cudaMallocHost(&h_resNonce[thr_id], NBN * sizeof(uint32_t)), -1);
cudaStreamCreate(&streams[thr_id]); cudaStreamCreate(&streams[thr_id]);
Expand All @@ -402,11 +409,6 @@ extern "C" int scanhash_vanilla(int thr_id, struct work* work, uint32_t max_nonc


vanilla_cpu_setBlock_16(thr_id,endiandata,&pdata[16]); vanilla_cpu_setBlock_16(thr_id,endiandata,&pdata[16]);


int intensity = (device_sm[dev_id] > 500 && !is_windows()) ? 30 : 24;
if (device_sm[dev_id] < 350) intensity = 22;
uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity);
if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);

const dim3 grid((throughput + (NPT*TPB)-1)/(NPT*TPB)); const dim3 grid((throughput + (NPT*TPB)-1)/(NPT*TPB));
const dim3 block(TPB); const dim3 block(TPB);
int rc = 0; int rc = 0;
Expand Down
1 change: 1 addition & 0 deletions JHA/jackpotcoin.cu
Expand Up @@ -105,6 +105,7 @@ extern "C" int scanhash_jackpot(int thr_id, struct work *work, uint32_t max_nonc
gpulog(LOG_ERR, thr_id, "Sorry, This algo is not supported by this GPU arch (SM 3.0 required)"); gpulog(LOG_ERR, thr_id, "Sorry, This algo is not supported by this GPU arch (SM 3.0 required)");
proper_exit(EXIT_CODE_CUDA_ERROR); proper_exit(EXIT_CODE_CUDA_ERROR);
} }
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);


CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput)); CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput));


Expand Down
2 changes: 0 additions & 2 deletions ccminer.cpp
Expand Up @@ -2981,8 +2981,6 @@ void parse_arg(int key, char *arg)
} }
else if (gpus_intensity[n] != (1 << v)) { else if (gpus_intensity[n] != (1 << v)) {
gpus_intensity[n] = (1 << v); gpus_intensity[n] = (1 << v);
applog(LOG_INFO, "Intensity set to %u, %u cuda threads",
v, gpus_intensity[n]);
} }
} }
last = gpus_intensity[n]; last = gpus_intensity[n];
Expand Down
2 changes: 1 addition & 1 deletion configure.ac
@@ -1,4 +1,4 @@
AC_INIT([ccminer], [1.8.2], [], [ccminer], [http://github.com/tpruvot/ccminer]) AC_INIT([ccminer], [1.8.3], [], [ccminer], [http://github.com/tpruvot/ccminer])


AC_PREREQ([2.59c]) AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM AC_CANONICAL_SYSTEM
Expand Down
7 changes: 7 additions & 0 deletions fuguecoin.cpp
Expand Up @@ -40,6 +40,13 @@ int scanhash_fugue256(int thr_id, struct work* work, uint32_t max_nonce, unsigne
if(!init[thr_id]) if(!init[thr_id])
{ {
cudaSetDevice(device_map[thr_id]); cudaSetDevice(device_map[thr_id]);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);


fugue256_cpu_init(thr_id, throughput); fugue256_cpu_init(thr_id, throughput);
init[thr_id] = true; init[thr_id] = true;
Expand Down
2 changes: 2 additions & 0 deletions groestlcoin.cpp
Expand Up @@ -48,6 +48,8 @@ int scanhash_groestlcoin(int thr_id, struct work *work, uint32_t max_nonce, unsi
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR(); CUDA_LOG_ERROR();
} }
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);

CUDA_LOG_ERROR(); CUDA_LOG_ERROR();
groestlcoin_cpu_init(thr_id, throughput); groestlcoin_cpu_init(thr_id, throughput);
init[thr_id] = true; init[thr_id] = true;
Expand Down
1 change: 1 addition & 0 deletions heavy/heavy.cu
Expand Up @@ -178,6 +178,7 @@ int scanhash_heavy(int thr_id, struct work *work, uint32_t max_nonce, unsigned l
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR(); CUDA_LOG_ERROR();
} }
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);


hefty_cpu_init(thr_id, throughput); hefty_cpu_init(thr_id, throughput);
sha256_cpu_init(thr_id, throughput); sha256_cpu_init(thr_id, throughput);
Expand Down
43 changes: 20 additions & 23 deletions lyra2/lyra2RE.cu
Expand Up @@ -79,7 +79,7 @@ extern "C" void lyra2re_hash(void *state, const void *input)
} }


static bool init[MAX_GPUS] = { 0 }; static bool init[MAX_GPUS] = { 0 };
static uint32_t throughput[MAX_GPUS] = { 0 }; static __thread uint32_t throughput = 0;


extern "C" int scanhash_lyra2(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done) extern "C" int scanhash_lyra2(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
{ {
Expand All @@ -99,35 +99,32 @@ extern "C" int scanhash_lyra2(int thr_id, struct work* work, uint32_t max_nonce,


int intensity = (device_sm[dev_id] >= 500 && !is_windows()) ? 17 : 16; int intensity = (device_sm[dev_id] >= 500 && !is_windows()) ? 17 : 16;
if (device_sm[device_map[thr_id]] == 500) intensity = 15; if (device_sm[device_map[thr_id]] == 500) intensity = 15;
int temp = intensity; throughput = cuda_default_throughput(thr_id, 1U << intensity); // 18=256*256*4;
throughput[thr_id] = cuda_default_throughput(thr_id, 1U << intensity); // 18=256*256*4; if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
if (init[thr_id]) throughput[thr_id] = min(throughput[thr_id], max_nonce - first_nonce);


cudaDeviceProp props; cudaDeviceProp props;
cudaGetDeviceProperties(&props, dev_id); cudaGetDeviceProperties(&props, dev_id);


if (strstr(props.name, "750 Ti")) gtx750ti = true; if (strstr(props.name, "750 Ti")) gtx750ti = true;
else gtx750ti = false; else gtx750ti = false;


blake256_cpu_init(thr_id, throughput[thr_id]); gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);
keccak256_cpu_init(thr_id, throughput[thr_id]);
skein256_cpu_init(thr_id, throughput[thr_id]); blake256_cpu_init(thr_id, throughput);
groestl256_cpu_init(thr_id, throughput[thr_id]); keccak256_cpu_init(thr_id, throughput);
skein256_cpu_init(thr_id, throughput);
groestl256_cpu_init(thr_id, throughput);


if (device_sm[dev_id] >= 500) if (device_sm[dev_id] >= 500)
{ {
size_t matrix_sz = device_sm[dev_id] > 500 ? sizeof(uint64_t) * 4 * 4 : sizeof(uint64_t) * 8 * 8 * 3 * 4; size_t matrix_sz = device_sm[dev_id] > 500 ? sizeof(uint64_t) * 4 * 4 : sizeof(uint64_t) * 8 * 8 * 3 * 4;
CUDA_SAFE_CALL(cudaMalloc(&d_matrix[thr_id], matrix_sz * throughput[thr_id])); CUDA_SAFE_CALL(cudaMalloc(&d_matrix[thr_id], matrix_sz * throughput));
lyra2_cpu_init(thr_id, throughput[thr_id], d_matrix[thr_id]); lyra2_cpu_init(thr_id, throughput, d_matrix[thr_id]);
} }


CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t)32 * throughput[thr_id])); CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t)32 * throughput));


init[thr_id] = true; init[thr_id] = true;
if (temp != intensity){
gpulog(LOG_INFO, thr_id, "Intensity set to %u, %u cuda threads",
intensity, throughput[thr_id]);
}
} }


uint32_t _ALIGN(128) endiandata[20]; uint32_t _ALIGN(128) endiandata[20];
Expand All @@ -141,15 +138,15 @@ extern "C" int scanhash_lyra2(int thr_id, struct work* work, uint32_t max_nonce,
int order = 0; int order = 0;
uint32_t foundNonce; uint32_t foundNonce;


blake256_cpu_hash_80(thr_id, throughput[thr_id], pdata[19], d_hash[thr_id], order++); blake256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
keccak256_cpu_hash_32(thr_id, throughput[thr_id], pdata[19], d_hash[thr_id], order++); keccak256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
lyra2_cpu_hash_32(thr_id, throughput[thr_id], pdata[19], d_hash[thr_id], gtx750ti); lyra2_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], gtx750ti);
skein256_cpu_hash_32(thr_id, throughput[thr_id], pdata[19], d_hash[thr_id], order++); skein256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
TRACE("S") TRACE("S")


*hashes_done = pdata[19] - first_nonce + throughput[thr_id]; *hashes_done = pdata[19] - first_nonce + throughput;


foundNonce = groestl256_cpu_hash_32(thr_id, throughput[thr_id], pdata[19], d_hash[thr_id], order++); foundNonce = groestl256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
if (foundNonce != UINT32_MAX) if (foundNonce != UINT32_MAX)
{ {
uint32_t _ALIGN(64) vhash64[8]; uint32_t _ALIGN(64) vhash64[8];
Expand Down Expand Up @@ -181,11 +178,11 @@ extern "C" int scanhash_lyra2(int thr_id, struct work* work, uint32_t max_nonce,
} }
} }


if ((uint64_t)throughput[thr_id] + pdata[19] >= max_nonce) { if ((uint64_t)throughput + pdata[19] >= max_nonce) {
pdata[19] = max_nonce; pdata[19] = max_nonce;
break; break;
} }
pdata[19] += throughput[thr_id]; pdata[19] += throughput;


} while (!work_restart[thr_id].restart); } while (!work_restart[thr_id].restart);


Expand Down
1 change: 1 addition & 0 deletions lyra2/lyra2REv2.cu
Expand Up @@ -113,6 +113,7 @@ extern "C" int scanhash_lyra2v2(int thr_id, struct work* work, uint32_t max_nonc
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR(); CUDA_LOG_ERROR();
} }
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);


blake256_cpu_init(thr_id, throughput); blake256_cpu_init(thr_id, throughput);
keccak256_cpu_init(thr_id,throughput); keccak256_cpu_init(thr_id,throughput);
Expand Down
2 changes: 2 additions & 0 deletions myriadgroestl.cpp
Expand Up @@ -55,6 +55,8 @@ int scanhash_myriad(int thr_id, struct work *work, uint32_t max_nonce, unsigned
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR(); CUDA_LOG_ERROR();
} }
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);

myriadgroestl_cpu_init(thr_id, throughput); myriadgroestl_cpu_init(thr_id, throughput);
init[thr_id] = true; init[thr_id] = true;
} }
Expand Down
1 change: 1 addition & 0 deletions neoscrypt/neoscrypt.cpp
Expand Up @@ -40,6 +40,7 @@ int scanhash_neoscrypt(int thr_id, struct work* work, uint32_t max_nonce, unsign
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
cudaGetLastError(); // reset errors if device is not "reset" cudaGetLastError(); // reset errors if device is not "reset"
} }
gpulog(LOG_INFO, thr_id, "Intensity set to %g (+5), %u cuda threads", throughput2intensity(throughput), throughput);


if (device_sm[dev_id] <= 300) { if (device_sm[dev_id] <= 300) {
gpulog(LOG_ERR, thr_id, "Sorry neoscrypt is not supported on SM 3.0 devices"); gpulog(LOG_ERR, thr_id, "Sorry neoscrypt is not supported on SM 3.0 devices");
Expand Down
1 change: 1 addition & 0 deletions pentablake.cu
Expand Up @@ -69,6 +69,7 @@ extern "C" int scanhash_pentablake(int thr_id, struct work *work, uint32_t max_n
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR(); CUDA_LOG_ERROR();
} }
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);


CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput)); CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput));


Expand Down
1 change: 1 addition & 0 deletions quark/quarkcoin.cu
Expand Up @@ -145,6 +145,7 @@ extern "C" int scanhash_quark(int thr_id, struct work* work, uint32_t max_nonce,
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR(); CUDA_LOG_ERROR();
} }
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);


cudaGetLastError(); cudaGetLastError();
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput)); CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput));
Expand Down
3 changes: 2 additions & 1 deletion qubit/deep.cu
Expand Up @@ -66,8 +66,9 @@ extern "C" int scanhash_deep(int thr_id, struct work* work, uint32_t max_nonce,
cudaDeviceReset(); cudaDeviceReset();
// reduce cpu usage // reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
} }
CUDA_LOG_ERROR(); gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);


CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput)); CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput));


Expand Down
1 change: 1 addition & 0 deletions qubit/luffa.cu
Expand Up @@ -51,6 +51,7 @@ extern "C" int scanhash_luffa(int thr_id, struct work* work, uint32_t max_nonce,
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR(); CUDA_LOG_ERROR();
} }
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);


CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput)); CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput));


Expand Down
1 change: 1 addition & 0 deletions qubit/qubit.cu
Expand Up @@ -79,6 +79,7 @@ extern "C" int scanhash_qubit(int thr_id, struct work* work, uint32_t max_nonce,
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR(); CUDA_LOG_ERROR();
} }
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);


qubit_luffa512_cpu_init(thr_id, throughput); qubit_luffa512_cpu_init(thr_id, throughput);
x11_cubehash512_cpu_init(thr_id, throughput); x11_cubehash512_cpu_init(thr_id, throughput);
Expand Down
3 changes: 2 additions & 1 deletion scrypt-jane.cpp
Expand Up @@ -489,8 +489,9 @@ int scanhash_scrypt_jane(int thr_id, struct work *work, uint32_t max_nonce, unsi
cudaDeviceSynchronize(); cudaDeviceSynchronize();
cudaDeviceReset(); cudaDeviceReset();
cudaSetDevice(dev_id); cudaSetDevice(dev_id);

throughput = cuda_throughput(thr_id); throughput = cuda_throughput(thr_id);
applog(LOG_INFO, "GPU #%d: cuda throughput is %d", dev_id, throughput); gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);


init[thr_id] = true; init[thr_id] = true;
} }
Expand Down
3 changes: 2 additions & 1 deletion scrypt.cpp
Expand Up @@ -721,8 +721,9 @@ int scanhash_scrypt(int thr_id, struct work *work, uint32_t max_nonce, unsigned
cudaDeviceSynchronize(); cudaDeviceSynchronize();
cudaDeviceReset(); cudaDeviceReset();
cudaSetDevice(dev_id); cudaSetDevice(dev_id);

throughput = cuda_throughput(thr_id); throughput = cuda_throughput(thr_id);
applog(LOG_INFO, "GPU #%d: cuda throughput is %d", dev_id, throughput); gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);


init[thr_id] = true; init[thr_id] = true;
} }
Expand Down
1 change: 1 addition & 0 deletions sia.cu
Expand Up @@ -214,6 +214,7 @@ int scanhash_sia(int thr_id, struct work *work, uint32_t max_nonce, unsigned lon
//cudaDeviceSetCacheConfig(cudaFuncCachePreferL1); //cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
CUDA_LOG_ERROR(); CUDA_LOG_ERROR();
} }
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);


CUDA_CALL_OR_RET_X(cudaMalloc(&d_resNonces[thr_id], NBN * sizeof(uint32_t)), -1); CUDA_CALL_OR_RET_X(cudaMalloc(&d_resNonces[thr_id], NBN * sizeof(uint32_t)), -1);
init[thr_id] = true; init[thr_id] = true;
Expand Down
1 change: 1 addition & 0 deletions skein.cu
Expand Up @@ -378,6 +378,7 @@ extern "C" int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_no
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR(); CUDA_LOG_ERROR();
} }
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);


if (sm5) { if (sm5) {
skeincoin_init(thr_id); skeincoin_init(thr_id);
Expand Down
1 change: 1 addition & 0 deletions skein2.cpp
Expand Up @@ -62,6 +62,7 @@ int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR(); CUDA_LOG_ERROR();
} }
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);


cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput); cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput);


Expand Down
1 change: 1 addition & 0 deletions x11/c11.cu
Expand Up @@ -125,6 +125,7 @@ extern "C" int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, u
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR(); CUDA_LOG_ERROR();
} }
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);


quark_blake512_cpu_init(thr_id, throughput); quark_blake512_cpu_init(thr_id, throughput);
quark_bmw512_cpu_init(thr_id, throughput); quark_bmw512_cpu_init(thr_id, throughput);
Expand Down
8 changes: 7 additions & 1 deletion x11/fresh.cu
Expand Up @@ -83,7 +83,13 @@ extern "C" int scanhash_fresh(int thr_id, struct work* work, uint32_t max_nonce,
if (!init[thr_id]) if (!init[thr_id])
{ {
cudaSetDevice(device_map[thr_id]); cudaSetDevice(device_map[thr_id]);
CUDA_LOG_ERROR(); if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);


CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t)64 * throughput + 4), -1); CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t)64 * throughput + 4), -1);


Expand Down
1 change: 1 addition & 0 deletions x11/s3.cu
Expand Up @@ -86,6 +86,7 @@ extern "C" int scanhash_s3(int thr_id, struct work* work, uint32_t max_nonce, un
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR(); CUDA_LOG_ERROR();
} }
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);


CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput)); CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput));


Expand Down
1 change: 1 addition & 0 deletions x11/sib.cu
Expand Up @@ -120,6 +120,7 @@ extern "C" int scanhash_sib(int thr_id, struct work* work, uint32_t max_nonce, u
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR(); CUDA_LOG_ERROR();
} }
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);


quark_blake512_cpu_init(thr_id, throughput); quark_blake512_cpu_init(thr_id, throughput);
quark_bmw512_cpu_init(thr_id, throughput); quark_bmw512_cpu_init(thr_id, throughput);
Expand Down
1 change: 1 addition & 0 deletions x11/x11.cu
Expand Up @@ -114,6 +114,7 @@ extern "C" int scanhash_x11(int thr_id, struct work* work, uint32_t max_nonce, u
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR(); CUDA_LOG_ERROR();
} }
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);


quark_blake512_cpu_init(thr_id, throughput); quark_blake512_cpu_init(thr_id, throughput);
quark_bmw512_cpu_init(thr_id, throughput); quark_bmw512_cpu_init(thr_id, throughput);
Expand Down
1 change: 1 addition & 0 deletions x11/x11evo.cu
Expand Up @@ -257,6 +257,7 @@ extern "C" int scanhash_x11evo(int thr_id, struct work* work, uint32_t max_nonce
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR(); CUDA_LOG_ERROR();
} }
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);


quark_blake512_cpu_init(thr_id, throughput); quark_blake512_cpu_init(thr_id, throughput);
quark_bmw512_cpu_init(thr_id, throughput); quark_bmw512_cpu_init(thr_id, throughput);
Expand Down

0 comments on commit 34e97bf

Please sign in to comment.