Skip to content

Commit

Permalink
polytimos algo (6 chained algos with streebog)
Browse files Browse the repository at this point in the history
  • Loading branch information
tpruvot committed Nov 16, 2017
1 parent e1575c5 commit 2e0a977
Show file tree
Hide file tree
Showing 12 changed files with 242 additions and 3 deletions.
1 change: 1 addition & 0 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ ccminer_SOURCES = elist.h miner.h compat.h \
sph/hamsi.c sph/hamsi_helper.c sph/streebog.c \
sph/shabal.c sph/whirlpool.c sph/sha2big.c sph/haval.c \
sph/ripemd.c sph/sph_sha2.c \
polytimos.cu \
lbry/lbry.cu lbry/cuda_sha256_lbry.cu lbry/cuda_sha512_lbry.cu lbry/cuda_lbry_merged.cu \
qubit/qubit.cu qubit/qubit_luffa512.cu qubit/deep.cu qubit/luffa.cu \
tribus/tribus.cu tribus/cuda_echo512_final.cu \
Expand Down
6 changes: 5 additions & 1 deletion README.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

ccminer 2.2.2 (Oct. 2017) "phi and hsr algos"
ccminer 2.2.3-dev (Nov. 2017) "polytimos algo"
---------------------------------------------------------------

***************************************************************
Expand Down Expand Up @@ -104,6 +104,7 @@ its command line interface and options.
nist5 use to mine TalkCoin
penta use to mine Joincoin / Pentablake
phi use to mine LUXCoin
polytimos use to mine Polytimos
quark use to mine Quarkcoin
qubit use to mine Qubit
scrypt use to mine Scrypt coins
Expand Down Expand Up @@ -280,6 +281,9 @@ so we can more efficiently implement new algorithms using the latest hardware
features.

>>> RELEASE HISTORY <<<
Nov. 16th 2017 v2.2.3
Polytimos Algo

Oct. 09th 2017 v2.2.2
Import and clean the hsr algo (x13 + custom hash)
Import and optimise phi algo from LuxCoin repository
Expand Down
2 changes: 2 additions & 0 deletions algos.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ enum sha_algos {
ALGO_NIST5,
ALGO_PENTABLAKE,
ALGO_PHI,
ALGO_POLYTIMOS,
ALGO_QUARK,
ALGO_QUBIT,
ALGO_SCRYPT,
Expand Down Expand Up @@ -104,6 +105,7 @@ static const char *algo_names[] = {
"nist5",
"penta",
"phi",
"polytimos",
"quark",
"qubit",
"scrypt",
Expand Down
1 change: 1 addition & 0 deletions bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ void algo_free_all(int thr_id)
free_nist5(thr_id);
free_pentablake(thr_id);
free_phi(thr_id);
free_polytimos(thr_id);
free_quark(thr_id);
free_qubit(thr_id);
free_skeincoin(thr_id);
Expand Down
5 changes: 5 additions & 0 deletions ccminer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ Options:\n\
nist5 NIST5 (TalkCoin)\n\
penta Pentablake hash (5x Blake 512)\n\
phi BHCoin\n\
polytimos Polytimos\n\
quark Quark\n\
qubit Qubit\n\
sha256d SHA256d (bitcoin)\n\
Expand Down Expand Up @@ -2230,6 +2231,7 @@ static void *miner_thread(void *userdata)
case ALGO_HSR:
case ALGO_LYRA2v2:
case ALGO_PHI:
case ALGO_POLYTIMOS:
case ALGO_S3:
case ALGO_SKUNK:
case ALGO_TIMETRAVEL:
Expand Down Expand Up @@ -2417,6 +2419,9 @@ static void *miner_thread(void *userdata)
case ALGO_PHI:
rc = scanhash_phi(thr_id, &work, max_nonce, &hashes_done);
break;
case ALGO_POLYTIMOS:
rc = scanhash_polytimos(thr_id, &work, max_nonce, &hashes_done);
break;
case ALGO_SCRYPT:
rc = scanhash_scrypt(thr_id, &work, max_nonce, &hashes_done,
NULL, &tv_start, &tv_end);
Expand Down
1 change: 1 addition & 0 deletions ccminer.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -526,6 +526,7 @@
<CudaCompile Include="lyra2\lyra2Z.cu" />
<CudaCompile Include="lyra2\cuda_lyra2Z.cu" />
<ClInclude Include="lyra2\cuda_lyra2Z_sm5.cuh" />
<CudaCompile Include="polytimos.cu" />
<CudaCompile Include="sia\sia.cu" />
<CudaCompile Include="skein.cu">
<MaxRegCount>64</MaxRegCount>
Expand Down
3 changes: 3 additions & 0 deletions ccminer.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -769,6 +769,9 @@
<CudaCompile Include="pentablake.cu">
<Filter>Source Files\CUDA</Filter>
</CudaCompile>
<CudaCompile Include="polytimos.cu">
<Filter>Source Files\CUDA</Filter>
</CudaCompile>
<CudaCompile Include="skunk\skunk.cu">
<Filter>Source Files\CUDA\skunk</Filter>
</CudaCompile>
Expand Down
2 changes: 1 addition & 1 deletion compat/ccminer-config.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@
#define PACKAGE_URL "http://github.com/tpruvot/ccminer"

/* Define to the version of this package. */
#define PACKAGE_VERSION "2.2.2"
#define PACKAGE_VERSION "2.2.3"

/* If using the C implementation of alloca, define if you know the
direction of stack growth for your system; otherwise it will be
Expand Down
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
AC_INIT([ccminer], [2.2.2], [], [ccminer], [http://github.com/tpruvot/ccminer])
AC_INIT([ccminer], [2.2.3], [], [ccminer], [http://github.com/tpruvot/ccminer])

AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM
Expand Down
3 changes: 3 additions & 0 deletions miner.h
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,7 @@ extern int scanhash_neoscrypt(int thr_id, struct work *work, uint32_t max_nonce,
extern int scanhash_nist5(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_pentablake(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_phi(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_polytimos(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_quark(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_qubit(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_sha256d(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done);
Expand Down Expand Up @@ -365,6 +366,7 @@ extern void free_neoscrypt(int thr_id);
extern void free_nist5(int thr_id);
extern void free_pentablake(int thr_id);
extern void free_phi(int thr_id);
extern void free_polytimos(int thr_id);
extern void free_quark(int thr_id);
extern void free_qubit(int thr_id);
extern void free_sha256d(int thr_id);
Expand Down Expand Up @@ -908,6 +910,7 @@ void neoscrypt(uchar *output, const uchar *input, uint32_t profile);
void nist5hash(void *state, const void *input);
void pentablakehash(void *output, const void *input);
void phihash(void *output, const void *input);
void polytimos_hash(void *output, const void *input);
void quarkhash(void *state, const void *input);
void qubithash(void *state, const void *input);
void scrypthash(void* output, const void* input);
Expand Down
216 changes: 216 additions & 0 deletions polytimos.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
/*
* Polytimos algorithm
*/
extern "C"
{
#include "sph/sph_skein.h"
#include "sph/sph_shabal.h"
#include "sph/sph_echo.h"
#include "sph/sph_luffa.h"
#include "sph/sph_fugue.h"
#include "sph/sph_streebog.h"
}

#include "miner.h"

#include "cuda_helper.h"
#include "x11/cuda_x11.h"

// Per-thread device buffers, indexed by thr_id.
// d_hash: scanhash_polytimos() allocates 16 uint32_t words per nonce of throughput.
static uint32_t *d_hash[MAX_GPUS];
// d_resNonce: two uint32_t result slots; scanhash_polytimos() fills them with 0xff
// bytes and treats UINT32_MAX as "no candidate".
static uint32_t *d_resNonce[MAX_GPUS];

// Host-side wrappers implemented in other translation units of the project.
// First stage: 80-byte block input.
extern void skein512_cpu_setBlock_80(void *pdata);
extern void skein512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int swap);
// Intermediate 64-byte stages operating in place on d_hash.
extern void x14_shabal512_cpu_init(int thr_id, uint32_t threads);
extern void x14_shabal512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
// NOTE(review): cubehash is declared here but never called in this file - confirm whether it can be dropped.
extern void x11_cubehash512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x13_fugue512_cpu_init(int thr_id, uint32_t threads);
extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x13_fugue512_cpu_free(int thr_id);
// Final streebog stage: the streebog_sm3_* pair is used when use_compat_kernels[thr_id]
// is set (cuda_arch < 500), the skunk_* pair otherwise.
extern void streebog_sm3_set_target(uint32_t* ptarget);
extern void streebog_sm3_hash_64_final(int thr_id, uint32_t threads, uint32_t *d_hash, uint32_t* d_resNonce);
extern void skunk_streebog_set_target(uint32_t* ptarget);
extern void skunk_cuda_streebog(int thr_id, uint32_t threads, uint32_t *d_hash, uint32_t* d_resNonce);

// CPU reference hash for Polytimos.
// Chains skein512 -> shabal512 -> echo512 -> luffa512 -> fugue512 -> gost512
// (gost512 comes from sph/sph_streebog.h). The first stage consumes 80 bytes of
// `input`; every later stage re-hashes the 64-byte intermediate digest in place.
// Only the first 32 bytes of the final digest are copied to `output`.
extern "C" void polytimos_hash(void *output, const void *input)
{
	uint32_t _ALIGN(128) digest[16];
	memset(digest, 0, sizeof(digest));

	// stage 1: skein512 over the 80-byte input
	{
		sph_skein512_context skein;
		sph_skein512_init(&skein);
		sph_skein512(&skein, input, 80);
		sph_skein512_close(&skein, (void*) digest);
	}

	// stage 2: shabal512
	{
		sph_shabal512_context shabal;
		sph_shabal512_init(&shabal);
		sph_shabal512(&shabal, digest, 64);
		sph_shabal512_close(&shabal, digest);
	}

	// stage 3: echo512
	{
		sph_echo512_context echo;
		sph_echo512_init(&echo);
		sph_echo512(&echo, digest, 64);
		sph_echo512_close(&echo, digest);
	}

	// stage 4: luffa512
	{
		sph_luffa512_context luffa;
		sph_luffa512_init(&luffa);
		sph_luffa512(&luffa, digest, 64);
		sph_luffa512_close(&luffa, digest);
	}

	// stage 5: fugue512
	{
		sph_fugue512_context fugue;
		sph_fugue512_init(&fugue);
		sph_fugue512(&fugue, digest, 64);
		sph_fugue512_close(&fugue, digest);
	}

	// stage 6: gost512
	{
		sph_gost512_context gost;
		sph_gost512_init(&gost);
		sph_gost512(&gost, (const void*) digest, 64);
		sph_gost512_close(&gost, (void*) digest);
	}

	memcpy(output, digest, 32);
}

// Per-thread flag: set to true at the end of the one-time setup in scanhash_polytimos(),
// reset to false by free_polytimos().
static bool init[MAX_GPUS] = { 0 };
// Per-thread flag: set during setup to (cuda_arch[dev_id] < 500); selects the
// streebog_sm3_* final stage instead of the skunk_* one.
static bool use_compat_kernels[MAX_GPUS] = { 0 };

/**
 * Scan a nonce range with the Polytimos chain on the GPU.
 *
 * Each pass hashes `throughput` nonces starting at pdata[19] through
 * skein512(80) -> shabal512 -> echo512 -> luffa512 -> fugue512 -> streebog,
 * then reads back up to two candidate nonce offsets from d_resNonce and
 * re-checks them on the CPU with polytimos_hash().
 *
 * @param thr_id       miner thread index; indexes the per-thread static buffers/flags
 * @param work         work item; work->data[19] is used as the nonce cursor and is
 *                     advanced in place, work->nonces / valid_nonces receive results
 * @param max_nonce    the scan stops (cursor clamped to this value) once a pass
 *                     would reach or exceed it
 * @param hashes_done  out: nonces covered relative to the initial data[19]
 * @return work->valid_nonces (1 or 2) when a candidate validates on the CPU,
 *         0 when the range is exhausted or a restart is requested.
 *         NOTE(review): the two CUDA_CALL_OR_RET_X calls pass 0 and -1 as their
 *         second argument - presumably the value returned on allocation failure;
 *         confirm against the macro and the caller.
 */
extern "C" int scanhash_polytimos(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
{
	int dev_id = device_map[thr_id];
	uint32_t *pdata = work->data;
	uint32_t *ptarget = work->target;
	const uint32_t first_nonce = pdata[19];
	int intensity = (device_sm[dev_id] > 500 && !is_windows()) ? 20 : 19;
	uint32_t throughput = cuda_default_throughput(thr_id, 1 << intensity); // 19=256*256*8;
	//if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);

	if (opt_benchmark)
		((uint32_t*)ptarget)[7] = 0x000f;

	// one-time per-thread setup: device selection, kernel init, buffer allocation
	if (!init[thr_id])
	{
		cudaSetDevice(dev_id);
		if (opt_cudaschedule == -1 && gpu_threads == 1) {
			cudaDeviceReset();
			// reduce cpu usage
			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
			CUDA_LOG_ERROR();
		}
		gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);

		cuda_get_arch(thr_id);
		// pre-5.0 architectures use the streebog_sm3_* final stage
		use_compat_kernels[thr_id] = (cuda_arch[dev_id] < 500);

		quark_skein512_cpu_init(thr_id, throughput);
		x14_shabal512_cpu_init(thr_id, throughput);
		x11_echo512_cpu_init(thr_id, throughput);
		x11_luffa512_cpu_init(thr_id, throughput);
		x13_fugue512_cpu_init(thr_id, throughput);

		// 16 x uint32_t (64 bytes) of intermediate hash per nonce
		CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput), 0);
		CUDA_CALL_OR_RET_X(cudaMalloc(&d_resNonce[thr_id], 2 * sizeof(uint32_t)), -1);

		init[thr_id] = true;
	}

	uint32_t _ALIGN(64) h_resNonce[2];
	uint32_t _ALIGN(64) endiandata[20];
	for (int k=0; k < 20; k++)
		be32enc(&endiandata[k], pdata[k]);

	// 0xff fill => both result slots read back as UINT32_MAX ("no candidate")
	cudaMemset(d_resNonce[thr_id], 0xff, 2*sizeof(uint32_t));
	skein512_cpu_setBlock_80(endiandata);
	if (use_compat_kernels[thr_id]) {
		streebog_sm3_set_target(ptarget);
	} else {
		skunk_streebog_set_target(ptarget);
	}

	do {
		int order = 0;

		skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
		x14_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x11_luffa512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		if (use_compat_kernels[thr_id]) {
			streebog_sm3_hash_64_final(thr_id, throughput, d_hash[thr_id], d_resNonce[thr_id]);
		} else {
			skunk_cuda_streebog(thr_id, throughput, d_hash[thr_id], d_resNonce[thr_id]);
		}

		*hashes_done = pdata[19] - first_nonce + throughput;

		cudaMemcpy(h_resNonce, d_resNonce[thr_id], 2 * sizeof(uint32_t), cudaMemcpyDeviceToHost);
		CUDA_LOG_ERROR();

		if (h_resNonce[0] != UINT32_MAX)
		{
			const uint32_t Htarg = ptarget[7];
			const uint32_t startNounce = pdata[19];
			// the device reports offsets relative to the pass start
			const uint32_t candidate = startNounce + h_resNonce[0];
			uint32_t _ALIGN(64) vhash[8];

			be32enc(&endiandata[19], candidate);
			polytimos_hash(vhash, endiandata);
			if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
				work->valid_nonces = 1;
				work->nonces[0] = candidate;
				work_set_target_ratio(work, vhash);
				if (h_resNonce[1] != UINT32_MAX) {
					uint32_t secNonce = work->nonces[1] = startNounce + h_resNonce[1];
					be32enc(&endiandata[19], secNonce);
					polytimos_hash(vhash, endiandata);
					bn_set_target_ratio(work, vhash, 1);
					work->valid_nonces++;
					pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
				} else {
					pdata[19] = work->nonces[0] + 1; // cursor
				}
				return work->valid_nonces;
			}

			// The candidate has been consumed without being accepted. Clear the
			// device slots in every such case so a stale offset is not read back
			// on the next pass and applied to a different start nonce.
			cudaMemset(d_resNonce[thr_id], 0xff, 2*sizeof(uint32_t));

			if (vhash[7] > Htarg) {
				gpu_increment_reject(thr_id);
				// report the nonce that actually failed; work->nonces[0] is only
				// assigned on the success path above and would be stale here
				if (!opt_quiet)
					gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", candidate);
				pdata[19] = candidate + 1;
				continue;
			}
		}

		// 64-bit sum avoids wrap-around when the cursor is close to UINT32_MAX
		if ((uint64_t)throughput + pdata[19] >= max_nonce) {
			pdata[19] = max_nonce;
			break;
		}
		pdata[19] += throughput;

	} while (!work_restart[thr_id].restart);

	*hashes_done = pdata[19] - first_nonce;

	CUDA_LOG_ERROR();

	return 0;
}

// cleanup: release the per-thread device buffers allocated by scanhash_polytimos().
// Does nothing when the thread was never initialised (init[thr_id] is false).
extern "C" void free_polytimos(int thr_id)
{
	if (!init[thr_id])
		return;

	// cudaThreadSynchronize() is deprecated in the CUDA runtime API;
	// cudaDeviceSynchronize() is its documented replacement.
	cudaDeviceSynchronize();

	cudaFree(d_hash[thr_id]);
	x13_fugue512_cpu_free(thr_id);
	cudaFree(d_resNonce[thr_id]);

	CUDA_LOG_ERROR();

	cudaDeviceSynchronize();
	// allow scanhash_polytimos() to run its setup again on the next call
	init[thr_id] = false;
}
3 changes: 3 additions & 0 deletions util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2246,6 +2246,9 @@ void print_hash_tests(void)
phihash(&hash[0], &buf[0]);
printpfx("phi", hash);

polytimos_hash(&hash[0], &buf[0]);
printpfx("polytimos", hash);

quarkhash(&hash[0], &buf[0]);
printpfx("quark", hash);

Expand Down

0 comments on commit 2e0a977

Please sign in to comment.