diff --git a/Makefile.am b/Makefile.am
index a9fb3396b..febcc9b28 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -55,7 +55,7 @@ ccminer_SOURCES = elist.h miner.h compat.h \
quark/nist5.cu \
quark/quarkcoin.cu quark/cuda_quark_compactionTest.cu \
neoscrypt/neoscrypt.cpp neoscrypt/neoscrypt-cpu.c neoscrypt/cuda_neoscrypt.cu \
- pentablake.cu skein.cu cuda_skeincoin.cu skein2.cpp zr5.cu \
+ pentablake.cu skein.cu cuda_skeincoin.cu skein2.cpp skunk.cu zr5.cu \
sha256/sha256d.cu sha256/cuda_sha256d.cu sha256/sha256t.cu sha256/cuda_sha256t.cu \
sia/sia.cu sia/sia-rpc.cpp sph/blake2b.c \
sph/bmw.c sph/blake.c sph/groestl.c sph/jh.c sph/keccak.c sph/skein.c \
diff --git a/README.txt b/README.txt
index 9e4a9c9da..4967b5a8d 100644
--- a/README.txt
+++ b/README.txt
@@ -1,5 +1,5 @@
-ccminer 2.1-dev (June 2017) "Equihash"
+ccminer 2.2-dev (July 2017) "Equihash, tribus and skunk"
---------------------------------------------------------------
***************************************************************
@@ -9,7 +9,6 @@ If you find this tool useful and like to support its continuous
tpruvot@github:
BTC : 1AJdfCpLWPNoAMDfHF1wD5y8VgKSSTHxPo
DCR : DsUCcACGcyP8McNMRXQwbtpDxaVUYLDQDeU
- LBC : bKe6pLqELL3HHSbpJXxSdn5RrY2bfrkRhF
Alexis:
BTC : 14EgXD7fPYD4sHBXWUi46VeiTVXNq765B8
@@ -51,6 +50,8 @@ Scrypt and Scrypt:N
Scrypt-Jane (Chacha)
Sibcoin (sib)
Skein (Skein + SHA)
+Signatum (Skein cubehash fugue Streebog)
+Tribus (JH, keccak, simd)
Woodcoin (Double Skein)
Vanilla (Blake256 8-rounds - double sha256)
Vertcoin Lyra2RE
@@ -116,6 +117,7 @@ its command line interface and options.
sib use to mine Sibcoin
skein use to mine Skeincoin
skein2 use to mine Woodcoin
+ skunk use to mine Signatum
timetravel use to mine MachineCoin
tribus use to mine Denarius
x11evo use to mine Revolver
@@ -280,7 +282,12 @@ features.
>>> RELEASE HISTORY <<<
- v2.1 (unfinished)
+ v2.2 (under dev)
+ New skunk algo, using the heavy streebog algorithm
+ Enhance tribus algo (+10%)
+ equihash protocol enhancement on yiimp.ccminer.org and zpool.ca
+
+ June 16th 2017 v2.1-tribus
Interface equihash algo with djeZo solver (from nheqminer 0.5c)
New api parameters (and multicast announces for local networks)
New tribus algo
diff --git a/algos.h b/algos.h
index 77625f239..b7dd0f21f 100644
--- a/algos.h
+++ b/algos.h
@@ -45,6 +45,7 @@ enum sha_algos {
ALGO_SIB,
ALGO_SKEIN,
ALGO_SKEIN2,
+ ALGO_SKUNK,
ALGO_S3,
ALGO_TIMETRAVEL,
ALGO_TRIBUS,
@@ -109,6 +110,7 @@ static const char *algo_names[] = {
"sib",
"skein",
"skein2",
+ "skunk",
"s3",
"timetravel",
"tribus",
diff --git a/bench.cpp b/bench.cpp
index 71c0d7085..500176f8b 100644
--- a/bench.cpp
+++ b/bench.cpp
@@ -80,6 +80,7 @@ void algo_free_all(int thr_id)
free_qubit(thr_id);
free_skeincoin(thr_id);
free_skein2(thr_id);
+ free_skunk(thr_id);
free_sha256d(thr_id);
free_sha256t(thr_id);
free_sia(thr_id);
diff --git a/ccminer.cpp b/ccminer.cpp
index e32cdbd7e..2f357cc06 100644
--- a/ccminer.cpp
+++ b/ccminer.cpp
@@ -275,6 +275,7 @@ Options:\n\
scrypt-jane Scrypt-jane Chacha\n\
skein Skein SHA2 (Skeincoin)\n\
skein2 Double Skein (Woodcoin)\n\
+ skunk Skein Cube Fugue Streebog\n\
s3 S3 (1Coin)\n\
timetravel Machinecoin permuted x8\n\
tribus Denerius\n\
@@ -2401,6 +2402,9 @@ static void *miner_thread(void *userdata)
case ALGO_SKEIN2:
rc = scanhash_skein2(thr_id, &work, max_nonce, &hashes_done);
break;
+ case ALGO_SKUNK:
+ rc = scanhash_skunk(thr_id, &work, max_nonce, &hashes_done);
+ break;
case ALGO_SHA256D:
rc = scanhash_sha256d(thr_id, &work, max_nonce, &hashes_done);
break;
diff --git a/ccminer.vcxproj b/ccminer.vcxproj
index 552c59595..a68f66ba5 100644
--- a/ccminer.vcxproj
+++ b/ccminer.vcxproj
@@ -532,6 +532,7 @@
48
+
@@ -563,10 +564,8 @@
-
-
-
-
+
+
@@ -600,4 +599,4 @@
-
+
diff --git a/ccminer.vcxproj.filters b/ccminer.vcxproj.filters
index f28d993b7..f6fef2c7e 100644
--- a/ccminer.vcxproj.filters
+++ b/ccminer.vcxproj.filters
@@ -754,6 +754,9 @@
Source Files\CUDA
+
+ Source Files\CUDA
+
Source Files\CUDA
diff --git a/compat/ccminer-config.h b/compat/ccminer-config.h
index dfd973c3b..d07e736d2 100644
--- a/compat/ccminer-config.h
+++ b/compat/ccminer-config.h
@@ -164,7 +164,7 @@
#define PACKAGE_URL "http://github.com/tpruvot/ccminer"
/* Define to the version of this package. */
-#define PACKAGE_VERSION "2.1"
+#define PACKAGE_VERSION "2.2"
/* If using the C implementation of alloca, define if you know the
direction of stack growth for your system; otherwise it will be
diff --git a/configure.ac b/configure.ac
index c92a6fcb1..c369201a8 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,4 +1,4 @@
-AC_INIT([ccminer], [2.1], [], [ccminer], [http://github.com/tpruvot/ccminer])
+AC_INIT([ccminer], [2.2], [], [ccminer], [http://github.com/tpruvot/ccminer])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM
diff --git a/miner.h b/miner.h
index f3406dd9d..b9c72567e 100644
--- a/miner.h
+++ b/miner.h
@@ -308,6 +308,7 @@ extern int scanhash_sia(int thr_id, struct work *work, uint32_t max_nonce, unsig
extern int scanhash_sib(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
+extern int scanhash_skunk(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_s3(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_timetravel(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_tribus(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
@@ -368,6 +369,7 @@ extern void free_sia(int thr_id);
extern void free_sib(int thr_id);
extern void free_skeincoin(int thr_id);
extern void free_skein2(int thr_id);
+extern void free_skunk(int thr_id);
extern void free_s3(int thr_id);
extern void free_timetravel(int thr_id);
extern void free_tribus(int thr_id);
@@ -909,6 +911,7 @@ void sha256t_hash(void *output, const void *input);
void sibhash(void *output, const void *input);
void skeincoinhash(void *output, const void *input);
void skein2hash(void *output, const void *input);
+void skunk_hash(void *output, const void *input); // skunk CPU hash (defined in skunk.cu)
void s3hash(void *output, const void *input);
void timetravel_hash(void *output, const void *input);
void bitcore_hash(void *output, const void *input);
diff --git a/skunk.cu b/skunk.cu
new file mode 100644
index 000000000..d7d9debad
--- /dev/null
+++ b/skunk.cu
@@ -0,0 +1,194 @@
+/**
+ * Skunk Algo for Signatum
+ * (skein, cube, fugue, gost streebog)
+ *
+ * tpruvot@github 06 2017 - GPLv3
+ */
+extern "C" {
+#include "sph/sph_skein.h"
+#include "sph/sph_cubehash.h"
+#include "sph/sph_fugue.h"
+#include "sph/sph_streebog.h"
+}
+
+#include "miner.h"
+#include "cuda_helper.h"
+
+extern void skein512_cpu_setBlock_80(void *pdata);
+extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
+extern void skein512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int swap);
+
+extern void x11_cubehash512_cpu_init(int thr_id, uint32_t threads);
+extern void x11_cubehash512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x13_fugue512_cpu_init(int thr_id, uint32_t threads);
+extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+extern void x13_fugue512_cpu_free(int thr_id);
+
+extern void streebog_cpu_hash_64_final(int thr_id, uint32_t threads, uint32_t *d_hash, uint32_t* d_resNonce);
+extern void streebog_set_target(const uint32_t* ptarget);
+
+#include <stdio.h>
+#include <string.h>
+
+#define NBN 2 // number of uint32 result slots in d_resNonce / h_resNonce
+static uint32_t *d_hash[MAX_GPUS]; // per-thread device buffer, allocated in scanhash_skunk as 64 bytes * throughput
+static uint32_t *d_resNonce[MAX_GPUS]; // per-thread device buffer of NBN uint32 result slots
+
+// CPU hash: skein512 over 80 input bytes, then cubehash512, fugue512 and gost512 chained; first 32 bytes go to output
+extern "C" void skunk_hash(void *output, const void *input)
+{
+	unsigned char _ALIGN(128) digest[128] = { 0 }; // zeroed scratch buffer shared by all four stages
+
+	sph_skein512_context skein; // stage 1 reads the caller's 80 bytes
+	sph_skein512_init(&skein);
+	sph_skein512(&skein, input, 80);
+	sph_skein512_close(&skein, digest);
+
+	sph_cubehash512_context cube; // stage 2 reads 64 bytes of the previous result
+	sph_cubehash512_init(&cube);
+	sph_cubehash512(&cube, digest, 64);
+	sph_cubehash512_close(&cube, digest);
+
+	sph_fugue512_context fugue; // stage 3, same in-place pattern
+	sph_fugue512_init(&fugue);
+	sph_fugue512(&fugue, digest, 64);
+	sph_fugue512_close(&fugue, digest);
+
+	sph_gost512_context gost; // stage 4 (streebog)
+	sph_gost512_init(&gost);
+	sph_gost512(&gost, digest, 64);
+	sph_gost512_close(&gost, digest);
+
+	// only the first 32 bytes of the final result are handed back
+	memcpy(output, digest, 32);
+}
+
+static bool init[MAX_GPUS] = { 0 }; // per-thread flag, set once the one-time GPU setup in scanhash_skunk has completed
+// GPU scan from pdata[19] (skein512_80 -> cubehash512 -> fugue512 -> streebog final), CPU-verifies candidates; returns work->valid_nonces (1 or 2) on a find, else 0; updates pdata[19] and *hashes_done
+extern "C" int scanhash_skunk(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
+{
+ int dev_id = device_map[thr_id];
+
+ uint32_t *pdata = work->data;
+ uint32_t *ptarget = work->target;
+ const uint32_t first_nonce = pdata[19]; // pdata[19] is the nonce cursor advanced by the loop below
+ int intensity = (device_sm[device_map[thr_id]] > 500) ? 18 : 17; // default intensity: 18 when device_sm > 500, else 17
+ if (strstr(device_name[dev_id], "GTX 10")) intensity = 19; // devices whose name contains "GTX 10" get 19
+ uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity); // 2^intensity is the default handed to cuda_default_throughput
+ //if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
+
+ if (opt_benchmark)
+ ptarget[7] = 0xf; // benchmark mode overwrites target word 7, the word checked against vhash[7] below
+
+ if (!init[thr_id]) // one-time setup for this thread
+ {
+ cudaSetDevice(device_map[thr_id]);
+ if (opt_cudaschedule == -1 && gpu_threads == 1) {
+ cudaDeviceReset();
+ // reduce cpu usage
+ cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
+ CUDA_LOG_ERROR();
+ }
+ gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);
+
+ quark_skein512_cpu_init(thr_id, throughput);
+ x11_cubehash512_cpu_init(thr_id, throughput);
+ x13_fugue512_cpu_init(thr_id, throughput);
+
+ CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput), 0); // 64 bytes per GPU thread; NOTE(review): passes 0 here but -1 on the next line, presumably the value returned on failure; confirm which the caller expects
+ CUDA_CALL_OR_RET_X(cudaMalloc(&d_resNonce[thr_id], NBN * sizeof(uint32_t)), -1); // NBN uint32 result slots
+
+ init[thr_id] = true;
+ }
+
+ uint32_t _ALIGN(64) h_resNonce[NBN]; // host copy of the device result slots
+ uint32_t _ALIGN(64) endiandata[20]; // the 20 work-data words after be32enc
+ for (int k=0; k < 20; k++)
+ be32enc(&endiandata[k], pdata[k]);
+
+ skein512_cpu_setBlock_80(endiandata);
+
+ cudaMemset(d_resNonce[thr_id], 0xff, NBN*sizeof(uint32_t)); // every slot = 0xffffffff (UINT32_MAX), tested below as "nothing found"
+ streebog_set_target(ptarget);
+
+ do {
+ int order = 0;
+ skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], 1); order++;
+ x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+ x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+ streebog_cpu_hash_64_final(thr_id, throughput, d_hash[thr_id], d_resNonce[thr_id]);
+
+ cudaMemcpy(h_resNonce, d_resNonce[thr_id], NBN*sizeof(uint32_t), cudaMemcpyDeviceToHost); // fetch the result slots written for this batch
+
+ *hashes_done = pdata[19] - first_nonce + throughput; // counts the batch that was just launched
+
+ if (h_resNonce[0] != UINT32_MAX)
+ {
+ uint32_t _ALIGN(64) vhash[8];
+ const uint32_t Htarg = ptarget[7];
+ const uint32_t startNounce = pdata[19];
+
+ be32enc(&endiandata[19], startNounce + h_resNonce[0]); // slot values are offsets relative to startNounce
+ skunk_hash(vhash, endiandata); // recompute the candidate on the CPU before accepting it
+ if (vhash[7] <= Htarg && fulltest(vhash, ptarget))
+ {
+ work->nonces[0] = startNounce + h_resNonce[0];
+ work->valid_nonces = 1;
+ work_set_target_ratio(work, vhash);
+ if (h_resNonce[1] != UINT32_MAX) // a second candidate came back in the same batch
+ {
+ uint32_t secNonce = work->nonces[1] = startNounce + h_resNonce[1];
+ be32enc(&endiandata[19], secNonce);
+ skunk_hash(vhash, endiandata);
+ work->nonces[1] = secNonce; // NOTE(review): nonces[1] was already assigned three lines above; this store looks redundant
+ if (bn_hash_target_ratio(vhash, ptarget) > work->shareratio[0]) { // second candidate has the higher ratio
+ work_set_target_ratio(work, vhash);
+ xchg(work->nonces[1], work->nonces[0]); // move it into slot 0
+ } else {
+ bn_set_target_ratio(work, vhash, work->valid_nonces);
+ }
+ work->valid_nonces++;
+ pdata[19] = max(work->nonces[0], work->nonces[1]) + 1; // resume after the higher of the two nonces
+ } else {
+ pdata[19] = work->nonces[0] + 1; // cursor
+ }
+ return work->valid_nonces;
+ }
+ else if (vhash[7] > Htarg) { // NOTE(review): a candidate with vhash[7] <= Htarg that fails fulltest takes neither branch and falls through without resetting d_resNonce; confirm this is intended
+ gpu_increment_reject(thr_id);
+ cudaMemset(d_resNonce[thr_id], 0xff, NBN*sizeof(uint32_t)); // clear the slots before rescanning
+ pdata[19] = startNounce + h_resNonce[0] + 1; // restart just after the rejected nonce
+ continue;
+ }
+ }
+ if ((uint64_t) throughput + pdata[19] >= max_nonce) { // 64-bit sum so the comparison cannot wrap in uint32
+ pdata[19] = max_nonce;
+ break;
+ }
+
+ pdata[19] += throughput;
+
+ } while (!work_restart[thr_id].restart);
+
+ *hashes_done = pdata[19] - first_nonce;
+
+ return 0;
+}
+
+// cleanup: frees the device buffers scanhash_skunk allocated for thr_id; does nothing if that thread was never initialised
+extern "C" void free_skunk(int thr_id)
+{
+	if (!init[thr_id])
+		return;
+
+	cudaDeviceSynchronize(); // non-deprecated replacement for cudaThreadSynchronize(); wait for pending GPU work before freeing
+
+	x13_fugue512_cpu_free(thr_id);
+	cudaFree(d_hash[thr_id]);
+	cudaFree(d_resNonce[thr_id]);
+
+	init[thr_id] = false; // lets scanhash_skunk run its one-time setup again
+
+	cudaDeviceSynchronize();
+}
diff --git a/util.cpp b/util.cpp
index 99187fd97..e601756c6 100644
--- a/util.cpp
+++ b/util.cpp
@@ -2270,6 +2270,9 @@ void print_hash_tests(void)
skein2hash(&hash[0], &buf[0]);
printpfx("skein2", hash);
+ skunk_hash(&hash[0], &buf[0]);
+ printpfx("skunk", hash);
+
s3hash(&hash[0], &buf[0]);
printpfx("S3", hash);
diff --git a/x13/cuda_x13_fugue512.cu b/x13/cuda_x13_fugue512.cu
index ba1afd84d..b69ddb635 100644
--- a/x13/cuda_x13_fugue512.cu
+++ b/x13/cuda_x13_fugue512.cu
@@ -254,7 +254,7 @@ void x13_fugue512_gpu_hash_64(uint32_t threads, uint64_t *g_hash)
mixtabs[thr+256] = ROR8(tmp);
mixtabs[thr+512] = ROL16(tmp);
mixtabs[thr+768] = ROL8(tmp);
-#if TPB < 256
+#if TPB <= 256
if (blockDim.x < 256) {
const uint32_t thr = (threadIdx.x + 0x80) & 0xFF;
const uint32_t tmp = tex1Dfetch(mixTab0Tex, thr);