Skip to content

Commit

Permalink
Add skunk algo, initial version
Browse files Browse the repository at this point in the history
  • Loading branch information
tpruvot committed Jul 21, 2017
1 parent 5aa50a4 commit cbede12
Show file tree
Hide file tree
Showing 13 changed files with 228 additions and 12 deletions.
2 changes: 1 addition & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ ccminer_SOURCES = elist.h miner.h compat.h \
quark/nist5.cu \
quark/quarkcoin.cu quark/cuda_quark_compactionTest.cu \
neoscrypt/neoscrypt.cpp neoscrypt/neoscrypt-cpu.c neoscrypt/cuda_neoscrypt.cu \
pentablake.cu skein.cu cuda_skeincoin.cu skein2.cpp zr5.cu \
pentablake.cu skein.cu cuda_skeincoin.cu skein2.cpp skunk.cu zr5.cu \
sha256/sha256d.cu sha256/cuda_sha256d.cu sha256/sha256t.cu sha256/cuda_sha256t.cu \
sia/sia.cu sia/sia-rpc.cpp sph/blake2b.c \
sph/bmw.c sph/blake.c sph/groestl.c sph/jh.c sph/keccak.c sph/skein.c \
Expand Down
13 changes: 10 additions & 3 deletions README.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

ccminer 2.1-dev (June 2017) "Equihash"
ccminer 2.2-dev (July 2017) "Equihash, tribus and skunk"
---------------------------------------------------------------

***************************************************************
Expand All @@ -9,7 +9,6 @@ If you find this tool useful and like to support its continuous
tpruvot@github:
BTC : 1AJdfCpLWPNoAMDfHF1wD5y8VgKSSTHxPo
DCR : DsUCcACGcyP8McNMRXQwbtpDxaVUYLDQDeU
LBC : bKe6pLqELL3HHSbpJXxSdn5RrY2bfrkRhF

Alexis:
BTC : 14EgXD7fPYD4sHBXWUi46VeiTVXNq765B8
Expand Down Expand Up @@ -51,6 +50,8 @@ Scrypt and Scrypt:N
Scrypt-Jane (Chacha)
Sibcoin (sib)
Skein (Skein + SHA)
Signatum (Skein cubehash fugue Streebog)
Tribus (JH, keccak, simd)
Woodcoin (Double Skein)
Vanilla (Blake256 8-rounds - double sha256)
Vertcoin Lyra2RE
Expand Down Expand Up @@ -116,6 +117,7 @@ its command line interface and options.
sib use to mine Sibcoin
skein use to mine Skeincoin
skein2 use to mine Woodcoin
skunk use to mine Signatum
timetravel use to mine MachineCoin
tribus use to mine Denarius
x11evo use to mine Revolver
Expand Down Expand Up @@ -280,7 +282,12 @@ features.

>>> RELEASE HISTORY <<<

v2.1 (unfinished)
v2.2 (under dev)
New skunk algo, using the heavy streebog algorithm
Enhance tribus algo (+10%)
equihash protocol enhancement on yiimp.ccminer.org and zpool.ca

June 16th 2017 v2.1-tribus
Interface equihash algo with djeZo solver (from nheqminer 0.5c)
New api parameters (and multicast announces for local networks)
New tribus algo
Expand Down
2 changes: 2 additions & 0 deletions algos.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ enum sha_algos {
ALGO_SIB,
ALGO_SKEIN,
ALGO_SKEIN2,
ALGO_SKUNK,
ALGO_S3,
ALGO_TIMETRAVEL,
ALGO_TRIBUS,
Expand Down Expand Up @@ -109,6 +110,7 @@ static const char *algo_names[] = {
"sib",
"skein",
"skein2",
"skunk",
"s3",
"timetravel",
"tribus",
Expand Down
1 change: 1 addition & 0 deletions bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ void algo_free_all(int thr_id)
free_qubit(thr_id);
free_skeincoin(thr_id);
free_skein2(thr_id);
free_skunk(thr_id);
free_sha256d(thr_id);
free_sha256t(thr_id);
free_sia(thr_id);
Expand Down
4 changes: 4 additions & 0 deletions ccminer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ Options:\n\
scrypt-jane Scrypt-jane Chacha\n\
skein Skein SHA2 (Skeincoin)\n\
skein2 Double Skein (Woodcoin)\n\
skunk Skein Cube Fugue Streebog\n\
s3 S3 (1Coin)\n\
timetravel Machinecoin permuted x8\n\
tribus Denerius\n\
Expand Down Expand Up @@ -2401,6 +2402,9 @@ static void *miner_thread(void *userdata)
case ALGO_SKEIN2:
rc = scanhash_skein2(thr_id, &work, max_nonce, &hashes_done);
break;
case ALGO_SKUNK:
rc = scanhash_skunk(thr_id, &work, max_nonce, &hashes_done);
break;
case ALGO_SHA256D:
rc = scanhash_sha256d(thr_id, &work, max_nonce, &hashes_done);
break;
Expand Down
9 changes: 4 additions & 5 deletions ccminer.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,7 @@
<CudaCompile Include="cuda_skeincoin.cu">
<MaxRegCount>48</MaxRegCount>
</CudaCompile>
<CudaCompile Include="skunk.cu" />
<CudaCompile Include="tribus.cu" />
<ClInclude Include="x11\cuda_x11_aes.cuh" />
<CudaCompile Include="x11\cuda_x11_cubehash512.cu" />
Expand Down Expand Up @@ -563,10 +564,8 @@
</CudaCompile>
<CudaCompile Include="x13\cuda_x13_fugue512.cu">
</CudaCompile>
<CudaCompile Include="x13\x13.cu">
</CudaCompile>
<CudaCompile Include="x15\x14.cu">
</CudaCompile>
<CudaCompile Include="x13\x13.cu" />
<CudaCompile Include="x15\x14.cu" />
<CudaCompile Include="x15\cuda_x14_shabal512.cu" />
<CudaCompile Include="x15\cuda_x15_whirlpool.cu" />
<CudaCompile Include="x17\hmq17.cu" />
Expand Down Expand Up @@ -600,4 +599,4 @@
<Target Name="AfterClean">
<Delete Files="@(FilesToCopy->'$(OutDir)%(Filename)%(Extension)')" TreatErrorsAsWarnings="true" />
</Target>
</Project>
</Project>
3 changes: 3 additions & 0 deletions ccminer.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -754,6 +754,9 @@
<CudaCompile Include="pentablake.cu">
<Filter>Source Files\CUDA</Filter>
</CudaCompile>
<CudaCompile Include="skunk.cu">
<Filter>Source Files\CUDA</Filter>
</CudaCompile>
<CudaCompile Include="tribus.cu">
<Filter>Source Files\CUDA</Filter>
</CudaCompile>
Expand Down
2 changes: 1 addition & 1 deletion compat/ccminer-config.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@
#define PACKAGE_URL "http://github.com/tpruvot/ccminer"

/* Define to the version of this package. */
#define PACKAGE_VERSION "2.1"
#define PACKAGE_VERSION "2.2"

/* If using the C implementation of alloca, define if you know the
direction of stack growth for your system; otherwise it will be
Expand Down
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
AC_INIT([ccminer], [2.1], [], [ccminer], [http://github.com/tpruvot/ccminer])
AC_INIT([ccminer], [2.2], [], [ccminer], [http://github.com/tpruvot/ccminer])

AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM
Expand Down
3 changes: 3 additions & 0 deletions miner.h
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,7 @@ extern int scanhash_sia(int thr_id, struct work *work, uint32_t max_nonce, unsig
extern int scanhash_sib(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_skunk(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_s3(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_timetravel(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_tribus(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
Expand Down Expand Up @@ -368,6 +369,7 @@ extern void free_sia(int thr_id);
extern void free_sib(int thr_id);
extern void free_skeincoin(int thr_id);
extern void free_skein2(int thr_id);
extern void free_skunk(int thr_id);
extern void free_s3(int thr_id);
extern void free_timetravel(int thr_id);
extern void free_tribus(int thr_id);
Expand Down Expand Up @@ -909,6 +911,7 @@ void sha256t_hash(void *output, const void *input);
void sibhash(void *output, const void *input);
void skeincoinhash(void *output, const void *input);
void skein2hash(void *output, const void *input);
void skunk_hash(void *state, const void *input);
void s3hash(void *output, const void *input);
void timetravel_hash(void *output, const void *input);
void bitcore_hash(void *output, const void *input);
Expand Down
194 changes: 194 additions & 0 deletions skunk.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
/**
* Skunk Algo for Signatum
* (skein, cube, fugue, gost streebog)
*
* tpruvot@github 06 2017 - GPLv3
*/
extern "C" {
#include "sph/sph_skein.h"
#include "sph/sph_cubehash.h"
#include "sph/sph_fugue.h"
#include "sph/sph_streebog.h"
}

#include "miner.h"
#include "cuda_helper.h"

// GPU stage entry points used by scanhash_skunk(). They are only declared here;
// their definitions live in other translation units of the project.

// Skein-512 over the 80-byte block header (first stage of the chain).
extern void skein512_cpu_setBlock_80(void *pdata);
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
extern void skein512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int swap);

// CubeHash-512 stage; scanhash_skunk() passes NULL for d_nonceVector.
extern void x11_cubehash512_cpu_init(int thr_id, uint32_t threads);
extern void x11_cubehash512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

// Fugue-512 stage; scanhash_skunk() passes NULL for d_nonceVector.
// x13_fugue512_cpu_free() is called from free_skunk().
extern void x13_fugue512_cpu_init(int thr_id, uint32_t threads);
extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x13_fugue512_cpu_free(int thr_id);

// Final Streebog stage: takes the device result buffer (d_resNonce) that
// scanhash_skunk() reads back to find candidate nonces.
// NOTE(review): presumably compares against the target given to
// streebog_set_target() on the device - confirm in the kernel source.
extern void streebog_cpu_hash_64_final(int thr_id, uint32_t threads, uint32_t *d_hash, uint32_t* d_resNonce);
extern void streebog_set_target(const uint32_t* ptarget);

#include <stdio.h>
#include <memory.h>

// Number of 32-bit result slots in the device/host result-nonce buffers.
#define NBN 2
// Per-thread device buffer for intermediate hashes (64 bytes per GPU thread).
static uint32_t *d_hash[MAX_GPUS];
// Per-thread device buffer of NBN result slots; 0xffffffff marks an empty slot.
static uint32_t *d_resNonce[MAX_GPUS];

/**
 * CPU implementation of the Skunk hash chain:
 * Skein-512 -> CubeHash-512 -> Fugue-512 -> GOST Streebog-512.
 *
 * @param output  receives the first 32 bytes of the final digest
 * @param input   80 bytes of input data (block header)
 *
 * Used by scanhash_skunk() to re-check nonces reported by the GPU.
 */
extern "C" void skunk_hash(void *output, const void *input)
{
	// Zero-initialised scratch buffer. Every stage after the first hashes the
	// first 64 bytes of this buffer and closes its result back into it.
	unsigned char _ALIGN(128) hash[128] = { 0 };
	void *const digest = (void*) hash;
	const void *const prev = (const void*) hash;

	{	// stage 1: Skein-512 over the 80-byte input
		sph_skein512_context skein;
		sph_skein512_init(&skein);
		sph_skein512(&skein, input, 80);
		sph_skein512_close(&skein, digest);
	}

	{	// stage 2: CubeHash-512
		sph_cubehash512_context cube;
		sph_cubehash512_init(&cube);
		sph_cubehash512(&cube, prev, 64);
		sph_cubehash512_close(&cube, digest);
	}

	{	// stage 3: Fugue-512
		sph_fugue512_context fugue;
		sph_fugue512_init(&fugue);
		sph_fugue512(&fugue, prev, 64);
		sph_fugue512_close(&fugue, digest);
	}

	{	// stage 4: GOST Streebog-512
		sph_gost512_context gost;
		sph_gost512_init(&gost);
		sph_gost512(&gost, prev, 64);
		sph_gost512_close(&gost, digest);
	}

	// only the first 256 bits are handed back to the caller
	memcpy(output, hash, 32);
}

// Per-thread flag: set by scanhash_skunk() once its device buffers are
// allocated, cleared again by free_skunk().
static bool init[MAX_GPUS] = { 0 };

/**
 * Scan a nonce range for Skunk shares on the GPU assigned to a miner thread.
 *
 * Each loop iteration runs the four GPU stages (skein, cubehash, fugue,
 * streebog final) over `throughput` nonces starting at pdata[19], reads the
 * result slots back and re-checks any candidate on the CPU with skunk_hash().
 *
 * @param thr_id       miner thread index (indexes the per-GPU buffers)
 * @param work         work item; work->data[19] is the nonce cursor and is
 *                     advanced in place, work->nonces[] receives found nonces
 * @param max_nonce    upper bound of the range to scan
 * @param hashes_done  receives the number of hashes covered by this call
 * @return work->valid_nonces (1 or 2) when a share was validated, 0 when the
 *         range was exhausted or a work restart was requested.
 *         NOTE(review): on cudaMalloc failure CUDA_CALL_OR_RET_X presumably
 *         returns the value given as its second argument - confirm in
 *         cuda_helper.h.
 */
extern "C" int scanhash_skunk(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
{
	const int dev_id = device_map[thr_id];

	uint32_t *pdata = work->data;
	uint32_t *ptarget = work->target;
	const uint32_t first_nonce = pdata[19];
	int intensity = (device_sm[dev_id] > 500) ? 18 : 17;
	if (strstr(device_name[dev_id], "GTX 10")) intensity = 19;
	uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity);
	//if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);

	if (opt_benchmark)
		ptarget[7] = 0xf;

	if (!init[thr_id])
	{
		// one-time per-thread setup: device selection, stage init, buffers
		cudaSetDevice(dev_id);
		if (opt_cudaschedule == -1 && gpu_threads == 1) {
			cudaDeviceReset();
			// reduce cpu usage
			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
			CUDA_LOG_ERROR();
		}
		gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);

		quark_skein512_cpu_init(thr_id, throughput);
		x11_cubehash512_cpu_init(thr_id, throughput);
		x13_fugue512_cpu_init(thr_id, throughput);

		// 64 bytes of intermediate hash per GPU thread, NBN result slots
		CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput), 0);
		CUDA_CALL_OR_RET_X(cudaMalloc(&d_resNonce[thr_id], NBN * sizeof(uint32_t)), -1);

		init[thr_id] = true;
	}

	uint32_t _ALIGN(64) h_resNonce[NBN];
	uint32_t _ALIGN(64) endiandata[20];
	for (int k=0; k < 20; k++)
		be32enc(&endiandata[k], pdata[k]);

	skein512_cpu_setBlock_80(endiandata);

	// 0xffffffff in a result slot means "no candidate"
	cudaMemset(d_resNonce[thr_id], 0xff, NBN*sizeof(uint32_t));
	streebog_set_target(ptarget);

	do {
		int order = 0;
		skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], 1); order++;
		x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		streebog_cpu_hash_64_final(thr_id, throughput, d_hash[thr_id], d_resNonce[thr_id]);

		cudaMemcpy(h_resNonce, d_resNonce[thr_id], NBN*sizeof(uint32_t), cudaMemcpyDeviceToHost);

		*hashes_done = pdata[19] - first_nonce + throughput;

		if (h_resNonce[0] != UINT32_MAX)
		{
			uint32_t _ALIGN(64) vhash[8];
			const uint32_t Htarg = ptarget[7];
			const uint32_t startNounce = pdata[19];

			// result slots hold offsets relative to the start nonce of this pass
			be32enc(&endiandata[19], startNounce + h_resNonce[0]);
			skunk_hash(vhash, endiandata);
			if (vhash[7] <= Htarg && fulltest(vhash, ptarget))
			{
				work->nonces[0] = startNounce + h_resNonce[0];
				work->valid_nonces = 1;
				work_set_target_ratio(work, vhash);
				if (h_resNonce[1] != UINT32_MAX)
				{
					const uint32_t secNonce = startNounce + h_resNonce[1];
					work->nonces[1] = secNonce;
					be32enc(&endiandata[19], secNonce);
					skunk_hash(vhash, endiandata);
					// keep the better of the two shares in slot 0
					if (bn_hash_target_ratio(vhash, ptarget) > work->shareratio[0]) {
						work_set_target_ratio(work, vhash);
						xchg(work->nonces[1], work->nonces[0]);
					} else {
						bn_set_target_ratio(work, vhash, work->valid_nonces);
					}
					work->valid_nonces++;
					pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
				} else {
					pdata[19] = work->nonces[0] + 1; // cursor
				}
				return work->valid_nonces;
			}

			// The candidate did not validate on the CPU. Only a miss on the
			// high target word is counted as a GPU reject, but in every case
			// the device result slots must be cleared: otherwise the stale
			// offset would be read back again on the next pass and combined
			// with a different start nonce.
			if (vhash[7] > Htarg)
				gpu_increment_reject(thr_id);
			cudaMemset(d_resNonce[thr_id], 0xff, NBN*sizeof(uint32_t));
			// resume right after the failed candidate so the rest of this
			// range is still scanned
			pdata[19] = startNounce + h_resNonce[0] + 1;
			continue;
		}
		// 64-bit sum so the range check cannot wrap around
		if ((uint64_t) throughput + pdata[19] >= max_nonce) {
			pdata[19] = max_nonce;
			break;
		}

		pdata[19] += throughput;

	} while (!work_restart[thr_id].restart);

	*hashes_done = pdata[19] - first_nonce;

	return 0;
}

/**
 * Release the per-thread GPU resources allocated by scanhash_skunk().
 * Does nothing if the thread was never initialised (or was already freed).
 *
 * @param thr_id  miner thread index
 */
extern "C" void free_skunk(int thr_id)
{
	if (!init[thr_id])
		return;

	// wait for pending GPU work before releasing its buffers
	// (cudaThreadSynchronize() is deprecated in favour of this call)
	cudaDeviceSynchronize();

	x13_fugue512_cpu_free(thr_id);
	cudaFree(d_hash[thr_id]);
	cudaFree(d_resNonce[thr_id]);
	// do not keep dangling device pointers around
	d_hash[thr_id] = NULL;
	d_resNonce[thr_id] = NULL;

	init[thr_id] = false;

	cudaDeviceSynchronize();
}
3 changes: 3 additions & 0 deletions util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2270,6 +2270,9 @@ void print_hash_tests(void)
skein2hash(&hash[0], &buf[0]);
printpfx("skein2", hash);

skunk_hash(&hash[0], &buf[0]);
printpfx("skunk", hash);

s3hash(&hash[0], &buf[0]);
printpfx("S3", hash);

Expand Down
2 changes: 1 addition & 1 deletion x13/cuda_x13_fugue512.cu
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ void x13_fugue512_gpu_hash_64(uint32_t threads, uint64_t *g_hash)
mixtabs[thr+256] = ROR8(tmp);
mixtabs[thr+512] = ROL16(tmp);
mixtabs[thr+768] = ROL8(tmp);
#if TPB < 256
#if TPB <= 256
if (blockDim.x < 256) {
const uint32_t thr = (threadIdx.x + 0x80) & 0xFF;
const uint32_t tmp = tex1Dfetch(mixTab0Tex, thr);
Expand Down

0 comments on commit cbede12

Please sign in to comment.