Skip to content

Commit

Permalink
Support of CryptoNight v8 ReverseWaltz
Browse files Browse the repository at this point in the history
rebased version of fireice-uk#2261

Added support of CryptoNight v8 Reverse Waltz (named cryptonight_v8_reversewaltz here) - equal to CryptoNight v8 but with 3/4 iterations of CryptoNight v8 and with reversed shuffle operation

We plan to use CryptoNight v8 Reverse Waltz as new PoW algorithm for Graft (graft-project/GraftNetwork#234).
  • Loading branch information
EDDragonWolf authored and psychocrypt committed Mar 7, 2019
1 parent c1f83fe commit 2d9087c
Show file tree
Hide file tree
Showing 12 changed files with 135 additions and 33 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ If your prefered coin is not listed, you can choose one of the following algorit
- cryptonight_v7_stellite
- cryptonight_v8
- cryptonight_v8_half (used by masari and stellite)
- cryptonight_v8_reversewaltz (used by graft)
- cryptonight_v8_zelerius
- 4MiB scratchpad memory
- cryptonight_haven
Expand Down
2 changes: 1 addition & 1 deletion xmrstak/backend/amd/amd_gpu/gpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
* this is required if the dev pool is mining monero
* but the user tuned there settings for another currency
*/
if(miner_algo == cryptonight_monero_v8)
if(miner_algo == cryptonight_monero_v8 || miner_algo == cryptonight_v8_reversewaltz)
{
if(ctx->memChunk < 2)
mem_chunk_exp = 1u << 2;
Expand Down
33 changes: 24 additions & 9 deletions xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ R"===(
#define cryptonight_superfast 12
#define cryptonight_gpu 13
#define cryptonight_conceal 14
#define cryptonight_v8_reversewaltz 17

/* For Mesa clover support */
#ifdef cl_clang_storage_class_specifiers
Expand Down Expand Up @@ -639,7 +640,7 @@ __kernel void JOIN(cn0,ALGO)(__global ulong *input, __global uint4 *Scratchpad,
R"===(

// __NV_CL_C_VERSION checks if NVIDIA opencl is used
#if(ALGO == cryptonight_monero_v8 && defined(__NV_CL_C_VERSION))
#if((ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) && defined(__NV_CL_C_VERSION))
# define SCRATCHPAD_CHUNK(N) (*(__local uint4*)((__local uchar*)(scratchpad_line) + (idxS ^ (N << 4))))
# define SCRATCHPAD_CHUNK_GLOBAL (*((__global uint16*)(Scratchpad + (IDX((idx0 & 0x1FFFC0U) >> 4)))))
#else
Expand All @@ -659,7 +660,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
float4 conc_var = (float4)(0.0f);
#endif

#if(ALGO == cryptonight_monero_v8)
#if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz)
ulong b[4];
uint4 b_x[2];
// NVIDIA
Expand All @@ -673,7 +674,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
#endif
__local uint AES0[256], AES1[256];

#if(ALGO == cryptonight_monero_v8)
#if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz)
# if defined(__clang__) && !defined(__NV_CL_C_VERSION)
__local uint RCP[256];
# endif
Expand All @@ -689,7 +690,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
AES0[i] = tmp;
AES1[i] = rotate(tmp, 8U);

#if(ALGO == cryptonight_monero_v8 && (defined(__clang__) && !defined(__NV_CL_C_VERSION)))
#if((ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) && (defined(__clang__) && !defined(__NV_CL_C_VERSION)))
RCP[i] = RCP_C[i];
#endif
}
Expand Down Expand Up @@ -723,7 +724,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states

b_x[0] = ((uint4 *)b)[0];

#if(ALGO == cryptonight_monero_v8)
#if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz)
a[1] = states[1] ^ states[5];
b[2] = states[8] ^ states[10];
b[3] = states[9] ^ states[11];
Expand Down Expand Up @@ -755,7 +756,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
{
ulong c[2];

#if(ALGO == cryptonight_monero_v8 && defined(__NV_CL_C_VERSION))
#if((ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) && defined(__NV_CL_C_VERSION))
uint idxS = idx0 & 0x30U;
*scratchpad_line = SCRATCHPAD_CHUNK_GLOBAL;
#endif
Expand Down Expand Up @@ -792,6 +793,15 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
SCRATCHPAD_CHUNK(2) = as_uint4(chunk1 + ((ulong2 *)b_x)[0]);
SCRATCHPAD_CHUNK(3) = as_uint4(chunk2 + ((ulong2 *)a)[0]);
}
#elif(ALGO == cryptonight_v8_reversewaltz)
{
ulong2 chunk3 = as_ulong2(SCRATCHPAD_CHUNK(1));
ulong2 chunk2 = as_ulong2(SCRATCHPAD_CHUNK(2));
ulong2 chunk1 = as_ulong2(SCRATCHPAD_CHUNK(3));
SCRATCHPAD_CHUNK(1) = as_uint4(chunk3 + ((ulong2 *)(b_x + 1))[0]);
SCRATCHPAD_CHUNK(2) = as_uint4(chunk1 + ((ulong2 *)b_x)[0]);
SCRATCHPAD_CHUNK(3) = as_uint4(chunk2 + ((ulong2 *)a)[0]);
}
#endif

#if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2)
Expand All @@ -807,7 +817,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
SCRATCHPAD_CHUNK(0) = b_x[0];
idx0 = as_uint2(c[0]).s0 & MASK;

#elif(ALGO == cryptonight_monero_v8)
#elif(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz)
SCRATCHPAD_CHUNK(0) = b_x[0] ^ ((uint4 *)c)[0];
# ifdef __NV_CL_C_VERSION
// flush shuffled data
Expand All @@ -826,7 +836,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
uint4 tmp;
tmp = SCRATCHPAD_CHUNK(0);

#if(ALGO == cryptonight_monero_v8)
#if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz)
// Use division and square root results from the _previous_ iteration to hide the latency
tmp.s0 ^= division_result.s0;
tmp.s1 ^= division_result.s1 ^ sqrt_result;
Expand All @@ -853,8 +863,13 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
ulong2 chunk2 = as_ulong2(SCRATCHPAD_CHUNK(2));
result_mul ^= chunk2;
ulong2 chunk3 = as_ulong2(SCRATCHPAD_CHUNK(3));
#if(ALGO == cryptonight_v8_reversewaltz)
SCRATCHPAD_CHUNK(1) = as_uint4(chunk1 + ((ulong2 *)(b_x + 1))[0]);
SCRATCHPAD_CHUNK(2) = as_uint4(chunk3 + ((ulong2 *)b_x)[0]);
#else
SCRATCHPAD_CHUNK(1) = as_uint4(chunk3 + ((ulong2 *)(b_x + 1))[0]);
SCRATCHPAD_CHUNK(2) = as_uint4(chunk1 + ((ulong2 *)b_x)[0]);
#endif
SCRATCHPAD_CHUNK(3) = as_uint4(chunk2 + ((ulong2 *)a)[0]);
a[0] += result_mul.s0;
a[1] += result_mul.s1;
Expand Down Expand Up @@ -882,7 +897,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states

((uint4 *)a)[0] ^= tmp;

#if (ALGO == cryptonight_monero_v8)
#if (ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz)
# if defined(__NV_CL_C_VERSION)
// flush shuffled data
SCRATCHPAD_CHUNK_GLOBAL = *scratchpad_line;
Expand Down
2 changes: 1 addition & 1 deletion xmrstak/backend/amd/amd_gpu/opencl/fast_int_math_v2.cl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ R"===(
* @author SChernykh
*/

#if(ALGO == cryptonight_monero_v8)
#if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz)

static const __constant uint RCP_C[256] =
{
Expand Down
31 changes: 26 additions & 5 deletions xmrstak/backend/cpu/crypto/cryptonight_aesni.h
Original file line number Diff line number Diff line change
Expand Up @@ -638,6 +638,16 @@ inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var)
_mm_store_si128((__m128i *)&l0[idx1 ^ 0x30], _mm_add_epi64(chunk2, ax0)); \
if (ALGO == cryptonight_r) \
cx = _mm_xor_si128(_mm_xor_si128(cx, chunk3), _mm_xor_si128(chunk1, chunk2)); \
} \
if(ALGO == cryptonight_v8_reversewaltz) \
{ \
const uint64_t idx1 = idx0 & MASK; \
const __m128i chunk3 = _mm_load_si128((__m128i *)&l0[idx1 ^ 0x10]); \
const __m128i chunk2 = _mm_load_si128((__m128i *)&l0[idx1 ^ 0x20]); \
const __m128i chunk1 = _mm_load_si128((__m128i *)&l0[idx1 ^ 0x30]); \
_mm_store_si128((__m128i *)&l0[idx1 ^ 0x10], _mm_add_epi64(chunk3, bx1)); \
_mm_store_si128((__m128i *)&l0[idx1 ^ 0x20], _mm_add_epi64(chunk1, bx0)); \
_mm_store_si128((__m128i *)&l0[idx1 ^ 0x30], _mm_add_epi64(chunk2, ax0)); \
}

#define CN_MONERO_V8_SHUFFLE_1(n, l0, idx0, ax0, bx0, bx1, lo, hi) \
Expand All @@ -653,10 +663,22 @@ inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var)
_mm_store_si128((__m128i *)&l0[idx1 ^ 0x10], _mm_add_epi64(chunk3, bx1)); \
_mm_store_si128((__m128i *)&l0[idx1 ^ 0x20], _mm_add_epi64(chunk1, bx0)); \
_mm_store_si128((__m128i *)&l0[idx1 ^ 0x30], _mm_add_epi64(chunk2, ax0)); \
} \
if(ALGO == cryptonight_v8_reversewaltz) \
{ \
const uint64_t idx1 = idx0 & MASK; \
const __m128i chunk3 = _mm_xor_si128(_mm_load_si128((__m128i *)&l0[idx1 ^ 0x10]), _mm_set_epi64x(lo, hi)); \
const __m128i chunk2 = _mm_load_si128((__m128i *)&l0[idx1 ^ 0x20]); \
hi ^= ((uint64_t*)&chunk2)[0]; \
lo ^= ((uint64_t*)&chunk2)[1]; \
const __m128i chunk1 = _mm_load_si128((__m128i *)&l0[idx1 ^ 0x30]); \
_mm_store_si128((__m128i *)&l0[idx1 ^ 0x10], _mm_add_epi64(chunk3, bx1)); \
_mm_store_si128((__m128i *)&l0[idx1 ^ 0x20], _mm_add_epi64(chunk1, bx0)); \
_mm_store_si128((__m128i *)&l0[idx1 ^ 0x30], _mm_add_epi64(chunk2, ax0)); \
}

#define CN_MONERO_V8_DIV(n, cx, sqrt_result, division_result_xmm, cl) \
if(ALGO == cryptonight_monero_v8) \
if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) \
{ \
uint64_t sqrt_result_tmp; \
assign(sqrt_result_tmp, sqrt_result); \
Expand Down Expand Up @@ -735,7 +757,7 @@ inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var)
idx0 = h0[0] ^ h0[4]; \
ax0 = _mm_set_epi64x(h0[1] ^ h0[5], idx0); \
bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); \
if(ALGO == cryptonight_monero_v8) \
if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) \
{ \
bx1 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]); \
division_result_xmm = _mm_cvtsi64_si128(h0[12]); \
Expand Down Expand Up @@ -782,7 +804,7 @@ inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var)
ptr0 = (__m128i *)&l0[idx0 & MASK]; \
if(PREFETCH) \
_mm_prefetch((const char*)ptr0, _MM_HINT_T0); \
if(ALGO != cryptonight_monero_v8 && ALGO != cryptonight_r && ALGO != cryptonight_r_wow) \
if(ALGO != cryptonight_monero_v8 && ALGO != cryptonight_r && ALGO != cryptonight_r_wow && ALGO != cryptonight_v8_reversewaltz) \
bx0 = cx

#define CN_STEP3(n, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0, cx, bx1, sqrt_result, division_result_xmm, cn_r_data) \
Expand All @@ -807,7 +829,7 @@ inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var)
ah0 += lo; \
al0 += hi; \
} \
if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_r || ALGO == cryptonight_r_wow) \
if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_r || ALGO == cryptonight_r_wow || ALGO == cryptonight_v8_reversewaltz) \
{ \
bx1 = bx0; \
bx0 = cx; \
Expand Down Expand Up @@ -1081,7 +1103,6 @@ struct Cryptonight_hash_asm
keccak((const uint8_t *)input + len * i, len, ctx[i]->hash_state, 200);
cn_explode_scratchpad<false, false, ALGO>((__m128i*)ctx[i]->hash_state, (__m128i*)ctx[i]->long_state, algo);
}

if(ALGO == cryptonight_r)
{
// API ATTRIBUTE is only required for cryptonight_r
Expand Down
20 changes: 19 additions & 1 deletion xmrstak/backend/cpu/minethd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,16 @@ bool minethd::self_test()
ctx[0]->hash_fn("\x54\x68\x69\x73\x20\x69\x73\x20\x61\x20\x74\x65\x73\x74\x20\x54\x68\x69\x73\x20\x69\x73\x20\x61\x20\x74\x65\x73\x74\x20\x54\x68\x69\x73\x20\x69\x73\x20\x61\x20\x74\x65\x73\x74", 44, out, ctx, algo);
bResult = bResult && memcmp(out, "\xf7\x59\x58\x8a\xd5\x7e\x75\x84\x67\x29\x54\x43\xa9\xbd\x71\x49\x0a\xbf\xf8\xe9\xda\xd1\xb9\x5b\x6b\xf2\xf5\xd0\xd7\x83\x87\xbc", 32) == 0;
}
else if(algo == POW(cryptonight_v8_reversewaltz))
{
func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), false, algo);
ctx[0]->hash_fn("This is a test This is a test This is a test", 44, out, ctx, algo);
bResult = memcmp(out, "\x32\xf7\x36\xec\x1d\x2f\x3f\xc5\x4c\x49\xbe\xb8\xa0\x47\x6c\xbf\xdd\x14\xc3\x51\xb9\xc6\xd7\x2c\x6f\x9f\xfc\xb5\x87\x5b\xe6\xb3", 32) == 0;

func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), true, algo);
ctx[0]->hash_fn("This is a test This is a test This is a test", 44, out, ctx, algo);
bResult &= memcmp(out, "\x32\xf7\x36\xec\x1d\x2f\x3f\xc5\x4c\x49\xbe\xb8\xa0\x47\x6c\xbf\xdd\x14\xc3\x51\xb9\xc6\xd7\x2c\x6f\x9f\xfc\xb5\x87\x5b\xe6\xb3", 32) == 0;
}
else
printer::inst()->print_msg(L0,
"Cryptonight hash self-test NOT defined for POW %s", algo.Name().c_str());
Expand Down Expand Up @@ -610,6 +620,9 @@ void minethd::func_multi_selector(cryptonight_ctx** ctx, minethd::cn_on_new_job&
case cryptonight_r:
algv = 14;
break;
case cryptonight_v8_reversewaltz:
algv = 15;
break;
default:
algv = 2;
break;
Expand Down Expand Up @@ -689,7 +702,12 @@ void minethd::func_multi_selector(cryptonight_ctx** ctx, minethd::cn_on_new_job&
Cryptonight_hash<N>::template hash<cryptonight_r, false, false>,
Cryptonight_hash<N>::template hash<cryptonight_r, true, false>,
Cryptonight_hash<N>::template hash<cryptonight_r, false, true>,
Cryptonight_hash<N>::template hash<cryptonight_r, true, true>
Cryptonight_hash<N>::template hash<cryptonight_r, true, true>,

Cryptonight_hash<N>::template hash<cryptonight_v8_reversewaltz, false, false>,
Cryptonight_hash<N>::template hash<cryptonight_v8_reversewaltz, true, false>,
Cryptonight_hash<N>::template hash<cryptonight_v8_reversewaltz, false, true>,
Cryptonight_hash<N>::template hash<cryptonight_v8_reversewaltz, true, true>
};

std::bitset<2> digit;
Expand Down
2 changes: 1 addition & 1 deletion xmrstak/backend/cpu/minethd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class minethd : public iBackend
bool bHaveAes, bool bNoPrefetch, const xmrstak_algo& algo, const std::string& asm_version_str = "off");

private:

minethd(miner_work& pWork, size_t iNo, int iMultiway, bool no_prefetch, int64_t affinity, const std::string& asm_version);

template<uint32_t N>
Expand Down
13 changes: 9 additions & 4 deletions xmrstak/backend/cryptonight.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ enum xmrstak_algo_id
cryptonight_conceal = 14,
cryptonight_r_wow = 15,
cryptonight_r = 16,
cryptonight_v8_reversewaltz = 17, //equal to cryptonight_monero_v8 but with 3/4 iterations and reversed shuffle operation

cryptonight_turtle = start_derived_algo_id,
cryptonight_v8_half = (start_derived_algo_id + 1),
Expand All @@ -39,7 +40,7 @@ enum xmrstak_algo_id
*/
inline std::string get_algo_name(xmrstak_algo_id algo_id)
{
static std::array<std::string, 17> base_algo_names =
static std::array<std::string, 18> base_algo_names =
{{
"invalid_algo",
"cryptonight",
Expand All @@ -57,7 +58,8 @@ inline std::string get_algo_name(xmrstak_algo_id algo_id)
"cryptonight_gpu",
"cryptonight_conceal",
"cryptonight_r_wow",
"cryptonight_r"
"cryptonight_r",
"cryptonight_v8_reversewaltz" // used by graft
}};

static std::array<std::string, 3> derived_algo_names =
Expand Down Expand Up @@ -177,9 +179,11 @@ constexpr uint32_t CN_TURTLE_MASK = 0x1FFF0;

constexpr uint32_t CN_ZELERIUS_ITER = 0x60000;

constexpr uint32_t CN_WALTZ_ITER = 0x60000;

inline xmrstak_algo POW(xmrstak_algo_id algo_id)
{
static std::array<xmrstak_algo, 17> pow = {{
static std::array<xmrstak_algo, 18> pow = {{
{invalid_algo, invalid_algo},
{cryptonight, cryptonight, CN_ITER, CN_MEMORY},
{cryptonight_lite, cryptonight_lite, CN_ITER/2, CN_MEMORY/2},
Expand All @@ -196,7 +200,8 @@ inline xmrstak_algo POW(xmrstak_algo_id algo_id)
{cryptonight_gpu, cryptonight_gpu, CN_GPU_ITER, CN_MEMORY, CN_GPU_MASK},
{cryptonight_conceal, cryptonight_conceal, CN_ITER/2, CN_MEMORY},
{cryptonight_r_wow, cryptonight_r_wow, CN_ITER, CN_MEMORY},
{cryptonight_r, cryptonight_r, CN_ITER, CN_MEMORY}
{cryptonight_r, cryptonight_r, CN_ITER, CN_MEMORY},
{cryptonight_v8_reversewaltz, cryptonight_v8_reversewaltz, CN_WALTZ_ITER, CN_MEMORY}
}};

static std::array<xmrstak_algo, 3> derived_pow =
Expand Down
Loading

0 comments on commit 2d9087c

Please sign in to comment.