Skip to content

Commit

Permalink
Version 1.5.1 - GTX 20xx support, Cuda 10.1 and some bug fixes
Browse files Browse the repository at this point in the history
Version 1.5.1 - 14 May 2019
* Support for CUDA 10.1 with drivers 419.x(Windows) and 418.x(Linux)
* Added support for Nvidia Turing architecture (GTX 20xx). Untested.
* Fixed : Cannot disable gpu mining when using config.txt on gpu binaries.
  Now you can simply empty the value of "gpu" and "gputhreads" to disable gpu mining on gpu binaries. Ex: "gpu":"" and "gputhreads":""
* Little performance boost on linux binaries (CPU and GPU)
* Added missing screensaver source files
* change line ending in many files
  • Loading branch information
polyminer1 committed May 14, 2019
1 parent 9d40449 commit 2b1f2fe
Show file tree
Hide file tree
Showing 42 changed files with 1,068 additions and 214 deletions.
3 changes: 2 additions & 1 deletion BuildInfo.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#pragma once

#define RH_PROJECT_NAME "rhminer"
#define RH_PROJECT_VERSION "1.5"
#define RH_PROJECT_VERSION "1.5.1"


10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
# rhminer update and bugfix history

Version 1.5.1 - 14 May 2019
* Support for CUDA 10.1 with drivers 419.x(Windows) and 418.x(Linux)
* Added support for Nvidia Turing architecture (GTX 20xx). Untested.
* Fixed : Cannot disable gpu mining when using config.txt on gpu binaries.
Now you can simply empty the value of "gpu" and "gputhreads" to disable gpu mining on gpu binaries. Ex: "gpu":"" and "gputhreads":""
* Little performance boost on linux binaries (CPU and GPU)
* Added missing screensaver source files
* change line ending in many files


Version 1.5 - 2 April 2019
* Added ETHman and AwesomeMiner support (miner_getstat1, miner_restart, miner_reboot, miner_file, control_gpu)
It is strongly recommended to NOT put rhminer in a loop in a script if you intend to use EthMan or AwesomeMiner.
Expand Down
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/CMake")

include_directories(./)
include_directories(..)
include_directories(/usr/local/cuda/include)
include_directories(rhminer)

include (CheckCCompilerFlag)
Expand Down
2 changes: 1 addition & 1 deletion MinersLib/Global.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ bool GlobalMiningPreset::UpdateToDevModeState(string& connectionParams)
{
std::lock_guard<std::mutex> g(*devFeeMutex);

//TODO: change that to lower time, it causes sopt-mining-emails from nanopool
// TODO: change that to lower time, it causes sopt-mining-emails from nanopool

if (TimeGetMilliSec() > m_devFeeTimer24hMS)
{
Expand Down
2 changes: 0 additions & 2 deletions MinersLib/Pascal/RandomHash_Blake2b.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,6 @@ void CUDA_SYM_DECL(RandomHash_blake2b)(RH_StridePtr roundInput, RH_StridePtr out
ctx.c = 0; // pointer within buffer
ctx.outlen = outlen;

//TODO: optimiz
for (i = 0; i < 128; i++) // zero input block
ctx.b[i] = 0;

Expand All @@ -160,7 +159,6 @@ void CUDA_SYM_DECL(RandomHash_blake2b)(RH_StridePtr roundInput, RH_StridePtr out
ctx.c = 0; // counter to zero
}

//TODO: optimiz
ctx.b[ctx.c++] = ((const uint8_t *) in)[i];
}

Expand Down
1 change: 0 additions & 1 deletion MinersLib/Pascal/RandomHash_Blake2s.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ inline int CUDA_SYM_DECL(blake2s_compress_SSE2)( blake2s_state *S, const uint8_t
uint32_t m[16];
uint32_t v[16];

//TODO: optimiz -> unroll + MACRO
for( size_t i = 0; i < 16; ++i )
m[i] = load32_SSE2( block + i * sizeof( m[i] ) );

Expand Down
47 changes: 9 additions & 38 deletions MinersLib/Pascal/RandomHash_Cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,6 @@ struct RH_StrideArrayStruct
};
#define RH_StrideArrayStruct_GetAccum(strideArray) (&((RH_StrideArrayStruct*)strideArray)->accum)



#include "corelib/CommonData.h"

//--------------------------------------------------------------------------------------------------
Expand All @@ -80,7 +78,6 @@ inline RH_StridePtr CUDA_SYM(RH_StrideArrayGet)(RH_StridePtrArray strideArrayVar
CUDA_DECL_HOST_AND_DEVICE
inline RH_StridePtr CUDA_SYM(RH_StrideArrayAllocOutput)(RandomHash_State* state, U32 initialSize)
{

if (state->m_isMidStateRound)
{
RHMINER_ASSERT(state->m_stridesAllocIndex + initialSize + 8 < state->m_stridesAllocMidstateBarrier);
Expand All @@ -98,7 +95,6 @@ inline RH_StridePtr CUDA_SYM(RH_StrideArrayAllocOutput)(RandomHash_State* state,
state->m_stridesAllocIndex += initialSize + RH_IDEAL_ALIGNMENT;
RH_ASSERT(state->m_stridesAllocIndex < RH_STRIDE_BANK_SIZE);


RH_STRIDE_SET_SIZE(stride, initialSize);
RH_STRIDE_INIT_INTEGRITY(stride);

Expand All @@ -121,7 +117,6 @@ inline void CUDA_SYM(RH_StrideArrayGrow)(RandomHash_State* state, RH_StridePtr s
state->m_stridesAllocIndex += growSize;
RH_ASSERT(state->m_stridesAllocIndex < RH_STRIDE_BANK_SIZE);


RH_STRIDE_SET_SIZE(stride, RH_STRIDE_GET_SIZE(stride) + growSize);
RH_STRIDE_INIT_INTEGRITY(stride);
}
Expand All @@ -144,7 +139,6 @@ inline void CUDA_SYM(RH_StrideArrayClose)(RandomHash_State* state, RH_StridePtr
RH_ASSERT(state->m_stridesAllocIndex < RH_STRIDE_BANK_SIZE);
RH_ASSERT((size_t(state->m_stridesAllocIndex) % 32) == 0);


RH_STRIDE_CHECK_INTEGRITY(stride);
}

Expand Down Expand Up @@ -198,37 +192,31 @@ inline void CUDA_DECL_HOST_AND_DEVICE CUDA_SYM(RandomHash_Initialize)(RandomHash
state->m_strideID = 0;
RH_STRIDEARRAY_RESET(state->m_round5Phase2PrecalcArray);


if (state->m_isCachedOutputs)
{

state->m_isMidStateRound = true;

RH_ASSERT(state->m_stridesAllocIndex);
if (state->m_stridesAllocMidstateBarrierNext != RH_STRIDE_BANK_SIZE)
{

state->m_stridesAllocIndex = 0;
state->m_stridesAllocMidstateBarrier = state->m_stridesAllocMidstateBarrierNext;

#ifdef RHMINER_DEBUG_STRIDE_INTEGRITY_CHECK

memset(state->m_stridesInstances + 0, (U8)0xBA, state->m_stridesAllocMidstateBarrierNext);
U64* check = (U64*)(state->m_stridesInstances + RH_STRIDE_BANK_SIZE);
RHMINER_ASSERT(*check == 0xFF55AA44BB8800DDLLU);
#endif
}
else
{

const U32 ReqDelta = 4096;
state->m_stridesAllocIndex = RHMINER_ALIGN(state->m_stridesAllocIndex, 4096) + ReqDelta;
state->m_stridesAllocMidstateBarrierNext = state->m_stridesAllocMidstateBarrier;
state->m_stridesAllocMidstateBarrier = RH_STRIDE_BANK_SIZE;


#ifdef RHMINER_DEBUG_STRIDE_INTEGRITY_CHECK

memset(state->m_stridesInstances + state->m_stridesAllocIndex, (U8)0xBA, RH_STRIDE_BANK_SIZE - state->m_stridesAllocIndex);
U64* check = (U64*)(state->m_stridesInstances + RH_STRIDE_BANK_SIZE);
RHMINER_ASSERT(*check == 0xFF55AA44BB8800DDLLU);
Expand Down Expand Up @@ -289,7 +277,6 @@ void CUDA_SYM(AllocateArray)(U8*& arrayData, int count)

RH_STRIDEARRAY_GET_MAXSIZE(arrayData) = count;
RH_STRIDEARRAY_GET_EXTRA(arrayData, memoryboost) = g_memoryBoostLevel;

RH_STRIDEARRAY_GET_EXTRA(arrayData, sseoptimization) = g_sseOptimization;
RH_CUDA_ERROR_CHECK();
}
Expand Down Expand Up @@ -387,7 +374,6 @@ void CUDA_SYM(RandomHash_SetHeader)(RandomHash_State* state, U8* sourceHeader, U
RHMINER_ASSERT(PascalHeaderSize <= PascalHeaderSize);
memcpy(targetInput, sourceHeader, PascalHeaderSize);


}


Expand Down Expand Up @@ -500,6 +486,7 @@ void CUDA_SYM_DECL(RandomHash_Compress)(RandomHash_State* state, RH_StridePtrArr
#ifdef RH_ENABLE_OPTIM_STRIDE_ARRAY_MURMUR3
MurmurHash3_x86_32_State tstate = *RH_StrideArrayStruct_GetAccum(inputs);
U32 seed = _CM(MurmurHash3_x86_32_Finalize)(&tstate);

#else
U32 seed = _CM(RandomHash_ChecksumArray)(inputs);
#endif
Expand All @@ -522,9 +509,6 @@ void CUDA_SYM_DECL(RandomHash_Compress)(RandomHash_State* state, RH_StridePtrArr
inline void CUDA_SYM_DECL(RandomHash_MiddlePoint)(RandomHash_State* state)
{
state->m_midStateNonce = *(U32*)(RH_STRIDE_GET_DATA(state->m_roundInput)+PascalHeaderNoncePosV4(PascalHeaderSize));



if (!state->m_isMidStateRound)
{
const U32 ReqDelta = 4096;
Expand All @@ -548,7 +532,6 @@ inline void CUDA_SYM_DECL(RandomHash_Phase_1_push)(RandomHash_State* state, int
{
if (in_round == 5)
{

if (state->m_midStateNonce == *(U32*)(RH_STRIDE_GET_DATA(state->m_roundInput) + PascalHeaderNoncePosV4(PascalHeaderSize)))
{
state->m_skipPhase1 = 1;
Expand All @@ -559,7 +542,6 @@ inline void CUDA_SYM_DECL(RandomHash_Phase_1_push)(RandomHash_State* state, int

state->m_data[in_round-1].backup_io_results = state->m_data[in_round-1].io_results;


if (in_round == RH_N)
state->m_data[in_round - 1].io_results = state->m_data[RH_N].parenAndNeighbortOutputs;
else
Expand All @@ -576,14 +558,12 @@ inline void CUDA_SYM_DECL(RandomHash_Phase_1_pop)(RandomHash_State* state, int i
{
RH_StridePtrArray testCache = state->m_data[RH_N].parenAndNeighbortOutputs;


state->m_isCachedOutputs = false;
state->m_skipPhase1 = 0;

skipLastUpdate = true;
}


pano = state->m_data[RH_N].parenAndNeighbortOutputs;
}
else
Expand All @@ -594,15 +574,13 @@ inline void CUDA_SYM_DECL(RandomHash_Phase_1_pop)(RandomHash_State* state, int i
}

#ifdef RH_ENABLE_OPTIM_STRIDE_ARRAY_MURMUR3

U32 seed;

{
if (skipLastUpdate)
{
_CM(RH_STRIDE_ARRAY_UPDATE_MURMUR3)(pano, RH_STRIDEARRAY_GET_SIZE(pano) - 1);


RH_STRIDEARRAY_PUSHBACK_MANY_ALL(state->m_round5Phase2PrecalcArray, pano);
}
else
Expand All @@ -614,6 +592,7 @@ inline void CUDA_SYM_DECL(RandomHash_Phase_1_pop)(RandomHash_State* state, int i
MurmurHash3_x86_32_State tstate = *RH_StrideArrayStruct_GetAccum(pano);
seed = _CM(MurmurHash3_x86_32_Finalize)(&tstate);


#else
U32 seed = _CM(RandomHash_ChecksumArray)(state->m_data[in_round].parenAndNeighbortOutputs);
#endif
Expand Down Expand Up @@ -649,7 +628,6 @@ void CUDA_SYM_DECL(RandomHash_Phase_2_pop)(RandomHash_State* state, int in_round
{
pano = state->m_data[RH_N].parenAndNeighbortOutputs;


state->m_isCachedOutputs = true;
}
else
Expand All @@ -661,7 +639,7 @@ void CUDA_SYM_DECL(RandomHash_Phase_2_pop)(RandomHash_State* state, int in_round

if (in_round == 5)
{

{
RH_STRIDEARRAY_PUSHBACK(state->m_round5Phase2PrecalcArray, RH_STRIDEARRAY_GET(pano, RH_STRIDEARRAY_GET_SIZE(pano)-1));

Expand All @@ -675,7 +653,6 @@ void CUDA_SYM_DECL(RandomHash_Phase_2_pop)(RandomHash_State* state, int in_round
{
state->m_data[in_round].first_round_consume = true;



RH_STRIDEARRAY_PUSHBACK(state->m_round5Phase2PrecalcArray, RH_STRIDEARRAY_GET(pano, RH_STRIDEARRAY_GET_SIZE(pano)-1));
_CM(RH_STRIDE_ARRAY_UPDATE_MURMUR3)(state->m_round5Phase2PrecalcArray, RH_STRIDEARRAY_GET_SIZE(state->m_round5Phase2PrecalcArray) - 1);
Expand All @@ -696,12 +673,10 @@ void CUDA_SYM_DECL(RandomHash_Phase_2_pop)(RandomHash_State* state, int in_round
_CM(RandomHash_Compress)(state, state->m_data[in_round].roundOutputs, state->m_workBytes, in_round);
RH_ASSERT(RH_STRIDE_GET_SIZE(state->m_workBytes) <= 100);


if (in_round != RH_N)
{
if (in_round == 4 && state->m_isMidStateRound)
{

if (state->m_stridesAllocMidstateBarrier != RH_STRIDE_BANK_SIZE)
state->m_stridesAllocMidstateBarrierNext = RH_STRIDE_BANK_SIZE;
}
Expand Down Expand Up @@ -819,10 +794,8 @@ inline void CUDA_SYM_DECL(RandomHash_end)(RandomHash_State* state, int in_round)
_CM(RandomHash_Expand)(state, output, in_round, RH_N - in_round, state->m_data[in_round].roundOutputs);
RH_STRIDEARRAY_RESET(state->m_data[in_round].io_results);


RH_STRIDEARRAY_PUSHBACK_MANY_ALL(state->m_data[in_round].io_results, state->m_data[in_round].roundOutputs);


if (in_round == 5)
_CM(RH_STRIDE_ARRAY_UPDATE_MURMUR3_DUO)(state->m_data[5].roundOutputs, RH_STRIDEARRAY_GET_SIZE(state->m_data[5].roundOutputs) - 1, state->m_round5Phase2PrecalcArray);

Expand All @@ -846,12 +819,8 @@ inline void CUDA_SYM_DECL(RandomHash_FirstCall_push)(RandomHash_State* state, in

CUDA_DECL_KERNEL void CUDA_SYM(RandomHash_Block0)(RandomHash_State* allStates)
{
#ifdef RH_FULLDEBUG_CPU
_hash++;
#endif

CUDA_DECLARE_STATE();

/*#define RH_B0*/
RandomHash_FirstCall_push(state, 5);
RandomHash_Phase_init(state, 5);
RandomHash_Phase_1_push(state, 5);
Expand Down Expand Up @@ -1123,6 +1092,11 @@ CUDA_DECL_KERNEL void CUDA_SYM(RandomHash_Init)(RandomHash_State* allStates, U8*

if (state->m_isCachedOutputs)
startNonce = state->m_midStateNonce;

#ifdef RH_SCREEN_SAVER_MODE
extern void ScreensaverFeed(U32 nonce);
ScreensaverFeed(startNonce);
#endif

state->m_startNonce = startNonce;
*(U32*)(RH_STRIDE_GET_DATA(state->m_roundInput) + PascalHeaderNoncePosV4(PascalHeaderSize)) = startNonce;
Expand Down Expand Up @@ -1164,9 +1138,6 @@ void RandomHash_Alloc(void** out_ptr, size_t size)

void RandomHash_Search(RandomHash_State* in_state, U8* out_hash, U32 startNonce)
{
#ifdef RH_FULLDEBUG_CPU
_n = 0;
#endif
RandomHash_State* allStates = in_state;
RandomHash_Init(allStates, out_hash, startNonce);
RH_CALL_ALL_KERNEL_BLOCKS
Expand Down
1 change: 0 additions & 1 deletion MinersLib/Pascal/RandomHash_MD5.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ error

/* MD5_F, MD5_G and MD5_H are basic MD5 functions: selection, majority, parity */

//todo optimiz -> intrinsics
#define MD5_F(x, y, z) (((x) & (y)) | ((~x) & (z)))
#define MD5_G(x, y, z) (((x) & (z)) | ((y) & (~z)))
#define MD5_H(x, y, z) ((x) ^ (y) ^ (z))
Expand Down
7 changes: 5 additions & 2 deletions MinersLib/Pascal/RandomHash_MurMur3_32.h
Original file line number Diff line number Diff line change
Expand Up @@ -552,9 +552,12 @@ uint32_t CUDA_SYM_DECL(MurmurHash3_x86_32_Fast)(const U8* key, int len)
while (key != keyEnd)
{
r0 = *(U64*)(key);
MURMUR3_BODY((U32)(r0));
key += sizeof(U64);
#if defined(RH_USE_CUDA_MEM_BOOST)
RH_PREFETCH_MEM((const char*)key);
#endif
MURMUR3_BODY((U32)(r0));
MURMUR3_BODY((U32)(r0 >> 32));
key += sizeof(U64);
}

if (m >= 4)
Expand Down
1 change: 0 additions & 1 deletion MinersLib/Pascal/RandomHash_core.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,6 @@ struct RH_StrideStruct

#else //!CPU

//TODO: Optmiz - Test memcpy
#define RH_INPLACE_MEMCPY_128(pDst, pSrc, byteCount) \
{U8* end = pDst + byteCount; \
while(pDst < end) \
Expand Down
5 changes: 4 additions & 1 deletion MinersLib/Pascal/RandomHash_def.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,10 @@ inline U32 _mm_extract_epi32_( __m128i V)
m_tmp = _mm_srli_epi32(m_tmp,(32-count)); \
m = _mm_or_si128(m,m_tmp);}

#define RH_PREFETCH_MEM(addr) _mm_prefetch((char*)addr,_MM_HINT_T0);
//#define RH_PREFETCH_MEM(addr) _mm_prefetch((char*)addr,_MM_HINT_T0);
#define RH_PREFETCH_MEM(addr) _mm_prefetch((char*)addr,_MM_HINT_NTA);


#define BIG_CONSTANT(x) (x)
#define KERNEL_LOG(...) PrintOutCritical(__VA_ARGS__)
#define KERNEL0_LOG(...) PrintOutCritical(__VA_ARGS__)
Expand Down
Loading

0 comments on commit 2b1f2fe

Please sign in to comment.