Skip to content

Commit

Permalink
V1.1 - GPU 7% faster, CPU 5% faster
Browse files Browse the repository at this point in the history
* Optimization up to 7% faster on cuda (Tested on gtx 1070 8gb)
* Optimization up to 5% faster on cpu (Tested on Xeon and i5 7th gen)
* Fixed "unknown error" when disconnected from pool
  • Loading branch information
polyminer1 committed Dec 5, 2018
1 parent e73e530 commit 396f1af
Show file tree
Hide file tree
Showing 16 changed files with 444 additions and 94 deletions.
2 changes: 1 addition & 1 deletion BuildInfo.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#pragma once

#define RH_PROJECT_NAME "rhminer"
#define RH_PROJECT_VERSION "1.0"
#define RH_PROJECT_VERSION "1.1"

5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# rhminer update and bugfix history

Version 1.1 - 5 Dec 2018
* Optimization up to 7% faster on cuda (Tested on gtx 1070 8gb)
* Optimization up to 5% faster on cpu (Tested on Xeon and i5 7th gen)
* Fixed "unknown error" when disconnected from pool

Version 1.0 - 30 Nov 2018
* Fixed network error with coinotron and f2pool where miner was in limbo after pool disconnected.
* Fixed miner not starting on cpu without SSe4.1
Expand Down
10 changes: 7 additions & 3 deletions MinersLib/Global.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ RHMINER_COMMAND_LINE_DEFINE_GLOBAL_STRING(g_logFileName, "");
RHMINER_COMMAND_LINE_DEFINE_GLOBAL_BOOL(g_useCPU, false);
RHMINER_COMMAND_LINE_DEFINE_GLOBAL_INT(g_cpuMinerThreads, 1);
RHMINER_COMMAND_LINE_DEFINE_GLOBAL_INT(g_testPerformance, 0);
RHMINER_COMMAND_LINE_DEFINE_GLOBAL_INT(g_testPerformanceThreads, 0);
RHMINER_COMMAND_LINE_DEFINE_GLOBAL_INT(g_setProcessPrio, 3)
RHMINER_COMMAND_LINE_DEFINE_GLOBAL_BOOL(g_disableFastTransfo, false);

bool g_useGPU = false;

Expand Down Expand Up @@ -95,7 +97,6 @@ GlobalMiningPreset::GlobalMiningPreset()
}
});
#endif //RH_COMPILE_CPU_ONLY

CmdLineManager::GlobalOptions().RegisterValue("devfee", "General", "Set devfee raward percentage. To disable devfee, simply put 0 here. But, before disabling developer fees, consider that it takes time and energy to maintain, develop and optimize this software. Your help is very appreciated.", [&](const string& val)
{
if (val == "0" || val == "0.0")
Expand Down Expand Up @@ -367,14 +368,17 @@ void GlobalMiningPreset::DoPerformanceTest()
mersenne_twister_state rnd;
_CM(merssen_twister_seed)(0xF923A401, &rnd);

const size_t ThreadCount = GpuManager::CpuInfos.numberOfProcessors;
if (g_testPerformanceThreads == 0 || g_testPerformanceThreads > GpuManager::CpuInfos.numberOfProcessors)
g_testPerformanceThreads = GpuManager::CpuInfos.numberOfProcessors;

const size_t ThreadCount = g_testPerformanceThreads;
RandomHash_State* g_threadsData = new RandomHash_State[ThreadCount];
RandomHash_CreateMany(&g_threadsData, ThreadCount);
U32 nonce2 = 0;

PrintOut("CPU: %s\n", GpuManager::CpuInfos.cpuBrandName.c_str());
PrintOut("Testing raw cpu performance for %d sec on %d threads\n", g_testPerformance, ThreadCount);

U64 timeout[] = { 10 * 1000, g_testPerformance * 1000 };
std::vector<U64> hashes;
hashes.resize(ThreadCount);
Expand Down
2 changes: 2 additions & 0 deletions MinersLib/Global.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ RHMINER_COMMAND_LINE_DECLARE_GLOBAL_STRING("logfilename", g_logFileName, "Genera
RHMINER_COMMAND_LINE_DECLARE_GLOBAL_BOOL("cpu", g_useCPU, "Gpu", "Enable the use of CPU to mine. ex '-cpu -cputhreads 4' will enable mining on cpu while gpu mining.");
RHMINER_COMMAND_LINE_DECLARE_GLOBAL_INT("cputhreads", g_cpuMinerThreads, "Gpu", "Number of CPU miner threads when mining with CPU. ex: -cpu -cputhreads 4", 0, S32_Max);
RHMINER_COMMAND_LINE_DECLARE_GLOBAL_INT("testperformance", g_testPerformance, "Debug", "Run performance test for an amount of seconds", 0, 120)
RHMINER_COMMAND_LINE_DECLARE_GLOBAL_INT("testperformancethreads", g_testPerformanceThreads, "Debug", "Amount of threads to use for performance test", 0, 256)
RHMINER_COMMAND_LINE_DECLARE_GLOBAL_INT("processpriority", g_setProcessPrio, "General", "On windows only. Set miner's process priority. 0=Background Process, 1=Low Priority, 2=Normal Priority, 3=High Priority. Default is 3. WARNING: Changing this value will affect GPU mining.", 0, 10);
RHMINER_COMMAND_LINE_DECLARE_GLOBAL_BOOL("disablefasttransfo", g_disableFastTransfo, "General", "Disable fast transfo. This can help old cpu go faster.");

class FarmFace;

Expand Down
2 changes: 1 addition & 1 deletion MinersLib/Pascal/PascalCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ const static U32 PascalHeaderNoncePosV3 = (PascalHeaderSize-4);
#define RH_StrideArrayCount 31
#define RH_StrideSize 208896


#define RH_CheckerSize (sizeof(U64))
#define RH_WorkSize RH_StrideSize
#define RH_IDEAL_ALIGNMENT 64 //NOTE : optimiz -> This should be changed for CUDA (some gpu are 256 bytes align, TODO: test ! )

13 changes: 6 additions & 7 deletions MinersLib/Pascal/RandomHashCPUMiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ void RandomHashCPUMiner::RandomHashCpuKernel(CPUKernelData* kernelData)
U32 gid = (U32)KernelOffsetManager::Increment(workWindow) - workWindow;
U32 endFrame = gid + workWindow;
bool paused = false;
U64 oldID = 0;
U64 oldID = U64_Max;
while(!kernelData->m_abortThread)
{
RHMINER_RETURN_ON_EXIT_FLAG();
Expand All @@ -90,22 +90,20 @@ void RandomHashCPUMiner::RandomHashCpuKernel(CPUKernelData* kernelData)
{
paused = false;
}
oldID = packageID;

//handle pause request from ::Pause()
if (packageData->m_requestPause)
{
//PrintOut("--> Request: Pause\n");
packageData->m_requestPause = 0;
paused = true;
}

if (!paused)
{
if (g_disableCachedNonceReuse == true ||
(g_disableCachedNonceReuse == false && memcmp(m_randomHashArray[kernelData->m_id].m_cachedHheader, packageData->m_header.asU8, PascalHeaderSize - 4) != 0))
if (g_disableCachedNonceReuse == true ||
(g_disableCachedNonceReuse == false && oldID != packageID))
{
RandomHash_SetHeader(&m_randomHashArray[kernelData->m_id], packageData->m_header.asU8, (U32)packageData->m_nonce2); //copy header
RandomHash_SetHeader(&m_randomHashArray[kernelData->m_id], packageData->m_header.asU8, (U32)packageData->m_nonce2); //copy header
}

#ifdef RH_FORCE_PASCAL_V3_ON_CPU
Expand Down Expand Up @@ -159,6 +157,7 @@ void RandomHashCPUMiner::RandomHashCpuKernel(CPUKernelData* kernelData)
{
CpuSleep(20);
}
oldID = packageID;
kernelData->m_hashes++;
}
AtomicSet(kernelData->m_abortThread, U32_Max);
Expand Down
56 changes: 36 additions & 20 deletions MinersLib/Pascal/RandomHash_Cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -298,13 +298,14 @@ U32 CUDA_SYM_DECL(RandomHash_ChecksumArray)(RH_StridePtrArray inputs)
return csum;
}

extern bool g_disableFastTransfo;
void CUDA_SYM_DECL(RandomHash_Expand)(RandomHash_State* state, RH_StridePtr input, int round, int ExpansionFactor/*, RH_StridePtr Result*/)
{
U32 inputSize = RH_STRIDE_GET_SIZE(input);
U32 seed = _CM(RandomHash_Checksum)(input);
_CM(RandomHash_Reseed)(state->m_rndGenExpand, seed);
size_t sizeExp = inputSize + ExpansionFactor * RH_M;

RH_StridePtr output = input;

S64 bytesToAdd = sizeExp - inputSize;
Expand All @@ -324,26 +325,40 @@ void CUDA_SYM_DECL(RandomHash_Expand)(RandomHash_State* state, RH_StridePtr inpu
}

RH_ASSERT(nextChunk + nextChunkSize < output + RH_StrideSize);
_CM(RH_STRIDE_MEMCPY_UNALIGNED_SIZE8)(nextChunk, outputPtr, nextChunkSize);
RH_STRIDE_CHECK_INTEGRITY(output);

U32 random = _CM(GetNextRnd)(&state->m_rndGenExpand);

U8* workBytes = state->m_workBytes;
U32 r = random % 8;
RH_ASSERT((nextChunkSize & 1) == 0);

switch(r)
if (g_disableFastTransfo)
{
case 0: _CM(Transfo0)(nextChunk, nextChunkSize, workBytes);break;
case 1: _CM(Transfo1)(nextChunk, nextChunkSize, workBytes);break;
case 2: _CM(Transfo2)(nextChunk, nextChunkSize, workBytes);break;
case 3: _CM(Transfo3)(nextChunk, nextChunkSize, workBytes);break;
case 4: _CM(Transfo4)(nextChunk, nextChunkSize, workBytes);break;
case 5: _CM(Transfo5)(nextChunk, nextChunkSize, workBytes);break;
case 6: _CM(Transfo6)(nextChunk, nextChunkSize, workBytes);break;
case 7: _CM(Transfo7)(nextChunk, nextChunkSize, workBytes);break;

U8* workBytes = state->m_workBytes;
_CM(RH_STRIDE_MEMCPY_UNALIGNED_SIZE8)(nextChunk, outputPtr, nextChunkSize);
RH_STRIDE_CHECK_INTEGRITY(output);
switch(r)
{
case 0: _CM(Transfo0)(nextChunk, nextChunkSize, workBytes); break;
case 1: _CM(Transfo1)(nextChunk, nextChunkSize, workBytes); break;
case 2: _CM(Transfo2)(nextChunk, nextChunkSize, workBytes); break;
case 3: _CM(Transfo3)(nextChunk, nextChunkSize, workBytes); break;
case 4: _CM(Transfo4)(nextChunk, nextChunkSize, workBytes); break;
case 5: _CM(Transfo5)(nextChunk, nextChunkSize, workBytes); break;
case 6: _CM(Transfo6)(nextChunk, nextChunkSize); break;
case 7: _CM(Transfo7)(nextChunk, nextChunkSize); break;
}
}
else
{
switch(r)
{
case 0: _CM(Transfo0_2)(nextChunk, nextChunkSize, outputPtr); break;
case 1: _CM(Transfo1_2)(nextChunk, nextChunkSize, outputPtr); break;
case 2: _CM(Transfo2_2)(nextChunk, nextChunkSize, outputPtr); break;
case 3: _CM(Transfo3_2)(nextChunk, nextChunkSize, outputPtr); break;
case 4: _CM(Transfo4_2)(nextChunk, nextChunkSize, outputPtr); break;
case 5: _CM(Transfo5_2)(nextChunk, nextChunkSize, outputPtr); break;
case 6: _CM(Transfo6_2)(nextChunk, nextChunkSize, outputPtr); break;
case 7: _CM(Transfo7_2)(nextChunk, nextChunkSize, outputPtr); break;
}
}

RH_STRIDE_GET_SIZE(output) += nextChunkSize;
Expand Down Expand Up @@ -396,10 +411,11 @@ inline void CUDA_SYM_DECL(RandomHash_start)(RandomHash_State* state, U32 in_roun
inline void CUDA_SYM_DECL(RandomHash_Phase_1_push)(RandomHash_State* state, int in_round)
{
if (in_round == RH_N && !g_disableCachedNonceReuse)
{
{
RH_ASSERT((RH_STRIDE_GET_SIZE(state->m_data[RH_N].in_blockHeader) <= PascalHeaderSize));
U32* headPtr = (U32*)RH_STRIDE_GET_DATA(state->m_data[RH_N].in_blockHeader);
U32* tailPtr = headPtr + (RH_STRIDE_GET_SIZE(state->m_data[RH_N].in_blockHeader)/4) - 1;
U32* headPtr = (U32*)RH_STRIDE_GET_DATA(state->m_data[RH_N].in_blockHeader);

U32* tailPtr = headPtr + (RH_STRIDE_GET_SIZE(state->m_data[RH_N].in_blockHeader)/4) - 1;
U32* cachedtailPtr = (U32*)(state->m_cachedHheader + PascalHeaderSize - 4);

if (*headPtr == *(U32*)(state->m_cachedHheader))
Expand Down Expand Up @@ -889,7 +905,7 @@ CUDA_DECL_KERNEL void CUDA_SYM(RandomHash_Init)(RandomHash_State* allStates, U8*

(*(U32*)(state->m_data[0].roundInput)) = PascalHeaderSize;
_CM(RH_STRIDE_MEMCPY_UNALIGNED_SIZE8)(RH_STRIDE_GET_DATA(state->m_data[0].roundInput), &state->m_header[0], PascalHeaderSize);

RH_STRIDE_INIT(state->m_workBytes);

if (RH_STRIDEARRAY_GET_SIZE(state->m_cachedOutputs) && !g_disableCachedNonceReuse)
Expand Down
Loading

0 comments on commit 396f1af

Please sign in to comment.