Skip to content

Commit

Permalink
Version 1.3 (CPU only) with optimizations up to 260%
Browse files Browse the repository at this point in the history
* Major CPU optimizations. Up to 260% faster on some CPUs
* +155% on Xeon E5-2665
* +260% on Intel i5 2nd gen
* +195% on Intel i5 4th gen
* +250% on Intel Core 2
* New command-line option -sseboost. This option can give up to a 10% speedup on some CPUs, BUT it can also make the miner slower on others. TEST it before using it, to confirm that it actually gives a boost on your CPU.
  • Loading branch information
polyminer1 committed Jan 11, 2019
1 parent e189c09 commit d67df73
Show file tree
Hide file tree
Showing 29 changed files with 2,023 additions and 1,575 deletions.
2 changes: 1 addition & 1 deletion BuildInfo.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#pragma once

#define RH_PROJECT_NAME "rhminer"
#define RH_PROJECT_VERSION "1.2"
#define RH_PROJECT_VERSION "1.3"

8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# rhminer update and bugfix history

Version 1.3 - 11 Jan 2019
* Major CPU optimizations. Up to 260% faster on some CPUs
* +155% on Xeon E5-2665
* +260% on Intel i5 2nd gen
* +195% on Intel i5 4th gen
* +250% on Intel Core 2
* New command-line option -sseboost. This option can give up to a 10% speedup on some CPUs, BUT it can also make the miner slower on others. TEST it before using it, to confirm that it actually gives a boost on your CPU.

Version 1.2 - 13 dec 2018
* Major optimization on cuda miner (+32% on gtx 1070 8gb, +37% on gtx 1060 3gb, +40% on gtx 950)
* Simple optimization on Linux. Up to 5% depending on the cpu
Expand Down
12 changes: 11 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ cmake_minimum_required(VERSION 2.8)
option(RH_DEBUG_TARGET "Compile in Debug" OFF)
option(RH_CPU_ONLY "Compile only cpu code" OFF)
option(RH_CUDA_ARCH "Cuda architecture name" Maxwell)
option(RH_NO_SSE4 "Disable sse4 optimizations" OFF)

if(RH_CPU_ONLY)
project(rhminer)
Expand All @@ -12,7 +13,16 @@ else()
endif(RH_CPU_ONLY)

set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -pthread")

# Select the C++ compiler flags depending on whether SSE4.1 code may be emitted.
if(RH_NO_SSE4)
# RH_NO_SSE4=ON: compile without -msse4.1 and expose RHMINER_NO_SSE4 to the sources.
message(STATUS "Disabling SSe4 intrinsics")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -pthread")
# Overrides whatever RH_CUDA_ARCH was set to with the CPU_OLDGEN value.
set(RH_CUDA_ARCH CPU_OLDGEN)
add_definitions(-DRHMINER_NO_SSE4)
else()
# Default (RH_NO_SSE4=OFF): same flags plus -msse4.1 so the compiler targets SSE4.1.
message(STATUS "ENABLING SSe4 intrinsics")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -pthread -msse4.1")
endif(RH_NO_SSE4)

set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/CMake")

Expand Down
39 changes: 8 additions & 31 deletions MinersLib/Global.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ RHMINER_COMMAND_LINE_DEFINE_GLOBAL_BOOL(g_useCPU, false);
RHMINER_COMMAND_LINE_DEFINE_GLOBAL_INT(g_cpuMinerThreads, 1);
RHMINER_COMMAND_LINE_DEFINE_GLOBAL_INT(g_testPerformance, 0);
RHMINER_COMMAND_LINE_DEFINE_GLOBAL_INT(g_testPerformanceThreads, 0);
RHMINER_COMMAND_LINE_DEFINE_GLOBAL_INT(g_setProcessPrio, 3)
RHMINER_COMMAND_LINE_DEFINE_GLOBAL_BOOL(g_disableFastTransfo, false);

RHMINER_COMMAND_LINE_DEFINE_GLOBAL_INT(g_setProcessPrio, 3);
RHMINER_COMMAND_LINE_DEFINE_GLOBAL_INT(g_memoryBoostLevel, RH_OPT_UNSET);
RHMINER_COMMAND_LINE_DEFINE_GLOBAL_INT(g_sseOptimization, 0);
bool g_useGPU = false;

const U64 t1M = 1000 * 60;
Expand Down Expand Up @@ -97,29 +97,7 @@ GlobalMiningPreset::GlobalMiningPreset()
}
});
#endif //RH_COMPILE_CPU_ONLY
CmdLineManager::GlobalOptions().RegisterValue("devfee", "General", "Set devfee raward percentage. To disable devfee, simply put 0 here. But, before disabling developer fees, consider that it takes time and energy to maintain, develop and optimize this software. Your help is very appreciated.", [&](const string& val)
{
if (val == "0" || val == "0.0")
m_devfeePercent = 0.0f;
else
{
for(auto c : val)
{
if (!((c >= '0' && c <= '9') || c == '.'))
{
m_devfeePercent = 1.0f;
return;
}
}
m_devfeePercent = ToFloat(val);

if (m_devfeePercent > 50.0f)
m_devfeePercent = 50.0f;

if (m_devfeePercent < 1.0f)
m_devfeePercent = 1.0f;
}
});
CmdLineManager::GlobalOptions().RegisterFlag("list", "General", "List all gpu in the system", [&]()
{
GpuManager::listGPU();
Expand Down Expand Up @@ -188,7 +166,7 @@ void GlobalMiningPreset::SetStratumInfo(const string& val)

void GlobalMiningPreset::Initialize(char** argv, int argc)
{
CmdLineManager::GlobalOptions().RegisterValue("s", "Network", "Stratum/wallet server address:port. NOTE: You can also use http://address.xyz to connect to local wallet.", [&](const string& val)
CmdLineManager::GlobalOptions().RegisterValue("s", "Network", "Stratum/wallet server address:port. NOTE: You can also use http://address to connect to local wallet.", [&](const string& val)
{
SetStratumInfo(val);
});
Expand All @@ -200,7 +178,7 @@ void GlobalMiningPreset::Initialize(char** argv, int argc)
CmdLineManager::GlobalOptions().RegisterValue("fop", "Network", "Failover password for stratum or local wallet", [&](const string& val) { m_presets.m_fpass = val; });
CmdLineManager::GlobalOptions().RegisterValue("r", "Network", "Retries connection count for stratum or local wallet", [&](const string& val) { m_presets.m_maxFarmRetries = ToInt(val); });

CmdLineManager::GlobalOptions().RegisterValueMultiple("diff", "General", "Set local difficulyu. ex: -diff 0.832", [&](const string& val)
CmdLineManager::GlobalOptions().RegisterValueMultiple("diff", "General", "Set local difficulyu. ex: -diff 999", [&](const string& val)
{
m_localDifficulty = ToFloat(val);
if (m_localDifficulty != 0.0f)
Expand Down Expand Up @@ -385,7 +363,7 @@ void GlobalMiningPreset::DoPerformanceTest()
hashes.resize(ThreadCount);

auto kernelFunc = [&](RandomHash_State* allStates, U32 startNonce, U64 to)
{
{
while (TimeGetMilliSec() < to)
{
RandomHash_Search(allStates, out_hash, startNonce);
Expand All @@ -399,7 +377,6 @@ void GlobalMiningPreset::DoPerformanceTest()
for (int i = 0; i < PascalHeaderSize / 4; i++)
input[i] = _CM(merssen_twister_rand)(&rnd);

//match DUDA thread #0
input[PascalHeaderNoncePosV4(PascalHeaderSize) / 4] = 0;

//NOTE: the header must allready be in device mem (via SetWork)
Expand Down Expand Up @@ -436,7 +413,7 @@ void GlobalMiningPreset::DoPerformanceTest()
U64 hashCnt = 0;
for (auto h : hashes)
hashCnt += h;
PrintOut("RandomHash speed is %.2f H/S \n", hashCnt / (float)g_testPerformance);
PrintOut("RandomHash speed is %.2f H/S\n", hashCnt / (float)g_testPerformance);

exit(0);
}
}
6 changes: 4 additions & 2 deletions MinersLib/Global.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,16 @@
#include "MinersLib/CLMinerBase.h"
#include "rhminer/CommandLineManager.h"

#define RH_OPT_UNSET 9

RHMINER_COMMAND_LINE_DECLARE_GLOBAL_STRING("logfilename", g_logFileName, "General", "Set the name of the log's filename. Note: the log file will be overwritten every time you start rhminer");
RHMINER_COMMAND_LINE_DECLARE_GLOBAL_BOOL("cpu", g_useCPU, "Gpu", "Enable the use of CPU to mine. ex '-cpu -cputhreads 4' will enable mining on cpu while gpu mining.");
RHMINER_COMMAND_LINE_DECLARE_GLOBAL_INT("cputhreads", g_cpuMinerThreads, "Gpu", "Number of CPU miner threads when mining with CPU. ex: -cpu -cputhreads 4", 0, S32_Max);
RHMINER_COMMAND_LINE_DECLARE_GLOBAL_INT("testperformance", g_testPerformance, "Debug", "Run performance test for an amount of seconds", 0, 120)
RHMINER_COMMAND_LINE_DECLARE_GLOBAL_INT("testperformancethreads", g_testPerformanceThreads, "Debug", "Amount of threads to use for performance test", 0, 256)
RHMINER_COMMAND_LINE_DECLARE_GLOBAL_INT("processpriority", g_setProcessPrio, "General", "On windows only. Set miner's process priority. 0=Background Process, 1=Low Priority, 2=Normal Priority, 3=High Priority. Default is 3. WARNING: Changing this value will affect GPU mining.", 0, 10);
RHMINER_COMMAND_LINE_DECLARE_GLOBAL_BOOL("disablefasttransfo", g_disableFastTransfo, "General", "Disable fast transfo. This can help old cpu go faster.");

RHMINER_COMMAND_LINE_DECLARE_GLOBAL_INT("memoryboost", g_memoryBoostLevel, "Optimizations", "This option will enable some memory optimizations that could make the miner slower on some cpu. Test it with -testperformance before using it. 1 to enable boost. 0 to disable boost. Enabled, by default, on cpu with hyperthreading.", 0, RH_OPT_UNSET+1);
RHMINER_COMMAND_LINE_DECLARE_GLOBAL_INT("sseboost", g_sseOptimization, "Optimizations", "This option will enable some sse4 optimizations. It could make the miner slower on some cpu. Test it with -testperformance before using it. 1 to enable SSe4.1 optimizations. Disabled by default. ", 0, 2);
class FarmFace;

using namespace std;
Expand Down
141 changes: 128 additions & 13 deletions MinersLib/GpuManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,14 @@
#include <iostream>
#include <fstream>
#include <sstream>
#include <sys/sysinfo.h>
#endif

#define RHMINER_MAKE_ARCH_NAME(devname, gpuToken, archname, var) if (stristr(devname.c_str(), gpuToken)) {var = string(gpuToken) + "_" + archname;}

std::vector<GpuManager::GPUInfos> GpuManager::Gpus;
CPUInfo GpuManager::CpuInfos;
bool g_isSSE41Supported = false;
bool g_isSSE3Supported = false;

GpuManager::GpuManager()
{
Expand Down Expand Up @@ -257,11 +258,7 @@ void GpuManager::listDevices()
#endif //RH_COMPILE_CPU_ONLY

//list cpu
for (U32 i = 0; i < GpuManager::Gpus.size(); ++i)
{
if (GpuManager::Gpus[i].gpuType == GpuType_CPU)
printf("CPU : %s with %d logical cores\n", GpuManager::Gpus[i].description.c_str(), GpuManager::Gpus[i].maxCU);
}
printf("CPU : %s with %d logical cores on %d physical cores\n", CpuInfos.cpuBrandName.c_str(), CpuInfos.numberOfProcessors, CpuInfos.numberOfCores);

exit(0);
}
Expand Down Expand Up @@ -290,6 +287,24 @@ U32 GpuManager::GetEnabledGPUCount()
return cnt;
}

// Finalizes option values that depend on both the parsed command line and the
// detected CPU topology (CpuInfos).
//  - g_memoryBoostLevel: when still RH_OPT_UNSET, it becomes 1 if the logical
//    processor count differs from the physical core count, otherwise 0.
//  - g_sseOptimization: in RHMINER_ENABLE_SSE4 builds it is clamped to [0, 2],
//    because the value is later used as an index into a 3-entry name table when
//    the selected optimization is printed. In other builds it is forced to 0.
void GpuManager::SetPostCommandLineOptions()
{
    if (g_memoryBoostLevel == RH_OPT_UNSET)
    {
        //enable boost on hyperthreads cpu
        g_memoryBoostLevel = (CpuInfos.numberOfCores == CpuInfos.numberOfProcessors) ? 0 : 1;
    }

#ifdef RHMINER_ENABLE_SSE4
    if (g_sseOptimization > 2)
        g_sseOptimization = 2;
    else if (g_sseOptimization < 0)
        g_sseOptimization = 0; //a negative level would index before the name table
#else
    //Only warn when the user explicitly asked for the boost.
    if (g_sseOptimization)
    {
        PrintOut("SSE4 not available in this binary. -sseboost option ignored. \n");
    }

    //The two statements below run unconditionally in builds without SSE4.
    g_sseOptimization = 0;
    PrintOut("Detecting old-gen cpu.\n");
#endif
}

void GpuManager::LoadGPUMap()
{
std::map<string, GPUInfos> gpuIndent;
Expand Down Expand Up @@ -560,6 +575,30 @@ bool GpuManager::SetupGPU()
}
}

if (g_memoryBoostLevel)
PrintOutCritical("Enabling Memory boost.\n");

if (g_sseOptimization)
{
const char* ss[] = {"", "SSE4.1", "AVX2"};
PrintOutCritical("Enbling %s optimizations.\n", ss[g_sseOptimization]);
PrintOutCritical("\n");
PrintOutCritical("*** WARNING *** This option can make the miner slower on some CPU. Test for 2 minutes and compare, before using it.\n");
PrintOutCritical("\n");

if (CpuInfos.sse4_1Supportted == false && g_sseOptimization == 1)
{
PrintOut("SSE4.1 not supported on this cpu.\n");
exit(-1);
}

if (/*CpuInfos.avxSupportted == false && */g_sseOptimization == 2)
{
PrintOut("AVX2 not supported yet.\n");
exit(-1);
}
}

return atleaseone;
}

Expand Down Expand Up @@ -618,6 +657,56 @@ unsigned long long _xgetbv(unsigned int index)
);
return ((unsigned long long)edx << 32) | eax;
}
#else
// Returns the number of bits that are set to 1 in bitMask
// (used to count the logical processors listed in a processor mask).
DWORD CountSetBits(ULONG_PTR bitMask)
{
    DWORD setBits = 0;

    // Clearing the lowest set bit on every pass makes the loop run exactly
    // once per set bit, whatever the width of ULONG_PTR is.
    while (bitMask != 0)
    {
        bitMask &= (bitMask - 1);
        ++setBits;
    }

    return setBits;
}

// Queries Windows for the processor topology and reports:
//   processorCoreCount    - number of RelationProcessorCore entries (physical cores)
//   logicalProcessorCount - total number of bits set in the ProcessorMask of
//                           those entries (logical processors, hyperthreads included)
// Both outputs are reset to 0 first and stay 0 if the topology cannot be read.
void GetPhysicalCoreCount(U32& processorCoreCount, U32& logicalProcessorCount)
{
    processorCoreCount = 0;
    logicalProcessorCount = 0;

    // Calling with a NULL buffer only fills in the required size, in bytes.
    DWORD returnLength = 0;
    GetLogicalProcessorInformation(NULL, &returnLength);
    RHMINER_ASSERT(returnLength);
    if (returnLength == 0)
        return;

    PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(returnLength);
    RHMINER_ASSERT(buffer != NULL);
    if (buffer == NULL)
        return;

    BOOL rc = GetLogicalProcessorInformation(buffer, &returnLength);
    RHMINER_ASSERT(rc != FALSE);
    if (rc == FALSE)
    {
        // Never walk a buffer the API did not fill in.
        free(buffer);
        return;
    }

    // Only whole SYSTEM_LOGICAL_PROCESSOR_INFORMATION records are inspected.
    const DWORD entryCount = returnLength / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
    for (DWORD i = 0; i < entryCount; ++i)
    {
        if (buffer[i].Relationship == RelationProcessorCore)
        {
            processorCoreCount++;

            // A hyperthreaded core supplies more than one logical processor.
            logicalProcessorCount += CountSetBits(buffer[i].ProcessorMask);
        }
    }

    free(buffer);
}

#endif

Expand Down Expand Up @@ -661,18 +750,27 @@ void GpuManager::TestExtraInstructions()
CpuInfos.sse5Supportted = cpuinfo[2] & (1 << 11) || false;
}

g_isSSE41Supported = CpuInfos.sse4_1Supportted;
g_isSSE3Supported = CpuInfos.sse3Supportted;
PrintOutSilent("SSe3 supported : %s\n", CpuInfos.sse3Supportted ? "Yes" : "No");
PrintOutSilent("SSe4.1 supported : %s\n", CpuInfos.sse4_1Supportted ? "Yes" : "No");
PrintOutSilent("avx supported : %s\n", CpuInfos.avxSupportted ? "Yes" : "No");

#if defined(RHMINER_ENABLE_SSE4)
if (!CpuInfos.sse4_1Supportted)
{
PrintOutCritical("SSE4 is not supported by this CPU. Please use rhminer for oldgen cpu\n");
exit( 1);
}

#endif
}

void GpuManager::LoadCPUInfos()
{
#ifdef _WIN32_WINNT
SYSTEM_INFO siSysInfo;
GetSystemInfo(&siSysInfo);
CpuInfos.numberOfProcessors = siSysInfo.dwNumberOfProcessors;
GetPhysicalCoreCount(CpuInfos.numberOfCores, CpuInfos.numberOfProcessors);
CpuInfos.activeProcessorMask = (size_t)siSysInfo.dwActiveProcessorMask;
CpuInfos.allocationGranularity = siSysInfo.dwAllocationGranularity;
CpuInfos.cpuArchName = "CPU";
Expand Down Expand Up @@ -712,23 +810,38 @@ void GpuManager::LoadCPUInfos()
string brand = CPUBrandString;
#else
CpuInfos.numberOfProcessors = sysconf(_SC_NPROCESSORS_ONLN);
CpuInfos.numberOfCores = 0;
CpuInfos.allocationGranularity = 1024*64;
CpuInfos.avaiablelMem = sysconf(_SC_PAGESIZE) * sysconf(_SC_AVPHYS_PAGES);
CpuInfos.cpuBrandName = "x64";

string line;
string brand;
ifstream finfo("/proc/cpuinfo");
while(getline(finfo,line)) {
while(getline(finfo,line))
{
stringstream str(line);
string itype;
string info;
if ( getline( str, itype, ':' ) && getline(str,info) && itype.substr(0,10) == "model name" )
{

//model name : Intel(R) Core(TM) i5-2400 CPU @ 3.10GHz
if ( brand.length() == 0 && getline( str, itype, ':' ) && getline(str, info) && itype.substr(0,10) == "model name" )
{
brand = info;
break;
}

//cpu cores : 4
if ( CpuInfos.numberOfCores == 0 && getline( str, itype, ':' ) && getline(str, info) && itype.substr(0,9) == "cpu cores" )
{
CpuInfos.numberOfCores = ToUInt(TrimString(info));
}
}

if (!CpuInfos.numberOfCores)
{
CpuInfos.numberOfCores = sysconf(_SC_NPROCESSORS_ONLN);
PrintOutCritical("Error. Cannot read cpu core count. Defaulting physical core count to %d\n", sysconf(_SC_NPROCESSORS_ONLN));
}

#endif
brand = TrimString(brand);
Expand All @@ -743,4 +856,6 @@ void GpuManager::LoadCPUInfos()
CpuInfos.cpuBrandName = brand;

TestExtraInstructions();

PrintOutSilent("%s with %d logical cores on %d physical cores\n", CpuInfos.cpuBrandName.c_str(), CpuInfos.numberOfProcessors, CpuInfos.numberOfCores);
}
4 changes: 3 additions & 1 deletion MinersLib/GpuManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ struct CPUInfo
bool sse5Supportted = false;
bool avxSupportted = false;
U64 avaiablelMem;
U32 numberOfProcessors;
U32 numberOfProcessors; //counting hyperthreads
U32 numberOfCores;
size_t activeProcessorMask;
U32 allocationGranularity;
U64 UserSelectedCores = 0x0; //mask used by SetProcessAffinityMask when application starts
Expand Down Expand Up @@ -135,6 +136,7 @@ class GpuManager
static void LoadGPUMap();
static void LoadCPUInfos();
static void TestExtraInstructions();
static void SetPostCommandLineOptions();

static std::vector<cl::Device> GetDevices(std::vector<cl::Platform> const& _platforms, unsigned _platformId);
static std::vector<cl::Platform>GetPlatforms();
Expand Down
Loading

0 comments on commit d67df73

Please sign in to comment.