diff --git a/.gitignore b/.gitignore index dd437d19..ec94c2c6 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ obj/ .vs x64/ Release/ -Debug/ \ No newline at end of file +Debug/ +build/ diff --git a/CMakeLists.txt b/CMakeLists.txt index ccbc575f..9af9741a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -51,32 +51,126 @@ src/virtual_machine.cpp src/vm_compiled_light.cpp src/blake2/blake2b.c) -if (NOT ARCH_ID) - set(ARCH_ID ${CMAKE_HOST_SYSTEM_PROCESSOR}) +if(NOT ARCH_ID) + # allow cross compiling + if(CMAKE_SYSTEM_PROCESSOR STREQUAL "") + set(CMAKE_SYSTEM_PROCESSOR ${CMAKE_HOST_SYSTEM_PROCESSOR}) + endif() + string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" ARCH_ID) +endif() + +if(NOT ARM_ID) + set(ARM_ID "${ARCH_ID}") +endif() + +if(NOT ARCH) + set(ARCH "default") endif() if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) + message(STATUS "Setting default build type: ${CMAKE_BUILD_TYPE}") endif() +include(CheckCXXCompilerFlag) +include(CheckCCompilerFlag) + +function(add_flag flag) + string(REPLACE "-" "_" supported_cxx ${flag}_cxx) + check_cxx_compiler_flag(${flag} ${supported_cxx}) + if(${${supported_cxx}}) + message(STATUS "Setting CXX flag ${flag}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${flag}" PARENT_SCOPE) + endif() + string(REPLACE "-" "_" supported_c ${flag}_c) + check_c_compiler_flag(${flag} ${supported_c}) + if(${${supported_c}}) + message(STATUS "Setting C flag ${flag}") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${flag}" PARENT_SCOPE) + endif() +endfunction() + +# x86-64 if (ARCH_ID STREQUAL "x86_64" OR ARCH_ID STREQUAL "x86-64" OR ARCH_ID STREQUAL "amd64") list(APPEND randomx_sources src/jit_compiler_x86_static.S src/jit_compiler_x86.cpp) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes") + # cheat because cmake and ccache hate each other + set_property(SOURCE src/jit_compiler_x86_static.S PROPERTY LANGUAGE C) + + if(ARCH STREQUAL "native") + add_flag("-march=native") + else() + # default build has 
hardware AES enabled (software AES can be selected at runtime) + add_flag("-maes") + endif() +endif() + +# PowerPC +if (ARCH_ID STREQUAL "ppc64" OR ARCH_ID STREQUAL "ppc64le") + if(ARCH STREQUAL "native") + add_flag("-mcpu=native") + endif() + # PowerPC AES requires ALTIVEC (POWER7+), so it cannot be enabled in the default build +endif() + +# ARMv8 +if (ARM_ID STREQUAL "aarch64" OR ARM_ID STREQUAL "arm64" OR ARM_ID STREQUAL "armv8-a") + if(ARCH STREQUAL "native") + add_flag("-march=native") + else() + # default build has hardware AES enabled (software AES can be selected at runtime) + add_flag("-march=armv8-a+crypto") + endif() endif() set(RANDOMX_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/src" CACHE STRING "RandomX Include path") add_library(randomx ${randomx_sources}) -target_link_libraries(randomx - PRIVATE - ${CMAKE_THREAD_LIBS_INIT}) set_property(TARGET randomx PROPERTY POSITION_INDEPENDENT_CODE ON) set_property(TARGET randomx PROPERTY CXX_STANDARD 11) set_property(TARGET randomx PROPERTY CXX_STANDARD_REQUIRED ON) -# cheat because cmake and ccache hate each other -set_property(SOURCE src/jit_compiler_x86_static.S PROPERTY LANGUAGE C) +add_executable(randomx-tests + src/tests/tests.cpp) +target_link_libraries(randomx-tests + PRIVATE randomx) +set_property(TARGET randomx-tests PROPERTY POSITION_INDEPENDENT_CODE ON) +set_property(TARGET randomx-tests PROPERTY CXX_STANDARD 11) + +add_executable(randomx-codegen + src/tests/code-generator.cpp) +target_link_libraries(randomx-codegen + PRIVATE randomx) + +set_property(TARGET randomx-codegen PROPERTY POSITION_INDEPENDENT_CODE ON) +set_property(TARGET randomx-codegen PROPERTY CXX_STANDARD 11) + +if (NOT Threads_FOUND AND UNIX AND NOT APPLE) + set(THREADS_PREFER_PTHREAD_FLAG ON) + find_package(Threads) +endif() + +add_executable(randomx-benchmark + src/tests/benchmark.cpp + src/tests/affinity.cpp) +target_link_libraries(randomx-benchmark + PRIVATE randomx + PRIVATE ${CMAKE_THREAD_LIBS_INIT}) + +include(CheckCXXSourceCompiles) 
+check_cxx_source_compiles(" +#include <cstdint> +#include <atomic> +int main() { + std::atomic<uint64_t> a; + a.is_lock_free(); +}" HAVE_CXX_ATOMICS) + +if(NOT HAVE_CXX_ATOMICS) + target_link_libraries(randomx-benchmark + PRIVATE "atomic") +endif() +set_property(TARGET randomx-benchmark PROPERTY POSITION_INDEPENDENT_CODE ON) +set_property(TARGET randomx-benchmark PROPERTY CXX_STANDARD 11) diff --git a/README.md b/README.md index 89ae3e43..70f8d97f 100644 --- a/README.md +++ b/README.md @@ -20,23 +20,28 @@ Design description and analysis is available in [design.md](doc/design.md). ## Build -RandomX is written in C++11 and builds a static library with a C API provided by header file [randomx.h](src/randomx.h). Minimal API usage example is provided in [api-example1.c](src/tests/api-example1.c). The reference code includes a `benchmark` executable for testing. +RandomX is written in C++11 and builds a static library with a C API provided by header file [randomx.h](src/randomx.h). Minimal API usage example is provided in [api-example1.c](src/tests/api-example1.c). The reference code includes `randomx-benchmark` and `randomx-tests` executables for testing. ### Linux -Build dependencies: `make` and `gcc` (minimum version 4.8, but version 7+ is recommended). +Build dependencies: `cmake` (minimum 2.8.7) and `gcc` (minimum version 4.8, but version 7+ is recommended). -Build using the provided makefile. +To build optimized binaries for your machine, run: +``` +git clone https://github.com/tevador/RandomX.git +cd RandomX +mkdir build && cd build +cmake -DARCH=native .. +make +``` ### Windows -Build dependencies: Visual Studio 2017. - -A solution file is provided. +On Windows, it is possible to build using MinGW (same procedure as on Linux) or using Visual Studio 2017 (solution file is provided). ### Precompiled binaries -Precompiled `benchmark` binaries are available on the [Releases page](https://github.com/tevador/RandomX/releases). 
+Precompiled `randomx-benchmark` binaries are available on the [Releases page](https://github.com/tevador/RandomX/releases). ## Proof of work diff --git a/doc/configuration.md b/doc/configuration.md index 4e564eb5..7f471d0b 100644 --- a/doc/configuration.md +++ b/doc/configuration.md @@ -192,7 +192,22 @@ There is a total of 29 different instructions. The sum of frequencies must be eq #### Notes -Making large changes to the default values is not recommended. The only exceptions are the instruction pairs IROR_R/IROL_R, FADD_R/FSUB_R and FADD_M/FSUB_M, which are functionally equivalent. +Making changes to the default values is not recommended. The only exceptions are the instruction pairs IROR_R/IROL_R, FADD_R/FSUB_R and FADD_M/FSUB_M, which are functionally equivalent. Example of a safe custom configuration: + +||default|custom| +|-|------|------| +|`RANDOMX_FREQ_IROR_R`|8|5| +|`RANDOMX_FREQ_IROL_R`|2|5| + +||default|custom| +|-|------|------| +|`RANDOMX_FREQ_FADD_R`|16|17| +|`RANDOMX_FREQ_FSUB_R`|16|15| + +||default|custom| +|-|------|------| +|`RANDOMX_FREQ_FADD_M`|5|4| +|`RANDOMX_FREQ_FSUB_M`|5|6| ## Unsafe configurations @@ -200,6 +215,7 @@ There are some configurations that are considered 'unsafe' because they affect t These checks can be disabled by definining `RANDOMX_UNSAFE` when building RandomX, e.g. by using `-DRANDOMX_UNSAFE` command line switch in GCC or MSVC. It is not recommended to disable these checks except for testing purposes. + ### 1. 
Memory-time tradeoffs #### Condition diff --git a/makefile b/makefile deleted file mode 100644 index 49913bc2..00000000 --- a/makefile +++ /dev/null @@ -1,200 +0,0 @@ -#CXX=g++-8 -#CC=gcc-8 -AR=gcc-ar -PLATFORM=$(shell uname -m) -OS=$(shell uname -s) -CXXFLAGS=-std=c++11 -CCFLAGS=-std=c99 -ARFLAGS=rcs -BINDIR=bin -SRCDIR=src -TESTDIR=src/tests -OBJDIR=obj -LDFLAGS=-lpthread -RXA=$(BINDIR)/librandomx.a -BINARIES=$(RXA) $(BINDIR)/randomx-benchmark $(BINDIR)/randomx-generator $(BINDIR)/randomx-tests -RXOBJS=$(addprefix $(OBJDIR)/,aes_hash.o argon2_ref.o bytecode_machine.o dataset.o soft_aes.o virtual_memory.o vm_interpreted.o allocator.o assembly_generator_x86.o instruction.o randomx.o superscalar.o vm_compiled.o vm_interpreted_light.o argon2_core.o blake2_generator.o instructions_portable.o reciprocal.o virtual_machine.o vm_compiled_light.o blake2b.o) -ifeq ($(PLATFORM),amd64) - RXOBJS += $(addprefix $(OBJDIR)/,jit_compiler_x86_static.o jit_compiler_x86.o) - CXXFLAGS += -maes -endif -ifeq ($(PLATFORM),x86_64) - RXOBJS += $(addprefix $(OBJDIR)/,jit_compiler_x86_static.o jit_compiler_x86.o) - CXXFLAGS += -maes -endif -ifeq ($(OS),Darwin) - AR=ar -endif - -ifeq ($(PLATFORM),ppc64) - CXXFLAGS += -mcpu=native -endif - -ifeq ($(PLATFORM),ppc64le) - CXXFLAGS += -mcpu=native -endif - -release: CXXFLAGS += -O3 -flto -release: CCFLAGS += -O3 -flto -release: LDFLAGS += -flto -release: $(BINARIES) - -native: CXXFLAGS += -march=native -O3 -flto -native: CCFLAGS += -march=native -O3 -flto -native: $(BINARIES) - -nolto: CXXFLAGS += -O3 -nolto: CCFLAGS += -O3 -nolto: $(BINARIES) - -debug: CXXFLAGS += -g -debug: CCFLAGS += -g -debug: LDFLAGS += -g -debug: $(BINARIES) - -profile: CXXFLAGS += -pg -profile: CCFLAGS += -pg -profile: LDFLAGS += -pg -profile: $(BINDIR)/randomx-benchmark - -test: CXXFLAGS += -O0 - -$(RXA): $(RXOBJS) | $(BINDIR) - $(AR) $(ARFLAGS) $@ $(RXOBJS) -$(OBJDIR): - mkdir $(OBJDIR) -$(BINDIR): - mkdir $(BINDIR) -$(OBJDIR)/affinity.o: $(TESTDIR)/affinity.cpp 
$(TESTDIR)/affinity.hpp - $(CXX) $(CXXFLAGS) -c $< -o $@ -$(OBJDIR)/benchmark.o: $(TESTDIR)/benchmark.cpp $(TESTDIR)/stopwatch.hpp \ - $(TESTDIR)/utility.hpp $(SRCDIR)/randomx.h $(SRCDIR)/blake2/endian.h $(TESTDIR)/affinity.hpp - $(CXX) $(CXXFLAGS) -pthread -c $< -o $@ -$(BINDIR)/randomx-benchmark: $(OBJDIR)/benchmark.o $(OBJDIR)/affinity.o $(RXA) - $(CXX) $(LDFLAGS) -pthread $< $(OBJDIR)/affinity.o $(RXA) -o $@ -$(OBJDIR)/code-generator.o: $(TESTDIR)/code-generator.cpp $(TESTDIR)/utility.hpp \ - $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h \ - $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \ - $(SRCDIR)/assembly_generator_x86.hpp $(SRCDIR)/superscalar.hpp \ - $(SRCDIR)/superscalar_program.hpp $(SRCDIR)/instruction.hpp \ - $(SRCDIR)/blake2_generator.hpp $(SRCDIR)/aes_hash.hpp \ - $(SRCDIR)/blake2/blake2.h $(SRCDIR)/program.hpp - $(CXX) $(CXXFLAGS) -c $< -o $@ -$(BINDIR)/randomx-generator: $(OBJDIR)/code-generator.o $(RXA) - $(CXX) $(LDFLAGS) $< $(RXA) -o $@ -$(OBJDIR)/tests.o: $(TESTDIR)/tests.cpp $(TESTDIR)/utility.hpp \ - $(SRCDIR)/bytecode_machine.hpp $(SRCDIR)/common.hpp \ - $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h \ - $(SRCDIR)/randomx.h $(SRCDIR)/intrin_portable.h \ - $(SRCDIR)/instruction.hpp $(SRCDIR)/program.hpp \ - $(SRCDIR)/dataset.hpp $(SRCDIR)/superscalar_program.hpp \ - $(SRCDIR)/allocator.hpp $(SRCDIR)/blake2/blake2.h \ - $(SRCDIR)/blake2_generator.hpp $(SRCDIR)/superscalar.hpp \ - $(SRCDIR)/reciprocal.h $(SRCDIR)/jit_compiler.hpp \ - $(SRCDIR)/jit_compiler_x86.hpp - $(CXX) $(CXXFLAGS) -c $< -o $@ -$(BINDIR)/randomx-tests: $(OBJDIR)/tests.o $(RXA) - $(CXX) $(LDFLAGS) $< $(RXA) -o $@ -$(OBJDIR)/aes_hash.o: $(SRCDIR)/aes_hash.cpp $(SRCDIR)/soft_aes.h $(SRCDIR)/intrin_portable.h | $(OBJDIR) -$(OBJDIR)/argon2_ref.o: $(SRCDIR)/argon2_ref.c $(SRCDIR)/argon2.h $(SRCDIR)/argon2_core.h \ - $(SRCDIR)/blake2/blamka-round-ref.h $(SRCDIR)/blake2/blake2.h \ - $(SRCDIR)/blake2/blake2-impl.h $(SRCDIR)/blake2/endian.h $(SRCDIR)/blake2/blake2-impl.h \ - 
$(SRCDIR)/blake2/blake2.h -$(OBJDIR)/bytecode_machine.o: $(SRCDIR)/bytecode_machine.cpp $(SRCDIR)/bytecode_machine.hpp \ - $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \ - $(SRCDIR)/intrin_portable.h $(SRCDIR)/instruction.hpp $(SRCDIR)/program.hpp \ - $(SRCDIR)/reciprocal.h -$(OBJDIR)/blake2b.o: $(SRCDIR)/blake2/blake2b.c $(SRCDIR)/blake2/blake2.h \ - $(SRCDIR)/blake2/blake2-impl.h $(SRCDIR)/blake2/endian.h - $(CC) $(CCFLAGS) -c $< -o $@ -$(OBJDIR)/dataset.o: $(SRCDIR)/dataset.cpp $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h \ - $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h $(SRCDIR)/dataset.hpp \ - $(SRCDIR)/superscalar_program.hpp $(SRCDIR)/instruction.hpp $(SRCDIR)/jit_compiler_x86.hpp \ - $(SRCDIR)/allocator.hpp $(SRCDIR)/virtual_memory.hpp $(SRCDIR)/superscalar.hpp \ - $(SRCDIR)/blake2_generator.hpp $(SRCDIR)/reciprocal.h $(SRCDIR)/argon2.h $(SRCDIR)/argon2_core.h \ - $(SRCDIR)/intrin_portable.h -$(OBJDIR)/jit_compiler_x86.o: $(SRCDIR)/jit_compiler_x86.cpp $(SRCDIR)/jit_compiler_x86.hpp \ - $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \ - $(SRCDIR)/jit_compiler_x86_static.hpp $(SRCDIR)/superscalar.hpp \ - $(SRCDIR)/superscalar_program.hpp $(SRCDIR)/instruction.hpp $(SRCDIR)/blake2_generator.hpp \ - $(SRCDIR)/program.hpp $(SRCDIR)/reciprocal.h $(SRCDIR)/virtual_memory.hpp \ - $(SRCDIR)/instruction_weights.hpp -$(OBJDIR)/jit_compiler_x86_static.o: $(SRCDIR)/jit_compiler_x86_static.S $(SRCDIR)/configuration.h \ - $(SRCDIR)/asm/program_prologue_linux.inc $(SRCDIR)/asm/program_xmm_constants.inc \ - $(SRCDIR)/asm/program_loop_load.inc $(SRCDIR)/asm/program_read_dataset.inc \ - $(SRCDIR)/asm/program_read_dataset_sshash_init.inc \ - $(SRCDIR)/asm/program_read_dataset_sshash_fin.inc \ - $(SRCDIR)/asm/program_loop_store.inc $(SRCDIR)/asm/program_epilogue_linux.inc \ - $(SRCDIR)/asm/program_epilogue_store.inc $(SRCDIR)/asm/program_sshash_load.inc \ - 
$(SRCDIR)/asm/program_sshash_prefetch.inc $(SRCDIR)/asm/program_sshash_constants.inc \ - $(SRCDIR)/asm/randomx_reciprocal.inc -$(OBJDIR)/soft_aes.o: $(SRCDIR)/soft_aes.cpp $(SRCDIR)/soft_aes.h $(SRCDIR)/intrin_portable.h -$(OBJDIR)/virtual_memory.o: $(SRCDIR)/virtual_memory.cpp $(SRCDIR)/virtual_memory.hpp -$(OBJDIR)/vm_interpreted.o: $(SRCDIR)/vm_interpreted.cpp $(SRCDIR)/vm_interpreted.hpp \ - $(SRCDIR)/bytecode_machine.hpp $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h \ - $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h $(SRCDIR)/virtual_machine.hpp \ - $(SRCDIR)/program.hpp $(SRCDIR)/instruction.hpp $(SRCDIR)/instruction_weights.hpp \ - $(SRCDIR)/intrin_portable.h $(SRCDIR)/allocator.hpp $(SRCDIR)/dataset.hpp \ - $(SRCDIR)/superscalar_program.hpp $(SRCDIR)/jit_compiler_x86.hpp $(SRCDIR)/reciprocal.h -$(OBJDIR)/allocator.o: $(SRCDIR)/allocator.cpp $(SRCDIR)/allocator.hpp $(SRCDIR)/intrin_portable.h \ - $(SRCDIR)/virtual_memory.hpp $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h \ - $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h -$(OBJDIR)/assembly_generator_x86.o: $(SRCDIR)/assembly_generator_x86.cpp \ - $(SRCDIR)/assembly_generator_x86.hpp $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h \ - $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h $(SRCDIR)/reciprocal.h $(SRCDIR)/program.hpp \ - $(SRCDIR)/instruction.hpp $(SRCDIR)/superscalar.hpp $(SRCDIR)/superscalar_program.hpp \ - $(SRCDIR)/blake2_generator.hpp $(SRCDIR)/instruction_weights.hpp -$(OBJDIR)/instruction.o: $(SRCDIR)/instruction.cpp $(SRCDIR)/instruction.hpp \ - $(SRCDIR)/blake2/endian.h $(SRCDIR)/common.hpp $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \ - $(SRCDIR)/instruction_weights.hpp -$(OBJDIR)/randomx.o: $(SRCDIR)/randomx.cpp $(SRCDIR)/randomx.h $(SRCDIR)/dataset.hpp $(SRCDIR)/common.hpp \ - $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/superscalar_program.hpp \ - $(SRCDIR)/instruction.hpp $(SRCDIR)/jit_compiler_x86.hpp $(SRCDIR)/allocator.hpp \ - $(SRCDIR)/vm_interpreted.hpp 
$(SRCDIR)/virtual_machine.hpp $(SRCDIR)/program.hpp \ - $(SRCDIR)/intrin_portable.h $(SRCDIR)/vm_interpreted_light.hpp $(SRCDIR)/vm_compiled.hpp \ - $(SRCDIR)/vm_compiled_light.hpp $(SRCDIR)/blake2/blake2.h $(SRCDIR)/bytecode_machine.hpp -$(OBJDIR)/superscalar.o: $(SRCDIR)/superscalar.cpp $(SRCDIR)/configuration.h $(SRCDIR)/program.hpp \ - $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h $(SRCDIR)/randomx.h $(SRCDIR)/instruction.hpp \ - $(SRCDIR)/superscalar.hpp $(SRCDIR)/superscalar_program.hpp $(SRCDIR)/blake2_generator.hpp \ - $(SRCDIR)/intrin_portable.h $(SRCDIR)/reciprocal.h -$(OBJDIR)/vm_compiled.o: $(SRCDIR)/vm_compiled.cpp $(SRCDIR)/vm_compiled.hpp \ - $(SRCDIR)/virtual_machine.hpp $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h \ - $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h $(SRCDIR)/program.hpp $(SRCDIR)/instruction.hpp \ - $(SRCDIR)/jit_compiler_x86.hpp $(SRCDIR)/allocator.hpp $(SRCDIR)/dataset.hpp \ - $(SRCDIR)/superscalar_program.hpp -$(OBJDIR)/vm_interpreted_light.o: $(SRCDIR)/vm_interpreted_light.cpp \ - $(SRCDIR)/vm_interpreted_light.hpp $(SRCDIR)/vm_interpreted.hpp $(SRCDIR)/common.hpp \ - $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \ - $(SRCDIR)/virtual_machine.hpp $(SRCDIR)/program.hpp $(SRCDIR)/instruction.hpp \ - $(SRCDIR)/intrin_portable.h $(SRCDIR)/allocator.hpp $(SRCDIR)/dataset.hpp \ - $(SRCDIR)/superscalar_program.hpp $(SRCDIR)/jit_compiler_x86.hpp \ - $(SRCDIR)/bytecode_machine.hpp -$(OBJDIR)/argon2_core.o: $(SRCDIR)/argon2_core.c $(SRCDIR)/argon2_core.h $(SRCDIR)/argon2.h \ - $(SRCDIR)/blake2/blake2.h $(SRCDIR)/blake2/blake2-impl.h $(SRCDIR)/blake2/endian.h -$(OBJDIR)/blake2_generator.o: $(SRCDIR)/blake2_generator.cpp $(SRCDIR)/blake2/blake2.h \ - $(SRCDIR)/blake2/endian.h $(SRCDIR)/blake2_generator.hpp -$(OBJDIR)/instructions_portable.o: $(SRCDIR)/instructions_portable.cpp $(SRCDIR)/common.hpp \ - $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \ - $(SRCDIR)/intrin_portable.h 
-$(OBJDIR)/reciprocal.o: $(SRCDIR)/reciprocal.c $(SRCDIR)/reciprocal.h -$(OBJDIR)/virtual_machine.o: $(SRCDIR)/virtual_machine.cpp $(SRCDIR)/virtual_machine.hpp \ - $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \ - $(SRCDIR)/program.hpp $(SRCDIR)/instruction.hpp $(SRCDIR)/aes_hash.hpp $(SRCDIR)/blake2/blake2.h \ - $(SRCDIR)/intrin_portable.h $(SRCDIR)/allocator.hpp -$(OBJDIR)/vm_compiled_light.o: $(SRCDIR)/vm_compiled_light.cpp $(SRCDIR)/vm_compiled_light.hpp \ - $(SRCDIR)/vm_compiled.hpp $(SRCDIR)/virtual_machine.hpp $(SRCDIR)/common.hpp \ - $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h $(SRCDIR)/program.hpp \ - $(SRCDIR)/instruction.hpp $(SRCDIR)/jit_compiler_x86.hpp $(SRCDIR)/allocator.hpp \ - $(SRCDIR)/dataset.hpp $(SRCDIR)/superscalar_program.hpp - -$(OBJDIR)/%.o: $(SRCDIR)/%.c - $(CC) $(CCFLAGS) -c $< -o $@ - -$(OBJDIR)/%.o: $(SRCDIR)/%.cpp - $(CXX) $(CXXFLAGS) -c $< -o $@ - -$(OBJDIR)/%.o: $(SRCDIR)/%.S - $(CXX) -x assembler-with-cpp -c $< -o $@ - -clean: - rm -f $(BINARIES) $(OBJDIR)/*.o diff --git a/src/common.hpp b/src/common.hpp index 72961ea5..0c504fe8 100644 --- a/src/common.hpp +++ b/src/common.hpp @@ -43,6 +43,7 @@ namespace randomx { static_assert((RANDOMX_DATASET_BASE_SIZE & (RANDOMX_DATASET_BASE_SIZE - 1)) == 0, "RANDOMX_DATASET_BASE_SIZE must be a power of 2."); static_assert(RANDOMX_DATASET_BASE_SIZE <= 4294967296ULL, "RANDOMX_DATASET_BASE_SIZE must not exceed 4294967296."); static_assert(RANDOMX_DATASET_EXTRA_SIZE % 64 == 0, "RANDOMX_DATASET_EXTRA_SIZE must be divisible by 64."); + static_assert((uint64_t)RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE <= 17179869184, "Dataset size must not exceed 16 GiB."); static_assert(RANDOMX_PROGRAM_SIZE > 0, "RANDOMX_PROGRAM_SIZE must be greater than 0"); static_assert(RANDOMX_PROGRAM_ITERATIONS > 0, "RANDOMX_PROGRAM_ITERATIONS must be greater than 0"); static_assert(RANDOMX_PROGRAM_COUNT > 0, "RANDOMX_PROGRAM_COUNT must be 
greater than 0"); @@ -72,7 +73,7 @@ namespace randomx { constexpr uint32_t ArgonBlockSize = 1024; constexpr int ArgonSaltSize = sizeof("" RANDOMX_ARGON_SALT) - 1; constexpr int SuperscalarMaxSize = 3 * RANDOMX_SUPERSCALAR_LATENCY + 2; - constexpr int CacheLineSize = RANDOMX_DATASET_ITEM_SIZE; + constexpr size_t CacheLineSize = RANDOMX_DATASET_ITEM_SIZE; constexpr int ScratchpadSize = RANDOMX_SCRATCHPAD_L3; constexpr uint32_t CacheLineAlignMask = (RANDOMX_DATASET_BASE_SIZE - 1) & ~(CacheLineSize - 1); constexpr uint32_t CacheSize = RANDOMX_ARGON_MEMORY * ArgonBlockSize; diff --git a/src/dataset.hpp b/src/dataset.hpp index 58a67f48..192d63a2 100644 --- a/src/dataset.hpp +++ b/src/dataset.hpp @@ -50,6 +50,10 @@ struct randomx_cache { randomx::DatasetInitFunc* datasetInit; randomx::SuperscalarProgram programs[RANDOMX_CACHE_ACCESSES]; std::vector reciprocalCache; + + bool isInitialized() { + return programs[0].getSize() != 0; + } }; //A pointer to a standard-layout struct object points to its initial member diff --git a/src/intrin_portable.h b/src/intrin_portable.h index 14176d92..b4f1b503 100644 --- a/src/intrin_portable.h +++ b/src/intrin_portable.h @@ -53,6 +53,11 @@ constexpr int RoundToZero = 3; #define __SSE2__ 1 #endif +//MSVC doesn't define __AES__ +#if defined(_MSC_VER) && defined(__SSE2__) +#define __AES__ +#endif + //the library "sqrt" function provided by MSVC for x86 targets doesn't give //the correct results, so we have to use inline assembly to call x87 fsqrt directly #if !defined(__SSE2__) @@ -121,9 +126,16 @@ FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) { #define rx_xor_vec_f128 _mm_xor_pd #define rx_and_vec_f128 _mm_and_pd #define rx_or_vec_f128 _mm_or_pd + +#ifdef __AES__ + #define rx_aesenc_vec_i128 _mm_aesenc_si128 #define rx_aesdec_vec_i128 _mm_aesdec_si128 +#define HAVE_AES + +#endif //__AES__ + FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) { return _mm_cvtsi128_si32(a); } @@ -164,7 +176,7 @@ FORCE_INLINE void 
rx_set_rounding_mode(uint32_t mode) { #include #include #include -#include +#include #undef vector #undef pixel #undef bool @@ -190,7 +202,6 @@ typedef union{ #define rx_aligned_free(a) free(a) #define rx_prefetch_nta(x) - /* Splat 64-bit long long to 2 64-bit long longs */ FORCE_INLINE __m128i vec_splat2sd (int64_t scalar) { return (__m128i) vec_splats (scalar); } @@ -268,6 +279,7 @@ FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { return (rx_vec_f128)vec_or(a,b); } + #if defined(__CRYPTO__) FORCE_INLINE __m128ll vrev(__m128i v){ @@ -291,18 +303,9 @@ FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) { __m128ll out = vrev((__m128i)__builtin_crypto_vncipher(_v,zero)); return (rx_vec_i128)vec_xor((__m128i)out,rkey); } -#else -static const char* platformError = "Platform doesn't support hardware AES"; - -FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) { - throw std::runtime_error(platformError); -} - -FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) { - throw std::runtime_error(platformError); -} -#endif +#define HAVE_AES +#endif //__CRYPTO__ FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) { vec_u _a; @@ -506,16 +509,6 @@ FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { return x; } -static const char* platformError = "Platform doesn't support hardware AES"; - -FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) { - throw std::runtime_error(platformError); -} - -FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) { - throw std::runtime_error(platformError); -} - FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) { return a.u32[0]; } @@ -591,6 +584,20 @@ void rx_set_rounding_mode(uint32_t mode); #endif +#ifndef HAVE_AES +static const char* platformError = "Platform doesn't support hardware AES"; + +#include + +FORCE_INLINE 
rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) { + throw std::runtime_error(platformError); +} + +FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) { + throw std::runtime_error(platformError); +} +#endif + double loadDoublePortable(const void* addr); uint64_t mulh(uint64_t, uint64_t); int64_t smulh(int64_t, int64_t); diff --git a/src/randomx.cpp b/src/randomx.cpp index 8598e6e1..6feafdec 100644 --- a/src/randomx.cpp +++ b/src/randomx.cpp @@ -33,13 +33,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "vm_compiled.hpp" #include "vm_compiled_light.hpp" #include "blake2/blake2.h" +#include <cassert> extern "C" { randomx_cache *randomx_alloc_cache(randomx_flags flags) { - randomx_cache *cache = new randomx_cache(); + randomx_cache *cache = nullptr; /* stays null if 'new' throws, so the null check in the catch block is well-defined */ try { + cache = new randomx_cache(); switch (flags & (RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES)) { case RANDOMX_FLAG_DEFAULT: cache->dealloc = &randomx::deallocCache; @@ -78,26 +80,32 @@ extern "C" { } } catch (std::exception &ex) { - randomx_release_cache(cache); - cache = nullptr; + if (cache != nullptr) { + randomx_release_cache(cache); + cache = nullptr; + } } return cache; } void randomx_init_cache(randomx_cache *cache, const void *key, size_t keySize) { + assert(cache != nullptr); + assert(keySize == 0 || key != nullptr); cache->initialize(cache, key, keySize); } void randomx_release_cache(randomx_cache* cache) { + assert(cache != nullptr); cache->dealloc(cache); delete cache; } randomx_dataset *randomx_alloc_dataset(randomx_flags flags) { - randomx_dataset *dataset = new randomx_dataset(); + randomx_dataset *dataset = nullptr; /* stays null if 'new' throws, so the null check in the catch block is well-defined */ try { + dataset = new randomx_dataset(); if (flags & RANDOMX_FLAG_LARGE_PAGES) { dataset->dealloc = &randomx::deallocDataset; dataset->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(randomx::DatasetSize); @@ -108,31 +116,45 @@ extern "C" { } } catch (std::exception &ex) { - randomx_release_dataset(dataset); - dataset = nullptr; + if 
(dataset != nullptr) { + randomx_release_dataset(dataset); + dataset = nullptr; + } } return dataset; } + constexpr unsigned long DatasetItemCount = randomx::DatasetSize / RANDOMX_DATASET_ITEM_SIZE; + unsigned long randomx_dataset_item_count() { - return randomx::DatasetSize / RANDOMX_DATASET_ITEM_SIZE; + return DatasetItemCount; } void randomx_init_dataset(randomx_dataset *dataset, randomx_cache *cache, unsigned long startItem, unsigned long itemCount) { + assert(dataset != nullptr); + assert(cache != nullptr); + assert(startItem < DatasetItemCount && itemCount <= DatasetItemCount); + assert(startItem + itemCount <= DatasetItemCount); cache->datasetInit(cache, dataset->memory + startItem * randomx::CacheLineSize, startItem, startItem + itemCount); } void *randomx_get_dataset_memory(randomx_dataset *dataset) { + assert(dataset != nullptr); return dataset->memory; } void randomx_release_dataset(randomx_dataset *dataset) { + assert(dataset != nullptr); dataset->dealloc(dataset); delete dataset; } randomx_vm *randomx_create_vm(randomx_flags flags, randomx_cache *cache, randomx_dataset *dataset) { + assert(cache != nullptr || (flags & RANDOMX_FLAG_FULL_MEM)); + assert(cache == nullptr || cache->isInitialized()); + assert(dataset != nullptr || !(flags & RANDOMX_FLAG_FULL_MEM)); + randomx_vm *vm = nullptr; try { @@ -222,25 +244,35 @@ extern "C" { } void randomx_vm_set_cache(randomx_vm *machine, randomx_cache* cache) { + assert(machine != nullptr); + assert(cache != nullptr && cache->isInitialized()); machine->setCache(cache); } void randomx_vm_set_dataset(randomx_vm *machine, randomx_dataset *dataset) { + assert(machine != nullptr); + assert(dataset != nullptr); machine->setDataset(dataset); } void randomx_destroy_vm(randomx_vm *machine) { + assert(machine != nullptr); delete machine; } void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output) { + assert(machine != nullptr); + assert(inputSize == 0 || input != nullptr); + 
assert(output != nullptr); alignas(16) uint64_t tempHash[8]; - blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0); + int blakeResult = blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0); + assert(blakeResult == 0); machine->initScratchpad(&tempHash); machine->resetRoundingMode(); for (int chain = 0; chain < RANDOMX_PROGRAM_COUNT - 1; ++chain) { machine->run(&tempHash); - blake2b(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0); + blakeResult = blake2b(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0); + assert(blakeResult == 0); } machine->run(&tempHash); machine->getFinalResult(output, RANDOMX_HASH_SIZE); diff --git a/src/reciprocal.c b/src/reciprocal.c index 5552821a..22620f53 100644 --- a/src/reciprocal.c +++ b/src/reciprocal.c @@ -26,6 +26,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include #include "reciprocal.h" /* @@ -45,6 +46,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ uint64_t randomx_reciprocal(uint64_t divisor) { + assert(divisor != 0); + const uint64_t p2exp63 = 1ULL << 63; uint64_t quotient = p2exp63 / divisor, remainder = p2exp63 % divisor; diff --git a/src/superscalar_program.hpp b/src/superscalar_program.hpp index 38c2ae47..7bcd484b 100644 --- a/src/superscalar_program.hpp +++ b/src/superscalar_program.hpp @@ -57,7 +57,11 @@ namespace randomx { } Instruction programBuffer[SuperscalarMaxSize]; - uint32_t size; + uint32_t size +#ifndef NDEBUG + = 0 +#endif + ; int addrReg; double ipc; int codeSize; diff --git a/src/tests/benchmark.cpp b/src/tests/benchmark.cpp index 21ec5865..c7f1fcfe 100644 --- a/src/tests/benchmark.cpp +++ b/src/tests/benchmark.cpp @@ -204,7 +204,10 @@ int main(int argc, char** argv) { try { if (jit && !RANDOMX_HAVE_COMPILER) { - throw std::runtime_error("JIT compilation is not supported on this platform"); + throw std::runtime_error("JIT compilation is not supported on this platform. Try without --jit"); + } + if (!jit && RANDOMX_HAVE_COMPILER) { + std::cout << "WARNING: You are using the interpreter mode. Use --jit for optimal performance." << std::endl; } Stopwatch sw(true); @@ -243,7 +246,13 @@ int main(int argc, char** argv) { for (int i = 0; i < threadCount; ++i) { randomx_vm *vm = randomx_create_vm(flags, cache, dataset); if (vm == nullptr) { - throw std::runtime_error("Unsupported virtual machine options"); + if (!softAes) { + throw std::runtime_error("Cannot create VM with the selected options. Try using --softAes"); + } + if (largePages) { + throw std::runtime_error("Cannot create VM with the selected options. 
Try without --largePages"); + } + throw std::runtime_error("Cannot create VM"); } vms.push_back(vm); } diff --git a/src/tests/tests.cpp b/src/tests/tests.cpp index 7e45f81f..ba5806d5 100644 --- a/src/tests/tests.cpp +++ b/src/tests/tests.cpp @@ -118,6 +118,26 @@ int main() { } }); + runTest("randomx_reciprocal", true, []() { + assert(randomx_reciprocal(3) == 12297829382473034410U); + assert(randomx_reciprocal(13) == 11351842506898185609U); + assert(randomx_reciprocal(33) == 17887751829051686415U); + assert(randomx_reciprocal(65537) == 18446462603027742720U); + assert(randomx_reciprocal(15000001) == 10316166306300415204U); + assert(randomx_reciprocal(3845182035) == 10302264209224146340U); + assert(randomx_reciprocal(0xffffffff) == 9223372039002259456U); + }); + + runTest("randomx_reciprocal_fast", RANDOMX_HAVE_FAST_RECIPROCAL, []() { + assert(randomx_reciprocal_fast(3) == 12297829382473034410U); + assert(randomx_reciprocal_fast(13) == 11351842506898185609U); + assert(randomx_reciprocal_fast(33) == 17887751829051686415U); + assert(randomx_reciprocal_fast(65537) == 18446462603027742720U); + assert(randomx_reciprocal_fast(15000001) == 10316166306300415204U); + assert(randomx_reciprocal_fast(3845182035) == 10302264209224146340U); + assert(randomx_reciprocal_fast(0xffffffff) == 9223372039002259456U); + }); + runTest("Dataset initialization (interpreter)", stringsEqual(RANDOMX_ARGON_SALT, "RandomXL\x12"), []() { initCache("test key 000"); uint64_t datasetItem[8]; @@ -154,26 +174,6 @@ int main() { assert(equalsHex(state, "fa89397dd6ca422513aeadba3f124b5540324c4ad4b6db434394307a17c833ab")); }); - runTest("randomx_reciprocal", true, []() { - assert(randomx_reciprocal(3) == 12297829382473034410U); - assert(randomx_reciprocal(13) == 11351842506898185609U); - assert(randomx_reciprocal(33) == 17887751829051686415U); - assert(randomx_reciprocal(65537) == 18446462603027742720U); - assert(randomx_reciprocal(15000001) == 10316166306300415204U); - 
assert(randomx_reciprocal(3845182035) == 10302264209224146340U); - assert(randomx_reciprocal(0xffffffff) == 9223372039002259456U); - }); - - runTest("randomx_reciprocal_fast", RANDOMX_HAVE_FAST_RECIPROCAL, []() { - assert(randomx_reciprocal_fast(3) == 12297829382473034410U); - assert(randomx_reciprocal_fast(13) == 11351842506898185609U); - assert(randomx_reciprocal_fast(33) == 17887751829051686415U); - assert(randomx_reciprocal_fast(65537) == 18446462603027742720U); - assert(randomx_reciprocal_fast(15000001) == 10316166306300415204U); - assert(randomx_reciprocal_fast(3845182035) == 10302264209224146340U); - assert(randomx_reciprocal_fast(0xffffffff) == 9223372039002259456U); - }); - randomx::NativeRegisterFile reg; randomx::BytecodeMachine decoder; randomx::InstructionByteCode ibc; @@ -1011,6 +1011,7 @@ int main() { cache = randomx_alloc_cache(RANDOMX_FLAG_JIT); currentKey.size = 0; randomx_destroy_vm(vm); + initCache("test key 000"); vm = randomx_create_vm(RANDOMX_FLAG_JIT, cache, nullptr); runTest("Hash test 2a (compiler)", RANDOMX_HAVE_COMPILER && stringsEqual(RANDOMX_ARGON_SALT, "RandomXL\x12"), test_a); diff --git a/vcxproj/randomx-dll.vcxproj b/vcxproj/randomx-dll.vcxproj index e0cf2f34..03770433 100644 --- a/vcxproj/randomx-dll.vcxproj +++ b/vcxproj/randomx-dll.vcxproj @@ -59,6 +59,7 @@ + diff --git a/vcxproj/randomx-dll.vcxproj.filters b/vcxproj/randomx-dll.vcxproj.filters index a30fa8e9..038fb718 100644 --- a/vcxproj/randomx-dll.vcxproj.filters +++ b/vcxproj/randomx-dll.vcxproj.filters @@ -169,5 +169,8 @@ Source Files + + Source Files + \ No newline at end of file