Skip to content

Commit

Permalink
Optimized loading from scratchpad
Browse files Browse the repository at this point in the history
  • Loading branch information
SChernykh committed Sep 11, 2019
1 parent f1ff014 commit 93fec18
Show file tree
Hide file tree
Showing 7 changed files with 63 additions and 14 deletions.
4 changes: 0 additions & 4 deletions src/asm/program_loop_load.inc
@@ -1,5 +1,3 @@
mov rdx, rax
and eax, RANDOMX_SCRATCHPAD_MASK
lea rcx, [rsi+rax]
push rcx
xor r8, qword ptr [rcx+0]
Expand All @@ -10,8 +8,6 @@
xor r13, qword ptr [rcx+40]
xor r14, qword ptr [rcx+48]
xor r15, qword ptr [rcx+56]
ror rdx, 32
and edx, RANDOMX_SCRATCHPAD_MASK
lea rcx, [rsi+rdx]
push rcx
cvtdq2pd xmm0, qword ptr [rcx+0]
Expand Down
1 change: 0 additions & 1 deletion src/asm/program_loop_store.inc
@@ -1,4 +1,3 @@
xor eax, eax
pop rcx
mov qword ptr [rcx+0], r8
mov qword ptr [rcx+8], r9
Expand Down
20 changes: 13 additions & 7 deletions src/jit_compiler_x86.cpp
Expand Up @@ -243,7 +243,7 @@ namespace randomx {
generateProgramPrologue(prog, pcfg);
memcpy(code + codePos, codeReadDataset, readDatasetSize);
codePos += readDatasetSize;
generateProgramEpilogue(prog);
generateProgramEpilogue(prog, pcfg);
}

void JitCompilerX86::generateProgramLight(Program& prog, ProgramConfiguration& pcfg, uint32_t datasetOffset) {
Expand All @@ -254,7 +254,7 @@ namespace randomx {
emitByte(CALL);
emit32(superScalarHashOffset - (codePos + 4));
emit(codeReadDatasetLightSshFin, readDatasetLightFinSize);
generateProgramEpilogue(prog);
generateProgramEpilogue(prog, pcfg);
}

template<size_t N>
Expand Down Expand Up @@ -298,12 +298,13 @@ namespace randomx {
for (unsigned i = 0; i < 8; ++i) {
registerUsage[i] = -1;
}

codePos = ((uint8_t*)randomx_program_prologue_first_load) - ((uint8_t*)randomx_program_prologue);
code[codePos + sizeof(REX_XOR_RAX_R64)] = 0xc0 + pcfg.readReg0;
code[codePos + sizeof(REX_XOR_RAX_R64) * 2 + 1] = 0xc0 + pcfg.readReg1;

codePos = prologueSize;
memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask));
emit(REX_XOR_RAX_R64);
emitByte(0xc0 + pcfg.readReg0);
emit(REX_XOR_RAX_R64);
emitByte(0xc0 + pcfg.readReg1);
memcpy(code + codePos, codeLoopLoad, loopLoadSize);
codePos += loopLoadSize;
for (unsigned i = 0; i < prog.getSize(); ++i) {
Expand All @@ -318,7 +319,12 @@ namespace randomx {
emitByte(0xc0 + pcfg.readReg3);
}

void JitCompilerX86::generateProgramEpilogue(Program& prog) {
void JitCompilerX86::generateProgramEpilogue(Program& prog, ProgramConfiguration& pcfg) {
emit(REX_MOV_RR64);
emitByte(0xc0 + pcfg.readReg0);
emit(REX_XOR_RAX_R64);
emitByte(0xc0 + pcfg.readReg1);
emit((const uint8_t*)&randomx_prefetch_scratchpad, ((uint8_t*)&randomx_prefetch_scratchpad_end) - ((uint8_t*)&randomx_prefetch_scratchpad));
memcpy(code + codePos, codeLoopStore, loopStoreSize);
codePos += loopStoreSize;
emit(SUB_EBX);
Expand Down
2 changes: 1 addition & 1 deletion src/jit_compiler_x86.hpp
Expand Up @@ -73,7 +73,7 @@ namespace randomx {
int32_t codePos;

void generateProgramPrologue(Program&, ProgramConfiguration&);
void generateProgramEpilogue(Program&);
void generateProgramEpilogue(Program&, ProgramConfiguration&);
void genAddressReg(Instruction&, bool);
void genAddressRegDst(Instruction&);
void genAddressImm(Instruction&);
Expand Down
21 changes: 21 additions & 0 deletions src/jit_compiler_x86_static.S
Expand Up @@ -37,7 +37,10 @@
#define WINABI
#endif

.global DECL(randomx_prefetch_scratchpad)
.global DECL(randomx_prefetch_scratchpad_end)
.global DECL(randomx_program_prologue)
.global DECL(randomx_program_prologue_first_load)
.global DECL(randomx_program_loop_begin)
.global DECL(randomx_program_loop_load)
.global DECL(randomx_program_start)
Expand Down Expand Up @@ -65,6 +68,16 @@

#define db .byte

DECL(randomx_prefetch_scratchpad):
mov rdx, rax
and eax, RANDOMX_SCRATCHPAD_MASK
prefetcht0 [rsi+rax]
ror rdx, 32
and edx, RANDOMX_SCRATCHPAD_MASK
prefetcht0 [rsi+rdx]

DECL(randomx_prefetch_scratchpad_end):

.balign 64
DECL(randomx_program_prologue):
#if defined(WINABI)
Expand All @@ -75,6 +88,14 @@ DECL(randomx_program_prologue):
movapd xmm13, xmmword ptr [mantissaMask+rip]
movapd xmm14, xmmword ptr [exp240+rip]
movapd xmm15, xmmword ptr [scaleMask+rip]

DECL(randomx_program_prologue_first_load):
xor rax, r8
xor rax, r8
mov rdx, rax
and eax, RANDOMX_SCRATCHPAD_MASK
ror rdx, 32
and edx, RANDOMX_SCRATCHPAD_MASK
jmp DECL(randomx_program_loop_begin)

.balign 64
Expand Down
26 changes: 25 additions & 1 deletion src/jit_compiler_x86_static.asm
Expand Up @@ -28,7 +28,10 @@ IFDEF RAX

_RANDOMX_JITX86_STATIC SEGMENT PAGE READ EXECUTE

PUBLIC randomx_prefetch_scratchpad
PUBLIC randomx_prefetch_scratchpad_end
PUBLIC randomx_program_prologue
PUBLIC randomx_program_prologue_first_load
PUBLIC randomx_program_loop_begin
PUBLIC randomx_program_loop_load
PUBLIC randomx_program_start
Expand All @@ -54,15 +57,36 @@ RANDOMX_CACHE_MASK EQU (RANDOMX_ARGON_MEMORY*16-1)
RANDOMX_ALIGN EQU 4096
SUPERSCALAR_OFFSET EQU ((((RANDOMX_ALIGN + 32 * RANDOMX_PROGRAM_SIZE) - 1) / (RANDOMX_ALIGN) + 1) * (RANDOMX_ALIGN))

randomx_prefetch_scratchpad PROC
mov rdx, rax
and eax, RANDOMX_SCRATCHPAD_MASK
prefetcht0 [rsi+rax]
ror rdx, 32
and edx, RANDOMX_SCRATCHPAD_MASK
prefetcht0 [rsi+rdx]
randomx_prefetch_scratchpad ENDP

randomx_prefetch_scratchpad_end PROC
randomx_prefetch_scratchpad_end ENDP

ALIGN 64
randomx_program_prologue PROC
include asm/program_prologue_win64.inc
movapd xmm13, xmmword ptr [mantissaMask]
movapd xmm14, xmmword ptr [exp240]
movapd xmm15, xmmword ptr [scaleMask]
jmp randomx_program_loop_begin
randomx_program_prologue ENDP

randomx_program_prologue_first_load PROC
xor rax, r8
xor rax, r8
mov rdx, rax
and eax, RANDOMX_SCRATCHPAD_MASK
ror rdx, 32
and edx, RANDOMX_SCRATCHPAD_MASK
jmp randomx_program_loop_begin
randomx_program_prologue_first_load ENDP

ALIGN 64
include asm/program_xmm_constants.inc

Expand Down
3 changes: 3 additions & 0 deletions src/jit_compiler_x86_static.hpp
Expand Up @@ -29,7 +29,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once

extern "C" {
void randomx_prefetch_scratchpad();
void randomx_prefetch_scratchpad_end();
void randomx_program_prologue();
void randomx_program_prologue_first_load();
void randomx_program_loop_begin();
void randomx_program_loop_load();
void randomx_program_start();
Expand Down

0 comments on commit 93fec18

Please sign in to comment.