diff --git a/llvm/test/tools/llvm-exegesis/X86/latency/segment-registers-subprocess.asm b/llvm/test/tools/llvm-exegesis/X86/latency/segment-registers-subprocess.asm
new file mode 100644
index 0000000000000..5d5219f9375f2
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/X86/latency/segment-registers-subprocess.asm
@@ -0,0 +1,29 @@
+# REQUIRES: exegesis-can-measure-latency, x86_64-linux
+
+# Check that the values of the segment registers are set properly when in
+# subprocess mode.
+
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mode=latency -snippets-file=%s -execution-mode=subprocess | FileCheck %s
+
+# LLVM-EXEGESIS-DEFREG FS 12345600
+# LLVM-EXEGESIS-DEFREG GS 2468ac00
+# LLVM-EXEGESIS-DEFREG R13 0
+# LLVM-EXEGESIS-DEFREG R14 127
+# LLVM-EXEGESIS-DEFREG R15 0
+# LLVM-EXEGESIS-MEM-DEF MEM1 4096 0000000012345600
+# LLVM-EXEGESIS-MEM-DEF MEM2 4096 000000002468ac00
+# LLVM-EXEGESIS-MEM-MAP MEM1 305418240
+# LLVM-EXEGESIS-MEM-MAP MEM2 610836480
+
+movq %fs:0, %r13
+cmpq $0x12345600, %r13
+cmovneq %r14, %r15
+movq %gs:0, %r13
+cmpq $0x2468ac00, %r13
+cmovneq %r14, %r15
+
+movq $60, %rax
+movq %r15, %rdi
+syscall
+
+# CHECK-NOT: error: 'Child benchmarking process exited with non-zero exit code: Child process returned with unknown exit code'
diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
index 2c2d1adb0fcf0..537417c7a0796 100644
--- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
@@ -39,6 +39,9 @@
 #endif
 
 #ifdef __linux__
+#ifdef __x86_64__
+#include <asm/prctl.h>
+#endif // __x86_64__
 #include <sys/mman.h>
 #include <sys/syscall.h>
 #include <unistd.h>
@@ -907,9 +910,97 @@ void ExegesisX86Target::decrementLoopCounterAndJump(
       .addImm(X86::COND_NE);
 }
 
+void generateRegisterStackPush(unsigned int Register,
+                               std::vector<MCInst> &GeneratedCode) {
+  GeneratedCode.push_back(MCInstBuilder(X86::PUSH64r).addReg(Register));
+}
+
+void generateRegisterStackPop(unsigned int Register,
+                              std::vector<MCInst> &GeneratedCode) {
+  GeneratedCode.push_back(MCInstBuilder(X86::POP64r).addReg(Register));
+}
+
+void generateSyscall(long SyscallNumber, std::vector<MCInst> &GeneratedCode) {
+  GeneratedCode.push_back(
+      loadImmediate(X86::RAX, 64, APInt(64, SyscallNumber)));
+  GeneratedCode.push_back(MCInstBuilder(X86::SYSCALL));
+}
+
+// Registers used to pass the first six system call arguments on x86-64 Linux,
+// in argument order.
+constexpr std::array<unsigned, 6> SyscallArgumentRegisters{
+    X86::RDI, X86::RSI, X86::RDX, X86::R10, X86::R8, X86::R9};
+
+// Appends to GeneratedCode pushes of RCX, R11, RAX, and the first
+// ArgumentCount syscall argument registers.
+static void saveSyscallRegisters(std::vector<MCInst> &GeneratedCode,
+                                 unsigned ArgumentCount) {
+  assert(ArgumentCount <= SyscallArgumentRegisters.size() &&
+         "System calls on X86-64 Linux can only take six arguments");
+  // Preserve RCX and R11 (Clobbered by the system call).
+  generateRegisterStackPush(X86::RCX, GeneratedCode);
+  generateRegisterStackPush(X86::R11, GeneratedCode);
+  // Preserve RAX (used for the syscall number/return value).
+  generateRegisterStackPush(X86::RAX, GeneratedCode);
+  // Preserve the registers used to pass arguments to the system call.
+  for (unsigned I = 0; I < ArgumentCount; ++I)
+    generateRegisterStackPush(SyscallArgumentRegisters[I], GeneratedCode);
+}
+
+// Appends to GeneratedCode the pops that undo saveSyscallRegisters for the
+// same ArgumentCount.
+static void restoreSyscallRegisters(std::vector<MCInst> &GeneratedCode,
+                                    unsigned ArgumentCount) {
+  assert(ArgumentCount <= SyscallArgumentRegisters.size() &&
+         "System calls on X86-64 Linux can only take six arguments");
+  // Restore the argument registers, in the opposite order of the way they are
+  // saved.
+  for (unsigned I = ArgumentCount; I > 0; --I)
+    generateRegisterStackPop(SyscallArgumentRegisters[I - 1], GeneratedCode);
+  generateRegisterStackPop(X86::RAX, GeneratedCode);
+  generateRegisterStackPop(X86::R11, GeneratedCode);
+  generateRegisterStackPop(X86::RCX, GeneratedCode);
+}
+
+// Returns the instructions that set the base of FS or GS to Value through the
+// arch_prctl system call, saving and restoring the registers it uses.
+static std::vector<MCInst> loadImmediateSegmentRegister(unsigned Reg,
+                                                        const APInt &Value) {
+#ifdef __x86_64__
+  assert(Value.getBitWidth() <= 64 && "Value must fit in the register.");
+  std::vector<MCInst> LoadSegmentRegisterCode;
+  // Preserve the syscall registers here as we don't
+  // want to make any assumptions about the ordering of what registers are
+  // loaded in first, and we might have already loaded in registers that we are
+  // going to be clobbering here.
+  saveSyscallRegisters(LoadSegmentRegisterCode, 2);
+  // Generate the instructions to make the arch_prctl system call to set
+  // the registers.
+  int SyscallCode = 0;
+  if (Reg == X86::FS)
+    SyscallCode = ARCH_SET_FS;
+  else if (Reg == X86::GS)
+    SyscallCode = ARCH_SET_GS;
+  else
+    llvm_unreachable("Only the segment registers GS and FS are supported");
+  LoadSegmentRegisterCode.push_back(
+      loadImmediate(X86::RDI, 64, APInt(64, SyscallCode)));
+  LoadSegmentRegisterCode.push_back(loadImmediate(X86::RSI, 64, Value));
+  generateSyscall(SYS_arch_prctl, LoadSegmentRegisterCode);
+  // Restore the registers in reverse order.
+  restoreSyscallRegisters(LoadSegmentRegisterCode, 2);
+  return LoadSegmentRegisterCode;
+#else
+  llvm_unreachable("Loading immediate segment registers is only supported with "
+                   "x86-64 llvm-exegesis");
+#endif
+}
+
 std::vector<MCInst> ExegesisX86Target::setRegTo(const MCSubtargetInfo &STI,
                                                 unsigned Reg,
                                                 const APInt &Value) const {
+  if (X86::SEGMENT_REGRegClass.contains(Reg))
+    return loadImmediateSegmentRegister(Reg, Value);
   if (X86::GR8RegClass.contains(Reg))
     return {loadImmediate(Reg, 8, Value)};
   if (X86::GR16RegClass.contains(Reg))
@@ -992,12 +1083,6 @@ static constexpr const intptr_t VAddressSpaceCeiling = 0xC0000000;
 static constexpr const intptr_t VAddressSpaceCeiling = 0x0000800000000000;
 #endif
 
-void generateSyscall(long SyscallNumber, std::vector<MCInst> &GeneratedCode) {
-  GeneratedCode.push_back(
-      loadImmediate(X86::RAX, 64, APInt(64, SyscallNumber)));
-  GeneratedCode.push_back(MCInstBuilder(X86::SYSCALL));
-}
-
 void generateRoundToNearestPage(unsigned int Register,
                                 std::vector<MCInst> &GeneratedCode) {
   int PageSizeShift = static_cast<int>(round(log2(getpagesize())));
@@ -1157,29 +1242,11 @@ intptr_t ExegesisX86Target::getAuxiliaryMemoryStartAddress() const {
   return VAddressSpaceCeiling - 2 * getpagesize();
 }
 
-void generateRegisterStackPush(unsigned int Register,
-                               std::vector<MCInst> &GeneratedCode) {
-  GeneratedCode.push_back(MCInstBuilder(X86::PUSH64r).addReg(Register));
-}
-
-void generateRegisterStackPop(unsigned int Register,
-                              std::vector<MCInst> &GeneratedCode) {
-  GeneratedCode.push_back(MCInstBuilder(X86::POP64r).addReg(Register));
-}
-
 std::vector<MCInst>
 ExegesisX86Target::configurePerfCounter(long Request, bool SaveRegisters) const {
   std::vector<MCInst> ConfigurePerfCounterCode;
-  if(SaveRegisters) {
-    // Preserve RAX, RDI, and RSI by pushing them to the stack.
-    generateRegisterStackPush(X86::RAX, ConfigurePerfCounterCode);
-    generateRegisterStackPush(X86::RDI, ConfigurePerfCounterCode);
-    generateRegisterStackPush(X86::RSI, ConfigurePerfCounterCode);
-    // RCX and R11 will get clobbered by the syscall instruction, so save them
-    // as well.
-    generateRegisterStackPush(X86::RCX, ConfigurePerfCounterCode);
-    generateRegisterStackPush(X86::R11, ConfigurePerfCounterCode);
-  }
+  if (SaveRegisters)
+    saveSyscallRegisters(ConfigurePerfCounterCode, 2);
   ConfigurePerfCounterCode.push_back(
       loadImmediate(X86::RDI, 64, APInt(64, getAuxiliaryMemoryStartAddress())));
   ConfigurePerfCounterCode.push_back(MCInstBuilder(X86::MOV32rm)
@@ -1192,15 +1259,8 @@ ExegesisX86Target::configurePerfCounter(long Request, bool SaveRegisters) const
   ConfigurePerfCounterCode.push_back(
       loadImmediate(X86::RSI, 64, APInt(64, Request)));
   generateSyscall(SYS_ioctl, ConfigurePerfCounterCode);
-  if(SaveRegisters) {
-    // Restore R11 then RCX
-    generateRegisterStackPop(X86::R11, ConfigurePerfCounterCode);
-    generateRegisterStackPop(X86::RCX, ConfigurePerfCounterCode);
-    // Restore RAX, RDI, and RSI, in reverse order.
-    generateRegisterStackPop(X86::RSI, ConfigurePerfCounterCode);
-    generateRegisterStackPop(X86::RDI, ConfigurePerfCounterCode);
-    generateRegisterStackPop(X86::RAX, ConfigurePerfCounterCode);
-  }
+  if (SaveRegisters)
+    restoreSyscallRegisters(ConfigurePerfCounterCode, 2);
   return ConfigurePerfCounterCode;
 }
 