Skip to content

Commit

Permalink
Reland "[llvm-exegesis] Add support for loading X86 segment registers (
Browse files Browse the repository at this point in the history
…#76368)"

This reverts commit 8b48507.

This relands commit 7c38348.

This caused build failures on non-x86-64 builders as there was no
preprocessor logic around the newly included headers. This has been
fixed in the relanded patch.
  • Loading branch information
boomanaiden154 committed Dec 29, 2023
1 parent 64f0681 commit 5cc7402
Show file tree
Hide file tree
Showing 2 changed files with 117 additions and 35 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# REQUIRES: exegesis-can-measure-latency, x86_64-linux

# Check that the value of the segment registers is set properly when in
# subprocess mode.

# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mode=latency -snippets-file=%s -execution-mode=subprocess | FileCheck %s

# Initial register state. FS and GS are given the values that the snippet
# below compares against (0x12345600 and 0x2468ac00). R15 starts at zero and
# R14 holds the non-zero value that is copied into R15 on a mismatch.
# LLVM-EXEGESIS-DEFREG FS 12345600
# LLVM-EXEGESIS-DEFREG GS 2468ac00
# LLVM-EXEGESIS-DEFREG R13 0
# LLVM-EXEGESIS-DEFREG R14 127
# LLVM-EXEGESIS-DEFREG R15 0
# Two 4096-byte memory definitions and their mappings. The mapping addresses
# are written in decimal: 305418240 == 0x12345000 and 610836480 == 0x2468a000,
# so each mapping presumably covers the address loaded into the corresponding
# segment register above.
# LLVM-EXEGESIS-MEM-DEF MEM1 4096 0000000012345600
# LLVM-EXEGESIS-MEM-DEF MEM2 4096 000000002468ac00
# LLVM-EXEGESIS-MEM-MAP MEM1 305418240
# LLVM-EXEGESIS-MEM-MAP MEM2 610836480

# Read the quadword at %fs:0 and compare it with the expected value; if they
# differ, copy R14 into R15.
movq %fs:0, %r13
cmpq $0x12345600, %r13
cmovneq %r14, %r15
# Same comparison for the quadword at %gs:0.
movq %gs:0, %r13
cmpq $0x2468ac00, %r13
cmovneq %r14, %r15

# Issue system call number 60 (exit on x86-64 Linux) with R15 as its argument,
# so the exit status is non-zero only when one of the comparisons above failed.
movq $60, %rax
movq %r15, %rdi
syscall

# CHECK-NOT: error: 'Child benchmarking process exited with non-zero exit code: Child process returned with unknown exit code'
123 changes: 88 additions & 35 deletions llvm/tools/llvm-exegesis/lib/X86/Target.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@
#endif

#ifdef __linux__
#ifdef __x86_64__
#include <asm/prctl.h>
#endif // __x86_64__
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>
Expand Down Expand Up @@ -907,9 +910,90 @@ void ExegesisX86Target::decrementLoopCounterAndJump(
.addImm(X86::COND_NE);
}

void generateRegisterStackPush(unsigned int Register,
std::vector<MCInst> &GeneratedCode) {
GeneratedCode.push_back(MCInstBuilder(X86::PUSH64r).addReg(Register));
}

void generateRegisterStackPop(unsigned int Register,
std::vector<MCInst> &GeneratedCode) {
GeneratedCode.push_back(MCInstBuilder(X86::POP64r).addReg(Register));
}

void generateSyscall(long SyscallNumber, std::vector<MCInst> &GeneratedCode) {
GeneratedCode.push_back(
loadImmediate(X86::RAX, 64, APInt(64, SyscallNumber)));
GeneratedCode.push_back(MCInstBuilder(X86::SYSCALL));
}

// Registers used to pass arguments to a system call, listed in argument
// order. saveSyscallRegisters and restoreSyscallRegisters index into this
// table to preserve the first ArgumentCount entries around a system call.
constexpr std::array<unsigned, 6> SyscallArgumentRegisters{
X86::RDI, X86::RSI, X86::RDX, X86::R10, X86::R8, X86::R9};

/// Appends to \p GeneratedCode the pushes that preserve every register a
/// system call taking \p ArgumentCount arguments is going to overwrite.
///
/// The push order is RCX, R11, RAX, followed by the first \p ArgumentCount
/// entries of SyscallArgumentRegisters. restoreSyscallRegisters pops in the
/// exact reverse order, so both functions must be called with the same
/// \p ArgumentCount.
///
/// \param GeneratedCode Instruction list that the pushes are appended to.
/// \param ArgumentCount Number of argument registers to preserve; must not
///        exceed the number of entries in SyscallArgumentRegisters.
static void saveSyscallRegisters(std::vector<MCInst> &GeneratedCode,
                                 unsigned ArgumentCount) {
  // Bound the count by the table itself so the loop below can never index past
  // the end of SyscallArgumentRegisters.
  assert(ArgumentCount <= SyscallArgumentRegisters.size() &&
         "System calls on X86-64 Linux can only take six arguments");
  // Preserve RCX and R11 (Clobbered by the system call).
  generateRegisterStackPush(X86::RCX, GeneratedCode);
  generateRegisterStackPush(X86::R11, GeneratedCode);
  // Preserve RAX (used for the syscall number/return value).
  generateRegisterStackPush(X86::RAX, GeneratedCode);
  // Preserve the registers used to pass arguments to the system call.
  for (unsigned I = 0; I < ArgumentCount; ++I)
    generateRegisterStackPush(SyscallArgumentRegisters[I], GeneratedCode);
}

/// Appends to \p GeneratedCode the pops that undo saveSyscallRegisters for a
/// system call taking \p ArgumentCount arguments.
///
/// Registers are popped in the reverse of the order in which
/// saveSyscallRegisters pushes them: the argument registers from last to
/// first, then RAX, R11 and finally RCX. The same \p ArgumentCount that was
/// used for the save must be passed here, otherwise the stack is left
/// unbalanced.
///
/// \param GeneratedCode Instruction list that the pops are appended to.
/// \param ArgumentCount Number of argument registers to restore; must not
///        exceed the number of entries in SyscallArgumentRegisters.
static void restoreSyscallRegisters(std::vector<MCInst> &GeneratedCode,
                                    unsigned ArgumentCount) {
  // Bound the count by the table itself so the loop below can never index past
  // the end of SyscallArgumentRegisters.
  assert(ArgumentCount <= SyscallArgumentRegisters.size() &&
         "System calls on X86-64 Linux can only take six arguments");
  // Restore the argument registers, in the opposite order of the way they are
  // saved.
  for (unsigned I = ArgumentCount; I > 0; --I)
    generateRegisterStackPop(SyscallArgumentRegisters[I - 1], GeneratedCode);
  generateRegisterStackPop(X86::RAX, GeneratedCode);
  generateRegisterStackPop(X86::R11, GeneratedCode);
  generateRegisterStackPop(X86::RCX, GeneratedCode);
}

static std::vector<MCInst> loadImmediateSegmentRegister(unsigned Reg,
const APInt &Value) {
#ifdef __x86_64__
assert(Value.getBitWidth() <= 64 && "Value must fit in the register.");
std::vector<MCInst> loadSegmentRegisterCode;
// Preserve the syscall registers here as we don't
// want to make any assumptions about the ordering of what registers are
// loaded in first, and we might have already loaded in registers that we are
// going to be clobbering here.
saveSyscallRegisters(loadSegmentRegisterCode, 2);
// Generate the instructions to make the arch_prctl system call to set
// the registers.
int SyscallCode = 0;
if (Reg == X86::FS)
SyscallCode = ARCH_SET_FS;
else if (Reg == X86::GS)
SyscallCode = ARCH_SET_GS;
else
llvm_unreachable("Only the segment registers GS and FS are supported");
loadSegmentRegisterCode.push_back(
loadImmediate(X86::RDI, 64, APInt(64, SyscallCode)));
loadSegmentRegisterCode.push_back(loadImmediate(X86::RSI, 64, Value));
generateSyscall(SYS_arch_prctl, loadSegmentRegisterCode);
// Restore the registers in reverse order
restoreSyscallRegisters(loadSegmentRegisterCode, 2);
return loadSegmentRegisterCode;
#else
llvm_unreachable("Loading immediate segment registers is only supported with "
"x86-64 llvm-exegesis");
#endif
}

std::vector<MCInst> ExegesisX86Target::setRegTo(const MCSubtargetInfo &STI,
unsigned Reg,
const APInt &Value) const {
if (X86::SEGMENT_REGRegClass.contains(Reg))
return loadImmediateSegmentRegister(Reg, Value);
if (X86::GR8RegClass.contains(Reg))
return {loadImmediate(Reg, 8, Value)};
if (X86::GR16RegClass.contains(Reg))
Expand Down Expand Up @@ -992,12 +1076,6 @@ static constexpr const intptr_t VAddressSpaceCeiling = 0xC0000000;
static constexpr const intptr_t VAddressSpaceCeiling = 0x0000800000000000;
#endif

void generateSyscall(long SyscallNumber, std::vector<MCInst> &GeneratedCode) {
GeneratedCode.push_back(
loadImmediate(X86::RAX, 64, APInt(64, SyscallNumber)));
GeneratedCode.push_back(MCInstBuilder(X86::SYSCALL));
}

void generateRoundToNearestPage(unsigned int Register,
std::vector<MCInst> &GeneratedCode) {
int PageSizeShift = static_cast<int>(round(log2(getpagesize())));
Expand Down Expand Up @@ -1157,29 +1235,11 @@ intptr_t ExegesisX86Target::getAuxiliaryMemoryStartAddress() const {
return VAddressSpaceCeiling - 2 * getpagesize();
}

void generateRegisterStackPush(unsigned int Register,
std::vector<MCInst> &GeneratedCode) {
GeneratedCode.push_back(MCInstBuilder(X86::PUSH64r).addReg(Register));
}

void generateRegisterStackPop(unsigned int Register,
std::vector<MCInst> &GeneratedCode) {
GeneratedCode.push_back(MCInstBuilder(X86::POP64r).addReg(Register));
}

std::vector<MCInst>
ExegesisX86Target::configurePerfCounter(long Request, bool SaveRegisters) const {
std::vector<MCInst> ConfigurePerfCounterCode;
if(SaveRegisters) {
// Preserve RAX, RDI, and RSI by pushing them to the stack.
generateRegisterStackPush(X86::RAX, ConfigurePerfCounterCode);
generateRegisterStackPush(X86::RDI, ConfigurePerfCounterCode);
generateRegisterStackPush(X86::RSI, ConfigurePerfCounterCode);
// RCX and R11 will get clobbered by the syscall instruction, so save them
// as well.
generateRegisterStackPush(X86::RCX, ConfigurePerfCounterCode);
generateRegisterStackPush(X86::R11, ConfigurePerfCounterCode);
}
if (SaveRegisters)
saveSyscallRegisters(ConfigurePerfCounterCode, 2);
ConfigurePerfCounterCode.push_back(
loadImmediate(X86::RDI, 64, APInt(64, getAuxiliaryMemoryStartAddress())));
ConfigurePerfCounterCode.push_back(MCInstBuilder(X86::MOV32rm)
Expand All @@ -1192,15 +1252,8 @@ ExegesisX86Target::configurePerfCounter(long Request, bool SaveRegisters) const
ConfigurePerfCounterCode.push_back(
loadImmediate(X86::RSI, 64, APInt(64, Request)));
generateSyscall(SYS_ioctl, ConfigurePerfCounterCode);
if(SaveRegisters) {
// Restore R11 then RCX
generateRegisterStackPop(X86::R11, ConfigurePerfCounterCode);
generateRegisterStackPop(X86::RCX, ConfigurePerfCounterCode);
// Restore RAX, RDI, and RSI, in reverse order.
generateRegisterStackPop(X86::RSI, ConfigurePerfCounterCode);
generateRegisterStackPop(X86::RDI, ConfigurePerfCounterCode);
generateRegisterStackPop(X86::RAX, ConfigurePerfCounterCode);
}
if (SaveRegisters)
restoreSyscallRegisters(ConfigurePerfCounterCode, 2);
return ConfigurePerfCounterCode;
}

Expand Down

0 comments on commit 5cc7402

Please sign in to comment.