Skip to content

Commit

Permalink
[PR][BOLT][Instrumentation] Optimize eflags load/store
Browse files Browse the repository at this point in the history
Summary:
This commit uses reviews.llvm.org/D6629 as a reference to optimize
X86::EFLAGS load/store in the instrumentation snippet by using lahf/sahf
instructions instead of pushf/popf.

(cherry picked from FBD31662303)
  • Loading branch information
yavtuk authored and maksfb committed Oct 11, 2021
1 parent 443f1b4 commit 85ffa8e
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 12 deletions.
7 changes: 7 additions & 0 deletions bolt/include/bolt/Core/MCPlusBuilder.h
Expand Up @@ -428,6 +428,13 @@ class MCPlusBuilder {
llvm_unreachable("not implemented");
}

/// Fill \p Instrs with the instruction sequence that instrumentation uses to
/// increment by 1 the contents of the memory location \p Target.
///
/// \p Ctx is the MC context made available to the target implementation, and
/// \p IsLeaf tells it whether the snippet is being built for a leaf function
/// (a target may need extra instructions there, e.g. to avoid clobbering the
/// application red zone).
///
/// This default implementation is a placeholder that reports "not
/// implemented"; targets that support instrumentation are expected to
/// override it.
virtual void createInstrIncMemory(std::vector<MCInst> &Instrs,
const MCSymbol *Target, MCContext *Ctx,
bool IsLeaf) const {
llvm_unreachable("not implemented");
}

/// Return a register number that is guaranteed to not match with
/// any real register on the underlying architecture.
virtual MCPhysReg getNoRegister() const {
Expand Down
13 changes: 1 addition & 12 deletions bolt/lib/Passes/Instrumentation.cpp
Expand Up @@ -177,18 +177,7 @@ Instrumentation::createInstrumentationSnippet(BinaryContext &BC, bool IsLeaf) {
Label = BC.Ctx->createNamedTempSymbol("InstrEntry");
Summary->Counters.emplace_back(Label);
std::vector<MCInst> CounterInstrs;
CounterInstrs.resize(IsLeaf? 5 : 3);
uint32_t I = 0;
// Don't clobber application red zone (ABI dependent)
if (IsLeaf)
BC.MIB->createStackPointerIncrement(CounterInstrs[I++], 128,
/*NoFlagsClobber=*/true);
BC.MIB->createPushFlags(CounterInstrs[I++], 2);
BC.MIB->createIncMemory(CounterInstrs[I++], Label, &*BC.Ctx);
BC.MIB->createPopFlags(CounterInstrs[I++], 2);
if (IsLeaf)
BC.MIB->createStackPointerDecrement(CounterInstrs[I++], 128,
/*NoFlagsClobber=*/true);
BC.MIB->createInstrIncMemory(CounterInstrs, Label, &*BC.Ctx, IsLeaf);
return CounterInstrs;
}

Expand Down
84 changes: 84 additions & 0 deletions bolt/lib/Target/X86/X86MCPlusBuilder.cpp
Expand Up @@ -3509,6 +3509,90 @@ class X86MCPlusBuilder : public MCPlusBuilder {
return createPopRegister(Inst, X86::EFLAGS, Size);
}

/// Fill \p Inst with a register-immediate ADD on \p Reg.
///
/// \p Size is the operand width in bytes and picks the opcode: 1, 2 and 4 map
/// to ADD8ri, ADD16ri and ADD32ri respectively; any other width is treated as
/// a programming error. \p Reg is emitted as the first two operands
/// (NOTE(review): presumably destination and tied source - confirm against
/// the X86 instruction definitions), followed by the immediate \p Value.
void createAddRegImm(MCInst &Inst, MCPhysReg Reg, int64_t Value,
                     unsigned Size) const {
  unsigned AddOpcode = 0;
  if (Size == 1)
    AddOpcode = X86::ADD8ri;
  else if (Size == 2)
    AddOpcode = X86::ADD16ri;
  else if (Size == 4)
    AddOpcode = X86::ADD32ri;
  else
    llvm_unreachable("Unexpected size");

  // Start from an empty operand list, then describe the instruction.
  Inst.clear();
  Inst.setOpcode(AddOpcode);

  const MCOperand RegOperand = MCOperand::createReg(Reg);
  Inst.addOperand(RegOperand);
  Inst.addOperand(RegOperand);
  Inst.addOperand(MCOperand::createImm(Value));
}

/// Fill \p Inst with a move of the immediate 0 into \p Reg.
///
/// As the name states, the intent is to zero the register without updating
/// EFLAGS, which is why a MOV-immediate form is used. \p Size is the operand
/// width in bytes: 1, 2, 4 and 8 select MOV8ri, MOV16ri, MOV32ri and MOV64ri;
/// any other width is treated as a programming error.
void createClearRegWithNoEFlagsUpdate(MCInst &Inst, MCPhysReg Reg,
                                      unsigned Size) const {
  unsigned MovOpcode = 0;
  if (Size == 1)
    MovOpcode = X86::MOV8ri;
  else if (Size == 2)
    MovOpcode = X86::MOV16ri;
  else if (Size == 4)
    MovOpcode = X86::MOV32ri;
  else if (Size == 8)
    MovOpcode = X86::MOV64ri;
  else
    llvm_unreachable("Unexpected size");

  // Start from an empty operand list, then describe the instruction.
  Inst.clear();
  Inst.setOpcode(MovOpcode);

  const MCOperand Destination = MCOperand::createReg(Reg);
  const MCOperand Zero = MCOperand::createImm(0);
  Inst.addOperand(Destination);
  Inst.addOperand(Zero);
}

/// Fill \p Inst with a SETcc on \p Reg that tests the overflow condition
/// (X86::COND_O), i.e. records the current overflow flag in \p Reg.
void createX86SaveOVFlagToRegister(MCInst &Inst, MCPhysReg Reg) const {
  const MCOperand Destination = MCOperand::createReg(Reg);
  const MCOperand OverflowCond = MCOperand::createImm(X86::COND_O);

  // Start from an empty operand list, then describe the instruction.
  Inst.clear();
  Inst.setOpcode(X86::SETCCr);
  Inst.addOperand(Destination);
  Inst.addOperand(OverflowCond);
}

/// Turn \p Inst into an operand-less LAHF instruction.
void createX86Lahf(MCInst &Inst) const {
  // LAHF is emitted with no explicit operands, so leave the list empty.
  Inst.clear();
  Inst.setOpcode(X86::LAHF);
}

/// Turn \p Inst into an operand-less SAHF instruction.
void createX86Sahf(MCInst &Inst) const {
  // SAHF is emitted with no explicit operands, so leave the list empty.
  Inst.clear();
  Inst.setOpcode(X86::SAHF);
}

void createInstrIncMemory(std::vector<MCInst> &Instrs, const MCSymbol *Target,
MCContext *Ctx, bool IsLeaf) const override {
unsigned int I = 0;

Instrs.resize(IsLeaf ? 13 : 11);
// Don't clobber application red zone (ABI dependent)
if (IsLeaf)
createStackPointerIncrement(Instrs[I++], 128,
/*NoFlagsClobber=*/true);

// Performance improvements based on the optimization discussed at
// https://reviews.llvm.org/D6629
// LAHF/SAHF are used instead of PUSHF/POPF
// PUSHF
createPushRegister(Instrs[I++], X86::RAX, 8);
createClearRegWithNoEFlagsUpdate(Instrs[I++], X86::RAX, 8);
createX86Lahf(Instrs[I++]);
createPushRegister(Instrs[I++], X86::RAX, 8);
createClearRegWithNoEFlagsUpdate(Instrs[I++], X86::RAX, 8);
createX86SaveOVFlagToRegister(Instrs[I++], X86::AL);
// LOCK INC
createIncMemory(Instrs[I++], Target, Ctx);
// POPF
createAddRegImm(Instrs[I++], X86::AL, 127, 1);
createPopRegister(Instrs[I++], X86::RAX, 8);
createX86Sahf(Instrs[I++]);
createPopRegister(Instrs[I++], X86::RAX, 8);

if (IsLeaf)
createStackPointerDecrement(Instrs[I], 128,
/*NoFlagsClobber=*/true);
}

void createSwap(MCInst &Inst, MCPhysReg Source, MCPhysReg MemBaseReg,
int64_t Disp) const {
Inst.setOpcode(X86::XCHG64rm);
Expand Down

0 comments on commit 85ffa8e

Please sign in to comment.