Skip to content

Commit

Permalink
[llvm-exegesis] Add uop computation for more X87 instruction classes.
Browse files Browse the repository at this point in the history
Summary:
This allows measuring comparisons (UCOM_FpIr32, UCOM_Fpr32, ...) and
conditional moves (CMOVBE_Fp32, ...).

Reviewers: gchatelet

Subscribers: tschuett, llvm-commits

Differential Revision: https://reviews.llvm.org/D48713

llvm-svn: 336352
  • Loading branch information
legrosbuffle committed Jul 5, 2018
1 parent 9e987e0 commit f9a0bb3
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 70 deletions.
13 changes: 11 additions & 2 deletions llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
Expand Up @@ -196,8 +196,8 @@ BenchmarkRunner::writeObjectFile(const BenchmarkConfiguration::Setup &Setup,
return ResultPath.str();
}

llvm::Expected<SnippetPrototype> BenchmarkRunner::generateSelfAliasingPrototype(
const Instruction &Instr) const {
llvm::Expected<SnippetPrototype>
BenchmarkRunner::generateSelfAliasingPrototype(const Instruction &Instr) const {
const AliasingConfigurations SelfAliasing(Instr, Instr);
if (SelfAliasing.empty()) {
return llvm::make_error<BenchmarkFailure>("empty self aliasing");
Expand All @@ -217,4 +217,13 @@ llvm::Expected<SnippetPrototype> BenchmarkRunner::generateSelfAliasingPrototype(
return std::move(Prototype);
}

// Produces a prototype whose snippet is the single instruction `Instr`, with
// no operand assignment constraints recorded on it. `Msg` is the reason given
// by the caller; it becomes the prefix of the prototype's explanation string.
llvm::Expected<SnippetPrototype>
BenchmarkRunner::generateUnconstrainedPrototype(const Instruction &Instr,
                                                llvm::StringRef Msg) const {
  SnippetPrototype Result;
  Result.Snippet.emplace_back(Instr);
  Result.Explanation =
      llvm::formatv("{0}, repeating an unconstrained assignment", Msg);
  // Explicit move: the return value is converted to llvm::Expected.
  return std::move(Result);
}
} // namespace exegesis
5 changes: 5 additions & 0 deletions llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h
Expand Up @@ -70,8 +70,13 @@ class BenchmarkRunner {
const LLVMState &State;
const RegisterAliasingTrackerCache RATC;

// Generates a single instruction prototype that has a self-dependency.
llvm::Expected<SnippetPrototype>
generateSelfAliasingPrototype(const Instruction &Instr) const;
// Generates a single instruction prototype without assignment constraints.
// `Msg` states why no constraints are needed; it is used as the prefix of the
// prototype's explanation ("<Msg>, repeating an unconstrained assignment").
llvm::Expected<SnippetPrototype>
generateUnconstrainedPrototype(const Instruction &Instr,
llvm::StringRef Msg) const;

private:
// API to be implemented by subclasses.
Expand Down
10 changes: 2 additions & 8 deletions llvm/tools/llvm-exegesis/lib/Uops.cpp
Expand Up @@ -139,16 +139,10 @@ UopsBenchmarkRunner::generatePrototype(unsigned Opcode) const {
const Instruction Instr(InstrDesc, RATC);
const AliasingConfigurations SelfAliasing(Instr, Instr);
if (SelfAliasing.empty()) {
SnippetPrototype Prototype;
Prototype.Explanation = "instruction is parallel, repeating a random one.";
Prototype.Snippet.emplace_back(Instr);
return std::move(Prototype);
return generateUnconstrainedPrototype(Instr, "instruction is parallel");
}
if (SelfAliasing.hasImplicitAliasing()) {
SnippetPrototype Prototype;
Prototype.Explanation = "instruction is serial, repeating a random one.";
Prototype.Snippet.emplace_back(Instr);
return std::move(Prototype);
return generateUnconstrainedPrototype(Instr, "instruction is serial");
}
const auto TiedVariables = getTiedVariables(Instr);
if (!TiedVariables.empty()) {
Expand Down
127 changes: 67 additions & 60 deletions llvm/tools/llvm-exegesis/lib/X86/Target.cpp
Expand Up @@ -44,9 +44,9 @@ template <typename Impl> class X86BenchmarkRunner : public Impl {
case llvm::X86II::NotFP:
break;
case llvm::X86II::ZeroArgFP:
return Impl::handleZeroArgFP(Instr);
return llvm::make_error<BenchmarkFailure>("Unsupported x87 ZeroArgFP");
case llvm::X86II::OneArgFP:
return Impl::handleOneArgFP(Instr); // fstp ST(0)
return llvm::make_error<BenchmarkFailure>("Unsupported x87 OneArgFP");
case llvm::X86II::OneArgFPRW:
case llvm::X86II::TwoArgFP: {
// These are instructions like
Expand All @@ -61,7 +61,7 @@ template <typename Impl> class X86BenchmarkRunner : public Impl {
case llvm::X86II::CondMovFP:
return Impl::handleCondMovFP(Instr);
case llvm::X86II::SpecialFP:
return Impl::handleSpecialFP(Instr);
return llvm::make_error<BenchmarkFailure>("Unsupported x87 SpecialFP");
default:
llvm_unreachable("Unknown FP Type!");
}
Expand All @@ -76,50 +76,30 @@ class X86LatencyImpl : public LatencyBenchmarkRunner {
using Base = LatencyBenchmarkRunner;
using Base::Base;
llvm::Expected<SnippetPrototype>
handleZeroArgFP(const Instruction &Instr) const {
return llvm::make_error<BenchmarkFailure>("Unsupported x87 ZeroArgFP");
}
llvm::Expected<SnippetPrototype>
handleOneArgFP(const Instruction &Instr) const {
return llvm::make_error<BenchmarkFailure>("Unsupported x87 OneArgFP");
}
llvm::Expected<SnippetPrototype>
handleCompareFP(const Instruction &Instr) const {
return llvm::make_error<BenchmarkFailure>("Unsupported x87 CompareFP");
}
llvm::Expected<SnippetPrototype>
handleCondMovFP(const Instruction &Instr) const {
return llvm::make_error<BenchmarkFailure>("Unsupported x87 CondMovFP");
}
llvm::Expected<SnippetPrototype>
handleSpecialFP(const Instruction &Instr) const {
return llvm::make_error<BenchmarkFailure>("Unsupported x87 SpecialFP");
}
};

// Uops-mode implementation layer: extends UopsBenchmarkRunner with one handler
// per x87 instruction class, each returning either a snippet prototype or a
// BenchmarkFailure. Presumably used as the `Impl` parameter of
// X86BenchmarkRunner, which dispatches to `Impl::handle*FP` — confirm.
class X86UopsImpl : public UopsBenchmarkRunner {
protected:
using Base = UopsBenchmarkRunner;
// Inherit the constructors of UopsBenchmarkRunner.
using Base::Base;
// Zero-argument x87 instructions: reported as unsupported.
llvm::Expected<SnippetPrototype>
handleZeroArgFP(const Instruction &Instr) const {
return llvm::make_error<BenchmarkFailure>("Unsupported x87 ZeroArgFP");
}
// One-argument x87 instructions: reported as unsupported.
llvm::Expected<SnippetPrototype>
handleOneArgFP(const Instruction &Instr) const {
return llvm::make_error<BenchmarkFailure>("Unsupported x87 OneArgFP");
}
// We can compute uops for any FP instruction that does not grow or shrink the
// stack (either do not touch the stack or push as much as they pop).
// NOTE(review): in the two handlers below, two `return` statements appear back
// to back. This listing looks like a diff with removed and added lines merged;
// presumably only the generateUnconstrainedPrototype() return belongs to the
// final code — confirm against the committed file. As literally written, the
// first return wins and the second is unreachable.
llvm::Expected<SnippetPrototype>
handleCompareFP(const Instruction &Instr) const {
return llvm::make_error<BenchmarkFailure>("Unsupported x87 CompareFP");
return generateUnconstrainedPrototype(
Instr, "instruction does not grow/shrink the FP stack");
}
llvm::Expected<SnippetPrototype>
handleCondMovFP(const Instruction &Instr) const {
return llvm::make_error<BenchmarkFailure>("Unsupported x87 CondMovFP");
return generateUnconstrainedPrototype(
Instr, "instruction does not grow/shrink the FP stack");
}
// Special x87 instructions: reported as unsupported.
llvm::Expected<SnippetPrototype>
handleSpecialFP(const Instruction &Instr) const {
return llvm::make_error<BenchmarkFailure>("Unsupported x87 SpecialFP");
}
};

Expand Down Expand Up @@ -163,6 +143,15 @@ class ExegesisX86Target : public ExegesisTarget {
llvm::X86::RFP64RegClass.contains(Reg) ||
llvm::X86::RFP80RegClass.contains(Reg))
return setVectorRegToConstant(Reg, 8, llvm::X86::LD_Fp64m);
if (Reg == llvm::X86::EFLAGS) {
// Set all flags to 0 but the bits that are "reserved and set to 1".
constexpr const uint32_t kImmValue = 0x00007002u;
std::vector<llvm::MCInst> Result;
Result.push_back(allocateStackSpace(8));
Result.push_back(fillStackSpace(llvm::X86::MOV64mi32, 0, kImmValue));
Result.push_back(llvm::MCInstBuilder(llvm::X86::POPF64)); // Also pops.
return Result;
}
return {};
}

Expand Down Expand Up @@ -193,41 +182,59 @@ class ExegesisX86Target : public ExegesisTarget {
// value that has set bits for all byte values and is a normal float/
// double. 0x40404040 is ~32.5 when interpreted as a double and ~3.0f when
// interpreted as a float.
constexpr const uint64_t kImmValue = 0x40404040ull;
constexpr const uint32_t kImmValue = 0x40404040u;
std::vector<llvm::MCInst> Result;
// Allocate scratch memory on the stack.
Result.push_back(llvm::MCInstBuilder(llvm::X86::SUB64ri8)
.addReg(llvm::X86::RSP)
.addReg(llvm::X86::RSP)
.addImm(RegSizeBytes));
// Fill scratch memory.
for (unsigned Disp = 0; Disp < RegSizeBytes; Disp += 4) {
Result.push_back(llvm::MCInstBuilder(llvm::X86::MOV32mi)
// Address = ESP
.addReg(llvm::X86::RSP) // BaseReg
.addImm(1) // ScaleAmt
.addReg(0) // IndexReg
.addImm(Disp) // Disp
.addReg(0) // Segment
// Immediate.
.addImm(kImmValue));
Result.push_back(allocateStackSpace(RegSizeBytes));
constexpr const unsigned kMov32NumBytes = 4;
for (unsigned Disp = 0; Disp < RegSizeBytes; Disp += kMov32NumBytes) {
Result.push_back(fillStackSpace(llvm::X86::MOV32mi, Disp, kImmValue));
}
// Load Reg from scratch memory.
Result.push_back(llvm::MCInstBuilder(RMOpcode)
.addReg(Reg)
// Address = ESP
.addReg(llvm::X86::RSP) // BaseReg
.addImm(1) // ScaleAmt
.addReg(0) // IndexReg
.addImm(0) // Disp
.addReg(0)); // Segment
// Release scratch memory.
Result.push_back(llvm::MCInstBuilder(llvm::X86::ADD64ri8)
.addReg(llvm::X86::RSP)
.addReg(llvm::X86::RSP)
.addImm(RegSizeBytes));
Result.push_back(loadToReg(Reg, RMOpcode));
Result.push_back(releaseStackSpace(RegSizeBytes));
return Result;
}

// Builds the instruction that reserves `Bytes` bytes of scratch memory on the
// stack: `SUB64ri8` with RSP as both register operands and `Bytes` as the
// immediate.
// NOTE(review): the `ri8` opcode presumably takes a signed 8-bit immediate, so
// `Bytes` is expected to stay small — confirm.
static llvm::MCInst allocateStackSpace(unsigned Bytes) {
  llvm::MCInstBuilder SubFromRsp(llvm::X86::SUB64ri8);
  SubFromRsp.addReg(llvm::X86::RSP);
  SubFromRsp.addReg(llvm::X86::RSP);
  SubFromRsp.addImm(Bytes);
  return SubFromRsp;
}

// Builds a store of the immediate `Imm` into the scratch memory located
// `OffsetBytes` bytes above RSP, using the move opcode `MovOpcode`.
static llvm::MCInst fillStackSpace(unsigned MovOpcode, unsigned OffsetBytes,
                                   uint64_t Imm) {
  llvm::MCInstBuilder Store(MovOpcode);
  // Memory operand: address = RSP + OffsetBytes.
  Store.addReg(llvm::X86::RSP); // BaseReg
  Store.addImm(1);              // ScaleAmt
  Store.addReg(0);              // IndexReg
  Store.addImm(OffsetBytes);    // Disp
  Store.addReg(0);              // Segment
  // Value written to memory.
  Store.addImm(Imm);
  return Store;
}

// Builds a load, using opcode `RMOpcode`, from the scratch memory at the top
// of the stack (address RSP, zero displacement) into register `Reg`.
static llvm::MCInst loadToReg(unsigned Reg, unsigned RMOpcode) {
  llvm::MCInstBuilder Load(RMOpcode);
  // Destination register.
  Load.addReg(Reg);
  // Memory operand: address = RSP.
  Load.addReg(llvm::X86::RSP); // BaseReg
  Load.addImm(1);              // ScaleAmt
  Load.addReg(0);              // IndexReg
  Load.addImm(0);              // Disp
  Load.addReg(0);              // Segment
  return Load;
}

// Builds the instruction that gives back `Bytes` bytes of scratch memory:
// `ADD64ri8` with RSP as both register operands and `Bytes` as the immediate.
// NOTE(review): the `ri8` opcode presumably takes a signed 8-bit immediate, so
// `Bytes` is expected to stay small — confirm.
static llvm::MCInst releaseStackSpace(unsigned Bytes) {
  llvm::MCInstBuilder AddToRsp(llvm::X86::ADD64ri8);
  AddToRsp.addReg(llvm::X86::RSP);
  AddToRsp.addReg(llvm::X86::RSP);
  AddToRsp.addImm(Bytes);
  return AddToRsp;
}
};

} // namespace
Expand Down

0 comments on commit f9a0bb3

Please sign in to comment.