Skip to content

Commit

Permalink
[llvm-exegesis] AliasingConfigurations: pay attention to forbidden …
Browse files Browse the repository at this point in the history
…registers

When trying to measure latency of certain opcodes, e.g.
`./bin/llvm-exegesis --opcode-name=BT32ri8 --mode=latency --repetition-mode=loop  --benchmarks-file=- --max-configs-per-opcode=65536`,
we'd pick an aliasing instruction, and aliasing registers,
that would alias with forbidden registers.

In particular, they could alias with the loop counter in `loop` repetition mode,
which made the measurements never finish.
This does not address all such cases, only the most obvious one.

The added test case fails without the patch.

Fixes #59441
  • Loading branch information
LebedevRI committed Dec 20, 2022
1 parent bf94eac commit a5b5631
Show file tree
Hide file tree
Showing 8 changed files with 55 additions and 16 deletions.
10 changes: 6 additions & 4 deletions llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
Expand Up @@ -351,10 +351,12 @@ bool AliasingConfigurations::hasImplicitAliasing() const {
}

AliasingConfigurations::AliasingConfigurations(
const Instruction &DefInstruction, const Instruction &UseInstruction) {
if (UseInstruction.AllUseRegs.anyCommon(DefInstruction.AllDefRegs)) {
auto CommonRegisters = UseInstruction.AllUseRegs;
CommonRegisters &= DefInstruction.AllDefRegs;
const Instruction &DefInstruction, const Instruction &UseInstruction,
const BitVector &ForbiddenRegisters) {
auto CommonRegisters = UseInstruction.AllUseRegs;
CommonRegisters &= DefInstruction.AllDefRegs;
CommonRegisters.reset(ForbiddenRegisters);
if (!CommonRegisters.empty()) {
for (const MCPhysReg Reg : CommonRegisters.set_bits()) {
AliasingRegisterOperands ARO;
addOperandIfAlias(Reg, true, DefInstruction.Operands, ARO.Defs);
Expand Down
3 changes: 2 additions & 1 deletion llvm/tools/llvm-exegesis/lib/MCInstrDescView.h
Expand Up @@ -217,7 +217,8 @@ struct AliasingRegisterOperands {
// to alias with Use registers of UseInstruction.
struct AliasingConfigurations {
AliasingConfigurations(const Instruction &DefInstruction,
const Instruction &UseInstruction);
const Instruction &UseInstruction,
const BitVector &ForbiddenRegisters);

bool empty() const; // True if no aliasing configuration is found.
bool hasImplicitAliasing() const;
Expand Down
2 changes: 1 addition & 1 deletion llvm/tools/llvm-exegesis/lib/ParallelSnippetGenerator.cpp
Expand Up @@ -301,7 +301,7 @@ ParallelSnippetGenerator::generateCodeTemplates(
? State.getExegesisTarget().getScratchMemoryRegister(
State.getTargetMachine().getTargetTriple())
: 0;
const AliasingConfigurations SelfAliasing(Instr, Instr);
const AliasingConfigurations SelfAliasing(Instr, Instr, ForbiddenRegisters);
if (SelfAliasing.empty()) {
CT.Info = "instruction is parallel, repeating a random one.";
CT.Instructions.push_back(std::move(Variant));
Expand Down
9 changes: 5 additions & 4 deletions llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp
Expand Up @@ -115,8 +115,8 @@ static void appendCodeTemplates(const LLVMState &State,
case ExecutionMode::SERIAL_VIA_EXPLICIT_REGS: {
// Making the execution of this instruction serial by selecting one def
// register to alias with one use register.
const AliasingConfigurations SelfAliasing(Variant.getInstr(),
Variant.getInstr());
const AliasingConfigurations SelfAliasing(
Variant.getInstr(), Variant.getInstr(), ForbiddenRegisters);
assert(!SelfAliasing.empty() && !SelfAliasing.hasImplicitAliasing() &&
"Instr must alias itself explicitly");
// This is a self aliasing instruction so defs and uses are from the same
Expand All @@ -134,8 +134,9 @@ static void appendCodeTemplates(const LLVMState &State,
// Select back-to-back non-memory instruction.
for (const auto *OtherInstr : computeAliasingInstructions(
State, &Instr, kMaxAliasingInstructions, ForbiddenRegisters)) {
const AliasingConfigurations Forward(Instr, *OtherInstr);
const AliasingConfigurations Back(*OtherInstr, Instr);
const AliasingConfigurations Forward(Instr, *OtherInstr,
ForbiddenRegisters);
const AliasingConfigurations Back(*OtherInstr, Instr, ForbiddenRegisters);
InstructionTemplate ThisIT(Variant);
InstructionTemplate OtherIT(OtherInstr);
if (!Forward.hasImplicitAliasing())
Expand Down
7 changes: 4 additions & 3 deletions llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
Expand Up @@ -140,9 +140,10 @@ std::vector<RegisterValue> SnippetGenerator::computeRegisterInitialValues(
}

Expected<std::vector<CodeTemplate>>
generateSelfAliasingCodeTemplates(InstructionTemplate Variant) {
const AliasingConfigurations SelfAliasing(Variant.getInstr(),
Variant.getInstr());
generateSelfAliasingCodeTemplates(InstructionTemplate Variant,
const BitVector &ForbiddenRegisters) {
const AliasingConfigurations SelfAliasing(
Variant.getInstr(), Variant.getInstr(), ForbiddenRegisters);
if (SelfAliasing.empty())
return make_error<SnippetGeneratorFailure>("empty self aliasing");
std::vector<CodeTemplate> Result;
Expand Down
3 changes: 2 additions & 1 deletion llvm/tools/llvm-exegesis/lib/SnippetGenerator.h
Expand Up @@ -35,7 +35,8 @@ std::vector<CodeTemplate> getSingleton(CodeTemplate &&CT);

// Generates code templates that have a self-dependency.
Expected<std::vector<CodeTemplate>>
generateSelfAliasingCodeTemplates(InstructionTemplate Variant);
generateSelfAliasingCodeTemplates(InstructionTemplate Variant,
const BitVector &ForbiddenRegisters);

// Generates code templates without assignment constraints.
Expected<std::vector<CodeTemplate>>
Expand Down
4 changes: 2 additions & 2 deletions llvm/tools/llvm-exegesis/lib/X86/Target.cpp
Expand Up @@ -370,7 +370,7 @@ X86SerialSnippetGenerator::generateCodeTemplates(
// - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
// - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
// They are intrinsically serial and do not modify the state of the stack.
return generateSelfAliasingCodeTemplates(Variant);
return generateSelfAliasingCodeTemplates(Variant, ForbiddenRegisters);
default:
llvm_unreachable("Unknown FP Type!");
}
Expand Down Expand Up @@ -426,7 +426,7 @@ X86ParallelSnippetGenerator::generateCodeTemplates(
// - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
// They are intrinsically serial and do not modify the state of the stack.
// We generate the same code for latency and uops.
return generateSelfAliasingCodeTemplates(Variant);
return generateSelfAliasingCodeTemplates(Variant, ForbiddenRegisters);
case X86II::CompareFP:
case X86II::CondMovFP:
// We can compute uops for any FP instruction that does not grow or shrink
Expand Down
33 changes: 33 additions & 0 deletions llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp
Expand Up @@ -157,6 +157,39 @@ TEST_F(X86SerialSnippetGeneratorTest,
consumeError(std::move(Error));
}

// Generates serial code templates for VXORPSrr while passing a restricted
// ForbiddenRegisters set, and verifies that every register value assigned to
// the instruction's variables is outside that forbidden set.
TEST_F(X86SerialSnippetGeneratorTest,
       ImplicitSelfDependencyThroughExplicitRegsForbidAlmostAll) {
  // - VXORPSrr
  // - Op0 Explicit Def RegClass(VR128)
  // - Op1 Explicit Use RegClass(VR128)
  // - Op2 Explicit Use RegClass(VR128)
  // - Var0 [Op0]
  // - Var1 [Op1]
  // - Var2 [Op2]
  // - hasAliasingRegisters
  const unsigned Opcode = X86::VXORPSrr;
  randomGenerator().seed(0); // Initialize seed.
  const Instruction &Instr = State.getIC().getInstr(Opcode);

  // Flip the set returned by emptyRegisters() and then clear XMM0 again.
  // NOTE(review): presumably emptyRegisters() has no bits set, so this forbids
  // everything except XMM0 (as the test name suggests) - confirm.
  auto ForbiddenRegisters = State.getRATC().emptyRegisters();
  ForbiddenRegisters.flip();
  ForbiddenRegisters.reset(X86::XMM0);

  auto CodeTemplatesOrError =
      Generator.generateCodeTemplates(&Instr, ForbiddenRegisters);
  // Fail fatally (and consume the error) before touching the value: reading
  // the value of an Expected that holds an error is invalid, and an llvm::Error
  // must not be dropped unhandled.
  if (!CodeTemplatesOrError) {
    FAIL() << toString(CodeTemplatesOrError.takeError());
  }
  auto CodeTemplates = std::move(*CodeTemplatesOrError);

  ASSERT_THAT(CodeTemplates, SizeIs(Gt(0U))) << "Templates are available";
  for (const auto &CT : CodeTemplates) {
    EXPECT_THAT(CT.Execution, ExecutionMode::SERIAL_VIA_EXPLICIT_REGS);
    ASSERT_THAT(CT.Instructions, SizeIs(1));
    const InstructionTemplate &IT = CT.Instructions[0];
    EXPECT_THAT(IT.getOpcode(), Opcode);
    ASSERT_THAT(IT.getVariableValues(), SizeIs(3));
    // Only register-valued variables can collide with the forbidden set.
    for (const auto &Var : IT.getVariableValues()) {
      if (Var.isReg())
        EXPECT_FALSE(ForbiddenRegisters[Var.getReg()]);
    }
  }
}

TEST_F(X86SerialSnippetGeneratorTest, DependencyThroughOtherOpcode) {
// - CMP64rr
// - Op0 Explicit Use RegClass(GR64)
Expand Down

0 comments on commit a5b5631

Please sign in to comment.