Skip to content

Commit 03512ae

Browse files
committed
[exegesis][X86] ParallelSnippetGenerator: don't accidentally create serialized instructions
In the case of no tied variables, we pick random defs, and then random uses that don't alias with defs we just picked. Sounds good, except that an X86 instruction may have implicit reg uses, e.g. for `MULX` it's `EDX`/`RDX`: `Intel SDM, 4-162 Vol. 2B MULX — Unsigned Multiply Without Affecting Flags` > Performs an unsigned multiplication of the implicit source operand (EDX/RDX) and the specified source operand > (the third operand) and stores the low half of the result in the second destination (second operand), the high half > of the result in the first destination operand (first operand), without reading or writing the arithmetic flags. And indeed, every once in a while `llvm-exegesis` happened to pick EDX as a def while measuring throughput, and producing garbage output: ``` $ ./bin/llvm-exegesis -num-repetitions=1000000 -mode=inverse_throughput -repetition-mode=min --loop-body-size=4096 -dump-object-to-disk=false -opcode-name=MULX32rr --max-configs-per-opcode=65536 --- mode: inverse_throughput key: instructions: - 'MULX32rr EDX R11D R12D' config: '' register_initial_values: - 'R12D=0x0' - 'EDX=0x0' cpu_name: znver3 llvm_triple: x86_64-unknown-linux-gnu num_repetitions: 1000000 measurements: - { key: inverse_throughput, value: 4.00014, per_snippet_value: 4.00014 } error: '' info: instruction has no tied variables picking Uses different from defs assembled_snippet: 415441BC00000000BA00000000C4C223F6D4C4C223F6D4C4C223F6D4C4C223F6D4415CC3415441BC00000000BA0000000049B80200000000000000C4C223F6D4C4C223F6D44983C0FF75F0415CC3 ... ``` ``` $ ./bin/llvm-exegesis -num-repetitions=1000000 -mode=inverse_throughput -repetition-mode=min --loop-body-size=4096 -dump-object-to-disk=false -opcode-name=MULX32rr --max-configs-per-opcode=65536 --- mode: inverse_throughput key: instructions: - 'MULX32rr R13D EDX ECX' config: '' register_initial_values: - 'ECX=0x0' - 'EDX=0x0' cpu_name: znver3 llvm_triple: x86_64-unknown-linux-gnu num_repetitions: 1000000 measurements: - { key: inverse_throughput, value: 3.00013, per_snippet_value: 3.00013 } error: '' info: instruction has no tied variables picking Uses different from defs assembled_snippet: 4155B900000000BA00000000C4626BF6E9C4626BF6E9C4626BF6E9C4626BF6E9415DC34155B900000000BA0000000049B80200000000000000C4626BF6E9C4626BF6E94983C0FF75F0415DC3 ... ``` Oops! Not only does that not look fun, i did hit that pitfail during AMD Zen 3 enablement. While i have since then addressed this in rGd4d459e7475b4bb0d15280f12ed669342fa5edcd, i suspect there may be other buggy results lying around, so we should at least stop producing them. Reviewed By: courbet Differential Revision: https://reviews.llvm.org/D109275
1 parent c33e296 commit 03512ae

File tree

2 files changed

+59
-25
lines changed

2 files changed

+59
-25
lines changed

llvm/tools/llvm-exegesis/lib/ParallelSnippetGenerator.cpp

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -186,25 +186,59 @@ ParallelSnippetGenerator::generateCodeTemplates(
186186
return getSingleton(std::move(CT));
187187
}
188188
// No tied variables, we pick random values for defs.
189+
190+
// We don't want to accidentally serialize the instruction,
191+
// so we must be sure that we don't pick a def that is an implicit use,
192+
// or a use that is an implicit def, so record implicit regs now.
193+
BitVector ImplicitUses(State.getRegInfo().getNumRegs());
194+
BitVector ImplicitDefs(State.getRegInfo().getNumRegs());
195+
for (const auto &Op : Instr.Operands) {
196+
if (Op.isReg() && Op.isImplicit() && !Op.isMemory()) {
197+
assert(Op.isImplicitReg() && "Not an implicit register operand?");
198+
if (Op.isUse())
199+
ImplicitUses.set(Op.getImplicitReg());
200+
else {
201+
assert(Op.isDef() && "Not a use and not a def?");
202+
ImplicitDefs.set(Op.getImplicitReg());
203+
}
204+
}
205+
}
206+
const auto ImplicitUseAliases =
207+
getAliasedBits(State.getRegInfo(), ImplicitUses);
208+
const auto ImplicitDefAliases =
209+
getAliasedBits(State.getRegInfo(), ImplicitDefs);
189210
BitVector Defs(State.getRegInfo().getNumRegs());
190211
for (const auto &Op : Instr.Operands) {
191212
if (Op.isReg() && Op.isExplicit() && Op.isDef() && !Op.isMemory()) {
192213
auto PossibleRegisters = Op.getRegisterAliasing().sourceBits();
193-
// Do not use forbidden registers.
214+
// Do not use forbidden registers and regs that are implicitly used.
215+
// Note that we don't try to avoid using implicit defs explicitly.
194216
remove(PossibleRegisters, ForbiddenRegisters);
195-
assert(PossibleRegisters.any() && "No register left to choose from");
217+
remove(PossibleRegisters, ImplicitUseAliases);
218+
if (!PossibleRegisters.any())
219+
return make_error<StringError>(
220+
Twine("no available registers:\ncandidates:\n")
221+
.concat(debugString(State.getRegInfo(),
222+
Op.getRegisterAliasing().sourceBits()))
223+
.concat("\nforbidden:\n")
224+
.concat(debugString(State.getRegInfo(), ForbiddenRegisters))
225+
.concat("\nimplicit use:\n")
226+
.concat(debugString(State.getRegInfo(), ImplicitUseAliases)),
227+
inconvertibleErrorCode());
196228
const auto RandomReg = randomBit(PossibleRegisters);
197229
Defs.set(RandomReg);
198230
Variant.getValueFor(Op) = MCOperand::createReg(RandomReg);
199231
}
200232
}
201233
// And pick random use values that are not reserved and don't alias with defs.
234+
// Note that we don't try to avoid using implicit uses explicitly.
202235
const auto DefAliases = getAliasedBits(State.getRegInfo(), Defs);
203236
for (const auto &Op : Instr.Operands) {
204237
if (Op.isReg() && Op.isExplicit() && Op.isUse() && !Op.isMemory()) {
205238
auto PossibleRegisters = Op.getRegisterAliasing().sourceBits();
206239
remove(PossibleRegisters, ForbiddenRegisters);
207240
remove(PossibleRegisters, DefAliases);
241+
remove(PossibleRegisters, ImplicitDefAliases);
208242
assert(PossibleRegisters.any() && "No register left to choose from");
209243
const auto RandomReg = randomBit(PossibleRegisters);
210244
Variant.getValueFor(Op) = MCOperand::createReg(RandomReg);

llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -154,29 +154,6 @@ TEST_F(X86SerialSnippetGeneratorTest,
154154
consumeError(std::move(Error));
155155
}
156156

157-
TEST_F(X86SerialSnippetGeneratorTest,
158-
AvoidSerializingThroughImplicitRegisters) {
159-
// MULX32rr implicitly uses EDX. We should not select that register to avoid
160-
// serialization.
161-
const unsigned Opcode = X86::MULX32rr;
162-
randomGenerator().seed(0); // Initialize seed.
163-
const Instruction &Instr = State.getIC().getInstr(Opcode);
164-
// Forbid all registers but RDX/EDX/DX/DH/DL. The only option would be to
165-
// choose that register, but that would serialize the instruction, so we
166-
// should be returning an error.
167-
auto AllRegisters = State.getRATC().emptyRegisters();
168-
AllRegisters.flip();
169-
AllRegisters.reset(X86::RDX);
170-
AllRegisters.reset(X86::EDX);
171-
AllRegisters.reset(X86::DX);
172-
AllRegisters.reset(X86::DH);
173-
AllRegisters.reset(X86::DL);
174-
auto Error =
175-
Generator.generateCodeTemplates(&Instr, AllRegisters).takeError();
176-
// FIXME: EXPECT_TRUE + consumeError(std::move(Error)).
177-
EXPECT_FALSE((bool)Error);
178-
}
179-
180157
TEST_F(X86SerialSnippetGeneratorTest, DependencyThroughOtherOpcode) {
181158
// - CMP64rr
182159
// - Op0 Explicit Use RegClass(GR64)
@@ -385,6 +362,29 @@ TEST_F(X86ParallelSnippetGeneratorTest, MOV16ms) {
385362
testing::HasSubstr("no available registers"));
386363
}
387364

365+
TEST_F(X86ParallelSnippetGeneratorTest,
366+
AvoidSerializingThroughImplicitRegisters) {
367+
// MULX32rr implicitly uses EDX. We should not select that register to avoid
368+
// serialization.
369+
const unsigned Opcode = X86::MULX32rr;
370+
randomGenerator().seed(0); // Initialize seed.
371+
const Instruction &Instr = State.getIC().getInstr(Opcode);
372+
// Forbid all registers but RDX/EDX/DX/DH/DL. The only option would be to
373+
// choose that register, but that would serialize the instruction, so we
374+
// should be returning an error.
375+
auto AllRegisters = State.getRATC().emptyRegisters();
376+
AllRegisters.flip();
377+
AllRegisters.reset(X86::RDX);
378+
AllRegisters.reset(X86::EDX);
379+
AllRegisters.reset(X86::DX);
380+
AllRegisters.reset(X86::DH);
381+
AllRegisters.reset(X86::DL);
382+
auto Err = Generator.generateCodeTemplates(&Instr, AllRegisters);
383+
EXPECT_FALSE((bool)Err);
384+
EXPECT_THAT(toString(Err.takeError()),
385+
testing::HasSubstr("no available registers"));
386+
}
387+
388388
class X86FakeSnippetGenerator : public SnippetGenerator {
389389
public:
390390
X86FakeSnippetGenerator(const LLVMState &State, const Options &Opts)

0 commit comments

Comments
 (0)