Skip to content

Commit caea37b

Browse files
committed
Revert "[X86][AMX] Try to hoist AMX shapes' def"
This reverts commit 9011856. Reason: Broke the MSan buildbots. https://lab.llvm.org/buildbot/#/builders/5/builds/6967/steps/9/logs/stdio More details can be found in the original phabricator review: https://reviews.llvm.org/D101067
1 parent e10d7d4 commit caea37b

File tree

2 files changed

+17
-68
lines changed

2 files changed

+17
-68
lines changed

llvm/lib/Target/X86/X86PreTileConfig.cpp

Lines changed: 17 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,6 @@ struct MIRef {
5757
++I, ++Pos)
5858
MI = &*I;
5959
}
60-
MIRef(MachineInstr *MI)
61-
: MI(MI), MBB(MI->getParent()),
62-
Pos(std::distance(MBB->instr_begin(), ++MI->getIterator())) {}
6360
MIRef(MachineInstr *MI, MachineBasicBlock *MBB)
6461
: MI(MI), MBB(MBB),
6562
Pos(std::distance(MBB->instr_begin(), ++MI->getIterator())) {}
@@ -69,7 +66,6 @@ struct MIRef {
6966
bool operator==(const MIRef &RHS) const {
7067
return MI == RHS.MI && MBB == RHS.MBB;
7168
}
72-
bool operator!=(const MIRef &RHS) const { return !(*this == RHS); }
7369
bool operator<(const MIRef &RHS) const {
7470
return MBB < RHS.MBB || (MBB == RHS.MBB && Pos < RHS.Pos);
7571
}
@@ -81,7 +77,7 @@ struct MIRef {
8177
struct BBInfo {
8278
MIRef FirstAMX;
8379
MIRef LastCall;
84-
bool HasAMXRegLiveIn = false;
80+
MIRef LastShape;
8581
bool TileCfgForbidden = false;
8682
bool NeedTileCfgLiveIn = false;
8783
};
@@ -90,8 +86,8 @@ class X86PreTileConfig : public MachineFunctionPass {
9086
MachineRegisterInfo *MRI;
9187
const MachineLoopInfo *MLI;
9288
SmallSet<MachineInstr *, 8> DefVisited;
89+
SmallSet<MachineBasicBlock *, 8> ShapeBBs;
9390
DenseMap<MachineBasicBlock *, BBInfo> BBVisitedInfo;
94-
DenseMap<MachineBasicBlock *, SmallVector<MIRef, 8>> ShapeBBs;
9591

9692
/// Check if the callee will clobber AMX registers.
9793
bool isDestructiveCall(MachineInstr &MI, BitVector UsableRegs) {
@@ -128,33 +124,6 @@ class X86PreTileConfig : public MachineFunctionPass {
128124
/// Collect the shape def information for later use.
129125
void collectShapeInfo(MachineInstr &MI);
130126

131-
/// Try to hoist shapes definded below AMX instructions.
132-
bool hoistShapesInBB(MachineBasicBlock *MBB) {
133-
auto FirstShapeBelowAMX =
134-
llvm::lower_bound(ShapeBBs[MBB], BBVisitedInfo[MBB].FirstAMX);
135-
auto InsertPoint = BBVisitedInfo[MBB].FirstAMX.MI->getIterator();
136-
for (auto I = FirstShapeBelowAMX, E = ShapeBBs[MBB].end(); I != E; ++I) {
137-
// Do not hoist instructions that access memory.
138-
if (I->MI->mayLoadOrStore())
139-
return false;
140-
for (auto &MO : I->MI->operands()) {
141-
if (MO.isDef())
142-
continue;
143-
// Do not hoist instructions if the sources' def under AMX instruction.
144-
// TODO: We can handle isMoveImmediate MI here.
145-
if (MO.isReg() &&
146-
MIRef(MRI->getVRegDef(MO.getReg())) > BBVisitedInfo[MBB].FirstAMX)
147-
return false;
148-
// TODO: Maybe need more checks here.
149-
}
150-
MBB->insert(InsertPoint, I->MI->removeFromParent());
151-
}
152-
// We only need to mark the last shape in the BB now.
153-
ShapeBBs[MBB].clear();
154-
ShapeBBs[MBB].push_back(MIRef(&*--InsertPoint, MBB));
155-
return true;
156-
}
157-
158127
public:
159128
X86PreTileConfig() : MachineFunctionPass(ID) {}
160129

@@ -196,9 +165,9 @@ INITIALIZE_PASS_END(X86PreTileConfig, "tilepreconfig",
196165
void X86PreTileConfig::collectShapeInfo(MachineInstr &MI) {
197166
auto RecordShape = [&](MachineInstr *MI, MachineBasicBlock *MBB) {
198167
MIRef MIR(MI, MBB);
199-
auto I = llvm::lower_bound(ShapeBBs[MBB], MIR);
200-
if (*I != MIR)
201-
ShapeBBs[MBB].insert(I, MIR);
168+
if (BBVisitedInfo[MBB].LastShape < MIR)
169+
BBVisitedInfo[MBB].LastShape = MIR;
170+
ShapeBBs.insert(MBB);
202171
};
203172

204173
SmallVector<Register, 8> WorkList(
@@ -260,10 +229,6 @@ bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) {
260229
else
261230
CfgLiveInBBs.push_back(&MBB);
262231
}
263-
if (BBVisitedInfo[&MBB].FirstAMX || BBVisitedInfo[&MBB].HasAMXRegLiveIn)
264-
for (auto *Succ : MBB.successors())
265-
if (!isLoopBackEdge(Succ, &MBB))
266-
BBVisitedInfo[Succ].HasAMXRegLiveIn = true;
267232
}
268233

269234
// Update NeedTileCfgLiveIn for predecessors.
@@ -287,17 +252,8 @@ bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) {
287252
return false;
288253

289254
// Avoid to insert ldtilecfg before any shape defs.
290-
SmallVector<MachineBasicBlock *, 8> WorkList;
291-
for (auto &I : ShapeBBs) {
292-
// TODO: We can hoist shapes across BBs here.
293-
if (BBVisitedInfo[I.first].HasAMXRegLiveIn)
294-
REPORT_CONFIG_FAIL
295-
if (BBVisitedInfo[I.first].FirstAMX &&
296-
BBVisitedInfo[I.first].FirstAMX < ShapeBBs[I.first].back() &&
297-
!hoistShapesInBB(I.first))
298-
REPORT_CONFIG_FAIL
299-
WorkList.push_back(I.first);
300-
}
255+
SmallVector<MachineBasicBlock *, 8> WorkList(
256+
make_range(ShapeBBs.begin(), ShapeBBs.end()));
301257
while (!WorkList.empty()) {
302258
MachineBasicBlock *MBB = WorkList.pop_back_val();
303259
for (auto *Pred : MBB->predecessors()) {
@@ -326,6 +282,9 @@ bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) {
326282
} else {
327283
// Avoid the BB to be multi visited.
328284
VisitedOrInserted.insert(I);
285+
// We cannot sink it across any AMX instruction.
286+
if (BBVisitedInfo[I.MBB].FirstAMX)
287+
REPORT_CONFIG_FAIL;
329288
// Sink the inserting point along the chain with NeedTileCfgLiveIn =
330289
// true when MBB isn't all shapes reachable.
331290
for (auto *Succ : I.MBB->successors())
@@ -337,9 +296,14 @@ bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) {
337296

338297
// A given point might be forked due to shape conditions are not met.
339298
for (MIRef I : InsertPoints) {
299+
// Even MBB is all shapes reachable, we still need to check if there's
300+
// AMX that intersects with shapes in the same MBB.
301+
if (BBVisitedInfo[I.MBB].FirstAMX &&
302+
BBVisitedInfo[I.MBB].FirstAMX < BBVisitedInfo[I.MBB].LastShape)
303+
REPORT_CONFIG_FAIL;
340304
// Make sure we insert ldtilecfg after the last shape def in MBB.
341-
if (ShapeBBs.count(I.MBB) && I < ShapeBBs[I.MBB].back())
342-
I = ShapeBBs[I.MBB].back();
305+
if (I < BBVisitedInfo[I.MBB].LastShape)
306+
I = BBVisitedInfo[I.MBB].LastShape;
343307
// There're chances the MBB is sunk more than once. Record it to avoid
344308
// multi insert.
345309
if (VisitedOrInserted.insert(I).second) {

llvm/test/CodeGen/X86/AMX/amx-sched.ll

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
define <256 x i32> @test_shape_sched(i16 %m, i16 %n, i16 %k, <256 x i32> %c, <256 x i32> %a, <256 x i32> %b) nounwind {
44
; Just to make sure shape def is not scheduled across ldtilecfg.
5-
; CHECK-LABEL: test_shape_sched:
65
; CHECK: ldtilecfg
76
; CHECK-NOT: movw
87
%c1 = bitcast <256 x i32> %c to x86_amx
@@ -13,19 +12,5 @@ define <256 x i32> @test_shape_sched(i16 %m, i16 %n, i16 %k, <256 x i32> %c, <25
1312
ret <256 x i32> %res
1413
}
1514

16-
define <256 x i32> @test_shape_sched2(i16 %m, i16 %n, i16 %k, i8* %c, i8* %a, i8* %b) nounwind {
17-
; Just to make sure shape def is not scheduled across ldtilecfg.
18-
; CHECK-LABEL: test_shape_sched2:
19-
; CHECK: ldtilecfg
20-
; CHECK-NOT: movw
21-
%aa = lshr i16 %k, 2
22-
%c1 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %m, i16 %n, i8* %c, i64 64)
23-
%a1 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %m, i16 %k, i8* %a, i64 64)
24-
%b1 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %aa, i16 %n, i8* %b, i64 64)
25-
%t = call x86_amx @llvm.x86.tdpbssd.internal(i16 %m, i16 %n, i16 %k, x86_amx %c1, x86_amx %a1, x86_amx %b1)
26-
%res = bitcast x86_amx %t to <256 x i32>
27-
ret <256 x i32> %res
28-
}
2915

30-
declare x86_amx @llvm.x86.tileloadd64.internal(i16, i16, i8*, i64)
3116
declare x86_amx @llvm.x86.tdpbssd.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)

0 commit comments

Comments
 (0)