@@ -57,9 +57,6 @@ struct MIRef {
57
57
++I, ++Pos)
58
58
MI = &*I;
59
59
}
60
- MIRef (MachineInstr *MI)
61
- : MI(MI), MBB(MI->getParent ()),
62
- Pos(std::distance(MBB->instr_begin (), ++MI->getIterator())) {}
63
60
MIRef (MachineInstr *MI, MachineBasicBlock *MBB)
64
61
: MI(MI), MBB(MBB),
65
62
Pos (std::distance(MBB->instr_begin (), ++MI->getIterator())) {}
@@ -69,7 +66,6 @@ struct MIRef {
69
66
bool operator ==(const MIRef &RHS) const {
70
67
return MI == RHS.MI && MBB == RHS.MBB ;
71
68
}
72
- bool operator !=(const MIRef &RHS) const { return !(*this == RHS); }
73
69
bool operator <(const MIRef &RHS) const {
74
70
return MBB < RHS.MBB || (MBB == RHS.MBB && Pos < RHS.Pos );
75
71
}
@@ -81,7 +77,7 @@ struct MIRef {
81
77
struct BBInfo {
82
78
MIRef FirstAMX;
83
79
MIRef LastCall;
84
- bool HasAMXRegLiveIn = false ;
80
+ MIRef LastShape ;
85
81
bool TileCfgForbidden = false ;
86
82
bool NeedTileCfgLiveIn = false ;
87
83
};
@@ -90,8 +86,8 @@ class X86PreTileConfig : public MachineFunctionPass {
90
86
MachineRegisterInfo *MRI;
91
87
const MachineLoopInfo *MLI;
92
88
SmallSet<MachineInstr *, 8 > DefVisited;
89
+ SmallSet<MachineBasicBlock *, 8 > ShapeBBs;
93
90
DenseMap<MachineBasicBlock *, BBInfo> BBVisitedInfo;
94
- DenseMap<MachineBasicBlock *, SmallVector<MIRef, 8 >> ShapeBBs;
95
91
96
92
/// Check if the callee will clobber AMX registers.
97
93
bool isDestructiveCall (MachineInstr &MI, BitVector UsableRegs) {
@@ -128,33 +124,6 @@ class X86PreTileConfig : public MachineFunctionPass {
128
124
/// Collect the shape def information for later use.
129
125
void collectShapeInfo (MachineInstr &MI);
130
126
131
- // / Try to hoist shapes definded below AMX instructions.
132
- bool hoistShapesInBB (MachineBasicBlock *MBB) {
133
- auto FirstShapeBelowAMX =
134
- llvm::lower_bound (ShapeBBs[MBB], BBVisitedInfo[MBB].FirstAMX );
135
- auto InsertPoint = BBVisitedInfo[MBB].FirstAMX .MI ->getIterator ();
136
- for (auto I = FirstShapeBelowAMX, E = ShapeBBs[MBB].end (); I != E; ++I) {
137
- // Do not hoist instructions that access memory.
138
- if (I->MI ->mayLoadOrStore ())
139
- return false ;
140
- for (auto &MO : I->MI ->operands ()) {
141
- if (MO.isDef ())
142
- continue ;
143
- // Do not hoist instructions if the sources' def under AMX instruction.
144
- // TODO: We can handle isMoveImmediate MI here.
145
- if (MO.isReg () &&
146
- MIRef (MRI->getVRegDef (MO.getReg ())) > BBVisitedInfo[MBB].FirstAMX )
147
- return false ;
148
- // TODO: Maybe need more checks here.
149
- }
150
- MBB->insert (InsertPoint, I->MI ->removeFromParent ());
151
- }
152
- // We only need to mark the last shape in the BB now.
153
- ShapeBBs[MBB].clear ();
154
- ShapeBBs[MBB].push_back (MIRef (&*--InsertPoint, MBB));
155
- return true ;
156
- }
157
-
158
127
public:
159
128
X86PreTileConfig () : MachineFunctionPass(ID) {}
160
129
@@ -196,9 +165,9 @@ INITIALIZE_PASS_END(X86PreTileConfig, "tilepreconfig",
196
165
void X86PreTileConfig::collectShapeInfo(MachineInstr &MI) {
197
166
auto RecordShape = [&](MachineInstr *MI, MachineBasicBlock *MBB) {
198
167
MIRef MIR (MI, MBB);
199
- auto I = llvm::lower_bound (ShapeBBs [MBB], MIR);
200
- if (*I ! = MIR)
201
- ShapeBBs[MBB] .insert (I, MIR );
168
+ if (BBVisitedInfo [MBB]. LastShape < MIR)
169
+ BBVisitedInfo[MBB]. LastShape = MIR;
170
+ ShapeBBs.insert (MBB );
202
171
};
203
172
204
173
SmallVector<Register, 8 > WorkList (
@@ -260,10 +229,6 @@ bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) {
260
229
else
261
230
CfgLiveInBBs.push_back (&MBB);
262
231
}
263
- if (BBVisitedInfo[&MBB].FirstAMX || BBVisitedInfo[&MBB].HasAMXRegLiveIn )
264
- for (auto *Succ : MBB.successors ())
265
- if (!isLoopBackEdge (Succ, &MBB))
266
- BBVisitedInfo[Succ].HasAMXRegLiveIn = true ;
267
232
}
268
233
269
234
// Update NeedTileCfgLiveIn for predecessors.
@@ -287,17 +252,8 @@ bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) {
287
252
return false ;
288
253
289
254
// Avoid inserting ldtilecfg before any shape defs.
290
- SmallVector<MachineBasicBlock *, 8 > WorkList;
291
- for (auto &I : ShapeBBs) {
292
- // TODO: We can hoist shapes across BBs here.
293
- if (BBVisitedInfo[I.first ].HasAMXRegLiveIn )
294
- REPORT_CONFIG_FAIL
295
- if (BBVisitedInfo[I.first ].FirstAMX &&
296
- BBVisitedInfo[I.first ].FirstAMX < ShapeBBs[I.first ].back () &&
297
- !hoistShapesInBB (I.first ))
298
- REPORT_CONFIG_FAIL
299
- WorkList.push_back (I.first );
300
- }
255
+ SmallVector<MachineBasicBlock *, 8 > WorkList (
256
+ make_range (ShapeBBs.begin (), ShapeBBs.end ()));
301
257
while (!WorkList.empty ()) {
302
258
MachineBasicBlock *MBB = WorkList.pop_back_val ();
303
259
for (auto *Pred : MBB->predecessors ()) {
@@ -326,6 +282,9 @@ bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) {
326
282
} else {
327
283
// Avoid visiting the BB more than once.
328
284
VisitedOrInserted.insert (I);
285
+ // We cannot sink it across any AMX instruction.
286
+ if (BBVisitedInfo[I.MBB ].FirstAMX )
287
+ REPORT_CONFIG_FAIL;
329
288
// Sink the inserting point along the chain with NeedTileCfgLiveIn =
330
289
// true when MBB isn't all shapes reachable.
331
290
for (auto *Succ : I.MBB ->successors ())
@@ -337,9 +296,14 @@ bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) {
337
296
338
297
// A given point might be forked because the shape conditions are not met.
339
298
for (MIRef I : InsertPoints) {
299
+ // Even if MBB is all shapes reachable, we still need to check if there's
300
+ // AMX that intersects with shapes in the same MBB.
301
+ if (BBVisitedInfo[I.MBB ].FirstAMX &&
302
+ BBVisitedInfo[I.MBB ].FirstAMX < BBVisitedInfo[I.MBB ].LastShape )
303
+ REPORT_CONFIG_FAIL;
340
304
// Make sure we insert ldtilecfg after the last shape def in MBB.
341
- if (ShapeBBs. count (I. MBB ) && I < ShapeBBs [I.MBB ].back () )
342
- I = ShapeBBs [I.MBB ].back () ;
305
+ if (I < BBVisitedInfo [I.MBB ].LastShape )
306
+ I = BBVisitedInfo [I.MBB ].LastShape ;
343
307
// The MBB may be sunk more than once. Record it to avoid
344
308
// inserting it multiple times.
345
309
if (VisitedOrInserted.insert (I).second ) {
0 commit comments