-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Revert "AMDGPU/GlobalISelDivergenceLowering: select divergent i1 phis" #78468
Merged
petar-avramovic
merged 1 commit into
main
from
revert-76145-global-isel-lane-masks-patch-3
Jan 17, 2024
Merged
Revert "AMDGPU/GlobalISelDivergenceLowering: select divergent i1 phis" #78468
petar-avramovic
merged 1 commit into
main
from
revert-76145-global-isel-lane-masks-patch-3
Jan 17, 2024
Conversation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-amdgpu Author: Petar Avramovic (petar-avramovic) ChangesReverts llvm/llvm-project#76145 Patch is 127.80 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/78468.diff 17 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h b/llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h
index 1039ac4e5189b3..e6da099751e7ae 100644
--- a/llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h
+++ b/llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h
@@ -32,25 +32,6 @@ MachineUniformityInfo computeMachineUniformityInfo(
MachineFunction &F, const MachineCycleInfo &cycleInfo,
const MachineDomTree &domTree, bool HasBranchDivergence);
-/// Legacy analysis pass which computes a \ref MachineUniformityInfo.
-class MachineUniformityAnalysisPass : public MachineFunctionPass {
- MachineUniformityInfo UI;
-
-public:
- static char ID;
-
- MachineUniformityAnalysisPass();
-
- MachineUniformityInfo &getUniformityInfo() { return UI; }
- const MachineUniformityInfo &getUniformityInfo() const { return UI; }
-
- bool runOnMachineFunction(MachineFunction &F) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- void print(raw_ostream &OS, const Module *M = nullptr) const override;
-
- // TODO: verify analysis
-};
-
} // namespace llvm
#endif // LLVM_CODEGEN_MACHINEUNIFORMITYANALYSIS_H
diff --git a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
index 131138e0649e4c..3e0fe2b1ba087f 100644
--- a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
+++ b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
@@ -165,6 +165,25 @@ MachineUniformityInfo llvm::computeMachineUniformityInfo(
namespace {
+/// Legacy analysis pass which computes a \ref MachineUniformityInfo.
+class MachineUniformityAnalysisPass : public MachineFunctionPass {
+ MachineUniformityInfo UI;
+
+public:
+ static char ID;
+
+ MachineUniformityAnalysisPass();
+
+ MachineUniformityInfo &getUniformityInfo() { return UI; }
+ const MachineUniformityInfo &getUniformityInfo() const { return UI; }
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ void print(raw_ostream &OS, const Module *M = nullptr) const override;
+
+ // TODO: verify analysis
+};
+
class MachineUniformityInfoPrinterPass : public MachineFunctionPass {
public:
static char ID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp
index 9ac74ef55b275f..4cd8b1ec1051f4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp
@@ -16,10 +16,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "SILowerI1Copies.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineUniformityAnalysis.h"
-#include "llvm/InitializePasses.h"
#define DEBUG_TYPE "amdgpu-global-isel-divergence-lowering"
@@ -45,152 +42,14 @@ class AMDGPUGlobalISelDivergenceLowering : public MachineFunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addRequired<MachineDominatorTree>();
- AU.addRequired<MachinePostDominatorTree>();
- AU.addRequired<MachineUniformityAnalysisPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-class DivergenceLoweringHelper : public PhiLoweringHelper {
-public:
- DivergenceLoweringHelper(MachineFunction *MF, MachineDominatorTree *DT,
- MachinePostDominatorTree *PDT,
- MachineUniformityInfo *MUI);
-
-private:
- MachineUniformityInfo *MUI = nullptr;
-
-public:
- void markAsLaneMask(Register DstReg) const override;
- void getCandidatesForLowering(
- SmallVectorImpl<MachineInstr *> &Vreg1Phis) const override;
- void collectIncomingValuesFromPhi(
- const MachineInstr *MI,
- SmallVectorImpl<Incoming> &Incomings) const override;
- void replaceDstReg(Register NewReg, Register OldReg,
- MachineBasicBlock *MBB) override;
- void buildMergeLaneMasks(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, const DebugLoc &DL,
- Register DstReg, Register PrevReg,
- Register CurReg) override;
- void constrainAsLaneMask(Incoming &In) override;
-};
-
-DivergenceLoweringHelper::DivergenceLoweringHelper(
- MachineFunction *MF, MachineDominatorTree *DT,
- MachinePostDominatorTree *PDT, MachineUniformityInfo *MUI)
- : PhiLoweringHelper(MF, DT, PDT), MUI(MUI) {}
-
-// _(s1) -> SReg_32/64(s1)
-void DivergenceLoweringHelper::markAsLaneMask(Register DstReg) const {
- assert(MRI->getType(DstReg) == LLT::scalar(1));
-
- if (MRI->getRegClassOrNull(DstReg)) {
- MRI->constrainRegClass(DstReg, ST->getBoolRC());
- return;
- }
-
- MRI->setRegClass(DstReg, ST->getBoolRC());
-}
-
-void DivergenceLoweringHelper::getCandidatesForLowering(
- SmallVectorImpl<MachineInstr *> &Vreg1Phis) const {
- LLT S1 = LLT::scalar(1);
-
- // Add divergent i1 phis to the list
- for (MachineBasicBlock &MBB : *MF) {
- for (MachineInstr &MI : MBB.phis()) {
- Register Dst = MI.getOperand(0).getReg();
- if (MRI->getType(Dst) == S1 && MUI->isDivergent(Dst))
- Vreg1Phis.push_back(&MI);
- }
- }
-}
-
-void DivergenceLoweringHelper::collectIncomingValuesFromPhi(
- const MachineInstr *MI, SmallVectorImpl<Incoming> &Incomings) const {
- for (unsigned i = 1; i < MI->getNumOperands(); i += 2) {
- Incomings.emplace_back(MI->getOperand(i).getReg(),
- MI->getOperand(i + 1).getMBB(), Register());
- }
-}
-
-void DivergenceLoweringHelper::replaceDstReg(Register NewReg, Register OldReg,
- MachineBasicBlock *MBB) {
- BuildMI(*MBB, MBB->getFirstNonPHI(), {}, TII->get(AMDGPU::COPY), OldReg)
- .addReg(NewReg);
-}
-
-// Get pointers to build instruction just after MI (skips phis if needed)
-static std::pair<MachineBasicBlock *, MachineBasicBlock::iterator>
-getInsertAfterPtrs(MachineInstr *MI) {
- MachineBasicBlock *InsertMBB = MI->getParent();
- return {InsertMBB,
- InsertMBB->SkipPHIsAndLabels(std::next(MI->getIterator()))};
-}
-
-// bb.previous
-// %PrevReg = ...
-//
-// bb.current
-// %CurReg = ...
-//
-// %DstReg - not defined
-//
-// -> (wave32 example, new registers have sreg_32 reg class and S1 LLT)
-//
-// bb.previous
-// %PrevReg = ...
-// %PrevRegCopy:sreg_32(s1) = COPY %PrevReg
-//
-// bb.current
-// %CurReg = ...
-// %CurRegCopy:sreg_32(s1) = COPY %CurReg
-// ...
-// %PrevMaskedReg:sreg_32(s1) = ANDN2 %PrevRegCopy, ExecReg - active lanes 0
-// %CurMaskedReg:sreg_32(s1) = AND %ExecReg, CurRegCopy - inactive lanes to 0
-// %DstReg:sreg_32(s1) = OR %PrevMaskedReg, CurMaskedReg
-//
-// DstReg = for active lanes rewrite bit in PrevReg with bit from CurReg
-void DivergenceLoweringHelper::buildMergeLaneMasks(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
- Register DstReg, Register PrevReg, Register CurReg) {
- // DstReg = (PrevReg & !EXEC) | (CurReg & EXEC)
- // TODO: check if inputs are constants or results of a compare.
-
- Register PrevRegCopy = createLaneMaskReg(MRI, LaneMaskRegAttrs);
- auto [PrevMBB, AfterPrevReg] = getInsertAfterPtrs(MRI->getVRegDef(PrevReg));
- BuildMI(*PrevMBB, AfterPrevReg, DL, TII->get(AMDGPU::COPY), PrevRegCopy)
- .addReg(PrevReg);
- Register PrevMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
- BuildMI(MBB, I, DL, TII->get(AndN2Op), PrevMaskedReg)
- .addReg(PrevRegCopy)
- .addReg(ExecReg);
-
- Register CurRegCopy = createLaneMaskReg(MRI, LaneMaskRegAttrs);
- auto [CurMBB, AfterCurReg] = getInsertAfterPtrs(MRI->getVRegDef(CurReg));
- BuildMI(*CurMBB, AfterCurReg, DL, TII->get(AMDGPU::COPY), CurRegCopy)
- .addReg(CurReg);
- Register CurMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
- BuildMI(MBB, I, DL, TII->get(AndOp), CurMaskedReg)
- .addReg(ExecReg)
- .addReg(CurRegCopy);
-
- BuildMI(MBB, I, DL, TII->get(OrOp), DstReg)
- .addReg(PrevMaskedReg)
- .addReg(CurMaskedReg);
-}
-
-void DivergenceLoweringHelper::constrainAsLaneMask(Incoming &In) { return; }
-
} // End anonymous namespace.
INITIALIZE_PASS_BEGIN(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
"AMDGPU GlobalISel divergence lowering", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
INITIALIZE_PASS_END(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
"AMDGPU GlobalISel divergence lowering", false, false)
@@ -205,14 +64,5 @@ FunctionPass *llvm::createAMDGPUGlobalISelDivergenceLoweringPass() {
bool AMDGPUGlobalISelDivergenceLowering::runOnMachineFunction(
MachineFunction &MF) {
- MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>();
- MachinePostDominatorTree &PDT = getAnalysis<MachinePostDominatorTree>();
- MachineUniformityInfo &MUI =
- getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();
-
- DivergenceLoweringHelper Helper(&MF, &DT, &PDT, &MUI);
-
- bool Changed = false;
- Changed |= Helper.lowerPhis();
- return Changed;
+ return false;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 5c37f163e37075..1d31c6b8fde93a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -210,14 +210,6 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
const Register DefReg = I.getOperand(0).getReg();
const LLT DefTy = MRI->getType(DefReg);
- // Lane mask PHIs, PHI where all register operands have sgpr register class
- // with S1 LLT, are already selected in divergence lowering pass.
- if (I.getOpcode() == AMDGPU::PHI) {
- assert(MRI->getType(DefReg) == LLT::scalar(1));
- assert(TRI.isSGPRClass(MRI->getRegClass(DefReg)));
- return true;
- }
-
if (DefTy == LLT::scalar(1)) {
if (!AllowRiskySelect) {
LLVM_DEBUG(dbgs() << "Skipping risky boolean phi\n");
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index db06ef0250470d..cfa0c21def791d 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -78,7 +78,7 @@ class Vreg1LoweringHelper : public PhiLoweringHelper {
MachineBasicBlock::iterator I, const DebugLoc &DL,
Register DstReg, Register PrevReg,
Register CurReg) override;
- void constrainAsLaneMask(Incoming &In) override;
+ void constrainIncomingRegisterTakenAsIs(Incoming &In) override;
bool lowerCopiesFromI1();
bool lowerCopiesToI1();
@@ -619,7 +619,7 @@ bool PhiLoweringHelper::lowerPhis() {
for (auto &Incoming : Incomings) {
MachineBasicBlock &IMBB = *Incoming.Block;
if (PIA.isSource(IMBB)) {
- constrainAsLaneMask(Incoming);
+ constrainIncomingRegisterTakenAsIs(Incoming);
SSAUpdater.AddAvailableValue(&IMBB, Incoming.Reg);
} else {
Incoming.UpdatedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
@@ -911,4 +911,6 @@ void Vreg1LoweringHelper::buildMergeLaneMasks(MachineBasicBlock &MBB,
}
}
-void Vreg1LoweringHelper::constrainAsLaneMask(Incoming &In) { return; }
+void Vreg1LoweringHelper::constrainIncomingRegisterTakenAsIs(Incoming &In) {
+ return;
+}
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.h b/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
index f0fe9bb4d03469..5099d39c2d1415 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
@@ -91,7 +91,7 @@ class PhiLoweringHelper {
MachineBasicBlock::iterator I,
const DebugLoc &DL, Register DstReg,
Register PrevReg, Register CurReg) = 0;
- virtual void constrainAsLaneMask(Incoming &In) = 0;
+ virtual void constrainIncomingRegisterTakenAsIs(Incoming &In) = 0;
};
} // end namespace llvm
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
index 38a4e81b5c2596..7a68aec1a1c555 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
@@ -1,6 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -global-isel -amdgpu-global-isel-risky-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
-; REQUIRES: do-not-run-me
; Divergent phis that don't require lowering using lane mask merging
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir
index 56f2812b590a8d..d314ebe355f51d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir
@@ -46,7 +46,7 @@ body: |
; GFX10-NEXT: bb.2:
; GFX10-NEXT: successors: %bb.4(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = G_PHI %14(s1), %bb.3, [[ICMP]](s1), %bb.0
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s1) = G_PHI %14(s1), %bb.3, [[ICMP]](s1), %bb.0
; GFX10-NEXT: G_BR %bb.4
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.3:
@@ -126,7 +126,6 @@ body: |
; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr0
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY2]](s32), [[C]]
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1)
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY3]](s32), [[C1]]
; GFX10-NEXT: G_BRCOND [[ICMP1]](s1), %bb.2
@@ -137,17 +136,12 @@ body: |
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY2]](s32), [[C2]]
- ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY4]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY5]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.2:
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[ICMP]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.1
- ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]]
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY6]](s1), [[C4]], [[C3]]
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[PHI]](s1), [[C4]], [[C3]]
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; GFX10-NEXT: S_ENDPGM 0
bb.0:
@@ -197,37 +191,30 @@ body: |
; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
; GFX10-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[DEF]](s1), %bb.0, %22(s1), %bb.1
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.1
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[C]](s1), %bb.0, %11(s1), %bb.1
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]]
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.1
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI [[C]](s1), %bb.0, %11(s1), %bb.1
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C2]]
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
- ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI2]], [[C2]]
+ ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI1]](s32)
; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]]
- ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI1]](s32)
- ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY3]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C3]]
+ ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32)
; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.2
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.2:
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[XOR]](s1), %bb.1
; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.1
- ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32)
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY5]](s1), [[C5]], [[C4]]
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[PHI3]](s1), [[C5]], [[C4]]
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
; GFX10-NEXT: SI_RETURN
bb.0:
@@ -292,28 +279,25 @@ body: |
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C1]]
- ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[DEF]](s1), %bb.0, %39(s1), %bb.5
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.5, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.5
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = G_PHI [[FCMP]](s1), %bb.0, %19(s1), %bb.5
- ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]]
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.5, [[C]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.5
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI [[FCMP]](s1), %bb.0, %19(s1), %bb.5
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1000
- ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[...
[truncated]
|
It looks to be something in machine verifier but I will need some time to reproduce the error. Revert for now. |
petar-avramovic
deleted the
revert-76145-global-isel-lane-masks-patch-3
branch
January 17, 2024 16:41
ampandey-1995
pushed a commit
to ampandey-1995/llvm-project
that referenced
this pull request
Jan 19, 2024
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Reverts #76145