Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert "AMDGPU/GlobalISelDivergenceLowering: select divergent i1 phis" #79274

Merged
merged 1 commit into from
Jan 24, 2024

Conversation

petar-avramovic
Copy link
Collaborator

Reverts #78482

@petar-avramovic petar-avramovic merged commit c46109d into main Jan 24, 2024
3 of 4 checks passed
@petar-avramovic petar-avramovic deleted the revert-78482-global-isel-lane-masks-patch-3 branch January 24, 2024 11:18
@llvmbot
Copy link
Collaborator

llvmbot commented Jan 24, 2024

@llvm/pr-subscribers-backend-amdgpu

Author: Petar Avramovic (petar-avramovic)

Changes

Reverts llvm/llvm-project#78482


Patch is 137.84 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/79274.diff

21 Files Affected:

  • (modified) llvm/include/llvm/CodeGen/MachineRegisterInfo.h (-11)
  • (modified) llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h (-19)
  • (modified) llvm/lib/CodeGen/MachineRegisterInfo.cpp (-11)
  • (modified) llvm/lib/CodeGen/MachineUniformityAnalysis.cpp (+19)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp (+1-144)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (+2-3)
  • (modified) llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp (+15-15)
  • (modified) llvm/lib/Target/AMDGPU/SILowerI1Copies.h (+4-7)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll (+1-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir (+26-50)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll (+1-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir (+86-254)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll (+1-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir (+66-226)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.ll (+1-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.mir (+32-70)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.ll (+1-1)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.mir (+1-1)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll (-1)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll (-1)
diff --git a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
index 03ba952c354ae20..9bca74a1d4fc82f 100644
--- a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -752,17 +752,6 @@ class MachineRegisterInfo {
   Register createVirtualRegister(const TargetRegisterClass *RegClass,
                                  StringRef Name = "");
 
-  /// All avilable attributes a virtual register can have.
-  struct RegisterAttributes {
-    const RegClassOrRegBank *RCOrRB;
-    LLT Ty;
-  };
-
-  /// createVirtualRegister - Create and return a new virtual register in the
-  /// function with the specified register attributes.
-  Register createVirtualRegister(RegisterAttributes RegAttr,
-                                 StringRef Name = "");
-
   /// Create and return a new virtual register in the function with the same
   /// attributes as the given register.
   Register cloneVirtualRegister(Register VReg, StringRef Name = "");
diff --git a/llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h b/llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h
index 1039ac4e5189b36..e6da099751e7ae5 100644
--- a/llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h
+++ b/llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h
@@ -32,25 +32,6 @@ MachineUniformityInfo computeMachineUniformityInfo(
     MachineFunction &F, const MachineCycleInfo &cycleInfo,
     const MachineDomTree &domTree, bool HasBranchDivergence);
 
-/// Legacy analysis pass which computes a \ref MachineUniformityInfo.
-class MachineUniformityAnalysisPass : public MachineFunctionPass {
-  MachineUniformityInfo UI;
-
-public:
-  static char ID;
-
-  MachineUniformityAnalysisPass();
-
-  MachineUniformityInfo &getUniformityInfo() { return UI; }
-  const MachineUniformityInfo &getUniformityInfo() const { return UI; }
-
-  bool runOnMachineFunction(MachineFunction &F) override;
-  void getAnalysisUsage(AnalysisUsage &AU) const override;
-  void print(raw_ostream &OS, const Module *M = nullptr) const override;
-
-  // TODO: verify analysis
-};
-
 } // namespace llvm
 
 #endif // LLVM_CODEGEN_MACHINEUNIFORMITYANALYSIS_H
diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index d286128cc89bef7..087604af6a71846 100644
--- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -167,17 +167,6 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass,
   return Reg;
 }
 
-/// createVirtualRegister - Create and return a new virtual register in the
-/// function with the specified register attributes.
-Register MachineRegisterInfo::createVirtualRegister(RegisterAttributes RegAttr,
-                                                    StringRef Name) {
-  Register Reg = createIncompleteVirtualRegister(Name);
-  VRegInfo[Reg].first = *RegAttr.RCOrRB;
-  setType(Reg, RegAttr.Ty);
-  noteNewVirtualRegister(Reg);
-  return Reg;
-}
-
 Register MachineRegisterInfo::cloneVirtualRegister(Register VReg,
                                                    StringRef Name) {
   Register Reg = createIncompleteVirtualRegister(Name);
diff --git a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
index 131138e0649e4c2..3e0fe2b1ba087fe 100644
--- a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
+++ b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
@@ -165,6 +165,25 @@ MachineUniformityInfo llvm::computeMachineUniformityInfo(
 
 namespace {
 
+/// Legacy analysis pass which computes a \ref MachineUniformityInfo.
+class MachineUniformityAnalysisPass : public MachineFunctionPass {
+  MachineUniformityInfo UI;
+
+public:
+  static char ID;
+
+  MachineUniformityAnalysisPass();
+
+  MachineUniformityInfo &getUniformityInfo() { return UI; }
+  const MachineUniformityInfo &getUniformityInfo() const { return UI; }
+
+  bool runOnMachineFunction(MachineFunction &F) override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  void print(raw_ostream &OS, const Module *M = nullptr) const override;
+
+  // TODO: verify analysis
+};
+
 class MachineUniformityInfoPrinterPass : public MachineFunctionPass {
 public:
   static char ID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp
index 4f65a95de82ac80..4cd8b1ec1051f4b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp
@@ -16,11 +16,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPU.h"
-#include "SILowerI1Copies.h"
-#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineUniformityAnalysis.h"
-#include "llvm/InitializePasses.h"
 
 #define DEBUG_TYPE "amdgpu-global-isel-divergence-lowering"
 
@@ -46,146 +42,14 @@ class AMDGPUGlobalISelDivergenceLowering : public MachineFunctionPass {
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesCFG();
-    AU.addRequired<MachineDominatorTree>();
-    AU.addRequired<MachinePostDominatorTree>();
-    AU.addRequired<MachineUniformityAnalysisPass>();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
 };
 
-class DivergenceLoweringHelper : public PhiLoweringHelper {
-public:
-  DivergenceLoweringHelper(MachineFunction *MF, MachineDominatorTree *DT,
-                           MachinePostDominatorTree *PDT,
-                           MachineUniformityInfo *MUI);
-
-private:
-  MachineUniformityInfo *MUI = nullptr;
-  MachineIRBuilder B;
-  Register buildRegCopyToLaneMask(Register Reg);
-
-public:
-  void markAsLaneMask(Register DstReg) const override;
-  void getCandidatesForLowering(
-      SmallVectorImpl<MachineInstr *> &Vreg1Phis) const override;
-  void collectIncomingValuesFromPhi(
-      const MachineInstr *MI,
-      SmallVectorImpl<Incoming> &Incomings) const override;
-  void replaceDstReg(Register NewReg, Register OldReg,
-                     MachineBasicBlock *MBB) override;
-  void buildMergeLaneMasks(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator I, const DebugLoc &DL,
-                           Register DstReg, Register PrevReg,
-                           Register CurReg) override;
-  void constrainAsLaneMask(Incoming &In) override;
-};
-
-DivergenceLoweringHelper::DivergenceLoweringHelper(
-    MachineFunction *MF, MachineDominatorTree *DT,
-    MachinePostDominatorTree *PDT, MachineUniformityInfo *MUI)
-    : PhiLoweringHelper(MF, DT, PDT), MUI(MUI), B(*MF) {}
-
-// _(s1) -> SReg_32/64(s1)
-void DivergenceLoweringHelper::markAsLaneMask(Register DstReg) const {
-  assert(MRI->getType(DstReg) == LLT::scalar(1));
-
-  if (MRI->getRegClassOrNull(DstReg)) {
-    if (MRI->constrainRegClass(DstReg, ST->getBoolRC()))
-      return;
-    llvm_unreachable("Failed to constrain register class");
-  }
-
-  MRI->setRegClass(DstReg, ST->getBoolRC());
-}
-
-void DivergenceLoweringHelper::getCandidatesForLowering(
-    SmallVectorImpl<MachineInstr *> &Vreg1Phis) const {
-  LLT S1 = LLT::scalar(1);
-
-  // Add divergent i1 phis to the list
-  for (MachineBasicBlock &MBB : *MF) {
-    for (MachineInstr &MI : MBB.phis()) {
-      Register Dst = MI.getOperand(0).getReg();
-      if (MRI->getType(Dst) == S1 && MUI->isDivergent(Dst))
-        Vreg1Phis.push_back(&MI);
-    }
-  }
-}
-
-void DivergenceLoweringHelper::collectIncomingValuesFromPhi(
-    const MachineInstr *MI, SmallVectorImpl<Incoming> &Incomings) const {
-  for (unsigned i = 1; i < MI->getNumOperands(); i += 2) {
-    Incomings.emplace_back(MI->getOperand(i).getReg(),
-                           MI->getOperand(i + 1).getMBB(), Register());
-  }
-}
-
-void DivergenceLoweringHelper::replaceDstReg(Register NewReg, Register OldReg,
-                                             MachineBasicBlock *MBB) {
-  BuildMI(*MBB, MBB->getFirstNonPHI(), {}, TII->get(AMDGPU::COPY), OldReg)
-      .addReg(NewReg);
-}
-
-// Copy Reg to new lane mask register, insert a copy after instruction that
-// defines Reg while skipping phis if needed.
-Register DivergenceLoweringHelper::buildRegCopyToLaneMask(Register Reg) {
-  Register LaneMask = createLaneMaskReg(MRI, LaneMaskRegAttrs);
-  MachineInstr *Instr = MRI->getVRegDef(Reg);
-  MachineBasicBlock *MBB = Instr->getParent();
-  B.setInsertPt(*MBB, MBB->SkipPHIsAndLabels(std::next(Instr->getIterator())));
-  B.buildCopy(LaneMask, Reg);
-  return LaneMask;
-}
-
-// bb.previous
-//   %PrevReg = ...
-//
-// bb.current
-//   %CurReg = ...
-//
-//   %DstReg - not defined
-//
-// -> (wave32 example, new registers have sreg_32 reg class and S1 LLT)
-//
-// bb.previous
-//   %PrevReg = ...
-//   %PrevRegCopy:sreg_32(s1) = COPY %PrevReg
-//
-// bb.current
-//   %CurReg = ...
-//   %CurRegCopy:sreg_32(s1) = COPY %CurReg
-//   ...
-//   %PrevMaskedReg:sreg_32(s1) = ANDN2 %PrevRegCopy, ExecReg - active lanes 0
-//   %CurMaskedReg:sreg_32(s1)  = AND %ExecReg, CurRegCopy - inactive lanes to 0
-//   %DstReg:sreg_32(s1)        = OR %PrevMaskedReg, CurMaskedReg
-//
-// DstReg = for active lanes rewrite bit in PrevReg with bit from CurReg
-void DivergenceLoweringHelper::buildMergeLaneMasks(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
-    Register DstReg, Register PrevReg, Register CurReg) {
-  // DstReg = (PrevReg & !EXEC) | (CurReg & EXEC)
-  // TODO: check if inputs are constants or results of a compare.
-
-  Register PrevRegCopy = buildRegCopyToLaneMask(PrevReg);
-  Register CurRegCopy = buildRegCopyToLaneMask(CurReg);
-  Register PrevMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
-  Register CurMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
-
-  B.setInsertPt(MBB, I);
-  B.buildInstr(AndN2Op, {PrevMaskedReg}, {PrevRegCopy, ExecReg});
-  B.buildInstr(AndOp, {CurMaskedReg}, {ExecReg, CurRegCopy});
-  B.buildInstr(OrOp, {DstReg}, {PrevMaskedReg, CurMaskedReg});
-}
-
-void DivergenceLoweringHelper::constrainAsLaneMask(Incoming &In) { return; }
-
 } // End anonymous namespace.
 
 INITIALIZE_PASS_BEGIN(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
                       "AMDGPU GlobalISel divergence lowering", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
 INITIALIZE_PASS_END(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
                     "AMDGPU GlobalISel divergence lowering", false, false)
 
@@ -200,12 +64,5 @@ FunctionPass *llvm::createAMDGPUGlobalISelDivergenceLoweringPass() {
 
 bool AMDGPUGlobalISelDivergenceLowering::runOnMachineFunction(
     MachineFunction &MF) {
-  MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>();
-  MachinePostDominatorTree &PDT = getAnalysis<MachinePostDominatorTree>();
-  MachineUniformityInfo &MUI =
-      getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();
-
-  DivergenceLoweringHelper Helper(&MF, &DT, &PDT, &MUI);
-
-  return Helper.lowerPhis();
+  return false;
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 610cedd4ea6fcea..fdee74d58d26914 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -210,7 +210,6 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
 bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
   const Register DefReg = I.getOperand(0).getReg();
   const LLT DefTy = MRI->getType(DefReg);
-
   if (DefTy == LLT::scalar(1)) {
     if (!AllowRiskySelect) {
       LLVM_DEBUG(dbgs() << "Skipping risky boolean phi\n");
@@ -3553,6 +3552,8 @@ bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {
 }
 
 bool AMDGPUInstructionSelector::select(MachineInstr &I) {
+  if (I.isPHI())
+    return selectPHI(I);
 
   if (!I.isPreISelOpcode()) {
     if (I.isCopy())
@@ -3695,8 +3696,6 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
     return selectWaveAddress(I);
   case AMDGPU::G_STACKRESTORE:
     return selectStackRestore(I);
-  case AMDGPU::G_PHI:
-    return selectPHI(I);
   default:
     return selectImpl(I, *CoverageInfo);
   }
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index 59843438950ac4f..cfa0c21def791dd 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -31,9 +31,9 @@
 
 using namespace llvm;
 
-static Register
-insertUndefLaneMask(MachineBasicBlock *MBB, MachineRegisterInfo *MRI,
-                    MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs);
+static Register insertUndefLaneMask(MachineBasicBlock *MBB,
+                                    MachineRegisterInfo *MRI,
+                                    Register LaneMaskRegAttrs);
 
 namespace {
 
@@ -78,7 +78,7 @@ class Vreg1LoweringHelper : public PhiLoweringHelper {
                            MachineBasicBlock::iterator I, const DebugLoc &DL,
                            Register DstReg, Register PrevReg,
                            Register CurReg) override;
-  void constrainAsLaneMask(Incoming &In) override;
+  void constrainIncomingRegisterTakenAsIs(Incoming &In) override;
 
   bool lowerCopiesFromI1();
   bool lowerCopiesToI1();
@@ -304,8 +304,7 @@ class LoopFinder {
   /// blocks, so that the SSA updater doesn't have to search all the way to the
   /// function entry.
   void addLoopEntries(unsigned LoopLevel, MachineSSAUpdater &SSAUpdater,
-                      MachineRegisterInfo &MRI,
-                      MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs,
+                      MachineRegisterInfo &MRI, Register LaneMaskRegAttrs,
                       ArrayRef<Incoming> Incomings = {}) {
     assert(LoopLevel < CommonDominators.size());
 
@@ -412,15 +411,14 @@ FunctionPass *llvm::createSILowerI1CopiesPass() {
   return new SILowerI1Copies();
 }
 
-Register llvm::createLaneMaskReg(
-    MachineRegisterInfo *MRI,
-    MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs) {
-  return MRI->createVirtualRegister(LaneMaskRegAttrs);
+Register llvm::createLaneMaskReg(MachineRegisterInfo *MRI,
+                                 Register LaneMaskRegAttrs) {
+  return MRI->cloneVirtualRegister(LaneMaskRegAttrs);
 }
 
-static Register
-insertUndefLaneMask(MachineBasicBlock *MBB, MachineRegisterInfo *MRI,
-                    MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs) {
+static Register insertUndefLaneMask(MachineBasicBlock *MBB,
+                                    MachineRegisterInfo *MRI,
+                                    Register LaneMaskRegAttrs) {
   MachineFunction &MF = *MBB->getParent();
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
@@ -621,7 +619,7 @@ bool PhiLoweringHelper::lowerPhis() {
       for (auto &Incoming : Incomings) {
         MachineBasicBlock &IMBB = *Incoming.Block;
         if (PIA.isSource(IMBB)) {
-          constrainAsLaneMask(Incoming);
+          constrainIncomingRegisterTakenAsIs(Incoming);
           SSAUpdater.AddAvailableValue(&IMBB, Incoming.Reg);
         } else {
           Incoming.UpdatedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
@@ -913,4 +911,6 @@ void Vreg1LoweringHelper::buildMergeLaneMasks(MachineBasicBlock &MBB,
   }
 }
 
-void Vreg1LoweringHelper::constrainAsLaneMask(Incoming &In) {}
+void Vreg1LoweringHelper::constrainIncomingRegisterTakenAsIs(Incoming &In) {
+  return;
+}
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.h b/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
index 80ea0536b4cbff3..5099d39c2d14155 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
@@ -31,9 +31,7 @@ struct Incoming {
       : Reg(Reg), Block(Block), UpdatedReg(UpdatedReg) {}
 };
 
-Register
-createLaneMaskReg(MachineRegisterInfo *MRI,
-                  MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs);
+Register createLaneMaskReg(MachineRegisterInfo *MRI, Register LaneMaskRegAttrs);
 
 class PhiLoweringHelper {
 public:
@@ -49,7 +47,7 @@ class PhiLoweringHelper {
   MachineRegisterInfo *MRI = nullptr;
   const GCNSubtarget *ST = nullptr;
   const SIInstrInfo *TII = nullptr;
-  MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs;
+  Register LaneMaskRegAttrs;
 
 #ifndef NDEBUG
   DenseSet<Register> PhiRegisters;
@@ -70,8 +68,7 @@ class PhiLoweringHelper {
   getSaluInsertionAtEnd(MachineBasicBlock &MBB) const;
 
   void initializeLaneMaskRegisterAttributes(Register LaneMask) {
-    LaneMaskRegAttrs.RCOrRB = &MRI->getRegClassOrRegBank(LaneMask);
-    LaneMaskRegAttrs.Ty = MRI->getType(LaneMask);
+    LaneMaskRegAttrs = LaneMask;
   }
 
   bool isLaneMaskReg(Register Reg) const {
@@ -94,7 +91,7 @@ class PhiLoweringHelper {
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, Register DstReg,
                                    Register PrevReg, Register CurReg) = 0;
-  virtual void constrainAsLaneMask(Incoming &In) = 0;
+  virtual void constrainIncomingRegisterTakenAsIs(Incoming &In) = 0;
 };
 
 } // end namespace llvm
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
index 06a8f80e6aa34ab..7a68aec1a1c5558 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
@@ -1,6 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -global-isel -amdgpu-global-isel-risky-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
-; REQUIRES: do-not-run-me
+; RUN: llc -global-isel -amdgpu-global-isel-risky-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
 
 ; Divergent phis that don't require lowering using lane mask merging
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir
index 55f22b0bbb4df6f..d314ebe355f51d0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir
@@ -1,9 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
-# RUN: llc -global-isel -mtriple=amdgcn-mesa-amdpal -mcpu=gfx1010 -run-pass=amdgpu-global-isel-divergence-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s
-
-# Test is updated but copies between S1-register-with-reg-class and
-# register-with-reg-class-no-LLT fail machine verification
-# REQUIRES: do-not-run-me-with-machine-verifier
+# RUN: llc -global-isel -mtriple=amdgcn-mesa-amdpal -mcpu=gfx1010 -run-pass=amdgpu-global-isel-divergence-lowering %s -o - | FileCheck -check-prefix=GFX10 %s
 
 --- |
   define void @divergent_i1_phi_uniform_branch() {ret void}
@@ -50,7 +46,7 @@ body: |
   ; GFX10-NEXT: bb.2:
   ; GFX10-NEXT:   successors: %bb.4(0x80000000)
   ; GFX10-NEXT: {{  $}}
-  ; GFX10-NEXT:   [[PHI:%[0-9]+]]:sreg_32(s1) = G_PHI %14(s1), %bb.3, [[ICMP]](s1), %bb.0
+  ; GFX10-NEXT:   [[PHI:%[0-9]+]]:_(s1) = G_PHI %14(s1), %bb.3, [[ICMP]](s1), %bb.0
   ; GFX10-NEXT:   G_BR %bb.4
   ; GFX10-NEXT: {{  $}}
   ; GFX10-NEXT: bb.3:
@@ -130,7 +126,6 @@ body: |
   ; GFX10-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr0
   ; GFX10-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
   ; GFX10-NEXT:   [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY2]](s32), [[C]]
-  ; GFX10-NEXT:   [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1)
   ; GFX10-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
   ; GFX10-NEXT:   [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY3]](s32), [[C1]]
   ; GFX10-NEXT:   G_BRCOND [[ICMP1]](s1), %bb.2
@@ -141,17 +136,12 @@ body: |
   ; GFX10-NEXT: {{  $}}
   ; GFX10-NEXT:...
[truncated]

@llvmbot
Copy link
Collaborator

llvmbot commented Jan 24, 2024

@llvm/pr-subscribers-llvm-globalisel

Author: Petar Avramovic (petar-avramovic)

Changes

Reverts llvm/llvm-project#78482


Patch is 137.84 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/79274.diff

21 Files Affected:

  • (modified) llvm/include/llvm/CodeGen/MachineRegisterInfo.h (-11)
  • (modified) llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h (-19)
  • (modified) llvm/lib/CodeGen/MachineRegisterInfo.cpp (-11)
  • (modified) llvm/lib/CodeGen/MachineUniformityAnalysis.cpp (+19)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp (+1-144)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (+2-3)
  • (modified) llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp (+15-15)
  • (modified) llvm/lib/Target/AMDGPU/SILowerI1Copies.h (+4-7)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll (+1-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir (+26-50)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll (+1-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir (+86-254)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll (+1-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir (+66-226)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.ll (+1-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.mir (+32-70)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.ll (+1-1)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.mir (+1-1)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll (-1)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll (-1)
diff --git a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
index 03ba952c354ae20..9bca74a1d4fc82f 100644
--- a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -752,17 +752,6 @@ class MachineRegisterInfo {
   Register createVirtualRegister(const TargetRegisterClass *RegClass,
                                  StringRef Name = "");
 
-  /// All avilable attributes a virtual register can have.
-  struct RegisterAttributes {
-    const RegClassOrRegBank *RCOrRB;
-    LLT Ty;
-  };
-
-  /// createVirtualRegister - Create and return a new virtual register in the
-  /// function with the specified register attributes.
-  Register createVirtualRegister(RegisterAttributes RegAttr,
-                                 StringRef Name = "");
-
   /// Create and return a new virtual register in the function with the same
   /// attributes as the given register.
   Register cloneVirtualRegister(Register VReg, StringRef Name = "");
diff --git a/llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h b/llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h
index 1039ac4e5189b36..e6da099751e7ae5 100644
--- a/llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h
+++ b/llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h
@@ -32,25 +32,6 @@ MachineUniformityInfo computeMachineUniformityInfo(
     MachineFunction &F, const MachineCycleInfo &cycleInfo,
     const MachineDomTree &domTree, bool HasBranchDivergence);
 
-/// Legacy analysis pass which computes a \ref MachineUniformityInfo.
-class MachineUniformityAnalysisPass : public MachineFunctionPass {
-  MachineUniformityInfo UI;
-
-public:
-  static char ID;
-
-  MachineUniformityAnalysisPass();
-
-  MachineUniformityInfo &getUniformityInfo() { return UI; }
-  const MachineUniformityInfo &getUniformityInfo() const { return UI; }
-
-  bool runOnMachineFunction(MachineFunction &F) override;
-  void getAnalysisUsage(AnalysisUsage &AU) const override;
-  void print(raw_ostream &OS, const Module *M = nullptr) const override;
-
-  // TODO: verify analysis
-};
-
 } // namespace llvm
 
 #endif // LLVM_CODEGEN_MACHINEUNIFORMITYANALYSIS_H
diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index d286128cc89bef7..087604af6a71846 100644
--- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -167,17 +167,6 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass,
   return Reg;
 }
 
-/// createVirtualRegister - Create and return a new virtual register in the
-/// function with the specified register attributes.
-Register MachineRegisterInfo::createVirtualRegister(RegisterAttributes RegAttr,
-                                                    StringRef Name) {
-  Register Reg = createIncompleteVirtualRegister(Name);
-  VRegInfo[Reg].first = *RegAttr.RCOrRB;
-  setType(Reg, RegAttr.Ty);
-  noteNewVirtualRegister(Reg);
-  return Reg;
-}
-
 Register MachineRegisterInfo::cloneVirtualRegister(Register VReg,
                                                    StringRef Name) {
   Register Reg = createIncompleteVirtualRegister(Name);
diff --git a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
index 131138e0649e4c2..3e0fe2b1ba087fe 100644
--- a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
+++ b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
@@ -165,6 +165,25 @@ MachineUniformityInfo llvm::computeMachineUniformityInfo(
 
 namespace {
 
+/// Legacy analysis pass which computes a \ref MachineUniformityInfo.
+class MachineUniformityAnalysisPass : public MachineFunctionPass {
+  MachineUniformityInfo UI;
+
+public:
+  static char ID;
+
+  MachineUniformityAnalysisPass();
+
+  MachineUniformityInfo &getUniformityInfo() { return UI; }
+  const MachineUniformityInfo &getUniformityInfo() const { return UI; }
+
+  bool runOnMachineFunction(MachineFunction &F) override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  void print(raw_ostream &OS, const Module *M = nullptr) const override;
+
+  // TODO: verify analysis
+};
+
 class MachineUniformityInfoPrinterPass : public MachineFunctionPass {
 public:
   static char ID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp
index 4f65a95de82ac80..4cd8b1ec1051f4b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp
@@ -16,11 +16,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPU.h"
-#include "SILowerI1Copies.h"
-#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineUniformityAnalysis.h"
-#include "llvm/InitializePasses.h"
 
 #define DEBUG_TYPE "amdgpu-global-isel-divergence-lowering"
 
@@ -46,146 +42,14 @@ class AMDGPUGlobalISelDivergenceLowering : public MachineFunctionPass {
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesCFG();
-    AU.addRequired<MachineDominatorTree>();
-    AU.addRequired<MachinePostDominatorTree>();
-    AU.addRequired<MachineUniformityAnalysisPass>();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
 };
 
-class DivergenceLoweringHelper : public PhiLoweringHelper {
-public:
-  DivergenceLoweringHelper(MachineFunction *MF, MachineDominatorTree *DT,
-                           MachinePostDominatorTree *PDT,
-                           MachineUniformityInfo *MUI);
-
-private:
-  MachineUniformityInfo *MUI = nullptr;
-  MachineIRBuilder B;
-  Register buildRegCopyToLaneMask(Register Reg);
-
-public:
-  void markAsLaneMask(Register DstReg) const override;
-  void getCandidatesForLowering(
-      SmallVectorImpl<MachineInstr *> &Vreg1Phis) const override;
-  void collectIncomingValuesFromPhi(
-      const MachineInstr *MI,
-      SmallVectorImpl<Incoming> &Incomings) const override;
-  void replaceDstReg(Register NewReg, Register OldReg,
-                     MachineBasicBlock *MBB) override;
-  void buildMergeLaneMasks(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator I, const DebugLoc &DL,
-                           Register DstReg, Register PrevReg,
-                           Register CurReg) override;
-  void constrainAsLaneMask(Incoming &In) override;
-};
-
-DivergenceLoweringHelper::DivergenceLoweringHelper(
-    MachineFunction *MF, MachineDominatorTree *DT,
-    MachinePostDominatorTree *PDT, MachineUniformityInfo *MUI)
-    : PhiLoweringHelper(MF, DT, PDT), MUI(MUI), B(*MF) {}
-
-// _(s1) -> SReg_32/64(s1)
-void DivergenceLoweringHelper::markAsLaneMask(Register DstReg) const {
-  assert(MRI->getType(DstReg) == LLT::scalar(1));
-
-  if (MRI->getRegClassOrNull(DstReg)) {
-    if (MRI->constrainRegClass(DstReg, ST->getBoolRC()))
-      return;
-    llvm_unreachable("Failed to constrain register class");
-  }
-
-  MRI->setRegClass(DstReg, ST->getBoolRC());
-}
-
-void DivergenceLoweringHelper::getCandidatesForLowering(
-    SmallVectorImpl<MachineInstr *> &Vreg1Phis) const {
-  LLT S1 = LLT::scalar(1);
-
-  // Add divergent i1 phis to the list
-  for (MachineBasicBlock &MBB : *MF) {
-    for (MachineInstr &MI : MBB.phis()) {
-      Register Dst = MI.getOperand(0).getReg();
-      if (MRI->getType(Dst) == S1 && MUI->isDivergent(Dst))
-        Vreg1Phis.push_back(&MI);
-    }
-  }
-}
-
-void DivergenceLoweringHelper::collectIncomingValuesFromPhi(
-    const MachineInstr *MI, SmallVectorImpl<Incoming> &Incomings) const {
-  for (unsigned i = 1; i < MI->getNumOperands(); i += 2) {
-    Incomings.emplace_back(MI->getOperand(i).getReg(),
-                           MI->getOperand(i + 1).getMBB(), Register());
-  }
-}
-
-void DivergenceLoweringHelper::replaceDstReg(Register NewReg, Register OldReg,
-                                             MachineBasicBlock *MBB) {
-  BuildMI(*MBB, MBB->getFirstNonPHI(), {}, TII->get(AMDGPU::COPY), OldReg)
-      .addReg(NewReg);
-}
-
-// Copy Reg to new lane mask register, insert a copy after instruction that
-// defines Reg while skipping phis if needed.
-Register DivergenceLoweringHelper::buildRegCopyToLaneMask(Register Reg) {
-  Register LaneMask = createLaneMaskReg(MRI, LaneMaskRegAttrs);
-  MachineInstr *Instr = MRI->getVRegDef(Reg);
-  MachineBasicBlock *MBB = Instr->getParent();
-  B.setInsertPt(*MBB, MBB->SkipPHIsAndLabels(std::next(Instr->getIterator())));
-  B.buildCopy(LaneMask, Reg);
-  return LaneMask;
-}
-
-// bb.previous
-//   %PrevReg = ...
-//
-// bb.current
-//   %CurReg = ...
-//
-//   %DstReg - not defined
-//
-// -> (wave32 example, new registers have sreg_32 reg class and S1 LLT)
-//
-// bb.previous
-//   %PrevReg = ...
-//   %PrevRegCopy:sreg_32(s1) = COPY %PrevReg
-//
-// bb.current
-//   %CurReg = ...
-//   %CurRegCopy:sreg_32(s1) = COPY %CurReg
-//   ...
-//   %PrevMaskedReg:sreg_32(s1) = ANDN2 %PrevRegCopy, ExecReg - active lanes 0
-//   %CurMaskedReg:sreg_32(s1)  = AND %ExecReg, CurRegCopy - inactive lanes to 0
-//   %DstReg:sreg_32(s1)        = OR %PrevMaskedReg, CurMaskedReg
-//
-// DstReg = for active lanes rewrite bit in PrevReg with bit from CurReg
-void DivergenceLoweringHelper::buildMergeLaneMasks(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
-    Register DstReg, Register PrevReg, Register CurReg) {
-  // DstReg = (PrevReg & !EXEC) | (CurReg & EXEC)
-  // TODO: check if inputs are constants or results of a compare.
-
-  Register PrevRegCopy = buildRegCopyToLaneMask(PrevReg);
-  Register CurRegCopy = buildRegCopyToLaneMask(CurReg);
-  Register PrevMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
-  Register CurMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
-
-  B.setInsertPt(MBB, I);
-  B.buildInstr(AndN2Op, {PrevMaskedReg}, {PrevRegCopy, ExecReg});
-  B.buildInstr(AndOp, {CurMaskedReg}, {ExecReg, CurRegCopy});
-  B.buildInstr(OrOp, {DstReg}, {PrevMaskedReg, CurMaskedReg});
-}
-
-void DivergenceLoweringHelper::constrainAsLaneMask(Incoming &In) { return; }
-
 } // End anonymous namespace.
 
 INITIALIZE_PASS_BEGIN(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
                       "AMDGPU GlobalISel divergence lowering", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
 INITIALIZE_PASS_END(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
                     "AMDGPU GlobalISel divergence lowering", false, false)
 
@@ -200,12 +64,5 @@ FunctionPass *llvm::createAMDGPUGlobalISelDivergenceLoweringPass() {
 
 bool AMDGPUGlobalISelDivergenceLowering::runOnMachineFunction(
     MachineFunction &MF) {
-  MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>();
-  MachinePostDominatorTree &PDT = getAnalysis<MachinePostDominatorTree>();
-  MachineUniformityInfo &MUI =
-      getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();
-
-  DivergenceLoweringHelper Helper(&MF, &DT, &PDT, &MUI);
-
-  return Helper.lowerPhis();
+  return false;
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 610cedd4ea6fcea..fdee74d58d26914 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -210,7 +210,6 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
 bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
   const Register DefReg = I.getOperand(0).getReg();
   const LLT DefTy = MRI->getType(DefReg);
-
   if (DefTy == LLT::scalar(1)) {
     if (!AllowRiskySelect) {
       LLVM_DEBUG(dbgs() << "Skipping risky boolean phi\n");
@@ -3553,6 +3552,8 @@ bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {
 }
 
 bool AMDGPUInstructionSelector::select(MachineInstr &I) {
+  if (I.isPHI())
+    return selectPHI(I);
 
   if (!I.isPreISelOpcode()) {
     if (I.isCopy())
@@ -3695,8 +3696,6 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
     return selectWaveAddress(I);
   case AMDGPU::G_STACKRESTORE:
     return selectStackRestore(I);
-  case AMDGPU::G_PHI:
-    return selectPHI(I);
   default:
     return selectImpl(I, *CoverageInfo);
   }
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index 59843438950ac4f..cfa0c21def791dd 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -31,9 +31,9 @@
 
 using namespace llvm;
 
-static Register
-insertUndefLaneMask(MachineBasicBlock *MBB, MachineRegisterInfo *MRI,
-                    MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs);
+static Register insertUndefLaneMask(MachineBasicBlock *MBB,
+                                    MachineRegisterInfo *MRI,
+                                    Register LaneMaskRegAttrs);
 
 namespace {
 
@@ -78,7 +78,7 @@ class Vreg1LoweringHelper : public PhiLoweringHelper {
                            MachineBasicBlock::iterator I, const DebugLoc &DL,
                            Register DstReg, Register PrevReg,
                            Register CurReg) override;
-  void constrainAsLaneMask(Incoming &In) override;
+  void constrainIncomingRegisterTakenAsIs(Incoming &In) override;
 
   bool lowerCopiesFromI1();
   bool lowerCopiesToI1();
@@ -304,8 +304,7 @@ class LoopFinder {
   /// blocks, so that the SSA updater doesn't have to search all the way to the
   /// function entry.
   void addLoopEntries(unsigned LoopLevel, MachineSSAUpdater &SSAUpdater,
-                      MachineRegisterInfo &MRI,
-                      MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs,
+                      MachineRegisterInfo &MRI, Register LaneMaskRegAttrs,
                       ArrayRef<Incoming> Incomings = {}) {
     assert(LoopLevel < CommonDominators.size());
 
@@ -412,15 +411,14 @@ FunctionPass *llvm::createSILowerI1CopiesPass() {
   return new SILowerI1Copies();
 }
 
-Register llvm::createLaneMaskReg(
-    MachineRegisterInfo *MRI,
-    MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs) {
-  return MRI->createVirtualRegister(LaneMaskRegAttrs);
+Register llvm::createLaneMaskReg(MachineRegisterInfo *MRI,
+                                 Register LaneMaskRegAttrs) {
+  return MRI->cloneVirtualRegister(LaneMaskRegAttrs);
 }
 
-static Register
-insertUndefLaneMask(MachineBasicBlock *MBB, MachineRegisterInfo *MRI,
-                    MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs) {
+static Register insertUndefLaneMask(MachineBasicBlock *MBB,
+                                    MachineRegisterInfo *MRI,
+                                    Register LaneMaskRegAttrs) {
   MachineFunction &MF = *MBB->getParent();
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
@@ -621,7 +619,7 @@ bool PhiLoweringHelper::lowerPhis() {
       for (auto &Incoming : Incomings) {
         MachineBasicBlock &IMBB = *Incoming.Block;
         if (PIA.isSource(IMBB)) {
-          constrainAsLaneMask(Incoming);
+          constrainIncomingRegisterTakenAsIs(Incoming);
           SSAUpdater.AddAvailableValue(&IMBB, Incoming.Reg);
         } else {
           Incoming.UpdatedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
@@ -913,4 +911,6 @@ void Vreg1LoweringHelper::buildMergeLaneMasks(MachineBasicBlock &MBB,
   }
 }
 
-void Vreg1LoweringHelper::constrainAsLaneMask(Incoming &In) {}
+void Vreg1LoweringHelper::constrainIncomingRegisterTakenAsIs(Incoming &In) {
+  return;
+}
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.h b/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
index 80ea0536b4cbff3..5099d39c2d14155 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
@@ -31,9 +31,7 @@ struct Incoming {
       : Reg(Reg), Block(Block), UpdatedReg(UpdatedReg) {}
 };
 
-Register
-createLaneMaskReg(MachineRegisterInfo *MRI,
-                  MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs);
+Register createLaneMaskReg(MachineRegisterInfo *MRI, Register LaneMaskRegAttrs);
 
 class PhiLoweringHelper {
 public:
@@ -49,7 +47,7 @@ class PhiLoweringHelper {
   MachineRegisterInfo *MRI = nullptr;
   const GCNSubtarget *ST = nullptr;
   const SIInstrInfo *TII = nullptr;
-  MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs;
+  Register LaneMaskRegAttrs;
 
 #ifndef NDEBUG
   DenseSet<Register> PhiRegisters;
@@ -70,8 +68,7 @@ class PhiLoweringHelper {
   getSaluInsertionAtEnd(MachineBasicBlock &MBB) const;
 
   void initializeLaneMaskRegisterAttributes(Register LaneMask) {
-    LaneMaskRegAttrs.RCOrRB = &MRI->getRegClassOrRegBank(LaneMask);
-    LaneMaskRegAttrs.Ty = MRI->getType(LaneMask);
+    LaneMaskRegAttrs = LaneMask;
   }
 
   bool isLaneMaskReg(Register Reg) const {
@@ -94,7 +91,7 @@ class PhiLoweringHelper {
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, Register DstReg,
                                    Register PrevReg, Register CurReg) = 0;
-  virtual void constrainAsLaneMask(Incoming &In) = 0;
+  virtual void constrainIncomingRegisterTakenAsIs(Incoming &In) = 0;
 };
 
 } // end namespace llvm
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
index 06a8f80e6aa34ab..7a68aec1a1c5558 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
@@ -1,6 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -global-isel -amdgpu-global-isel-risky-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
-; REQUIRES: do-not-run-me
+; RUN: llc -global-isel -amdgpu-global-isel-risky-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
 
 ; Divergent phis that don't require lowering using lane mask merging
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir
index 55f22b0bbb4df6f..d314ebe355f51d0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir
@@ -1,9 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
-# RUN: llc -global-isel -mtriple=amdgcn-mesa-amdpal -mcpu=gfx1010 -run-pass=amdgpu-global-isel-divergence-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s
-
-# Test is updated but copies between S1-register-with-reg-class and
-# register-with-reg-class-no-LLT fail machine verification
-# REQUIRES: do-not-run-me-with-machine-verifier
+# RUN: llc -global-isel -mtriple=amdgcn-mesa-amdpal -mcpu=gfx1010 -run-pass=amdgpu-global-isel-divergence-lowering %s -o - | FileCheck -check-prefix=GFX10 %s
 
 --- |
   define void @divergent_i1_phi_uniform_branch() {ret void}
@@ -50,7 +46,7 @@ body: |
   ; GFX10-NEXT: bb.2:
   ; GFX10-NEXT:   successors: %bb.4(0x80000000)
   ; GFX10-NEXT: {{  $}}
-  ; GFX10-NEXT:   [[PHI:%[0-9]+]]:sreg_32(s1) = G_PHI %14(s1), %bb.3, [[ICMP]](s1), %bb.0
+  ; GFX10-NEXT:   [[PHI:%[0-9]+]]:_(s1) = G_PHI %14(s1), %bb.3, [[ICMP]](s1), %bb.0
   ; GFX10-NEXT:   G_BR %bb.4
   ; GFX10-NEXT: {{  $}}
   ; GFX10-NEXT: bb.3:
@@ -130,7 +126,6 @@ body: |
   ; GFX10-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr0
   ; GFX10-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
   ; GFX10-NEXT:   [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY2]](s32), [[C]]
-  ; GFX10-NEXT:   [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1)
   ; GFX10-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
   ; GFX10-NEXT:   [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY3]](s32), [[C1]]
   ; GFX10-NEXT:   G_BRCOND [[ICMP1]](s1), %bb.2
@@ -141,17 +136,12 @@ body: |
   ; GFX10-NEXT: {{  $}}
   ; GFX10-NEXT:...
[truncated]

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

2 participants