97 changes: 62 additions & 35 deletions llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -41,10 +41,10 @@ class SIShrinkInstructions {
bool isKUImmOperand(const MachineOperand &Src) const;
bool isKImmOrKUImmOperand(const MachineOperand &Src, bool &IsUnsigned) const;
void copyExtraImplicitOps(MachineInstr &NewMI, MachineInstr &MI) const;
void shrinkScalarCompare(MachineInstr &MI) const;
void shrinkMIMG(MachineInstr &MI) const;
void shrinkMadFma(MachineInstr &MI) const;
bool shrinkScalarLogicOp(MachineInstr &MI) const;
bool shrinkScalarCompare(MachineInstr &MI) const;
bool shrinkMIMG(MachineInstr &MI) const;
bool shrinkMadFma(MachineInstr &MI) const;
bool shrinkScalarLogicOp(MachineInstr &MI, bool &MoveIterator) const;
bool tryReplaceDeadSDST(MachineInstr &MI) const;
bool instAccessReg(iterator_range<MachineInstr::const_mop_iterator> &&R,
Register Reg, unsigned SubReg) const;
@@ -241,27 +241,30 @@ void SIShrinkInstructions::copyExtraImplicitOps(MachineInstr &NewMI,
}
}

void SIShrinkInstructions::shrinkScalarCompare(MachineInstr &MI) const {
bool SIShrinkInstructions::shrinkScalarCompare(MachineInstr &MI) const {
if (!ST->hasSCmpK())
return;
return false;

// cmpk instructions do scc = dst <cc op> imm16, so commute the instruction to
// get constants on the RHS.
if (!MI.getOperand(0).isReg())
TII->commuteInstruction(MI, false, 0, 1);
bool Changed = false;
if (!MI.getOperand(0).isReg()) {
if (TII->commuteInstruction(MI, false, 0, 1))
Changed = true;
Comment on lines +252 to +253

Contributor:

Suggested change:
-    if (TII->commuteInstruction(MI, false, 0, 1))
-      Changed = true;
+    Changed |= TII->commuteInstruction(MI, false, 0, 1);

Contributor Author:

TII->commuteInstruction() returns a pointer, unfortunately. Updated the other instances though, thanks.
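For reference, a minimal standalone sketch of why the |= form does not apply to this particular call (an illustration against the usual TargetInstrInfo interface, not part of the patch):

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
using namespace llvm;

// commuteInstruction() yields a MachineInstr* (nullptr on failure), so the
// result has to be tested explicitly; OR-ing a pointer into a bool would not
// compile, unlike the bool-returning helpers updated elsewhere in this patch.
static bool commuteAndRecord(const TargetInstrInfo &TII, MachineInstr &MI) {
  bool Changed = false;
  if (TII.commuteInstruction(MI, /*NewMI=*/false, 0, 1)) // non-null => commuted in place
    Changed = true;
  return Changed;
}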

}

// cmpk requires src0 to be a register
const MachineOperand &Src0 = MI.getOperand(0);
if (!Src0.isReg())
return;
return Changed;

MachineOperand &Src1 = MI.getOperand(1);
if (!Src1.isImm())
return;
return Changed;

int SOPKOpc = AMDGPU::getSOPKOp(MI.getOpcode());
if (SOPKOpc == -1)
return;
return Changed;

// eq/ne is special because the imm16 can be treated as signed or unsigned,
// and initially selected to the unsigned versions.
@@ -275,9 +278,10 @@ void SIShrinkInstructions::shrinkScalarCompare(MachineInstr &MI) const {
}

MI.setDesc(TII->get(SOPKOpc));
Changed = true;
}

return;
return Changed;
}

const MCInstrDesc &NewDesc = TII->get(SOPKOpc);
@@ -287,14 +291,16 @@ void SIShrinkInstructions::shrinkScalarCompare(MachineInstr &MI) const {
if (!SIInstrInfo::sopkIsZext(SOPKOpc))
Src1.setImm(SignExtend64(Src1.getImm(), 32));
MI.setDesc(NewDesc);
Changed = true;
}
return Changed;
}

// Shrink NSA encoded instructions with contiguous VGPRs to non-NSA encoding.
void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) const {
bool SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) const {
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
if (!Info)
return;
return false;

uint8_t NewEncoding;
switch (Info->MIMGEncoding) {
@@ -305,7 +311,7 @@ void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) const {
NewEncoding = AMDGPU::MIMGEncGfx11Default;
break;
default:
return;
return false;
}

int VAddr0Idx =
@@ -359,7 +365,7 @@ void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) const {
} else if (Vgpr == NextVgpr) {
NextVgpr = Vgpr + Dwords;
} else {
return;
return false;
}

if (!Op.isUndef())
@@ -369,7 +375,7 @@ void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) const {
}

if (VgprBase + NewAddrDwords > 256)
return;
return false;

// Further check for implicit tied operands - this may be present if TFE is
// enabled
@@ -408,21 +414,22 @@ void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) const {
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata),
ToUntie - (EndVAddr - 1));
}
return true;
}

// Shrink MAD to MADAK/MADMK and FMA to FMAAK/FMAMK.
void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
bool SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
// Pre-GFX10 VOP3 instructions like MAD/FMA cannot take a literal operand so
// there is no reason to try to shrink them.
if (!ST->hasVOP3Literal())
return;
return false;

// There is no advantage to doing this pre-RA.
if (!IsPostRA)
return;
return false;

if (TII->hasAnyModifiersSet(MI))
return;
return false;

const unsigned Opcode = MI.getOpcode();
MachineOperand &Src0 = *TII->getNamedOperand(MI, AMDGPU::OpName::src0);
@@ -439,7 +446,7 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
else if (Src0.isReg() && TRI->isVGPR(*MRI, Src0.getReg()))
Swap = true;
else
return;
return false;

switch (Opcode) {
default:
@@ -477,7 +484,7 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
else if (Src0.isImm() && !TII->isInlineConstant(Src0))
Swap = true;
else
return;
return false;

switch (Opcode) {
default:
@@ -509,10 +516,10 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
}

if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
return;
return false;

if (AMDGPU::isTrue16Inst(NewOpcode) && !shouldShrinkTrue16(MI))
return;
return false;

if (Swap) {
// Swap Src0 and Src1 by building a new instruction.
@@ -527,14 +534,18 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
TII->removeModOperands(MI);
MI.setDesc(TII->get(NewOpcode));
}
return true;
}

/// Attempt to shrink AND/OR/XOR operations requiring non-inlineable literals.
/// For AND or OR, try using S_BITSET{0,1} to clear or set bits.
/// If the inverse of the immediate is legal, use ANDN2, ORN2 or
/// XNOR (as a ^ b == ~(a ^ ~b)).
/// \returns true if the caller should continue the machine function iterator
bool SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &MI) const {
/// \param MoveIterator true if the caller should continue the machine function
/// iterator.
/// \return true if MI was modified.
bool SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &MI,
bool &MoveIterator) const {
unsigned Opc = MI.getOpcode();
const MachineOperand *Dest = &MI.getOperand(0);
MachineOperand *Src0 = &MI.getOperand(1);
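As a quick check of the XNOR identity in the doc comment above, a small self-contained sketch with illustrative values (not part of the patch): s_xnor_b32 computes ~(a ^ b), so feeding it ~Imm yields ~(a ^ ~Imm), which equals a ^ Imm; whenever ~Imm happens to be an inline constant, the non-inlineable literal Imm can be dropped.

#include <cassert>
#include <cstdint>

int main() {
  // Imm = -59 is not an inline constant, but ~Imm = 58 is (integer inline
  // constants cover -16..64), so s_xnor_b32 dst, a, 58 reproduces a ^ Imm.
  uint32_t A = 0x12345678u;
  uint32_t Imm = 0xFFFFFFC5u;   // -59
  uint32_t Xnor = ~(A ^ ~Imm);  // what s_xnor_b32 would compute given ~Imm
  assert(Xnor == (A ^ Imm));    // a ^ b == ~(a ^ ~b)
  return 0;
}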
@@ -580,6 +591,7 @@ bool SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &MI) const {
if (Dest->getReg().isVirtual() && SrcReg->isReg()) {
MRI->setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg());
MRI->setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg());
MoveIterator = true;
Contributor:

I don't understand why this requires "MoveIterator"?

Contributor Author:

IIUC this was introduced to skip looking for further shrinking and move on with the next instruction.
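To make that concrete, a short sketch of the caller-side contract (a fragment with comments added; the real loop is the run() change further down in this diff):

// The return value answers "did MI change?" and feeds the pass's Changed
// flag; MoveIterator answers "should the caller stop trying other shrinks
// for this MI?" and triggers the continue.
bool MoveIterator = false;
Changed |= shrinkScalarLogicOp(MI, MoveIterator);
if (MoveIterator)
  continue; // e.g. only register-allocation hints were set; move on to the next MI
// ... otherwise the remaining shrink attempts for MI follow ...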

return true;
}

@@ -598,6 +610,7 @@ bool SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &MI) const {
} else {
SrcImm->setImm(NewImm);
}
return true;
}
}

@@ -856,6 +869,7 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
IsPostRA = MF.getProperties().hasNoVRegs();

unsigned VCCReg = ST->isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;
bool Changed = false;

for (MachineBasicBlock &MBB : MF) {
MachineBasicBlock::iterator I, Next;
@@ -879,6 +893,7 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
if (ModOpcode != 0) {
MI.setDesc(TII->get(ModOpcode));
Src.setImm(static_cast<int64_t>(ModImm));
Changed = true;
continue;
}
}
@@ -889,6 +904,7 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
MI.getOpcode() == AMDGPU::COPY)) {
if (auto *NextMI = matchSwap(MI)) {
Next = NextMI->getIterator();
Changed = true;
continue;
}
}
@@ -901,8 +917,10 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
MachineOperand *Src1 = &MI.getOperand(2);

if (!Src0->isReg() && Src1->isReg()) {
if (TII->commuteInstruction(MI, false, 1, 2))
if (TII->commuteInstruction(MI, false, 1, 2)) {
std::swap(Src0, Src1);
Changed = true;
}
}

// FIXME: This could work better if hints worked with subregisters. If
@@ -922,13 +940,14 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
Src1->setImm(SignExtend64(Src1->getImm(), 32));
MI.setDesc(TII->get(Opc));
MI.tieOperands(0, 1);
Changed = true;
}
}
}

// Try to use s_cmpk_*
if (MI.isCompare() && TII->isSOPC(MI)) {
shrinkScalarCompare(MI);
Changed |= shrinkScalarCompare(MI);
continue;
}

@@ -943,10 +962,12 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
if (isKImmOperand(Src)) {
MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
Src.setImm(SignExtend64(Src.getImm(), 32));
Changed = true;
} else if ((ModOpc = canModifyToInlineImmOp32(TII, Src, ModImm,
/*Scalar=*/true))) {
MI.setDesc(TII->get(ModOpc));
Src.setImm(static_cast<int64_t>(ModImm));
Changed = true;
}
}

@@ -957,13 +978,15 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
if (MI.getOpcode() == AMDGPU::S_AND_B32 ||
MI.getOpcode() == AMDGPU::S_OR_B32 ||
MI.getOpcode() == AMDGPU::S_XOR_B32) {
if (shrinkScalarLogicOp(MI))
bool MoveIterator = false;
Changed |= shrinkScalarLogicOp(MI, MoveIterator);
if (MoveIterator)
continue;
}

if (IsPostRA && TII->isMIMG(MI.getOpcode()) &&
ST->getGeneration() >= AMDGPUSubtarget::GFX10) {
shrinkMIMG(MI);
Changed |= shrinkMIMG(MI);
continue;
}

@@ -979,14 +1002,14 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64 ||
(MI.getOpcode() == AMDGPU::V_FMA_F64_e64 &&
ST->hasFmaakFmamkF64Insts())) {
shrinkMadFma(MI);
Changed |= shrinkMadFma(MI);
continue;
}

// If there is no chance we will shrink it and use VCC as sdst to get
// a 32 bit form try to replace dead sdst with NULL.
if (TII->isVOP3(MI.getOpcode())) {
tryReplaceDeadSDST(MI);
Changed |= tryReplaceDeadSDST(MI);
if (!TII->hasVALU32BitEncoding(MI.getOpcode())) {
continue;
}
@@ -997,9 +1020,12 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
// it.
if (!MI.isCommutable() || !TII->commuteInstruction(MI) ||
!TII->canShrink(MI, *MRI)) {
tryReplaceDeadSDST(MI);
Changed |= tryReplaceDeadSDST(MI);
continue;
}

// Operands were commuted.
Changed = true;
}

int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
Expand Down Expand Up @@ -1103,9 +1129,10 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
foldImmediates(*Inst32);

LLVM_DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
Changed = true;
}
}
return false;
return Changed;
}

bool SIShrinkInstructionsLegacy::runOnMachineFunction(MachineFunction &MF) {