
[AArch64] Teach areMemAccessesTriviallyDisjoint about scalable widths. #73655

Merged: 1 commit into llvm:main from davemgreen:gh-sve-trivialdisjointscalable on Nov 30, 2023

Conversation

davemgreen (Collaborator):

The base change here is to change getMemOperandWithOffsetWidth to return a TypeSize Width, which in turn allows areMemAccessesTriviallyDisjoint to reason about trivially disjoint widths.
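As a rough sketch of the reasoning this enables (a minimal standalone illustration with TypeSize simplified to a plain struct; names mirror the patch, but this is not the exact LLVM code):

#include <cstdint>

// Simplified stand-in for llvm::TypeSize: a known-minimum size in bytes plus
// a flag saying whether the true size is that minimum multiplied by vscale.
struct SimpleTypeSize {
  uint64_t KnownMin;
  bool Scalable;
};

// Two accesses off the same base register are trivially disjoint when the
// lower access ends at or before the higher one begins. A scalable width is
// only comparable against scalable offsets (and fixed against fixed), since
// vscale is unknown at compile time.
bool triviallyDisjoint(int64_t OffsetA, SimpleTypeSize WidthA, int64_t OffsetB,
                       SimpleTypeSize WidthB, bool OffsetsAreScalable) {
  int64_t LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int64_t HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  SimpleTypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
  return LowWidth.Scalable == OffsetsAreScalable &&
         LowOffset + (int64_t)LowWidth.KnownMin <= HighOffset;
}

For example, two SVE stores at [sp, #1, mul vl] and [sp, #2, mul vl] have scalable offsets 16 and 32 and a scalable width of 16 bytes, so 16 + 16 <= 32 holds for every vscale and the accesses can be reordered.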

llvmbot (Collaborator) commented Nov 28, 2023

@llvm/pr-subscribers-backend-aarch64

Author: David Green (davemgreen)

Changes

The base change here is to change getMemOperandWithOffsetWidth to return a TypeSize Width, which in turn allows areMemAccessesTriviallyDisjoint to reason about trivially disjoint widths.


Patch is 31.25 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/73655.diff

7 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64FrameLowering.cpp (+1-2)
  • (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.cpp (+58-53)
  • (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.h (+2-2)
  • (modified) llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp (+2-4)
  • (modified) llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll (+1-1)
  • (modified) llvm/test/CodeGen/AArch64/sve-aliasing.ll (+93-93)
  • (modified) llvm/test/CodeGen/AArch64/sve-insert-vector.ll (+1-1)
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index fd47970bd050596..6faad4ec4acb2c3 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1214,8 +1214,7 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
       SEH->eraseFromParent();
   }
 
-  TypeSize Scale = TypeSize::getFixed(1);
-  unsigned Width;
+  TypeSize Scale = TypeSize::getFixed(1), Width(0, false);
   int64_t MinOffset, MaxOffset;
   bool Success = static_cast<const AArch64InstrInfo *>(TII)->getMemOpInfo(
       NewOpc, Scale, Width, MinOffset, MaxOffset);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index a95ab3c7e0f288a..01fecbf4c1408be 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1096,7 +1096,7 @@ bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
   const TargetRegisterInfo *TRI = &getRegisterInfo();
   const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
   int64_t OffsetA = 0, OffsetB = 0;
-  unsigned WidthA = 0, WidthB = 0;
+  TypeSize WidthA(0, false), WidthB(0, false);
   bool OffsetAIsScalable = false, OffsetBIsScalable = false;
 
   assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
@@ -1121,8 +1121,9 @@ bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
         OffsetAIsScalable == OffsetBIsScalable) {
       int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
       int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
-      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
-      if (LowOffset + LowWidth <= HighOffset)
+      TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
+      if (LowWidth.isScalable() == OffsetAIsScalable &&
+          LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
         return true;
     }
   }
@@ -2675,9 +2676,15 @@ bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
     return false;
 
   const MachineOperand *BaseOp;
+  TypeSize WidthN(0, false);
   if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
-                                    Width, TRI))
+                                    WidthN, TRI))
     return false;
+  // The maximum vscale is 16 undef AArch64, return the maximal extent for the
+  // vector.
+  Width = WidthN.isScalable()
+              ? WidthN.getKnownMinValue() * AArch64::SVEMaxBitsPerVector / 128
+              : WidthN.getKnownMinValue();
   BaseOps.push_back(BaseOp);
   return true;
 }
@@ -3456,7 +3463,7 @@ MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
 
 bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
     const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
-    bool &OffsetIsScalable, unsigned &Width,
+    bool &OffsetIsScalable, TypeSize &Width,
     const TargetRegisterInfo *TRI) const {
   assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
   // Handle only loads/stores with base register followed by immediate offset.
@@ -3511,26 +3518,25 @@ AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
 }
 
 bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
-                                    unsigned &Width, int64_t &MinOffset,
+                                    TypeSize &Width, int64_t &MinOffset,
                                     int64_t &MaxOffset) {
-  const unsigned SVEMaxBytesPerVector = AArch64::SVEMaxBitsPerVector / 8;
   switch (Opcode) {
   // Not a memory operation or something we want to handle.
   default:
     Scale = TypeSize::getFixed(0);
-    Width = 0;
+    Width = TypeSize::getFixed(0);
     MinOffset = MaxOffset = 0;
     return false;
   case AArch64::STRWpost:
   case AArch64::LDRWpost:
-    Width = 32;
+    Width = TypeSize::getFixed(32);
     Scale = TypeSize::getFixed(4);
     MinOffset = -256;
     MaxOffset = 255;
     break;
   case AArch64::LDURQi:
   case AArch64::STURQi:
-    Width = 16;
+    Width = TypeSize::getFixed(16);
     Scale = TypeSize::getFixed(1);
     MinOffset = -256;
     MaxOffset = 255;
@@ -3542,7 +3548,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
   case AArch64::STURXi:
   case AArch64::STURDi:
   case AArch64::STLURXi:
-    Width = 8;
+    Width = TypeSize::getFixed(8);
     Scale = TypeSize::getFixed(1);
     MinOffset = -256;
     MaxOffset = 255;
@@ -3555,7 +3561,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
   case AArch64::STURWi:
   case AArch64::STURSi:
   case AArch64::STLURWi:
-    Width = 4;
+    Width = TypeSize::getFixed(4);
     Scale = TypeSize::getFixed(1);
     MinOffset = -256;
     MaxOffset = 255;
@@ -3570,7 +3576,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
   case AArch64::STURHi:
   case AArch64::STURHHi:
   case AArch64::STLURHi:
-    Width = 2;
+    Width = TypeSize::getFixed(2);
     Scale = TypeSize::getFixed(1);
     MinOffset = -256;
     MaxOffset = 255;
@@ -3585,7 +3591,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
   case AArch64::STURBi:
   case AArch64::STURBBi:
   case AArch64::STLURBi:
-    Width = 1;
+    Width = TypeSize::getFixed(1);
     Scale = TypeSize::getFixed(1);
     MinOffset = -256;
     MaxOffset = 255;
@@ -3595,14 +3601,14 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
   case AArch64::STPQi:
   case AArch64::STNPQi:
     Scale = TypeSize::getFixed(16);
-    Width = 32;
+    Width = TypeSize::getFixed(32);
     MinOffset = -64;
     MaxOffset = 63;
     break;
   case AArch64::LDRQui:
   case AArch64::STRQui:
     Scale = TypeSize::getFixed(16);
-    Width = 16;
+    Width = TypeSize::getFixed(16);
     MinOffset = 0;
     MaxOffset = 4095;
     break;
@@ -3615,7 +3621,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
   case AArch64::STNPXi:
   case AArch64::STNPDi:
     Scale = TypeSize::getFixed(8);
-    Width = 16;
+    Width = TypeSize::getFixed(16);
     MinOffset = -64;
     MaxOffset = 63;
     break;
@@ -3625,14 +3631,14 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
   case AArch64::STRXui:
   case AArch64::STRDui:
     Scale = TypeSize::getFixed(8);
-    Width = 8;
+    Width = TypeSize::getFixed(8);
     MinOffset = 0;
     MaxOffset = 4095;
     break;
   case AArch64::StoreSwiftAsyncContext:
     // Store is an STRXui, but there might be an ADDXri in the expansion too.
     Scale = TypeSize::getFixed(1);
-    Width = 8;
+    Width = TypeSize::getFixed(8);
     MinOffset = 0;
     MaxOffset = 4095;
     break;
@@ -3645,7 +3651,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
   case AArch64::STNPWi:
   case AArch64::STNPSi:
     Scale = TypeSize::getFixed(4);
-    Width = 8;
+    Width = TypeSize::getFixed(8);
     MinOffset = -64;
     MaxOffset = 63;
     break;
@@ -3655,7 +3661,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
   case AArch64::STRWui:
   case AArch64::STRSui:
     Scale = TypeSize::getFixed(4);
-    Width = 4;
+    Width = TypeSize::getFixed(4);
     MinOffset = 0;
     MaxOffset = 4095;
     break;
@@ -3666,7 +3672,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
   case AArch64::STRHui:
   case AArch64::STRHHui:
     Scale = TypeSize::getFixed(2);
-    Width = 2;
+    Width = TypeSize::getFixed(2);
     MinOffset = 0;
     MaxOffset = 4095;
     break;
@@ -3677,7 +3683,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
   case AArch64::STRBui:
   case AArch64::STRBBui:
     Scale = TypeSize::getFixed(1);
-    Width = 1;
+    Width = TypeSize::getFixed(1);
     MinOffset = 0;
     MaxOffset = 4095;
     break;
@@ -3686,14 +3692,14 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
   case AArch64::STPDpre:
   case AArch64::LDPDpost:
     Scale = TypeSize::getFixed(8);
-    Width = 8;
+    Width = TypeSize::getFixed(8);
     MinOffset = -512;
     MaxOffset = 504;
     break;
   case AArch64::STPQpre:
   case AArch64::LDPQpost:
     Scale = TypeSize::getFixed(16);
-    Width = 16;
+    Width = TypeSize::getFixed(16);
     MinOffset = -1024;
     MaxOffset = 1008;
     break;
@@ -3702,26 +3708,26 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
   case AArch64::LDRXpost:
   case AArch64::LDRDpost:
     Scale = TypeSize::getFixed(1);
-    Width = 8;
+    Width = TypeSize::getFixed(8);
     MinOffset = -256;
     MaxOffset = 255;
     break;
   case AArch64::STRQpre:
   case AArch64::LDRQpost:
     Scale = TypeSize::getFixed(1);
-    Width = 16;
+    Width = TypeSize::getFixed(16);
     MinOffset = -256;
     MaxOffset = 255;
     break;
   case AArch64::ADDG:
     Scale = TypeSize::getFixed(16);
-    Width = 0;
+    Width = TypeSize::getFixed(0);
     MinOffset = 0;
     MaxOffset = 63;
     break;
   case AArch64::TAGPstack:
     Scale = TypeSize::getFixed(16);
-    Width = 0;
+    Width = TypeSize::getFixed(0);
     // TAGP with a negative offset turns into SUBP, which has a maximum offset
     // of 63 (not 64!).
     MinOffset = -63;
@@ -3731,42 +3737,42 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
   case AArch64::STGi:
   case AArch64::STZGi:
     Scale = TypeSize::getFixed(16);
-    Width = 16;
+    Width = TypeSize::getFixed(16);
     MinOffset = -256;
     MaxOffset = 255;
     break;
   case AArch64::STR_ZZZZXI:
   case AArch64::LDR_ZZZZXI:
     Scale = TypeSize::getScalable(16);
-    Width = SVEMaxBytesPerVector * 4;
+    Width = TypeSize::getScalable(16 * 4);
     MinOffset = -256;
     MaxOffset = 252;
     break;
   case AArch64::STR_ZZZXI:
   case AArch64::LDR_ZZZXI:
     Scale = TypeSize::getScalable(16);
-    Width = SVEMaxBytesPerVector * 3;
+    Width = TypeSize::getScalable(16 * 3);
     MinOffset = -256;
     MaxOffset = 253;
     break;
   case AArch64::STR_ZZXI:
   case AArch64::LDR_ZZXI:
     Scale = TypeSize::getScalable(16);
-    Width = SVEMaxBytesPerVector * 2;
+    Width = TypeSize::getScalable(16 * 2);
     MinOffset = -256;
     MaxOffset = 254;
     break;
   case AArch64::LDR_PXI:
   case AArch64::STR_PXI:
     Scale = TypeSize::getScalable(2);
-    Width = SVEMaxBytesPerVector / 8;
+    Width = TypeSize::getScalable(2);
     MinOffset = -256;
     MaxOffset = 255;
     break;
   case AArch64::LDR_ZXI:
   case AArch64::STR_ZXI:
     Scale = TypeSize::getScalable(16);
-    Width = SVEMaxBytesPerVector;
+    Width = TypeSize::getScalable(16);
     MinOffset = -256;
     MaxOffset = 255;
     break;
@@ -3793,7 +3799,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
     // A full vectors worth of data
     // Width = mbytes * elements
     Scale = TypeSize::getScalable(16);
-    Width = SVEMaxBytesPerVector;
+    Width = TypeSize::getScalable(16);
     MinOffset = -8;
     MaxOffset = 7;
     break;
@@ -3806,7 +3812,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
   case AArch64::ST2W_IMM:
   case AArch64::ST2D_IMM:
     Scale = TypeSize::getScalable(32);
-    Width = SVEMaxBytesPerVector * 2;
+    Width = TypeSize::getScalable(16 * 2);
     MinOffset = -8;
     MaxOffset = 7;
     break;
@@ -3819,7 +3825,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
   case AArch64::ST3W_IMM:
   case AArch64::ST3D_IMM:
     Scale = TypeSize::getScalable(48);
-    Width = SVEMaxBytesPerVector * 3;
+    Width = TypeSize::getScalable(16 * 3);
     MinOffset = -8;
     MaxOffset = 7;
     break;
@@ -3832,7 +3838,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
   case AArch64::ST4W_IMM:
   case AArch64::ST4D_IMM:
     Scale = TypeSize::getScalable(64);
-    Width = SVEMaxBytesPerVector * 4;
+    Width = TypeSize::getScalable(16 * 4);
     MinOffset = -8;
     MaxOffset = 7;
     break;
@@ -3854,7 +3860,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
     // A half vector worth of data
     // Width = mbytes * elements
     Scale = TypeSize::getScalable(8);
-    Width = SVEMaxBytesPerVector / 2;
+    Width = TypeSize::getScalable(8);
     MinOffset = -8;
     MaxOffset = 7;
     break;
@@ -3871,7 +3877,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
     // A quarter vector worth of data
     // Width = mbytes * elements
     Scale = TypeSize::getScalable(4);
-    Width = SVEMaxBytesPerVector / 4;
+    Width = TypeSize::getScalable(4);
     MinOffset = -8;
     MaxOffset = 7;
     break;
@@ -3883,20 +3889,20 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
     // A eighth vector worth of data
     // Width = mbytes * elements
     Scale = TypeSize::getScalable(2);
-    Width = SVEMaxBytesPerVector / 8;
+    Width = TypeSize::getScalable(2);
     MinOffset = -8;
     MaxOffset = 7;
     break;
   case AArch64::ST2Gi:
   case AArch64::STZ2Gi:
     Scale = TypeSize::getFixed(16);
-    Width = 32;
+    Width = TypeSize::getFixed(32);
     MinOffset = -256;
     MaxOffset = 255;
     break;
   case AArch64::STGPi:
     Scale = TypeSize::getFixed(16);
-    Width = 16;
+    Width = TypeSize::getFixed(16);
     MinOffset = -64;
     MaxOffset = 63;
     break;
@@ -3908,7 +3914,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
   case AArch64::LD1RSB_S_IMM:
   case AArch64::LD1RSB_D_IMM:
     Scale = TypeSize::getFixed(1);
-    Width = 1;
+    Width = TypeSize::getFixed(1);
     MinOffset = 0;
     MaxOffset = 63;
     break;
@@ -3918,7 +3924,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
   case AArch64::LD1RSH_S_IMM:
   case AArch64::LD1RSH_D_IMM:
     Scale = TypeSize::getFixed(2);
-    Width = 2;
+    Width = TypeSize::getFixed(2);
     MinOffset = 0;
     MaxOffset = 63;
     break;
@@ -3926,13 +3932,13 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
   case AArch64::LD1RW_D_IMM:
   case AArch64::LD1RSW_IMM:
     Scale = TypeSize::getFixed(4);
-    Width = 4;
+    Width = TypeSize::getFixed(4);
     MinOffset = 0;
     MaxOffset = 63;
     break;
   case AArch64::LD1RD_IMM:
     Scale = TypeSize::getFixed(8);
-    Width = 8;
+    Width = TypeSize::getFixed(8);
     MinOffset = 0;
     MaxOffset = 63;
     break;
@@ -5634,8 +5640,7 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
   }
 
   // Get the min/max offset and the scale.
-  TypeSize ScaleValue(0U, false);
-  unsigned Width;
+  TypeSize ScaleValue(0U, false), Width(0U, false);
   int64_t MinOff, MaxOff;
   if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
                                       MaxOff))
@@ -8392,8 +8397,8 @@ AArch64InstrInfo::getOutliningCandidateInfo(
       // if fixing it up would be in range.
       int64_t MinOffset,
           MaxOffset;  // Unscaled offsets for the instruction.
-      TypeSize Scale(0U, false); // The scale to multiply the offsets by.
-      unsigned DummyWidth;
+      // The scale to multiply the offsets by.
+      TypeSize Scale(0U, false), DummyWidth(0, false);
       getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
 
       Offset += 16; // Update the offset to what it would be if we outlined.
@@ -8898,7 +8903,7 @@ AArch64InstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MIT,
 void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
   for (MachineInstr &MI : MBB) {
     const MachineOperand *Base;
-    unsigned Width;
+    TypeSize Width(0, false);
     int64_t Offset;
     bool OffsetIsScalable;
 
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index a934103c90cbf92..8835e6a1779983e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -165,7 +165,7 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
   bool getMemOperandWithOffsetWidth(const MachineInstr &MI,
                                     const MachineOperand *&BaseOp,
                                     int64_t &Offset, bool &OffsetIsScalable,
-                                    unsigned &Width,
+                                    TypeSize &Width,
                                     const TargetRegisterInfo *TRI) const;
 
   /// Return the immediate offset of the base register in a load/store \p LdSt.
@@ -175,7 +175,7 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
   /// \p Scale, \p Width, \p MinOffset, and \p MaxOffset accordingly.
   ///
   /// For unscaled instructions, \p Scale is set to 1.
-  static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, unsigned &Width,
+  static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width,
                            int64_t &MinOffset, int64_t &MaxOffset);
 
   bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
diff --git a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
index b8b74ae8404d3f3..4afc678abaca63c 100644
--- a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
@@ -220,8 +220,7 @@ static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB,
       Opc = IsPaired ? AArch64::STPXi : AArch64::STRXui;
   }
   // The implicit scale for Offset is 8.
-  TypeSize Scale(0U, false);
-  unsigned Width;
+  TypeSize Scale(0U, false), Width(0U, false);
   int64_t MinOffset, MaxOffset;
   [[maybe_unused]] bool Success =
       AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset);
@@ -262,8 +261,7 @@ static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB,
       Opc = IsPaired ? AArch64::LDPXi : AArch64::LDRXui;
   }
   // The implicit scale for Offset is 8.
-  TypeSize Scale(0U, false);
-  unsigned Width;
+  TypeSize Scale(0U, false), Width(0U, false);
   int64_t MinOffset, MaxOffset;
   [[maybe_unused]] bool Success =
       AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset);
diff --git a/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll
index 110f0ef7f4a5500..7244ac949ab88c3 100644
--- a/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll
+++ b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll
@@ -59,9 +59,9 @@ define void @array_1D_insert(ptr %addr, %my_subtype %elt) #0 {
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0, #2, mul vl]
 ; CHECK-NEXT:    ld1d { z2.d }, p0/z, [x0]
+; CHECK-NEXT:    st1d { z0.d }, p0, [sp, #1, mul vl]
 ; CHECK-NEXT:    st1d { z1.d }, p0, [sp, #2, mul vl]
 ; CHECK-NEXT:    st1d { z2.d }, p0, [sp]
-; CHECK-NEXT:    st1d { z0.d }, p0, [sp, #1, mul vl]
 ; CHECK-NEXT:    addvl sp, sp, #3
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/sve-aliasing.ll b/llvm/test/CodeGen/AArch64/sve-aliasing.ll
index a6d7e1c0fbab173..a83dc494b3bd2c4 100644
--- a/llvm/test/CodeGen/AArch64/sve-aliasing.ll
+++ b/llvm/test/CodeGen/AArch64/sve-aliasing.ll
@@ -8,15 +8,15 @@ define void @scalable_v16i8(ptr noalias nocapture noundef %l0) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
-; CHECK-NEXT:    movprfx z1, z0
-; CHECK-NEXT:    mul z1.b, p0/m, z1.b, z0.b
-; CHECK-NEXT:    eor z0.d, z1.d, z0.d
+; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT:    movprfx z2, z0
+; CHECK-NEXT:    mul z2.b, p0/m, z2.b, z0.b
+; CHECK-NEXT:    movprfx z3, z1
+; CHECK-NEXT:    mul z3.b, p0/m, z3.b, z1.b
+; CHECK-NEXT:    eor z0.d, z2.d, z0.d
+; CHECK-NEXT:    eor z1.d, z3.d, z1.d
 ; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
-; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0, #1, mul vl]
-; CHECK-NEXT:    movprfx z1, z0
-; CHECK-NEXT:    mul z1.b, p0/m, z1.b, z0.b
-; CHECK-NEXT:    eor z0.d, z1.d, z0.d
-; CHECK-NEXT:    st1b { z0.b }, p0, [x0, #1, mul vl]
+; CHECK-NEXT:    st1b { z1.b }, p0, [x0, #1, mul vl]
 ; CHECK-NEXT:    ret
   %l3 = load <vscale x 16 x i8>, ptr %l0, align 16
   %l5 = mul <vscale x 16 x i8> %l3, %l3
@@ -37,15 +37,15 @@ define void @scalable_v8i16(ptr noalias nocapture nou...
[truncated]

@@ -3918,21 +3924,21 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
   case AArch64::LD1RSH_S_IMM:
   case AArch64::LD1RSH_D_IMM:
     Scale = TypeSize::getFixed(2);
-    Width = 2;
+    Width = TypeSize::getFixed(2);
     MinOffset = 0;
     MaxOffset = 63;
     break;
   case AArch64::LD1RW_IMM:
   case AArch64::LD1RW_D_IMM:
   case AArch64::LD1RSW_IMM:
     Scale = TypeSize::getFixed(4);
Contributor:

Aren't LD1RSW* SVE instructions as well? Any reason why we don't change them to getScalable?

davemgreen (Collaborator, Author):

I believe they load a scalar and broadcast it to all lanes, so only a fixed number of bytes is read from memory.
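For reference, the distinction shows up directly in the getMemOpInfo cases above; a condensed paraphrase of the patch (not new code):

// LD1RW_IMM reads one 32-bit element and broadcasts it to every lane, so its
// memory footprint is a fixed 4 bytes regardless of the vector length.
Width = TypeSize::getFixed(4);
// LD1W and the other whole-register loads read a full vector's worth of
// data: a minimum of 16 bytes, multiplied by vscale.
Width = TypeSize::getScalable(16);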

Contributor:

ah ok yeah makes sense

harviniriawan (Contributor):

lgtm

david-arm (Contributor) left a comment:

This looks like a nice improvement to alias analysis! I just had a few minor comments ...

@@ -1214,8 +1214,7 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
       SEH->eraseFromParent();
   }

-  TypeSize Scale = TypeSize::getFixed(1);
-  unsigned Width;
+  TypeSize Scale = TypeSize::getFixed(1), Width(0, false);
Contributor:

I think it would be nice to use consistent notation here and have

TypeSize Scale = TypeSize::getFixed(1), Width = TypeSize::getFixed(0);

     return false;
+  // The maximum vscale is 16 undef AArch64, return the maximal extent for the
Contributor:

Should this comment be "// The maximum vscale is 16 for AArch64"?

Contributor:

Do we have tests for loads and stores of predicate vectors, i.e. <vscale x 16 x i1>, <vscale x 8 x i1>, <vscale x 4 x i1>, <vscale x 2 x i1>? I'd expect aliasing information to be useful for those cases too.

davemgreen (Collaborator, Author):

We don't seem to be able to generate offset versions of these instructions at the moment (except for stack spills/reloads), so I've made it a mir test.
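For context, a minimal IR-level sketch of the predicate loads and stores being asked about (a hypothetical example for illustration only; the coverage added here is a .mir test because the offset forms are currently only produced by stack spills and reloads):

define void @predicate_copy(ptr %a, ptr %b) {
  %p = load <vscale x 16 x i1>, ptr %a
  store <vscale x 16 x i1> %p, ptr %b
  ret void
}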

Commit message:

The base change here is to change getMemOperandWithOffsetWidth to return a
TypeSize Width, which in turn allows areMemAccessesTriviallyDisjoint to reason
about trivially disjoint widths.
david-arm (Contributor) left a comment:

LGTM! Thanks for making the changes.

davemgreen merged commit 4d80122 into llvm:main on Nov 30, 2023 (2 of 3 checks passed).
davemgreen deleted the gh-sve-trivialdisjointscalable branch on November 30, 2023 at 16:54.