
[TTI] Support scalable offsets in getScalingFactorCost #88113

Merged

Conversation

huntergr-arm
Collaborator

Part of the work to support vscale-relative immediates in LSR.

No tests added yet, but I'd like feedback on the approach.

@llvmbot
Collaborator

llvmbot commented Apr 9, 2024

@llvm/pr-subscribers-llvm-transforms
@llvm/pr-subscribers-backend-x86
@llvm/pr-subscribers-backend-arm

@llvm/pr-subscribers-llvm-analysis

Author: Graham Hunter (huntergr-arm)

Changes

Part of the work to support vscale-relative immediates in LSR.

No tests added yet, but I'd like feedback on the approach.


Full diff: https://github.com/llvm/llvm-project/pull/88113.diff

10 Files Affected:

  • (modified) llvm/include/llvm/Analysis/TargetTransformInfo.h (+7-6)
  • (modified) llvm/include/llvm/Analysis/TargetTransformInfoImpl.h (+3-3)
  • (modified) llvm/include/llvm/CodeGen/BasicTTIImpl.h (+3-1)
  • (modified) llvm/lib/Analysis/TargetTransformInfo.cpp (+2-2)
  • (modified) llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (+4-4)
  • (modified) llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h (+2-1)
  • (modified) llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp (+3-1)
  • (modified) llvm/lib/Target/ARM/ARMTargetTransformInfo.h (+2-1)
  • (modified) llvm/lib/Target/X86/X86TargetTransformInfo.cpp (+2-1)
  • (modified) llvm/lib/Target/X86/X86TargetTransformInfo.h (+2-1)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index fa9392b86c15b9..4c6b8e312786cc 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -835,8 +835,8 @@ class TargetTransformInfo {
   /// TODO: Handle pre/postinc as well.
   InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                        int64_t BaseOffset, bool HasBaseReg,
-                                       int64_t Scale,
-                                       unsigned AddrSpace = 0) const;
+                                       int64_t Scale, unsigned AddrSpace = 0,
+                                       int64_t ScalableOffset = 0) const;
 
   /// Return true if the loop strength reduce pass should make
   /// Instruction* based TTI queries to isLegalAddressingMode(). This is
@@ -1894,7 +1894,8 @@ class TargetTransformInfo::Concept {
   virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                                int64_t BaseOffset,
                                                bool HasBaseReg, int64_t Scale,
-                                               unsigned AddrSpace) = 0;
+                                               unsigned AddrSpace,
+                                               int64_t ScalableOffset) = 0;
   virtual bool LSRWithInstrQueries() = 0;
   virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
   virtual bool isProfitableToHoist(Instruction *I) = 0;
@@ -2406,10 +2407,10 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
   }
   InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                        int64_t BaseOffset, bool HasBaseReg,
-                                       int64_t Scale,
-                                       unsigned AddrSpace) override {
+                                       int64_t Scale, unsigned AddrSpace,
+                                       int64_t ScalableOffset) override {
     return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
-                                     AddrSpace);
+                                     AddrSpace, ScalableOffset);
   }
   bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
   bool isTruncateFree(Type *Ty1, Type *Ty2) override {
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 63c2ef8912b29c..72c7b805abbb67 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -327,11 +327,11 @@ class TargetTransformInfoImplBase {
 
   InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                        int64_t BaseOffset, bool HasBaseReg,
-                                       int64_t Scale,
-                                       unsigned AddrSpace) const {
+                                       int64_t Scale, unsigned AddrSpace,
+                                       int64_t ScalableOffset) const {
     // Guess that all legal addressing mode are free.
     if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
-                              AddrSpace))
+                              AddrSpace, /*I=*/nullptr, ScalableOffset))
       return 0;
     return -1;
   }
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 42d8f74fd427fb..7f42e239d85d96 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -406,12 +406,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
 
   InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                        int64_t BaseOffset, bool HasBaseReg,
-                                       int64_t Scale, unsigned AddrSpace) {
+                                       int64_t Scale, unsigned AddrSpace,
+                                       int64_t ScalableOffset) {
     TargetLoweringBase::AddrMode AM;
     AM.BaseGV = BaseGV;
     AM.BaseOffs = BaseOffset;
     AM.HasBaseReg = HasBaseReg;
     AM.Scale = Scale;
+    AM.ScalableOffset = ScalableOffset;
     if (getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace))
       return 0;
     return -1;
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 5f933b4587843c..d00ab62bad9fad 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -532,9 +532,9 @@ bool TargetTransformInfo::prefersVectorizedAddressing() const {
 
 InstructionCost TargetTransformInfo::getScalingFactorCost(
     Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
-    int64_t Scale, unsigned AddrSpace) const {
+    int64_t Scale, unsigned AddrSpace, int64_t ScalableOffset) const {
   InstructionCost Cost = TTIImpl->getScalingFactorCost(
-      Ty, BaseGV, BaseOffset, HasBaseReg, Scale, AddrSpace);
+      Ty, BaseGV, BaseOffset, HasBaseReg, Scale, AddrSpace, ScalableOffset);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index ee7137b92445bb..2b75f0ea2d4d6f 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -4118,10 +4118,9 @@ bool AArch64TTIImpl::preferPredicateOverEpilogue(TailFoldingInfo *TFI) {
   return NumInsns >= SVETailFoldInsnThreshold;
 }
 
-InstructionCost
-AArch64TTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
-                                     int64_t BaseOffset, bool HasBaseReg,
-                                     int64_t Scale, unsigned AddrSpace) const {
+InstructionCost AArch64TTIImpl::getScalingFactorCost(
+    Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
+    int64_t Scale, unsigned AddrSpace, int64_t ScalableOffset) const {
   // Scaling factors are not free at all.
   // Operands                     | Rt Latency
   // -------------------------------------------
@@ -4134,6 +4133,7 @@ AArch64TTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
   AM.BaseOffs = BaseOffset;
   AM.HasBaseReg = HasBaseReg;
   AM.Scale = Scale;
+  AM.ScalableOffset = ScalableOffset;
   if (getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace))
     // Scale represents reg2 * scale, thus account for 1 if
     // it is not equal to 0 or 1.
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index de39dea2be43e1..0f7315446c70d4 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -407,7 +407,8 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
   /// If the AM is not supported, it returns a negative value.
   InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                        int64_t BaseOffset, bool HasBaseReg,
-                                       int64_t Scale, unsigned AddrSpace) const;
+                                       int64_t Scale, unsigned AddrSpace,
+                                       int64_t ScalableOffset) const;
   /// @}
 
   bool enableSelectOptimize() { return ST->enableSelectOptimize(); }
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 3be894ad3bef2c..73e47fbea23057 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -2572,12 +2572,14 @@ bool ARMTTIImpl::preferPredicatedReductionSelect(
 InstructionCost ARMTTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                                  int64_t BaseOffset,
                                                  bool HasBaseReg, int64_t Scale,
-                                                 unsigned AddrSpace) const {
+                                                 unsigned AddrSpace,
+                                                 int64_t ScalableOffset) const {
   TargetLoweringBase::AddrMode AM;
   AM.BaseGV = BaseGV;
   AM.BaseOffs = BaseOffset;
   AM.HasBaseReg = HasBaseReg;
   AM.Scale = Scale;
+  AM.ScalableOffset = ScalableOffset;
   if (getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace)) {
     if (ST->hasFPAO())
       return AM.Scale < 0 ? 1 : 0; // positive offsets execute faster
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index bb4b321b530091..10e4b2977a563a 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -303,7 +303,8 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
   /// If the AM is not supported, the return value must be negative.
   InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                        int64_t BaseOffset, bool HasBaseReg,
-                                       int64_t Scale, unsigned AddrSpace) const;
+                                       int64_t Scale, unsigned AddrSpace,
+                                       int64_t ScalableOffset) const;
 
   bool maybeLoweredToCall(Instruction &I);
   bool isLoweredToCall(const Function *F);
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 5d1810b5bc2c6f..0cfa1da2ce7d78 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -6670,7 +6670,8 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCost(
 InstructionCost X86TTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                                  int64_t BaseOffset,
                                                  bool HasBaseReg, int64_t Scale,
-                                                 unsigned AddrSpace) const {
+                                                 unsigned AddrSpace,
+                                                 int64_t ScalableOffset) const {
   // Scaling factors are not free at all.
   // An indexed folded instruction, i.e., inst (reg1, reg2, scale),
   // will take 2 allocations in the out of order engine instead of 1
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 985b00438ce878..060b2b98b341da 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -253,7 +253,8 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
   /// If the AM is not supported, it returns a negative value.
   InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                        int64_t BaseOffset, bool HasBaseReg,
-                                       int64_t Scale, unsigned AddrSpace) const;
+                                       int64_t Scale, unsigned AddrSpace,
+                                       int64_t ScalableOffset) const;
 
   bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                      const TargetTransformInfo::LSRCost &C2);

@paulwalker-arm
Collaborator

While I appreciate it comes with a significant refactoring cost, my initial reaction is that these interfaces should take StackOffset rather than treating fixed and scalable offsets as separate entities.

@huntergr-arm
Collaborator Author

Rebased and switched to StackOffset. There's a potential problem with inconsistent approaches here -- this function now uses StackOffset, isLegalAddressingMode takes two separate scalars, and we have two different functions for isLegalAddImmediate. I think we should be more consistent, though I don't have a preference for which approach to go with.

Another possibility is to significantly change the interface of this function so that it only takes the Scale argument. This interface is only used in LSR and basically acts as a wrapper around isLegalAddressingMode -- it just returns an InstructionCost based on the Scale if the addressing mode is legal, and asserts otherwise (the base dispatcher asserts if the Cost is negative, and the specialized versions return -1 if the mode is not legal). So you already need to know that the addressing mode is valid; you just want to know the additional cost of a scaled register.
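To make that contract concrete, here is a minimal caller-side sketch based on the signature in the diff above (the helper name and constants are hypothetical, not part of the patch):

  // Ask TTI for the extra cost of using a scaled register in an
  // addressing mode that the caller already knows is legal.
  InstructionCost scaledRegCost(const TargetTransformInfo &TTI, Type *AccessTy,
                                int64_t FixedOff, int64_t ScalableOff,
                                bool HasBaseReg, int64_t Scale,
                                unsigned AddrSpace) {
    InstructionCost Cost = TTI.getScalingFactorCost(
        AccessTy, /*BaseGV=*/nullptr, FixedOff, HasBaseReg, Scale, AddrSpace,
        ScalableOff);
    // Target implementations return -1 for an illegal mode, and the TTI
    // dispatcher asserts the cost is non-negative, so this should only be
    // called for modes already validated via isLegalAddressingMode().
    assert(Cost >= 0 && "addressing mode must already be legal");
    return Cost; // e.g. 0 if the scaled register is free, 1 otherwise.
  }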

@@ -834,7 +834,7 @@ class TargetTransformInfo {
   /// If the AM is not supported, it returns a negative value.
   /// TODO: Handle pre/postinc as well.
   InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
-                                       int64_t BaseOffset, bool HasBaseReg,
+                                       StackOffset BaseOffset, bool HasBaseReg,
Collaborator

It seems we now have three approaches for passing a 'scalable' and a 'fixed-length' offset to interfaces for this:

  1. One using StackOffset (this patch for getScalingFactorCost)
  2. One passing in two separate variables (isLegalAddressingMode)
  3. Another one with two separate interfaces (isLegalAddImmediate vs isLegalAddScalableImmediate)

It would be good to settle on a single approach. @paulwalker-arm is there a particular reason you're pushing for (1) for this interface?

If we go for (1), I think we should rename StackOffset to MemOffset to make it more generic, because in this instance the offset does not necessarily apply only to stack addresses.
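For reference, the three styles look roughly like this (signatures abbreviated and illustrative rather than exact):

  // (1) One combined offset type (this patch, after switching to StackOffset).
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       StackOffset BaseOffset, bool HasBaseReg,
                                       int64_t Scale, unsigned AddrSpace) const;

  // (2) Two separate scalar parameters (isLegalAddressingMode).
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace = 0, Instruction *I = nullptr,
                             int64_t ScalableOffset = 0) const;

  // (3) Two separate interfaces for fixed vs. scalable immediates.
  bool isLegalAddImmediate(int64_t Imm) const;
  bool isLegalAddScalableImmediate(int64_t Imm) const;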

Collaborator

I made my request because the function was changed to take both a fixed-length and a scalable offset, which is exactly why StackOffset exists (albeit, as you say, with a name that's now out of date). I think all offset-related common code interfaces should be unified to accept that offsets can be fixed or scalable, and thus use whichever of the TypeSize.h family of types best fits the specific need. I'll note we are missing a generic type like TypeSize but signed, to make this truly possible.

Collaborator Author

FWIW, my original patches did have a new type like that -- I originally called it AddressOffset, then TargetImmediate. I got some feedback that separating the terms out might be better, so we ended up with different int64_ts.

If we end up with an address formula with both fixed and scalable immediate terms, we'll have to prioritize one over the other for a given target due to the way LSR currently forms the Uses it will evaluate. AFAIK nobody implements instructions with mixed fixed and scalable immediates, so one or the other immediate would need to become part of the base for that formula.

@davemgreen
Collaborator

Hi. An AddrMode should be thought of as a formula - a sum of factors that make up the addressing mode.

  /// This represents an addressing mode of:
  ///    BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale
  /// If BaseGV is null,  there is no BaseGV.
  /// If BaseOffs is zero, there is no base offset.
  /// If HasBaseReg is false, there is no base register.
  /// If Scale is zero, there is no ScaleReg.  Scale of 1 indicates a reg with
  /// no scale.
  /// If ScalableOffset is zero, there is no scalable offset.

IMO the BaseOffset and ScalableOffset are separate quantities that should not be conflated. This isn't like TypeSize, where it needs to be one value that is either fixed or scalable; it is two values that can be treated separately. (Which has the added benefit that all the backends that don't care about scalable vectors can keep not caring about them, as ScalableOffset will always be 0 and they can treat BaseOffs as a simple integer.)
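As a rough illustration of that last point (hypothetical target code, not taken from this patch), a backend without scalable addressing can reject or ignore the new field without any other changes:

  bool SomeTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                 const AddrMode &AM, Type *Ty,
                                                 unsigned AS,
                                                 Instruction *I) const {
    // LSR never forms a scalable offset for such a target, so this check
    // is effectively free; everything else keeps treating AM.BaseOffs as
    // a plain integer.
    if (AM.ScalableOffset != 0)
      return false;
    // ... existing fixed-offset legality checks ...
    return isInt<12>(AM.BaseOffs) && (AM.Scale == 0 || AM.Scale == 1);
  }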

#88124 has quite a lot going on, though; it is not obvious to me what it would mean for LSR to treat BaseOffset and ScalableOffset separately throughout.

@paulwalker-arm
Collaborator

I agree, but my formula is "BaseGV + StackOffset + BaseReg + Scale*ScaleReg", which results in common code that matches the other instances where an offset has fixed and scalable parts. StackOffset is not like TypeSize in that it represents both fixed and scalable offsets independently, and thus any backend can simply ignore the scalable side, the same as it would ignore ScalableOffset.

I'm not hugely familiar with this code, so I don't know if we truly need such independence, but if we don't then I'd still prefer the use of a common type rather than, say, passing an int and a bool separately. I just see no reason to treat scalable vectors as second-class citizens.
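For context, StackOffset (in llvm/Support/TypeSize.h at the time of writing) already models the fixed and scalable parts independently, so a backend can read only the side it cares about:

  // A combined offset of (16 + 32 * vscale) bytes.
  StackOffset Off = StackOffset::get(/*Fixed=*/16, /*Scalable=*/32);

  int64_t Fixed    = Off.getFixed();    // 16
  int64_t Scalable = Off.getScalable(); // 32; a backend without scalable
                                        // vectors can simply ignore this part.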

  AM.HasBaseReg = HasBaseReg;
  AM.Scale = Scale;
  assert(!BaseOffset.getScalable() && "Scalable offsets unsupported");
Collaborator

@paulwalker-arm paulwalker-arm May 3, 2024

I'm not sure this assert is necessary. I'd just assign AM.ScalableOffset as normal. It's really up to the users of AM to assert ScalableOffset is zero. This is essentially what would need to happen if AM used the correct types to start with. In general, though, I'd expect most targets to simply ignore the ScalableOffset field.
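In other words, the suggestion amounts to something like this (a sketch of the alternative, not the exact committed code):

  AM.HasBaseReg = HasBaseReg;
  AM.Scale = Scale;
  // Forward the scalable part instead of asserting here; the target's
  // isLegalAddressingMode() is responsible for rejecting it if unsupported.
  AM.BaseOffs = BaseOffset.getFixed();
  AM.ScalableOffset = BaseOffset.getScalable();
  if (getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace))
    return 0;
  return -1;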

  AM.HasBaseReg = HasBaseReg;
  AM.Scale = Scale;
  assert(!BaseOffset.getScalable() && "Scalable offsets unsupported");
Collaborator

As above, this is the responsibility of the users of AM.

Collaborator

@paulwalker-arm paulwalker-arm left a comment

Based on the current definition of TargetLoweringBase::AddrMode, I'm happy with this change. As Sander points out, there are now multiple ways to represent the same thing, which is messy, so I'd love to see some follow-on work to make TargetLoweringBase::AddrMode use the correct types, be that StackOffset if the current requirement of BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale is what we want, or something else if the actual goal is for BaseOffs to be fixed OR scalable.

@@ -32,6 +32,8 @@ class Function;
 /// Base class for use as a mix-in that aids implementing
 /// a TargetTransformInfo-compatible class.
 class TargetTransformInfoImplBase {
+  friend class TargetTransformInfo;
Collaborator

I just wondered why this is needed?

Collaborator Author

It's a leftover from a previous attempt at making an overloaded method to call the existing fixed-only method from the base implementation if there wasn't an override of the newer method. It didn't work out, so I've removed it. Thanks for spotting it.

@huntergr-arm huntergr-arm merged commit 2e8d815 into llvm:main May 10, 2024
3 of 4 checks passed
@huntergr-arm huntergr-arm deleted the scalable-offset-getScalingFactorCost branch May 10, 2024 10:22