[BasicAA] Add Vscale GEP decomposition on variable index #69152
Conversation
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-llvm-analysis

Author: Harvin Iriawan (harviniriawan)

Changes: Further work on top of #65759 (the first 2 commits of this pull request, still pending review).
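For context, a minimal LLVM IR sketch (hypothetical, not taken from the patch or its tests) of the kind of addressing the decomposition is meant to handle: both stores below address %base plus vscale * 16 bytes, once through a scalable-typed GEP and once through the @llvm.vscale intrinsic.

; Hypothetical illustration only.
define void @vscale_gep_example(ptr %base) {
entry:
  ; One <vscale x 4 x i32> element occupies vscale * 16 bytes.
  %p0 = getelementptr <vscale x 4 x i32>, ptr %base, i64 1
  ; The same byte offset built explicitly from @llvm.vscale.
  %vs  = call i64 @llvm.vscale.i64()
  %off = mul i64 %vs, 16
  %p1  = getelementptr i8, ptr %base, i64 %off
  store i32 0, ptr %p0
  store i32 1, ptr %p1
  ret void
}
declare i64 @llvm.vscale.i64()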
Patch is 45.93 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/69152.diff

10 Files Affected:
diff --git a/llvm/include/llvm/Analysis/MemoryLocation.h b/llvm/include/llvm/Analysis/MemoryLocation.h
index 85ca84e68a13971..ef87412572145fa 100644
--- a/llvm/include/llvm/Analysis/MemoryLocation.h
+++ b/llvm/include/llvm/Analysis/MemoryLocation.h
@@ -64,16 +64,19 @@ class Value;
//
// If asked to represent a pathologically large value, this will degrade to
// std::nullopt.
+// Store Scalable information in bit 62 of Value. Scalable information is
+// required to do Alias Analysis on Scalable quantities
class LocationSize {
enum : uint64_t {
BeforeOrAfterPointer = ~uint64_t(0),
- AfterPointer = BeforeOrAfterPointer - 1,
- MapEmpty = BeforeOrAfterPointer - 2,
- MapTombstone = BeforeOrAfterPointer - 3,
+ ScalableBit = uint64_t(1) << 62,
+ AfterPointer = (BeforeOrAfterPointer - 1) & ~ScalableBit,
+ MapEmpty = (BeforeOrAfterPointer - 2) & ~ScalableBit,
+ MapTombstone = (BeforeOrAfterPointer - 3) & ~ScalableBit,
ImpreciseBit = uint64_t(1) << 63,
// The maximum value we can represent without falling back to 'unknown'.
- MaxValue = (MapTombstone - 1) & ~ImpreciseBit,
+ MaxValue = (MapTombstone - 1) & ~(ImpreciseBit | ScalableBit),
};
uint64_t Value;
@@ -88,6 +91,7 @@ class LocationSize {
"AfterPointer is imprecise by definition.");
static_assert(BeforeOrAfterPointer & ImpreciseBit,
"BeforeOrAfterPointer is imprecise by definition.");
+ static_assert(~(MaxValue & ScalableBit), "Max value don't have bit 62 set");
public:
// FIXME: Migrate all users to construct via either `precise` or `upperBound`,
@@ -98,12 +102,17 @@ class LocationSize {
// this assumes the provided value is precise.
constexpr LocationSize(uint64_t Raw)
: Value(Raw > MaxValue ? AfterPointer : Raw) {}
-
- static LocationSize precise(uint64_t Value) { return LocationSize(Value); }
+ constexpr LocationSize(uint64_t Raw, bool Scalable)
+ : Value(Raw > MaxValue ? AfterPointer
+ : Raw | (Scalable ? ScalableBit : uint64_t(0))) {}
+
+ // Make construction of LocationSize that takes in uint64_t to set Scalable
+ // information as false
+ static LocationSize precise(uint64_t Value) {
+ return LocationSize(Value, false /*Scalable*/);
+ }
static LocationSize precise(TypeSize Value) {
- if (Value.isScalable())
- return afterPointer();
- return precise(Value.getFixedValue());
+ return LocationSize(Value.getKnownMinValue(), Value.isScalable());
}
static LocationSize upperBound(uint64_t Value) {
@@ -150,6 +159,8 @@ class LocationSize {
return beforeOrAfterPointer();
if (Value == AfterPointer || Other.Value == AfterPointer)
return afterPointer();
+ if (isScalable() || Other.isScalable())
+ return afterPointer();
return upperBound(std::max(getValue(), Other.getValue()));
}
@@ -157,9 +168,20 @@ class LocationSize {
bool hasValue() const {
return Value != AfterPointer && Value != BeforeOrAfterPointer;
}
- uint64_t getValue() const {
+ bool isScalable() const { return (Value & ScalableBit); }
+
+ TypeSize getValue() const {
assert(hasValue() && "Getting value from an unknown LocationSize!");
- return Value & ~ImpreciseBit;
+ assert((Value & ~(ImpreciseBit | ScalableBit)) < MaxValue &&
+ "Scalable bit of value should be masked");
+ return {Value & ~(ImpreciseBit | ScalableBit), isScalable()};
+ }
+
+ uint64_t getUintValue() const {
+ assert(hasValue() && "Getting value from an unknown LocationSize!");
+ assert((Value & ~(ImpreciseBit | ScalableBit)) < MaxValue &&
+ "Scalable bit of value should be masked");
+ return Value & ~(ImpreciseBit | ScalableBit);
}
// Returns whether or not this value is precise. Note that if a value is
@@ -169,7 +191,9 @@ class LocationSize {
}
// Convenience method to check if this LocationSize's value is 0.
- bool isZero() const { return hasValue() && getValue() == 0; }
+ bool isZero() const {
+ return hasValue() && getValue().getKnownMinValue() == 0;
+ }
/// Whether accesses before the base pointer are possible.
bool mayBeBeforePointer() const { return Value == BeforeOrAfterPointer; }
diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index c162b8f6edc1905..951716cfe99131a 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -44,6 +44,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
@@ -63,6 +64,7 @@
#define DEBUG_TYPE "basicaa"
using namespace llvm;
+using namespace llvm::PatternMatch;
/// Enable analysis of recursive PHI nodes.
static cl::opt<bool> EnableRecPhiAnalysis("basic-aa-recphi", cl::Hidden,
@@ -101,22 +103,23 @@ bool BasicAAResult::invalidate(Function &Fn, const PreservedAnalyses &PA,
//===----------------------------------------------------------------------===//
/// Returns the size of the object specified by V or UnknownSize if unknown.
-static uint64_t getObjectSize(const Value *V, const DataLayout &DL,
- const TargetLibraryInfo &TLI,
- bool NullIsValidLoc,
- bool RoundToAlign = false) {
+/// getObjectSize does not support scalable Value
+static LocationSize getObjectSize(const Value *V, const DataLayout &DL,
+ const TargetLibraryInfo &TLI,
+ bool NullIsValidLoc,
+ bool RoundToAlign = false) {
uint64_t Size;
ObjectSizeOpts Opts;
Opts.RoundToAlign = RoundToAlign;
Opts.NullIsUnknownSize = NullIsValidLoc;
if (getObjectSize(V, Size, DL, &TLI, Opts))
- return Size;
- return MemoryLocation::UnknownSize;
+ return LocationSize(Size);
+ return LocationSize(MemoryLocation::UnknownSize);
}
/// Returns true if we can prove that the object specified by V is smaller than
/// Size.
-static bool isObjectSmallerThan(const Value *V, uint64_t Size,
+static bool isObjectSmallerThan(const Value *V, LocationSize Size,
const DataLayout &DL,
const TargetLibraryInfo &TLI,
bool NullIsValidLoc) {
@@ -151,19 +154,21 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size,
// This function needs to use the aligned object size because we allow
// reads a bit past the end given sufficient alignment.
- uint64_t ObjectSize = getObjectSize(V, DL, TLI, NullIsValidLoc,
- /*RoundToAlign*/ true);
+ LocationSize ObjectSize = getObjectSize(V, DL, TLI, NullIsValidLoc,
+ /*RoundToAlign*/ true);
- return ObjectSize != MemoryLocation::UnknownSize && ObjectSize < Size;
+ // Bail on comparing V and Size if Size is scalable
+ return ObjectSize != MemoryLocation::UnknownSize && !Size.isScalable() &&
+ ObjectSize.getValue() < Size.getValue();
}
/// Return the minimal extent from \p V to the end of the underlying object,
/// assuming the result is used in an aliasing query. E.g., we do use the query
/// location size and the fact that null pointers cannot alias here.
-static uint64_t getMinimalExtentFrom(const Value &V,
- const LocationSize &LocSize,
- const DataLayout &DL,
- bool NullIsValidLoc) {
+static LocationSize getMinimalExtentFrom(const Value &V,
+ const LocationSize &LocSize,
+ const DataLayout &DL,
+ bool NullIsValidLoc) {
// If we have dereferenceability information we know a lower bound for the
// extent as accesses for a lower offset would be valid. We need to exclude
// the "or null" part if null is a valid pointer. We can ignore frees, as an
@@ -175,15 +180,16 @@ static uint64_t getMinimalExtentFrom(const Value &V,
// If queried with a precise location size, we assume that location size to be
// accessed, thus valid.
if (LocSize.isPrecise())
- DerefBytes = std::max(DerefBytes, LocSize.getValue());
- return DerefBytes;
+ DerefBytes = std::max(DerefBytes, LocSize.getValue().getKnownMinValue());
+ return LocationSize(DerefBytes, LocSize.isScalable());
}
/// Returns true if we can prove that the object specified by V has size Size.
-static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
+static bool isObjectSize(const Value *V, TypeSize Size, const DataLayout &DL,
const TargetLibraryInfo &TLI, bool NullIsValidLoc) {
- uint64_t ObjectSize = getObjectSize(V, DL, TLI, NullIsValidLoc);
- return ObjectSize != MemoryLocation::UnknownSize && ObjectSize == Size;
+ LocationSize ObjectSize = getObjectSize(V, DL, TLI, NullIsValidLoc);
+ return ObjectSize != MemoryLocation::UnknownSize &&
+ ObjectSize.getValue() == Size;
}
//===----------------------------------------------------------------------===//
@@ -342,13 +348,20 @@ struct LinearExpression {
/// Analyzes the specified value as a linear expression: "A*V + B", where A and
/// B are constant integers.
-static LinearExpression GetLinearExpression(
- const CastedValue &Val, const DataLayout &DL, unsigned Depth,
- AssumptionCache *AC, DominatorTree *DT) {
+static LinearExpression GetLinearExpression(const CastedValue &Val,
+ const DataLayout &DL,
+ unsigned Depth, AssumptionCache *AC,
+ DominatorTree *DT) {
// Limit our recursion depth.
if (Depth == 6)
return Val;
+ // If llvm.vscale is matched, set linear expression with scale 1 and offset 0
+ if (match(Val.V, m_VScale())) {
+ return LinearExpression(Val, APInt(Val.getBitWidth(), 1),
+ APInt(Val.getBitWidth(), 0), true);
+ }
+
if (const ConstantInt *Const = dyn_cast<ConstantInt>(Val.V))
return LinearExpression(Val, APInt(Val.getBitWidth(), 0),
Val.evaluateWith(Const->getValue()), true);
@@ -455,6 +468,12 @@ struct VariableGEPIndex {
CastedValue Val;
APInt Scale;
+ // A value representing vscale quantity in a GEP expression
+ bool IsVscale;
+ // A flag indicating that the IsVscale variable GEP holds more than one Val
+ // (e.g. Vscale^2 or Vscale * X)
+ bool InvalidVarVscale;
+
// Context instruction to use when querying information about this index.
const Instruction *CxtI;
@@ -477,13 +496,10 @@ struct VariableGEPIndex {
dbgs() << "\n";
}
void print(raw_ostream &OS) const {
- OS << "(V=" << Val.V->getName()
- << ", zextbits=" << Val.ZExtBits
- << ", sextbits=" << Val.SExtBits
- << ", truncbits=" << Val.TruncBits
- << ", scale=" << Scale
- << ", nsw=" << IsNSW
- << ", negated=" << IsNegated << ")";
+ OS << "(V=" << Val.V->getName() << " IsVscale=" << IsVscale
+ << ", zextbits=" << Val.ZExtBits << ", sextbits=" << Val.SExtBits
+ << ", truncbits=" << Val.TruncBits << ", scale=" << Scale
+ << ", nsw=" << IsNSW << ", negated=" << IsNegated << ")";
}
};
}
@@ -604,6 +620,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
for (User::const_op_iterator I = GEPOp->op_begin() + 1, E = GEPOp->op_end();
I != E; ++I, ++GTI) {
const Value *Index = *I;
+ const bool ScalableGEP = isa<ScalableVectorType>(GTI.getIndexedType());
// Compute the (potentially symbolic) offset in bytes for this index.
if (StructType *STy = GTI.getStructTypeOrNull()) {
// For a struct, add the member offset.
@@ -615,27 +632,17 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
continue;
}
+ TypeSize AllocTypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
// For an array/pointer, add the element offset, explicitly scaled.
+ // Skip adding to constant offset if GEP index is marked as scalable
if (const ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
if (CIdx->isZero())
continue;
-
- // Don't attempt to analyze GEPs if the scalable index is not zero.
- TypeSize AllocTypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
- if (AllocTypeSize.isScalable()) {
- Decomposed.Base = V;
- return Decomposed;
+ if (!ScalableGEP) {
+ Decomposed.Offset += AllocTypeSize.getFixedValue() *
+ CIdx->getValue().sextOrTrunc(MaxIndexSize);
+ continue;
}
-
- Decomposed.Offset += AllocTypeSize.getFixedValue() *
- CIdx->getValue().sextOrTrunc(MaxIndexSize);
- continue;
- }
-
- TypeSize AllocTypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
- if (AllocTypeSize.isScalable()) {
- Decomposed.Base = V;
- return Decomposed;
}
GepHasConstantOffset = false;
@@ -645,22 +652,50 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
unsigned Width = Index->getType()->getIntegerBitWidth();
unsigned SExtBits = IndexSize > Width ? IndexSize - Width : 0;
unsigned TruncBits = IndexSize < Width ? Width - IndexSize : 0;
- LinearExpression LE = GetLinearExpression(
- CastedValue(Index, 0, SExtBits, TruncBits), DL, 0, AC, DT);
+ // Scalable GEP decomposition
+ // Allow Scalable GEP to be decomposed in the case of
+ // 1. getelementptr <4 x vscale x i32> with 1st index as a constant
+ // 2. Index which have a leaf of @llvm.vscale
+ // In both cases, essentially CastedValue of VariableGEPIndex is Vscale,
+ // however in the 1st case, CastedValue is of type constant, hence another
+ // flag in VariableGEPIndex is created in this case, IsVscale If GEP is
+ // Scalable type, e.g. <4 x vscale x i32>, the first index will have
+ // vscale as a variable index, create a LE in this case
+ LinearExpression LE(CastedValue(Index, 0, SExtBits, TruncBits));
+ if (ScalableGEP) {
+ if (const ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
+ LE = LinearExpression(
+ CastedValue(Index, 0, SExtBits, TruncBits),
+ CastedValue(Index, 0, SExtBits, TruncBits)
+ .evaluateWith(CIdx->getValue()),
+ APInt(CastedValue(Index, 0, SExtBits, TruncBits).getBitWidth(),
+ 0),
+ true);
+ assert(LE.Offset.isZero() &&
+ "Scalable GEP, the offset of LE should be 0");
+ }
+ } else
+ LE = GetLinearExpression(CastedValue(Index, 0, SExtBits, TruncBits), DL,
+ 0, AC, DT);
// Scale by the type size.
- unsigned TypeSize = AllocTypeSize.getFixedValue();
+ unsigned TypeSize = AllocTypeSize.getKnownMinValue();
LE = LE.mul(APInt(IndexSize, TypeSize), GEPOp->isInBounds());
Decomposed.Offset += LE.Offset.sext(MaxIndexSize);
APInt Scale = LE.Scale.sext(MaxIndexSize);
+ bool LEhasVscale = match(LE.Val.V, m_VScale());
// If we already had an occurrence of this index variable, merge this
// scale into it. For example, we want to handle:
// A[x][x] -> x*16 + x*4 -> x*20
// This also ensures that 'x' only appears in the index list once.
+ // Only add to IsVscale VariableGEPIndex if it's @llvm.vscale or gep
+ // vscale index
for (unsigned i = 0, e = Decomposed.VarIndices.size(); i != e; ++i) {
- if (Decomposed.VarIndices[i].Val.V == LE.Val.V &&
- Decomposed.VarIndices[i].Val.hasSameCastsAs(LE.Val)) {
+ if (Decomposed.VarIndices[i].Val.hasSameCastsAs(LE.Val) &&
+ ((Decomposed.VarIndices[i].IsVscale &&
+ (ScalableGEP || LEhasVscale)) ||
+ Decomposed.VarIndices[i].Val.V == LE.Val.V)) {
Scale += Decomposed.VarIndices[i].Scale;
Decomposed.VarIndices.erase(Decomposed.VarIndices.begin() + i);
break;
@@ -669,10 +704,19 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
// Make sure that we have a scale that makes sense for this target's
// index size.
+ // Only allow variableGEP decomposition for constants, in the case of
+ // vscale
Scale = adjustToIndexSize(Scale, IndexSize);
+ bool InvalidVarVscale = (ScalableGEP && LEhasVscale) ||
+ (ScalableGEP && !isa<ConstantInt>(LE.Val.V));
if (!!Scale) {
- VariableGEPIndex Entry = {LE.Val, Scale, CxtI, LE.IsNSW,
+ VariableGEPIndex Entry = {LE.Val,
+ Scale,
+ ScalableGEP || LEhasVscale,
+ InvalidVarVscale,
+ CxtI,
+ LE.IsNSW,
/* IsNegated */ false};
Decomposed.VarIndices.push_back(Entry);
}
@@ -1049,6 +1093,19 @@ AliasResult BasicAAResult::aliasGEP(
if (DecompGEP1.Base == GEP1 && DecompGEP2.Base == V2)
return AliasResult::MayAlias;
+ // If Vscale Variable index has another variable in V, return mayAlias
+ for (unsigned i = 0, e = DecompGEP1.VarIndices.size(); i != e; ++i) {
+ if (DecompGEP1.VarIndices[i].IsVscale &&
+ DecompGEP1.VarIndices[i].InvalidVarVscale)
+ return AliasResult::MayAlias;
+ }
+
+ for (unsigned i = 0, e = DecompGEP2.VarIndices.size(); i != e; ++i) {
+ if (DecompGEP2.VarIndices[i].IsVscale &&
+ DecompGEP2.VarIndices[i].InvalidVarVscale)
+ return AliasResult::MayAlias;
+ }
+
// Subtract the GEP2 pointer from the GEP1 pointer to find out their
// symbolic difference.
subtractDecomposedGEPs(DecompGEP1, DecompGEP2, AAQI);
@@ -1056,14 +1113,14 @@ AliasResult BasicAAResult::aliasGEP(
// If an inbounds GEP would have to start from an out of bounds address
// for the two to alias, then we can assume noalias.
if (*DecompGEP1.InBounds && DecompGEP1.VarIndices.empty() &&
- V2Size.hasValue() && DecompGEP1.Offset.sge(V2Size.getValue()) &&
+ V2Size.hasValue() && DecompGEP1.Offset.sge(V2Size.getUintValue()) &&
isBaseOfObject(DecompGEP2.Base))
return AliasResult::NoAlias;
if (isa<GEPOperator>(V2)) {
// Symmetric case to above.
if (*DecompGEP2.InBounds && DecompGEP1.VarIndices.empty() &&
- V1Size.hasValue() && DecompGEP1.Offset.sle(-V1Size.getValue()) &&
+ V1Size.hasValue() && DecompGEP1.Offset.sle(-V1Size.getUintValue()) &&
isBaseOfObject(DecompGEP1.Base))
return AliasResult::NoAlias;
}
@@ -1116,13 +1173,13 @@ AliasResult BasicAAResult::aliasGEP(
if (!VLeftSize.hasValue())
return AliasResult::MayAlias;
- const uint64_t LSize = VLeftSize.getValue();
- if (Off.ult(LSize)) {
+ const uint64_t LSize = VLeftSize.getUintValue();
+ if (Off.ult(LSize) && !VLeftSize.isScalable()) {
// Conservatively drop processing if a phi was visited and/or offset is
// too big.
AliasResult AR = AliasResult::PartialAlias;
if (VRightSize.hasValue() && Off.ule(INT32_MAX) &&
- (Off + VRightSize.getValue()).ule(LSize)) {
+ (Off + VRightSize.getUintValue()).ule(LSize)) {
// Memory referenced by right pointer is nested. Save the offset in
// cache. Note that originally offset estimated as GEP1-V2, but
// AliasResult contains the shift that represents GEP1+Offset=V2.
@@ -1131,7 +1188,8 @@ AliasResult BasicAAResult::aliasGEP(
}
return AR;
}
- return AliasResult::NoAlias;
+ if (!VLeftSize.isScalable())
+ return AliasResult::NoAlias;
}
// We need to know both acess sizes for all the following heuristics.
@@ -1144,6 +1202,11 @@ AliasResult BasicAAResult::aliasGEP(
const VariableGEPIndex &Index = DecompGEP1.VarIndices[i];
const APInt &Scale = Index.Scale;
APInt ScaleForGCD = Scale;
+ assert(
+ Index.I...
[truncated]
✅ With the latest revision this PR passed the C/C++ code formatter.
Please remove all the unrelated changes (anything outside BasicAA) from this PR.
That was just a rebase.
I've rebased this again and gone through adding some extra tests and done some extra cleanup and adjustments. Hopefully it looks OK and I haven't made it wrong. I've added some other reviewers to see if they can help check it over.

Ping. Thanks.
Rebase and ping. Thanks.
if (match(Val.V, m_VScale())) {
  return LinearExpression(Val, APInt(Val.getBitWidth(), 1),
                          APInt(Val.getBitWidth(), 0), true);
}
I don't understand the purpose of this check. Doesn't this do the same as the default case, at least as implemented?
// however in the 1st case, CastedValue is of type constant, hence another
// flag in VariableGEPIndex is created in this case, IsVScale If GEP is
// Scalable type, e.g. <4 x vscale x i32>, the first index will have
// vscale as a variable index, create a LE in this case.
So it sounds like you are interested in modelling vscale * scale offsets, but not vscale * scale * var. If IsVScale is set, then the variable is just ignored, is that correct?

If so, then an alternative way to model this is to use some kind of sentinel value (say reinterpret_cast<Value*>(-1)) as the variable, which will get interpreted as vscale. Then, we should be able to reuse the existing code, with some adjustments to e.g. make the range calculation for the value use the vscale range, etc.
Okay, looking a bit further I guess you kind of already do this, just with checks of the IsVScale flag. I think I'd still prefer a sentinel value, to make sure we "crash and burn" if we fail to handle it somewhere, instead of silently miscompiling. This should also allow you to do something actually useful for the vscale intrinsic handling in LinearExpression, as we would ensure that the same value gets used for vscale x and the intrinsic.
I think that sounds like a good idea, but it is currently using the value for the bitwidth so may need some other way to represent that.
Index.IsVScale
    ? ConstantRange::getNonEmpty(
          APInt(OffsetRange.getBitWidth(), 1),
          APInt::getMaxValue(OffsetRange.getBitWidth()))
You can use getVScaleRange() here.
We were trying to keep the first version simpler, getting the basics working without vscale range before adding it in the future.
@@ -1165,6 +1204,46 @@ AliasResult BasicAAResult::aliasGEP(
  if (!V1Size.hasValue() || !V2Size.hasValue())
    return AliasResult::MayAlias;

  // VScale Alias Analysis
Please drop this code from the initial patch -- let's make sure everything works with vscale in the decomposition first, before we extend to also handling scalable location size.
I was hoping that more of the existing code could handle vscales already if they were treated like variables, but I think there might need to be something that knows that the vscale in the offset is the same as the vscale in the typesize.
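A hypothetical IR sketch of that situation (not from the patch): the benefit only materialises if the vscale in the offset and the vscale in the access size are treated as the same runtime quantity.

; Hypothetical illustration: the first store covers [%p, %p + vscale*16) and
; the second starts exactly at %p + vscale*16. Treating both occurrences of
; vscale as the same value lets AA conclude the accesses are adjacent and do
; not overlap; two independent unknowns would force MayAlias.
define void @adjacent(ptr %p) {
  %vs  = call i64 @llvm.vscale.i64()
  %off = mul i64 %vs, 16
  %q   = getelementptr i8, ptr %p, i64 %off
  store <vscale x 4 x i32> zeroinitializer, ptr %p
  store <vscale x 4 x i32> zeroinitializer, ptr %q
  ret void
}
declare i64 @llvm.vscale.i64()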
Enable BasicAA to be done on Scalable GEP & LocationSize

Scalable GEP expressions such as @llvm.vscale and GEPs of scalable type are attached to the VariableGEPIndex, with Val representing vscale. VScale AA works if there is only one variable index (the vscale) and constant offsets in the GEP for now.
I've added a VScaleSentinel. It means that the Value cannot be used for the typesize any more though, so it currently treats vscale values as 64bit, possibly truncated to a smaller number of bits through the CastedValue. Let me know if you think a different way would be better.
Can we explicitly store the bit width of the original value, similar to how we store the sext/zext/trunc widths?
If I'm reading this right, the main goal of this is to be able to represent a gep whose index type is a scalable type? If so, I'm tempted to suggest an alternative approach. We seem to generally be moving in the direction of ptradd (i.e. i8 index types), so could we canonicalize a gep <vscale x ...> to a gep i8? Doing so would remove the need for a value to represent vscale when we can't just use the intrinsic. This would remove the need for the sentinel value, and would simplify this patch significantly (I think).
Thanks for the suggestion - that would certainly help simplify things. I was originally a little sceptical that it would not lead to regressions (especially in LSR), but I've been running a few experiments and I think things look like they should be OK. I will see if I can make just i8 + vscale intrinsics work and maybe try and split it up into separate parts where I can.
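For illustration, a hedged IR sketch of the canonicalization being discussed (the element type and the 16-byte multiplier are illustrative assumptions, not from the patch); both functions compute the same address.

; Before: a GEP whose source element type is scalable.
define ptr @before(ptr %p, i64 %i) {
  %q = getelementptr <vscale x 4 x i32>, ptr %p, i64 %i
  ret ptr %q
}
; After: the same address as an i8 (ptradd-style) GEP built from @llvm.vscale;
; one <vscale x 4 x i32> element occupies vscale * 16 bytes.
define ptr @after(ptr %p, i64 %i) {
  %vs   = call i64 @llvm.vscale.i64()
  %step = mul i64 %vs, 16
  %off  = mul i64 %step, %i
  %q    = getelementptr i8, ptr %p, i64 %off
  ret ptr %q
}
declare i64 @llvm.vscale.i64()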
A collection of tests from llvm#69152 and for constant offsets with scalable typesizes.
This is a separate, but related issue to llvm#69152, which was attempting to improve AA with scalable dependency distances. This patch attempts to improve the case where there are scalable accesses with a constant offset between them. We happened to get a report of such a thing recently, where, so long as the vscale_range is known, the maximum size of the access can be assessed and better aliasing results can be returned. The upper bound of the vscale_range, along with the known part of the typesize, is used to prove that Off >= CR.upper * LSize. It does not try to produce PartialAlias results at the moment from the lower vscale_range.
This is a separate, but related issue to #69152, which was attempting to improve AA with scalable dependency distances. This patch attempts to improve the case where there are scalable accesses with a constant offset between them. We happened to get a report of such a thing recently, where, so long as the vscale_range is known, the maximum size of the access can be assessed and better aliasing results can be returned. The upper bound of the vscale_range, along with the known part of the typesize, is used to prove that Off >= CR.upper * LSize. It does not try to produce PartialAlias results at the moment from the lower vscale_range. It also enables the added benefit of allowing better alias analysis when the RHS of the two values is scalable, but the LHS is normal and can be treated like any other aliasing query.
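A small hypothetical example of that reasoning (the numbers are illustrative): with vscale_range(1,16), a <vscale x 4 x i32> access spans at most 16 * 16 = 256 bytes, so an access at a constant offset of 256 or more bytes from it cannot overlap and NoAlias can be returned.

; Hypothetical illustration only.
define void @no_alias_at_constant_offset(ptr %p) vscale_range(1,16) {
  ; Covers bytes [0, vscale*16) from %p, i.e. at most [0, 256).
  store <vscale x 4 x i32> zeroinitializer, ptr %p
  ; Starts at byte 256, so Off (256) >= CR.upper (16) * LSize (16).
  %q = getelementptr i8, ptr %p, i64 256
  store i32 0, ptr %q
  ret void
}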
Further work on top of #69716