-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[InstCombine] ptrmask of gep for dynamic pointer aligment #80002
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Jon Chesterfield (JonChesterfield) ChangesTargets the dynamic realignment pattern of Specifically, when the pointer already has alignment information, dynamically realigning it to less than is already known should be a no-op. Discovered while writing test cases for another patch. For the zero low bits of a known aligned pointer, adding the gep index then removing it with a mask is a no-op. Folding the ptrmask effect entirely into the gep is the ideal result as that unblocks other optimisations that are not aware of ptrmask. In some other cases the gep is known to be dead and is removed without changing the ptrmask. In the least effective case, this transform creates a new gep with a rounded-down index and still leaves the ptrmask unchanged. That simplified gep is still a minor improvement, geps are cheap and ptrmask occurs in address calculation contexts so I don't think it's worth special casing to avoid the extra instruction. Full diff: https://github.com/llvm/llvm-project/pull/80002.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index a647be2d26c7..ed2cc14a23c5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2040,6 +2040,40 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
Changed = true;
}
+ // Combine:
+ // (ptrmask (getelementptr i8, ptr p, imm i), imm mask)
+ // -> (ptrmask (getelementptr i8, ptr p, imm (i & mask)), imm mask)
+ // where only the low bits known to be zero in the pointer are changed
+ uint64_t GEPIndex;
+ uint64_t PtrMaskImmediate;
+ if (match(&CI, m_Intrinsic<Intrinsic::ptrmask>(
+ m_GEP(m_Value(InnerPtr), m_ConstantInt(GEPIndex)),
+ m_ConstantInt(PtrMaskImmediate)))) {
+ auto *GEP = cast<GetElementPtrInst>(II->getArgOperand(0));
+
+ if (GEP->getSourceElementType() == Type::getInt8Ty(CI.getContext())) {
+ unsigned Log2Align = llvm::Log2(InnerPtr->getPointerAlignment(DL));
+ uint64_t PointerAlignBits = (uint64_t(1) << Log2Align) - 1;
+
+ uint64_t HighBitsGEPIndex = GEPIndex & ~PointerAlignBits;
+ uint64_t MaskedLowBitsGEPIndex =
+ GEPIndex & PointerAlignBits & PtrMaskImmediate;
+
+ uint64_t MaskedGEPIndex = HighBitsGEPIndex | MaskedLowBitsGEPIndex;
+
+ if (MaskedGEPIndex != GEPIndex) {
+ Type *I64 = Type::getInt64Ty(CI.getContext());
+ auto MaskedGEP =
+ Builder.CreateGEP(GEP->getSourceElementType(), InnerPtr,
+ ConstantInt::get(I64, MaskedGEPIndex),
+ GEP->getName(), GEP->isInBounds());
+
+ replaceOperand(CI, 0, MaskedGEP);
+ Changed = true;
+ }
+ }
+ }
+
// See if we can deduce non-null.
if (!CI.hasRetAttr(Attribute::NonNull) &&
(Known.isNonZero() ||
diff --git a/llvm/test/Transforms/InstCombine/ptrmask.ll b/llvm/test/Transforms/InstCombine/ptrmask.ll
index afeb5d5251d0..77cf1004c717 100644
--- a/llvm/test/Transforms/InstCombine/ptrmask.ll
+++ b/llvm/test/Transforms/InstCombine/ptrmask.ll
@@ -80,12 +80,12 @@ define ptr addrspace(1) @ptrmask_combine_consecutive_preserve_attrs_todo2(ptr ad
define ptr @ptrmask_combine_add_nonnull(ptr %p) {
; CHECK-LABEL: define ptr @ptrmask_combine_add_nonnull
; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT: [[PM0:%.*]] = call align 64 ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -64)
-; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, ptr [[PM0]], i64 33
-; CHECK-NEXT: [[R:%.*]] = call nonnull align 32 ptr @llvm.ptrmask.p0.i64(ptr [[PGEP]], i64 -32)
+; CHECK-NEXT: [[PM0:%.*]] = call align 4 ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -60)
+; CHECK-NEXT: [[PGEP1:%.*]] = getelementptr i8, ptr [[PM0]], i64 32
+; CHECK-NEXT: [[R:%.*]] = call nonnull align 32 ptr @llvm.ptrmask.p0.i64(ptr [[PGEP1]], i64 -32)
; CHECK-NEXT: ret ptr [[R]]
;
- %pm0 = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -64)
+ %pm0 = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -60)
%pgep = getelementptr i8, ptr %pm0, i64 33
%r = call ptr @llvm.ptrmask.p0.i64(ptr %pgep, i64 -16)
ret ptr %r
@@ -287,6 +287,90 @@ define ptr addrspace(1) @ptrmask_maintain_provenance_i32(ptr addrspace(1) %p0) {
ret ptr addrspace(1) %r
}
+define ptr @ptrmask_is_nop0(ptr align 8 %p) {
+; CHECK-LABEL: define ptr @ptrmask_is_nop0
+; CHECK-SAME: (ptr align 8 [[P:%.*]]) {
+; CHECK-NEXT: ret ptr [[P]]
+;
+ %pm = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -8)
+ ret ptr %pm
+}
+
+define ptr @ptrmask_is_nop1(ptr align 8 %p) {
+; CHECK-LABEL: define ptr @ptrmask_is_nop1
+; CHECK-SAME: (ptr align 8 [[P:%.*]]) {
+; CHECK-NEXT: ret ptr [[P]]
+;
+ %pm = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -4)
+ ret ptr %pm
+}
+
+define ptr @ptrmask_to_modified_gep0(ptr align 8 %p) {
+; CHECK-LABEL: define ptr @ptrmask_to_modified_gep0
+; CHECK-SAME: (ptr align 8 [[P:%.*]]) {
+; CHECK-NEXT: [[PM:%.*]] = call align 16 ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -16)
+; CHECK-NEXT: ret ptr [[PM]]
+;
+ %gep = getelementptr i8, ptr %p, i32 5
+ %pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep, i64 -16)
+ ret ptr %pm
+}
+
+define ptr @ptrmask_to_modified_gep1(ptr align 8 %p) {
+; CHECK-LABEL: define ptr @ptrmask_to_modified_gep1
+; CHECK-SAME: (ptr align 8 [[P:%.*]]) {
+; CHECK-NEXT: ret ptr [[P]]
+;
+ %gep = getelementptr i8, ptr %p, i32 6
+ %pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep, i64 -8)
+ ret ptr %pm
+}
+
+define ptr @ptrmask_to_modified_gep2(ptr align 16 %p) {
+; CHECK-LABEL: define ptr @ptrmask_to_modified_gep2
+; CHECK-SAME: (ptr align 16 [[P:%.*]]) {
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[P]], i64 12
+; CHECK-NEXT: ret ptr [[GEP1]]
+;
+ %gep = getelementptr i8, ptr %p, i32 15
+ %pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep, i64 -4)
+ ret ptr %pm
+}
+
+define ptr @ptrmask_to_modified_gep4(ptr align 8 %p) {
+; CHECK-LABEL: define ptr @ptrmask_to_modified_gep4
+; CHECK-SAME: (ptr align 8 [[P:%.*]]) {
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[P]], i64 24
+; CHECK-NEXT: [[PM:%.*]] = call align 16 ptr @llvm.ptrmask.p0.i64(ptr [[GEP1]], i64 -16)
+; CHECK-NEXT: ret ptr [[PM]]
+;
+ %gep = getelementptr i8, ptr %p, i32 29
+ %pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep, i64 -16)
+ ret ptr %pm
+}
+
+define ptr @ptrmask_to_modified_gep5(ptr align 8 %p) {
+; CHECK-LABEL: define ptr @ptrmask_to_modified_gep5
+; CHECK-SAME: (ptr align 8 [[P:%.*]]) {
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[P]], i64 24
+; CHECK-NEXT: ret ptr [[GEP1]]
+;
+ %gep = getelementptr i8, ptr %p, i32 30
+ %pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep, i64 -8)
+ ret ptr %pm
+}
+
+define ptr @ptrmask_to_modified_gep6(ptr align 16 %p) {
+; CHECK-LABEL: define ptr @ptrmask_to_modified_gep6
+; CHECK-SAME: (ptr align 16 [[P:%.*]]) {
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[P]], i64 28
+; CHECK-NEXT: ret ptr [[GEP1]]
+;
+ %gep = getelementptr i8, ptr %p, i32 31
+ %pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep, i64 -4)
+ ret ptr %pm
+}
+
define ptr @ptrmask_is_useless0(i64 %i, i64 %m) {
; CHECK-LABEL: define ptr @ptrmask_is_useless0
; CHECK-SAME: (i64 [[I:%.*]], i64 [[M:%.*]]) {
|
56a3ced
to
a849004
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this belongs inside demanded bits simplification. We currently don't handle GEPs there because historically that would have been useless, but now that ptrmask gives a pointer demanded bits root, it makes sense to simplify GEPs as well.
Added tests for non-i8 and for vector types (which don't work as implemented, it makes extracting the value messier). Likewise this was only written for constantints as that's the main dynamic-align use case. Moved the code to InstCombineSimplifyDemanded - same as in the original patch modulo variable names, tests working as before. |
2da26d9
to
de8a821
Compare
Looks fine in general. Does anyone else have any concerns? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ideally this would not be inside the ptrmask
handling, but rather a separate GetElementPtr
case in the switch. The simplification would then be based on the DemandedMask
. But I guess we can start like this.
auto *GEP = cast<GetElementPtrInst>(II->getArgOperand(0)); | ||
|
||
if (GEP->getSourceElementType() == Type::getInt8Ty(I->getContext())) { | ||
unsigned Log2Align = llvm::Log2(InnerPtr->getPointerAlignment(DL)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Instead of getPointerAlignment(), this should make use of LHSKnown
. Please also add a test where the GEP operand isn't literally a function argument but say an alignment-preserving GEP of an argument.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This confused me - LHSKnown refers to the first argument to ptrmask, but it's the argument to the gep that we want. Current patch avoids overwriting the LHSKnown variable, though it looks like that's a mutable local which doesn't particularly need to correspond to I->getOperand(0)
|
||
if (MaskedGEPIndex != GEPIndex) { | ||
Type *I64 = Type::getInt64Ty(I->getContext()); | ||
auto MaskedGEP = |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
auto MaskedGEP = | |
Value *MaskedGEP = |
I believe you also need to set the Builder insertion point here, otherwise the GEP may be created in the wrong place.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice catch on builder, yes
auto MaskedGEP = | ||
Builder.CreateGEP(GEP->getSourceElementType(), InnerPtr, | ||
ConstantInt::get(I64, MaskedGEPIndex), | ||
GEP->getName(), GEP->isInBounds()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Test for inbounds
preservation is missing.
Not forgotten, merely sidetracked. LHSKnown instead of alignment works as expected, need to add the tests and update the PR. |
5d019d9
to
3fc2507
Compare
Now uses computeknownbits on the gep pointer argument instead of checking alignment, added the test cases and checked previous ones are unchanged. As written it could be extended - non-i8 gep types, vector types. Strictly speaking it's not only adding a constant to zero low bits that can be removed either - various combinations of known bits on the input and a constant gep can be combined with the subsequent mask and simplified. Since gep of i8 is essentially add with an implicit ptrtoint. I think this is useful as a standalone improvement without that additional complexity. |
llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
Outdated
Show resolved
Hide resolved
llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
Outdated
Show resolved
Hide resolved
1a90b5a
to
827431f
Compare
const unsigned trailingZeros = | ||
computeKnownBits(InnerPtr, Depth + 1, I).countMinTrailingZeros(); | ||
|
||
if (trailingZeros < 64) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should 64 -> be DL.getPointerTypeSizeInBits(I->getType())
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Or Q.DL.getIndexTypeSizeInBits
actually.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"Resolve" does not apply a comment, merely deletes the contents of the text box and then hides it.
The 64 magic number here was avoiding shifting a uin64_t by 64 (because C++), replaced with an explicit zero test. Added a test, the zero case is indeed handled correctly elsewhere in instcombine.
uint64_t MaskedGEPIndex = HighBitsGEPIndex | MaskedLowBitsGEPIndex; | ||
|
||
if (MaskedGEPIndex != GEPIndex) { | ||
auto *GEP = cast<GetElementPtrInst>(II->getArgOperand(0)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@nikic, is this safe given that the match is m_PtrAdd
? Is there a more generic way to test if a m_PtrAdd
match has inbounds
? Or does it req a GEP
check?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We need the GEP for other things, perhaps m_PtrAdd could bind the GEP it self to a variable? Unsure
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You could make it dyn_cast
and just fail on nullptr to be safe. But think this should be fine
|
||
if (MaskedGEPIndex != GEPIndex) { | ||
auto *GEP = cast<GetElementPtrInst>(II->getArgOperand(0)); | ||
Type *I64 = Type::getInt64Ty(I->getContext()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Think this should be using Q.DL.getIndexTypeSizeInBits
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, thanks. Gone with the slightly clumsy DL.getIndexType(GEP->getPointerOperand()->getType())
827431f
to
50eb883
Compare
Anything further on this one? |
50eb883
to
8b2b14f
Compare
Targets the dynamic realignment pattern of `(Ptr + Align - 1) & -Align;` as implemented by gep then ptrmask. Specifically, when the pointer already has alignment information, dynamically realigning it to less than is already known should be a no-op. Discovered while writing test cases for another patch. For the zero low bits of a known aligned pointer, adding the gep index then removing it with a mask is a no-op. Folding the ptrmask effect entirely into the gep is the ideal result as that unblocks other optimisations that are not aware of ptrmask. In some other cases the gep is known to be dead and is removed without changing the ptrmask. In the least effective case, this transform creates a new gep with a rounded-down index and still leaves the ptrmask unchanged. That simplified gep is still a minor improvement, geps are cheap and ptrmask occurs in address calculation contexts so I don't think it's worth special casing to avoid the extra instruction.
Targets the dynamic realignment pattern of `(Ptr + Align - 1) & -Align;` as implemented by gep then ptrmask. Specifically, when the pointer already has alignment information, dynamically realigning it to less than is already known should be a no-op. Discovered while writing test cases for another patch. For the zero low bits of a known aligned pointer, adding the gep index then removing it with a mask is a no-op. Folding the ptrmask effect entirely into the gep is the ideal result as that unblocks other optimisations that are not aware of ptrmask. In some other cases the gep is known to be dead and is removed without changing the ptrmask. In the least effective case, this transform creates a new gep with a rounded-down index and still leaves the ptrmask unchanged. That simplified gep is still a minor improvement, geps are cheap and ptrmask occurs in address calculation contexts so I don't think it's worth special casing to avoid the extra instruction.
Targets the dynamic realignment pattern of `(Ptr + Align - 1) & -Align;` as implemented by gep then ptrmask. Specifically, when the pointer already has alignment information, dynamically realigning it to less than is already known should be a no-op. Discovered while writing test cases for another patch. For the zero low bits of a known aligned pointer, adding the gep index then removing it with a mask is a no-op. Folding the ptrmask effect entirely into the gep is the ideal result as that unblocks other optimisations that are not aware of ptrmask. In some other cases the gep is known to be dead and is removed without changing the ptrmask. In the least effective case, this transform creates a new gep with a rounded-down index and still leaves the ptrmask unchanged. That simplified gep is still a minor improvement, geps are cheap and ptrmask occurs in address calculation contexts so I don't think it's worth special casing to avoid the extra instruction.
Targets the dynamic realignment pattern of `(Ptr + Align - 1) & -Align;` as implemented by gep then ptrmask. Specifically, when the pointer already has alignment information, dynamically realigning it to less than is already known should be a no-op. Discovered while writing test cases for another patch. For the zero low bits of a known aligned pointer, adding the gep index then removing it with a mask is a no-op. Folding the ptrmask effect entirely into the gep is the ideal result as that unblocks other optimisations that are not aware of ptrmask. In some other cases the gep is known to be dead and is removed without changing the ptrmask. In the least effective case, this transform creates a new gep with a rounded-down index and still leaves the ptrmask unchanged. That simplified gep is still a minor improvement, geps are cheap and ptrmask occurs in address calculation contexts so I don't think it's worth special casing to avoid the extra instruction.
Targets the dynamic realignment pattern of `(Ptr + Align - 1) & -Align;` as implemented by gep then ptrmask. Specifically, when the pointer already has alignment information, dynamically realigning it to less than is already known should be a no-op. Discovered while writing test cases for another patch. For the zero low bits of a known aligned pointer, adding the gep index then removing it with a mask is a no-op. Folding the ptrmask effect entirely into the gep is the ideal result as that unblocks other optimisations that are not aware of ptrmask. In some other cases the gep is known to be dead and is removed without changing the ptrmask. In the least effective case, this transform creates a new gep with a rounded-down index and still leaves the ptrmask unchanged. That simplified gep is still a minor improvement, geps are cheap and ptrmask occurs in address calculation contexts so I don't think it's worth special casing to avoid the extra instruction.
Targets the dynamic realignment pattern of `(Ptr + Align - 1) & -Align;` as implemented by gep then ptrmask. Specifically, when the pointer already has alignment information, dynamically realigning it to less than is already known should be a no-op. Discovered while writing test cases for another patch. For the zero low bits of a known aligned pointer, adding the gep index then removing it with a mask is a no-op. Folding the ptrmask effect entirely into the gep is the ideal result as that unblocks other optimisations that are not aware of ptrmask. In some other cases the gep is known to be dead and is removed without changing the ptrmask. In the least effective case, this transform creates a new gep with a rounded-down index and still leaves the ptrmask unchanged. That simplified gep is still a minor improvement, geps are cheap and ptrmask occurs in address calculation contexts so I don't think it's worth special casing to avoid the extra instruction.
Targets the dynamic realignment pattern of `(Ptr + Align - 1) & -Align;` as implemented by gep then ptrmask. Specifically, when the pointer already has alignment information, dynamically realigning it to less than is already known should be a no-op. Discovered while writing test cases for another patch. For the zero low bits of a known aligned pointer, adding the gep index then removing it with a mask is a no-op. Folding the ptrmask effect entirely into the gep is the ideal result as that unblocks other optimisations that are not aware of ptrmask. In some other cases the gep is known to be dead and is removed without changing the ptrmask. In the least effective case, this transform creates a new gep with a rounded-down index and still leaves the ptrmask unchanged. That simplified gep is still a minor improvement, geps are cheap and ptrmask occurs in address calculation contexts so I don't think it's worth special casing to avoid the extra instruction.
Targets the dynamic realignment pattern of `(Ptr + Align - 1) & -Align;` as implemented by gep then ptrmask. Specifically, when the pointer already has alignment information, dynamically realigning it to less than is already known should be a no-op. Discovered while writing test cases for another patch. For the zero low bits of a known aligned pointer, adding the gep index then removing it with a mask is a no-op. Folding the ptrmask effect entirely into the gep is the ideal result as that unblocks other optimisations that are not aware of ptrmask. In some other cases the gep is known to be dead and is removed without changing the ptrmask. In the least effective case, this transform creates a new gep with a rounded-down index and still leaves the ptrmask unchanged. That simplified gep is still a minor improvement, geps are cheap and ptrmask occurs in address calculation contexts so I don't think it's worth special casing to avoid the extra instruction.
Targets the dynamic realignment pattern of `(Ptr + Align - 1) & -Align;` as implemented by gep then ptrmask. Specifically, when the pointer already has alignment information, dynamically realigning it to less than is already known should be a no-op. Discovered while writing test cases for another patch. For the zero low bits of a known aligned pointer, adding the gep index then removing it with a mask is a no-op. Folding the ptrmask effect entirely into the gep is the ideal result as that unblocks other optimisations that are not aware of ptrmask. In some other cases the gep is known to be dead and is removed without changing the ptrmask. In the least effective case, this transform creates a new gep with a rounded-down index and still leaves the ptrmask unchanged. That simplified gep is still a minor improvement, geps are cheap and ptrmask occurs in address calculation contexts so I don't think it's worth special casing to avoid the extra instruction.
Targets the dynamic realignment pattern of `(Ptr + Align - 1) & -Align;` as implemented by gep then ptrmask. Specifically, when the pointer already has alignment information, dynamically realigning it to less than is already known should be a no-op. Discovered while writing test cases for another patch. For the zero low bits of a known aligned pointer, adding the gep index then removing it with a mask is a no-op. Folding the ptrmask effect entirely into the gep is the ideal result as that unblocks other optimisations that are not aware of ptrmask. In some other cases the gep is known to be dead and is removed without changing the ptrmask. In the least effective case, this transform creates a new gep with a rounded-down index and still leaves the ptrmask unchanged. That simplified gep is still a minor improvement, geps are cheap and ptrmask occurs in address calculation contexts so I don't think it's worth special casing to avoid the extra instruction.
Targets the dynamic realignment pattern of `(Ptr + Align - 1) & -Align;` as implemented by gep then ptrmask. Specifically, when the pointer already has alignment information, dynamically realigning it to less than is already known should be a no-op. Discovered while writing test cases for another patch. For the zero low bits of a known aligned pointer, adding the gep index then removing it with a mask is a no-op. Folding the ptrmask effect entirely into the gep is the ideal result as that unblocks other optimisations that are not aware of ptrmask. In some other cases the gep is known to be dead and is removed without changing the ptrmask. In the least effective case, this transform creates a new gep with a rounded-down index and still leaves the ptrmask unchanged. That simplified gep is still a minor improvement, geps are cheap and ptrmask occurs in address calculation contexts so I don't think it's worth special casing to avoid the extra instruction.
Targets the dynamic realignment pattern of `(Ptr + Align - 1) & -Align;` as implemented by gep then ptrmask. Specifically, when the pointer already has alignment information, dynamically realigning it to less than is already known should be a no-op. Discovered while writing test cases for another patch. For the zero low bits of a known aligned pointer, adding the gep index then removing it with a mask is a no-op. Folding the ptrmask effect entirely into the gep is the ideal result as that unblocks other optimisations that are not aware of ptrmask. In some other cases the gep is known to be dead and is removed without changing the ptrmask. In the least effective case, this transform creates a new gep with a rounded-down index and still leaves the ptrmask unchanged. That simplified gep is still a minor improvement, geps are cheap and ptrmask occurs in address calculation contexts so I don't think it's worth special casing to avoid the extra instruction.
Targets the dynamic realignment pattern of
(Ptr + Align - 1) & -Align;
as implemented by gep then ptrmask.Specifically, when the pointer already has alignment information, dynamically realigning it to less than is already known should be a no-op. Discovered while writing test cases for another patch.
For the zero low bits of a known aligned pointer, adding the gep index then removing it with a mask is a no-op. Folding the ptrmask effect entirely into the gep is the ideal result as that unblocks other optimisations that are not aware of ptrmask.
In some other cases the gep is known to be dead and is removed without changing the ptrmask.
In the least effective case, this transform creates a new gep with a rounded-down index and still leaves the ptrmask unchanged. That simplified gep is still a minor improvement, geps are cheap and ptrmask occurs in address calculation contexts so I don't think it's worth special casing to avoid the extra instruction.