-
Notifications
You must be signed in to change notification settings - Fork 10.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[InstCombine] Canonicalize scalable GEPs to use llvm.vscale intrinsic #90569
Conversation
@llvm/pr-subscribers-clang @llvm/pr-subscribers-llvm-transforms Author: Nikita Popov (nikic) ChangesCanonicalize getelementptr instructions for scalable vector types into ptradd representation with an explicit llvm.vscale call. This representation has better support in BasicAA, which can reason about llvm.vscale, but not plain scalable GEPs. @davemgreen I'm curious what you think about this change, as you did the work for llvm.vscale support in BasicAA. Can we do this, or would it regress something else? (There is also a less aggressive variant where we just move the Full diff: https://github.com/llvm/llvm-project/pull/90569.diff 8 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 7356941be64593..8fe1d01a4515d6 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2787,6 +2787,14 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
GEP.isInBounds()));
}
+ // Canonicalize scalable GEPs to an explicit offset using the llvm.vscale
+ // intrinsic. This has better support in BasicAA.
+ if (IsGEPSrcEleScalable) {
+ Value *Offset = EmitGEPOffset(cast<GEPOperator>(&GEP));
+ return replaceInstUsesWith(
+ GEP, Builder.CreatePtrAdd(PtrOp, Offset, "", GEP.isInBounds()));
+ }
+
// Check to see if the inputs to the PHI node are getelementptr instructions.
if (auto *PN = dyn_cast<PHINode>(PtrOp)) {
auto *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0));
diff --git a/llvm/test/Analysis/ValueTracking/phi-known-bits.ll b/llvm/test/Analysis/ValueTracking/phi-known-bits.ll
index e5b8ba151e04c2..7b5e143fecfd9c 100644
--- a/llvm/test/Analysis/ValueTracking/phi-known-bits.ll
+++ b/llvm/test/Analysis/ValueTracking/phi-known-bits.ll
@@ -932,9 +932,11 @@ define i1 @recursiveGEP_withPtrSub_scalableGEP(ptr %val1) {
; CHECK-NEXT: br label [[WHILE_COND_I:%.*]]
; CHECK: while.cond.i:
; CHECK-NEXT: [[A_PN_I:%.*]] = phi ptr [ [[TEST_0_I:%.*]], [[WHILE_COND_I]] ], [ [[VAL1:%.*]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[TEST_0_I]] = getelementptr <vscale x 16 x i8>, ptr [[A_PN_I]], i64 1
-; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[TEST_0_I]], align 1
-; CHECK-NEXT: [[CMP3_NOT_I:%.*]] = icmp eq i8 [[TMP0]], 0
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 4
+; CHECK-NEXT: [[TEST_0_I]] = getelementptr i8, ptr [[A_PN_I]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[TEST_0_I]], align 1
+; CHECK-NEXT: [[CMP3_NOT_I:%.*]] = icmp eq i8 [[TMP2]], 0
; CHECK-NEXT: br i1 [[CMP3_NOT_I]], label [[WHILE_END_I:%.*]], label [[WHILE_COND_I]]
; CHECK: while.end.i:
; CHECK-NEXT: [[BOOL:%.*]] = icmp eq ptr [[TEST_0_I]], [[VAL1]]
@@ -964,9 +966,11 @@ define i1 @recursiveGEP_withPtrSub_scalableGEP_inbounds(ptr %val1) {
; CHECK-NEXT: br label [[WHILE_COND_I:%.*]]
; CHECK: while.cond.i:
; CHECK-NEXT: [[A_PN_I:%.*]] = phi ptr [ [[TEST_0_I:%.*]], [[WHILE_COND_I]] ], [ [[VAL1:%.*]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[TEST_0_I]] = getelementptr inbounds <vscale x 16 x i8>, ptr [[A_PN_I]], i64 1
-; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[TEST_0_I]], align 1
-; CHECK-NEXT: [[CMP3_NOT_I:%.*]] = icmp eq i8 [[TMP0]], 0
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 4
+; CHECK-NEXT: [[TEST_0_I]] = getelementptr inbounds i8, ptr [[A_PN_I]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[TEST_0_I]], align 1
+; CHECK-NEXT: [[CMP3_NOT_I:%.*]] = icmp eq i8 [[TMP2]], 0
; CHECK-NEXT: br i1 [[CMP3_NOT_I]], label [[WHILE_END_I:%.*]], label [[WHILE_COND_I]]
; CHECK: while.end.i:
; CHECK-NEXT: [[BOOL:%.*]] = icmp eq ptr [[TEST_0_I]], [[VAL1]]
diff --git a/llvm/test/Transforms/InstCombine/gep-vector.ll b/llvm/test/Transforms/InstCombine/gep-vector.ll
index 3465c620b70fe5..f058338fbf7cdc 100644
--- a/llvm/test/Transforms/InstCombine/gep-vector.ll
+++ b/llvm/test/Transforms/InstCombine/gep-vector.ll
@@ -127,7 +127,10 @@ define ptr addrspace(3) @inbounds_bitcast_vec_to_array_addrspace_matching_alloc_
define ptr @test_accumulate_constant_offset_vscale_nonzero(<vscale x 16 x i1> %pg, ptr %base) {
; CHECK-LABEL: @test_accumulate_constant_offset_vscale_nonzero(
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 1, i64 4
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 4
+; CHECK-NEXT: [[GEP_OFFS:%.*]] = or disjoint i64 [[TMP2]], 4
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[GEP_OFFS]]
; CHECK-NEXT: ret ptr [[GEP]]
;
%gep = getelementptr <vscale x 16 x i8>, ptr %base, i64 1, i64 4
diff --git a/llvm/test/Transforms/InstCombine/gepofconstgepi8.ll b/llvm/test/Transforms/InstCombine/gepofconstgepi8.ll
index 7b7c6fba699c21..4c8c56a9262e34 100644
--- a/llvm/test/Transforms/InstCombine/gepofconstgepi8.ll
+++ b/llvm/test/Transforms/InstCombine/gepofconstgepi8.ll
@@ -280,8 +280,11 @@ define ptr @test_scalable(ptr %base, i64 %a) {
; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P1]], i64 [[A]]
-; CHECK-NEXT: [[P2:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[TMP0]], i64 1
+; CHECK-NEXT: [[INDEX:%.*]] = add i64 [[A]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 4
+; CHECK-NEXT: [[P2_IDX:%.*]] = mul i64 [[INDEX]], [[TMP1]]
+; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P1]], i64 [[P2_IDX]]
; CHECK-NEXT: ret ptr [[P2]]
;
entry:
diff --git a/llvm/test/Transforms/InstCombine/icmp-gep.ll b/llvm/test/Transforms/InstCombine/icmp-gep.ll
index a0e03a5428e6c2..29d0c941ac5c8b 100644
--- a/llvm/test/Transforms/InstCombine/icmp-gep.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-gep.ll
@@ -472,7 +472,14 @@ define void @test60_extra_use_fold(ptr %foo, i64 %start.idx, i64 %end.offset) {
define i1 @test_scalable_same(ptr %x) {
; CHECK-LABEL: @test_scalable_same(
-; CHECK-NEXT: ret i1 false
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[A_IDX:%.*]] = shl i64 [[TMP1]], 5
+; CHECK-NEXT: [[A:%.*]] = getelementptr i8, ptr [[X:%.*]], i64 [[A_IDX]]
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[B_IDX:%.*]] = shl i64 [[TMP2]], 5
+; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 [[B_IDX]]
+; CHECK-NEXT: [[C:%.*]] = icmp ugt ptr [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
;
%a = getelementptr <vscale x 4 x i8>, ptr %x, i64 8
%b = getelementptr inbounds <vscale x 4 x i8>, ptr %x, i64 8
@@ -507,11 +514,11 @@ define i1 @test_scalable_xc(ptr %x) {
define i1 @test_scalable_xy(ptr %foo, i64 %i, i64 %j) {
; CHECK-LABEL: @test_scalable_xy(
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 2
-; CHECK-NEXT: [[GEP2_IDX:%.*]] = mul nsw i64 [[TMP2]], [[J:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 4
+; CHECK-NEXT: [[GEP1_IDX:%.*]] = mul nsw i64 [[TMP2]], [[I:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[TMP3]], 4
-; CHECK-NEXT: [[GEP1_IDX:%.*]] = mul nsw i64 [[TMP4]], [[I:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[TMP3]], 2
+; CHECK-NEXT: [[GEP2_IDX:%.*]] = mul nsw i64 [[TMP4]], [[J:%.*]]
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[GEP2_IDX]], [[GEP1_IDX]]
; CHECK-NEXT: ret i1 [[CMP]]
;
diff --git a/llvm/test/Transforms/InstCombine/opaque-ptr.ll b/llvm/test/Transforms/InstCombine/opaque-ptr.ll
index 4d38e2cd37c959..5cc444042e1796 100644
--- a/llvm/test/Transforms/InstCombine/opaque-ptr.ll
+++ b/llvm/test/Transforms/InstCombine/opaque-ptr.ll
@@ -289,7 +289,9 @@ define ptr @geps_combinable_different_elem_type_extra_use2(ptr %a, i64 %idx) {
define ptr @geps_combinable_scalable(ptr %a, i64 %idx) {
; CHECK-LABEL: @geps_combinable_scalable(
-; CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds <vscale x 2 x i32>, ptr [[A:%.*]], i64 1
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 3
+; CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP2]]
; CHECK-NEXT: [[A3:%.*]] = getelementptr inbounds i8, ptr [[A2]], i64 4
; CHECK-NEXT: ret ptr [[A3]]
;
@@ -300,7 +302,9 @@ define ptr @geps_combinable_scalable(ptr %a, i64 %idx) {
define ptr @geps_combinable_scalable_vector_array(ptr %a, i64 %idx) {
; CHECK-LABEL: @geps_combinable_scalable_vector_array(
-; CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds [4 x <vscale x 2 x i32>], ptr [[A:%.*]], i64 1
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 5
+; CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP2]]
; CHECK-NEXT: [[A3:%.*]] = getelementptr inbounds i8, ptr [[A2]], i64 4
; CHECK-NEXT: ret ptr [[A3]]
;
diff --git a/llvm/test/Transforms/InstCombine/scalable-vector-array.ll b/llvm/test/Transforms/InstCombine/scalable-vector-array.ll
index d03184766b4a68..20e9f2d99dd9a2 100644
--- a/llvm/test/Transforms/InstCombine/scalable-vector-array.ll
+++ b/llvm/test/Transforms/InstCombine/scalable-vector-array.ll
@@ -4,7 +4,9 @@
define <vscale x 4 x i32> @load(ptr %x) {
; CHECK-LABEL: define <vscale x 4 x i32> @load
; CHECK-SAME: (ptr [[X:%.*]]) {
-; CHECK-NEXT: [[A_ELT1:%.*]] = getelementptr inbounds [2 x <vscale x 4 x i32>], ptr [[X]], i64 0, i64 1
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 4
+; CHECK-NEXT: [[A_ELT1:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 [[TMP2]]
; CHECK-NEXT: [[A_UNPACK2:%.*]] = load <vscale x 4 x i32>, ptr [[A_ELT1]], align 16
; CHECK-NEXT: ret <vscale x 4 x i32> [[A_UNPACK2]]
;
@@ -17,7 +19,9 @@ define void @store(ptr %x, <vscale x 4 x i32> %y, <vscale x 4 x i32> %z) {
; CHECK-LABEL: define void @store
; CHECK-SAME: (ptr [[X:%.*]], <vscale x 4 x i32> [[Y:%.*]], <vscale x 4 x i32> [[Z:%.*]]) {
; CHECK-NEXT: store <vscale x 4 x i32> [[Y]], ptr [[X]], align 16
-; CHECK-NEXT: [[X_REPACK1:%.*]] = getelementptr inbounds [2 x <vscale x 4 x i32>], ptr [[X]], i64 0, i64 1
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 4
+; CHECK-NEXT: [[X_REPACK1:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 [[TMP2]]
; CHECK-NEXT: store <vscale x 4 x i32> [[Z]], ptr [[X_REPACK1]], align 16
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/InstCombine/vscale_gep.ll b/llvm/test/Transforms/InstCombine/vscale_gep.ll
index 534888dc1a1c49..f424650d21e840 100644
--- a/llvm/test/Transforms/InstCombine/vscale_gep.ll
+++ b/llvm/test/Transforms/InstCombine/vscale_gep.ll
@@ -14,7 +14,9 @@ define <vscale x 2 x ptr> @gep_index_type_is_scalable(ptr %p) {
; This test serves to verify code changes for "GEP.getNumIndices() == 1".
define ptr @gep_num_of_indices_1(ptr %p) {
; CHECK-LABEL: @gep_num_of_indices_1(
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P:%.*]], i64 1
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 4
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP2]]
; CHECK-NEXT: ret ptr [[GEP]]
;
%gep = getelementptr <vscale x 4 x i32>, ptr %p, i64 1
@@ -25,7 +27,9 @@ define ptr @gep_num_of_indices_1(ptr %p) {
define void @gep_bitcast(ptr %p) {
; CHECK-LABEL: @gep_bitcast(
; CHECK-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[P:%.*]], align 16
-; CHECK-NEXT: [[GEP2:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[P]], i64 1
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 4
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP2]]
; CHECK-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[GEP2]], align 16
; CHECK-NEXT: ret void
;
@@ -54,7 +58,10 @@ define i32 @gep_alloca_inbounds_vscale_zero() {
define i32 @gep_alloca_inbounds_vscale_nonzero() {
; CHECK-LABEL: @gep_alloca_inbounds_vscale_nonzero(
; CHECK-NEXT: [[A:%.*]] = alloca <vscale x 4 x i32>, align 16
-; CHECK-NEXT: [[TMP:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[A]], i64 1, i64 2
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 4
+; CHECK-NEXT: [[TMP_OFFS:%.*]] = or disjoint i64 [[TMP2]], 8
+; CHECK-NEXT: [[TMP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP_OFFS]]
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[TMP]], align 4
; CHECK-NEXT: ret i32 [[LOAD]]
;
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I gave this a test and almost nothing changed. So looks good in that regard.
Our lowering through LSR isn't fantastic at the moment, so there is a chance this is going from sub-optimal to sub-optimal, but I think it makes sense to canonicalize them now and work with that. LGTM from what I've seen.
    return replaceInstUsesWith(
        GEP, Builder.CreatePtrAdd(PtrOp, Offset, "", GEP.isInBounds()));
  }

  // Check to see if the inputs to the PHI node are getelementptr instructions.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is a check for IsGEPSrcEleScalable below (and a comment), that could be removed now?
Canonicalize getelementptr instructions for scalable vector types into ptradd representation with an explicit llvm.vscale call. This representation has better support in BasicAA, which can reason about llvm.vscale, but not plain scalable GEPs.
36a484a
to
74f7620
Compare
I expect that the main producer of scalable GEPs is LoopVectorize, which will already use the llvm.vscale representation anyway. So there's probably not many
Canonicalize getelementptr instructions for scalable vector types into ptradd representation with an explicit llvm.vscale call. This representation has better support in BasicAA, which can reason about llvm.vscale, but not plain scalable GEPs.
@davemgreen I'm curious what you think about this change, as you did the work for llvm.vscale support in BasicAA. Can we do this, or would it regress something else?
(There is also a less aggressive variant where we just move the
<vscale x
part out of the GEP type, but don't do the full offset expansion. Doing it this way is easier...)