-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[SLPVectorizer][NFC] A test for widening constant strided loads. #160552
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-risc-v Author: Mikhail Gudim (mgudim) Changes: Precommit a test. Full diff: https://github.com/llvm/llvm-project/pull/160552.diff 1 Files Affected:
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/basic-strided-loads.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/basic-strided-loads.ll
index 645dbc49269f0..b309be84f42cf 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/basic-strided-loads.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/basic-strided-loads.ll
@@ -527,23 +527,14 @@ define void @rt_stride_1_with_reordering(ptr %pl, i64 %stride, ptr %ps) {
ret void
}
-; TODO: We want to generate this code:
-; define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) {
-; %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 %offset0
-; %gep_s0 = getelementptr inbounds i8, ptr %ps, i64 0
-; %strided_load = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i64(ptr align 16 %gep_l0, i64 8, <4 x i1> splat (i1 true), i32 4)
-; %bitcast_ = bitcast <4 x i32> %strided_load to <16 x i8>
-; store <16 x i8> %bitcast_, ptr %gep_s0, align 16
-; ret void
-; }
-define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) {
-; CHECK-LABEL: define void @constant_stride_widen_no_reordering(
+define void @constant_stride_masked_no_reordering(ptr %pl, i64 %stride, ptr %ps) {
+; CHECK-LABEL: define void @constant_stride_masked_no_reordering(
; CHECK-SAME: ptr [[PL:%.*]], i64 [[STRIDE:%.*]], ptr [[PS:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[GEP_L0:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 0
; CHECK-NEXT: [[GEP_S0:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = call <28 x i8> @llvm.masked.load.v28i8.p0(ptr [[GEP_L0]], i32 16, <28 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>, <28 x i8> poison)
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <28 x i8> [[TMP1]], <28 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
-; CHECK-NEXT: store <16 x i8> [[TMP8]], ptr [[GEP_S0]], align 16
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <28 x i8> [[TMP1]], <28 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
+; CHECK-NEXT: store <16 x i8> [[TMP2]], ptr [[GEP_S0]], align 16
; CHECK-NEXT: ret void
;
%gep_l0 = getelementptr inbounds i8, ptr %pl, i64 0
@@ -617,6 +608,107 @@ define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps)
ret void
}
+; TODO: We want to generate this code (one strided load of four i32 elements with a constant 100-byte stride, bitcast to <16 x i8>):
+; define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) #0 {
+; %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 0
+; %gep_s0 = getelementptr inbounds i8, ptr %ps, i64 0
+; %1 = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i64(ptr align 16 %gep_l0, i64 100, <4 x i1> splat (i1 true), i32 4)
+; %2 = bitcast <4 x i32> %1 to <16 x i8>
+; store <16 x i8> %2, ptr %gep_s0, align 16
+; ret void
+; }
+define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) {
+; CHECK-LABEL: define void @constant_stride_widen_no_reordering(
+; CHECK-SAME: ptr [[PL:%.*]], i64 [[STRIDE:%.*]], ptr [[PS:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[GEP_L0:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 0
+; CHECK-NEXT: [[GEP_L4:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 100
+; CHECK-NEXT: [[GEP_L8:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 200
+; CHECK-NEXT: [[GEP_L12:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 300
+; CHECK-NEXT: [[GEP_S0:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[GEP_L0]], align 16
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[GEP_L4]], align 16
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i8>, ptr [[GEP_L8]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[GEP_L12]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i8> [[TMP7]], <16 x i8> [[TMP11]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[TMP9]], <16 x i8> [[TMP10]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
+; CHECK-NEXT: store <16 x i8> [[TMP8]], ptr [[GEP_S0]], align 16
+; CHECK-NEXT: ret void
+;
+ %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 0
+ %gep_l1 = getelementptr inbounds i8, ptr %pl, i64 1
+ %gep_l2 = getelementptr inbounds i8, ptr %pl, i64 2
+ %gep_l3 = getelementptr inbounds i8, ptr %pl, i64 3
+ %gep_l4 = getelementptr inbounds i8, ptr %pl, i64 100
+ %gep_l5 = getelementptr inbounds i8, ptr %pl, i64 101
+ %gep_l6 = getelementptr inbounds i8, ptr %pl, i64 102
+ %gep_l7 = getelementptr inbounds i8, ptr %pl, i64 103
+ %gep_l8 = getelementptr inbounds i8, ptr %pl, i64 200
+ %gep_l9 = getelementptr inbounds i8, ptr %pl, i64 201
+ %gep_l10 = getelementptr inbounds i8, ptr %pl, i64 202
+ %gep_l11 = getelementptr inbounds i8, ptr %pl, i64 203
+ %gep_l12 = getelementptr inbounds i8, ptr %pl, i64 300
+ %gep_l13 = getelementptr inbounds i8, ptr %pl, i64 301
+ %gep_l14 = getelementptr inbounds i8, ptr %pl, i64 302
+ %gep_l15 = getelementptr inbounds i8, ptr %pl, i64 303
+
+ %load0 = load i8, ptr %gep_l0 , align 16
+ %load1 = load i8, ptr %gep_l1 , align 16
+ %load2 = load i8, ptr %gep_l2 , align 16
+ %load3 = load i8, ptr %gep_l3 , align 16
+ %load4 = load i8, ptr %gep_l4 , align 16
+ %load5 = load i8, ptr %gep_l5 , align 16
+ %load6 = load i8, ptr %gep_l6 , align 16
+ %load7 = load i8, ptr %gep_l7 , align 16
+ %load8 = load i8, ptr %gep_l8 , align 16
+ %load9 = load i8, ptr %gep_l9 , align 16
+ %load10 = load i8, ptr %gep_l10, align 16
+ %load11 = load i8, ptr %gep_l11, align 16
+ %load12 = load i8, ptr %gep_l12, align 16
+ %load13 = load i8, ptr %gep_l13, align 16
+ %load14 = load i8, ptr %gep_l14, align 16
+ %load15 = load i8, ptr %gep_l15, align 16
+
+ %gep_s0 = getelementptr inbounds i8, ptr %ps, i64 0
+ %gep_s1 = getelementptr inbounds i8, ptr %ps, i64 1
+ %gep_s2 = getelementptr inbounds i8, ptr %ps, i64 2
+ %gep_s3 = getelementptr inbounds i8, ptr %ps, i64 3
+ %gep_s4 = getelementptr inbounds i8, ptr %ps, i64 4
+ %gep_s5 = getelementptr inbounds i8, ptr %ps, i64 5
+ %gep_s6 = getelementptr inbounds i8, ptr %ps, i64 6
+ %gep_s7 = getelementptr inbounds i8, ptr %ps, i64 7
+ %gep_s8 = getelementptr inbounds i8, ptr %ps, i64 8
+ %gep_s9 = getelementptr inbounds i8, ptr %ps, i64 9
+ %gep_s10 = getelementptr inbounds i8, ptr %ps, i64 10
+ %gep_s11 = getelementptr inbounds i8, ptr %ps, i64 11
+ %gep_s12 = getelementptr inbounds i8, ptr %ps, i64 12
+ %gep_s13 = getelementptr inbounds i8, ptr %ps, i64 13
+ %gep_s14 = getelementptr inbounds i8, ptr %ps, i64 14
+ %gep_s15 = getelementptr inbounds i8, ptr %ps, i64 15
+
+ store i8 %load0, ptr %gep_s0, align 16
+ store i8 %load1, ptr %gep_s1, align 16
+ store i8 %load2, ptr %gep_s2, align 16
+ store i8 %load3, ptr %gep_s3, align 16
+ store i8 %load4, ptr %gep_s4, align 16
+ store i8 %load5, ptr %gep_s5, align 16
+ store i8 %load6, ptr %gep_s6, align 16
+ store i8 %load7, ptr %gep_s7, align 16
+ store i8 %load8, ptr %gep_s8, align 16
+ store i8 %load9, ptr %gep_s9, align 16
+ store i8 %load10, ptr %gep_s10, align 16
+ store i8 %load11, ptr %gep_s11, align 16
+ store i8 %load12, ptr %gep_s12, align 16
+ store i8 %load13, ptr %gep_s13, align 16
+ store i8 %load14, ptr %gep_s14, align 16
+ store i8 %load15, ptr %gep_s15, align 16
+
+ ret void
+}
; TODO: We want to generate this code:
; define void @rt_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) {
; %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 %offset0
%gep_s14 = getelementptr inbounds i8, ptr %ps, i64 14
%gep_s15 = getelementptr inbounds i8, ptr %ps, i64 15

store i8 %load0, ptr %gep_s0, align 16
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is the alignment 16?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I copy-pasted from other functions. Should I remove alignment everywhere in basic-strided-loads.ll?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is just too much for i8; i8 accesses usually have align 1.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I updated the alignment (force-pushed by mistake)
Precommit a test.
2ece844
to
e6bb662
Compare
constant_stride_masked_no_reordering
Precommit a test.