New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
SLP/RISCV: add negative test for llrint, increase coverage #69940
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Ramkumar Ramachandra (artagnon) Changes: To follow up on a06be8a (SLP/RISCV: add negative test for lrint), add a negative test for llvm.llrint as well, and increase the coverage to cover vectors of length 2, 4, and 8, in preparation to get SLPVectorizer to vectorize both lrint and llrint: this is now possible with the recent change 98c90a1 (ISel: introduce vector ISD::LRINT, ISD::LLRINT; custom RISCV lowering). Full diff: https://github.com/llvm/llvm-project/pull/69940.diff (1 file affected):
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/fround.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/fround.ll
index 9206f529cbfd368..1dfd915cb23330f 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/fround.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/fround.ll
@@ -29,6 +29,30 @@ entry:
ret <4 x float> %vecins.3
}
+define <2 x i64> @lrint_v2i64f32(ptr %a) {
+; CHECK-LABEL: define <2 x i64> @lrint_v2i64f32(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[A]], align 8
+; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT]])
+; CHECK-NEXT: [[VECINS:%.*]] = insertelement <2 x i64> undef, i64 [[TMP1]], i64 0
+; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <2 x float> [[TMP0]], i64 1
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_1]])
+; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <2 x i64> [[VECINS]], i64 [[TMP2]], i64 1
+; CHECK-NEXT: ret <2 x i64> [[VECINS_1]]
+;
+entry:
+ %0 = load <2 x float>, ptr %a
+ %vecext = extractelement <2 x float> %0, i64 0
+ %1 = call i64 @llvm.lrint.i64.f32(float %vecext)
+ %vecins = insertelement <2 x i64> undef, i64 %1, i64 0
+ %vecext.1 = extractelement <2 x float> %0, i64 1
+ %2 = call i64 @llvm.lrint.i64.f32(float %vecext.1)
+ %vecins.1 = insertelement <2 x i64> %vecins, i64 %2, i64 1
+ ret <2 x i64> %vecins.1
+}
+
define <4 x i64> @lrint_v4i64f32(ptr %a) {
; CHECK-LABEL: define <4 x i64> @lrint_v4i64f32(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
@@ -65,5 +89,186 @@ entry:
ret <4 x i64> %vecins.3
}
+define <8 x i64> @lrint_v8i64f32(ptr %a) {
+; CHECK-LABEL: define <8 x i64> @lrint_v8i64f32(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[A]], align 32
+; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <8 x float> [[TMP0]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT]])
+; CHECK-NEXT: [[VECINS:%.*]] = insertelement <8 x i64> undef, i64 [[TMP1]], i64 0
+; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <8 x float> [[TMP0]], i64 1
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_1]])
+; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <8 x i64> [[VECINS]], i64 [[TMP2]], i64 1
+; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <8 x float> [[TMP0]], i64 2
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_2]])
+; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <8 x i64> [[VECINS_1]], i64 [[TMP3]], i64 2
+; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <8 x float> [[TMP0]], i64 3
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_3]])
+; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <8 x i64> [[VECINS_2]], i64 [[TMP4]], i64 3
+; CHECK-NEXT: [[VECEXT_4:%.*]] = extractelement <8 x float> [[TMP0]], i64 4
+; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_4]])
+; CHECK-NEXT: [[VECINS_4:%.*]] = insertelement <8 x i64> [[VECINS_3]], i64 [[TMP5]], i64 4
+; CHECK-NEXT: [[VECEXT_5:%.*]] = extractelement <8 x float> [[TMP0]], i64 5
+; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_5]])
+; CHECK-NEXT: [[VECINS_5:%.*]] = insertelement <8 x i64> [[VECINS_4]], i64 [[TMP6]], i64 5
+; CHECK-NEXT: [[VECEXT_6:%.*]] = extractelement <8 x float> [[TMP0]], i64 6
+; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_6]])
+; CHECK-NEXT: [[VECINS_6:%.*]] = insertelement <8 x i64> [[VECINS_5]], i64 [[TMP7]], i64 6
+; CHECK-NEXT: [[VECEXT_7:%.*]] = extractelement <8 x float> [[TMP0]], i64 7
+; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_7]])
+; CHECK-NEXT: [[VECINS_7:%.*]] = insertelement <8 x i64> [[VECINS_6]], i64 [[TMP8]], i64 7
+; CHECK-NEXT: ret <8 x i64> [[VECINS_7]]
+;
+entry:
+ %0 = load <8 x float>, ptr %a
+ %vecext = extractelement <8 x float> %0, i64 0
+ %1 = call i64 @llvm.lrint.i64.f32(float %vecext)
+ %vecins = insertelement <8 x i64> undef, i64 %1, i64 0
+ %vecext.1 = extractelement <8 x float> %0, i64 1
+ %2 = call i64 @llvm.lrint.i64.f32(float %vecext.1)
+ %vecins.1 = insertelement <8 x i64> %vecins, i64 %2, i64 1
+ %vecext.2 = extractelement <8 x float> %0, i64 2
+ %3 = call i64 @llvm.lrint.i64.f32(float %vecext.2)
+ %vecins.2 = insertelement <8 x i64> %vecins.1, i64 %3, i64 2
+ %vecext.3 = extractelement <8 x float> %0, i64 3
+ %4 = call i64 @llvm.lrint.i64.f32(float %vecext.3)
+ %vecins.3 = insertelement <8 x i64> %vecins.2, i64 %4, i64 3
+ %vecext.4 = extractelement <8 x float> %0, i64 4
+ %5 = call i64 @llvm.lrint.i64.f32(float %vecext.4)
+ %vecins.4 = insertelement <8 x i64> %vecins.3, i64 %5, i64 4
+ %vecext.5 = extractelement <8 x float> %0, i64 5
+ %6 = call i64 @llvm.lrint.i64.f32(float %vecext.5)
+ %vecins.5 = insertelement <8 x i64> %vecins.4, i64 %6, i64 5
+ %vecext.6 = extractelement <8 x float> %0, i64 6
+ %7 = call i64 @llvm.lrint.i64.f32(float %vecext.6)
+ %vecins.6 = insertelement <8 x i64> %vecins.5, i64 %7, i64 6
+ %vecext.7 = extractelement <8 x float> %0, i64 7
+ %8 = call i64 @llvm.lrint.i64.f32(float %vecext.7)
+ %vecins.7 = insertelement <8 x i64> %vecins.6, i64 %8, i64 7
+ ret <8 x i64> %vecins.7
+}
+
+define <2 x i64> @llrint_v2i64f32(ptr %a) {
+; CHECK-LABEL: define <2 x i64> @llrint_v2i64f32(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[A]], align 8
+; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT]])
+; CHECK-NEXT: [[VECINS:%.*]] = insertelement <2 x i64> undef, i64 [[TMP1]], i64 0
+; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <2 x float> [[TMP0]], i64 1
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_1]])
+; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <2 x i64> [[VECINS]], i64 [[TMP2]], i64 1
+; CHECK-NEXT: ret <2 x i64> [[VECINS_1]]
+;
+entry:
+ %0 = load <2 x float>, ptr %a
+ %vecext = extractelement <2 x float> %0, i64 0
+ %1 = call i64 @llvm.llrint.i64.f32(float %vecext)
+ %vecins = insertelement <2 x i64> undef, i64 %1, i64 0
+ %vecext.1 = extractelement <2 x float> %0, i64 1
+ %2 = call i64 @llvm.llrint.i64.f32(float %vecext.1)
+ %vecins.1 = insertelement <2 x i64> %vecins, i64 %2, i64 1
+ ret <2 x i64> %vecins.1
+}
+
+define <4 x i64> @llrint_v4i64f32(ptr %a) {
+; CHECK-LABEL: define <4 x i64> @llrint_v4i64f32(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT]])
+; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x i64> undef, i64 [[TMP1]], i64 0
+; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_1]])
+; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x i64> [[VECINS]], i64 [[TMP2]], i64 1
+; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_2]])
+; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x i64> [[VECINS_1]], i64 [[TMP3]], i64 2
+; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_3]])
+; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x i64> [[VECINS_2]], i64 [[TMP4]], i64 3
+; CHECK-NEXT: ret <4 x i64> [[VECINS_3]]
+;
+entry:
+ %0 = load <4 x float>, ptr %a
+ %vecext = extractelement <4 x float> %0, i64 0
+ %1 = call i64 @llvm.llrint.i64.f32(float %vecext)
+ %vecins = insertelement <4 x i64> undef, i64 %1, i64 0
+ %vecext.1 = extractelement <4 x float> %0, i64 1
+ %2 = call i64 @llvm.llrint.i64.f32(float %vecext.1)
+ %vecins.1 = insertelement <4 x i64> %vecins, i64 %2, i64 1
+ %vecext.2 = extractelement <4 x float> %0, i64 2
+ %3 = call i64 @llvm.llrint.i64.f32(float %vecext.2)
+ %vecins.2 = insertelement <4 x i64> %vecins.1, i64 %3, i64 2
+ %vecext.3 = extractelement <4 x float> %0, i64 3
+ %4 = call i64 @llvm.llrint.i64.f32(float %vecext.3)
+ %vecins.3 = insertelement <4 x i64> %vecins.2, i64 %4, i64 3
+ ret <4 x i64> %vecins.3
+}
+
+define <8 x i64> @llrint_v8i64f32(ptr %a) {
+; CHECK-LABEL: define <8 x i64> @llrint_v8i64f32(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[A]], align 32
+; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <8 x float> [[TMP0]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT]])
+; CHECK-NEXT: [[VECINS:%.*]] = insertelement <8 x i64> undef, i64 [[TMP1]], i64 0
+; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <8 x float> [[TMP0]], i64 1
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_1]])
+; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <8 x i64> [[VECINS]], i64 [[TMP2]], i64 1
+; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <8 x float> [[TMP0]], i64 2
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_2]])
+; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <8 x i64> [[VECINS_1]], i64 [[TMP3]], i64 2
+; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <8 x float> [[TMP0]], i64 3
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_3]])
+; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <8 x i64> [[VECINS_2]], i64 [[TMP4]], i64 3
+; CHECK-NEXT: [[VECEXT_4:%.*]] = extractelement <8 x float> [[TMP0]], i64 4
+; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_4]])
+; CHECK-NEXT: [[VECINS_4:%.*]] = insertelement <8 x i64> [[VECINS_3]], i64 [[TMP5]], i64 4
+; CHECK-NEXT: [[VECEXT_5:%.*]] = extractelement <8 x float> [[TMP0]], i64 5
+; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_5]])
+; CHECK-NEXT: [[VECINS_5:%.*]] = insertelement <8 x i64> [[VECINS_4]], i64 [[TMP6]], i64 5
+; CHECK-NEXT: [[VECEXT_6:%.*]] = extractelement <8 x float> [[TMP0]], i64 6
+; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_6]])
+; CHECK-NEXT: [[VECINS_6:%.*]] = insertelement <8 x i64> [[VECINS_5]], i64 [[TMP7]], i64 6
+; CHECK-NEXT: [[VECEXT_7:%.*]] = extractelement <8 x float> [[TMP0]], i64 7
+; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_7]])
+; CHECK-NEXT: [[VECINS_7:%.*]] = insertelement <8 x i64> [[VECINS_6]], i64 [[TMP8]], i64 7
+; CHECK-NEXT: ret <8 x i64> [[VECINS_7]]
+;
+entry:
+ %0 = load <8 x float>, ptr %a
+ %vecext = extractelement <8 x float> %0, i64 0
+ %1 = call i64 @llvm.llrint.i64.f32(float %vecext)
+ %vecins = insertelement <8 x i64> undef, i64 %1, i64 0
+ %vecext.1 = extractelement <8 x float> %0, i64 1
+ %2 = call i64 @llvm.llrint.i64.f32(float %vecext.1)
+ %vecins.1 = insertelement <8 x i64> %vecins, i64 %2, i64 1
+ %vecext.2 = extractelement <8 x float> %0, i64 2
+ %3 = call i64 @llvm.llrint.i64.f32(float %vecext.2)
+ %vecins.2 = insertelement <8 x i64> %vecins.1, i64 %3, i64 2
+ %vecext.3 = extractelement <8 x float> %0, i64 3
+ %4 = call i64 @llvm.llrint.i64.f32(float %vecext.3)
+ %vecins.3 = insertelement <8 x i64> %vecins.2, i64 %4, i64 3
+ %vecext.4 = extractelement <8 x float> %0, i64 4
+ %5 = call i64 @llvm.llrint.i64.f32(float %vecext.4)
+ %vecins.4 = insertelement <8 x i64> %vecins.3, i64 %5, i64 4
+ %vecext.5 = extractelement <8 x float> %0, i64 5
+ %6 = call i64 @llvm.llrint.i64.f32(float %vecext.5)
+ %vecins.5 = insertelement <8 x i64> %vecins.4, i64 %6, i64 5
+ %vecext.6 = extractelement <8 x float> %0, i64 6
+ %7 = call i64 @llvm.llrint.i64.f32(float %vecext.6)
+ %vecins.6 = insertelement <8 x i64> %vecins.5, i64 %7, i64 6
+ %vecext.7 = extractelement <8 x float> %0, i64 7
+ %8 = call i64 @llvm.llrint.i64.f32(float %vecext.7)
+ %vecins.7 = insertelement <8 x i64> %vecins.6, i64 %8, i64 7
+ ret <8 x i64> %vecins.7
+}
+
declare float @llvm.rint.f32(float)
declare i64 @llvm.lrint.i64.f32(float)
+declare i64 @llvm.llrint.i64.f32(float)
|
Do we have the same tests for i32? |
To follow up on a06be8a (SLP/RISCV: add negative test for lrint), add a negative test for llvm.llrint as well, and increase the coverage to cover vectors of length 2, 4, and 8, and the i32 variant of lrint, in preparation to get SLPVectorizer to vectorize both lrint and llrint. This is now possible with the recent change 98c90a1 (ISel: introduce vector ISD::LRINT, ISD::LLRINT; custom RISCV lowering).
084495c
to
4253580
Compare
Fixed now. Thanks. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LG
To follow up on a06be8a (SLP/RISCV: add negative test for lrint), add a negative test for llvm.llrint as well, and increase the coverage to cover vectors of length 2, 4, and 8, and the i32 variant of lrint, in preparation to get SLPVectorizer to vectorize both lrint and llrint. This is now possible with the recent change 98c90a1 (ISel: introduce vector ISD::LRINT, ISD::LLRINT; custom RISCV lowering).