-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV] Use a ta vslideup if inserting over end of InterSubVT #83230
[RISCV] Use a ta vslideup if inserting over end of InterSubVT #83230
Conversation
In llvm#83146 we relaxed a tail undisturbed vslideup to tail agnostic if we were inserting over the entire tail of the vector. However we shrink down the vector being inserted into so we're operating on an LMUL 1 slice of it. We can also use a tail agnostic policy if we're inserting over the entire tail of the LMUL1 slice, InterSubVT.
@llvm/pr-subscribers-backend-risc-v Author: Luke Lau (lukel97) ChangesIn #83146 we relaxed a tail undisturbed vslideup to tail agnostic if we were inserting over the entire tail of the vector. However we shrink down the vector being inserted into so we're operating on an LMUL 1 slice of it. We can also use a tail agnostic policy if we're inserting over the entire tail of the LMUL1 slice, InterSubVT. Full diff: https://github.com/llvm/llvm-project/pull/83230.diff 4 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index e95e21bda687e8..dde1882f5eea83 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -9732,9 +9732,9 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
ElementCount::getScalable(RemIdx) + SubVecVT.getVectorElementCount();
VL = computeVLMax(SubVecVT, DL, DAG);
- // Use tail agnostic policy if we're inserting over Vec's tail.
+ // Use tail agnostic policy if we're inserting over InterSubVT's tail.
unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
- if (EndIndex == VecVT.getVectorElementCount())
+ if (EndIndex == InterSubVT.getVectorElementCount())
Policy = RISCVII::TAIL_AGNOSTIC;
// If we're inserting into the lowest elements, use a tail undisturbed
diff --git a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
index a2d02b6bb641b2..76aa2b913c6525 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
@@ -474,7 +474,7 @@ define <vscale x 6 x half> @extract_nxv6f16_nxv12f16_6(<vscale x 12 x half> %in)
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v12, v9, a0
; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v12, v10, a0
; CHECK-NEXT: vmv2r.v v8, v12
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
index d377082761736f..b15896580d4253 100644
--- a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
@@ -227,7 +227,7 @@ define <vscale x 16 x i32> @insert_nxv16i32_nxv1i32_1(<vscale x 16 x i32> %vec,
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 3
; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v16, a0
; CHECK-NEXT: ret
%v = call <vscale x 16 x i32> @llvm.vector.insert.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec, i64 1)
@@ -306,7 +306,7 @@ define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_7(<vscale x 16 x i8> %vec, <vsc
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a1, a0, 3
; CHECK-NEXT: sub a1, a0, a1
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v10, a1
; CHECK-NEXT: ret
%v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 7)
@@ -319,7 +319,7 @@ define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_15(<vscale x 16 x i8> %vec, <vs
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a1, a0, 3
; CHECK-NEXT: sub a1, a0, a1
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v10, a1
; CHECK-NEXT: ret
%v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 15)
@@ -344,7 +344,7 @@ define <vscale x 32 x half> @insert_nxv32f16_nxv2f16_2(<vscale x 32 x half> %vec
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v16, a0
; CHECK-NEXT: ret
%v = call <vscale x 32 x half> @llvm.vector.insert.nxv2f16.nxv32f16(<vscale x 32 x half> %vec, <vscale x 2 x half> %subvec, i64 2)
@@ -357,7 +357,7 @@ define <vscale x 32 x half> @insert_nxv32f16_nxv2f16_26(<vscale x 32 x half> %ve
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v14, v16, a0
; CHECK-NEXT: ret
%v = call <vscale x 32 x half> @llvm.vector.insert.nxv2f16.nxv32f16(<vscale x 32 x half> %vec, <vscale x 2 x half> %subvec, i64 26)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
index 515d77109af9f7..6d42b15273cf86 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
@@ -916,7 +916,7 @@ define half @vreduce_ord_fadd_nxv6f16(<vscale x 6 x half> %v, half %s) {
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v10, a0
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
@@ -938,11 +938,11 @@ define half @vreduce_ord_fadd_nxv10f16(<vscale x 10 x half> %v, half %s) {
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v10, v12, a0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
; CHECK-NEXT: vmv.v.v v11, v12
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v11, v12, a0
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
@@ -1002,7 +1002,7 @@ define half @vreduce_fadd_nxv6f16(<vscale x 6 x half> %v, half %s) {
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v10, a0
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
@@ -1025,11 +1025,11 @@ define half @vreduce_fmin_nxv10f16(<vscale x 10 x half> %v) {
; CHECK-NEXT: vlse16.v v12, (a1), zero
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v10, v12, a0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
; CHECK-NEXT: vmv.v.v v11, v12
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v11, v12, a0
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT: vfredmin.vs v8, v8, v8
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
The description in #83146 is slightly inaccurate: it relaxes a tail undisturbed vslideup to tail agnostic if we are inserting over the entire tail of the vector and we didn't shrink the LMUL of the vector being inserted into.
This handles the case where we did shrink down the LMUL via InterSubVT by checking if we inserted over the entire tail of InterSubVT, the actual type that we're performing the vslideup on, not VecVT.