-
Notifications
You must be signed in to change notification settings - Fork 15.1k
release/21.x: [RISCV] Don't lose elements from False in vmerge -> vmv peephole (#149720) #149968
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@mshockwave What do you think about merging this PR to the release branch? |
@llvm/pr-subscribers-backend-risc-v Author: None (llvmbot) ChangesBackport eafe31b Requested by: @lukel97 Full diff: https://github.com/llvm/llvm-project/pull/149968.diff 3 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index 84ef53985484f..c1cc19b503deb 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -434,6 +434,15 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) {
if (!isKnownSameDefs(TrueMask.getReg(), MIMask.getReg()))
return false;
+ // Masked off lanes past TrueVL will come from False, and converting to vmv
+ // will lose these lanes unless MIVL <= TrueVL.
+ // TODO: We could relax this for False == Passthru and True policy == TU
+ const MachineOperand &MIVL = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc()));
+ const MachineOperand &TrueVL =
+ True->getOperand(RISCVII::getVLOpNum(True->getDesc()));
+ if (!RISCV::isVLKnownLE(MIVL, TrueVL))
+ return false;
+
// True's passthru needs to be equivalent to False
Register TruePassthruReg = True->getOperand(1).getReg();
Register FalseReg = MI.getOperand(2).getReg();
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir
index a050034c63168..a7eaf39793236 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir
@@ -78,12 +78,12 @@ body: |
; CHECK-NEXT: %false:vrnov0 = COPY $v9
; CHECK-NEXT: %mask:vmv0 = COPY $v0
; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 4, 5 /* e32 */, 0 /* tu, mu */
- ; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 %pt, %true, 8, 5 /* e32 */, 0 /* tu, mu */
+ ; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 %pt, %true, 4, 5 /* e32 */, 0 /* tu, mu */
%pt:vrnov0 = COPY $v8
%false:vrnov0 = COPY $v9
%mask:vmv0 = COPY $v0
- %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 4, 5 /* e32 */, 0 /* tu, mu */
- %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, 8, 5 /* e32 */
+ %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 8, 5 /* e32 */, 0 /* tu, mu */
+ %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, 4, 5 /* e32 */
...
---
# Shouldn't be converted because false operands are different
@@ -163,3 +163,47 @@ body: |
%true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, 4, 5 /* e32 */, 0 /* tu, mu */
bb.1:
%5:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %false, %true, %mask, 4, 5 /* e32 */
+...
+---
+# Shouldn't be converted because vmerge adds back in elements from false past avl that would be lost if we converted to vmv.v.v
+name: preserve_false
+body: |
+ bb.0:
+ liveins: $v8, $v9, $v0, $x8, $x9
+ ; CHECK-LABEL: name: preserve_false
+ ; CHECK: liveins: $v8, $v9, $v0, $x8, $x9
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %pt:vrnov0 = COPY $v8
+ ; CHECK-NEXT: %false:vr = COPY $v9
+ ; CHECK-NEXT: %mask:vmv0 = COPY $v0
+ ; CHECK-NEXT: %avl1:gprnox0 = COPY $x8
+ ; CHECK-NEXT: %avl2:gprnox0 = COPY $x9
+ ; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, %avl1, 5 /* e32 */, 3 /* ta, ma */
+ ; CHECK-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, %avl2, 5 /* e32 */
+ %pt:vrnov0 = COPY $v8
+ %false:vr = COPY $v9
+ %mask:vmv0 = COPY $v0
+ %avl1:gprnox0 = COPY $x8
+ %avl2:gprnox0 = COPY $x9
+ %true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, %avl1, 5 /* e32 */, 3 /* ta, ma */
+ %5:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, %avl2, 5 /* e32 */
+...
+---
+# But we can convert this one because vmerge's avl being <= true's means we don't lose any false elements past avl.
+name: preserve_false_avl_known_le
+body: |
+ bb.0:
+ liveins: $v8, $v9, $v0
+ ; CHECK-LABEL: name: preserve_false_avl_known_le
+ ; CHECK: liveins: $v8, $v9, $v0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %pt:vr = COPY $v8
+ ; CHECK-NEXT: %false:vrnov0 = COPY $v9
+ ; CHECK-NEXT: %mask:vmv0 = COPY $v0
+ ; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 1, 5 /* e32 */, 3 /* ta, ma */
+ ; CHECK-NEXT: [[PseudoVMV_V_V_M1_:%[0-9]+]]:vr = PseudoVMV_V_V_M1 %pt, %true, 1, 5 /* e32 */, 0 /* tu, mu */
+ %pt:vrnov0 = COPY $v8
+ %false:vr = COPY $v9
+ %mask:vmv0 = COPY $v0
+ %true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, 2, 5 /* e32 */, 3 /* ta, ma */
+ %5:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, 1, 5 /* e32 */
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-vmerge-to-vmv.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-vmerge-to-vmv.ll
index 3aeb4e864627c..9ffc84a8a0e4a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-vmerge-to-vmv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-vmerge-to-vmv.ll
@@ -71,10 +71,31 @@ define <vscale x 8 x i64> @vpmerge_m8(<vscale x 8 x i64> %x, <vscale x 8 x i64>
ret <vscale x 8 x i64> %1
}
-declare <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1>, <vscale x 1 x i8>, <vscale x 1 x i8>, i32)
-declare <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>, <vscale x 2 x i8>, i32)
-declare <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1>, <vscale x 4 x i8>, <vscale x 4 x i8>, i32)
-declare <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1>, <vscale x 8 x i8>, <vscale x 8 x i8>, i32)
-declare <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
-declare <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1>, <vscale x 8 x i32>, <vscale x 8 x i32>, i32)
-declare <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1>, <vscale x 8 x i64>, <vscale x 8 x i64>, i32)
+; Shouldn't be converted because vmerge adds back in elements from false past avl that would be lost if we converted to vmv.v.v
+define <vscale x 2 x i32> @preserve_false(ptr %p, <vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i1> %mask, i64 %avl1, i64 %avl2) {
+; CHECK-LABEL: preserve_false:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vle32.v v10, (a0), v0.t
+; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, ma
+; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
+; CHECK-NEXT: ret
+ %true = call <vscale x 2 x i32> @llvm.riscv.vle.mask(<vscale x 2 x i32> %false, ptr %p, <vscale x 2 x i1> %mask, i64 %avl1, i64 3)
+ %res = call <vscale x 2 x i32> @llvm.riscv.vmerge(<vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i32> %true, <vscale x 2 x i1> %mask, i64 %avl2)
+ ret <vscale x 2 x i32> %res
+}
+
+; Can fold this because its avl is known to be <= than true, so no elements from false need to be introduced past avl.
+define <vscale x 2 x i32> @preserve_false_avl_known_le(ptr %p, <vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: preserve_false_avl_known_le:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vle32.v v9, (a0), v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %true = call <vscale x 2 x i32> @llvm.riscv.vle.mask(<vscale x 2 x i32> %false, ptr %p, <vscale x 2 x i1> %mask, i64 2, i64 3)
+ %res = call <vscale x 2 x i32> @llvm.riscv.vmerge(<vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i32> %true, <vscale x 2 x i1> %mask, i64 1)
+ ret <vscale x 2 x i32> %res
+}
|
@mshockwave can we get a review if this can be included in the release branch? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
…m#149720) In the vmerge peephole, we currently allow different AVLs for the vmerge and its true operand. If vmerge's VL > true's VL, vmerge can "preserve" elements from false that would otherwise be clobbered with a tail agnostic policy on true. mask 1 1 1 1 0 0 0 0 true x x x x|. . . . AVL=4 vmerge x x x x f f|. . AVL=6 If we convert this to vmv.v.v we will lose those false elements: mask 1 1 1 1 0 0 0 0 true x x x x|. . . . AVL=4 vmv.v.v x x x x . .|. . AVL=6 Fix this by checking that vmerge's AVL is <= true's AVL. Should fix llvm#149335 (cherry picked from commit eafe31b)
@lukel97 (or anyone else). If you would like to add a note about this fix in the release notes (completely optional). Please reply to this comment with a one or two sentence description of the fix. When you are done, please add the release:note label to this PR. |
Backport eafe31b
Requested by: @lukel97