-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV] Use predicated FP extension when promoting VP operands #79328
base: main
Are you sure you want to change the base?
Conversation
This ensures that the VL used for the emitted vfwcvt matches the one in the original operation. Otherwise, a larger VL may be used than is necessary.
@llvm/pr-subscribers-llvm-selectiondag Author: Simeon K (simeonkr) Changes: This ensures that the VL used for the emitted vfwcvt matches the one in the original operation. Otherwise, a larger VL may be used than is necessary. Patch is 414.02 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/79328.diff 38 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 1fbd6322f9ed9c0..937a6a0df655868 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -593,8 +593,16 @@ void VectorLegalizer::PromoteReduction(SDNode *Node,
ISD::getVPMaskIdx(Node->getOpcode()) == j)) // Skip mask operand.
// promote the vector operand.
if (Node->getOperand(j).getValueType().isFloatingPoint())
- Operands[j] =
- DAG.getNode(ISD::FP_EXTEND, DL, NewVecVT, Node->getOperand(j));
+ if (!ISD::isVPOpcode(Node->getOpcode()))
+ Operands[j] =
+ DAG.getNode(ISD::FP_EXTEND, DL, NewVecVT, Node->getOperand(j));
+ else {
+ auto MaskIdx = *ISD::getVPMaskIdx(Node->getOpcode());
+ auto EVLIdx = *ISD::getVPExplicitVectorLengthIdx(Node->getOpcode());
+ Operands[j] =
+ DAG.getNode(ISD::VP_FP_EXTEND, DL, NewVecVT, Node->getOperand(j),
+ Node->getOperand(MaskIdx), Node->getOperand(EVLIdx));
+ }
else
Operands[j] =
DAG.getNode(ISD::ANY_EXTEND, DL, NewVecVT, Node->getOperand(j));
@@ -756,7 +764,16 @@ void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
.getVectorElementType()
.isFloatingPoint() &&
NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())
- Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(j));
+ if (!ISD::isVPOpcode(Node->getOpcode()))
+ Operands[j] =
+ DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(j));
+ else {
+ auto MaskIdx = *ISD::getVPMaskIdx(Node->getOpcode());
+ auto EVLIdx = *ISD::getVPExplicitVectorLengthIdx(Node->getOpcode());
+ Operands[j] =
+ DAG.getNode(ISD::VP_FP_EXTEND, dl, NVT, Node->getOperand(j),
+ Node->getOperand(MaskIdx), Node->getOperand(EVLIdx));
+ }
else
Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(j));
else
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll
index 194179f9f470e16..375ac3e29a71e66 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll
@@ -30,9 +30,9 @@ define <2 x half> @vp_ceil_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl)
;
; ZVFHMIN-LABEL: vp_ceil_v2f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
@@ -70,9 +70,9 @@ define <2 x half> @vp_ceil_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) {
;
; ZVFHMIN-LABEL: vp_ceil_v2f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfabs.v v8, v9
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
@@ -114,9 +114,9 @@ define <4 x half> @vp_ceil_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl)
;
; ZVFHMIN-LABEL: vp_ceil_v4f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
@@ -154,9 +154,9 @@ define <4 x half> @vp_ceil_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
;
; ZVFHMIN-LABEL: vp_ceil_v4f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT: vfabs.v v8, v9
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
@@ -199,9 +199,9 @@ define <8 x half> @vp_ceil_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl)
; ZVFHMIN-LABEL: vp_ceil_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vmv1r.v v9, v0
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
@@ -240,9 +240,9 @@ define <8 x half> @vp_ceil_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) {
;
; ZVFHMIN-LABEL: vp_ceil_v8f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfabs.v v8, v10
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
@@ -287,9 +287,9 @@ define <16 x half> @vp_ceil_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %e
; ZVFHMIN-LABEL: vp_ceil_v16f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vmv1r.v v10, v0
-; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
@@ -328,9 +328,9 @@ define <16 x half> @vp_ceil_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) {
;
; ZVFHMIN-LABEL: vp_ceil_v16f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT: vfabs.v v8, v12
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll
index 583742224f8cf1f..bdefb1525b71eaa 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll
@@ -30,9 +30,9 @@ define <2 x half> @vp_floor_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl)
;
; ZVFHMIN-LABEL: vp_floor_v2f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
@@ -70,9 +70,9 @@ define <2 x half> @vp_floor_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) {
;
; ZVFHMIN-LABEL: vp_floor_v2f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfabs.v v8, v9
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
@@ -114,9 +114,9 @@ define <4 x half> @vp_floor_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl)
;
; ZVFHMIN-LABEL: vp_floor_v4f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
@@ -154,9 +154,9 @@ define <4 x half> @vp_floor_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
;
; ZVFHMIN-LABEL: vp_floor_v4f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT: vfabs.v v8, v9
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
@@ -199,9 +199,9 @@ define <8 x half> @vp_floor_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl)
; ZVFHMIN-LABEL: vp_floor_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vmv1r.v v9, v0
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
@@ -240,9 +240,9 @@ define <8 x half> @vp_floor_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) {
;
; ZVFHMIN-LABEL: vp_floor_v8f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfabs.v v8, v10
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
@@ -287,9 +287,9 @@ define <16 x half> @vp_floor_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %
; ZVFHMIN-LABEL: vp_floor_v16f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vmv1r.v v10, v0
-; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
@@ -328,9 +328,9 @@ define <16 x half> @vp_floor_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl)
;
; ZVFHMIN-LABEL: vp_floor_v16f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT: vfabs.v v8, v12
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll
index 4a5ef21efdb9681..fe1a409288dbcfb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll
@@ -27,19 +27,17 @@ define <2 x half> @vfmax_vv_v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i
; ZVFHMIN-LABEL: vfmax_vv_v2f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vmv1r.v v10, v0
-; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t
-; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v8, v0
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9, v0.t
+; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v11, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v10
-; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t
-; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v11, v0
+; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t
+; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v9, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v10
-; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9, v0.t
+; ZVFHMIN-NEXT: vfmax.vv v9, v9, v8, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
@@ -61,18 +59,16 @@ define <2 x half> @vfmax_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 z
;
; ZVFHMIN-LABEL: vfmax_vv_v2f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10
-; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11
-; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9
+; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10
+; ZVFHMIN-NEXT: vmerge.vvm v11, v9, v10, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v8
-; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0
-; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9
+; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v9, v0
+; ZVFHMIN-NEXT: vfmax.vv v9, v8, v11
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
@@ -101,19 +97,17 @@ define <4 x half> @vfmax_vv_v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i
; ZVFHMIN-LABEL: vfmax_vv_v4f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vmv1r.v v10, v0
-; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t
-; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v8, v0
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9, v0.t
+; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v11, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v10
-; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t
-; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v11, v0
+; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t
+; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v9, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v10
-; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9, v0.t
+; ZVFHMIN-NEXT: vfmax.vv v9, v9, v8, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
@@ -135,18 +129,16 @@ define <4 x half> @vfmax_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 z
;
; ZVFHMIN-LABEL: vfmax_vv_v4f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10
-; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11
-; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9
+; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10
+; ZVFHMIN-NEXT: vmerge.vvm v11, v9, v10, v0
; ZVFHMIN-NEXT: vmv.v.v v0, v8
-; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0
-; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9
+; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v9, v0
+; ZVFHMIN-NEXT: vfmax.vv v9, v8, v11
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
@@ -175,19 +167,17 @@ define <8 x half> @vfmax_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i
; ZVFHMIN-LABEL: vfmax_vv_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vmv1r.v v10, v0
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v8, v14, v14, v0.t
; ZVFHMIN-NEXT: vmv1r.v v0, v8
-; ZVFHMIN-NEXT: vmerge.vvm v16, v12, v14, v0
+; ZVFHMIN-NEXT: vmerge.vvm v16, v14, v12, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v10
-; ZVFHMIN-NEXT: vmfeq.vv v8, v14, v14, v0.t
+; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t
; ZVFHMIN-NEXT: vmv1r.v v0, v8
-; ZVFHMIN-NEXT: vmerge.vvm v8, v14, v12, v0
+; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v14, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v10
; ZVFHMIN-NEXT: vfmax.vv v10, v8, v16, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
@@ -211,17 +201,15 @@ define <8 x half> @vfmax_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 z
;
; ZVFHMIN-LABEL: vfmax_vv_v8f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12
-; ZVFHMIN-NEXT: vmerge.vvm v14, v10, v12, v0
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12
+; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10
+; ZVFHMIN-NEXT: vmerge.vvm v14, v12, v10, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v8
-; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v10, v0
+; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0
; ZVFHMIN-NEXT: vfmax.vv v10, v8, v14
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
@@ -253,19 +241,17 @@ define <16 x half> @vfmax_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1>
; ZVFHMIN-LABEL: vfmax_vv_v16f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vmv1r.v v12, v0
-; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t
-; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10
-; ZVFHMIN-NEXT: vsetvli z...
[truncated]
|
if (!ISD::isVPOpcode(Node->getOpcode()))
  Operands[j] =
      DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(j));
else {
  auto MaskIdx = *ISD::getVPMaskIdx(Node->getOpcode());
  auto EVLIdx = *ISD::getVPExplicitVectorLengthIdx(Node->getOpcode());
  Operands[j] =
      DAG.getNode(ISD::VP_FP_EXTEND, dl, NVT, Node->getOperand(j),
                  Node->getOperand(MaskIdx), Node->getOperand(EVLIdx));
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seems like we need helper methods in the DAG to build VP or non-VP versions. The prospect of permutations combined with strictfp versions is also horrifying
✅ With the latest revision this PR passed the C/C++ code formatter.
366e349
to
aa93433
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As it is we still have a bad interaction where you can't use vp with strictfp
This ensures that the VL used for the emitted vfwcvt matches the one in the original operation. Otherwise, a larger VL may be used than is necessary.