-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV] Use unmasked vfsgnj during fp unary op lowering #81211
Conversation
vfsgnj doesn't set any exception flags so we don't need to mask it. It might also avoid a vsetvli policy toggle. We can do this for VP ops too, since disabled lanes are poison.
@llvm/pr-subscribers-backend-risc-v Author: Luke Lau (lukel97) Changesvfsgnj doesn't set any exception flags so we don't need to mask it. It might Patch is 568.77 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/81211.diff 40 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 27037f4d5c5c8..835e92565695b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2988,8 +2988,10 @@ lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
Mask, VL);
// Restore the original sign so that -0.0 is preserved.
+ // vfsgnj[n] doesn't set any exception flags, so use unmasked instruction.
Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
- Src, Src, Mask, VL);
+ Src, DAG.getUNDEF(ContainerVT),
+ getAllOnesMask(ContainerVT, VL, DL, DAG), VL);
if (!VT.isFixedLengthVector())
return Truncated;
@@ -3093,8 +3095,10 @@ lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
}
// Restore the original sign so that -0.0 is preserved.
+ // vfsgnj[n] doesn't set any exception flags, so use unmasked instruction.
Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
- Src, Src, Mask, VL);
+ Src, DAG.getUNDEF(ContainerVT),
+ getAllOnesMask(ContainerVT, VL, DL, DAG), VL);
if (VT.isFixedLengthVector())
Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
diff --git a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
index edc348ebc68ff..dcad91ad61d96 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
@@ -20,8 +20,7 @@ define <vscale x 1 x half> @vp_ceil_vv_nxv1f16(<vscale x 1 x half> %va, <vscale
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8
; CHECK-NEXT: ret
%v = call <vscale x 1 x half> @llvm.vp.ceil.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x half> %v
@@ -39,8 +38,7 @@ define <vscale x 1 x half> @vp_ceil_vv_nxv1f16_unmasked(<vscale x 1 x half> %va,
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
@@ -64,8 +62,7 @@ define <vscale x 2 x half> @vp_ceil_vv_nxv2f16(<vscale x 2 x half> %va, <vscale
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8
; CHECK-NEXT: ret
%v = call <vscale x 2 x half> @llvm.vp.ceil.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x half> %v
@@ -83,8 +80,7 @@ define <vscale x 2 x half> @vp_ceil_vv_nxv2f16_unmasked(<vscale x 2 x half> %va,
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
@@ -108,8 +104,7 @@ define <vscale x 4 x half> @vp_ceil_vv_nxv4f16(<vscale x 4 x half> %va, <vscale
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8
; CHECK-NEXT: ret
%v = call <vscale x 4 x half> @llvm.vp.ceil.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x half> %v
@@ -127,8 +122,7 @@ define <vscale x 4 x half> @vp_ceil_vv_nxv4f16_unmasked(<vscale x 4 x half> %va,
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
@@ -153,9 +147,8 @@ define <vscale x 8 x half> @vp_ceil_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v10, v12, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v10, v8
; CHECK-NEXT: ret
%v = call <vscale x 8 x half> @llvm.vp.ceil.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x half> %v
@@ -173,8 +166,7 @@ define <vscale x 8 x half> @vp_ceil_vv_nxv8f16_unmasked(<vscale x 8 x half> %va,
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v10, v8
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
@@ -199,9 +191,8 @@ define <vscale x 16 x half> @vp_ceil_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v12, v16, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v12, v8
; CHECK-NEXT: ret
%v = call <vscale x 16 x half> @llvm.vp.ceil.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x half> %v
@@ -219,8 +210,7 @@ define <vscale x 16 x half> @vp_ceil_vv_nxv16f16_unmasked(<vscale x 16 x half> %
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v12, v8
; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
@@ -245,9 +235,8 @@ define <vscale x 32 x half> @vp_ceil_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v16, v24, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v16, v8
; CHECK-NEXT: ret
%v = call <vscale x 32 x half> @llvm.vp.ceil.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl)
ret <vscale x 32 x half> %v
@@ -265,8 +254,7 @@ define <vscale x 32 x half> @vp_ceil_vv_nxv32f16_unmasked(<vscale x 32 x half> %
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v16, v8
; CHECK-NEXT: ret
%head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
@@ -290,8 +278,7 @@ define <vscale x 1 x float> @vp_ceil_vv_nxv1f32(<vscale x 1 x float> %va, <vscal
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8
; CHECK-NEXT: ret
%v = call <vscale x 1 x float> @llvm.vp.ceil.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x float> %v
@@ -309,8 +296,7 @@ define <vscale x 1 x float> @vp_ceil_vv_nxv1f32_unmasked(<vscale x 1 x float> %v
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
@@ -334,8 +320,7 @@ define <vscale x 2 x float> @vp_ceil_vv_nxv2f32(<vscale x 2 x float> %va, <vscal
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8
; CHECK-NEXT: ret
%v = call <vscale x 2 x float> @llvm.vp.ceil.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x float> %v
@@ -353,8 +338,7 @@ define <vscale x 2 x float> @vp_ceil_vv_nxv2f32_unmasked(<vscale x 2 x float> %v
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
@@ -379,9 +363,8 @@ define <vscale x 4 x float> @vp_ceil_vv_nxv4f32(<vscale x 4 x float> %va, <vscal
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v10, v12, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v10, v8
; CHECK-NEXT: ret
%v = call <vscale x 4 x float> @llvm.vp.ceil.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x float> %v
@@ -399,8 +382,7 @@ define <vscale x 4 x float> @vp_ceil_vv_nxv4f32_unmasked(<vscale x 4 x float> %v
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v10, v8
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
@@ -425,9 +407,8 @@ define <vscale x 8 x float> @vp_ceil_vv_nxv8f32(<vscale x 8 x float> %va, <vscal
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v12, v16, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v12, v8
; CHECK-NEXT: ret
%v = call <vscale x 8 x float> @llvm.vp.ceil.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x float> %v
@@ -445,8 +426,7 @@ define <vscale x 8 x float> @vp_ceil_vv_nxv8f32_unmasked(<vscale x 8 x float> %v
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v12, v8
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
@@ -471,9 +451,8 @@ define <vscale x 16 x float> @vp_ceil_vv_nxv16f32(<vscale x 16 x float> %va, <vs
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v16, v24, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v16, v8
; CHECK-NEXT: ret
%v = call <vscale x 16 x float> @llvm.vp.ceil.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x float> %v
@@ -491,8 +470,7 @@ define <vscale x 16 x float> @vp_ceil_vv_nxv16f32_unmasked(<vscale x 16 x float>
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v16, v8
; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
@@ -516,8 +494,7 @@ define <vscale x 1 x double> @vp_ceil_vv_nxv1f64(<vscale x 1 x double> %va, <vsc
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8
; CHECK-NEXT: ret
%v = call <vscale x 1 x double> @llvm.vp.ceil.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x double> %v
@@ -535,8 +512,7 @@ define <vscale x 1 x double> @vp_ceil_vv_nxv1f64_unmasked(<vscale x 1 x double>
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
@@ -561,9 +537,8 @@ define <vscale x 2 x double> @vp_ceil_vv_nxv2f64(<vscale x 2 x double> %va, <vsc
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v10, v12, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v10, v8
; CHECK-NEXT: ret
%v = call <vscale x 2 x double> @llvm.vp.ceil.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x double> %v
@@ -581,8 +556,7 @@ define <vscale x 2 x double> @vp_ceil_vv_nxv2f64_unmasked(<vscale x 2 x double>
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v10, v8
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
@@ -607,9 +581,8 @@ define <vscale x 4 x double> @vp_ceil_vv_nxv4f64(<vscale x 4 x double> %va, <vsc
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v12, v16, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v12, v8
; CHECK-NEXT: ret
%v = call <vscale x 4 x double> @llvm.vp.ceil.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x double> %v
@@ -627,8 +600,7 @@ define <vscale x 4 x double> @vp_ceil_vv_nxv4f64_unmasked(<vscale x 4 x double>
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v12, v8
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
@@ -653,9 +625,8 @@ define <vscale x 7 x double> @vp_ceil_vv_nxv7f64(<vscale x 7 x double> %va, <vsc
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v16, v24, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v16, v8
; CHECK-NEXT: ret
%v = call <vscale x 7 x double> @llvm.vp.ceil.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
ret <vscale x 7 x double> %v
@@ -673,8 +644,7 @@ define <vscale x 7 x double> @vp_ceil_vv_nxv7f64_unmasked(<vscale x 7 x double>
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v16, v8
; CHECK-NEXT: ret
%head = insertelement <vscale x 7 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 7 x i1> %head, <vscale x 7 x i1> poison, <vscale x 7 x i32> zeroinitializer
@@ -699,9 +669,8 @@ define <vscale x 8 x double> @vp_ceil_vv_nxv8f64(<vscale x 8 x double> %va, <vsc
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v16, v24, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v16, v8
; CHECK-NEXT: ret
%v = call <vscale x 8 x double> @llvm.vp.ceil.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x double> %v
@@ -719,8 +688,7 @@ define <vscale x 8 x double> @vp_ceil_vv_nxv8f64_unmasked(<vscale x 8 x double>
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v16, v8
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
@@ -737,10 +705,10 @@ define <vscale x 16 x double> @vp_ceil_vv_nxv16f64(<vscale x 16 x double> %va, <
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vmv1r.v v1, v0
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
@@ -764,39 +732,27 @@ define <vscale x 16 x double> @vp_ceil_vv_nxv16f64(<v...
[truncated]
|
@@ -20,8 +20,7 @@ define <vscale x 1 x half> @vp_ceil_vv_nxv1f16(<vscale x 1 x half> %va, <vscale | |||
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t | |||
; CHECK-NEXT: fsrm a0 | |||
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t | |||
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu | |||
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t | |||
; CHECK-NEXT: vfsgnj.vv v8, v9, v8 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This puts all ones in all of the masked off lanes. That's not what we want. We need to keep the original input value for those lanes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I overlooked that part, closing this PR. Thanks for pointing that out
vfsgnj doesn't set any exception flags so we don't need to mask it. It might
also avoid a vsetvli policy toggle. We can do this for VP ops too, since
disabled lanes are poison.