142 changes: 39 additions & 103 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll
Original file line number Diff line number Diff line change
Expand Up @@ -401,54 +401,30 @@ define void @masked_store_v32i64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: li a4, 18
; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: slli a3, a3, 1
; RV32-NEXT: sub sp, sp, a3
; RV32-NEXT: addi a3, a2, 128
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vle64.v v8, (a3)
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a4, a3, 3
; RV32-NEXT: add a3, a4, a3
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vle64.v v0, (a2)
; RV32-NEXT: vle64.v v0, (a3)
; RV32-NEXT: vle64.v v8, (a2)
; RV32-NEXT: li a2, 32
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vmv.v.i v24, 0
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vmseq.vv v8, v0, v24
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 16
; RV32-NEXT: vmseq.vv v8, v8, v24
; RV32-NEXT: addi a2, sp, 16
; RV32-NEXT: vs1r.v v8, (a2) # Unknown-size Folded Spill
; RV32-NEXT: addi a2, a0, 128
; RV32-NEXT: vle64.v v8, (a2)
; RV32-NEXT: vle64.v v16, (a0)
; RV32-NEXT: addi a0, sp, 16
; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a2, a0, 3
; RV32-NEXT: add a0, a2, a0
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vmseq.vv v0, v16, v24
; RV32-NEXT: vle64.v v16, (a2)
; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: vmseq.vv v0, v0, v24
; RV32-NEXT: addi a0, a1, 128
; RV32-NEXT: vse64.v v8, (a0), v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vse64.v v16, (a0), v0.t
; RV32-NEXT: addi a0, sp, 16
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vse64.v v8, (a1), v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 18
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
Expand All @@ -457,37 +433,27 @@ define void @masked_store_v32i64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: csrr a3, vlenb
; RV64-NEXT: slli a3, a3, 4
; RV64-NEXT: slli a3, a3, 3
; RV64-NEXT: sub sp, sp, a3
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vle64.v v8, (a2)
; RV64-NEXT: addi a2, a2, 128
; RV64-NEXT: vle64.v v16, (a2)
; RV64-NEXT: csrr a2, vlenb
; RV64-NEXT: slli a2, a2, 3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: addi a2, sp, 16
; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV64-NEXT: vmseq.vi v0, v8, 0
; RV64-NEXT: vle64.v v24, (a0)
; RV64-NEXT: addi a0, a0, 128
; RV64-NEXT: vle64.v v8, (a0)
; RV64-NEXT: vle64.v v16, (a0)
; RV64-NEXT: addi a0, sp, 16
; RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: add a0, sp, a0
; RV64-NEXT: addi a0, a0, 16
; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV64-NEXT: vmseq.vi v8, v16, 0
; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV64-NEXT: vmseq.vi v8, v8, 0
; RV64-NEXT: vse64.v v24, (a1), v0.t
; RV64-NEXT: addi a0, a1, 128
; RV64-NEXT: vmv1r.v v0, v8
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vse64.v v8, (a0), v0.t
; RV64-NEXT: vse64.v v16, (a0), v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 4
; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
Expand Down Expand Up @@ -540,38 +506,28 @@ define void @masked_store_v64i32(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: csrr a3, vlenb
; CHECK-NEXT: slli a3, a3, 4
; CHECK-NEXT: slli a3, a3, 3
; CHECK-NEXT: sub sp, sp, a3
; CHECK-NEXT: li a3, 32
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v8, (a2)
; CHECK-NEXT: addi a2, a2, 128
; CHECK-NEXT: vle32.v v16, (a2)
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: add a2, sp, a2
; CHECK-NEXT: addi a2, a2, 16
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmseq.vi v0, v8, 0
; CHECK-NEXT: vle32.v v24, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vle32.v v16, (a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmseq.vi v8, v16, 0
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmseq.vi v8, v8, 0
; CHECK-NEXT: vse32.v v24, (a1), v0.t
; CHECK-NEXT: addi a0, a1, 128
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vse32.v v8, (a0), v0.t
; CHECK-NEXT: vse32.v v16, (a0), v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
Expand Down Expand Up @@ -606,38 +562,28 @@ define void @masked_store_v128i16(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: csrr a3, vlenb
; CHECK-NEXT: slli a3, a3, 4
; CHECK-NEXT: slli a3, a3, 3
; CHECK-NEXT: sub sp, sp, a3
; CHECK-NEXT: li a3, 64
; CHECK-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; CHECK-NEXT: vle16.v v8, (a2)
; CHECK-NEXT: addi a2, a2, 128
; CHECK-NEXT: vle16.v v16, (a2)
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: add a2, sp, a2
; CHECK-NEXT: addi a2, a2, 16
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmseq.vi v0, v8, 0
; CHECK-NEXT: vle16.v v24, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vle16.v v16, (a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmseq.vi v8, v16, 0
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmseq.vi v8, v8, 0
; CHECK-NEXT: vse16.v v24, (a1), v0.t
; CHECK-NEXT: addi a0, a1, 128
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vse16.v v8, (a0), v0.t
; CHECK-NEXT: vse16.v v16, (a0), v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
Expand All @@ -654,38 +600,28 @@ define void @masked_store_v256i8(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: csrr a3, vlenb
; CHECK-NEXT: slli a3, a3, 4
; CHECK-NEXT: slli a3, a3, 3
; CHECK-NEXT: sub sp, sp, a3
; CHECK-NEXT: li a3, 128
; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
; CHECK-NEXT: vle8.v v8, (a2)
; CHECK-NEXT: addi a2, a2, 128
; CHECK-NEXT: vle8.v v16, (a2)
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: add a2, sp, a2
; CHECK-NEXT: addi a2, a2, 16
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmseq.vi v0, v8, 0
; CHECK-NEXT: vle8.v v24, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vle8.v v16, (a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmseq.vi v8, v16, 0
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmseq.vi v8, v8, 0
; CHECK-NEXT: vse8.v v24, (a1), v0.t
; CHECK-NEXT: addi a0, a1, 128
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vse8.v v8, (a0), v0.t
; CHECK-NEXT: vse8.v v16, (a0), v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
Expand Down
119 changes: 57 additions & 62 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -135,21 +135,19 @@ declare <16 x half> @llvm.vp.nearbyint.v16f16(<16 x half>, <16 x i1>, i32)
define <16 x half> @vp_nearbyint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_v16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI6_0)
; CHECK-NEXT: flh fa5, %lo(.LCPI6_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vfabs.v v10, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%v = call <16 x half> @llvm.vp.nearbyint.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
ret <16 x half> %v
Expand Down Expand Up @@ -263,21 +261,19 @@ declare <8 x float> @llvm.vp.nearbyint.v8f32(<8 x float>, <8 x i1>, i32)
define <8 x float> @vp_nearbyint_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vfabs.v v10, v8, v0.t
; CHECK-NEXT: lui a0, 307200
; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%v = call <8 x float> @llvm.vp.nearbyint.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
ret <8 x float> %v
Expand Down Expand Up @@ -307,21 +303,19 @@ declare <16 x float> @llvm.vp.nearbyint.v16f32(<16 x float>, <16 x i1>, i32)
define <16 x float> @vp_nearbyint_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: lui a0, 307200
; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <16 x float> @llvm.vp.nearbyint.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
ret <16 x float> %v
Expand Down Expand Up @@ -393,21 +387,19 @@ declare <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double>, <4 x i1>, i32)
define <4 x double> @vp_nearbyint_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI18_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vfabs.v v10, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%v = call <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
ret <4 x double> %v
Expand Down Expand Up @@ -437,21 +429,19 @@ declare <8 x double> @llvm.vp.nearbyint.v8f64(<8 x double>, <8 x i1>, i32)
define <8 x double> @vp_nearbyint_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI20_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <8 x double> @llvm.vp.nearbyint.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
ret <8 x double> %v
Expand Down Expand Up @@ -481,21 +471,19 @@ declare <15 x double> @llvm.vp.nearbyint.v15f64(<15 x double>, <15 x i1>, i32)
define <15 x double> @vp_nearbyint_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_v15f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <15 x double> @llvm.vp.nearbyint.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
ret <15 x double> %v
Expand Down Expand Up @@ -525,21 +513,19 @@ declare <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double>, <16 x i1>, i32)
define <16 x double> @vp_nearbyint_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_v16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
ret <16 x double> %v
Expand Down Expand Up @@ -572,15 +558,17 @@ define <32 x double> @vp_nearbyint_v32f64(<32 x double> %va, <32 x i1> %m, i32 z
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT: vmv1r.v v25, v0
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: vslidedown.vi v7, v0, 2
; CHECK-NEXT: vslidedown.vi v24, v0, 2
; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB26_2
; CHECK-NEXT: # %bb.1:
Expand All @@ -589,40 +577,47 @@ define <32 x double> @vp_nearbyint_v32f64(<32 x double> %va, <32 x i1> %m, i32 z
; CHECK-NEXT: lui a2, %hi(.LCPI26_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t
; CHECK-NEXT: frflags a1
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: fsflags a1
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: addi a1, a0, -16
; CHECK-NEXT: sltu a0, a0, a1
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vmv1r.v v8, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfabs.v v16, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmv.v.v v16, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
Expand Down
496 changes: 336 additions & 160 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll

Large diffs are not rendered by default.

44 changes: 22 additions & 22 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1586,8 +1586,8 @@ define signext i8 @vpreduce_mul_v8i8(i8 signext %s, <8 x i8> %v, <8 x i1> %m, i3
; RV32-NEXT: mv a2, a0
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vid.v v10
; RV32-NEXT: vmsltu.vx v9, v10, a1
; RV32-NEXT: vmand.mm v0, v9, v0
; RV32-NEXT: vmsltu.vx v2, v10, a1
; RV32-NEXT: vmand.mm v0, v2, v0
; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32-NEXT: vmv.v.i v9, 1
; RV32-NEXT: vmerge.vvm v8, v9, v8, v0
Expand Down Expand Up @@ -1615,8 +1615,8 @@ define signext i8 @vpreduce_mul_v8i8(i8 signext %s, <8 x i8> %v, <8 x i1> %m, i3
; RV64-NEXT: mv a2, a0
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT: vid.v v10
; RV64-NEXT: vmsltu.vx v9, v10, a1
; RV64-NEXT: vmand.mm v0, v9, v0
; RV64-NEXT: vmsltu.vx v2, v10, a1
; RV64-NEXT: vmand.mm v0, v2, v0
; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV64-NEXT: vmv.v.i v9, 1
; RV64-NEXT: vmerge.vvm v8, v9, v8, v0
Expand Down Expand Up @@ -1650,8 +1650,8 @@ define signext i8 @vpreduce_mul_v16i8(i8 signext %s, <16 x i8> %v, <16 x i1> %m,
; RV32-NEXT: mv a2, a0
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vid.v v12
; RV32-NEXT: vmsltu.vx v9, v12, a1
; RV32-NEXT: vmand.mm v0, v9, v0
; RV32-NEXT: vmsltu.vx v4, v12, a1
; RV32-NEXT: vmand.mm v0, v4, v0
; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RV32-NEXT: vmv.v.i v9, 1
; RV32-NEXT: vmerge.vvm v8, v9, v8, v0
Expand Down Expand Up @@ -1681,8 +1681,8 @@ define signext i8 @vpreduce_mul_v16i8(i8 signext %s, <16 x i8> %v, <16 x i1> %m,
; RV64-NEXT: mv a2, a0
; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV64-NEXT: vid.v v12
; RV64-NEXT: vmsltu.vx v9, v12, a1
; RV64-NEXT: vmand.mm v0, v9, v0
; RV64-NEXT: vmsltu.vx v4, v12, a1
; RV64-NEXT: vmand.mm v0, v4, v0
; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RV64-NEXT: vmv.v.i v9, 1
; RV64-NEXT: vmerge.vvm v8, v9, v8, v0
Expand Down Expand Up @@ -1719,8 +1719,8 @@ define signext i8 @vpreduce_mul_v32i8(i8 signext %s, <32 x i8> %v, <32 x i1> %m,
; RV32-NEXT: li a0, 32
; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; RV32-NEXT: vid.v v16
; RV32-NEXT: vmsltu.vx v10, v16, a1
; RV32-NEXT: vmand.mm v0, v10, v0
; RV32-NEXT: vmsltu.vx v16, v16, a1
; RV32-NEXT: vmand.mm v0, v16, v0
; RV32-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; RV32-NEXT: vmv.v.i v10, 1
; RV32-NEXT: vmerge.vvm v8, v10, v8, v0
Expand Down Expand Up @@ -1753,8 +1753,8 @@ define signext i8 @vpreduce_mul_v32i8(i8 signext %s, <32 x i8> %v, <32 x i1> %m,
; RV64-NEXT: li a0, 32
; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; RV64-NEXT: vid.v v16
; RV64-NEXT: vmsltu.vx v10, v16, a1
; RV64-NEXT: vmand.mm v0, v10, v0
; RV64-NEXT: vmsltu.vx v16, v16, a1
; RV64-NEXT: vmand.mm v0, v16, v0
; RV64-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; RV64-NEXT: vmv.v.i v10, 1
; RV64-NEXT: vmerge.vvm v8, v10, v8, v0
Expand Down Expand Up @@ -1796,14 +1796,14 @@ define signext i8 @vpreduce_mul_v64i8(i8 signext %s, <64 x i8> %v, <64 x i1> %m,
; RV32-NEXT: vle8.v v12, (a2)
; RV32-NEXT: mv a2, a0
; RV32-NEXT: vid.v v16
; RV32-NEXT: vmsltu.vx v14, v16, a1
; RV32-NEXT: vsext.vf4 v16, v12
; RV32-NEXT: vmsltu.vx v12, v16, a1
; RV32-NEXT: vmsltu.vx v16, v16, a1
; RV32-NEXT: vsext.vf4 v24, v12
; RV32-NEXT: vmsltu.vx v24, v24, a1
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT: vslideup.vi v14, v12, 4
; RV32-NEXT: vslideup.vi v16, v24, 4
; RV32-NEXT: li a0, 64
; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV32-NEXT: vmand.mm v0, v14, v0
; RV32-NEXT: vmand.mm v0, v16, v0
; RV32-NEXT: vmv.v.i v12, 1
; RV32-NEXT: vmerge.vvm v8, v12, v8, v0
; RV32-NEXT: vslidedown.vx v12, v8, a3
Expand Down Expand Up @@ -1840,14 +1840,14 @@ define signext i8 @vpreduce_mul_v64i8(i8 signext %s, <64 x i8> %v, <64 x i1> %m,
; RV64-NEXT: vle8.v v12, (a2)
; RV64-NEXT: mv a2, a0
; RV64-NEXT: vid.v v16
; RV64-NEXT: vmsltu.vx v14, v16, a1
; RV64-NEXT: vsext.vf4 v16, v12
; RV64-NEXT: vmsltu.vx v12, v16, a1
; RV64-NEXT: vmsltu.vx v16, v16, a1
; RV64-NEXT: vsext.vf4 v24, v12
; RV64-NEXT: vmsltu.vx v24, v24, a1
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT: vslideup.vi v14, v12, 4
; RV64-NEXT: vslideup.vi v16, v24, 4
; RV64-NEXT: li a0, 64
; RV64-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV64-NEXT: vmand.mm v0, v14, v0
; RV64-NEXT: vmand.mm v0, v16, v0
; RV64-NEXT: vmv.v.i v12, 1
; RV64-NEXT: vmerge.vvm v8, v12, v8, v0
; RV64-NEXT: vslidedown.vx v12, v8, a3
Expand Down
89 changes: 36 additions & 53 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -123,19 +123,17 @@ declare <16 x half> @llvm.vp.rint.v16f16(<16 x half>, <16 x i1>, i32)
define <16 x half> @vp_rint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI6_0)
; CHECK-NEXT: flh fa5, %lo(.LCPI6_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vfabs.v v10, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%v = call <16 x half> @llvm.vp.rint.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
ret <16 x half> %v
Expand Down Expand Up @@ -239,19 +237,17 @@ declare <8 x float> @llvm.vp.rint.v8f32(<8 x float>, <8 x i1>, i32)
define <8 x float> @vp_rint_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vfabs.v v10, v8, v0.t
; CHECK-NEXT: lui a0, 307200
; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%v = call <8 x float> @llvm.vp.rint.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
ret <8 x float> %v
Expand Down Expand Up @@ -279,19 +275,17 @@ declare <16 x float> @llvm.vp.rint.v16f32(<16 x float>, <16 x i1>, i32)
define <16 x float> @vp_rint_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: lui a0, 307200
; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <16 x float> @llvm.vp.rint.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
ret <16 x float> %v
Expand Down Expand Up @@ -357,19 +351,17 @@ declare <4 x double> @llvm.vp.rint.v4f64(<4 x double>, <4 x i1>, i32)
define <4 x double> @vp_rint_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI18_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vfabs.v v10, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%v = call <4 x double> @llvm.vp.rint.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
ret <4 x double> %v
Expand Down Expand Up @@ -397,19 +389,17 @@ declare <8 x double> @llvm.vp.rint.v8f64(<8 x double>, <8 x i1>, i32)
define <8 x double> @vp_rint_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI20_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <8 x double> @llvm.vp.rint.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
ret <8 x double> %v
Expand Down Expand Up @@ -437,19 +427,17 @@ declare <15 x double> @llvm.vp.rint.v15f64(<15 x double>, <15 x i1>, i32)
define <15 x double> @vp_rint_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v15f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <15 x double> @llvm.vp.rint.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
ret <15 x double> %v
Expand Down Expand Up @@ -477,19 +465,17 @@ declare <16 x double> @llvm.vp.rint.v16f64(<16 x double>, <16 x i1>, i32)
define <16 x double> @vp_rint_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <16 x double> @llvm.vp.rint.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
ret <16 x double> %v
Expand Down Expand Up @@ -523,7 +509,6 @@ define <32 x double> @vp_rint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroex
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT: vmv1r.v v25, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
Expand All @@ -540,12 +525,10 @@ define <32 x double> @vp_rint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroex
; CHECK-NEXT: lui a2, %hi(.LCPI26_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
Expand Down
113 changes: 46 additions & 67 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -194,23 +194,21 @@ define <8 x half> @vp_round_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl)
;
; ZVFHMIN-LABEL: vp_round_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vmv1r.v v9, v0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t
; ZVFHMIN-NEXT: vfabs.v v8, v10, v0.t
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT: vmflt.vf v9, v12, fa5, v0.t
; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: fsrmi a0, 4
; ZVFHMIN-NEXT: vmv1r.v v0, v9
; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t
; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
; ZVFHMIN-NEXT: fsrm a0
; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT: vfsgnj.vv v10, v12, v10, v0.t
; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
Expand Down Expand Up @@ -261,42 +259,38 @@ declare <16 x half> @llvm.vp.round.v16f16(<16 x half>, <16 x i1>, i32)
define <16 x half> @vp_round_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_round_v16f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI6_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8, v0.t
; ZVFH-NEXT: vfabs.v v10, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t
; ZVFH-NEXT: vmflt.vf v0, v10, fa5, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFH-NEXT: fsrmi a0, 4
; ZVFH-NEXT: vmv1r.v v0, v10
; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t
; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t
; ZVFH-NEXT: fsrm a0
; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t
; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vp_round_v16f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vmv1r.v v10, v0
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t
; ZVFHMIN-NEXT: vfabs.v v8, v12, v0.t
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; ZVFHMIN-NEXT: vmflt.vf v10, v16, fa5, v0.t
; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT: fsrmi a0, 4
; ZVFHMIN-NEXT: vmv1r.v v0, v10
; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t
; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t
; ZVFHMIN-NEXT: fsrm a0
; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v12, v0.t
; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
Expand Down Expand Up @@ -431,21 +425,19 @@ declare <8 x float> @llvm.vp.round.v8f32(<8 x float>, <8 x i1>, i32)
define <8 x float> @vp_round_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vfabs.v v10, v8, v0.t
; CHECK-NEXT: lui a0, 307200
; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%v = call <8 x float> @llvm.vp.round.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
ret <8 x float> %v
Expand Down Expand Up @@ -475,21 +467,19 @@ declare <16 x float> @llvm.vp.round.v16f32(<16 x float>, <16 x i1>, i32)
define <16 x float> @vp_round_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: lui a0, 307200
; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <16 x float> @llvm.vp.round.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
ret <16 x float> %v
Expand Down Expand Up @@ -561,21 +551,19 @@ declare <4 x double> @llvm.vp.round.v4f64(<4 x double>, <4 x i1>, i32)
define <4 x double> @vp_round_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI18_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vfabs.v v10, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%v = call <4 x double> @llvm.vp.round.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
ret <4 x double> %v
Expand Down Expand Up @@ -605,21 +593,19 @@ declare <8 x double> @llvm.vp.round.v8f64(<8 x double>, <8 x i1>, i32)
define <8 x double> @vp_round_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI20_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <8 x double> @llvm.vp.round.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
ret <8 x double> %v
Expand Down Expand Up @@ -649,21 +635,19 @@ declare <15 x double> @llvm.vp.round.v15f64(<15 x double>, <15 x i1>, i32)
define <15 x double> @vp_round_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v15f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <15 x double> @llvm.vp.round.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
ret <15 x double> %v
Expand Down Expand Up @@ -693,21 +677,19 @@ declare <16 x double> @llvm.vp.round.v16f64(<16 x double>, <16 x i1>, i32)
define <16 x double> @vp_round_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <16 x double> @llvm.vp.round.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
ret <16 x double> %v
Expand Down Expand Up @@ -743,7 +725,6 @@ define <32 x double> @vp_round_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT: vmv1r.v v25, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
Expand All @@ -760,13 +741,11 @@ define <32 x double> @vp_round_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe
; CHECK-NEXT: lui a2, %hi(.LCPI26_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: fsrmi a1, 4
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a1
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
Expand Down
113 changes: 46 additions & 67 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -194,23 +194,21 @@ define <8 x half> @vp_roundeven_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %
;
; ZVFHMIN-LABEL: vp_roundeven_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vmv1r.v v9, v0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t
; ZVFHMIN-NEXT: vfabs.v v8, v10, v0.t
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT: vmflt.vf v9, v12, fa5, v0.t
; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: fsrmi a0, 0
; ZVFHMIN-NEXT: vmv1r.v v0, v9
; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t
; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
; ZVFHMIN-NEXT: fsrm a0
; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT: vfsgnj.vv v10, v12, v10, v0.t
; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
Expand Down Expand Up @@ -261,42 +259,38 @@ declare <16 x half> @llvm.vp.roundeven.v16f16(<16 x half>, <16 x i1>, i32)
define <16 x half> @vp_roundeven_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundeven_v16f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI6_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8, v0.t
; ZVFH-NEXT: vfabs.v v10, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t
; ZVFH-NEXT: vmflt.vf v0, v10, fa5, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFH-NEXT: fsrmi a0, 0
; ZVFH-NEXT: vmv1r.v v0, v10
; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t
; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t
; ZVFH-NEXT: fsrm a0
; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t
; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vp_roundeven_v16f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vmv1r.v v10, v0
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t
; ZVFHMIN-NEXT: vfabs.v v8, v12, v0.t
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; ZVFHMIN-NEXT: vmflt.vf v10, v16, fa5, v0.t
; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT: fsrmi a0, 0
; ZVFHMIN-NEXT: vmv1r.v v0, v10
; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t
; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t
; ZVFHMIN-NEXT: fsrm a0
; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v12, v0.t
; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
Expand Down Expand Up @@ -431,21 +425,19 @@ declare <8 x float> @llvm.vp.roundeven.v8f32(<8 x float>, <8 x i1>, i32)
define <8 x float> @vp_roundeven_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vfabs.v v10, v8, v0.t
; CHECK-NEXT: lui a0, 307200
; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%v = call <8 x float> @llvm.vp.roundeven.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
ret <8 x float> %v
Expand Down Expand Up @@ -475,21 +467,19 @@ declare <16 x float> @llvm.vp.roundeven.v16f32(<16 x float>, <16 x i1>, i32)
define <16 x float> @vp_roundeven_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: lui a0, 307200
; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <16 x float> @llvm.vp.roundeven.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
ret <16 x float> %v
Expand Down Expand Up @@ -561,21 +551,19 @@ declare <4 x double> @llvm.vp.roundeven.v4f64(<4 x double>, <4 x i1>, i32)
define <4 x double> @vp_roundeven_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI18_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vfabs.v v10, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%v = call <4 x double> @llvm.vp.roundeven.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
ret <4 x double> %v
Expand Down Expand Up @@ -605,21 +593,19 @@ declare <8 x double> @llvm.vp.roundeven.v8f64(<8 x double>, <8 x i1>, i32)
define <8 x double> @vp_roundeven_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI20_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <8 x double> @llvm.vp.roundeven.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
ret <8 x double> %v
Expand Down Expand Up @@ -649,21 +635,19 @@ declare <15 x double> @llvm.vp.roundeven.v15f64(<15 x double>, <15 x i1>, i32)
define <15 x double> @vp_roundeven_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v15f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <15 x double> @llvm.vp.roundeven.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
ret <15 x double> %v
Expand Down Expand Up @@ -693,21 +677,19 @@ declare <16 x double> @llvm.vp.roundeven.v16f64(<16 x double>, <16 x i1>, i32)
define <16 x double> @vp_roundeven_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <16 x double> @llvm.vp.roundeven.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
ret <16 x double> %v
Expand Down Expand Up @@ -743,7 +725,6 @@ define <32 x double> @vp_roundeven_v32f64(<32 x double> %va, <32 x i1> %m, i32 z
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT: vmv1r.v v25, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
Expand All @@ -760,13 +741,11 @@ define <32 x double> @vp_roundeven_v32f64(<32 x double> %va, <32 x i1> %m, i32 z
; CHECK-NEXT: lui a2, %hi(.LCPI26_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a1
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
Expand Down
113 changes: 46 additions & 67 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -194,23 +194,21 @@ define <8 x half> @vp_roundtozero_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext
;
; ZVFHMIN-LABEL: vp_roundtozero_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vmv1r.v v9, v0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t
; ZVFHMIN-NEXT: vfabs.v v8, v10, v0.t
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT: vmflt.vf v9, v12, fa5, v0.t
; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: fsrmi a0, 1
; ZVFHMIN-NEXT: vmv1r.v v0, v9
; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t
; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
; ZVFHMIN-NEXT: fsrm a0
; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT: vfsgnj.vv v10, v12, v10, v0.t
; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
Expand Down Expand Up @@ -261,42 +259,38 @@ declare <16 x half> @llvm.vp.roundtozero.v16f16(<16 x half>, <16 x i1>, i32)
define <16 x half> @vp_roundtozero_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_v16f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI6_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8, v0.t
; ZVFH-NEXT: vfabs.v v10, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t
; ZVFH-NEXT: vmflt.vf v0, v10, fa5, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFH-NEXT: fsrmi a0, 1
; ZVFH-NEXT: vmv1r.v v0, v10
; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t
; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t
; ZVFH-NEXT: fsrm a0
; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t
; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vp_roundtozero_v16f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vmv1r.v v10, v0
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t
; ZVFHMIN-NEXT: vfabs.v v8, v12, v0.t
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; ZVFHMIN-NEXT: vmflt.vf v10, v16, fa5, v0.t
; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT: fsrmi a0, 1
; ZVFHMIN-NEXT: vmv1r.v v0, v10
; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t
; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t
; ZVFHMIN-NEXT: fsrm a0
; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v12, v0.t
; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
Expand Down Expand Up @@ -431,21 +425,19 @@ declare <8 x float> @llvm.vp.roundtozero.v8f32(<8 x float>, <8 x i1>, i32)
define <8 x float> @vp_roundtozero_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vfabs.v v10, v8, v0.t
; CHECK-NEXT: lui a0, 307200
; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: fsrmi a0, 1
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%v = call <8 x float> @llvm.vp.roundtozero.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
ret <8 x float> %v
Expand Down Expand Up @@ -475,21 +467,19 @@ declare <16 x float> @llvm.vp.roundtozero.v16f32(<16 x float>, <16 x i1>, i32)
define <16 x float> @vp_roundtozero_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: lui a0, 307200
; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: fsrmi a0, 1
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <16 x float> @llvm.vp.roundtozero.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
ret <16 x float> %v
Expand Down Expand Up @@ -561,21 +551,19 @@ declare <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double>, <4 x i1>, i32)
define <4 x double> @vp_roundtozero_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI18_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vfabs.v v10, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: fsrmi a0, 1
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%v = call <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
ret <4 x double> %v
Expand Down Expand Up @@ -605,21 +593,19 @@ declare <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double>, <8 x i1>, i32)
define <8 x double> @vp_roundtozero_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI20_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: fsrmi a0, 1
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
ret <8 x double> %v
Expand Down Expand Up @@ -649,21 +635,19 @@ declare <15 x double> @llvm.vp.roundtozero.v15f64(<15 x double>, <15 x i1>, i32)
define <15 x double> @vp_roundtozero_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v15f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: fsrmi a0, 1
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <15 x double> @llvm.vp.roundtozero.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
ret <15 x double> %v
Expand Down Expand Up @@ -693,21 +677,19 @@ declare <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double>, <16 x i1>, i32)
define <16 x double> @vp_roundtozero_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: fsrmi a0, 1
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
ret <16 x double> %v
Expand Down Expand Up @@ -743,7 +725,6 @@ define <32 x double> @vp_roundtozero_v32f64(<32 x double> %va, <32 x i1> %m, i32
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT: vmv1r.v v25, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
Expand All @@ -760,13 +741,11 @@ define <32 x double> @vp_roundtozero_v32f64(<32 x double> %va, <32 x i1> %m, i32
; CHECK-NEXT: lui a2, %hi(.LCPI26_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: fsrmi a1, 1
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a1
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
Expand Down
412 changes: 177 additions & 235 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll

Large diffs are not rendered by default.

459 changes: 164 additions & 295 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll

Large diffs are not rendered by default.

50 changes: 25 additions & 25 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -246,12 +246,13 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
; CHECK-NEXT: vslidedown.vi v5, v0, 8
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vi v26, v0, 4
; CHECK-NEXT: addi a3, a1, 128
; CHECK-NEXT: addi a2, a1, 512
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v8, (a2)
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: li a3, 48
; CHECK-NEXT: mul a2, a2, a3
; CHECK-NEXT: li a4, 48
; CHECK-NEXT: mul a2, a2, a4
; CHECK-NEXT: add a2, sp, a2
; CHECK-NEXT: addi a2, a2, 16
; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
Expand All @@ -262,39 +263,38 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
; CHECK-NEXT: vle64.v v16, (a2)
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: addi a2, a7, -64
; CHECK-NEXT: sltu a3, a7, a2
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a4, a3, a2
; CHECK-NEXT: sltu a4, a7, a2
; CHECK-NEXT: addi a4, a4, -1
; CHECK-NEXT: and a4, a4, a2
; CHECK-NEXT: addi a2, a4, -32
; CHECK-NEXT: sltu a3, a4, a2
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a3, a3, a2
; CHECK-NEXT: addi a2, a3, -16
; CHECK-NEXT: sltu a5, a3, a2
; CHECK-NEXT: sltu a5, a4, a2
; CHECK-NEXT: addi a5, a5, -1
; CHECK-NEXT: and a2, a5, a2
; CHECK-NEXT: and a5, a5, a2
; CHECK-NEXT: addi a2, a5, -16
; CHECK-NEXT: sltu a6, a5, a2
; CHECK-NEXT: addi a6, a6, -1
; CHECK-NEXT: and a2, a6, a2
; CHECK-NEXT: vslidedown.vi v0, v27, 2
; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: li a5, 24
; CHECK-NEXT: mul a2, a2, a5
; CHECK-NEXT: add a2, sp, a2
; CHECK-NEXT: addi a2, a2, 16
; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: addi a5, a1, 128
; CHECK-NEXT: bltu a3, a2, .LBB16_2
; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t
; CHECK-NEXT: csrr a6, vlenb
; CHECK-NEXT: li t0, 24
; CHECK-NEXT: mul a6, a6, t0
; CHECK-NEXT: add a6, sp, a6
; CHECK-NEXT: addi a6, a6, 16
; CHECK-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill
; CHECK-NEXT: bltu a5, a2, .LBB16_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a3, 16
; CHECK-NEXT: li a5, 16
; CHECK-NEXT: .LBB16_2:
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v28, v26, 2
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v8, (a5)
; CHECK-NEXT: addi a5, sp, 16
; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a3, e32, m4, ta, ma
; CHECK-NEXT: vle64.v v8, (a3)
; CHECK-NEXT: addi a3, sp, 16
; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma
; CHECK-NEXT: li a3, 64
; CHECK-NEXT: vmv1r.v v0, v27
; CHECK-NEXT: csrr a5, vlenb
Expand Down
35 changes: 14 additions & 21 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfclass-vp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,9 @@ define <8 x i1> @isnan_v8f32(<8 x float> %x, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: isnan_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vfclass.v v10, v8, v0.t
; CHECK-NEXT: vfclass.v v8, v8, v0.t
; CHECK-NEXT: li a0, 512
; CHECK-NEXT: vmseq.vx v8, v10, a0, v0.t
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmseq.vx v0, v8, a0, v0.t
; CHECK-NEXT: ret
%1 = call <8 x i1> @llvm.vp.is.fpclass.v8f32(<8 x float> %x, i32 2, <8 x i1> %m, i32 %evl)
ret <8 x i1> %1
Expand All @@ -111,10 +110,9 @@ define <16 x i1> @isnan_v16f32(<16 x float> %x, <16 x i1> %m, i32 zeroext %evl)
; CHECK-LABEL: isnan_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vfclass.v v12, v8, v0.t
; CHECK-NEXT: vfclass.v v8, v8, v0.t
; CHECK-NEXT: li a0, 256
; CHECK-NEXT: vmseq.vx v8, v12, a0, v0.t
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmseq.vx v0, v8, a0, v0.t
; CHECK-NEXT: ret
%1 = call <16 x i1> @llvm.vp.is.fpclass.v16f32(<16 x float> %x, i32 1, <16 x i1> %m, i32 %evl)
ret <16 x i1> %1
Expand Down Expand Up @@ -162,10 +160,9 @@ define <4 x i1> @isposinf_v4f64(<4 x double> %x, <4 x i1> %m, i32 zeroext %evl)
; CHECK-LABEL: isposinf_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfclass.v v10, v8, v0.t
; CHECK-NEXT: vfclass.v v8, v8, v0.t
; CHECK-NEXT: li a0, 128
; CHECK-NEXT: vmseq.vx v8, v10, a0, v0.t
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmseq.vx v0, v8, a0, v0.t
; CHECK-NEXT: ret
%1 = call <4 x i1> @llvm.vp.is.fpclass.v4f64(<4 x double> %x, i32 512, <4 x i1> %m, i32 %evl) ; 0x200 = "+inf"
ret <4 x i1> %1
Expand All @@ -187,9 +184,8 @@ define <8 x i1> @isneginf_v8f64(<8 x double> %x, <8 x i1> %m, i32 zeroext %evl)
; CHECK-LABEL: isneginf_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfclass.v v12, v8, v0.t
; CHECK-NEXT: vmseq.vi v8, v12, 1, v0.t
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vfclass.v v8, v8, v0.t
; CHECK-NEXT: vmseq.vi v0, v8, 1, v0.t
; CHECK-NEXT: ret
%1 = call <8 x i1> @llvm.vp.is.fpclass.v8f64(<8 x double> %x, i32 4, <8 x i1> %m, i32 %evl) ; "-inf"
ret <8 x i1> %1
Expand All @@ -212,9 +208,8 @@ define <16 x i1> @isfinite_v16f64(<16 x double> %x, <16 x i1> %m, i32 zeroext %e
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfclass.v v8, v8, v0.t
; CHECK-NEXT: li a0, 126
; CHECK-NEXT: vand.vx v16, v8, a0, v0.t
; CHECK-NEXT: vmsne.vi v8, v16, 0, v0.t
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t
; CHECK-NEXT: ret
%1 = call <16 x i1> @llvm.vp.is.fpclass.v16f64(<16 x double> %x, i32 504, <16 x i1> %m, i32 %evl) ; 0x1f8 = "finite"
ret <16 x i1> %1
Expand All @@ -239,9 +234,8 @@ define <16 x i1> @isposfinite_v16f64(<16 x double> %x, <16 x i1> %m, i32 zeroext
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfclass.v v8, v8, v0.t
; CHECK-NEXT: li a0, 112
; CHECK-NEXT: vand.vx v16, v8, a0, v0.t
; CHECK-NEXT: vmsne.vi v8, v16, 0, v0.t
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t
; CHECK-NEXT: ret
%1 = call <16 x i1> @llvm.vp.is.fpclass.v16f64(<16 x double> %x, i32 448, <16 x i1> %m, i32 %evl) ; 0x1c0 = "+finite"
ret <16 x i1> %1
Expand All @@ -265,9 +259,8 @@ define <16 x i1> @isnotfinite_v16f64(<16 x double> %x, <16 x i1> %m, i32 zeroext
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfclass.v v8, v8, v0.t
; CHECK-NEXT: li a0, 897
; CHECK-NEXT: vand.vx v16, v8, a0, v0.t
; CHECK-NEXT: vmsne.vi v8, v16, 0, v0.t
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t
; CHECK-NEXT: ret
%1 = call <16 x i1> @llvm.vp.is.fpclass.v16f64(<16 x double> %x, i32 519, <16 x i1> %m, i32 %evl) ; 0x207 = "inf|nan"
ret <16 x i1> %1
Expand Down
2,268 changes: 1,008 additions & 1,260 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmp-constrained-sdnode.ll

Large diffs are not rendered by default.

972 changes: 486 additions & 486 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmps-constrained-sdnode.ll

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ define <4 x i1> @vpmerge_vv_v4i1(<4 x i1> %va, <4 x i1> %vb, <4 x i1> %m, i32 ze
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vid.v v10
; RV64-NEXT: vmsltu.vx v12, v10, a0
; RV64-NEXT: vmand.mm v9, v9, v12
; RV64-NEXT: vmsltu.vx v2, v10, a0
; RV64-NEXT: vmand.mm v9, v9, v2
; RV64-NEXT: vmandn.mm v8, v8, v9
; RV64-NEXT: vmand.mm v9, v0, v9
; RV64-NEXT: vmor.mm v0, v9, v8
Expand All @@ -48,8 +48,8 @@ define <4 x i1> @vpmerge_vv_v4i1(<4 x i1> %va, <4 x i1> %vb, <4 x i1> %m, i32 ze
; RV64ZVFHMIN: # %bb.0:
; RV64ZVFHMIN-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64ZVFHMIN-NEXT: vid.v v10
; RV64ZVFHMIN-NEXT: vmsltu.vx v12, v10, a0
; RV64ZVFHMIN-NEXT: vmand.mm v9, v9, v12
; RV64ZVFHMIN-NEXT: vmsltu.vx v2, v10, a0
; RV64ZVFHMIN-NEXT: vmand.mm v9, v9, v2
; RV64ZVFHMIN-NEXT: vmandn.mm v8, v8, v9
; RV64ZVFHMIN-NEXT: vmand.mm v9, v0, v9
; RV64ZVFHMIN-NEXT: vmor.mm v0, v9, v8
Expand Down
20 changes: 17 additions & 3 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -456,15 +456,29 @@ define <32 x i64> @select_v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c, i32
define <32 x i64> @select_evl_v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c) {
; CHECK-LABEL: select_evl_v32i64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v24, (a0)
; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle64.v v24, (a0)
; CHECK-NEXT: vle64.v v16, (a0)
; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v0, v0, 2
; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, ma
; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%v = call <32 x i64> @llvm.vp.select.v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c, i32 17)
ret <32 x i64> %v
Expand Down
126 changes: 52 additions & 74 deletions llvm/test/CodeGen/RISCV/rvv/floor-vp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -135,21 +135,19 @@ declare <vscale x 8 x half> @llvm.vp.floor.nxv8f16(<vscale x 8 x half>, <vscale
define <vscale x 8 x half> @vp_floor_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI6_0)
; CHECK-NEXT: flh fa5, %lo(.LCPI6_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vfabs.v v10, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 8 x half> @llvm.vp.floor.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x half> %v
Expand Down Expand Up @@ -179,21 +177,19 @@ declare <vscale x 16 x half> @llvm.vp.floor.nxv16f16(<vscale x 16 x half>, <vsca
define <vscale x 16 x half> @vp_floor_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI8_0)
; CHECK-NEXT: flh fa5, %lo(.LCPI8_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu
; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 16 x half> @llvm.vp.floor.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x half> %v
Expand Down Expand Up @@ -223,21 +219,19 @@ declare <vscale x 32 x half> @llvm.vp.floor.nxv32f16(<vscale x 32 x half>, <vsca
define <vscale x 32 x half> @vp_floor_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv32f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI10_0)
; CHECK-NEXT: flh fa5, %lo(.LCPI10_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu
; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 32 x half> @llvm.vp.floor.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl)
ret <vscale x 32 x half> %v
Expand Down Expand Up @@ -351,21 +345,19 @@ declare <vscale x 4 x float> @llvm.vp.floor.nxv4f32(<vscale x 4 x float>, <vscal
define <vscale x 4 x float> @vp_floor_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vfabs.v v10, v8, v0.t
; CHECK-NEXT: lui a0, 307200
; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 4 x float> @llvm.vp.floor.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x float> %v
Expand Down Expand Up @@ -395,21 +387,19 @@ declare <vscale x 8 x float> @llvm.vp.floor.nxv8f32(<vscale x 8 x float>, <vscal
define <vscale x 8 x float> @vp_floor_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: lui a0, 307200
; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 8 x float> @llvm.vp.floor.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x float> %v
Expand Down Expand Up @@ -439,21 +429,19 @@ declare <vscale x 16 x float> @llvm.vp.floor.nxv16f32(<vscale x 16 x float>, <vs
define <vscale x 16 x float> @vp_floor_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: lui a0, 307200
; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 16 x float> @llvm.vp.floor.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x float> %v
Expand Down Expand Up @@ -525,21 +513,19 @@ declare <vscale x 2 x double> @llvm.vp.floor.nxv2f64(<vscale x 2 x double>, <vsc
define <vscale x 2 x double> @vp_floor_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vfabs.v v10, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 2 x double> @llvm.vp.floor.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x double> %v
Expand Down Expand Up @@ -569,21 +555,19 @@ declare <vscale x 4 x double> @llvm.vp.floor.nxv4f64(<vscale x 4 x double>, <vsc
define <vscale x 4 x double> @vp_floor_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI26_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 4 x double> @llvm.vp.floor.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x double> %v
Expand Down Expand Up @@ -613,21 +597,19 @@ declare <vscale x 7 x double> @llvm.vp.floor.nxv7f64(<vscale x 7 x double>, <vsc
define <vscale x 7 x double> @vp_floor_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv7f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI28_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 7 x double> @llvm.vp.floor.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
ret <vscale x 7 x double> %v
Expand Down Expand Up @@ -657,21 +639,19 @@ declare <vscale x 8 x double> @llvm.vp.floor.nxv8f64(<vscale x 8 x double>, <vsc
define <vscale x 8 x double> @vp_floor_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI30_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 8 x double> @llvm.vp.floor.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x double> %v
Expand Down Expand Up @@ -714,21 +694,19 @@ define <vscale x 16 x double> @vp_floor_nxv16f64(<vscale x 16 x double> %va, <vs
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a2, a1, 3
; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vx v25, v0, a2
; CHECK-NEXT: vslidedown.vx v0, v0, a2
; CHECK-NEXT: sub a2, a0, a1
; CHECK-NEXT: sltu a3, a0, a2
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: lui a3, %hi(.LCPI32_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3)
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vfabs.v v8, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v25, v8, fa5, v0.t
; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: fsrmi a2, 2
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
; CHECK-NEXT: fsrm a2
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
Expand Down
Loading