From 335f2ffcdfc24f37c3abe54873a55faa3c393cdf Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Wed, 17 Sep 2025 13:36:22 +0100 Subject: [PATCH] [RISCV] Set riscv-fpimm-cost threshold to 3 by default `-riscv-fp-imm-cost` controls the threshold at which the constant pool is used for float constants rather than generating them directly (typically into a GPR followed by an `fmv`). The value used for this knob indicates the number of instructions that can be used to produce the value (otherwise we fall back to the constant pool). Upping it to 3 covers a huge number of additional constants (see ), e.g. most whole numbers which can be generated through lui+shift+fmv. As in general we struggle with efficient code generation for constant pool accesses, reducing the number of such accesses is beneficial. We are typically replacing a two-instruction sequence (which includes a load) with a three-instruction sequence (two simple arithmetic operations plus an fmv), which is a reasonable trade given the cost of the load. The CHECK prefixes for various tests had to be updated to avoid conflicts leading to check lines being dropped altogether (see for a change to update_llc_test_checks to aid diagnosing this). --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2 +- llvm/test/CodeGen/RISCV/bfloat-convert.ll | 219 +-- llvm/test/CodeGen/RISCV/bfloat-imm.ll | 5 +- llvm/test/CodeGen/RISCV/calling-conv-half.ll | 284 ++-- llvm/test/CodeGen/RISCV/codemodel-lowering.ll | 106 +- llvm/test/CodeGen/RISCV/double-convert.ll | 62 +- llvm/test/CodeGen/RISCV/double-imm.ll | 5 +- llvm/test/CodeGen/RISCV/double-intrinsics.ll | 30 +- llvm/test/CodeGen/RISCV/double-round-conv.ll | 25 +- llvm/test/CodeGen/RISCV/double-zfa.ll | 51 +- llvm/test/CodeGen/RISCV/float-convert.ll | 62 +- llvm/test/CodeGen/RISCV/float-imm.ll | 5 +- .../CodeGen/RISCV/float-round-conv-sat.ll | 60 +- llvm/test/CodeGen/RISCV/half-arith.ll | 47 +- llvm/test/CodeGen/RISCV/half-convert.ll | 740 +++++----- llvm/test/CodeGen/RISCV/half-imm.ll | 15 +- llvm/test/CodeGen/RISCV/half-intrinsics.ll | 30 +- .../test/CodeGen/RISCV/half-round-conv-sat.ll | 180 ++- llvm/test/CodeGen/RISCV/half-round-conv.ll | 75 +- llvm/test/CodeGen/RISCV/half-select-fcmp.ll | 32 +- llvm/test/CodeGen/RISCV/half-zfa-fli.ll | 65 +- llvm/test/CodeGen/RISCV/half-zfa.ll | 15 +- .../CodeGen/RISCV/repeated-fp-divisors.ll | 5 +- .../test/CodeGen/RISCV/rv64-double-convert.ll | 21 +- llvm/test/CodeGen/RISCV/rv64-float-convert.ll | 12 +- llvm/test/CodeGen/RISCV/rv64-half-convert.ll | 37 +- llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll | 796 ++++++---- .../CodeGen/RISCV/rvv/double-round-conv.ll | 120 +- .../RISCV/rvv/fceil-constrained-sdnode.ll | 246 ++-- llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll | 338 ++++- .../RISCV/rvv/ffloor-constrained-sdnode.ll | 246 ++-- llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll | 338 ++++- .../RISCV/rvv/fixed-vectors-ceil-vp.ll | 1298 +++++++++++++---- .../fixed-vectors-fceil-constrained-sdnode.ll | 246 ++-- ...fixed-vectors-ffloor-constrained-sdnode.ll | 246 ++-- .../RISCV/rvv/fixed-vectors-floor-vp.ll | 1298 +++++++++++++---- ...d-vectors-fnearbyint-constrained-sdnode.ll | 188 ++- .../CodeGen/RISCV/rvv/fixed-vectors-fp.ll | 548 +++++-- ...fixed-vectors-fround-constrained-sdnode.ll | 246 ++-- .../CodeGen/RISCV/rvv/fixed-vectors-fround.ll | 338 ++++- ...d-vectors-froundeven-constrained-sdnode.ll | 246 ++-- .../RISCV/rvv/fixed-vectors-froundeven.ll | 338 ++++- ...fixed-vectors-ftrunc-constrained-sdnode.ll | 222 ++- .../RISCV/rvv/fixed-vectors-nearbyint-vp.ll | 782 ++++++---- .../RISCV/rvv/fixed-vectors-reduction-fp.ll | 
1000 ++++++++----- .../RISCV/rvv/fixed-vectors-rint-vp.ll | 698 ++++++--- .../RISCV/rvv/fixed-vectors-round-vp.ll | 1298 +++++++++++++---- .../RISCV/rvv/fixed-vectors-roundeven-vp.ll | 1298 +++++++++++++---- .../RISCV/rvv/fixed-vectors-roundtozero-vp.ll | 1298 +++++++++++++---- .../RISCV/rvv/fixed-vectors-shuffle-fp.ll | 181 ++- llvm/test/CodeGen/RISCV/rvv/floor-vp.ll | 1298 +++++++++++++---- .../rvv/fnearbyint-constrained-sdnode.ll | 246 ++-- .../CodeGen/RISCV/rvv/fnearbyint-sdnode.ll | 338 ++++- llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll | 298 +++- .../RISCV/rvv/fround-constrained-sdnode.ll | 246 ++-- llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll | 338 ++++- .../rvv/froundeven-constrained-sdnode.ll | 246 ++-- .../CodeGen/RISCV/rvv/froundeven-sdnode.ll | 338 ++++- .../RISCV/rvv/ftrunc-constrained-sdnode.ll | 222 ++- llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll | 298 +++- .../test/CodeGen/RISCV/rvv/half-round-conv.ll | 60 +- llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll | 1298 +++++++++++++---- llvm/test/CodeGen/RISCV/rvv/rint-vp.ll | 1158 ++++++++++++--- llvm/test/CodeGen/RISCV/rvv/round-vp.ll | 1298 +++++++++++++---- llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll | 1298 +++++++++++++---- llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll | 1298 +++++++++++++---- .../test/CodeGen/RISCV/rvv/vfma-vp-combine.ll | 77 +- .../RISCV/rvv/vreductions-fp-sdnode-f16.ll | 10 +- .../RISCV/rvv/vreductions-fp-sdnode.ll | 8 +- .../RISCV/rvv/vreductions-fp-vp-f16.ll | 10 +- .../RISCV/rvv/vsetvli-insert-crossbb.ll | 24 +- llvm/test/CodeGen/RISCV/srodata.ll | 15 - 72 files changed, 19694 insertions(+), 6771 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 10b3f0b213811..9de57a2879d5b 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -79,7 +79,7 @@ static cl::opt FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), - cl::init(2)); + cl::init(3)); static cl::opt ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden, diff --git a/llvm/test/CodeGen/RISCV/bfloat-convert.ll b/llvm/test/CodeGen/RISCV/bfloat-convert.ll index 6207a17734d62..73ff888e44b3b 100644 --- a/llvm/test/CodeGen/RISCV/bfloat-convert.ll +++ b/llvm/test/CodeGen/RISCV/bfloat-convert.ll @@ -51,13 +51,14 @@ define i16 @fcvt_si_bf16_sat(bfloat %a) nounwind { ; CHECK32ZFBFMIN-LABEL: fcvt_si_bf16_sat: ; CHECK32ZFBFMIN: # %bb.0: # %start ; CHECK32ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK32ZFBFMIN-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK32ZFBFMIN-NEXT: feq.s a1, fa5, fa5 -; CHECK32ZFBFMIN-NEXT: flw fa4, %lo(.LCPI1_0)(a0) ; CHECK32ZFBFMIN-NEXT: lui a0, 815104 -; CHECK32ZFBFMIN-NEXT: fmv.w.x fa3, a0 -; CHECK32ZFBFMIN-NEXT: fmax.s fa5, fa5, fa3 -; CHECK32ZFBFMIN-NEXT: neg a0, a1 +; CHECK32ZFBFMIN-NEXT: lui a1, 290816 +; CHECK32ZFBFMIN-NEXT: fmv.w.x fa4, a0 +; CHECK32ZFBFMIN-NEXT: feq.s a0, fa5, fa5 +; CHECK32ZFBFMIN-NEXT: addi a1, a1, -512 +; CHECK32ZFBFMIN-NEXT: neg a0, a0 +; CHECK32ZFBFMIN-NEXT: fmax.s fa5, fa5, fa4 +; CHECK32ZFBFMIN-NEXT: fmv.w.x fa4, a1 ; CHECK32ZFBFMIN-NEXT: fmin.s fa5, fa5, fa4 ; CHECK32ZFBFMIN-NEXT: fcvt.w.s a1, fa5, rtz ; CHECK32ZFBFMIN-NEXT: and a0, a0, a1 @@ -68,12 +69,13 @@ define i16 @fcvt_si_bf16_sat(bfloat %a) nounwind { ; RV32ID-NEXT: fmv.x.w a0, fa0 ; RV32ID-NEXT: lui a1, 815104 ; RV32ID-NEXT: fmv.w.x fa5, a1 -; RV32ID-NEXT: lui a1, %hi(.LCPI1_0) +; RV32ID-NEXT: lui a1, 290816 ; RV32ID-NEXT: 
slli a0, a0, 16 -; RV32ID-NEXT: flw fa4, %lo(.LCPI1_0)(a1) -; RV32ID-NEXT: fmv.w.x fa3, a0 -; RV32ID-NEXT: feq.s a0, fa3, fa3 -; RV32ID-NEXT: fmax.s fa5, fa3, fa5 +; RV32ID-NEXT: addi a1, a1, -512 +; RV32ID-NEXT: fmv.w.x fa4, a0 +; RV32ID-NEXT: feq.s a0, fa4, fa4 +; RV32ID-NEXT: fmax.s fa5, fa4, fa5 +; RV32ID-NEXT: fmv.w.x fa4, a1 ; RV32ID-NEXT: neg a0, a0 ; RV32ID-NEXT: fmin.s fa5, fa5, fa4 ; RV32ID-NEXT: fcvt.w.s a1, fa5, rtz @@ -83,13 +85,14 @@ define i16 @fcvt_si_bf16_sat(bfloat %a) nounwind { ; CHECK64ZFBFMIN-LABEL: fcvt_si_bf16_sat: ; CHECK64ZFBFMIN: # %bb.0: # %start ; CHECK64ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK64ZFBFMIN-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK64ZFBFMIN-NEXT: feq.s a1, fa5, fa5 -; CHECK64ZFBFMIN-NEXT: flw fa4, %lo(.LCPI1_0)(a0) ; CHECK64ZFBFMIN-NEXT: lui a0, 815104 -; CHECK64ZFBFMIN-NEXT: fmv.w.x fa3, a0 -; CHECK64ZFBFMIN-NEXT: fmax.s fa5, fa5, fa3 -; CHECK64ZFBFMIN-NEXT: neg a0, a1 +; CHECK64ZFBFMIN-NEXT: lui a1, 290816 +; CHECK64ZFBFMIN-NEXT: fmv.w.x fa4, a0 +; CHECK64ZFBFMIN-NEXT: feq.s a0, fa5, fa5 +; CHECK64ZFBFMIN-NEXT: addi a1, a1, -512 +; CHECK64ZFBFMIN-NEXT: neg a0, a0 +; CHECK64ZFBFMIN-NEXT: fmax.s fa5, fa5, fa4 +; CHECK64ZFBFMIN-NEXT: fmv.w.x fa4, a1 ; CHECK64ZFBFMIN-NEXT: fmin.s fa5, fa5, fa4 ; CHECK64ZFBFMIN-NEXT: fcvt.l.s a1, fa5, rtz ; CHECK64ZFBFMIN-NEXT: and a0, a0, a1 @@ -100,12 +103,13 @@ define i16 @fcvt_si_bf16_sat(bfloat %a) nounwind { ; RV64ID-NEXT: fmv.x.w a0, fa0 ; RV64ID-NEXT: lui a1, 815104 ; RV64ID-NEXT: fmv.w.x fa5, a1 -; RV64ID-NEXT: lui a1, %hi(.LCPI1_0) +; RV64ID-NEXT: lui a1, 290816 ; RV64ID-NEXT: slli a0, a0, 16 -; RV64ID-NEXT: flw fa4, %lo(.LCPI1_0)(a1) -; RV64ID-NEXT: fmv.w.x fa3, a0 -; RV64ID-NEXT: feq.s a0, fa3, fa3 -; RV64ID-NEXT: fmax.s fa5, fa3, fa5 +; RV64ID-NEXT: addi a1, a1, -512 +; RV64ID-NEXT: fmv.w.x fa4, a0 +; RV64ID-NEXT: feq.s a0, fa4, fa4 +; RV64ID-NEXT: fmax.s fa5, fa4, fa5 +; RV64ID-NEXT: fmv.w.x fa4, a1 ; RV64ID-NEXT: neg a0, a0 ; RV64ID-NEXT: fmin.s fa5, fa5, fa4 ; RV64ID-NEXT: fcvt.l.s a1, fa5, rtz @@ -152,49 +156,53 @@ define i16 @fcvt_ui_bf16(bfloat %a) nounwind { define i16 @fcvt_ui_bf16_sat(bfloat %a) nounwind { ; CHECK32ZFBFMIN-LABEL: fcvt_ui_bf16_sat: ; CHECK32ZFBFMIN: # %bb.0: # %start -; CHECK32ZFBFMIN-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK32ZFBFMIN-NEXT: flw fa5, %lo(.LCPI3_0)(a0) -; CHECK32ZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0 -; CHECK32ZFBFMIN-NEXT: fmv.w.x fa3, zero -; CHECK32ZFBFMIN-NEXT: fmax.s fa4, fa4, fa3 -; CHECK32ZFBFMIN-NEXT: fmin.s fa5, fa4, fa5 +; CHECK32ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK32ZFBFMIN-NEXT: fmv.w.x fa4, zero +; CHECK32ZFBFMIN-NEXT: lui a0, 292864 +; CHECK32ZFBFMIN-NEXT: fmax.s fa5, fa5, fa4 +; CHECK32ZFBFMIN-NEXT: addi a0, a0, -256 +; CHECK32ZFBFMIN-NEXT: fmv.w.x fa4, a0 +; CHECK32ZFBFMIN-NEXT: fmin.s fa5, fa5, fa4 ; CHECK32ZFBFMIN-NEXT: fcvt.wu.s a0, fa5, rtz ; CHECK32ZFBFMIN-NEXT: ret ; ; RV32ID-LABEL: fcvt_ui_bf16_sat: ; RV32ID: # %bb.0: # %start -; RV32ID-NEXT: lui a0, %hi(.LCPI3_0) -; RV32ID-NEXT: flw fa5, %lo(.LCPI3_0)(a0) ; RV32ID-NEXT: fmv.x.w a0, fa0 +; RV32ID-NEXT: fmv.w.x fa5, zero ; RV32ID-NEXT: slli a0, a0, 16 ; RV32ID-NEXT: fmv.w.x fa4, a0 -; RV32ID-NEXT: fmv.w.x fa3, zero -; RV32ID-NEXT: fmax.s fa4, fa4, fa3 -; RV32ID-NEXT: fmin.s fa5, fa4, fa5 +; RV32ID-NEXT: lui a0, 292864 +; RV32ID-NEXT: addi a0, a0, -256 +; RV32ID-NEXT: fmax.s fa5, fa4, fa5 +; RV32ID-NEXT: fmv.w.x fa4, a0 +; RV32ID-NEXT: fmin.s fa5, fa5, fa4 ; RV32ID-NEXT: fcvt.wu.s a0, fa5, rtz ; RV32ID-NEXT: ret ; ; CHECK64ZFBFMIN-LABEL: fcvt_ui_bf16_sat: ; CHECK64ZFBFMIN: # %bb.0: # %start -; 
CHECK64ZFBFMIN-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK64ZFBFMIN-NEXT: flw fa5, %lo(.LCPI3_0)(a0) -; CHECK64ZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0 -; CHECK64ZFBFMIN-NEXT: fmv.w.x fa3, zero -; CHECK64ZFBFMIN-NEXT: fmax.s fa4, fa4, fa3 -; CHECK64ZFBFMIN-NEXT: fmin.s fa5, fa4, fa5 +; CHECK64ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK64ZFBFMIN-NEXT: fmv.w.x fa4, zero +; CHECK64ZFBFMIN-NEXT: lui a0, 292864 +; CHECK64ZFBFMIN-NEXT: fmax.s fa5, fa5, fa4 +; CHECK64ZFBFMIN-NEXT: addi a0, a0, -256 +; CHECK64ZFBFMIN-NEXT: fmv.w.x fa4, a0 +; CHECK64ZFBFMIN-NEXT: fmin.s fa5, fa5, fa4 ; CHECK64ZFBFMIN-NEXT: fcvt.lu.s a0, fa5, rtz ; CHECK64ZFBFMIN-NEXT: ret ; ; RV64ID-LABEL: fcvt_ui_bf16_sat: ; RV64ID: # %bb.0: # %start -; RV64ID-NEXT: lui a0, %hi(.LCPI3_0) -; RV64ID-NEXT: flw fa5, %lo(.LCPI3_0)(a0) ; RV64ID-NEXT: fmv.x.w a0, fa0 +; RV64ID-NEXT: fmv.w.x fa5, zero ; RV64ID-NEXT: slli a0, a0, 16 ; RV64ID-NEXT: fmv.w.x fa4, a0 -; RV64ID-NEXT: fmv.w.x fa3, zero -; RV64ID-NEXT: fmax.s fa4, fa4, fa3 -; RV64ID-NEXT: fmin.s fa5, fa4, fa5 +; RV64ID-NEXT: lui a0, 292864 +; RV64ID-NEXT: addi a0, a0, -256 +; RV64ID-NEXT: fmax.s fa5, fa4, fa5 +; RV64ID-NEXT: fmv.w.x fa4, a0 +; RV64ID-NEXT: fmin.s fa5, fa5, fa4 ; RV64ID-NEXT: fcvt.lu.s a0, fa5, rtz ; RV64ID-NEXT: ret start: @@ -472,20 +480,21 @@ define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind { ; RV32IZFBFMIN-NEXT: # %bb.1: # %start ; RV32IZFBFMIN-NEXT: mv a2, a1 ; RV32IZFBFMIN-NEXT: .LBB10_2: # %start -; RV32IZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0) -; RV32IZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1) +; RV32IZFBFMIN-NEXT: lui a1, 389120 +; RV32IZFBFMIN-NEXT: addi a1, a1, -1 +; RV32IZFBFMIN-NEXT: fmv.w.x fa5, a1 ; RV32IZFBFMIN-NEXT: flt.s a1, fa5, fs0 ; RV32IZFBFMIN-NEXT: beqz a1, .LBB10_4 ; RV32IZFBFMIN-NEXT: # %bb.3: ; RV32IZFBFMIN-NEXT: addi a2, a3, -1 ; RV32IZFBFMIN-NEXT: .LBB10_4: # %start ; RV32IZFBFMIN-NEXT: feq.s a3, fs0, fs0 -; RV32IZFBFMIN-NEXT: neg a4, a1 -; RV32IZFBFMIN-NEXT: neg a1, s0 +; RV32IZFBFMIN-NEXT: neg a4, s0 +; RV32IZFBFMIN-NEXT: neg a5, a1 ; RV32IZFBFMIN-NEXT: neg a3, a3 -; RV32IZFBFMIN-NEXT: and a0, a1, a0 +; RV32IZFBFMIN-NEXT: and a0, a4, a0 ; RV32IZFBFMIN-NEXT: and a1, a3, a2 -; RV32IZFBFMIN-NEXT: or a0, a4, a0 +; RV32IZFBFMIN-NEXT: or a0, a5, a0 ; RV32IZFBFMIN-NEXT: and a0, a3, a0 ; RV32IZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -511,20 +520,21 @@ define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind { ; R32IDZFBFMIN-NEXT: # %bb.1: # %start ; R32IDZFBFMIN-NEXT: mv a2, a1 ; R32IDZFBFMIN-NEXT: .LBB10_2: # %start -; R32IDZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0) -; R32IDZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1) +; R32IDZFBFMIN-NEXT: lui a1, 389120 +; R32IDZFBFMIN-NEXT: addi a1, a1, -1 +; R32IDZFBFMIN-NEXT: fmv.w.x fa5, a1 ; R32IDZFBFMIN-NEXT: flt.s a1, fa5, fs0 ; R32IDZFBFMIN-NEXT: beqz a1, .LBB10_4 ; R32IDZFBFMIN-NEXT: # %bb.3: ; R32IDZFBFMIN-NEXT: addi a2, a3, -1 ; R32IDZFBFMIN-NEXT: .LBB10_4: # %start ; R32IDZFBFMIN-NEXT: feq.s a3, fs0, fs0 -; R32IDZFBFMIN-NEXT: neg a4, a1 -; R32IDZFBFMIN-NEXT: neg a1, s0 +; R32IDZFBFMIN-NEXT: neg a4, s0 +; R32IDZFBFMIN-NEXT: neg a5, a1 ; R32IDZFBFMIN-NEXT: neg a3, a3 -; R32IDZFBFMIN-NEXT: and a0, a1, a0 +; R32IDZFBFMIN-NEXT: and a0, a4, a0 ; R32IDZFBFMIN-NEXT: and a1, a3, a2 -; R32IDZFBFMIN-NEXT: or a0, a4, a0 +; R32IDZFBFMIN-NEXT: or a0, a5, a0 ; R32IDZFBFMIN-NEXT: and a0, a3, a0 ; R32IDZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; R32IDZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -552,8 +562,9 @@ define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind { ; 
RV32ID-NEXT: # %bb.1: # %start ; RV32ID-NEXT: mv a2, a1 ; RV32ID-NEXT: .LBB10_2: # %start -; RV32ID-NEXT: lui a1, %hi(.LCPI10_0) -; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a1) +; RV32ID-NEXT: lui a1, 389120 +; RV32ID-NEXT: addi a1, a1, -1 +; RV32ID-NEXT: fmv.w.x fa5, a1 ; RV32ID-NEXT: flt.s a1, fa5, fs0 ; RV32ID-NEXT: beqz a1, .LBB10_4 ; RV32ID-NEXT: # %bb.3: @@ -641,30 +652,59 @@ define i64 @fcvt_lu_bf16(bfloat %a) nounwind { } define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind { -; CHECK32ZFBFMIN-LABEL: fcvt_lu_bf16_sat: -; CHECK32ZFBFMIN: # %bb.0: # %start -; CHECK32ZFBFMIN-NEXT: addi sp, sp, -16 -; CHECK32ZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; CHECK32ZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; CHECK32ZFBFMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; CHECK32ZFBFMIN-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK32ZFBFMIN-NEXT: flw fa5, %lo(.LCPI12_0)(a0) -; CHECK32ZFBFMIN-NEXT: fcvt.s.bf16 fa0, fa0 -; CHECK32ZFBFMIN-NEXT: fmv.w.x fa4, zero -; CHECK32ZFBFMIN-NEXT: fle.s a0, fa4, fa0 -; CHECK32ZFBFMIN-NEXT: flt.s a1, fa5, fa0 -; CHECK32ZFBFMIN-NEXT: neg s0, a1 -; CHECK32ZFBFMIN-NEXT: neg s1, a0 -; CHECK32ZFBFMIN-NEXT: call __fixunssfdi -; CHECK32ZFBFMIN-NEXT: and a0, s1, a0 -; CHECK32ZFBFMIN-NEXT: and a1, s1, a1 -; CHECK32ZFBFMIN-NEXT: or a0, s0, a0 -; CHECK32ZFBFMIN-NEXT: or a1, s0, a1 -; CHECK32ZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; CHECK32ZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; CHECK32ZFBFMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; CHECK32ZFBFMIN-NEXT: addi sp, sp, 16 -; CHECK32ZFBFMIN-NEXT: ret +; RV32IZFBFMIN-LABEL: fcvt_lu_bf16_sat: +; RV32IZFBFMIN: # %bb.0: # %start +; RV32IZFBFMIN-NEXT: addi sp, sp, -16 +; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0 +; RV32IZFBFMIN-NEXT: fmv.w.x fa5, zero +; RV32IZFBFMIN-NEXT: fle.s a0, fa5, fs0 +; RV32IZFBFMIN-NEXT: neg s0, a0 +; RV32IZFBFMIN-NEXT: fmv.s fa0, fs0 +; RV32IZFBFMIN-NEXT: call __fixunssfdi +; RV32IZFBFMIN-NEXT: and a0, s0, a0 +; RV32IZFBFMIN-NEXT: lui a2, 391168 +; RV32IZFBFMIN-NEXT: and a1, s0, a1 +; RV32IZFBFMIN-NEXT: addi a2, a2, -1 +; RV32IZFBFMIN-NEXT: fmv.w.x fa5, a2 +; RV32IZFBFMIN-NEXT: flt.s a2, fa5, fs0 +; RV32IZFBFMIN-NEXT: neg a2, a2 +; RV32IZFBFMIN-NEXT: or a0, a2, a0 +; RV32IZFBFMIN-NEXT: or a1, a2, a1 +; RV32IZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZFBFMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFBFMIN-NEXT: addi sp, sp, 16 +; RV32IZFBFMIN-NEXT: ret +; +; R32IDZFBFMIN-LABEL: fcvt_lu_bf16_sat: +; R32IDZFBFMIN: # %bb.0: # %start +; R32IDZFBFMIN-NEXT: addi sp, sp, -16 +; R32IDZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; R32IDZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; R32IDZFBFMIN-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; R32IDZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0 +; R32IDZFBFMIN-NEXT: fmv.w.x fa5, zero +; R32IDZFBFMIN-NEXT: fle.s a0, fa5, fs0 +; R32IDZFBFMIN-NEXT: neg s0, a0 +; R32IDZFBFMIN-NEXT: fmv.s fa0, fs0 +; R32IDZFBFMIN-NEXT: call __fixunssfdi +; R32IDZFBFMIN-NEXT: and a0, s0, a0 +; R32IDZFBFMIN-NEXT: lui a2, 391168 +; R32IDZFBFMIN-NEXT: and a1, s0, a1 +; R32IDZFBFMIN-NEXT: addi a2, a2, -1 +; R32IDZFBFMIN-NEXT: fmv.w.x fa5, a2 +; R32IDZFBFMIN-NEXT: flt.s a2, fa5, fs0 +; R32IDZFBFMIN-NEXT: neg a2, a2 +; R32IDZFBFMIN-NEXT: or a0, a2, a0 +; R32IDZFBFMIN-NEXT: or a1, a2, a1 +; R32IDZFBFMIN-NEXT: lw ra, 
12(sp) # 4-byte Folded Reload +; R32IDZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; R32IDZFBFMIN-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload +; R32IDZFBFMIN-NEXT: addi sp, sp, 16 +; R32IDZFBFMIN-NEXT: ret ; ; RV32ID-LABEL: fcvt_lu_bf16_sat: ; RV32ID: # %bb.0: # %start @@ -673,15 +713,16 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind { ; RV32ID-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32ID-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32ID-NEXT: fmv.x.w a0, fa0 -; RV32ID-NEXT: lui a1, %hi(.LCPI12_0) -; RV32ID-NEXT: fmv.w.x fa5, zero -; RV32ID-NEXT: flw fa4, %lo(.LCPI12_0)(a1) +; RV32ID-NEXT: lui a1, 391168 ; RV32ID-NEXT: slli a0, a0, 16 +; RV32ID-NEXT: addi a1, a1, -1 ; RV32ID-NEXT: fmv.w.x fa0, a0 -; RV32ID-NEXT: fle.s a0, fa5, fa0 -; RV32ID-NEXT: flt.s a1, fa4, fa0 -; RV32ID-NEXT: neg s0, a1 -; RV32ID-NEXT: neg s1, a0 +; RV32ID-NEXT: fmv.w.x fa5, a1 +; RV32ID-NEXT: flt.s a0, fa5, fa0 +; RV32ID-NEXT: fmv.w.x fa5, zero +; RV32ID-NEXT: fle.s a1, fa5, fa0 +; RV32ID-NEXT: neg s0, a0 +; RV32ID-NEXT: neg s1, a1 ; RV32ID-NEXT: call __fixunssfdi ; RV32ID-NEXT: and a0, s1, a0 ; RV32ID-NEXT: and a1, s1, a1 diff --git a/llvm/test/CodeGen/RISCV/bfloat-imm.ll b/llvm/test/CodeGen/RISCV/bfloat-imm.ll index 76ff720b1c268..61014891414d8 100644 --- a/llvm/test/CodeGen/RISCV/bfloat-imm.ll +++ b/llvm/test/CodeGen/RISCV/bfloat-imm.ll @@ -7,8 +7,9 @@ define bfloat @bfloat_imm() nounwind { ; CHECK-LABEL: bfloat_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa0, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: lui a0, 4 +; CHECK-NEXT: addi a0, a0, 64 +; CHECK-NEXT: fmv.h.x fa0, a0 ; CHECK-NEXT: ret ret bfloat 3.0 } diff --git a/llvm/test/CodeGen/RISCV/calling-conv-half.ll b/llvm/test/CodeGen/RISCV/calling-conv-half.ll index d7957540d1b29..d8e6b7f3ede9a 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-half.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-half.ll @@ -519,15 +519,16 @@ define i32 @caller_half_on_stack() nounwind { ; RV32-ILP32F: # %bb.0: ; RV32-ILP32F-NEXT: addi sp, sp, -16 ; RV32-ILP32F-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-ILP32F-NEXT: lui a4, %hi(.LCPI3_0) +; RV32-ILP32F-NEXT: lui a7, 1048565 ; RV32-ILP32F-NEXT: li a0, 1 ; RV32-ILP32F-NEXT: li a1, 2 ; RV32-ILP32F-NEXT: li a2, 3 ; RV32-ILP32F-NEXT: li a3, 4 -; RV32-ILP32F-NEXT: flw fa0, %lo(.LCPI3_0)(a4) ; RV32-ILP32F-NEXT: li a4, 5 ; RV32-ILP32F-NEXT: li a5, 6 ; RV32-ILP32F-NEXT: li a6, 7 +; RV32-ILP32F-NEXT: addi a7, a7, -1792 +; RV32-ILP32F-NEXT: fmv.w.x fa0, a7 ; RV32-ILP32F-NEXT: li a7, 8 ; RV32-ILP32F-NEXT: call callee_half_on_stack ; RV32-ILP32F-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -538,15 +539,16 @@ define i32 @caller_half_on_stack() nounwind { ; RV64-LP64F: # %bb.0: ; RV64-LP64F-NEXT: addi sp, sp, -16 ; RV64-LP64F-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-LP64F-NEXT: lui a4, %hi(.LCPI3_0) +; RV64-LP64F-NEXT: lui a7, 1048565 ; RV64-LP64F-NEXT: li a0, 1 ; RV64-LP64F-NEXT: li a1, 2 ; RV64-LP64F-NEXT: li a2, 3 ; RV64-LP64F-NEXT: li a3, 4 -; RV64-LP64F-NEXT: flw fa0, %lo(.LCPI3_0)(a4) ; RV64-LP64F-NEXT: li a4, 5 ; RV64-LP64F-NEXT: li a5, 6 ; RV64-LP64F-NEXT: li a6, 7 +; RV64-LP64F-NEXT: addi a7, a7, -1792 +; RV64-LP64F-NEXT: fmv.w.x fa0, a7 ; RV64-LP64F-NEXT: li a7, 8 ; RV64-LP64F-NEXT: call callee_half_on_stack ; RV64-LP64F-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -557,15 +559,16 @@ define i32 @caller_half_on_stack() nounwind { ; RV32-ILP32ZFHMIN: # %bb.0: ; RV32-ILP32ZFHMIN-NEXT: addi sp, sp, -16 ; RV32-ILP32ZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; 
RV32-ILP32ZFHMIN-NEXT: lui a4, %hi(.LCPI3_0) +; RV32-ILP32ZFHMIN-NEXT: lui a7, 5 ; RV32-ILP32ZFHMIN-NEXT: li a0, 1 ; RV32-ILP32ZFHMIN-NEXT: li a1, 2 ; RV32-ILP32ZFHMIN-NEXT: li a2, 3 ; RV32-ILP32ZFHMIN-NEXT: li a3, 4 -; RV32-ILP32ZFHMIN-NEXT: flh fa0, %lo(.LCPI3_0)(a4) ; RV32-ILP32ZFHMIN-NEXT: li a4, 5 ; RV32-ILP32ZFHMIN-NEXT: li a5, 6 ; RV32-ILP32ZFHMIN-NEXT: li a6, 7 +; RV32-ILP32ZFHMIN-NEXT: addi a7, a7, -1792 +; RV32-ILP32ZFHMIN-NEXT: fmv.h.x fa0, a7 ; RV32-ILP32ZFHMIN-NEXT: li a7, 8 ; RV32-ILP32ZFHMIN-NEXT: call callee_half_on_stack ; RV32-ILP32ZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -576,15 +579,16 @@ define i32 @caller_half_on_stack() nounwind { ; RV64-LP64ZFHMIN: # %bb.0: ; RV64-LP64ZFHMIN-NEXT: addi sp, sp, -16 ; RV64-LP64ZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-LP64ZFHMIN-NEXT: lui a4, %hi(.LCPI3_0) +; RV64-LP64ZFHMIN-NEXT: lui a7, 5 ; RV64-LP64ZFHMIN-NEXT: li a0, 1 ; RV64-LP64ZFHMIN-NEXT: li a1, 2 ; RV64-LP64ZFHMIN-NEXT: li a2, 3 ; RV64-LP64ZFHMIN-NEXT: li a3, 4 -; RV64-LP64ZFHMIN-NEXT: flh fa0, %lo(.LCPI3_0)(a4) ; RV64-LP64ZFHMIN-NEXT: li a4, 5 ; RV64-LP64ZFHMIN-NEXT: li a5, 6 ; RV64-LP64ZFHMIN-NEXT: li a6, 7 +; RV64-LP64ZFHMIN-NEXT: addi a7, a7, -1792 +; RV64-LP64ZFHMIN-NEXT: fmv.h.x fa0, a7 ; RV64-LP64ZFHMIN-NEXT: li a7, 8 ; RV64-LP64ZFHMIN-NEXT: call callee_half_on_stack ; RV64-LP64ZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -595,15 +599,16 @@ define i32 @caller_half_on_stack() nounwind { ; RV32-ZFH-ILP32: # %bb.0: ; RV32-ZFH-ILP32-NEXT: addi sp, sp, -16 ; RV32-ZFH-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-ZFH-ILP32-NEXT: lui a4, %hi(.LCPI3_0) +; RV32-ZFH-ILP32-NEXT: lui a7, 5 ; RV32-ZFH-ILP32-NEXT: li a0, 1 ; RV32-ZFH-ILP32-NEXT: li a1, 2 ; RV32-ZFH-ILP32-NEXT: li a2, 3 ; RV32-ZFH-ILP32-NEXT: li a3, 4 -; RV32-ZFH-ILP32-NEXT: flh fa5, %lo(.LCPI3_0)(a4) ; RV32-ZFH-ILP32-NEXT: li a4, 5 ; RV32-ZFH-ILP32-NEXT: li a5, 6 ; RV32-ZFH-ILP32-NEXT: li a6, 7 +; RV32-ZFH-ILP32-NEXT: addi a7, a7, -1792 +; RV32-ZFH-ILP32-NEXT: fmv.h.x fa5, a7 ; RV32-ZFH-ILP32-NEXT: li a7, 8 ; RV32-ZFH-ILP32-NEXT: fsh fa5, 0(sp) ; RV32-ZFH-ILP32-NEXT: call callee_half_on_stack @@ -615,15 +620,16 @@ define i32 @caller_half_on_stack() nounwind { ; RV32-ZFH-ILP32F: # %bb.0: ; RV32-ZFH-ILP32F-NEXT: addi sp, sp, -16 ; RV32-ZFH-ILP32F-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-ZFH-ILP32F-NEXT: lui a4, %hi(.LCPI3_0) +; RV32-ZFH-ILP32F-NEXT: lui a7, 5 ; RV32-ZFH-ILP32F-NEXT: li a0, 1 ; RV32-ZFH-ILP32F-NEXT: li a1, 2 ; RV32-ZFH-ILP32F-NEXT: li a2, 3 ; RV32-ZFH-ILP32F-NEXT: li a3, 4 -; RV32-ZFH-ILP32F-NEXT: flh fa0, %lo(.LCPI3_0)(a4) ; RV32-ZFH-ILP32F-NEXT: li a4, 5 ; RV32-ZFH-ILP32F-NEXT: li a5, 6 ; RV32-ZFH-ILP32F-NEXT: li a6, 7 +; RV32-ZFH-ILP32F-NEXT: addi a7, a7, -1792 +; RV32-ZFH-ILP32F-NEXT: fmv.h.x fa0, a7 ; RV32-ZFH-ILP32F-NEXT: li a7, 8 ; RV32-ZFH-ILP32F-NEXT: call callee_half_on_stack ; RV32-ZFH-ILP32F-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -634,15 +640,16 @@ define i32 @caller_half_on_stack() nounwind { ; RV64-ZFH-LP64: # %bb.0: ; RV64-ZFH-LP64-NEXT: addi sp, sp, -16 ; RV64-ZFH-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-ZFH-LP64-NEXT: lui a4, %hi(.LCPI3_0) +; RV64-ZFH-LP64-NEXT: lui a7, 5 ; RV64-ZFH-LP64-NEXT: li a0, 1 ; RV64-ZFH-LP64-NEXT: li a1, 2 ; RV64-ZFH-LP64-NEXT: li a2, 3 ; RV64-ZFH-LP64-NEXT: li a3, 4 -; RV64-ZFH-LP64-NEXT: flh fa5, %lo(.LCPI3_0)(a4) ; RV64-ZFH-LP64-NEXT: li a4, 5 ; RV64-ZFH-LP64-NEXT: li a5, 6 ; RV64-ZFH-LP64-NEXT: li a6, 7 +; RV64-ZFH-LP64-NEXT: addi a7, a7, -1792 +; RV64-ZFH-LP64-NEXT: fmv.h.x fa5, a7 ; 
RV64-ZFH-LP64-NEXT: li a7, 8 ; RV64-ZFH-LP64-NEXT: fsh fa5, 0(sp) ; RV64-ZFH-LP64-NEXT: call callee_half_on_stack @@ -654,15 +661,16 @@ define i32 @caller_half_on_stack() nounwind { ; RV64-ZFH-LP64F: # %bb.0: ; RV64-ZFH-LP64F-NEXT: addi sp, sp, -16 ; RV64-ZFH-LP64F-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-ZFH-LP64F-NEXT: lui a4, %hi(.LCPI3_0) +; RV64-ZFH-LP64F-NEXT: lui a7, 5 ; RV64-ZFH-LP64F-NEXT: li a0, 1 ; RV64-ZFH-LP64F-NEXT: li a1, 2 ; RV64-ZFH-LP64F-NEXT: li a2, 3 ; RV64-ZFH-LP64F-NEXT: li a3, 4 -; RV64-ZFH-LP64F-NEXT: flh fa0, %lo(.LCPI3_0)(a4) ; RV64-ZFH-LP64F-NEXT: li a4, 5 ; RV64-ZFH-LP64F-NEXT: li a5, 6 ; RV64-ZFH-LP64F-NEXT: li a6, 7 +; RV64-ZFH-LP64F-NEXT: addi a7, a7, -1792 +; RV64-ZFH-LP64F-NEXT: fmv.h.x fa0, a7 ; RV64-ZFH-LP64F-NEXT: li a7, 8 ; RV64-ZFH-LP64F-NEXT: call callee_half_on_stack ; RV64-ZFH-LP64F-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -1038,31 +1046,32 @@ define i32 @caller_half_on_stack_exhausted_gprs_fprs() nounwind { ; RV32-ILP32ZFHMIN: # %bb.0: ; RV32-ILP32ZFHMIN-NEXT: addi sp, sp, -16 ; RV32-ILP32ZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-ILP32ZFHMIN-NEXT: lui a0, %hi(.LCPI5_0) -; RV32-ILP32ZFHMIN-NEXT: lui a1, 260096 -; RV32-ILP32ZFHMIN-NEXT: lui a2, 262144 -; RV32-ILP32ZFHMIN-NEXT: lui a3, 263168 -; RV32-ILP32ZFHMIN-NEXT: lui a4, 264192 -; RV32-ILP32ZFHMIN-NEXT: lui a5, 264704 -; RV32-ILP32ZFHMIN-NEXT: lui a6, 265216 -; RV32-ILP32ZFHMIN-NEXT: lui a7, 265728 -; RV32-ILP32ZFHMIN-NEXT: flh ft0, %lo(.LCPI5_0)(a0) +; RV32-ILP32ZFHMIN-NEXT: lui a7, 5 +; RV32-ILP32ZFHMIN-NEXT: lui a0, 260096 +; RV32-ILP32ZFHMIN-NEXT: lui a1, 262144 +; RV32-ILP32ZFHMIN-NEXT: lui a2, 263168 +; RV32-ILP32ZFHMIN-NEXT: lui a3, 264192 +; RV32-ILP32ZFHMIN-NEXT: lui a4, 264704 +; RV32-ILP32ZFHMIN-NEXT: lui a5, 265216 +; RV32-ILP32ZFHMIN-NEXT: lui a6, 265728 +; RV32-ILP32ZFHMIN-NEXT: fmv.w.x fa0, a0 ; RV32-ILP32ZFHMIN-NEXT: lui t0, 266240 -; RV32-ILP32ZFHMIN-NEXT: fmv.w.x fa0, a1 +; RV32-ILP32ZFHMIN-NEXT: fmv.w.x fa1, a1 ; RV32-ILP32ZFHMIN-NEXT: li a0, 1 -; RV32-ILP32ZFHMIN-NEXT: fmv.w.x fa1, a2 +; RV32-ILP32ZFHMIN-NEXT: fmv.w.x fa2, a2 ; RV32-ILP32ZFHMIN-NEXT: li a1, 2 -; RV32-ILP32ZFHMIN-NEXT: fmv.w.x fa2, a3 +; RV32-ILP32ZFHMIN-NEXT: fmv.w.x fa3, a3 ; RV32-ILP32ZFHMIN-NEXT: li a2, 3 -; RV32-ILP32ZFHMIN-NEXT: fmv.w.x fa3, a4 +; RV32-ILP32ZFHMIN-NEXT: fmv.w.x fa4, a4 ; RV32-ILP32ZFHMIN-NEXT: li a3, 4 -; RV32-ILP32ZFHMIN-NEXT: fmv.w.x fa4, a5 -; RV32-ILP32ZFHMIN-NEXT: fmv.w.x fa5, a6 -; RV32-ILP32ZFHMIN-NEXT: fmv.w.x fa6, a7 -; RV32-ILP32ZFHMIN-NEXT: fmv.w.x fa7, t0 +; RV32-ILP32ZFHMIN-NEXT: fmv.w.x fa5, a5 ; RV32-ILP32ZFHMIN-NEXT: li a4, 5 +; RV32-ILP32ZFHMIN-NEXT: fmv.w.x fa6, a6 ; RV32-ILP32ZFHMIN-NEXT: li a5, 6 +; RV32-ILP32ZFHMIN-NEXT: fmv.w.x fa7, t0 ; RV32-ILP32ZFHMIN-NEXT: li a6, 7 +; RV32-ILP32ZFHMIN-NEXT: addi a7, a7, -1792 +; RV32-ILP32ZFHMIN-NEXT: fmv.h.x ft0, a7 ; RV32-ILP32ZFHMIN-NEXT: li a7, 8 ; RV32-ILP32ZFHMIN-NEXT: fsh ft0, 0(sp) ; RV32-ILP32ZFHMIN-NEXT: call callee_half_on_stack @@ -1074,31 +1083,32 @@ define i32 @caller_half_on_stack_exhausted_gprs_fprs() nounwind { ; RV64-LP64ZFHMIN: # %bb.0: ; RV64-LP64ZFHMIN-NEXT: addi sp, sp, -16 ; RV64-LP64ZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-LP64ZFHMIN-NEXT: lui a0, %hi(.LCPI5_0) -; RV64-LP64ZFHMIN-NEXT: lui a1, 260096 -; RV64-LP64ZFHMIN-NEXT: lui a2, 262144 -; RV64-LP64ZFHMIN-NEXT: lui a3, 263168 -; RV64-LP64ZFHMIN-NEXT: lui a4, 264192 -; RV64-LP64ZFHMIN-NEXT: lui a5, 264704 -; RV64-LP64ZFHMIN-NEXT: lui a6, 265216 -; RV64-LP64ZFHMIN-NEXT: lui a7, 265728 -; RV64-LP64ZFHMIN-NEXT: flh 
ft0, %lo(.LCPI5_0)(a0) +; RV64-LP64ZFHMIN-NEXT: lui a7, 5 +; RV64-LP64ZFHMIN-NEXT: lui a0, 260096 +; RV64-LP64ZFHMIN-NEXT: lui a1, 262144 +; RV64-LP64ZFHMIN-NEXT: lui a2, 263168 +; RV64-LP64ZFHMIN-NEXT: lui a3, 264192 +; RV64-LP64ZFHMIN-NEXT: lui a4, 264704 +; RV64-LP64ZFHMIN-NEXT: lui a5, 265216 +; RV64-LP64ZFHMIN-NEXT: lui a6, 265728 +; RV64-LP64ZFHMIN-NEXT: fmv.w.x fa0, a0 ; RV64-LP64ZFHMIN-NEXT: lui t0, 266240 -; RV64-LP64ZFHMIN-NEXT: fmv.w.x fa0, a1 +; RV64-LP64ZFHMIN-NEXT: fmv.w.x fa1, a1 ; RV64-LP64ZFHMIN-NEXT: li a0, 1 -; RV64-LP64ZFHMIN-NEXT: fmv.w.x fa1, a2 +; RV64-LP64ZFHMIN-NEXT: fmv.w.x fa2, a2 ; RV64-LP64ZFHMIN-NEXT: li a1, 2 -; RV64-LP64ZFHMIN-NEXT: fmv.w.x fa2, a3 +; RV64-LP64ZFHMIN-NEXT: fmv.w.x fa3, a3 ; RV64-LP64ZFHMIN-NEXT: li a2, 3 -; RV64-LP64ZFHMIN-NEXT: fmv.w.x fa3, a4 +; RV64-LP64ZFHMIN-NEXT: fmv.w.x fa4, a4 ; RV64-LP64ZFHMIN-NEXT: li a3, 4 -; RV64-LP64ZFHMIN-NEXT: fmv.w.x fa4, a5 -; RV64-LP64ZFHMIN-NEXT: fmv.w.x fa5, a6 -; RV64-LP64ZFHMIN-NEXT: fmv.w.x fa6, a7 -; RV64-LP64ZFHMIN-NEXT: fmv.w.x fa7, t0 +; RV64-LP64ZFHMIN-NEXT: fmv.w.x fa5, a5 ; RV64-LP64ZFHMIN-NEXT: li a4, 5 +; RV64-LP64ZFHMIN-NEXT: fmv.w.x fa6, a6 ; RV64-LP64ZFHMIN-NEXT: li a5, 6 +; RV64-LP64ZFHMIN-NEXT: fmv.w.x fa7, t0 ; RV64-LP64ZFHMIN-NEXT: li a6, 7 +; RV64-LP64ZFHMIN-NEXT: addi a7, a7, -1792 +; RV64-LP64ZFHMIN-NEXT: fmv.h.x ft0, a7 ; RV64-LP64ZFHMIN-NEXT: li a7, 8 ; RV64-LP64ZFHMIN-NEXT: fsh ft0, 0(sp) ; RV64-LP64ZFHMIN-NEXT: call callee_half_on_stack @@ -1110,31 +1120,32 @@ define i32 @caller_half_on_stack_exhausted_gprs_fprs() nounwind { ; RV32-ZFH-ILP32: # %bb.0: ; RV32-ZFH-ILP32-NEXT: addi sp, sp, -48 ; RV32-ZFH-ILP32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill -; RV32-ZFH-ILP32-NEXT: lui a2, %hi(.LCPI5_0) -; RV32-ZFH-ILP32-NEXT: lui a3, 266240 -; RV32-ZFH-ILP32-NEXT: li a4, 8 -; RV32-ZFH-ILP32-NEXT: lui a5, 265728 -; RV32-ZFH-ILP32-NEXT: li a6, 7 -; RV32-ZFH-ILP32-NEXT: lui a7, 265216 -; RV32-ZFH-ILP32-NEXT: li t0, 6 -; RV32-ZFH-ILP32-NEXT: lui t1, 264704 -; RV32-ZFH-ILP32-NEXT: li t2, 5 +; RV32-ZFH-ILP32-NEXT: lui a5, 266240 +; RV32-ZFH-ILP32-NEXT: li a6, 8 +; RV32-ZFH-ILP32-NEXT: lui a7, 265728 +; RV32-ZFH-ILP32-NEXT: li t0, 7 +; RV32-ZFH-ILP32-NEXT: lui t1, 265216 +; RV32-ZFH-ILP32-NEXT: li t2, 6 +; RV32-ZFH-ILP32-NEXT: lui t3, 264704 +; RV32-ZFH-ILP32-NEXT: li t4, 5 +; RV32-ZFH-ILP32-NEXT: lui t5, 5 ; RV32-ZFH-ILP32-NEXT: li a0, 1 ; RV32-ZFH-ILP32-NEXT: lui a1, 260096 -; RV32-ZFH-ILP32-NEXT: flh fa5, %lo(.LCPI5_0)(a2) ; RV32-ZFH-ILP32-NEXT: li a2, 2 -; RV32-ZFH-ILP32-NEXT: sw a6, 16(sp) -; RV32-ZFH-ILP32-NEXT: sw a5, 20(sp) -; RV32-ZFH-ILP32-NEXT: sw a4, 24(sp) -; RV32-ZFH-ILP32-NEXT: sw a3, 28(sp) ; RV32-ZFH-ILP32-NEXT: lui a3, 262144 -; RV32-ZFH-ILP32-NEXT: sw t2, 0(sp) -; RV32-ZFH-ILP32-NEXT: sw t1, 4(sp) -; RV32-ZFH-ILP32-NEXT: sw t0, 8(sp) -; RV32-ZFH-ILP32-NEXT: sw a7, 12(sp) ; RV32-ZFH-ILP32-NEXT: li a4, 3 +; RV32-ZFH-ILP32-NEXT: sw t0, 16(sp) +; RV32-ZFH-ILP32-NEXT: sw a7, 20(sp) +; RV32-ZFH-ILP32-NEXT: sw a6, 24(sp) +; RV32-ZFH-ILP32-NEXT: sw a5, 28(sp) ; RV32-ZFH-ILP32-NEXT: lui a5, 263168 +; RV32-ZFH-ILP32-NEXT: sw t4, 0(sp) +; RV32-ZFH-ILP32-NEXT: sw t3, 4(sp) +; RV32-ZFH-ILP32-NEXT: sw t2, 8(sp) +; RV32-ZFH-ILP32-NEXT: sw t1, 12(sp) ; RV32-ZFH-ILP32-NEXT: li a6, 4 +; RV32-ZFH-ILP32-NEXT: addi a7, t5, -1792 +; RV32-ZFH-ILP32-NEXT: fmv.h.x fa5, a7 ; RV32-ZFH-ILP32-NEXT: lui a7, 264192 ; RV32-ZFH-ILP32-NEXT: fsh fa5, 32(sp) ; RV32-ZFH-ILP32-NEXT: call callee_half_on_stack @@ -1146,31 +1157,32 @@ define i32 @caller_half_on_stack_exhausted_gprs_fprs() nounwind { ; 
RV32-ZFH-ILP32F: # %bb.0: ; RV32-ZFH-ILP32F-NEXT: addi sp, sp, -16 ; RV32-ZFH-ILP32F-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-ZFH-ILP32F-NEXT: lui a0, %hi(.LCPI5_0) -; RV32-ZFH-ILP32F-NEXT: lui a1, 260096 -; RV32-ZFH-ILP32F-NEXT: lui a2, 262144 -; RV32-ZFH-ILP32F-NEXT: lui a3, 263168 -; RV32-ZFH-ILP32F-NEXT: lui a4, 264192 -; RV32-ZFH-ILP32F-NEXT: lui a5, 264704 -; RV32-ZFH-ILP32F-NEXT: lui a6, 265216 -; RV32-ZFH-ILP32F-NEXT: lui a7, 265728 -; RV32-ZFH-ILP32F-NEXT: flh ft0, %lo(.LCPI5_0)(a0) +; RV32-ZFH-ILP32F-NEXT: lui a7, 5 +; RV32-ZFH-ILP32F-NEXT: lui a0, 260096 +; RV32-ZFH-ILP32F-NEXT: lui a1, 262144 +; RV32-ZFH-ILP32F-NEXT: lui a2, 263168 +; RV32-ZFH-ILP32F-NEXT: lui a3, 264192 +; RV32-ZFH-ILP32F-NEXT: lui a4, 264704 +; RV32-ZFH-ILP32F-NEXT: lui a5, 265216 +; RV32-ZFH-ILP32F-NEXT: lui a6, 265728 +; RV32-ZFH-ILP32F-NEXT: fmv.w.x fa0, a0 ; RV32-ZFH-ILP32F-NEXT: lui t0, 266240 -; RV32-ZFH-ILP32F-NEXT: fmv.w.x fa0, a1 +; RV32-ZFH-ILP32F-NEXT: fmv.w.x fa1, a1 ; RV32-ZFH-ILP32F-NEXT: li a0, 1 -; RV32-ZFH-ILP32F-NEXT: fmv.w.x fa1, a2 +; RV32-ZFH-ILP32F-NEXT: fmv.w.x fa2, a2 ; RV32-ZFH-ILP32F-NEXT: li a1, 2 -; RV32-ZFH-ILP32F-NEXT: fmv.w.x fa2, a3 +; RV32-ZFH-ILP32F-NEXT: fmv.w.x fa3, a3 ; RV32-ZFH-ILP32F-NEXT: li a2, 3 -; RV32-ZFH-ILP32F-NEXT: fmv.w.x fa3, a4 +; RV32-ZFH-ILP32F-NEXT: fmv.w.x fa4, a4 ; RV32-ZFH-ILP32F-NEXT: li a3, 4 -; RV32-ZFH-ILP32F-NEXT: fmv.w.x fa4, a5 -; RV32-ZFH-ILP32F-NEXT: fmv.w.x fa5, a6 -; RV32-ZFH-ILP32F-NEXT: fmv.w.x fa6, a7 -; RV32-ZFH-ILP32F-NEXT: fmv.w.x fa7, t0 +; RV32-ZFH-ILP32F-NEXT: fmv.w.x fa5, a5 ; RV32-ZFH-ILP32F-NEXT: li a4, 5 +; RV32-ZFH-ILP32F-NEXT: fmv.w.x fa6, a6 ; RV32-ZFH-ILP32F-NEXT: li a5, 6 +; RV32-ZFH-ILP32F-NEXT: fmv.w.x fa7, t0 ; RV32-ZFH-ILP32F-NEXT: li a6, 7 +; RV32-ZFH-ILP32F-NEXT: addi a7, a7, -1792 +; RV32-ZFH-ILP32F-NEXT: fmv.h.x ft0, a7 ; RV32-ZFH-ILP32F-NEXT: li a7, 8 ; RV32-ZFH-ILP32F-NEXT: fsh ft0, 0(sp) ; RV32-ZFH-ILP32F-NEXT: call callee_half_on_stack @@ -1182,31 +1194,32 @@ define i32 @caller_half_on_stack_exhausted_gprs_fprs() nounwind { ; RV64-ZFH-LP64: # %bb.0: ; RV64-ZFH-LP64-NEXT: addi sp, sp, -80 ; RV64-ZFH-LP64-NEXT: sd ra, 72(sp) # 8-byte Folded Spill -; RV64-ZFH-LP64-NEXT: lui a2, %hi(.LCPI5_0) -; RV64-ZFH-LP64-NEXT: lui a3, 266240 -; RV64-ZFH-LP64-NEXT: li a4, 8 -; RV64-ZFH-LP64-NEXT: lui a5, 265728 -; RV64-ZFH-LP64-NEXT: li a6, 7 -; RV64-ZFH-LP64-NEXT: lui a7, 265216 -; RV64-ZFH-LP64-NEXT: li t0, 6 -; RV64-ZFH-LP64-NEXT: lui t1, 264704 -; RV64-ZFH-LP64-NEXT: li t2, 5 +; RV64-ZFH-LP64-NEXT: lui a5, 266240 +; RV64-ZFH-LP64-NEXT: li a6, 8 +; RV64-ZFH-LP64-NEXT: lui a7, 265728 +; RV64-ZFH-LP64-NEXT: li t0, 7 +; RV64-ZFH-LP64-NEXT: lui t1, 265216 +; RV64-ZFH-LP64-NEXT: li t2, 6 +; RV64-ZFH-LP64-NEXT: lui t3, 264704 +; RV64-ZFH-LP64-NEXT: li t4, 5 +; RV64-ZFH-LP64-NEXT: lui t5, 5 ; RV64-ZFH-LP64-NEXT: li a0, 1 ; RV64-ZFH-LP64-NEXT: lui a1, 260096 -; RV64-ZFH-LP64-NEXT: flh fa5, %lo(.LCPI5_0)(a2) ; RV64-ZFH-LP64-NEXT: li a2, 2 -; RV64-ZFH-LP64-NEXT: sd a6, 32(sp) -; RV64-ZFH-LP64-NEXT: sw a5, 40(sp) -; RV64-ZFH-LP64-NEXT: sd a4, 48(sp) -; RV64-ZFH-LP64-NEXT: sw a3, 56(sp) ; RV64-ZFH-LP64-NEXT: lui a3, 262144 -; RV64-ZFH-LP64-NEXT: sd t2, 0(sp) -; RV64-ZFH-LP64-NEXT: sw t1, 8(sp) -; RV64-ZFH-LP64-NEXT: sd t0, 16(sp) -; RV64-ZFH-LP64-NEXT: sw a7, 24(sp) ; RV64-ZFH-LP64-NEXT: li a4, 3 +; RV64-ZFH-LP64-NEXT: sd t0, 32(sp) +; RV64-ZFH-LP64-NEXT: sw a7, 40(sp) +; RV64-ZFH-LP64-NEXT: sd a6, 48(sp) +; RV64-ZFH-LP64-NEXT: sw a5, 56(sp) ; RV64-ZFH-LP64-NEXT: lui a5, 263168 +; RV64-ZFH-LP64-NEXT: sd t4, 0(sp) +; 
RV64-ZFH-LP64-NEXT: sw t3, 8(sp) +; RV64-ZFH-LP64-NEXT: sd t2, 16(sp) +; RV64-ZFH-LP64-NEXT: sw t1, 24(sp) ; RV64-ZFH-LP64-NEXT: li a6, 4 +; RV64-ZFH-LP64-NEXT: addi a7, t5, -1792 +; RV64-ZFH-LP64-NEXT: fmv.h.x fa5, a7 ; RV64-ZFH-LP64-NEXT: lui a7, 264192 ; RV64-ZFH-LP64-NEXT: fsh fa5, 64(sp) ; RV64-ZFH-LP64-NEXT: call callee_half_on_stack @@ -1218,31 +1231,32 @@ define i32 @caller_half_on_stack_exhausted_gprs_fprs() nounwind { ; RV64-ZFH-LP64F: # %bb.0: ; RV64-ZFH-LP64F-NEXT: addi sp, sp, -16 ; RV64-ZFH-LP64F-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-ZFH-LP64F-NEXT: lui a0, %hi(.LCPI5_0) -; RV64-ZFH-LP64F-NEXT: lui a1, 260096 -; RV64-ZFH-LP64F-NEXT: lui a2, 262144 -; RV64-ZFH-LP64F-NEXT: lui a3, 263168 -; RV64-ZFH-LP64F-NEXT: lui a4, 264192 -; RV64-ZFH-LP64F-NEXT: lui a5, 264704 -; RV64-ZFH-LP64F-NEXT: lui a6, 265216 -; RV64-ZFH-LP64F-NEXT: lui a7, 265728 -; RV64-ZFH-LP64F-NEXT: flh ft0, %lo(.LCPI5_0)(a0) +; RV64-ZFH-LP64F-NEXT: lui a7, 5 +; RV64-ZFH-LP64F-NEXT: lui a0, 260096 +; RV64-ZFH-LP64F-NEXT: lui a1, 262144 +; RV64-ZFH-LP64F-NEXT: lui a2, 263168 +; RV64-ZFH-LP64F-NEXT: lui a3, 264192 +; RV64-ZFH-LP64F-NEXT: lui a4, 264704 +; RV64-ZFH-LP64F-NEXT: lui a5, 265216 +; RV64-ZFH-LP64F-NEXT: lui a6, 265728 +; RV64-ZFH-LP64F-NEXT: fmv.w.x fa0, a0 ; RV64-ZFH-LP64F-NEXT: lui t0, 266240 -; RV64-ZFH-LP64F-NEXT: fmv.w.x fa0, a1 +; RV64-ZFH-LP64F-NEXT: fmv.w.x fa1, a1 ; RV64-ZFH-LP64F-NEXT: li a0, 1 -; RV64-ZFH-LP64F-NEXT: fmv.w.x fa1, a2 +; RV64-ZFH-LP64F-NEXT: fmv.w.x fa2, a2 ; RV64-ZFH-LP64F-NEXT: li a1, 2 -; RV64-ZFH-LP64F-NEXT: fmv.w.x fa2, a3 +; RV64-ZFH-LP64F-NEXT: fmv.w.x fa3, a3 ; RV64-ZFH-LP64F-NEXT: li a2, 3 -; RV64-ZFH-LP64F-NEXT: fmv.w.x fa3, a4 +; RV64-ZFH-LP64F-NEXT: fmv.w.x fa4, a4 ; RV64-ZFH-LP64F-NEXT: li a3, 4 -; RV64-ZFH-LP64F-NEXT: fmv.w.x fa4, a5 -; RV64-ZFH-LP64F-NEXT: fmv.w.x fa5, a6 -; RV64-ZFH-LP64F-NEXT: fmv.w.x fa6, a7 -; RV64-ZFH-LP64F-NEXT: fmv.w.x fa7, t0 +; RV64-ZFH-LP64F-NEXT: fmv.w.x fa5, a5 ; RV64-ZFH-LP64F-NEXT: li a4, 5 +; RV64-ZFH-LP64F-NEXT: fmv.w.x fa6, a6 ; RV64-ZFH-LP64F-NEXT: li a5, 6 +; RV64-ZFH-LP64F-NEXT: fmv.w.x fa7, t0 ; RV64-ZFH-LP64F-NEXT: li a6, 7 +; RV64-ZFH-LP64F-NEXT: addi a7, a7, -1792 +; RV64-ZFH-LP64F-NEXT: fmv.h.x ft0, a7 ; RV64-ZFH-LP64F-NEXT: li a7, 8 ; RV64-ZFH-LP64F-NEXT: fsh ft0, 0(sp) ; RV64-ZFH-LP64F-NEXT: call callee_half_on_stack @@ -1280,26 +1294,30 @@ define half @callee_half_ret() nounwind { ; ; RV32-ILP32F-LABEL: callee_half_ret: ; RV32-ILP32F: # %bb.0: -; RV32-ILP32F-NEXT: lui a0, %hi(.LCPI6_0) -; RV32-ILP32F-NEXT: flw fa0, %lo(.LCPI6_0)(a0) +; RV32-ILP32F-NEXT: lui a0, 1048564 +; RV32-ILP32F-NEXT: addi a0, a0, -1024 +; RV32-ILP32F-NEXT: fmv.w.x fa0, a0 ; RV32-ILP32F-NEXT: ret ; ; RV64-LP64F-LABEL: callee_half_ret: ; RV64-LP64F: # %bb.0: -; RV64-LP64F-NEXT: lui a0, %hi(.LCPI6_0) -; RV64-LP64F-NEXT: flw fa0, %lo(.LCPI6_0)(a0) +; RV64-LP64F-NEXT: lui a0, 1048564 +; RV64-LP64F-NEXT: addi a0, a0, -1024 +; RV64-LP64F-NEXT: fmv.w.x fa0, a0 ; RV64-LP64F-NEXT: ret ; ; RV32-ILP32ZFHMIN-LABEL: callee_half_ret: ; RV32-ILP32ZFHMIN: # %bb.0: -; RV32-ILP32ZFHMIN-NEXT: lui a0, %hi(.LCPI6_0) -; RV32-ILP32ZFHMIN-NEXT: flh fa0, %lo(.LCPI6_0)(a0) +; RV32-ILP32ZFHMIN-NEXT: li a0, 15 +; RV32-ILP32ZFHMIN-NEXT: slli a0, a0, 10 +; RV32-ILP32ZFHMIN-NEXT: fmv.h.x fa0, a0 ; RV32-ILP32ZFHMIN-NEXT: ret ; ; RV64-LP64ZFHMIN-LABEL: callee_half_ret: ; RV64-LP64ZFHMIN: # %bb.0: -; RV64-LP64ZFHMIN-NEXT: lui a0, %hi(.LCPI6_0) -; RV64-LP64ZFHMIN-NEXT: flh fa0, %lo(.LCPI6_0)(a0) +; RV64-LP64ZFHMIN-NEXT: li a0, 15 +; RV64-LP64ZFHMIN-NEXT: slli a0, a0, 10 
+; RV64-LP64ZFHMIN-NEXT: fmv.h.x fa0, a0 ; RV64-LP64ZFHMIN-NEXT: ret ; ; RV32-ZFH-ILP32-LABEL: callee_half_ret: @@ -1310,8 +1328,9 @@ define half @callee_half_ret() nounwind { ; ; RV32-ZFH-ILP32F-LABEL: callee_half_ret: ; RV32-ZFH-ILP32F: # %bb.0: -; RV32-ZFH-ILP32F-NEXT: lui a0, %hi(.LCPI6_0) -; RV32-ZFH-ILP32F-NEXT: flh fa0, %lo(.LCPI6_0)(a0) +; RV32-ZFH-ILP32F-NEXT: li a0, 15 +; RV32-ZFH-ILP32F-NEXT: slli a0, a0, 10 +; RV32-ZFH-ILP32F-NEXT: fmv.h.x fa0, a0 ; RV32-ZFH-ILP32F-NEXT: ret ; ; RV64-ZFH-LP64-LABEL: callee_half_ret: @@ -1322,8 +1341,9 @@ define half @callee_half_ret() nounwind { ; ; RV64-ZFH-LP64F-LABEL: callee_half_ret: ; RV64-ZFH-LP64F: # %bb.0: -; RV64-ZFH-LP64F-NEXT: lui a0, %hi(.LCPI6_0) -; RV64-ZFH-LP64F-NEXT: flh fa0, %lo(.LCPI6_0)(a0) +; RV64-ZFH-LP64F-NEXT: li a0, 15 +; RV64-ZFH-LP64F-NEXT: slli a0, a0, 10 +; RV64-ZFH-LP64F-NEXT: fmv.h.x fa0, a0 ; RV64-ZFH-LP64F-NEXT: ret ret half 1.0 } diff --git a/llvm/test/CodeGen/RISCV/codemodel-lowering.ll b/llvm/test/CodeGen/RISCV/codemodel-lowering.ll index 94f8d7cab9b95..220494a4c4ff8 100644 --- a/llvm/test/CodeGen/RISCV/codemodel-lowering.ll +++ b/llvm/test/CodeGen/RISCV/codemodel-lowering.ll @@ -287,8 +287,9 @@ indirectgoto: define float @lower_constantpool(float %a) nounwind { ; RV32F-SMALL-LABEL: lower_constantpool: ; RV32F-SMALL: # %bb.0: -; RV32F-SMALL-NEXT: lui a0, %hi(.LCPI3_0) -; RV32F-SMALL-NEXT: flw fa5, %lo(.LCPI3_0)(a0) +; RV32F-SMALL-NEXT: lui a0, 260097 +; RV32F-SMALL-NEXT: addi a0, a0, -2048 +; RV32F-SMALL-NEXT: fmv.w.x fa5, a0 ; RV32F-SMALL-NEXT: fadd.s fa0, fa0, fa5 ; RV32F-SMALL-NEXT: ret ; @@ -301,32 +302,33 @@ define float @lower_constantpool(float %a) nounwind { ; ; RV32F-MEDIUM-LABEL: lower_constantpool: ; RV32F-MEDIUM: # %bb.0: -; RV32F-MEDIUM-NEXT: .Lpcrel_hi3: -; RV32F-MEDIUM-NEXT: auipc a0, %pcrel_hi(.LCPI3_0) -; RV32F-MEDIUM-NEXT: flw fa5, %pcrel_lo(.Lpcrel_hi3)(a0) +; RV32F-MEDIUM-NEXT: lui a0, 260097 +; RV32F-MEDIUM-NEXT: addi a0, a0, -2048 +; RV32F-MEDIUM-NEXT: fmv.w.x fa5, a0 ; RV32F-MEDIUM-NEXT: fadd.s fa0, fa0, fa5 ; RV32F-MEDIUM-NEXT: ret ; ; RV64F-SMALL-LABEL: lower_constantpool: ; RV64F-SMALL: # %bb.0: -; RV64F-SMALL-NEXT: lui a0, %hi(.LCPI3_0) -; RV64F-SMALL-NEXT: flw fa5, %lo(.LCPI3_0)(a0) +; RV64F-SMALL-NEXT: lui a0, 260097 +; RV64F-SMALL-NEXT: addi a0, a0, -2048 +; RV64F-SMALL-NEXT: fmv.w.x fa5, a0 ; RV64F-SMALL-NEXT: fadd.s fa0, fa0, fa5 ; RV64F-SMALL-NEXT: ret ; ; RV64F-MEDIUM-LABEL: lower_constantpool: ; RV64F-MEDIUM: # %bb.0: -; RV64F-MEDIUM-NEXT: .Lpcrel_hi3: -; RV64F-MEDIUM-NEXT: auipc a0, %pcrel_hi(.LCPI3_0) -; RV64F-MEDIUM-NEXT: flw fa5, %pcrel_lo(.Lpcrel_hi3)(a0) +; RV64F-MEDIUM-NEXT: lui a0, 260097 +; RV64F-MEDIUM-NEXT: addi a0, a0, -2048 +; RV64F-MEDIUM-NEXT: fmv.w.x fa5, a0 ; RV64F-MEDIUM-NEXT: fadd.s fa0, fa0, fa5 ; RV64F-MEDIUM-NEXT: ret ; ; RV64F-LARGE-LABEL: lower_constantpool: ; RV64F-LARGE: # %bb.0: -; RV64F-LARGE-NEXT: .Lpcrel_hi3: -; RV64F-LARGE-NEXT: auipc a0, %pcrel_hi(.LCPI3_0) -; RV64F-LARGE-NEXT: flw fa5, %pcrel_lo(.Lpcrel_hi3)(a0) +; RV64F-LARGE-NEXT: lui a0, 260097 +; RV64F-LARGE-NEXT: addi a0, a0, -2048 +; RV64F-LARGE-NEXT: fmv.w.x fa5, a0 ; RV64F-LARGE-NEXT: fadd.s fa0, fa0, fa5 ; RV64F-LARGE-NEXT: ret ; @@ -390,13 +392,13 @@ define i32 @lower_extern_weak(i32 %a) nounwind { ; RV32IXQCILI-SMALL-NEXT: lw a0, 0(a0) ; RV32IXQCILI-SMALL-NEXT: ret ; -; RV32F-MEDIUM-LABEL: lower_extern_weak: -; RV32F-MEDIUM: # %bb.0: -; RV32F-MEDIUM-NEXT: .Lpcrel_hi4: -; RV32F-MEDIUM-NEXT: auipc a0, %got_pcrel_hi(W) -; RV32F-MEDIUM-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi4)(a0) -; 
RV32F-MEDIUM-NEXT: lw a0, 0(a0) -; RV32F-MEDIUM-NEXT: ret +; RV32I-MEDIUM-LABEL: lower_extern_weak: +; RV32I-MEDIUM: # %bb.0: +; RV32I-MEDIUM-NEXT: .Lpcrel_hi3: +; RV32I-MEDIUM-NEXT: auipc a0, %got_pcrel_hi(W) +; RV32I-MEDIUM-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi3)(a0) +; RV32I-MEDIUM-NEXT: lw a0, 0(a0) +; RV32I-MEDIUM-NEXT: ret ; ; RV64I-SMALL-LABEL: lower_extern_weak: ; RV64I-SMALL: # %bb.0: @@ -404,45 +406,21 @@ define i32 @lower_extern_weak(i32 %a) nounwind { ; RV64I-SMALL-NEXT: lw a0, %lo(W)(a0) ; RV64I-SMALL-NEXT: ret ; -; RV64F-MEDIUM-LABEL: lower_extern_weak: -; RV64F-MEDIUM: # %bb.0: -; RV64F-MEDIUM-NEXT: .Lpcrel_hi4: -; RV64F-MEDIUM-NEXT: auipc a0, %got_pcrel_hi(W) -; RV64F-MEDIUM-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi4)(a0) -; RV64F-MEDIUM-NEXT: lw a0, 0(a0) -; RV64F-MEDIUM-NEXT: ret -; -; RV64F-LARGE-LABEL: lower_extern_weak: -; RV64F-LARGE: # %bb.0: -; RV64F-LARGE-NEXT: .Lpcrel_hi4: -; RV64F-LARGE-NEXT: auipc a0, %pcrel_hi(.LCPI4_0) -; RV64F-LARGE-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi4)(a0) -; RV64F-LARGE-NEXT: lw a0, 0(a0) -; RV64F-LARGE-NEXT: ret -; -; RV32FINX-MEDIUM-LABEL: lower_extern_weak: -; RV32FINX-MEDIUM: # %bb.0: -; RV32FINX-MEDIUM-NEXT: .Lpcrel_hi3: -; RV32FINX-MEDIUM-NEXT: auipc a0, %got_pcrel_hi(W) -; RV32FINX-MEDIUM-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi3)(a0) -; RV32FINX-MEDIUM-NEXT: lw a0, 0(a0) -; RV32FINX-MEDIUM-NEXT: ret -; -; RV64FINX-MEDIUM-LABEL: lower_extern_weak: -; RV64FINX-MEDIUM: # %bb.0: -; RV64FINX-MEDIUM-NEXT: .Lpcrel_hi3: -; RV64FINX-MEDIUM-NEXT: auipc a0, %got_pcrel_hi(W) -; RV64FINX-MEDIUM-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi3)(a0) -; RV64FINX-MEDIUM-NEXT: lw a0, 0(a0) -; RV64FINX-MEDIUM-NEXT: ret +; RV64I-MEDIUM-LABEL: lower_extern_weak: +; RV64I-MEDIUM: # %bb.0: +; RV64I-MEDIUM-NEXT: .Lpcrel_hi3: +; RV64I-MEDIUM-NEXT: auipc a0, %got_pcrel_hi(W) +; RV64I-MEDIUM-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi3)(a0) +; RV64I-MEDIUM-NEXT: lw a0, 0(a0) +; RV64I-MEDIUM-NEXT: ret ; -; RV64FINX-LARGE-LABEL: lower_extern_weak: -; RV64FINX-LARGE: # %bb.0: -; RV64FINX-LARGE-NEXT: .Lpcrel_hi3: -; RV64FINX-LARGE-NEXT: auipc a0, %pcrel_hi(.LCPI4_0) -; RV64FINX-LARGE-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi3)(a0) -; RV64FINX-LARGE-NEXT: lw a0, 0(a0) -; RV64FINX-LARGE-NEXT: ret +; RV64I-LARGE-LABEL: lower_extern_weak: +; RV64I-LARGE: # %bb.0: +; RV64I-LARGE-NEXT: .Lpcrel_hi3: +; RV64I-LARGE-NEXT: auipc a0, %pcrel_hi(.LCPI4_0) +; RV64I-LARGE-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi3)(a0) +; RV64I-LARGE-NEXT: lw a0, 0(a0) +; RV64I-LARGE-NEXT: ret %1 = load volatile i32, ptr @W ret i32 %1 } @@ -466,9 +444,9 @@ define half @lower_global_half(half %a) nounwind { ; ; RV32F-MEDIUM-LABEL: lower_global_half: ; RV32F-MEDIUM: # %bb.0: -; RV32F-MEDIUM-NEXT: .Lpcrel_hi5: +; RV32F-MEDIUM-NEXT: .Lpcrel_hi4: ; RV32F-MEDIUM-NEXT: auipc a0, %pcrel_hi(X) -; RV32F-MEDIUM-NEXT: flh fa5, %pcrel_lo(.Lpcrel_hi5)(a0) +; RV32F-MEDIUM-NEXT: flh fa5, %pcrel_lo(.Lpcrel_hi4)(a0) ; RV32F-MEDIUM-NEXT: fadd.h fa0, fa0, fa5 ; RV32F-MEDIUM-NEXT: ret ; @@ -481,17 +459,17 @@ define half @lower_global_half(half %a) nounwind { ; ; RV64F-MEDIUM-LABEL: lower_global_half: ; RV64F-MEDIUM: # %bb.0: -; RV64F-MEDIUM-NEXT: .Lpcrel_hi5: +; RV64F-MEDIUM-NEXT: .Lpcrel_hi4: ; RV64F-MEDIUM-NEXT: auipc a0, %pcrel_hi(X) -; RV64F-MEDIUM-NEXT: flh fa5, %pcrel_lo(.Lpcrel_hi5)(a0) +; RV64F-MEDIUM-NEXT: flh fa5, %pcrel_lo(.Lpcrel_hi4)(a0) ; RV64F-MEDIUM-NEXT: fadd.h fa0, fa0, fa5 ; RV64F-MEDIUM-NEXT: ret ; ; RV64F-LARGE-LABEL: lower_global_half: ; RV64F-LARGE: # %bb.0: -; RV64F-LARGE-NEXT: .Lpcrel_hi5: +; RV64F-LARGE-NEXT: .Lpcrel_hi4: ; RV64F-LARGE-NEXT: auipc 
a0, %pcrel_hi(.LCPI5_0) -; RV64F-LARGE-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi5)(a0) +; RV64F-LARGE-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi4)(a0) ; RV64F-LARGE-NEXT: flh fa5, 0(a0) ; RV64F-LARGE-NEXT: fadd.h fa0, fa0, fa5 ; RV64F-LARGE-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll index 8124d00e63fa7..c3e729800616d 100644 --- a/llvm/test/CodeGen/RISCV/double-convert.ll +++ b/llvm/test/CodeGen/RISCV/double-convert.ll @@ -1636,14 +1636,15 @@ define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind { ; ; RV64IFD-LABEL: fcvt_w_s_sat_i16: ; RV64IFD: # %bb.0: # %start -; RV64IFD-NEXT: lui a0, %hi(.LCPI26_0) -; RV64IFD-NEXT: fld fa5, %lo(.LCPI26_0)(a0) -; RV64IFD-NEXT: lui a0, %hi(.LCPI26_1) -; RV64IFD-NEXT: fld fa4, %lo(.LCPI26_1)(a0) ; RV64IFD-NEXT: feq.d a0, fa0, fa0 -; RV64IFD-NEXT: fmax.d fa5, fa0, fa5 +; RV64IFD-NEXT: lui a1, %hi(.LCPI26_0) +; RV64IFD-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; RV64IFD-NEXT: li a1, -505 +; RV64IFD-NEXT: slli a1, a1, 53 +; RV64IFD-NEXT: fmv.d.x fa4, a1 +; RV64IFD-NEXT: fmax.d fa4, fa0, fa4 ; RV64IFD-NEXT: neg a0, a0 -; RV64IFD-NEXT: fmin.d fa5, fa5, fa4 +; RV64IFD-NEXT: fmin.d fa5, fa4, fa5 ; RV64IFD-NEXT: fcvt.l.d a1, fa5, rtz ; RV64IFD-NEXT: and a0, a0, a1 ; RV64IFD-NEXT: ret @@ -1668,16 +1669,17 @@ define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind { ; ; RV64IZFINXZDINX-LABEL: fcvt_w_s_sat_i16: ; RV64IZFINXZDINX: # %bb.0: # %start -; RV64IZFINXZDINX-NEXT: li a1, -505 -; RV64IZFINXZDINX-NEXT: lui a2, %hi(.LCPI26_0) -; RV64IZFINXZDINX-NEXT: slli a1, a1, 53 -; RV64IZFINXZDINX-NEXT: ld a2, %lo(.LCPI26_0)(a2) -; RV64IZFINXZDINX-NEXT: fmax.d a1, a0, a1 -; RV64IZFINXZDINX-NEXT: feq.d a0, a0, a0 -; RV64IZFINXZDINX-NEXT: neg a0, a0 -; RV64IZFINXZDINX-NEXT: fmin.d a1, a1, a2 -; RV64IZFINXZDINX-NEXT: fcvt.l.d a1, a1, rtz -; RV64IZFINXZDINX-NEXT: and a0, a0, a1 +; RV64IZFINXZDINX-NEXT: feq.d a1, a0, a0 +; RV64IZFINXZDINX-NEXT: li a2, -505 +; RV64IZFINXZDINX-NEXT: slli a2, a2, 53 +; RV64IZFINXZDINX-NEXT: fmax.d a0, a0, a2 +; RV64IZFINXZDINX-NEXT: lui a2, 4152 +; RV64IZFINXZDINX-NEXT: neg a1, a1 +; RV64IZFINXZDINX-NEXT: addi a2, a2, -1 +; RV64IZFINXZDINX-NEXT: slli a2, a2, 38 +; RV64IZFINXZDINX-NEXT: fmin.d a0, a0, a2 +; RV64IZFINXZDINX-NEXT: fcvt.l.d a0, a0, rtz +; RV64IZFINXZDINX-NEXT: and a0, a1, a0 ; RV64IZFINXZDINX-NEXT: ret ; ; RV32I-LABEL: fcvt_w_s_sat_i16: @@ -1859,9 +1861,10 @@ define zeroext i16 @fcvt_wu_s_sat_i16(double %a) nounwind { ; ; RV64IZFINXZDINX-LABEL: fcvt_wu_s_sat_i16: ; RV64IZFINXZDINX: # %bb.0: # %start -; RV64IZFINXZDINX-NEXT: lui a1, %hi(.LCPI28_0) -; RV64IZFINXZDINX-NEXT: ld a1, %lo(.LCPI28_0)(a1) ; RV64IZFINXZDINX-NEXT: fmax.d a0, a0, zero +; RV64IZFINXZDINX-NEXT: lui a1, 8312 +; RV64IZFINXZDINX-NEXT: addi a1, a1, -1 +; RV64IZFINXZDINX-NEXT: slli a1, a1, 37 ; RV64IZFINXZDINX-NEXT: fmin.d a0, a0, a1 ; RV64IZFINXZDINX-NEXT: fcvt.lu.d a0, a0, rtz ; RV64IZFINXZDINX-NEXT: ret @@ -2012,13 +2015,15 @@ define signext i8 @fcvt_w_s_sat_i8(double %a) nounwind { ; ; RV64IFD-LABEL: fcvt_w_s_sat_i8: ; RV64IFD: # %bb.0: # %start -; RV64IFD-NEXT: lui a0, %hi(.LCPI30_0) -; RV64IFD-NEXT: fld fa5, %lo(.LCPI30_0)(a0) -; RV64IFD-NEXT: lui a0, %hi(.LCPI30_1) -; RV64IFD-NEXT: fld fa4, %lo(.LCPI30_1)(a0) ; RV64IFD-NEXT: feq.d a0, fa0, fa0 -; RV64IFD-NEXT: fmax.d fa5, fa0, fa5 +; RV64IFD-NEXT: li a1, -509 +; RV64IFD-NEXT: slli a1, a1, 53 +; RV64IFD-NEXT: fmv.d.x fa5, a1 +; RV64IFD-NEXT: lui a1, 65919 ; RV64IFD-NEXT: neg a0, a0 +; RV64IFD-NEXT: slli a1, a1, 34 +; RV64IFD-NEXT: fmax.d fa5, fa0, fa5 +; RV64IFD-NEXT: 
fmv.d.x fa4, a1 ; RV64IFD-NEXT: fmin.d fa5, fa5, fa4 ; RV64IFD-NEXT: fcvt.l.d a1, fa5, rtz ; RV64IFD-NEXT: and a0, a0, a1 @@ -2214,11 +2219,12 @@ define zeroext i8 @fcvt_wu_s_sat_i8(double %a) nounwind { ; ; RV64IFD-LABEL: fcvt_wu_s_sat_i8: ; RV64IFD: # %bb.0: # %start -; RV64IFD-NEXT: lui a0, %hi(.LCPI32_0) -; RV64IFD-NEXT: fld fa5, %lo(.LCPI32_0)(a0) -; RV64IFD-NEXT: fmv.d.x fa4, zero -; RV64IFD-NEXT: fmax.d fa4, fa0, fa4 -; RV64IFD-NEXT: fmin.d fa5, fa4, fa5 +; RV64IFD-NEXT: fmv.d.x fa5, zero +; RV64IFD-NEXT: lui a0, 131967 +; RV64IFD-NEXT: fmax.d fa5, fa0, fa5 +; RV64IFD-NEXT: slli a0, a0, 33 +; RV64IFD-NEXT: fmv.d.x fa4, a0 +; RV64IFD-NEXT: fmin.d fa5, fa5, fa4 ; RV64IFD-NEXT: fcvt.lu.d a0, fa5, rtz ; RV64IFD-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/double-imm.ll b/llvm/test/CodeGen/RISCV/double-imm.ll index 1119fd6d74a25..6f7c30edba3ea 100644 --- a/llvm/test/CodeGen/RISCV/double-imm.ll +++ b/llvm/test/CodeGen/RISCV/double-imm.ll @@ -47,8 +47,9 @@ define double @double_imm_op(double %a) nounwind { ; ; CHECK64D-LABEL: double_imm_op: ; CHECK64D: # %bb.0: -; CHECK64D-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK64D-NEXT: fld fa5, %lo(.LCPI1_0)(a0) +; CHECK64D-NEXT: li a0, 1023 +; CHECK64D-NEXT: slli a0, a0, 52 +; CHECK64D-NEXT: fmv.d.x fa5, a0 ; CHECK64D-NEXT: fadd.d fa0, fa0, fa5 ; CHECK64D-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/double-intrinsics.ll b/llvm/test/CodeGen/RISCV/double-intrinsics.ll index bb57665fa1801..caeb6e6ce70af 100644 --- a/llvm/test/CodeGen/RISCV/double-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/double-intrinsics.ll @@ -866,8 +866,9 @@ define double @floor_f64(double %a) nounwind { ; ; RV64IFD-LABEL: floor_f64: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: lui a0, %hi(.LCPI18_0) -; RV64IFD-NEXT: fld fa5, %lo(.LCPI18_0)(a0) +; RV64IFD-NEXT: li a0, 1075 +; RV64IFD-NEXT: slli a0, a0, 52 +; RV64IFD-NEXT: fmv.d.x fa5, a0 ; RV64IFD-NEXT: fabs.d fa4, fa0 ; RV64IFD-NEXT: flt.d a0, fa4, fa5 ; RV64IFD-NEXT: beqz a0, .LBB18_2 @@ -931,8 +932,9 @@ define double @ceil_f64(double %a) nounwind { ; ; RV64IFD-LABEL: ceil_f64: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: lui a0, %hi(.LCPI19_0) -; RV64IFD-NEXT: fld fa5, %lo(.LCPI19_0)(a0) +; RV64IFD-NEXT: li a0, 1075 +; RV64IFD-NEXT: slli a0, a0, 52 +; RV64IFD-NEXT: fmv.d.x fa5, a0 ; RV64IFD-NEXT: fabs.d fa4, fa0 ; RV64IFD-NEXT: flt.d a0, fa4, fa5 ; RV64IFD-NEXT: beqz a0, .LBB19_2 @@ -996,8 +998,9 @@ define double @trunc_f64(double %a) nounwind { ; ; RV64IFD-LABEL: trunc_f64: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: lui a0, %hi(.LCPI20_0) -; RV64IFD-NEXT: fld fa5, %lo(.LCPI20_0)(a0) +; RV64IFD-NEXT: li a0, 1075 +; RV64IFD-NEXT: slli a0, a0, 52 +; RV64IFD-NEXT: fmv.d.x fa5, a0 ; RV64IFD-NEXT: fabs.d fa4, fa0 ; RV64IFD-NEXT: flt.d a0, fa4, fa5 ; RV64IFD-NEXT: beqz a0, .LBB20_2 @@ -1061,8 +1064,9 @@ define double @rint_f64(double %a) nounwind { ; ; RV64IFD-LABEL: rint_f64: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: lui a0, %hi(.LCPI21_0) -; RV64IFD-NEXT: fld fa5, %lo(.LCPI21_0)(a0) +; RV64IFD-NEXT: li a0, 1075 +; RV64IFD-NEXT: slli a0, a0, 52 +; RV64IFD-NEXT: fmv.d.x fa5, a0 ; RV64IFD-NEXT: fabs.d fa4, fa0 ; RV64IFD-NEXT: flt.d a0, fa4, fa5 ; RV64IFD-NEXT: beqz a0, .LBB21_2 @@ -1167,8 +1171,9 @@ define double @round_f64(double %a) nounwind { ; ; RV64IFD-LABEL: round_f64: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: lui a0, %hi(.LCPI23_0) -; RV64IFD-NEXT: fld fa5, %lo(.LCPI23_0)(a0) +; RV64IFD-NEXT: li a0, 1075 +; RV64IFD-NEXT: slli a0, a0, 52 +; RV64IFD-NEXT: fmv.d.x fa5, a0 ; RV64IFD-NEXT: fabs.d fa4, fa0 ; RV64IFD-NEXT: flt.d a0, fa4, fa5 ; RV64IFD-NEXT: beqz 
a0, .LBB23_2 @@ -1232,8 +1237,9 @@ define double @roundeven_f64(double %a) nounwind { ; ; RV64IFD-LABEL: roundeven_f64: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: lui a0, %hi(.LCPI24_0) -; RV64IFD-NEXT: fld fa5, %lo(.LCPI24_0)(a0) +; RV64IFD-NEXT: li a0, 1075 +; RV64IFD-NEXT: slli a0, a0, 52 +; RV64IFD-NEXT: fmv.d.x fa5, a0 ; RV64IFD-NEXT: fabs.d fa4, fa0 ; RV64IFD-NEXT: flt.d a0, fa4, fa5 ; RV64IFD-NEXT: beqz a0, .LBB24_2 diff --git a/llvm/test/CodeGen/RISCV/double-round-conv.ll b/llvm/test/CodeGen/RISCV/double-round-conv.ll index 3edbda3a4bf6b..6dd24c056e386 100644 --- a/llvm/test/CodeGen/RISCV/double-round-conv.ll +++ b/llvm/test/CodeGen/RISCV/double-round-conv.ll @@ -1145,8 +1145,9 @@ define double @test_floor_double(double %x) { ; ; RV64IFD-LABEL: test_floor_double: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: lui a0, %hi(.LCPI40_0) -; RV64IFD-NEXT: fld fa5, %lo(.LCPI40_0)(a0) +; RV64IFD-NEXT: li a0, 1075 +; RV64IFD-NEXT: slli a0, a0, 52 +; RV64IFD-NEXT: fmv.d.x fa5, a0 ; RV64IFD-NEXT: fabs.d fa4, fa0 ; RV64IFD-NEXT: flt.d a0, fa4, fa5 ; RV64IFD-NEXT: beqz a0, .LBB40_2 @@ -1194,8 +1195,9 @@ define double @test_ceil_double(double %x) { ; ; RV64IFD-LABEL: test_ceil_double: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: lui a0, %hi(.LCPI41_0) -; RV64IFD-NEXT: fld fa5, %lo(.LCPI41_0)(a0) +; RV64IFD-NEXT: li a0, 1075 +; RV64IFD-NEXT: slli a0, a0, 52 +; RV64IFD-NEXT: fmv.d.x fa5, a0 ; RV64IFD-NEXT: fabs.d fa4, fa0 ; RV64IFD-NEXT: flt.d a0, fa4, fa5 ; RV64IFD-NEXT: beqz a0, .LBB41_2 @@ -1243,8 +1245,9 @@ define double @test_trunc_double(double %x) { ; ; RV64IFD-LABEL: test_trunc_double: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: lui a0, %hi(.LCPI42_0) -; RV64IFD-NEXT: fld fa5, %lo(.LCPI42_0)(a0) +; RV64IFD-NEXT: li a0, 1075 +; RV64IFD-NEXT: slli a0, a0, 52 +; RV64IFD-NEXT: fmv.d.x fa5, a0 ; RV64IFD-NEXT: fabs.d fa4, fa0 ; RV64IFD-NEXT: flt.d a0, fa4, fa5 ; RV64IFD-NEXT: beqz a0, .LBB42_2 @@ -1292,8 +1295,9 @@ define double @test_round_double(double %x) { ; ; RV64IFD-LABEL: test_round_double: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: lui a0, %hi(.LCPI43_0) -; RV64IFD-NEXT: fld fa5, %lo(.LCPI43_0)(a0) +; RV64IFD-NEXT: li a0, 1075 +; RV64IFD-NEXT: slli a0, a0, 52 +; RV64IFD-NEXT: fmv.d.x fa5, a0 ; RV64IFD-NEXT: fabs.d fa4, fa0 ; RV64IFD-NEXT: flt.d a0, fa4, fa5 ; RV64IFD-NEXT: beqz a0, .LBB43_2 @@ -1341,8 +1345,9 @@ define double @test_roundeven_double(double %x) { ; ; RV64IFD-LABEL: test_roundeven_double: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: lui a0, %hi(.LCPI44_0) -; RV64IFD-NEXT: fld fa5, %lo(.LCPI44_0)(a0) +; RV64IFD-NEXT: li a0, 1075 +; RV64IFD-NEXT: slli a0, a0, 52 +; RV64IFD-NEXT: fmv.d.x fa5, a0 ; RV64IFD-NEXT: fabs.d fa4, fa0 ; RV64IFD-NEXT: flt.d a0, fa4, fa5 ; RV64IFD-NEXT: beqz a0, .LBB44_2 diff --git a/llvm/test/CodeGen/RISCV/double-zfa.ll b/llvm/test/CodeGen/RISCV/double-zfa.ll index 2f35496b9b32c..f17c63ddb6cae 100644 --- a/llvm/test/CodeGen/RISCV/double-zfa.ll +++ b/llvm/test/CodeGen/RISCV/double-zfa.ll @@ -69,21 +69,35 @@ define double @loadfpimm8() { } define double @loadfpimm9() { -; CHECK-LABEL: loadfpimm9: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI8_0) -; CHECK-NEXT: fld fa0, %lo(.LCPI8_0)(a0) -; CHECK-NEXT: ret +; RV32IDZFA-LABEL: loadfpimm9: +; RV32IDZFA: # %bb.0: +; RV32IDZFA-NEXT: lui a0, %hi(.LCPI8_0) +; RV32IDZFA-NEXT: fld fa0, %lo(.LCPI8_0)(a0) +; RV32IDZFA-NEXT: ret +; +; RV64DZFA-LABEL: loadfpimm9: +; RV64DZFA: # %bb.0: +; RV64DZFA-NEXT: lui a0, 131967 +; RV64DZFA-NEXT: slli a0, a0, 33 +; RV64DZFA-NEXT: fmv.d.x fa0, a0 +; RV64DZFA-NEXT: ret ret double 255.0 } ; Negative test. 
This is 1 * 2^256. define double @loadfpimm10() { -; CHECK-LABEL: loadfpimm10: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI9_0) -; CHECK-NEXT: fld fa0, %lo(.LCPI9_0)(a0) -; CHECK-NEXT: ret +; RV32IDZFA-LABEL: loadfpimm10: +; RV32IDZFA: # %bb.0: +; RV32IDZFA-NEXT: lui a0, %hi(.LCPI9_0) +; RV32IDZFA-NEXT: fld fa0, %lo(.LCPI9_0)(a0) +; RV32IDZFA-NEXT: ret +; +; RV64DZFA-LABEL: loadfpimm10: +; RV64DZFA: # %bb.0: +; RV64DZFA-NEXT: li a0, 1 +; RV64DZFA-NEXT: slli a0, a0, 60 +; RV64DZFA-NEXT: fmv.d.x fa0, a0 +; RV64DZFA-NEXT: ret ret double 0x1000000000000000 } @@ -125,11 +139,18 @@ define double @loadfpimm13() { ; Negative test. This is 2^-1023, a denormal. define double @loadfpimm15() { -; CHECK-LABEL: loadfpimm15: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: fld fa0, %lo(.LCPI13_0)(a0) -; CHECK-NEXT: ret +; RV32IDZFA-LABEL: loadfpimm15: +; RV32IDZFA: # %bb.0: +; RV32IDZFA-NEXT: lui a0, %hi(.LCPI13_0) +; RV32IDZFA-NEXT: fld fa0, %lo(.LCPI13_0)(a0) +; RV32IDZFA-NEXT: ret +; +; RV64DZFA-LABEL: loadfpimm15: +; RV64DZFA: # %bb.0: +; RV64DZFA-NEXT: li a0, 1 +; RV64DZFA-NEXT: slli a0, a0, 51 +; RV64DZFA-NEXT: fmv.d.x fa0, a0 +; RV64DZFA-NEXT: ret ret double 0x0008000000000000 } diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll index 72578193ee4bf..e6e4f6642f685 100644 --- a/llvm/test/CodeGen/RISCV/float-convert.ll +++ b/llvm/test/CodeGen/RISCV/float-convert.ll @@ -623,20 +623,21 @@ define i64 @fcvt_l_s_sat(float %a) nounwind { ; RV32IF-NEXT: # %bb.1: # %start ; RV32IF-NEXT: mv a2, a1 ; RV32IF-NEXT: .LBB12_2: # %start -; RV32IF-NEXT: lui a1, %hi(.LCPI12_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI12_0)(a1) +; RV32IF-NEXT: lui a1, 389120 +; RV32IF-NEXT: addi a1, a1, -1 +; RV32IF-NEXT: fmv.w.x fa5, a1 ; RV32IF-NEXT: flt.s a1, fa5, fs0 ; RV32IF-NEXT: beqz a1, .LBB12_4 ; RV32IF-NEXT: # %bb.3: ; RV32IF-NEXT: addi a2, a3, -1 ; RV32IF-NEXT: .LBB12_4: # %start ; RV32IF-NEXT: feq.s a3, fs0, fs0 -; RV32IF-NEXT: neg a4, a1 -; RV32IF-NEXT: neg a1, s0 +; RV32IF-NEXT: neg a4, s0 +; RV32IF-NEXT: neg a5, a1 ; RV32IF-NEXT: neg a3, a3 -; RV32IF-NEXT: and a0, a1, a0 +; RV32IF-NEXT: and a0, a4, a0 ; RV32IF-NEXT: and a1, a3, a2 -; RV32IF-NEXT: or a0, a4, a0 +; RV32IF-NEXT: or a0, a5, a0 ; RV32IF-NEXT: and a0, a3, a0 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -864,10 +865,11 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind { ; RV32IF-NEXT: fle.s a0, fa5, fa0 ; RV32IF-NEXT: neg s0, a0 ; RV32IF-NEXT: call __fixunssfdi -; RV32IF-NEXT: lui a2, %hi(.LCPI14_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI14_0)(a2) ; RV32IF-NEXT: and a0, s0, a0 +; RV32IF-NEXT: lui a2, 391168 ; RV32IF-NEXT: and a1, s0, a1 +; RV32IF-NEXT: addi a2, a2, -1 +; RV32IF-NEXT: fmv.w.x fa5, a2 ; RV32IF-NEXT: flt.s a2, fa5, fs0 ; RV32IF-NEXT: neg a2, a2 ; RV32IF-NEXT: or a0, a2, a0 @@ -1405,13 +1407,14 @@ define signext i16 @fcvt_w_s_sat_i16(float %a) nounwind { ; RV32IF-LABEL: fcvt_w_s_sat_i16: ; RV32IF: # %bb.0: # %start ; RV32IF-NEXT: feq.s a0, fa0, fa0 -; RV32IF-NEXT: lui a1, %hi(.LCPI24_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI24_0)(a1) ; RV32IF-NEXT: lui a1, 815104 -; RV32IF-NEXT: fmv.w.x fa4, a1 -; RV32IF-NEXT: fmax.s fa4, fa0, fa4 +; RV32IF-NEXT: fmv.w.x fa5, a1 +; RV32IF-NEXT: lui a1, 290816 ; RV32IF-NEXT: neg a0, a0 -; RV32IF-NEXT: fmin.s fa5, fa4, fa5 +; RV32IF-NEXT: addi a1, a1, -512 +; RV32IF-NEXT: fmax.s fa5, fa0, fa5 +; RV32IF-NEXT: fmv.w.x fa4, a1 +; RV32IF-NEXT: fmin.s fa5, fa5, fa4 ; RV32IF-NEXT: fcvt.w.s a1, fa5, rtz ; 
RV32IF-NEXT: and a0, a0, a1 ; RV32IF-NEXT: ret @@ -1419,13 +1422,14 @@ define signext i16 @fcvt_w_s_sat_i16(float %a) nounwind { ; RV64IF-LABEL: fcvt_w_s_sat_i16: ; RV64IF: # %bb.0: # %start ; RV64IF-NEXT: feq.s a0, fa0, fa0 -; RV64IF-NEXT: lui a1, %hi(.LCPI24_0) -; RV64IF-NEXT: flw fa5, %lo(.LCPI24_0)(a1) ; RV64IF-NEXT: lui a1, 815104 -; RV64IF-NEXT: fmv.w.x fa4, a1 -; RV64IF-NEXT: fmax.s fa4, fa0, fa4 +; RV64IF-NEXT: fmv.w.x fa5, a1 +; RV64IF-NEXT: lui a1, 290816 ; RV64IF-NEXT: neg a0, a0 -; RV64IF-NEXT: fmin.s fa5, fa4, fa5 +; RV64IF-NEXT: addi a1, a1, -512 +; RV64IF-NEXT: fmax.s fa5, fa0, fa5 +; RV64IF-NEXT: fmv.w.x fa4, a1 +; RV64IF-NEXT: fmin.s fa5, fa5, fa4 ; RV64IF-NEXT: fcvt.l.s a1, fa5, rtz ; RV64IF-NEXT: and a0, a0, a1 ; RV64IF-NEXT: ret @@ -1590,21 +1594,23 @@ define zeroext i16 @fcvt_wu_s_i16(float %a) nounwind { define zeroext i16 @fcvt_wu_s_sat_i16(float %a) nounwind { ; RV32IF-LABEL: fcvt_wu_s_sat_i16: ; RV32IF: # %bb.0: # %start -; RV32IF-NEXT: lui a0, %hi(.LCPI26_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI26_0)(a0) -; RV32IF-NEXT: fmv.w.x fa4, zero -; RV32IF-NEXT: fmax.s fa4, fa0, fa4 -; RV32IF-NEXT: fmin.s fa5, fa4, fa5 +; RV32IF-NEXT: fmv.w.x fa5, zero +; RV32IF-NEXT: lui a0, 292864 +; RV32IF-NEXT: fmax.s fa5, fa0, fa5 +; RV32IF-NEXT: addi a0, a0, -256 +; RV32IF-NEXT: fmv.w.x fa4, a0 +; RV32IF-NEXT: fmin.s fa5, fa5, fa4 ; RV32IF-NEXT: fcvt.wu.s a0, fa5, rtz ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: fcvt_wu_s_sat_i16: ; RV64IF: # %bb.0: # %start -; RV64IF-NEXT: lui a0, %hi(.LCPI26_0) -; RV64IF-NEXT: flw fa5, %lo(.LCPI26_0)(a0) -; RV64IF-NEXT: fmv.w.x fa4, zero -; RV64IF-NEXT: fmax.s fa4, fa0, fa4 -; RV64IF-NEXT: fmin.s fa5, fa4, fa5 +; RV64IF-NEXT: fmv.w.x fa5, zero +; RV64IF-NEXT: lui a0, 292864 +; RV64IF-NEXT: fmax.s fa5, fa0, fa5 +; RV64IF-NEXT: addi a0, a0, -256 +; RV64IF-NEXT: fmv.w.x fa4, a0 +; RV64IF-NEXT: fmin.s fa5, fa5, fa4 ; RV64IF-NEXT: fcvt.lu.s a0, fa5, rtz ; RV64IF-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/float-imm.ll b/llvm/test/CodeGen/RISCV/float-imm.ll index a010ab49b2827..e4e34543d6314 100644 --- a/llvm/test/CodeGen/RISCV/float-imm.ll +++ b/llvm/test/CodeGen/RISCV/float-imm.ll @@ -12,8 +12,9 @@ define float @float_imm() nounwind { ; CHECK-LABEL: float_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: flw fa0, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: lui a0, 263313 +; CHECK-NEXT: addi a0, a0, -37 +; CHECK-NEXT: fmv.w.x fa0, a0 ; CHECK-NEXT: ret ; ; CHECKZFINX-LABEL: float_imm: diff --git a/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll index 809cc31abe612..6871f29cb8b05 100644 --- a/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll +++ b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll @@ -60,8 +60,9 @@ define i64 @test_floor_si64(float %x) nounwind { ; RV32IF-NEXT: # %bb.3: ; RV32IF-NEXT: mv a2, a1 ; RV32IF-NEXT: .LBB1_4: -; RV32IF-NEXT: lui a1, %hi(.LCPI1_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI1_0)(a1) +; RV32IF-NEXT: lui a1, 389120 +; RV32IF-NEXT: addi a1, a1, -1 +; RV32IF-NEXT: fmv.w.x fa5, a1 ; RV32IF-NEXT: flt.s a1, fa5, fs0 ; RV32IF-NEXT: beqz a1, .LBB1_6 ; RV32IF-NEXT: # %bb.5: @@ -196,10 +197,11 @@ define i64 @test_floor_ui64(float %x) nounwind { ; RV32IF-NEXT: neg s0, a0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixunssfdi -; RV32IF-NEXT: lui a2, %hi(.LCPI3_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI3_0)(a2) ; RV32IF-NEXT: and a0, s0, a0 +; RV32IF-NEXT: lui a2, 391168 ; RV32IF-NEXT: and a1, s0, a1 +; RV32IF-NEXT: addi a2, a2, -1 +; RV32IF-NEXT: fmv.w.x fa5, a2 ; RV32IF-NEXT: 
flt.s a2, fa5, fs0 ; RV32IF-NEXT: neg a2, a2 ; RV32IF-NEXT: or a0, a2, a0 @@ -318,8 +320,9 @@ define i64 @test_ceil_si64(float %x) nounwind { ; RV32IF-NEXT: # %bb.3: ; RV32IF-NEXT: mv a2, a1 ; RV32IF-NEXT: .LBB5_4: -; RV32IF-NEXT: lui a1, %hi(.LCPI5_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI5_0)(a1) +; RV32IF-NEXT: lui a1, 389120 +; RV32IF-NEXT: addi a1, a1, -1 +; RV32IF-NEXT: fmv.w.x fa5, a1 ; RV32IF-NEXT: flt.s a1, fa5, fs0 ; RV32IF-NEXT: beqz a1, .LBB5_6 ; RV32IF-NEXT: # %bb.5: @@ -454,10 +457,11 @@ define i64 @test_ceil_ui64(float %x) nounwind { ; RV32IF-NEXT: neg s0, a0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixunssfdi -; RV32IF-NEXT: lui a2, %hi(.LCPI7_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI7_0)(a2) ; RV32IF-NEXT: and a0, s0, a0 +; RV32IF-NEXT: lui a2, 391168 ; RV32IF-NEXT: and a1, s0, a1 +; RV32IF-NEXT: addi a2, a2, -1 +; RV32IF-NEXT: fmv.w.x fa5, a2 ; RV32IF-NEXT: flt.s a2, fa5, fs0 ; RV32IF-NEXT: neg a2, a2 ; RV32IF-NEXT: or a0, a2, a0 @@ -576,8 +580,9 @@ define i64 @test_trunc_si64(float %x) nounwind { ; RV32IF-NEXT: # %bb.3: ; RV32IF-NEXT: mv a2, a1 ; RV32IF-NEXT: .LBB9_4: -; RV32IF-NEXT: lui a1, %hi(.LCPI9_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI9_0)(a1) +; RV32IF-NEXT: lui a1, 389120 +; RV32IF-NEXT: addi a1, a1, -1 +; RV32IF-NEXT: fmv.w.x fa5, a1 ; RV32IF-NEXT: flt.s a1, fa5, fs0 ; RV32IF-NEXT: beqz a1, .LBB9_6 ; RV32IF-NEXT: # %bb.5: @@ -712,10 +717,11 @@ define i64 @test_trunc_ui64(float %x) nounwind { ; RV32IF-NEXT: neg s0, a0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixunssfdi -; RV32IF-NEXT: lui a2, %hi(.LCPI11_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI11_0)(a2) ; RV32IF-NEXT: and a0, s0, a0 +; RV32IF-NEXT: lui a2, 391168 ; RV32IF-NEXT: and a1, s0, a1 +; RV32IF-NEXT: addi a2, a2, -1 +; RV32IF-NEXT: fmv.w.x fa5, a2 ; RV32IF-NEXT: flt.s a2, fa5, fs0 ; RV32IF-NEXT: neg a2, a2 ; RV32IF-NEXT: or a0, a2, a0 @@ -834,8 +840,9 @@ define i64 @test_round_si64(float %x) nounwind { ; RV32IF-NEXT: # %bb.3: ; RV32IF-NEXT: mv a2, a1 ; RV32IF-NEXT: .LBB13_4: -; RV32IF-NEXT: lui a1, %hi(.LCPI13_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI13_0)(a1) +; RV32IF-NEXT: lui a1, 389120 +; RV32IF-NEXT: addi a1, a1, -1 +; RV32IF-NEXT: fmv.w.x fa5, a1 ; RV32IF-NEXT: flt.s a1, fa5, fs0 ; RV32IF-NEXT: beqz a1, .LBB13_6 ; RV32IF-NEXT: # %bb.5: @@ -970,10 +977,11 @@ define i64 @test_round_ui64(float %x) nounwind { ; RV32IF-NEXT: neg s0, a0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixunssfdi -; RV32IF-NEXT: lui a2, %hi(.LCPI15_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI15_0)(a2) ; RV32IF-NEXT: and a0, s0, a0 +; RV32IF-NEXT: lui a2, 391168 ; RV32IF-NEXT: and a1, s0, a1 +; RV32IF-NEXT: addi a2, a2, -1 +; RV32IF-NEXT: fmv.w.x fa5, a2 ; RV32IF-NEXT: flt.s a2, fa5, fs0 ; RV32IF-NEXT: neg a2, a2 ; RV32IF-NEXT: or a0, a2, a0 @@ -1092,8 +1100,9 @@ define i64 @test_roundeven_si64(float %x) nounwind { ; RV32IF-NEXT: # %bb.3: ; RV32IF-NEXT: mv a2, a1 ; RV32IF-NEXT: .LBB17_4: -; RV32IF-NEXT: lui a1, %hi(.LCPI17_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI17_0)(a1) +; RV32IF-NEXT: lui a1, 389120 +; RV32IF-NEXT: addi a1, a1, -1 +; RV32IF-NEXT: fmv.w.x fa5, a1 ; RV32IF-NEXT: flt.s a1, fa5, fs0 ; RV32IF-NEXT: beqz a1, .LBB17_6 ; RV32IF-NEXT: # %bb.5: @@ -1228,10 +1237,11 @@ define i64 @test_roundeven_ui64(float %x) nounwind { ; RV32IF-NEXT: neg s0, a0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixunssfdi -; RV32IF-NEXT: lui a2, %hi(.LCPI19_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI19_0)(a2) ; RV32IF-NEXT: and a0, s0, a0 +; RV32IF-NEXT: lui a2, 391168 ; RV32IF-NEXT: and a1, s0, a1 +; RV32IF-NEXT: addi a2, a2, -1 +; 
RV32IF-NEXT: fmv.w.x fa5, a2 ; RV32IF-NEXT: flt.s a2, fa5, fs0 ; RV32IF-NEXT: neg a2, a2 ; RV32IF-NEXT: or a0, a2, a0 @@ -1350,8 +1360,9 @@ define i64 @test_rint_si64(float %x) nounwind { ; RV32IF-NEXT: # %bb.3: ; RV32IF-NEXT: mv a2, a1 ; RV32IF-NEXT: .LBB21_4: -; RV32IF-NEXT: lui a1, %hi(.LCPI21_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI21_0)(a1) +; RV32IF-NEXT: lui a1, 389120 +; RV32IF-NEXT: addi a1, a1, -1 +; RV32IF-NEXT: fmv.w.x fa5, a1 ; RV32IF-NEXT: flt.s a1, fa5, fs0 ; RV32IF-NEXT: beqz a1, .LBB21_6 ; RV32IF-NEXT: # %bb.5: @@ -1486,10 +1497,11 @@ define i64 @test_rint_ui64(float %x) nounwind { ; RV32IF-NEXT: neg s0, a0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixunssfdi -; RV32IF-NEXT: lui a2, %hi(.LCPI23_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI23_0)(a2) ; RV32IF-NEXT: and a0, s0, a0 +; RV32IF-NEXT: lui a2, 391168 ; RV32IF-NEXT: and a1, s0, a1 +; RV32IF-NEXT: addi a2, a2, -1 +; RV32IF-NEXT: fmv.w.x fa5, a2 ; RV32IF-NEXT: flt.s a2, fa5, fs0 ; RV32IF-NEXT: neg a2, a2 ; RV32IF-NEXT: or a0, a2, a0 diff --git a/llvm/test/CodeGen/RISCV/half-arith.ll b/llvm/test/CodeGen/RISCV/half-arith.ll index 84163b52bb98d..2ebb6e9b97a4d 100644 --- a/llvm/test/CodeGen/RISCV/half-arith.ll +++ b/llvm/test/CodeGen/RISCV/half-arith.ll @@ -2883,39 +2883,20 @@ define half @fsgnjx_f16(half %x, half %y) nounwind { ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; -; RV32IZFHMIN-LABEL: fsgnjx_f16: -; RV32IZFHMIN: # %bb.0: -; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI23_0) -; RV32IZFHMIN-NEXT: lhu a0, %lo(.LCPI23_0)(a0) -; RV32IZFHMIN-NEXT: fmv.x.h a1, fa0 -; RV32IZFHMIN-NEXT: lui a2, 1048568 -; RV32IZFHMIN-NEXT: and a1, a1, a2 -; RV32IZFHMIN-NEXT: slli a0, a0, 17 -; RV32IZFHMIN-NEXT: srli a0, a0, 17 -; RV32IZFHMIN-NEXT: or a0, a0, a1 -; RV32IZFHMIN-NEXT: fmv.h.x fa5, a0 -; RV32IZFHMIN-NEXT: fcvt.s.h fa5, fa5 -; RV32IZFHMIN-NEXT: fcvt.s.h fa4, fa1 -; RV32IZFHMIN-NEXT: fmul.s fa5, fa5, fa4 -; RV32IZFHMIN-NEXT: fcvt.h.s fa0, fa5 -; RV32IZFHMIN-NEXT: ret -; -; RV64IZFHMIN-LABEL: fsgnjx_f16: -; RV64IZFHMIN: # %bb.0: -; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI23_0) -; RV64IZFHMIN-NEXT: lhu a0, %lo(.LCPI23_0)(a0) -; RV64IZFHMIN-NEXT: fmv.x.h a1, fa0 -; RV64IZFHMIN-NEXT: lui a2, 1048568 -; RV64IZFHMIN-NEXT: and a1, a1, a2 -; RV64IZFHMIN-NEXT: slli a0, a0, 49 -; RV64IZFHMIN-NEXT: srli a0, a0, 49 -; RV64IZFHMIN-NEXT: or a0, a0, a1 -; RV64IZFHMIN-NEXT: fmv.h.x fa5, a0 -; RV64IZFHMIN-NEXT: fcvt.s.h fa5, fa5 -; RV64IZFHMIN-NEXT: fcvt.s.h fa4, fa1 -; RV64IZFHMIN-NEXT: fmul.s fa5, fa5, fa4 -; RV64IZFHMIN-NEXT: fcvt.h.s fa0, fa5 -; RV64IZFHMIN-NEXT: ret +; CHECKIZFHMIN-LABEL: fsgnjx_f16: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fmv.x.h a0, fa0 +; CHECKIZFHMIN-NEXT: lui a1, 1048568 +; CHECKIZFHMIN-NEXT: and a0, a0, a1 +; CHECKIZFHMIN-NEXT: li a1, 15 +; CHECKIZFHMIN-NEXT: slli a1, a1, 10 +; CHECKIZFHMIN-NEXT: or a0, a0, a1 +; CHECKIZFHMIN-NEXT: fmv.h.x fa5, a0 +; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa5 +; CHECKIZFHMIN-NEXT: fcvt.s.h fa4, fa1 +; CHECKIZFHMIN-NEXT: fmul.s fa5, fa5, fa4 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, fa5 +; CHECKIZFHMIN-NEXT: ret ; ; CHECKIZHINXMIN-LABEL: fsgnjx_f16: ; CHECKIZHINXMIN: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll index 6cebf8b2828bf..c3c06e192f76f 100644 --- a/llvm/test/CodeGen/RISCV/half-convert.ll +++ b/llvm/test/CodeGen/RISCV/half-convert.ll @@ -194,13 +194,14 @@ define i16 @fcvt_si_h_sat(half %a) nounwind { ; RV32IZFH-LABEL: fcvt_si_h_sat: ; RV32IZFH: # %bb.0: # %start ; RV32IZFH-NEXT: fcvt.s.h fa5, fa0 -; RV32IZFH-NEXT: lui a0, 
%hi(.LCPI1_0) -; RV32IZFH-NEXT: feq.s a1, fa5, fa5 -; RV32IZFH-NEXT: flw fa4, %lo(.LCPI1_0)(a0) ; RV32IZFH-NEXT: lui a0, 815104 -; RV32IZFH-NEXT: fmv.w.x fa3, a0 -; RV32IZFH-NEXT: fmax.s fa5, fa5, fa3 -; RV32IZFH-NEXT: neg a0, a1 +; RV32IZFH-NEXT: lui a1, 290816 +; RV32IZFH-NEXT: fmv.w.x fa4, a0 +; RV32IZFH-NEXT: feq.s a0, fa5, fa5 +; RV32IZFH-NEXT: addi a1, a1, -512 +; RV32IZFH-NEXT: neg a0, a0 +; RV32IZFH-NEXT: fmax.s fa5, fa5, fa4 +; RV32IZFH-NEXT: fmv.w.x fa4, a1 ; RV32IZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV32IZFH-NEXT: fcvt.w.s a1, fa5, rtz ; RV32IZFH-NEXT: and a0, a0, a1 @@ -209,13 +210,14 @@ define i16 @fcvt_si_h_sat(half %a) nounwind { ; RV64IZFH-LABEL: fcvt_si_h_sat: ; RV64IZFH: # %bb.0: # %start ; RV64IZFH-NEXT: fcvt.s.h fa5, fa0 -; RV64IZFH-NEXT: lui a0, %hi(.LCPI1_0) -; RV64IZFH-NEXT: feq.s a1, fa5, fa5 -; RV64IZFH-NEXT: flw fa4, %lo(.LCPI1_0)(a0) ; RV64IZFH-NEXT: lui a0, 815104 -; RV64IZFH-NEXT: fmv.w.x fa3, a0 -; RV64IZFH-NEXT: fmax.s fa5, fa5, fa3 -; RV64IZFH-NEXT: neg a0, a1 +; RV64IZFH-NEXT: lui a1, 290816 +; RV64IZFH-NEXT: fmv.w.x fa4, a0 +; RV64IZFH-NEXT: feq.s a0, fa5, fa5 +; RV64IZFH-NEXT: addi a1, a1, -512 +; RV64IZFH-NEXT: neg a0, a0 +; RV64IZFH-NEXT: fmax.s fa5, fa5, fa4 +; RV64IZFH-NEXT: fmv.w.x fa4, a1 ; RV64IZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV64IZFH-NEXT: fcvt.l.s a1, fa5, rtz ; RV64IZFH-NEXT: and a0, a0, a1 @@ -224,13 +226,14 @@ define i16 @fcvt_si_h_sat(half %a) nounwind { ; RV32IDZFH-LABEL: fcvt_si_h_sat: ; RV32IDZFH: # %bb.0: # %start ; RV32IDZFH-NEXT: fcvt.s.h fa5, fa0 -; RV32IDZFH-NEXT: lui a0, %hi(.LCPI1_0) -; RV32IDZFH-NEXT: feq.s a1, fa5, fa5 -; RV32IDZFH-NEXT: flw fa4, %lo(.LCPI1_0)(a0) ; RV32IDZFH-NEXT: lui a0, 815104 -; RV32IDZFH-NEXT: fmv.w.x fa3, a0 -; RV32IDZFH-NEXT: fmax.s fa5, fa5, fa3 -; RV32IDZFH-NEXT: neg a0, a1 +; RV32IDZFH-NEXT: lui a1, 290816 +; RV32IDZFH-NEXT: fmv.w.x fa4, a0 +; RV32IDZFH-NEXT: feq.s a0, fa5, fa5 +; RV32IDZFH-NEXT: addi a1, a1, -512 +; RV32IDZFH-NEXT: neg a0, a0 +; RV32IDZFH-NEXT: fmax.s fa5, fa5, fa4 +; RV32IDZFH-NEXT: fmv.w.x fa4, a1 ; RV32IDZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV32IDZFH-NEXT: fcvt.w.s a1, fa5, rtz ; RV32IDZFH-NEXT: and a0, a0, a1 @@ -239,13 +242,14 @@ define i16 @fcvt_si_h_sat(half %a) nounwind { ; RV64IDZFH-LABEL: fcvt_si_h_sat: ; RV64IDZFH: # %bb.0: # %start ; RV64IDZFH-NEXT: fcvt.s.h fa5, fa0 -; RV64IDZFH-NEXT: lui a0, %hi(.LCPI1_0) -; RV64IDZFH-NEXT: feq.s a1, fa5, fa5 -; RV64IDZFH-NEXT: flw fa4, %lo(.LCPI1_0)(a0) ; RV64IDZFH-NEXT: lui a0, 815104 -; RV64IDZFH-NEXT: fmv.w.x fa3, a0 -; RV64IDZFH-NEXT: fmax.s fa5, fa5, fa3 -; RV64IDZFH-NEXT: neg a0, a1 +; RV64IDZFH-NEXT: lui a1, 290816 +; RV64IDZFH-NEXT: fmv.w.x fa4, a0 +; RV64IDZFH-NEXT: feq.s a0, fa5, fa5 +; RV64IDZFH-NEXT: addi a1, a1, -512 +; RV64IDZFH-NEXT: neg a0, a0 +; RV64IDZFH-NEXT: fmax.s fa5, fa5, fa4 +; RV64IDZFH-NEXT: fmv.w.x fa4, a1 ; RV64IDZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV64IDZFH-NEXT: fcvt.l.s a1, fa5, rtz ; RV64IDZFH-NEXT: and a0, a0, a1 @@ -399,13 +403,14 @@ define i16 @fcvt_si_h_sat(half %a) nounwind { ; RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32ID-ILP32-NEXT: call __extendhfsf2 ; RV32ID-ILP32-NEXT: fmv.w.x fa5, a0 -; RV32ID-ILP32-NEXT: lui a0, %hi(.LCPI1_0) -; RV32ID-ILP32-NEXT: feq.s a1, fa5, fa5 -; RV32ID-ILP32-NEXT: flw fa4, %lo(.LCPI1_0)(a0) ; RV32ID-ILP32-NEXT: lui a0, 815104 -; RV32ID-ILP32-NEXT: fmv.w.x fa3, a0 -; RV32ID-ILP32-NEXT: fmax.s fa5, fa5, fa3 -; RV32ID-ILP32-NEXT: neg a0, a1 +; RV32ID-ILP32-NEXT: lui a1, 290816 +; RV32ID-ILP32-NEXT: fmv.w.x fa4, a0 +; RV32ID-ILP32-NEXT: feq.s a0, fa5, fa5 +; 
RV32ID-ILP32-NEXT: addi a1, a1, -512 +; RV32ID-ILP32-NEXT: neg a0, a0 +; RV32ID-ILP32-NEXT: fmax.s fa5, fa5, fa4 +; RV32ID-ILP32-NEXT: fmv.w.x fa4, a1 ; RV32ID-ILP32-NEXT: fmin.s fa5, fa5, fa4 ; RV32ID-ILP32-NEXT: fcvt.w.s a1, fa5, rtz ; RV32ID-ILP32-NEXT: and a0, a0, a1 @@ -419,13 +424,14 @@ define i16 @fcvt_si_h_sat(half %a) nounwind { ; RV64ID-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64ID-LP64-NEXT: call __extendhfsf2 ; RV64ID-LP64-NEXT: fmv.w.x fa5, a0 -; RV64ID-LP64-NEXT: lui a0, %hi(.LCPI1_0) -; RV64ID-LP64-NEXT: feq.s a1, fa5, fa5 -; RV64ID-LP64-NEXT: flw fa4, %lo(.LCPI1_0)(a0) ; RV64ID-LP64-NEXT: lui a0, 815104 -; RV64ID-LP64-NEXT: fmv.w.x fa3, a0 -; RV64ID-LP64-NEXT: fmax.s fa5, fa5, fa3 -; RV64ID-LP64-NEXT: neg a0, a1 +; RV64ID-LP64-NEXT: lui a1, 290816 +; RV64ID-LP64-NEXT: fmv.w.x fa4, a0 +; RV64ID-LP64-NEXT: feq.s a0, fa5, fa5 +; RV64ID-LP64-NEXT: addi a1, a1, -512 +; RV64ID-LP64-NEXT: neg a0, a0 +; RV64ID-LP64-NEXT: fmax.s fa5, fa5, fa4 +; RV64ID-LP64-NEXT: fmv.w.x fa4, a1 ; RV64ID-LP64-NEXT: fmin.s fa5, fa5, fa4 ; RV64ID-LP64-NEXT: fcvt.l.s a1, fa5, rtz ; RV64ID-LP64-NEXT: and a0, a0, a1 @@ -439,13 +445,14 @@ define i16 @fcvt_si_h_sat(half %a) nounwind { ; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32ID-NEXT: call __extendhfsf2 ; RV32ID-NEXT: feq.s a0, fa0, fa0 -; RV32ID-NEXT: lui a1, %hi(.LCPI1_0) -; RV32ID-NEXT: flw fa5, %lo(.LCPI1_0)(a1) ; RV32ID-NEXT: lui a1, 815104 -; RV32ID-NEXT: fmv.w.x fa4, a1 -; RV32ID-NEXT: fmax.s fa4, fa0, fa4 +; RV32ID-NEXT: fmv.w.x fa5, a1 +; RV32ID-NEXT: lui a1, 290816 ; RV32ID-NEXT: neg a0, a0 -; RV32ID-NEXT: fmin.s fa5, fa4, fa5 +; RV32ID-NEXT: addi a1, a1, -512 +; RV32ID-NEXT: fmax.s fa5, fa0, fa5 +; RV32ID-NEXT: fmv.w.x fa4, a1 +; RV32ID-NEXT: fmin.s fa5, fa5, fa4 ; RV32ID-NEXT: fcvt.w.s a1, fa5, rtz ; RV32ID-NEXT: and a0, a0, a1 ; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -458,13 +465,14 @@ define i16 @fcvt_si_h_sat(half %a) nounwind { ; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64ID-NEXT: call __extendhfsf2 ; RV64ID-NEXT: feq.s a0, fa0, fa0 -; RV64ID-NEXT: lui a1, %hi(.LCPI1_0) -; RV64ID-NEXT: flw fa5, %lo(.LCPI1_0)(a1) ; RV64ID-NEXT: lui a1, 815104 -; RV64ID-NEXT: fmv.w.x fa4, a1 -; RV64ID-NEXT: fmax.s fa4, fa0, fa4 +; RV64ID-NEXT: fmv.w.x fa5, a1 +; RV64ID-NEXT: lui a1, 290816 ; RV64ID-NEXT: neg a0, a0 -; RV64ID-NEXT: fmin.s fa5, fa4, fa5 +; RV64ID-NEXT: addi a1, a1, -512 +; RV64ID-NEXT: fmax.s fa5, fa0, fa5 +; RV64ID-NEXT: fmv.w.x fa4, a1 +; RV64ID-NEXT: fmin.s fa5, fa5, fa4 ; RV64ID-NEXT: fcvt.l.s a1, fa5, rtz ; RV64ID-NEXT: and a0, a0, a1 ; RV64ID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -474,13 +482,14 @@ define i16 @fcvt_si_h_sat(half %a) nounwind { ; CHECK32-IZFHMIN-LABEL: fcvt_si_h_sat: ; CHECK32-IZFHMIN: # %bb.0: # %start ; CHECK32-IZFHMIN-NEXT: fcvt.s.h fa5, fa0 -; CHECK32-IZFHMIN-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK32-IZFHMIN-NEXT: feq.s a1, fa5, fa5 -; CHECK32-IZFHMIN-NEXT: flw fa4, %lo(.LCPI1_0)(a0) ; CHECK32-IZFHMIN-NEXT: lui a0, 815104 -; CHECK32-IZFHMIN-NEXT: fmv.w.x fa3, a0 -; CHECK32-IZFHMIN-NEXT: fmax.s fa5, fa5, fa3 -; CHECK32-IZFHMIN-NEXT: neg a0, a1 +; CHECK32-IZFHMIN-NEXT: lui a1, 290816 +; CHECK32-IZFHMIN-NEXT: fmv.w.x fa4, a0 +; CHECK32-IZFHMIN-NEXT: feq.s a0, fa5, fa5 +; CHECK32-IZFHMIN-NEXT: addi a1, a1, -512 +; CHECK32-IZFHMIN-NEXT: neg a0, a0 +; CHECK32-IZFHMIN-NEXT: fmax.s fa5, fa5, fa4 +; CHECK32-IZFHMIN-NEXT: fmv.w.x fa4, a1 ; CHECK32-IZFHMIN-NEXT: fmin.s fa5, fa5, fa4 ; CHECK32-IZFHMIN-NEXT: fcvt.w.s a1, fa5, rtz ; CHECK32-IZFHMIN-NEXT: and a0, a0, a1 @@ -489,13 
+498,14 @@ define i16 @fcvt_si_h_sat(half %a) nounwind { ; CHECK64-IZFHMIN-LABEL: fcvt_si_h_sat: ; CHECK64-IZFHMIN: # %bb.0: # %start ; CHECK64-IZFHMIN-NEXT: fcvt.s.h fa5, fa0 -; CHECK64-IZFHMIN-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK64-IZFHMIN-NEXT: feq.s a1, fa5, fa5 -; CHECK64-IZFHMIN-NEXT: flw fa4, %lo(.LCPI1_0)(a0) ; CHECK64-IZFHMIN-NEXT: lui a0, 815104 -; CHECK64-IZFHMIN-NEXT: fmv.w.x fa3, a0 -; CHECK64-IZFHMIN-NEXT: fmax.s fa5, fa5, fa3 -; CHECK64-IZFHMIN-NEXT: neg a0, a1 +; CHECK64-IZFHMIN-NEXT: lui a1, 290816 +; CHECK64-IZFHMIN-NEXT: fmv.w.x fa4, a0 +; CHECK64-IZFHMIN-NEXT: feq.s a0, fa5, fa5 +; CHECK64-IZFHMIN-NEXT: addi a1, a1, -512 +; CHECK64-IZFHMIN-NEXT: neg a0, a0 +; CHECK64-IZFHMIN-NEXT: fmax.s fa5, fa5, fa4 +; CHECK64-IZFHMIN-NEXT: fmv.w.x fa4, a1 ; CHECK64-IZFHMIN-NEXT: fmin.s fa5, fa5, fa4 ; CHECK64-IZFHMIN-NEXT: fcvt.l.s a1, fa5, rtz ; CHECK64-IZFHMIN-NEXT: and a0, a0, a1 @@ -711,45 +721,49 @@ define i16 @fcvt_ui_h(half %a) nounwind { define i16 @fcvt_ui_h_sat(half %a) nounwind { ; RV32IZFH-LABEL: fcvt_ui_h_sat: ; RV32IZFH: # %bb.0: # %start -; RV32IZFH-NEXT: lui a0, %hi(.LCPI3_0) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI3_0)(a0) -; RV32IZFH-NEXT: fcvt.s.h fa4, fa0 -; RV32IZFH-NEXT: fmv.w.x fa3, zero -; RV32IZFH-NEXT: fmax.s fa4, fa4, fa3 -; RV32IZFH-NEXT: fmin.s fa5, fa4, fa5 +; RV32IZFH-NEXT: fcvt.s.h fa5, fa0 +; RV32IZFH-NEXT: fmv.w.x fa4, zero +; RV32IZFH-NEXT: lui a0, 292864 +; RV32IZFH-NEXT: fmax.s fa5, fa5, fa4 +; RV32IZFH-NEXT: addi a0, a0, -256 +; RV32IZFH-NEXT: fmv.w.x fa4, a0 +; RV32IZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV32IZFH-NEXT: fcvt.wu.s a0, fa5, rtz ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: fcvt_ui_h_sat: ; RV64IZFH: # %bb.0: # %start -; RV64IZFH-NEXT: lui a0, %hi(.LCPI3_0) -; RV64IZFH-NEXT: flw fa5, %lo(.LCPI3_0)(a0) -; RV64IZFH-NEXT: fcvt.s.h fa4, fa0 -; RV64IZFH-NEXT: fmv.w.x fa3, zero -; RV64IZFH-NEXT: fmax.s fa4, fa4, fa3 -; RV64IZFH-NEXT: fmin.s fa5, fa4, fa5 +; RV64IZFH-NEXT: fcvt.s.h fa5, fa0 +; RV64IZFH-NEXT: fmv.w.x fa4, zero +; RV64IZFH-NEXT: lui a0, 292864 +; RV64IZFH-NEXT: fmax.s fa5, fa5, fa4 +; RV64IZFH-NEXT: addi a0, a0, -256 +; RV64IZFH-NEXT: fmv.w.x fa4, a0 +; RV64IZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV64IZFH-NEXT: fcvt.lu.s a0, fa5, rtz ; RV64IZFH-NEXT: ret ; ; RV32IDZFH-LABEL: fcvt_ui_h_sat: ; RV32IDZFH: # %bb.0: # %start -; RV32IDZFH-NEXT: lui a0, %hi(.LCPI3_0) -; RV32IDZFH-NEXT: flw fa5, %lo(.LCPI3_0)(a0) -; RV32IDZFH-NEXT: fcvt.s.h fa4, fa0 -; RV32IDZFH-NEXT: fmv.w.x fa3, zero -; RV32IDZFH-NEXT: fmax.s fa4, fa4, fa3 -; RV32IDZFH-NEXT: fmin.s fa5, fa4, fa5 +; RV32IDZFH-NEXT: fcvt.s.h fa5, fa0 +; RV32IDZFH-NEXT: fmv.w.x fa4, zero +; RV32IDZFH-NEXT: lui a0, 292864 +; RV32IDZFH-NEXT: fmax.s fa5, fa5, fa4 +; RV32IDZFH-NEXT: addi a0, a0, -256 +; RV32IDZFH-NEXT: fmv.w.x fa4, a0 +; RV32IDZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV32IDZFH-NEXT: fcvt.wu.s a0, fa5, rtz ; RV32IDZFH-NEXT: ret ; ; RV64IDZFH-LABEL: fcvt_ui_h_sat: ; RV64IDZFH: # %bb.0: # %start -; RV64IDZFH-NEXT: lui a0, %hi(.LCPI3_0) -; RV64IDZFH-NEXT: flw fa5, %lo(.LCPI3_0)(a0) -; RV64IDZFH-NEXT: fcvt.s.h fa4, fa0 -; RV64IDZFH-NEXT: fmv.w.x fa3, zero -; RV64IDZFH-NEXT: fmax.s fa4, fa4, fa3 -; RV64IDZFH-NEXT: fmin.s fa5, fa4, fa5 +; RV64IDZFH-NEXT: fcvt.s.h fa5, fa0 +; RV64IDZFH-NEXT: fmv.w.x fa4, zero +; RV64IDZFH-NEXT: lui a0, 292864 +; RV64IDZFH-NEXT: fmax.s fa5, fa5, fa4 +; RV64IDZFH-NEXT: addi a0, a0, -256 +; RV64IDZFH-NEXT: fmv.w.x fa4, a0 +; RV64IDZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV64IDZFH-NEXT: fcvt.lu.s a0, fa5, rtz ; RV64IDZFH-NEXT: ret ; @@ -874,12 +888,13 @@ define i16 @fcvt_ui_h_sat(half 
%a) nounwind { ; RV32ID-ILP32-NEXT: addi sp, sp, -16 ; RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32ID-ILP32-NEXT: call __extendhfsf2 -; RV32ID-ILP32-NEXT: lui a1, %hi(.LCPI3_0) -; RV32ID-ILP32-NEXT: flw fa5, %lo(.LCPI3_0)(a1) +; RV32ID-ILP32-NEXT: fmv.w.x fa5, a0 +; RV32ID-ILP32-NEXT: fmv.w.x fa4, zero +; RV32ID-ILP32-NEXT: lui a0, 292864 +; RV32ID-ILP32-NEXT: fmax.s fa5, fa5, fa4 +; RV32ID-ILP32-NEXT: addi a0, a0, -256 ; RV32ID-ILP32-NEXT: fmv.w.x fa4, a0 -; RV32ID-ILP32-NEXT: fmv.w.x fa3, zero -; RV32ID-ILP32-NEXT: fmax.s fa4, fa4, fa3 -; RV32ID-ILP32-NEXT: fmin.s fa5, fa4, fa5 +; RV32ID-ILP32-NEXT: fmin.s fa5, fa5, fa4 ; RV32ID-ILP32-NEXT: fcvt.wu.s a0, fa5, rtz ; RV32ID-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32ID-ILP32-NEXT: addi sp, sp, 16 @@ -890,12 +905,13 @@ define i16 @fcvt_ui_h_sat(half %a) nounwind { ; RV64ID-LP64-NEXT: addi sp, sp, -16 ; RV64ID-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64ID-LP64-NEXT: call __extendhfsf2 -; RV64ID-LP64-NEXT: lui a1, %hi(.LCPI3_0) -; RV64ID-LP64-NEXT: flw fa5, %lo(.LCPI3_0)(a1) +; RV64ID-LP64-NEXT: fmv.w.x fa5, a0 +; RV64ID-LP64-NEXT: fmv.w.x fa4, zero +; RV64ID-LP64-NEXT: lui a0, 292864 +; RV64ID-LP64-NEXT: fmax.s fa5, fa5, fa4 +; RV64ID-LP64-NEXT: addi a0, a0, -256 ; RV64ID-LP64-NEXT: fmv.w.x fa4, a0 -; RV64ID-LP64-NEXT: fmv.w.x fa3, zero -; RV64ID-LP64-NEXT: fmax.s fa4, fa4, fa3 -; RV64ID-LP64-NEXT: fmin.s fa5, fa4, fa5 +; RV64ID-LP64-NEXT: fmin.s fa5, fa5, fa4 ; RV64ID-LP64-NEXT: fcvt.lu.s a0, fa5, rtz ; RV64ID-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64ID-LP64-NEXT: addi sp, sp, 16 @@ -906,11 +922,12 @@ define i16 @fcvt_ui_h_sat(half %a) nounwind { ; RV32ID-NEXT: addi sp, sp, -16 ; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32ID-NEXT: call __extendhfsf2 -; RV32ID-NEXT: lui a0, %hi(.LCPI3_0) -; RV32ID-NEXT: flw fa5, %lo(.LCPI3_0)(a0) -; RV32ID-NEXT: fmv.w.x fa4, zero -; RV32ID-NEXT: fmax.s fa4, fa0, fa4 -; RV32ID-NEXT: fmin.s fa5, fa4, fa5 +; RV32ID-NEXT: fmv.w.x fa5, zero +; RV32ID-NEXT: lui a0, 292864 +; RV32ID-NEXT: fmax.s fa5, fa0, fa5 +; RV32ID-NEXT: addi a0, a0, -256 +; RV32ID-NEXT: fmv.w.x fa4, a0 +; RV32ID-NEXT: fmin.s fa5, fa5, fa4 ; RV32ID-NEXT: fcvt.wu.s a0, fa5, rtz ; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32ID-NEXT: addi sp, sp, 16 @@ -921,11 +938,12 @@ define i16 @fcvt_ui_h_sat(half %a) nounwind { ; RV64ID-NEXT: addi sp, sp, -16 ; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64ID-NEXT: call __extendhfsf2 -; RV64ID-NEXT: lui a0, %hi(.LCPI3_0) -; RV64ID-NEXT: flw fa5, %lo(.LCPI3_0)(a0) -; RV64ID-NEXT: fmv.w.x fa4, zero -; RV64ID-NEXT: fmax.s fa4, fa0, fa4 -; RV64ID-NEXT: fmin.s fa5, fa4, fa5 +; RV64ID-NEXT: fmv.w.x fa5, zero +; RV64ID-NEXT: lui a0, 292864 +; RV64ID-NEXT: fmax.s fa5, fa0, fa5 +; RV64ID-NEXT: addi a0, a0, -256 +; RV64ID-NEXT: fmv.w.x fa4, a0 +; RV64ID-NEXT: fmin.s fa5, fa5, fa4 ; RV64ID-NEXT: fcvt.lu.s a0, fa5, rtz ; RV64ID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64ID-NEXT: addi sp, sp, 16 @@ -933,23 +951,25 @@ define i16 @fcvt_ui_h_sat(half %a) nounwind { ; ; CHECK32-IZFHMIN-LABEL: fcvt_ui_h_sat: ; CHECK32-IZFHMIN: # %bb.0: # %start -; CHECK32-IZFHMIN-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK32-IZFHMIN-NEXT: flw fa5, %lo(.LCPI3_0)(a0) -; CHECK32-IZFHMIN-NEXT: fcvt.s.h fa4, fa0 -; CHECK32-IZFHMIN-NEXT: fmv.w.x fa3, zero -; CHECK32-IZFHMIN-NEXT: fmax.s fa4, fa4, fa3 -; CHECK32-IZFHMIN-NEXT: fmin.s fa5, fa4, fa5 +; CHECK32-IZFHMIN-NEXT: fcvt.s.h fa5, fa0 +; CHECK32-IZFHMIN-NEXT: fmv.w.x fa4, zero +; CHECK32-IZFHMIN-NEXT: lui a0, 292864 
+; CHECK32-IZFHMIN-NEXT: fmax.s fa5, fa5, fa4 +; CHECK32-IZFHMIN-NEXT: addi a0, a0, -256 +; CHECK32-IZFHMIN-NEXT: fmv.w.x fa4, a0 +; CHECK32-IZFHMIN-NEXT: fmin.s fa5, fa5, fa4 ; CHECK32-IZFHMIN-NEXT: fcvt.wu.s a0, fa5, rtz ; CHECK32-IZFHMIN-NEXT: ret ; ; CHECK64-IZFHMIN-LABEL: fcvt_ui_h_sat: ; CHECK64-IZFHMIN: # %bb.0: # %start -; CHECK64-IZFHMIN-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK64-IZFHMIN-NEXT: flw fa5, %lo(.LCPI3_0)(a0) -; CHECK64-IZFHMIN-NEXT: fcvt.s.h fa4, fa0 -; CHECK64-IZFHMIN-NEXT: fmv.w.x fa3, zero -; CHECK64-IZFHMIN-NEXT: fmax.s fa4, fa4, fa3 -; CHECK64-IZFHMIN-NEXT: fmin.s fa5, fa4, fa5 +; CHECK64-IZFHMIN-NEXT: fcvt.s.h fa5, fa0 +; CHECK64-IZFHMIN-NEXT: fmv.w.x fa4, zero +; CHECK64-IZFHMIN-NEXT: lui a0, 292864 +; CHECK64-IZFHMIN-NEXT: fmax.s fa5, fa5, fa4 +; CHECK64-IZFHMIN-NEXT: addi a0, a0, -256 +; CHECK64-IZFHMIN-NEXT: fmv.w.x fa4, a0 +; CHECK64-IZFHMIN-NEXT: fmin.s fa5, fa5, fa4 ; CHECK64-IZFHMIN-NEXT: fcvt.lu.s a0, fa5, rtz ; CHECK64-IZFHMIN-NEXT: ret ; @@ -2159,20 +2179,21 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; RV32IZFH-NEXT: # %bb.1: # %start ; RV32IZFH-NEXT: mv a2, a1 ; RV32IZFH-NEXT: .LBB10_2: # %start -; RV32IZFH-NEXT: lui a1, %hi(.LCPI10_0) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI10_0)(a1) +; RV32IZFH-NEXT: lui a1, 389120 +; RV32IZFH-NEXT: addi a1, a1, -1 +; RV32IZFH-NEXT: fmv.w.x fa5, a1 ; RV32IZFH-NEXT: flt.s a1, fa5, fs0 ; RV32IZFH-NEXT: beqz a1, .LBB10_4 ; RV32IZFH-NEXT: # %bb.3: ; RV32IZFH-NEXT: addi a2, a3, -1 ; RV32IZFH-NEXT: .LBB10_4: # %start ; RV32IZFH-NEXT: feq.s a3, fs0, fs0 -; RV32IZFH-NEXT: neg a4, a1 -; RV32IZFH-NEXT: neg a1, s0 +; RV32IZFH-NEXT: neg a4, s0 +; RV32IZFH-NEXT: neg a5, a1 ; RV32IZFH-NEXT: neg a3, a3 -; RV32IZFH-NEXT: and a0, a1, a0 +; RV32IZFH-NEXT: and a0, a4, a0 ; RV32IZFH-NEXT: and a1, a3, a2 -; RV32IZFH-NEXT: or a0, a4, a0 +; RV32IZFH-NEXT: or a0, a5, a0 ; RV32IZFH-NEXT: and a0, a3, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -2207,20 +2228,21 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; RV32IDZFH-NEXT: # %bb.1: # %start ; RV32IDZFH-NEXT: mv a2, a1 ; RV32IDZFH-NEXT: .LBB10_2: # %start -; RV32IDZFH-NEXT: lui a1, %hi(.LCPI10_0) -; RV32IDZFH-NEXT: flw fa5, %lo(.LCPI10_0)(a1) +; RV32IDZFH-NEXT: lui a1, 389120 +; RV32IDZFH-NEXT: addi a1, a1, -1 +; RV32IDZFH-NEXT: fmv.w.x fa5, a1 ; RV32IDZFH-NEXT: flt.s a1, fa5, fs0 ; RV32IDZFH-NEXT: beqz a1, .LBB10_4 ; RV32IDZFH-NEXT: # %bb.3: ; RV32IDZFH-NEXT: addi a2, a3, -1 ; RV32IDZFH-NEXT: .LBB10_4: # %start ; RV32IDZFH-NEXT: feq.s a3, fs0, fs0 -; RV32IDZFH-NEXT: neg a4, a1 -; RV32IDZFH-NEXT: neg a1, s0 +; RV32IDZFH-NEXT: neg a4, s0 +; RV32IDZFH-NEXT: neg a5, a1 ; RV32IDZFH-NEXT: neg a3, a3 -; RV32IDZFH-NEXT: and a0, a1, a0 +; RV32IDZFH-NEXT: and a0, a4, a0 ; RV32IDZFH-NEXT: and a1, a3, a2 -; RV32IDZFH-NEXT: or a0, a4, a0 +; RV32IDZFH-NEXT: or a0, a5, a0 ; RV32IDZFH-NEXT: and a0, a3, a0 ; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IDZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -2450,8 +2472,9 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; RV32ID-ILP32-NEXT: # %bb.1: # %start ; RV32ID-ILP32-NEXT: mv a2, a1 ; RV32ID-ILP32-NEXT: .LBB10_2: # %start -; RV32ID-ILP32-NEXT: lui a1, %hi(.LCPI10_0) -; RV32ID-ILP32-NEXT: flw fa5, %lo(.LCPI10_0)(a1) +; RV32ID-ILP32-NEXT: lui a1, 389120 +; RV32ID-ILP32-NEXT: addi a1, a1, -1 +; RV32ID-ILP32-NEXT: fmv.w.x fa5, a1 ; RV32ID-ILP32-NEXT: flw fa4, 4(sp) # 4-byte Folded Reload ; RV32ID-ILP32-NEXT: flt.s a1, fa5, fa4 ; RV32ID-ILP32-NEXT: fmv.s fa5, fa4 @@ -2505,8 
+2528,9 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; RV32ID-NEXT: # %bb.1: # %start ; RV32ID-NEXT: mv a2, a1 ; RV32ID-NEXT: .LBB10_2: # %start -; RV32ID-NEXT: lui a1, %hi(.LCPI10_0) -; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a1) +; RV32ID-NEXT: lui a1, 389120 +; RV32ID-NEXT: addi a1, a1, -1 +; RV32ID-NEXT: fmv.w.x fa5, a1 ; RV32ID-NEXT: flt.s a1, fa5, fs0 ; RV32ID-NEXT: beqz a1, .LBB10_4 ; RV32ID-NEXT: # %bb.3: @@ -2558,20 +2582,21 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; RV32IFZFHMIN-NEXT: # %bb.1: # %start ; RV32IFZFHMIN-NEXT: mv a2, a1 ; RV32IFZFHMIN-NEXT: .LBB10_2: # %start -; RV32IFZFHMIN-NEXT: lui a1, %hi(.LCPI10_0) -; RV32IFZFHMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1) +; RV32IFZFHMIN-NEXT: lui a1, 389120 +; RV32IFZFHMIN-NEXT: addi a1, a1, -1 +; RV32IFZFHMIN-NEXT: fmv.w.x fa5, a1 ; RV32IFZFHMIN-NEXT: flt.s a1, fa5, fs0 ; RV32IFZFHMIN-NEXT: beqz a1, .LBB10_4 ; RV32IFZFHMIN-NEXT: # %bb.3: ; RV32IFZFHMIN-NEXT: addi a2, a3, -1 ; RV32IFZFHMIN-NEXT: .LBB10_4: # %start ; RV32IFZFHMIN-NEXT: feq.s a3, fs0, fs0 -; RV32IFZFHMIN-NEXT: neg a4, a1 -; RV32IFZFHMIN-NEXT: neg a1, s0 +; RV32IFZFHMIN-NEXT: neg a4, s0 +; RV32IFZFHMIN-NEXT: neg a5, a1 ; RV32IFZFHMIN-NEXT: neg a3, a3 -; RV32IFZFHMIN-NEXT: and a0, a1, a0 +; RV32IFZFHMIN-NEXT: and a0, a4, a0 ; RV32IFZFHMIN-NEXT: and a1, a3, a2 -; RV32IFZFHMIN-NEXT: or a0, a4, a0 +; RV32IFZFHMIN-NEXT: or a0, a5, a0 ; RV32IFZFHMIN-NEXT: and a0, a3, a0 ; RV32IFZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IFZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -2607,20 +2632,21 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; RV32IDZFHMIN-NEXT: # %bb.1: # %start ; RV32IDZFHMIN-NEXT: mv a2, a1 ; RV32IDZFHMIN-NEXT: .LBB10_2: # %start -; RV32IDZFHMIN-NEXT: lui a1, %hi(.LCPI10_0) -; RV32IDZFHMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1) +; RV32IDZFHMIN-NEXT: lui a1, 389120 +; RV32IDZFHMIN-NEXT: addi a1, a1, -1 +; RV32IDZFHMIN-NEXT: fmv.w.x fa5, a1 ; RV32IDZFHMIN-NEXT: flt.s a1, fa5, fs0 ; RV32IDZFHMIN-NEXT: beqz a1, .LBB10_4 ; RV32IDZFHMIN-NEXT: # %bb.3: ; RV32IDZFHMIN-NEXT: addi a2, a3, -1 ; RV32IDZFHMIN-NEXT: .LBB10_4: # %start ; RV32IDZFHMIN-NEXT: feq.s a3, fs0, fs0 -; RV32IDZFHMIN-NEXT: neg a4, a1 -; RV32IDZFHMIN-NEXT: neg a1, s0 +; RV32IDZFHMIN-NEXT: neg a4, s0 +; RV32IDZFHMIN-NEXT: neg a5, a1 ; RV32IDZFHMIN-NEXT: neg a3, a3 -; RV32IDZFHMIN-NEXT: and a0, a1, a0 +; RV32IDZFHMIN-NEXT: and a0, a4, a0 ; RV32IDZFHMIN-NEXT: and a1, a3, a2 -; RV32IDZFHMIN-NEXT: or a0, a4, a0 +; RV32IDZFHMIN-NEXT: or a0, a5, a0 ; RV32IDZFHMIN-NEXT: and a0, a3, a0 ; RV32IDZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IDZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -2903,23 +2929,25 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: lui a0, %hi(.LCPI12_0) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI12_0)(a0) -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: fmv.w.x fa4, zero -; RV32IZFH-NEXT: fle.s a0, fa4, fa0 -; RV32IZFH-NEXT: flt.s a1, fa5, fa0 -; RV32IZFH-NEXT: neg s0, a1 -; RV32IZFH-NEXT: neg s1, a0 +; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 +; RV32IZFH-NEXT: fmv.w.x fa5, zero +; RV32IZFH-NEXT: fle.s a0, fa5, fs0 +; RV32IZFH-NEXT: neg s0, a0 +; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixunssfdi -; RV32IZFH-NEXT: and a0, s1, a0 -; RV32IZFH-NEXT: and a1, s1, a1 -; RV32IZFH-NEXT: or a0, s0, a0 -; 
RV32IZFH-NEXT: or a1, s0, a1 +; RV32IZFH-NEXT: and a0, s0, a0 +; RV32IZFH-NEXT: lui a2, 391168 +; RV32IZFH-NEXT: and a1, s0, a1 +; RV32IZFH-NEXT: addi a2, a2, -1 +; RV32IZFH-NEXT: fmv.w.x fa5, a2 +; RV32IZFH-NEXT: flt.s a2, fa5, fs0 +; RV32IZFH-NEXT: neg a2, a2 +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: or a1, a2, a1 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 ; RV32IZFH-NEXT: ret ; @@ -2937,23 +2965,25 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; RV32IDZFH-NEXT: addi sp, sp, -16 ; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IDZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IDZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IDZFH-NEXT: lui a0, %hi(.LCPI12_0) -; RV32IDZFH-NEXT: flw fa5, %lo(.LCPI12_0)(a0) -; RV32IDZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IDZFH-NEXT: fmv.w.x fa4, zero -; RV32IDZFH-NEXT: fle.s a0, fa4, fa0 -; RV32IDZFH-NEXT: flt.s a1, fa5, fa0 -; RV32IDZFH-NEXT: neg s0, a1 -; RV32IDZFH-NEXT: neg s1, a0 +; RV32IDZFH-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; RV32IDZFH-NEXT: fcvt.s.h fs0, fa0 +; RV32IDZFH-NEXT: fmv.w.x fa5, zero +; RV32IDZFH-NEXT: fle.s a0, fa5, fs0 +; RV32IDZFH-NEXT: neg s0, a0 +; RV32IDZFH-NEXT: fmv.s fa0, fs0 ; RV32IDZFH-NEXT: call __fixunssfdi -; RV32IDZFH-NEXT: and a0, s1, a0 -; RV32IDZFH-NEXT: and a1, s1, a1 -; RV32IDZFH-NEXT: or a0, s0, a0 -; RV32IDZFH-NEXT: or a1, s0, a1 +; RV32IDZFH-NEXT: and a0, s0, a0 +; RV32IDZFH-NEXT: lui a2, 391168 +; RV32IDZFH-NEXT: and a1, s0, a1 +; RV32IDZFH-NEXT: addi a2, a2, -1 +; RV32IDZFH-NEXT: fmv.w.x fa5, a2 +; RV32IDZFH-NEXT: flt.s a2, fa5, fs0 +; RV32IDZFH-NEXT: neg a2, a2 +; RV32IDZFH-NEXT: or a0, a2, a0 +; RV32IDZFH-NEXT: or a1, a2, a1 ; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IDZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IDZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IDZFH-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload ; RV32IDZFH-NEXT: addi sp, sp, 16 ; RV32IDZFH-NEXT: ret ; @@ -3105,14 +3135,15 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; RV32ID-ILP32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32ID-ILP32-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32ID-ILP32-NEXT: call __extendhfsf2 -; RV32ID-ILP32-NEXT: lui a1, %hi(.LCPI12_0) -; RV32ID-ILP32-NEXT: flw fa5, %lo(.LCPI12_0)(a1) -; RV32ID-ILP32-NEXT: fmv.w.x fa4, a0 -; RV32ID-ILP32-NEXT: fmv.w.x fa3, zero -; RV32ID-ILP32-NEXT: fle.s a1, fa3, fa4 -; RV32ID-ILP32-NEXT: flt.s a2, fa5, fa4 -; RV32ID-ILP32-NEXT: neg s0, a2 -; RV32ID-ILP32-NEXT: neg s1, a1 +; RV32ID-ILP32-NEXT: fmv.w.x fa5, a0 +; RV32ID-ILP32-NEXT: lui a1, 391168 +; RV32ID-ILP32-NEXT: fmv.w.x fa4, zero +; RV32ID-ILP32-NEXT: addi a1, a1, -1 +; RV32ID-ILP32-NEXT: fle.s a2, fa4, fa5 +; RV32ID-ILP32-NEXT: fmv.w.x fa4, a1 +; RV32ID-ILP32-NEXT: flt.s a1, fa4, fa5 +; RV32ID-ILP32-NEXT: neg s0, a1 +; RV32ID-ILP32-NEXT: neg s1, a2 ; RV32ID-ILP32-NEXT: call __fixunssfdi ; RV32ID-ILP32-NEXT: and a0, s1, a0 ; RV32ID-ILP32-NEXT: and a1, s1, a1 @@ -3144,23 +3175,25 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; RV32ID-NEXT: addi sp, sp, -16 ; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32ID-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32ID-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32ID-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill ; RV32ID-NEXT: call __extendhfsf2 -; RV32ID-NEXT: lui a0, %hi(.LCPI12_0) -; RV32ID-NEXT: flw fa5, 
%lo(.LCPI12_0)(a0) -; RV32ID-NEXT: fmv.w.x fa4, zero -; RV32ID-NEXT: fle.s a0, fa4, fa0 -; RV32ID-NEXT: flt.s a1, fa5, fa0 -; RV32ID-NEXT: neg s0, a1 -; RV32ID-NEXT: neg s1, a0 +; RV32ID-NEXT: fmv.s fs0, fa0 +; RV32ID-NEXT: fmv.w.x fa5, zero +; RV32ID-NEXT: fle.s a0, fa5, fa0 +; RV32ID-NEXT: neg s0, a0 ; RV32ID-NEXT: call __fixunssfdi -; RV32ID-NEXT: and a0, s1, a0 -; RV32ID-NEXT: and a1, s1, a1 -; RV32ID-NEXT: or a0, s0, a0 -; RV32ID-NEXT: or a1, s0, a1 +; RV32ID-NEXT: and a0, s0, a0 +; RV32ID-NEXT: lui a2, 391168 +; RV32ID-NEXT: and a1, s0, a1 +; RV32ID-NEXT: addi a2, a2, -1 +; RV32ID-NEXT: fmv.w.x fa5, a2 +; RV32ID-NEXT: flt.s a2, fa5, fs0 +; RV32ID-NEXT: neg a2, a2 +; RV32ID-NEXT: or a0, a2, a0 +; RV32ID-NEXT: or a1, a2, a1 ; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32ID-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32ID-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32ID-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload ; RV32ID-NEXT: addi sp, sp, 16 ; RV32ID-NEXT: ret ; @@ -3178,30 +3211,32 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; RV64ID-NEXT: addi sp, sp, 16 ; RV64ID-NEXT: ret ; -; CHECK32-IZFHMIN-LABEL: fcvt_lu_h_sat: -; CHECK32-IZFHMIN: # %bb.0: # %start -; CHECK32-IZFHMIN-NEXT: addi sp, sp, -16 -; CHECK32-IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; CHECK32-IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; CHECK32-IZFHMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; CHECK32-IZFHMIN-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK32-IZFHMIN-NEXT: flw fa5, %lo(.LCPI12_0)(a0) -; CHECK32-IZFHMIN-NEXT: fcvt.s.h fa0, fa0 -; CHECK32-IZFHMIN-NEXT: fmv.w.x fa4, zero -; CHECK32-IZFHMIN-NEXT: fle.s a0, fa4, fa0 -; CHECK32-IZFHMIN-NEXT: flt.s a1, fa5, fa0 -; CHECK32-IZFHMIN-NEXT: neg s0, a1 -; CHECK32-IZFHMIN-NEXT: neg s1, a0 -; CHECK32-IZFHMIN-NEXT: call __fixunssfdi -; CHECK32-IZFHMIN-NEXT: and a0, s1, a0 -; CHECK32-IZFHMIN-NEXT: and a1, s1, a1 -; CHECK32-IZFHMIN-NEXT: or a0, s0, a0 -; CHECK32-IZFHMIN-NEXT: or a1, s0, a1 -; CHECK32-IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; CHECK32-IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; CHECK32-IZFHMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; CHECK32-IZFHMIN-NEXT: addi sp, sp, 16 -; CHECK32-IZFHMIN-NEXT: ret +; RV32IFZFHMIN-LABEL: fcvt_lu_h_sat: +; RV32IFZFHMIN: # %bb.0: # %start +; RV32IFZFHMIN-NEXT: addi sp, sp, -16 +; RV32IFZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IFZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IFZFHMIN-NEXT: fcvt.s.h fs0, fa0 +; RV32IFZFHMIN-NEXT: fmv.w.x fa5, zero +; RV32IFZFHMIN-NEXT: fle.s a0, fa5, fs0 +; RV32IFZFHMIN-NEXT: neg s0, a0 +; RV32IFZFHMIN-NEXT: fmv.s fa0, fs0 +; RV32IFZFHMIN-NEXT: call __fixunssfdi +; RV32IFZFHMIN-NEXT: and a0, s0, a0 +; RV32IFZFHMIN-NEXT: lui a2, 391168 +; RV32IFZFHMIN-NEXT: and a1, s0, a1 +; RV32IFZFHMIN-NEXT: addi a2, a2, -1 +; RV32IFZFHMIN-NEXT: fmv.w.x fa5, a2 +; RV32IFZFHMIN-NEXT: flt.s a2, fa5, fs0 +; RV32IFZFHMIN-NEXT: neg a2, a2 +; RV32IFZFHMIN-NEXT: or a0, a2, a0 +; RV32IFZFHMIN-NEXT: or a1, a2, a1 +; RV32IFZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IFZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IFZFHMIN-NEXT: addi sp, sp, 16 +; RV32IFZFHMIN-NEXT: ret ; ; CHECK64-IZFHMIN-LABEL: fcvt_lu_h_sat: ; CHECK64-IZFHMIN: # %bb.0: # %start @@ -3213,6 +3248,33 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; CHECK64-IZFHMIN-NEXT: and a0, a1, a0 ; CHECK64-IZFHMIN-NEXT: ret ; +; 
RV32IDZFHMIN-LABEL: fcvt_lu_h_sat: +; RV32IDZFHMIN: # %bb.0: # %start +; RV32IDZFHMIN-NEXT: addi sp, sp, -16 +; RV32IDZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IDZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IDZFHMIN-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; RV32IDZFHMIN-NEXT: fcvt.s.h fs0, fa0 +; RV32IDZFHMIN-NEXT: fmv.w.x fa5, zero +; RV32IDZFHMIN-NEXT: fle.s a0, fa5, fs0 +; RV32IDZFHMIN-NEXT: neg s0, a0 +; RV32IDZFHMIN-NEXT: fmv.s fa0, fs0 +; RV32IDZFHMIN-NEXT: call __fixunssfdi +; RV32IDZFHMIN-NEXT: and a0, s0, a0 +; RV32IDZFHMIN-NEXT: lui a2, 391168 +; RV32IDZFHMIN-NEXT: and a1, s0, a1 +; RV32IDZFHMIN-NEXT: addi a2, a2, -1 +; RV32IDZFHMIN-NEXT: fmv.w.x fa5, a2 +; RV32IDZFHMIN-NEXT: flt.s a2, fa5, fs0 +; RV32IDZFHMIN-NEXT: neg a2, a2 +; RV32IDZFHMIN-NEXT: or a0, a2, a0 +; RV32IDZFHMIN-NEXT: or a1, a2, a1 +; RV32IDZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IDZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IDZFHMIN-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload +; RV32IDZFHMIN-NEXT: addi sp, sp, 16 +; RV32IDZFHMIN-NEXT: ret +; ; CHECK32-IZHINXMIN-LABEL: fcvt_lu_h_sat: ; CHECK32-IZHINXMIN: # %bb.0: # %start ; CHECK32-IZHINXMIN-NEXT: addi sp, sp, -16 @@ -6282,13 +6344,14 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; RV32IZFH-LABEL: fcvt_w_s_sat_i16: ; RV32IZFH: # %bb.0: # %start ; RV32IZFH-NEXT: fcvt.s.h fa5, fa0 -; RV32IZFH-NEXT: lui a0, %hi(.LCPI32_0) -; RV32IZFH-NEXT: feq.s a1, fa5, fa5 -; RV32IZFH-NEXT: flw fa4, %lo(.LCPI32_0)(a0) ; RV32IZFH-NEXT: lui a0, 815104 -; RV32IZFH-NEXT: fmv.w.x fa3, a0 -; RV32IZFH-NEXT: fmax.s fa5, fa5, fa3 -; RV32IZFH-NEXT: neg a0, a1 +; RV32IZFH-NEXT: lui a1, 290816 +; RV32IZFH-NEXT: fmv.w.x fa4, a0 +; RV32IZFH-NEXT: feq.s a0, fa5, fa5 +; RV32IZFH-NEXT: addi a1, a1, -512 +; RV32IZFH-NEXT: neg a0, a0 +; RV32IZFH-NEXT: fmax.s fa5, fa5, fa4 +; RV32IZFH-NEXT: fmv.w.x fa4, a1 ; RV32IZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV32IZFH-NEXT: fcvt.w.s a1, fa5, rtz ; RV32IZFH-NEXT: and a0, a0, a1 @@ -6297,13 +6360,14 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; RV64IZFH-LABEL: fcvt_w_s_sat_i16: ; RV64IZFH: # %bb.0: # %start ; RV64IZFH-NEXT: fcvt.s.h fa5, fa0 -; RV64IZFH-NEXT: lui a0, %hi(.LCPI32_0) -; RV64IZFH-NEXT: feq.s a1, fa5, fa5 -; RV64IZFH-NEXT: flw fa4, %lo(.LCPI32_0)(a0) ; RV64IZFH-NEXT: lui a0, 815104 -; RV64IZFH-NEXT: fmv.w.x fa3, a0 -; RV64IZFH-NEXT: fmax.s fa5, fa5, fa3 -; RV64IZFH-NEXT: neg a0, a1 +; RV64IZFH-NEXT: lui a1, 290816 +; RV64IZFH-NEXT: fmv.w.x fa4, a0 +; RV64IZFH-NEXT: feq.s a0, fa5, fa5 +; RV64IZFH-NEXT: addi a1, a1, -512 +; RV64IZFH-NEXT: neg a0, a0 +; RV64IZFH-NEXT: fmax.s fa5, fa5, fa4 +; RV64IZFH-NEXT: fmv.w.x fa4, a1 ; RV64IZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV64IZFH-NEXT: fcvt.l.s a1, fa5, rtz ; RV64IZFH-NEXT: and a0, a0, a1 @@ -6312,13 +6376,14 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; RV32IDZFH-LABEL: fcvt_w_s_sat_i16: ; RV32IDZFH: # %bb.0: # %start ; RV32IDZFH-NEXT: fcvt.s.h fa5, fa0 -; RV32IDZFH-NEXT: lui a0, %hi(.LCPI32_0) -; RV32IDZFH-NEXT: feq.s a1, fa5, fa5 -; RV32IDZFH-NEXT: flw fa4, %lo(.LCPI32_0)(a0) ; RV32IDZFH-NEXT: lui a0, 815104 -; RV32IDZFH-NEXT: fmv.w.x fa3, a0 -; RV32IDZFH-NEXT: fmax.s fa5, fa5, fa3 -; RV32IDZFH-NEXT: neg a0, a1 +; RV32IDZFH-NEXT: lui a1, 290816 +; RV32IDZFH-NEXT: fmv.w.x fa4, a0 +; RV32IDZFH-NEXT: feq.s a0, fa5, fa5 +; RV32IDZFH-NEXT: addi a1, a1, -512 +; RV32IDZFH-NEXT: neg a0, a0 +; RV32IDZFH-NEXT: fmax.s fa5, fa5, fa4 +; RV32IDZFH-NEXT: fmv.w.x fa4, a1 ; RV32IDZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV32IDZFH-NEXT: 
fcvt.w.s a1, fa5, rtz ; RV32IDZFH-NEXT: and a0, a0, a1 @@ -6327,13 +6392,14 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; RV64IDZFH-LABEL: fcvt_w_s_sat_i16: ; RV64IDZFH: # %bb.0: # %start ; RV64IDZFH-NEXT: fcvt.s.h fa5, fa0 -; RV64IDZFH-NEXT: lui a0, %hi(.LCPI32_0) -; RV64IDZFH-NEXT: feq.s a1, fa5, fa5 -; RV64IDZFH-NEXT: flw fa4, %lo(.LCPI32_0)(a0) ; RV64IDZFH-NEXT: lui a0, 815104 -; RV64IDZFH-NEXT: fmv.w.x fa3, a0 -; RV64IDZFH-NEXT: fmax.s fa5, fa5, fa3 -; RV64IDZFH-NEXT: neg a0, a1 +; RV64IDZFH-NEXT: lui a1, 290816 +; RV64IDZFH-NEXT: fmv.w.x fa4, a0 +; RV64IDZFH-NEXT: feq.s a0, fa5, fa5 +; RV64IDZFH-NEXT: addi a1, a1, -512 +; RV64IDZFH-NEXT: neg a0, a0 +; RV64IDZFH-NEXT: fmax.s fa5, fa5, fa4 +; RV64IDZFH-NEXT: fmv.w.x fa4, a1 ; RV64IDZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV64IDZFH-NEXT: fcvt.l.s a1, fa5, rtz ; RV64IDZFH-NEXT: and a0, a0, a1 @@ -6491,13 +6557,14 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32ID-ILP32-NEXT: call __extendhfsf2 ; RV32ID-ILP32-NEXT: fmv.w.x fa5, a0 -; RV32ID-ILP32-NEXT: lui a0, %hi(.LCPI32_0) -; RV32ID-ILP32-NEXT: feq.s a1, fa5, fa5 -; RV32ID-ILP32-NEXT: flw fa4, %lo(.LCPI32_0)(a0) ; RV32ID-ILP32-NEXT: lui a0, 815104 -; RV32ID-ILP32-NEXT: fmv.w.x fa3, a0 -; RV32ID-ILP32-NEXT: fmax.s fa5, fa5, fa3 -; RV32ID-ILP32-NEXT: neg a0, a1 +; RV32ID-ILP32-NEXT: lui a1, 290816 +; RV32ID-ILP32-NEXT: fmv.w.x fa4, a0 +; RV32ID-ILP32-NEXT: feq.s a0, fa5, fa5 +; RV32ID-ILP32-NEXT: addi a1, a1, -512 +; RV32ID-ILP32-NEXT: neg a0, a0 +; RV32ID-ILP32-NEXT: fmax.s fa5, fa5, fa4 +; RV32ID-ILP32-NEXT: fmv.w.x fa4, a1 ; RV32ID-ILP32-NEXT: fmin.s fa5, fa5, fa4 ; RV32ID-ILP32-NEXT: fcvt.w.s a1, fa5, rtz ; RV32ID-ILP32-NEXT: and a0, a0, a1 @@ -6511,13 +6578,14 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; RV64ID-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64ID-LP64-NEXT: call __extendhfsf2 ; RV64ID-LP64-NEXT: fmv.w.x fa5, a0 -; RV64ID-LP64-NEXT: lui a0, %hi(.LCPI32_0) -; RV64ID-LP64-NEXT: feq.s a1, fa5, fa5 -; RV64ID-LP64-NEXT: flw fa4, %lo(.LCPI32_0)(a0) ; RV64ID-LP64-NEXT: lui a0, 815104 -; RV64ID-LP64-NEXT: fmv.w.x fa3, a0 -; RV64ID-LP64-NEXT: fmax.s fa5, fa5, fa3 -; RV64ID-LP64-NEXT: neg a0, a1 +; RV64ID-LP64-NEXT: lui a1, 290816 +; RV64ID-LP64-NEXT: fmv.w.x fa4, a0 +; RV64ID-LP64-NEXT: feq.s a0, fa5, fa5 +; RV64ID-LP64-NEXT: addi a1, a1, -512 +; RV64ID-LP64-NEXT: neg a0, a0 +; RV64ID-LP64-NEXT: fmax.s fa5, fa5, fa4 +; RV64ID-LP64-NEXT: fmv.w.x fa4, a1 ; RV64ID-LP64-NEXT: fmin.s fa5, fa5, fa4 ; RV64ID-LP64-NEXT: fcvt.l.s a1, fa5, rtz ; RV64ID-LP64-NEXT: and a0, a0, a1 @@ -6531,13 +6599,14 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32ID-NEXT: call __extendhfsf2 ; RV32ID-NEXT: feq.s a0, fa0, fa0 -; RV32ID-NEXT: lui a1, %hi(.LCPI32_0) -; RV32ID-NEXT: flw fa5, %lo(.LCPI32_0)(a1) ; RV32ID-NEXT: lui a1, 815104 -; RV32ID-NEXT: fmv.w.x fa4, a1 -; RV32ID-NEXT: fmax.s fa4, fa0, fa4 +; RV32ID-NEXT: fmv.w.x fa5, a1 +; RV32ID-NEXT: lui a1, 290816 ; RV32ID-NEXT: neg a0, a0 -; RV32ID-NEXT: fmin.s fa5, fa4, fa5 +; RV32ID-NEXT: addi a1, a1, -512 +; RV32ID-NEXT: fmax.s fa5, fa0, fa5 +; RV32ID-NEXT: fmv.w.x fa4, a1 +; RV32ID-NEXT: fmin.s fa5, fa5, fa4 ; RV32ID-NEXT: fcvt.w.s a1, fa5, rtz ; RV32ID-NEXT: and a0, a0, a1 ; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -6550,13 +6619,14 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64ID-NEXT: call 
__extendhfsf2 ; RV64ID-NEXT: feq.s a0, fa0, fa0 -; RV64ID-NEXT: lui a1, %hi(.LCPI32_0) -; RV64ID-NEXT: flw fa5, %lo(.LCPI32_0)(a1) ; RV64ID-NEXT: lui a1, 815104 -; RV64ID-NEXT: fmv.w.x fa4, a1 -; RV64ID-NEXT: fmax.s fa4, fa0, fa4 +; RV64ID-NEXT: fmv.w.x fa5, a1 +; RV64ID-NEXT: lui a1, 290816 ; RV64ID-NEXT: neg a0, a0 -; RV64ID-NEXT: fmin.s fa5, fa4, fa5 +; RV64ID-NEXT: addi a1, a1, -512 +; RV64ID-NEXT: fmax.s fa5, fa0, fa5 +; RV64ID-NEXT: fmv.w.x fa4, a1 +; RV64ID-NEXT: fmin.s fa5, fa5, fa4 ; RV64ID-NEXT: fcvt.l.s a1, fa5, rtz ; RV64ID-NEXT: and a0, a0, a1 ; RV64ID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -6566,13 +6636,14 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; CHECK32-IZFHMIN-LABEL: fcvt_w_s_sat_i16: ; CHECK32-IZFHMIN: # %bb.0: # %start ; CHECK32-IZFHMIN-NEXT: fcvt.s.h fa5, fa0 -; CHECK32-IZFHMIN-NEXT: lui a0, %hi(.LCPI32_0) -; CHECK32-IZFHMIN-NEXT: feq.s a1, fa5, fa5 -; CHECK32-IZFHMIN-NEXT: flw fa4, %lo(.LCPI32_0)(a0) ; CHECK32-IZFHMIN-NEXT: lui a0, 815104 -; CHECK32-IZFHMIN-NEXT: fmv.w.x fa3, a0 -; CHECK32-IZFHMIN-NEXT: fmax.s fa5, fa5, fa3 -; CHECK32-IZFHMIN-NEXT: neg a0, a1 +; CHECK32-IZFHMIN-NEXT: lui a1, 290816 +; CHECK32-IZFHMIN-NEXT: fmv.w.x fa4, a0 +; CHECK32-IZFHMIN-NEXT: feq.s a0, fa5, fa5 +; CHECK32-IZFHMIN-NEXT: addi a1, a1, -512 +; CHECK32-IZFHMIN-NEXT: neg a0, a0 +; CHECK32-IZFHMIN-NEXT: fmax.s fa5, fa5, fa4 +; CHECK32-IZFHMIN-NEXT: fmv.w.x fa4, a1 ; CHECK32-IZFHMIN-NEXT: fmin.s fa5, fa5, fa4 ; CHECK32-IZFHMIN-NEXT: fcvt.w.s a1, fa5, rtz ; CHECK32-IZFHMIN-NEXT: and a0, a0, a1 @@ -6581,13 +6652,14 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; CHECK64-IZFHMIN-LABEL: fcvt_w_s_sat_i16: ; CHECK64-IZFHMIN: # %bb.0: # %start ; CHECK64-IZFHMIN-NEXT: fcvt.s.h fa5, fa0 -; CHECK64-IZFHMIN-NEXT: lui a0, %hi(.LCPI32_0) -; CHECK64-IZFHMIN-NEXT: feq.s a1, fa5, fa5 -; CHECK64-IZFHMIN-NEXT: flw fa4, %lo(.LCPI32_0)(a0) ; CHECK64-IZFHMIN-NEXT: lui a0, 815104 -; CHECK64-IZFHMIN-NEXT: fmv.w.x fa3, a0 -; CHECK64-IZFHMIN-NEXT: fmax.s fa5, fa5, fa3 -; CHECK64-IZFHMIN-NEXT: neg a0, a1 +; CHECK64-IZFHMIN-NEXT: lui a1, 290816 +; CHECK64-IZFHMIN-NEXT: fmv.w.x fa4, a0 +; CHECK64-IZFHMIN-NEXT: feq.s a0, fa5, fa5 +; CHECK64-IZFHMIN-NEXT: addi a1, a1, -512 +; CHECK64-IZFHMIN-NEXT: neg a0, a0 +; CHECK64-IZFHMIN-NEXT: fmax.s fa5, fa5, fa4 +; CHECK64-IZFHMIN-NEXT: fmv.w.x fa4, a1 ; CHECK64-IZFHMIN-NEXT: fmin.s fa5, fa5, fa4 ; CHECK64-IZFHMIN-NEXT: fcvt.l.s a1, fa5, rtz ; CHECK64-IZFHMIN-NEXT: and a0, a0, a1 @@ -6802,45 +6874,49 @@ define zeroext i16 @fcvt_wu_s_i16(half %a) nounwind { define zeroext i16 @fcvt_wu_s_sat_i16(half %a) nounwind { ; RV32IZFH-LABEL: fcvt_wu_s_sat_i16: ; RV32IZFH: # %bb.0: # %start -; RV32IZFH-NEXT: lui a0, %hi(.LCPI34_0) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI34_0)(a0) -; RV32IZFH-NEXT: fcvt.s.h fa4, fa0 -; RV32IZFH-NEXT: fmv.w.x fa3, zero -; RV32IZFH-NEXT: fmax.s fa4, fa4, fa3 -; RV32IZFH-NEXT: fmin.s fa5, fa4, fa5 +; RV32IZFH-NEXT: fcvt.s.h fa5, fa0 +; RV32IZFH-NEXT: fmv.w.x fa4, zero +; RV32IZFH-NEXT: lui a0, 292864 +; RV32IZFH-NEXT: fmax.s fa5, fa5, fa4 +; RV32IZFH-NEXT: addi a0, a0, -256 +; RV32IZFH-NEXT: fmv.w.x fa4, a0 +; RV32IZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV32IZFH-NEXT: fcvt.wu.s a0, fa5, rtz ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: fcvt_wu_s_sat_i16: ; RV64IZFH: # %bb.0: # %start -; RV64IZFH-NEXT: lui a0, %hi(.LCPI34_0) -; RV64IZFH-NEXT: flw fa5, %lo(.LCPI34_0)(a0) -; RV64IZFH-NEXT: fcvt.s.h fa4, fa0 -; RV64IZFH-NEXT: fmv.w.x fa3, zero -; RV64IZFH-NEXT: fmax.s fa4, fa4, fa3 -; RV64IZFH-NEXT: fmin.s fa5, fa4, fa5 +; RV64IZFH-NEXT: 
fcvt.s.h fa5, fa0 +; RV64IZFH-NEXT: fmv.w.x fa4, zero +; RV64IZFH-NEXT: lui a0, 292864 +; RV64IZFH-NEXT: fmax.s fa5, fa5, fa4 +; RV64IZFH-NEXT: addi a0, a0, -256 +; RV64IZFH-NEXT: fmv.w.x fa4, a0 +; RV64IZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV64IZFH-NEXT: fcvt.lu.s a0, fa5, rtz ; RV64IZFH-NEXT: ret ; ; RV32IDZFH-LABEL: fcvt_wu_s_sat_i16: ; RV32IDZFH: # %bb.0: # %start -; RV32IDZFH-NEXT: lui a0, %hi(.LCPI34_0) -; RV32IDZFH-NEXT: flw fa5, %lo(.LCPI34_0)(a0) -; RV32IDZFH-NEXT: fcvt.s.h fa4, fa0 -; RV32IDZFH-NEXT: fmv.w.x fa3, zero -; RV32IDZFH-NEXT: fmax.s fa4, fa4, fa3 -; RV32IDZFH-NEXT: fmin.s fa5, fa4, fa5 +; RV32IDZFH-NEXT: fcvt.s.h fa5, fa0 +; RV32IDZFH-NEXT: fmv.w.x fa4, zero +; RV32IDZFH-NEXT: lui a0, 292864 +; RV32IDZFH-NEXT: fmax.s fa5, fa5, fa4 +; RV32IDZFH-NEXT: addi a0, a0, -256 +; RV32IDZFH-NEXT: fmv.w.x fa4, a0 +; RV32IDZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV32IDZFH-NEXT: fcvt.wu.s a0, fa5, rtz ; RV32IDZFH-NEXT: ret ; ; RV64IDZFH-LABEL: fcvt_wu_s_sat_i16: ; RV64IDZFH: # %bb.0: # %start -; RV64IDZFH-NEXT: lui a0, %hi(.LCPI34_0) -; RV64IDZFH-NEXT: flw fa5, %lo(.LCPI34_0)(a0) -; RV64IDZFH-NEXT: fcvt.s.h fa4, fa0 -; RV64IDZFH-NEXT: fmv.w.x fa3, zero -; RV64IDZFH-NEXT: fmax.s fa4, fa4, fa3 -; RV64IDZFH-NEXT: fmin.s fa5, fa4, fa5 +; RV64IDZFH-NEXT: fcvt.s.h fa5, fa0 +; RV64IDZFH-NEXT: fmv.w.x fa4, zero +; RV64IDZFH-NEXT: lui a0, 292864 +; RV64IDZFH-NEXT: fmax.s fa5, fa5, fa4 +; RV64IDZFH-NEXT: addi a0, a0, -256 +; RV64IDZFH-NEXT: fmv.w.x fa4, a0 +; RV64IDZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV64IDZFH-NEXT: fcvt.lu.s a0, fa5, rtz ; RV64IDZFH-NEXT: ret ; @@ -6971,12 +7047,13 @@ define zeroext i16 @fcvt_wu_s_sat_i16(half %a) nounwind { ; RV32ID-ILP32-NEXT: addi sp, sp, -16 ; RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32ID-ILP32-NEXT: call __extendhfsf2 -; RV32ID-ILP32-NEXT: lui a1, %hi(.LCPI34_0) -; RV32ID-ILP32-NEXT: flw fa5, %lo(.LCPI34_0)(a1) +; RV32ID-ILP32-NEXT: fmv.w.x fa5, a0 +; RV32ID-ILP32-NEXT: fmv.w.x fa4, zero +; RV32ID-ILP32-NEXT: lui a0, 292864 +; RV32ID-ILP32-NEXT: fmax.s fa5, fa5, fa4 +; RV32ID-ILP32-NEXT: addi a0, a0, -256 ; RV32ID-ILP32-NEXT: fmv.w.x fa4, a0 -; RV32ID-ILP32-NEXT: fmv.w.x fa3, zero -; RV32ID-ILP32-NEXT: fmax.s fa4, fa4, fa3 -; RV32ID-ILP32-NEXT: fmin.s fa5, fa4, fa5 +; RV32ID-ILP32-NEXT: fmin.s fa5, fa5, fa4 ; RV32ID-ILP32-NEXT: fcvt.wu.s a0, fa5, rtz ; RV32ID-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32ID-ILP32-NEXT: addi sp, sp, 16 @@ -6987,12 +7064,13 @@ define zeroext i16 @fcvt_wu_s_sat_i16(half %a) nounwind { ; RV64ID-LP64-NEXT: addi sp, sp, -16 ; RV64ID-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64ID-LP64-NEXT: call __extendhfsf2 -; RV64ID-LP64-NEXT: lui a1, %hi(.LCPI34_0) -; RV64ID-LP64-NEXT: flw fa5, %lo(.LCPI34_0)(a1) +; RV64ID-LP64-NEXT: fmv.w.x fa5, a0 +; RV64ID-LP64-NEXT: fmv.w.x fa4, zero +; RV64ID-LP64-NEXT: lui a0, 292864 +; RV64ID-LP64-NEXT: fmax.s fa5, fa5, fa4 +; RV64ID-LP64-NEXT: addi a0, a0, -256 ; RV64ID-LP64-NEXT: fmv.w.x fa4, a0 -; RV64ID-LP64-NEXT: fmv.w.x fa3, zero -; RV64ID-LP64-NEXT: fmax.s fa4, fa4, fa3 -; RV64ID-LP64-NEXT: fmin.s fa5, fa4, fa5 +; RV64ID-LP64-NEXT: fmin.s fa5, fa5, fa4 ; RV64ID-LP64-NEXT: fcvt.lu.s a0, fa5, rtz ; RV64ID-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64ID-LP64-NEXT: addi sp, sp, 16 @@ -7003,11 +7081,12 @@ define zeroext i16 @fcvt_wu_s_sat_i16(half %a) nounwind { ; RV32ID-NEXT: addi sp, sp, -16 ; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32ID-NEXT: call __extendhfsf2 -; RV32ID-NEXT: lui a0, %hi(.LCPI34_0) -; RV32ID-NEXT: flw fa5, %lo(.LCPI34_0)(a0) -; 
RV32ID-NEXT: fmv.w.x fa4, zero -; RV32ID-NEXT: fmax.s fa4, fa0, fa4 -; RV32ID-NEXT: fmin.s fa5, fa4, fa5 +; RV32ID-NEXT: fmv.w.x fa5, zero +; RV32ID-NEXT: lui a0, 292864 +; RV32ID-NEXT: fmax.s fa5, fa0, fa5 +; RV32ID-NEXT: addi a0, a0, -256 +; RV32ID-NEXT: fmv.w.x fa4, a0 +; RV32ID-NEXT: fmin.s fa5, fa5, fa4 ; RV32ID-NEXT: fcvt.wu.s a0, fa5, rtz ; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32ID-NEXT: addi sp, sp, 16 @@ -7018,11 +7097,12 @@ define zeroext i16 @fcvt_wu_s_sat_i16(half %a) nounwind { ; RV64ID-NEXT: addi sp, sp, -16 ; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64ID-NEXT: call __extendhfsf2 -; RV64ID-NEXT: lui a0, %hi(.LCPI34_0) -; RV64ID-NEXT: flw fa5, %lo(.LCPI34_0)(a0) -; RV64ID-NEXT: fmv.w.x fa4, zero -; RV64ID-NEXT: fmax.s fa4, fa0, fa4 -; RV64ID-NEXT: fmin.s fa5, fa4, fa5 +; RV64ID-NEXT: fmv.w.x fa5, zero +; RV64ID-NEXT: lui a0, 292864 +; RV64ID-NEXT: fmax.s fa5, fa0, fa5 +; RV64ID-NEXT: addi a0, a0, -256 +; RV64ID-NEXT: fmv.w.x fa4, a0 +; RV64ID-NEXT: fmin.s fa5, fa5, fa4 ; RV64ID-NEXT: fcvt.lu.s a0, fa5, rtz ; RV64ID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64ID-NEXT: addi sp, sp, 16 @@ -7030,23 +7110,25 @@ define zeroext i16 @fcvt_wu_s_sat_i16(half %a) nounwind { ; ; CHECK32-IZFHMIN-LABEL: fcvt_wu_s_sat_i16: ; CHECK32-IZFHMIN: # %bb.0: # %start -; CHECK32-IZFHMIN-NEXT: lui a0, %hi(.LCPI34_0) -; CHECK32-IZFHMIN-NEXT: flw fa5, %lo(.LCPI34_0)(a0) -; CHECK32-IZFHMIN-NEXT: fcvt.s.h fa4, fa0 -; CHECK32-IZFHMIN-NEXT: fmv.w.x fa3, zero -; CHECK32-IZFHMIN-NEXT: fmax.s fa4, fa4, fa3 -; CHECK32-IZFHMIN-NEXT: fmin.s fa5, fa4, fa5 +; CHECK32-IZFHMIN-NEXT: fcvt.s.h fa5, fa0 +; CHECK32-IZFHMIN-NEXT: fmv.w.x fa4, zero +; CHECK32-IZFHMIN-NEXT: lui a0, 292864 +; CHECK32-IZFHMIN-NEXT: fmax.s fa5, fa5, fa4 +; CHECK32-IZFHMIN-NEXT: addi a0, a0, -256 +; CHECK32-IZFHMIN-NEXT: fmv.w.x fa4, a0 +; CHECK32-IZFHMIN-NEXT: fmin.s fa5, fa5, fa4 ; CHECK32-IZFHMIN-NEXT: fcvt.wu.s a0, fa5, rtz ; CHECK32-IZFHMIN-NEXT: ret ; ; CHECK64-IZFHMIN-LABEL: fcvt_wu_s_sat_i16: ; CHECK64-IZFHMIN: # %bb.0: # %start -; CHECK64-IZFHMIN-NEXT: lui a0, %hi(.LCPI34_0) -; CHECK64-IZFHMIN-NEXT: flw fa5, %lo(.LCPI34_0)(a0) -; CHECK64-IZFHMIN-NEXT: fcvt.s.h fa4, fa0 -; CHECK64-IZFHMIN-NEXT: fmv.w.x fa3, zero -; CHECK64-IZFHMIN-NEXT: fmax.s fa4, fa4, fa3 -; CHECK64-IZFHMIN-NEXT: fmin.s fa5, fa4, fa5 +; CHECK64-IZFHMIN-NEXT: fcvt.s.h fa5, fa0 +; CHECK64-IZFHMIN-NEXT: fmv.w.x fa4, zero +; CHECK64-IZFHMIN-NEXT: lui a0, 292864 +; CHECK64-IZFHMIN-NEXT: fmax.s fa5, fa5, fa4 +; CHECK64-IZFHMIN-NEXT: addi a0, a0, -256 +; CHECK64-IZFHMIN-NEXT: fmv.w.x fa4, a0 +; CHECK64-IZFHMIN-NEXT: fmin.s fa5, fa5, fa4 ; CHECK64-IZFHMIN-NEXT: fcvt.lu.s a0, fa5, rtz ; CHECK64-IZFHMIN-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/half-imm.ll b/llvm/test/CodeGen/RISCV/half-imm.ll index d68e19d15b4bb..1dc0da8c04dba 100644 --- a/llvm/test/CodeGen/RISCV/half-imm.ll +++ b/llvm/test/CodeGen/RISCV/half-imm.ll @@ -24,8 +24,9 @@ define half @half_imm() nounwind { ; CHECK-LABEL: half_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa0, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: lui a0, 4 +; CHECK-NEXT: addi a0, a0, 512 +; CHECK-NEXT: fmv.h.x fa0, a0 ; CHECK-NEXT: ret ; ; RV32IZHINX-LABEL: half_imm: @@ -44,8 +45,9 @@ define half @half_imm() nounwind { ; ; CHECKIZFHMIN-LABEL: half_imm: ; CHECKIZFHMIN: # %bb.0: -; CHECKIZFHMIN-NEXT: lui a0, %hi(.LCPI0_0) -; CHECKIZFHMIN-NEXT: flh fa0, %lo(.LCPI0_0)(a0) +; CHECKIZFHMIN-NEXT: lui a0, 4 +; CHECKIZFHMIN-NEXT: addi a0, a0, 512 +; CHECKIZFHMIN-NEXT: fmv.h.x fa0, a0 ; 
CHECKIZFHMIN-NEXT: ret ; ; CHECKIZHINXMIN-LABEL: half_imm: @@ -60,8 +62,9 @@ define half @half_imm() nounwind { define half @half_imm_op(half %a) nounwind { ; CHECK-LABEL: half_imm_op: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: li a0, 15 +; CHECK-NEXT: slli a0, a0, 10 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: fadd.h fa0, fa0, fa5 ; CHECK-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/half-intrinsics.ll b/llvm/test/CodeGen/RISCV/half-intrinsics.ll index 4f0026175e7c7..e16d788f66ede 100644 --- a/llvm/test/CodeGen/RISCV/half-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/half-intrinsics.ll @@ -2222,8 +2222,9 @@ declare half @llvm.floor.f16(half) define half @floor_f16(half %a) nounwind { ; CHECKIZFH-LABEL: floor_f16: ; CHECKIZFH: # %bb.0: -; CHECKIZFH-NEXT: lui a0, %hi(.LCPI18_0) -; CHECKIZFH-NEXT: flh fa5, %lo(.LCPI18_0)(a0) +; CHECKIZFH-NEXT: li a0, 25 +; CHECKIZFH-NEXT: slli a0, a0, 10 +; CHECKIZFH-NEXT: fmv.h.x fa5, a0 ; CHECKIZFH-NEXT: fabs.h fa4, fa0 ; CHECKIZFH-NEXT: flt.h a0, fa4, fa5 ; CHECKIZFH-NEXT: beqz a0, .LBB18_2 @@ -2313,8 +2314,9 @@ declare half @llvm.ceil.f16(half) define half @ceil_f16(half %a) nounwind { ; CHECKIZFH-LABEL: ceil_f16: ; CHECKIZFH: # %bb.0: -; CHECKIZFH-NEXT: lui a0, %hi(.LCPI19_0) -; CHECKIZFH-NEXT: flh fa5, %lo(.LCPI19_0)(a0) +; CHECKIZFH-NEXT: li a0, 25 +; CHECKIZFH-NEXT: slli a0, a0, 10 +; CHECKIZFH-NEXT: fmv.h.x fa5, a0 ; CHECKIZFH-NEXT: fabs.h fa4, fa0 ; CHECKIZFH-NEXT: flt.h a0, fa4, fa5 ; CHECKIZFH-NEXT: beqz a0, .LBB19_2 @@ -2404,8 +2406,9 @@ declare half @llvm.trunc.f16(half) define half @trunc_f16(half %a) nounwind { ; CHECKIZFH-LABEL: trunc_f16: ; CHECKIZFH: # %bb.0: -; CHECKIZFH-NEXT: lui a0, %hi(.LCPI20_0) -; CHECKIZFH-NEXT: flh fa5, %lo(.LCPI20_0)(a0) +; CHECKIZFH-NEXT: li a0, 25 +; CHECKIZFH-NEXT: slli a0, a0, 10 +; CHECKIZFH-NEXT: fmv.h.x fa5, a0 ; CHECKIZFH-NEXT: fabs.h fa4, fa0 ; CHECKIZFH-NEXT: flt.h a0, fa4, fa5 ; CHECKIZFH-NEXT: beqz a0, .LBB20_2 @@ -2495,8 +2498,9 @@ declare half @llvm.rint.f16(half) define half @rint_f16(half %a) nounwind { ; CHECKIZFH-LABEL: rint_f16: ; CHECKIZFH: # %bb.0: -; CHECKIZFH-NEXT: lui a0, %hi(.LCPI21_0) -; CHECKIZFH-NEXT: flh fa5, %lo(.LCPI21_0)(a0) +; CHECKIZFH-NEXT: li a0, 25 +; CHECKIZFH-NEXT: slli a0, a0, 10 +; CHECKIZFH-NEXT: fmv.h.x fa5, a0 ; CHECKIZFH-NEXT: fabs.h fa4, fa0 ; CHECKIZFH-NEXT: flt.h a0, fa4, fa5 ; CHECKIZFH-NEXT: beqz a0, .LBB21_2 @@ -2706,8 +2710,9 @@ declare half @llvm.round.f16(half) define half @round_f16(half %a) nounwind { ; CHECKIZFH-LABEL: round_f16: ; CHECKIZFH: # %bb.0: -; CHECKIZFH-NEXT: lui a0, %hi(.LCPI23_0) -; CHECKIZFH-NEXT: flh fa5, %lo(.LCPI23_0)(a0) +; CHECKIZFH-NEXT: li a0, 25 +; CHECKIZFH-NEXT: slli a0, a0, 10 +; CHECKIZFH-NEXT: fmv.h.x fa5, a0 ; CHECKIZFH-NEXT: fabs.h fa4, fa0 ; CHECKIZFH-NEXT: flt.h a0, fa4, fa5 ; CHECKIZFH-NEXT: beqz a0, .LBB23_2 @@ -2797,8 +2802,9 @@ declare half @llvm.roundeven.f16(half) define half @roundeven_f16(half %a) nounwind { ; CHECKIZFH-LABEL: roundeven_f16: ; CHECKIZFH: # %bb.0: -; CHECKIZFH-NEXT: lui a0, %hi(.LCPI24_0) -; CHECKIZFH-NEXT: flh fa5, %lo(.LCPI24_0)(a0) +; CHECKIZFH-NEXT: li a0, 25 +; CHECKIZFH-NEXT: slli a0, a0, 10 +; CHECKIZFH-NEXT: fmv.h.x fa5, a0 ; CHECKIZFH-NEXT: fabs.h fa4, fa0 ; CHECKIZFH-NEXT: flt.h a0, fa4, fa5 ; CHECKIZFH-NEXT: beqz a0, .LBB24_2 diff --git a/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll index 3b645bf8aef91..c815bc19e280c 100644 --- 
a/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll +++ b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll @@ -95,8 +95,9 @@ define signext i32 @test_floor_si32(half %x) { define i64 @test_floor_si64(half %x) nounwind { ; RV32IZFH-LABEL: test_floor_si64: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: lui a0, %hi(.LCPI1_0) -; RV32IZFH-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; RV32IZFH-NEXT: li a0, 25 +; RV32IZFH-NEXT: slli a0, a0, 10 +; RV32IZFH-NEXT: fmv.h.x fa5, a0 ; RV32IZFH-NEXT: fabs.h fa4, fa0 ; RV32IZFH-NEXT: flt.h a0, fa4, fa5 ; RV32IZFH-NEXT: beqz a0, .LBB1_2 @@ -121,8 +122,9 @@ define i64 @test_floor_si64(half %x) nounwind { ; RV32IZFH-NEXT: # %bb.3: ; RV32IZFH-NEXT: mv a2, a1 ; RV32IZFH-NEXT: .LBB1_4: -; RV32IZFH-NEXT: lui a1, %hi(.LCPI1_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI1_1)(a1) +; RV32IZFH-NEXT: lui a1, 389120 +; RV32IZFH-NEXT: addi a1, a1, -1 +; RV32IZFH-NEXT: fmv.w.x fa5, a1 ; RV32IZFH-NEXT: flt.s a1, fa5, fs0 ; RV32IZFH-NEXT: beqz a1, .LBB1_6 ; RV32IZFH-NEXT: # %bb.5: @@ -248,8 +250,9 @@ define i64 @test_floor_si64(half %x) nounwind { ; RV32IZFHMIN-NEXT: # %bb.3: ; RV32IZFHMIN-NEXT: mv a2, a1 ; RV32IZFHMIN-NEXT: .LBB1_4: -; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI1_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI1_0)(a1) +; RV32IZFHMIN-NEXT: lui a1, 389120 +; RV32IZFHMIN-NEXT: addi a1, a1, -1 +; RV32IZFHMIN-NEXT: fmv.w.x fa5, a1 ; RV32IZFHMIN-NEXT: flt.s a1, fa5, fs0 ; RV32IZFHMIN-NEXT: beqz a1, .LBB1_6 ; RV32IZFHMIN-NEXT: # %bb.5: @@ -506,8 +509,9 @@ define signext i32 @test_floor_ui32(half %x) { define i64 @test_floor_ui64(half %x) nounwind { ; RV32IZFH-LABEL: test_floor_ui64: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: lui a0, %hi(.LCPI3_0) -; RV32IZFH-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; RV32IZFH-NEXT: li a0, 25 +; RV32IZFH-NEXT: slli a0, a0, 10 +; RV32IZFH-NEXT: fmv.h.x fa5, a0 ; RV32IZFH-NEXT: fabs.h fa4, fa0 ; RV32IZFH-NEXT: flt.h a0, fa4, fa5 ; RV32IZFH-NEXT: beqz a0, .LBB3_2 @@ -526,10 +530,11 @@ define i64 @test_floor_ui64(half %x) nounwind { ; RV32IZFH-NEXT: neg s0, a0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixunssfdi -; RV32IZFH-NEXT: lui a2, %hi(.LCPI3_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI3_1)(a2) ; RV32IZFH-NEXT: and a0, s0, a0 +; RV32IZFH-NEXT: lui a2, 391168 ; RV32IZFH-NEXT: and a1, s0, a1 +; RV32IZFH-NEXT: addi a2, a2, -1 +; RV32IZFH-NEXT: fmv.w.x fa5, a2 ; RV32IZFH-NEXT: flt.s a2, fa5, fs0 ; RV32IZFH-NEXT: neg a2, a2 ; RV32IZFH-NEXT: or a0, a2, a0 @@ -627,10 +632,11 @@ define i64 @test_floor_ui64(half %x) nounwind { ; RV32IZFHMIN-NEXT: neg s0, a0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixunssfdi -; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI3_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI3_0)(a2) ; RV32IZFHMIN-NEXT: and a0, s0, a0 +; RV32IZFHMIN-NEXT: lui a2, 391168 ; RV32IZFHMIN-NEXT: and a1, s0, a1 +; RV32IZFHMIN-NEXT: addi a2, a2, -1 +; RV32IZFHMIN-NEXT: fmv.w.x fa5, a2 ; RV32IZFHMIN-NEXT: flt.s a2, fa5, fs0 ; RV32IZFHMIN-NEXT: neg a2, a2 ; RV32IZFHMIN-NEXT: or a0, a2, a0 @@ -803,8 +809,9 @@ define signext i32 @test_ceil_si32(half %x) { define i64 @test_ceil_si64(half %x) nounwind { ; RV32IZFH-LABEL: test_ceil_si64: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: lui a0, %hi(.LCPI5_0) -; RV32IZFH-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; RV32IZFH-NEXT: li a0, 25 +; RV32IZFH-NEXT: slli a0, a0, 10 +; RV32IZFH-NEXT: fmv.h.x fa5, a0 ; RV32IZFH-NEXT: fabs.h fa4, fa0 ; RV32IZFH-NEXT: flt.h a0, fa4, fa5 ; RV32IZFH-NEXT: beqz a0, .LBB5_2 @@ -829,8 +836,9 @@ define i64 @test_ceil_si64(half %x) nounwind { ; RV32IZFH-NEXT: # %bb.3: ; RV32IZFH-NEXT: mv a2, a1 ; RV32IZFH-NEXT: 
.LBB5_4: -; RV32IZFH-NEXT: lui a1, %hi(.LCPI5_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI5_1)(a1) +; RV32IZFH-NEXT: lui a1, 389120 +; RV32IZFH-NEXT: addi a1, a1, -1 +; RV32IZFH-NEXT: fmv.w.x fa5, a1 ; RV32IZFH-NEXT: flt.s a1, fa5, fs0 ; RV32IZFH-NEXT: beqz a1, .LBB5_6 ; RV32IZFH-NEXT: # %bb.5: @@ -956,8 +964,9 @@ define i64 @test_ceil_si64(half %x) nounwind { ; RV32IZFHMIN-NEXT: # %bb.3: ; RV32IZFHMIN-NEXT: mv a2, a1 ; RV32IZFHMIN-NEXT: .LBB5_4: -; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI5_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI5_0)(a1) +; RV32IZFHMIN-NEXT: lui a1, 389120 +; RV32IZFHMIN-NEXT: addi a1, a1, -1 +; RV32IZFHMIN-NEXT: fmv.w.x fa5, a1 ; RV32IZFHMIN-NEXT: flt.s a1, fa5, fs0 ; RV32IZFHMIN-NEXT: beqz a1, .LBB5_6 ; RV32IZFHMIN-NEXT: # %bb.5: @@ -1214,8 +1223,9 @@ define signext i32 @test_ceil_ui32(half %x) { define i64 @test_ceil_ui64(half %x) nounwind { ; RV32IZFH-LABEL: test_ceil_ui64: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: lui a0, %hi(.LCPI7_0) -; RV32IZFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) +; RV32IZFH-NEXT: li a0, 25 +; RV32IZFH-NEXT: slli a0, a0, 10 +; RV32IZFH-NEXT: fmv.h.x fa5, a0 ; RV32IZFH-NEXT: fabs.h fa4, fa0 ; RV32IZFH-NEXT: flt.h a0, fa4, fa5 ; RV32IZFH-NEXT: beqz a0, .LBB7_2 @@ -1234,10 +1244,11 @@ define i64 @test_ceil_ui64(half %x) nounwind { ; RV32IZFH-NEXT: neg s0, a0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixunssfdi -; RV32IZFH-NEXT: lui a2, %hi(.LCPI7_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI7_1)(a2) ; RV32IZFH-NEXT: and a0, s0, a0 +; RV32IZFH-NEXT: lui a2, 391168 ; RV32IZFH-NEXT: and a1, s0, a1 +; RV32IZFH-NEXT: addi a2, a2, -1 +; RV32IZFH-NEXT: fmv.w.x fa5, a2 ; RV32IZFH-NEXT: flt.s a2, fa5, fs0 ; RV32IZFH-NEXT: neg a2, a2 ; RV32IZFH-NEXT: or a0, a2, a0 @@ -1335,10 +1346,11 @@ define i64 @test_ceil_ui64(half %x) nounwind { ; RV32IZFHMIN-NEXT: neg s0, a0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixunssfdi -; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI7_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI7_0)(a2) ; RV32IZFHMIN-NEXT: and a0, s0, a0 +; RV32IZFHMIN-NEXT: lui a2, 391168 ; RV32IZFHMIN-NEXT: and a1, s0, a1 +; RV32IZFHMIN-NEXT: addi a2, a2, -1 +; RV32IZFHMIN-NEXT: fmv.w.x fa5, a2 ; RV32IZFHMIN-NEXT: flt.s a2, fa5, fs0 ; RV32IZFHMIN-NEXT: neg a2, a2 ; RV32IZFHMIN-NEXT: or a0, a2, a0 @@ -1511,8 +1523,9 @@ define signext i32 @test_trunc_si32(half %x) { define i64 @test_trunc_si64(half %x) nounwind { ; RV32IZFH-LABEL: test_trunc_si64: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: lui a0, %hi(.LCPI9_0) -; RV32IZFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0) +; RV32IZFH-NEXT: li a0, 25 +; RV32IZFH-NEXT: slli a0, a0, 10 +; RV32IZFH-NEXT: fmv.h.x fa5, a0 ; RV32IZFH-NEXT: fabs.h fa4, fa0 ; RV32IZFH-NEXT: flt.h a0, fa4, fa5 ; RV32IZFH-NEXT: beqz a0, .LBB9_2 @@ -1537,8 +1550,9 @@ define i64 @test_trunc_si64(half %x) nounwind { ; RV32IZFH-NEXT: # %bb.3: ; RV32IZFH-NEXT: mv a2, a1 ; RV32IZFH-NEXT: .LBB9_4: -; RV32IZFH-NEXT: lui a1, %hi(.LCPI9_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI9_1)(a1) +; RV32IZFH-NEXT: lui a1, 389120 +; RV32IZFH-NEXT: addi a1, a1, -1 +; RV32IZFH-NEXT: fmv.w.x fa5, a1 ; RV32IZFH-NEXT: flt.s a1, fa5, fs0 ; RV32IZFH-NEXT: beqz a1, .LBB9_6 ; RV32IZFH-NEXT: # %bb.5: @@ -1664,8 +1678,9 @@ define i64 @test_trunc_si64(half %x) nounwind { ; RV32IZFHMIN-NEXT: # %bb.3: ; RV32IZFHMIN-NEXT: mv a2, a1 ; RV32IZFHMIN-NEXT: .LBB9_4: -; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI9_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI9_0)(a1) +; RV32IZFHMIN-NEXT: lui a1, 389120 +; RV32IZFHMIN-NEXT: addi a1, a1, -1 +; RV32IZFHMIN-NEXT: fmv.w.x fa5, a1 ; RV32IZFHMIN-NEXT: flt.s a1, fa5, fs0 ; 
RV32IZFHMIN-NEXT: beqz a1, .LBB9_6 ; RV32IZFHMIN-NEXT: # %bb.5: @@ -1922,8 +1937,9 @@ define signext i32 @test_trunc_ui32(half %x) { define i64 @test_trunc_ui64(half %x) nounwind { ; RV32IZFH-LABEL: test_trunc_ui64: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: lui a0, %hi(.LCPI11_0) -; RV32IZFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0) +; RV32IZFH-NEXT: li a0, 25 +; RV32IZFH-NEXT: slli a0, a0, 10 +; RV32IZFH-NEXT: fmv.h.x fa5, a0 ; RV32IZFH-NEXT: fabs.h fa4, fa0 ; RV32IZFH-NEXT: flt.h a0, fa4, fa5 ; RV32IZFH-NEXT: beqz a0, .LBB11_2 @@ -1942,10 +1958,11 @@ define i64 @test_trunc_ui64(half %x) nounwind { ; RV32IZFH-NEXT: neg s0, a0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixunssfdi -; RV32IZFH-NEXT: lui a2, %hi(.LCPI11_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI11_1)(a2) ; RV32IZFH-NEXT: and a0, s0, a0 +; RV32IZFH-NEXT: lui a2, 391168 ; RV32IZFH-NEXT: and a1, s0, a1 +; RV32IZFH-NEXT: addi a2, a2, -1 +; RV32IZFH-NEXT: fmv.w.x fa5, a2 ; RV32IZFH-NEXT: flt.s a2, fa5, fs0 ; RV32IZFH-NEXT: neg a2, a2 ; RV32IZFH-NEXT: or a0, a2, a0 @@ -2043,10 +2060,11 @@ define i64 @test_trunc_ui64(half %x) nounwind { ; RV32IZFHMIN-NEXT: neg s0, a0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixunssfdi -; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI11_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI11_0)(a2) ; RV32IZFHMIN-NEXT: and a0, s0, a0 +; RV32IZFHMIN-NEXT: lui a2, 391168 ; RV32IZFHMIN-NEXT: and a1, s0, a1 +; RV32IZFHMIN-NEXT: addi a2, a2, -1 +; RV32IZFHMIN-NEXT: fmv.w.x fa5, a2 ; RV32IZFHMIN-NEXT: flt.s a2, fa5, fs0 ; RV32IZFHMIN-NEXT: neg a2, a2 ; RV32IZFHMIN-NEXT: or a0, a2, a0 @@ -2219,8 +2237,9 @@ define signext i32 @test_round_si32(half %x) { define i64 @test_round_si64(half %x) nounwind { ; RV32IZFH-LABEL: test_round_si64: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: lui a0, %hi(.LCPI13_0) -; RV32IZFH-NEXT: flh fa5, %lo(.LCPI13_0)(a0) +; RV32IZFH-NEXT: li a0, 25 +; RV32IZFH-NEXT: slli a0, a0, 10 +; RV32IZFH-NEXT: fmv.h.x fa5, a0 ; RV32IZFH-NEXT: fabs.h fa4, fa0 ; RV32IZFH-NEXT: flt.h a0, fa4, fa5 ; RV32IZFH-NEXT: beqz a0, .LBB13_2 @@ -2245,8 +2264,9 @@ define i64 @test_round_si64(half %x) nounwind { ; RV32IZFH-NEXT: # %bb.3: ; RV32IZFH-NEXT: mv a2, a1 ; RV32IZFH-NEXT: .LBB13_4: -; RV32IZFH-NEXT: lui a1, %hi(.LCPI13_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI13_1)(a1) +; RV32IZFH-NEXT: lui a1, 389120 +; RV32IZFH-NEXT: addi a1, a1, -1 +; RV32IZFH-NEXT: fmv.w.x fa5, a1 ; RV32IZFH-NEXT: flt.s a1, fa5, fs0 ; RV32IZFH-NEXT: beqz a1, .LBB13_6 ; RV32IZFH-NEXT: # %bb.5: @@ -2372,8 +2392,9 @@ define i64 @test_round_si64(half %x) nounwind { ; RV32IZFHMIN-NEXT: # %bb.3: ; RV32IZFHMIN-NEXT: mv a2, a1 ; RV32IZFHMIN-NEXT: .LBB13_4: -; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI13_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI13_0)(a1) +; RV32IZFHMIN-NEXT: lui a1, 389120 +; RV32IZFHMIN-NEXT: addi a1, a1, -1 +; RV32IZFHMIN-NEXT: fmv.w.x fa5, a1 ; RV32IZFHMIN-NEXT: flt.s a1, fa5, fs0 ; RV32IZFHMIN-NEXT: beqz a1, .LBB13_6 ; RV32IZFHMIN-NEXT: # %bb.5: @@ -2630,8 +2651,9 @@ define signext i32 @test_round_ui32(half %x) { define i64 @test_round_ui64(half %x) nounwind { ; RV32IZFH-LABEL: test_round_ui64: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: lui a0, %hi(.LCPI15_0) -; RV32IZFH-NEXT: flh fa5, %lo(.LCPI15_0)(a0) +; RV32IZFH-NEXT: li a0, 25 +; RV32IZFH-NEXT: slli a0, a0, 10 +; RV32IZFH-NEXT: fmv.h.x fa5, a0 ; RV32IZFH-NEXT: fabs.h fa4, fa0 ; RV32IZFH-NEXT: flt.h a0, fa4, fa5 ; RV32IZFH-NEXT: beqz a0, .LBB15_2 @@ -2650,10 +2672,11 @@ define i64 @test_round_ui64(half %x) nounwind { ; RV32IZFH-NEXT: neg s0, a0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; 
RV32IZFH-NEXT: call __fixunssfdi -; RV32IZFH-NEXT: lui a2, %hi(.LCPI15_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI15_1)(a2) ; RV32IZFH-NEXT: and a0, s0, a0 +; RV32IZFH-NEXT: lui a2, 391168 ; RV32IZFH-NEXT: and a1, s0, a1 +; RV32IZFH-NEXT: addi a2, a2, -1 +; RV32IZFH-NEXT: fmv.w.x fa5, a2 ; RV32IZFH-NEXT: flt.s a2, fa5, fs0 ; RV32IZFH-NEXT: neg a2, a2 ; RV32IZFH-NEXT: or a0, a2, a0 @@ -2751,10 +2774,11 @@ define i64 @test_round_ui64(half %x) nounwind { ; RV32IZFHMIN-NEXT: neg s0, a0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixunssfdi -; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI15_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI15_0)(a2) ; RV32IZFHMIN-NEXT: and a0, s0, a0 +; RV32IZFHMIN-NEXT: lui a2, 391168 ; RV32IZFHMIN-NEXT: and a1, s0, a1 +; RV32IZFHMIN-NEXT: addi a2, a2, -1 +; RV32IZFHMIN-NEXT: fmv.w.x fa5, a2 ; RV32IZFHMIN-NEXT: flt.s a2, fa5, fs0 ; RV32IZFHMIN-NEXT: neg a2, a2 ; RV32IZFHMIN-NEXT: or a0, a2, a0 @@ -2927,8 +2951,9 @@ define signext i32 @test_roundeven_si32(half %x) { define i64 @test_roundeven_si64(half %x) nounwind { ; RV32IZFH-LABEL: test_roundeven_si64: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: lui a0, %hi(.LCPI17_0) -; RV32IZFH-NEXT: flh fa5, %lo(.LCPI17_0)(a0) +; RV32IZFH-NEXT: li a0, 25 +; RV32IZFH-NEXT: slli a0, a0, 10 +; RV32IZFH-NEXT: fmv.h.x fa5, a0 ; RV32IZFH-NEXT: fabs.h fa4, fa0 ; RV32IZFH-NEXT: flt.h a0, fa4, fa5 ; RV32IZFH-NEXT: beqz a0, .LBB17_2 @@ -2953,8 +2978,9 @@ define i64 @test_roundeven_si64(half %x) nounwind { ; RV32IZFH-NEXT: # %bb.3: ; RV32IZFH-NEXT: mv a2, a1 ; RV32IZFH-NEXT: .LBB17_4: -; RV32IZFH-NEXT: lui a1, %hi(.LCPI17_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI17_1)(a1) +; RV32IZFH-NEXT: lui a1, 389120 +; RV32IZFH-NEXT: addi a1, a1, -1 +; RV32IZFH-NEXT: fmv.w.x fa5, a1 ; RV32IZFH-NEXT: flt.s a1, fa5, fs0 ; RV32IZFH-NEXT: beqz a1, .LBB17_6 ; RV32IZFH-NEXT: # %bb.5: @@ -3080,8 +3106,9 @@ define i64 @test_roundeven_si64(half %x) nounwind { ; RV32IZFHMIN-NEXT: # %bb.3: ; RV32IZFHMIN-NEXT: mv a2, a1 ; RV32IZFHMIN-NEXT: .LBB17_4: -; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI17_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI17_0)(a1) +; RV32IZFHMIN-NEXT: lui a1, 389120 +; RV32IZFHMIN-NEXT: addi a1, a1, -1 +; RV32IZFHMIN-NEXT: fmv.w.x fa5, a1 ; RV32IZFHMIN-NEXT: flt.s a1, fa5, fs0 ; RV32IZFHMIN-NEXT: beqz a1, .LBB17_6 ; RV32IZFHMIN-NEXT: # %bb.5: @@ -3338,8 +3365,9 @@ define signext i32 @test_roundeven_ui32(half %x) { define i64 @test_roundeven_ui64(half %x) nounwind { ; RV32IZFH-LABEL: test_roundeven_ui64: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: lui a0, %hi(.LCPI19_0) -; RV32IZFH-NEXT: flh fa5, %lo(.LCPI19_0)(a0) +; RV32IZFH-NEXT: li a0, 25 +; RV32IZFH-NEXT: slli a0, a0, 10 +; RV32IZFH-NEXT: fmv.h.x fa5, a0 ; RV32IZFH-NEXT: fabs.h fa4, fa0 ; RV32IZFH-NEXT: flt.h a0, fa4, fa5 ; RV32IZFH-NEXT: beqz a0, .LBB19_2 @@ -3358,10 +3386,11 @@ define i64 @test_roundeven_ui64(half %x) nounwind { ; RV32IZFH-NEXT: neg s0, a0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixunssfdi -; RV32IZFH-NEXT: lui a2, %hi(.LCPI19_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI19_1)(a2) ; RV32IZFH-NEXT: and a0, s0, a0 +; RV32IZFH-NEXT: lui a2, 391168 ; RV32IZFH-NEXT: and a1, s0, a1 +; RV32IZFH-NEXT: addi a2, a2, -1 +; RV32IZFH-NEXT: fmv.w.x fa5, a2 ; RV32IZFH-NEXT: flt.s a2, fa5, fs0 ; RV32IZFH-NEXT: neg a2, a2 ; RV32IZFH-NEXT: or a0, a2, a0 @@ -3459,10 +3488,11 @@ define i64 @test_roundeven_ui64(half %x) nounwind { ; RV32IZFHMIN-NEXT: neg s0, a0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixunssfdi -; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI19_0) -; RV32IZFHMIN-NEXT: flw 
fa5, %lo(.LCPI19_0)(a2) ; RV32IZFHMIN-NEXT: and a0, s0, a0 +; RV32IZFHMIN-NEXT: lui a2, 391168 ; RV32IZFHMIN-NEXT: and a1, s0, a1 +; RV32IZFHMIN-NEXT: addi a2, a2, -1 +; RV32IZFHMIN-NEXT: fmv.w.x fa5, a2 ; RV32IZFHMIN-NEXT: flt.s a2, fa5, fs0 ; RV32IZFHMIN-NEXT: neg a2, a2 ; RV32IZFHMIN-NEXT: or a0, a2, a0 @@ -3635,8 +3665,9 @@ define signext i32 @test_rint_si32(half %x) { define i64 @test_rint_si64(half %x) nounwind { ; RV32IZFH-LABEL: test_rint_si64: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: lui a0, %hi(.LCPI21_0) -; RV32IZFH-NEXT: flh fa5, %lo(.LCPI21_0)(a0) +; RV32IZFH-NEXT: li a0, 25 +; RV32IZFH-NEXT: slli a0, a0, 10 +; RV32IZFH-NEXT: fmv.h.x fa5, a0 ; RV32IZFH-NEXT: fabs.h fa4, fa0 ; RV32IZFH-NEXT: flt.h a0, fa4, fa5 ; RV32IZFH-NEXT: beqz a0, .LBB21_2 @@ -3661,8 +3692,9 @@ define i64 @test_rint_si64(half %x) nounwind { ; RV32IZFH-NEXT: # %bb.3: ; RV32IZFH-NEXT: mv a2, a1 ; RV32IZFH-NEXT: .LBB21_4: -; RV32IZFH-NEXT: lui a1, %hi(.LCPI21_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI21_1)(a1) +; RV32IZFH-NEXT: lui a1, 389120 +; RV32IZFH-NEXT: addi a1, a1, -1 +; RV32IZFH-NEXT: fmv.w.x fa5, a1 ; RV32IZFH-NEXT: flt.s a1, fa5, fs0 ; RV32IZFH-NEXT: beqz a1, .LBB21_6 ; RV32IZFH-NEXT: # %bb.5: @@ -3788,8 +3820,9 @@ define i64 @test_rint_si64(half %x) nounwind { ; RV32IZFHMIN-NEXT: # %bb.3: ; RV32IZFHMIN-NEXT: mv a2, a1 ; RV32IZFHMIN-NEXT: .LBB21_4: -; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI21_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI21_0)(a1) +; RV32IZFHMIN-NEXT: lui a1, 389120 +; RV32IZFHMIN-NEXT: addi a1, a1, -1 +; RV32IZFHMIN-NEXT: fmv.w.x fa5, a1 ; RV32IZFHMIN-NEXT: flt.s a1, fa5, fs0 ; RV32IZFHMIN-NEXT: beqz a1, .LBB21_6 ; RV32IZFHMIN-NEXT: # %bb.5: @@ -4046,8 +4079,9 @@ define signext i32 @test_rint_ui32(half %x) { define i64 @test_rint_ui64(half %x) nounwind { ; RV32IZFH-LABEL: test_rint_ui64: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: lui a0, %hi(.LCPI23_0) -; RV32IZFH-NEXT: flh fa5, %lo(.LCPI23_0)(a0) +; RV32IZFH-NEXT: li a0, 25 +; RV32IZFH-NEXT: slli a0, a0, 10 +; RV32IZFH-NEXT: fmv.h.x fa5, a0 ; RV32IZFH-NEXT: fabs.h fa4, fa0 ; RV32IZFH-NEXT: flt.h a0, fa4, fa5 ; RV32IZFH-NEXT: beqz a0, .LBB23_2 @@ -4066,10 +4100,11 @@ define i64 @test_rint_ui64(half %x) nounwind { ; RV32IZFH-NEXT: neg s0, a0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixunssfdi -; RV32IZFH-NEXT: lui a2, %hi(.LCPI23_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI23_1)(a2) ; RV32IZFH-NEXT: and a0, s0, a0 +; RV32IZFH-NEXT: lui a2, 391168 ; RV32IZFH-NEXT: and a1, s0, a1 +; RV32IZFH-NEXT: addi a2, a2, -1 +; RV32IZFH-NEXT: fmv.w.x fa5, a2 ; RV32IZFH-NEXT: flt.s a2, fa5, fs0 ; RV32IZFH-NEXT: neg a2, a2 ; RV32IZFH-NEXT: or a0, a2, a0 @@ -4167,10 +4202,11 @@ define i64 @test_rint_ui64(half %x) nounwind { ; RV32IZFHMIN-NEXT: neg s0, a0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixunssfdi -; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI23_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI23_0)(a2) ; RV32IZFHMIN-NEXT: and a0, s0, a0 +; RV32IZFHMIN-NEXT: lui a2, 391168 ; RV32IZFHMIN-NEXT: and a1, s0, a1 +; RV32IZFHMIN-NEXT: addi a2, a2, -1 +; RV32IZFHMIN-NEXT: fmv.w.x fa5, a2 ; RV32IZFHMIN-NEXT: flt.s a2, fa5, fs0 ; RV32IZFHMIN-NEXT: neg a2, a2 ; RV32IZFHMIN-NEXT: or a0, a2, a0 diff --git a/llvm/test/CodeGen/RISCV/half-round-conv.ll b/llvm/test/CodeGen/RISCV/half-round-conv.ll index 8a787ee578990..cfc997d66ec56 100644 --- a/llvm/test/CodeGen/RISCV/half-round-conv.ll +++ b/llvm/test/CodeGen/RISCV/half-round-conv.ll @@ -309,8 +309,9 @@ define signext i32 @test_floor_si32(half %x) { define i64 @test_floor_si64(half %x) { ; RV32IZFH-LABEL: 
test_floor_si64: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: lui a0, %hi(.LCPI3_0) -; RV32IZFH-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; RV32IZFH-NEXT: li a0, 25 +; RV32IZFH-NEXT: slli a0, a0, 10 +; RV32IZFH-NEXT: fmv.h.x fa5, a0 ; RV32IZFH-NEXT: fabs.h fa4, fa0 ; RV32IZFH-NEXT: flt.h a0, fa4, fa5 ; RV32IZFH-NEXT: beqz a0, .LBB3_2 @@ -754,8 +755,9 @@ define signext i32 @test_floor_ui32(half %x) { define i64 @test_floor_ui64(half %x) { ; RV32IZFH-LABEL: test_floor_ui64: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: lui a0, %hi(.LCPI7_0) -; RV32IZFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) +; RV32IZFH-NEXT: li a0, 25 +; RV32IZFH-NEXT: slli a0, a0, 10 +; RV32IZFH-NEXT: fmv.h.x fa5, a0 ; RV32IZFH-NEXT: fabs.h fa4, fa0 ; RV32IZFH-NEXT: flt.h a0, fa4, fa5 ; RV32IZFH-NEXT: beqz a0, .LBB7_2 @@ -1199,8 +1201,9 @@ define signext i32 @test_ceil_si32(half %x) { define i64 @test_ceil_si64(half %x) { ; RV32IZFH-LABEL: test_ceil_si64: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: lui a0, %hi(.LCPI11_0) -; RV32IZFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0) +; RV32IZFH-NEXT: li a0, 25 +; RV32IZFH-NEXT: slli a0, a0, 10 +; RV32IZFH-NEXT: fmv.h.x fa5, a0 ; RV32IZFH-NEXT: fabs.h fa4, fa0 ; RV32IZFH-NEXT: flt.h a0, fa4, fa5 ; RV32IZFH-NEXT: beqz a0, .LBB11_2 @@ -1644,8 +1647,9 @@ define signext i32 @test_ceil_ui32(half %x) { define i64 @test_ceil_ui64(half %x) { ; RV32IZFH-LABEL: test_ceil_ui64: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: lui a0, %hi(.LCPI15_0) -; RV32IZFH-NEXT: flh fa5, %lo(.LCPI15_0)(a0) +; RV32IZFH-NEXT: li a0, 25 +; RV32IZFH-NEXT: slli a0, a0, 10 +; RV32IZFH-NEXT: fmv.h.x fa5, a0 ; RV32IZFH-NEXT: fabs.h fa4, fa0 ; RV32IZFH-NEXT: flt.h a0, fa4, fa5 ; RV32IZFH-NEXT: beqz a0, .LBB15_2 @@ -2089,8 +2093,9 @@ define signext i32 @test_trunc_si32(half %x) { define i64 @test_trunc_si64(half %x) { ; RV32IZFH-LABEL: test_trunc_si64: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: lui a0, %hi(.LCPI19_0) -; RV32IZFH-NEXT: flh fa5, %lo(.LCPI19_0)(a0) +; RV32IZFH-NEXT: li a0, 25 +; RV32IZFH-NEXT: slli a0, a0, 10 +; RV32IZFH-NEXT: fmv.h.x fa5, a0 ; RV32IZFH-NEXT: fabs.h fa4, fa0 ; RV32IZFH-NEXT: flt.h a0, fa4, fa5 ; RV32IZFH-NEXT: beqz a0, .LBB19_2 @@ -2534,8 +2539,9 @@ define signext i32 @test_trunc_ui32(half %x) { define i64 @test_trunc_ui64(half %x) { ; RV32IZFH-LABEL: test_trunc_ui64: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: lui a0, %hi(.LCPI23_0) -; RV32IZFH-NEXT: flh fa5, %lo(.LCPI23_0)(a0) +; RV32IZFH-NEXT: li a0, 25 +; RV32IZFH-NEXT: slli a0, a0, 10 +; RV32IZFH-NEXT: fmv.h.x fa5, a0 ; RV32IZFH-NEXT: fabs.h fa4, fa0 ; RV32IZFH-NEXT: flt.h a0, fa4, fa5 ; RV32IZFH-NEXT: beqz a0, .LBB23_2 @@ -2979,8 +2985,9 @@ define signext i32 @test_round_si32(half %x) { define i64 @test_round_si64(half %x) { ; RV32IZFH-LABEL: test_round_si64: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: lui a0, %hi(.LCPI27_0) -; RV32IZFH-NEXT: flh fa5, %lo(.LCPI27_0)(a0) +; RV32IZFH-NEXT: li a0, 25 +; RV32IZFH-NEXT: slli a0, a0, 10 +; RV32IZFH-NEXT: fmv.h.x fa5, a0 ; RV32IZFH-NEXT: fabs.h fa4, fa0 ; RV32IZFH-NEXT: flt.h a0, fa4, fa5 ; RV32IZFH-NEXT: beqz a0, .LBB27_2 @@ -3424,8 +3431,9 @@ define signext i32 @test_round_ui32(half %x) { define i64 @test_round_ui64(half %x) { ; RV32IZFH-LABEL: test_round_ui64: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: lui a0, %hi(.LCPI31_0) -; RV32IZFH-NEXT: flh fa5, %lo(.LCPI31_0)(a0) +; RV32IZFH-NEXT: li a0, 25 +; RV32IZFH-NEXT: slli a0, a0, 10 +; RV32IZFH-NEXT: fmv.h.x fa5, a0 ; RV32IZFH-NEXT: fabs.h fa4, fa0 ; RV32IZFH-NEXT: flt.h a0, fa4, fa5 ; RV32IZFH-NEXT: beqz a0, .LBB31_2 @@ -3869,8 +3877,9 @@ define signext i32 @test_roundeven_si32(half %x) { define i64 
@test_roundeven_si64(half %x) { ; RV32IZFH-LABEL: test_roundeven_si64: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: lui a0, %hi(.LCPI35_0) -; RV32IZFH-NEXT: flh fa5, %lo(.LCPI35_0)(a0) +; RV32IZFH-NEXT: li a0, 25 +; RV32IZFH-NEXT: slli a0, a0, 10 +; RV32IZFH-NEXT: fmv.h.x fa5, a0 ; RV32IZFH-NEXT: fabs.h fa4, fa0 ; RV32IZFH-NEXT: flt.h a0, fa4, fa5 ; RV32IZFH-NEXT: beqz a0, .LBB35_2 @@ -4314,8 +4323,9 @@ define signext i32 @test_roundeven_ui32(half %x) { define i64 @test_roundeven_ui64(half %x) { ; RV32IZFH-LABEL: test_roundeven_ui64: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: lui a0, %hi(.LCPI39_0) -; RV32IZFH-NEXT: flh fa5, %lo(.LCPI39_0)(a0) +; RV32IZFH-NEXT: li a0, 25 +; RV32IZFH-NEXT: slli a0, a0, 10 +; RV32IZFH-NEXT: fmv.h.x fa5, a0 ; RV32IZFH-NEXT: fabs.h fa4, fa0 ; RV32IZFH-NEXT: flt.h a0, fa4, fa5 ; RV32IZFH-NEXT: beqz a0, .LBB39_2 @@ -4490,8 +4500,9 @@ define half @test_floor_half(half %x) { ; RV64IFD-NEXT: ret ; CHECKIZFH-LABEL: test_floor_half: ; CHECKIZFH: # %bb.0: -; CHECKIZFH-NEXT: lui a0, %hi(.LCPI40_0) -; CHECKIZFH-NEXT: flh fa5, %lo(.LCPI40_0)(a0) +; CHECKIZFH-NEXT: li a0, 25 +; CHECKIZFH-NEXT: slli a0, a0, 10 +; CHECKIZFH-NEXT: fmv.h.x fa5, a0 ; CHECKIZFH-NEXT: fabs.h fa4, fa0 ; CHECKIZFH-NEXT: flt.h a0, fa4, fa5 ; CHECKIZFH-NEXT: beqz a0, .LBB40_2 @@ -4574,8 +4585,9 @@ define half @test_ceil_half(half %x) { ; RV64IFD-NEXT: ret ; CHECKIZFH-LABEL: test_ceil_half: ; CHECKIZFH: # %bb.0: -; CHECKIZFH-NEXT: lui a0, %hi(.LCPI41_0) -; CHECKIZFH-NEXT: flh fa5, %lo(.LCPI41_0)(a0) +; CHECKIZFH-NEXT: li a0, 25 +; CHECKIZFH-NEXT: slli a0, a0, 10 +; CHECKIZFH-NEXT: fmv.h.x fa5, a0 ; CHECKIZFH-NEXT: fabs.h fa4, fa0 ; CHECKIZFH-NEXT: flt.h a0, fa4, fa5 ; CHECKIZFH-NEXT: beqz a0, .LBB41_2 @@ -4658,8 +4670,9 @@ define half @test_trunc_half(half %x) { ; RV64IFD-NEXT: ret ; CHECKIZFH-LABEL: test_trunc_half: ; CHECKIZFH: # %bb.0: -; CHECKIZFH-NEXT: lui a0, %hi(.LCPI42_0) -; CHECKIZFH-NEXT: flh fa5, %lo(.LCPI42_0)(a0) +; CHECKIZFH-NEXT: li a0, 25 +; CHECKIZFH-NEXT: slli a0, a0, 10 +; CHECKIZFH-NEXT: fmv.h.x fa5, a0 ; CHECKIZFH-NEXT: fabs.h fa4, fa0 ; CHECKIZFH-NEXT: flt.h a0, fa4, fa5 ; CHECKIZFH-NEXT: beqz a0, .LBB42_2 @@ -4742,8 +4755,9 @@ define half @test_round_half(half %x) { ; RV64IFD-NEXT: ret ; CHECKIZFH-LABEL: test_round_half: ; CHECKIZFH: # %bb.0: -; CHECKIZFH-NEXT: lui a0, %hi(.LCPI43_0) -; CHECKIZFH-NEXT: flh fa5, %lo(.LCPI43_0)(a0) +; CHECKIZFH-NEXT: li a0, 25 +; CHECKIZFH-NEXT: slli a0, a0, 10 +; CHECKIZFH-NEXT: fmv.h.x fa5, a0 ; CHECKIZFH-NEXT: fabs.h fa4, fa0 ; CHECKIZFH-NEXT: flt.h a0, fa4, fa5 ; CHECKIZFH-NEXT: beqz a0, .LBB43_2 @@ -4826,8 +4840,9 @@ define half @test_roundeven_half(half %x) { ; RV64IFD-NEXT: ret ; CHECKIZFH-LABEL: test_roundeven_half: ; CHECKIZFH: # %bb.0: -; CHECKIZFH-NEXT: lui a0, %hi(.LCPI44_0) -; CHECKIZFH-NEXT: flh fa5, %lo(.LCPI44_0)(a0) +; CHECKIZFH-NEXT: li a0, 25 +; CHECKIZFH-NEXT: slli a0, a0, 10 +; CHECKIZFH-NEXT: fmv.h.x fa5, a0 ; CHECKIZFH-NEXT: fabs.h fa4, fa0 ; CHECKIZFH-NEXT: flt.h a0, fa4, fa5 ; CHECKIZFH-NEXT: beqz a0, .LBB44_2 diff --git a/llvm/test/CodeGen/RISCV/half-select-fcmp.ll b/llvm/test/CodeGen/RISCV/half-select-fcmp.ll index bf535b1cbd084..e9699502ed3a9 100644 --- a/llvm/test/CodeGen/RISCV/half-select-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/half-select-fcmp.ll @@ -878,8 +878,9 @@ define signext i32 @select_fcmp_uge_1_2(half %a, half %b) nounwind { define half @CascadedSelect(half noundef %a) { ; CHECK-LABEL: CascadedSelect: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lui a0, %hi(.LCPI20_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI20_0)(a0) +; 
CHECK-NEXT: li a0, 15 +; CHECK-NEXT: slli a0, a0, 10 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: flt.h a0, fa5, fa0 ; CHECK-NEXT: bnez a0, .LBB20_3 ; CHECK-NEXT: # %bb.1: # %entry @@ -910,23 +911,24 @@ define half @CascadedSelect(half noundef %a) { ; ; CHECKIZFHMIN-LABEL: CascadedSelect: ; CHECKIZFHMIN: # %bb.0: # %entry -; CHECKIZFHMIN-NEXT: lui a0, %hi(.LCPI20_0) -; CHECKIZFHMIN-NEXT: flh fa5, %lo(.LCPI20_0)(a0) -; CHECKIZFHMIN-NEXT: fcvt.s.h fa3, fa5 -; CHECKIZFHMIN-NEXT: fcvt.s.h fa4, fa0 -; CHECKIZFHMIN-NEXT: flt.s a0, fa3, fa4 -; CHECKIZFHMIN-NEXT: bnez a0, .LBB20_3 -; CHECKIZFHMIN-NEXT: # %bb.1: # %entry -; CHECKIZFHMIN-NEXT: fmv.w.x fa5, zero +; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa0 +; CHECKIZFHMIN-NEXT: lui a0, 260096 +; CHECKIZFHMIN-NEXT: fmv.w.x fa4, zero +; CHECKIZFHMIN-NEXT: flt.s a1, fa5, fa4 +; CHECKIZFHMIN-NEXT: fmv.w.x fa4, a0 ; CHECKIZFHMIN-NEXT: flt.s a0, fa4, fa5 +; CHECKIZFHMIN-NEXT: bnez a1, .LBB20_3 +; CHECKIZFHMIN-NEXT: # %bb.1: # %entry ; CHECKIZFHMIN-NEXT: bnez a0, .LBB20_4 -; CHECKIZFHMIN-NEXT: # %bb.2: # %entry -; CHECKIZFHMIN-NEXT: fmv.s fa5, fa0 -; CHECKIZFHMIN-NEXT: .LBB20_3: # %entry -; CHECKIZFHMIN-NEXT: fmv.s fa0, fa5 +; CHECKIZFHMIN-NEXT: .LBB20_2: # %entry ; CHECKIZFHMIN-NEXT: ret -; CHECKIZFHMIN-NEXT: .LBB20_4: +; CHECKIZFHMIN-NEXT: .LBB20_3: ; CHECKIZFHMIN-NEXT: fmv.h.x fa0, zero +; CHECKIZFHMIN-NEXT: beqz a0, .LBB20_2 +; CHECKIZFHMIN-NEXT: .LBB20_4: +; CHECKIZFHMIN-NEXT: li a0, 15 +; CHECKIZFHMIN-NEXT: slli a0, a0, 10 +; CHECKIZFHMIN-NEXT: fmv.h.x fa0, a0 ; CHECKIZFHMIN-NEXT: ret ; ; CHECKIZHINXMIN-LABEL: CascadedSelect: diff --git a/llvm/test/CodeGen/RISCV/half-zfa-fli.ll b/llvm/test/CodeGen/RISCV/half-zfa-fli.ll index 281a873235623..928535d79f02c 100644 --- a/llvm/test/CodeGen/RISCV/half-zfa-fli.ll +++ b/llvm/test/CodeGen/RISCV/half-zfa-fli.ll @@ -16,8 +16,9 @@ define half @loadfpimm1() { ; ; ZFHMIN-LABEL: loadfpimm1: ; ZFHMIN: # %bb.0: -; ZFHMIN-NEXT: lui a0, %hi(.LCPI0_0) -; ZFHMIN-NEXT: flh fa0, %lo(.LCPI0_0)(a0) +; ZFHMIN-NEXT: li a0, 11 +; ZFHMIN-NEXT: slli a0, a0, 10 +; ZFHMIN-NEXT: fmv.h.x fa0, a0 ; ZFHMIN-NEXT: ret ret half 0.0625 } @@ -30,8 +31,9 @@ define half @loadfpimm2() { ; ; ZFHMIN-LABEL: loadfpimm2: ; ZFHMIN: # %bb.0: -; ZFHMIN-NEXT: lui a0, %hi(.LCPI1_0) -; ZFHMIN-NEXT: flh fa0, %lo(.LCPI1_0)(a0) +; ZFHMIN-NEXT: li a0, 29 +; ZFHMIN-NEXT: slli a0, a0, 9 +; ZFHMIN-NEXT: fmv.h.x fa0, a0 ; ZFHMIN-NEXT: ret ret half 0.75 } @@ -44,8 +46,9 @@ define half @loadfpimm3() { ; ; ZFHMIN-LABEL: loadfpimm3: ; ZFHMIN: # %bb.0: -; ZFHMIN-NEXT: lui a0, %hi(.LCPI2_0) -; ZFHMIN-NEXT: flh fa0, %lo(.LCPI2_0)(a0) +; ZFHMIN-NEXT: lui a0, 4 +; ZFHMIN-NEXT: addi a0, a0, -768 +; ZFHMIN-NEXT: fmv.h.x fa0, a0 ; ZFHMIN-NEXT: ret ret half 1.25 } @@ -58,8 +61,9 @@ define half @loadfpimm4() { ; ; ZFHMIN-LABEL: loadfpimm4: ; ZFHMIN: # %bb.0: -; ZFHMIN-NEXT: lui a0, %hi(.LCPI3_0) -; ZFHMIN-NEXT: flh fa0, %lo(.LCPI3_0)(a0) +; ZFHMIN-NEXT: lui a0, 4 +; ZFHMIN-NEXT: addi a0, a0, 512 +; ZFHMIN-NEXT: fmv.h.x fa0, a0 ; ZFHMIN-NEXT: ret ret half 3.0 } @@ -72,8 +76,9 @@ define half @loadfpimm5() { ; ; ZFHMIN-LABEL: loadfpimm5: ; ZFHMIN: # %bb.0: -; ZFHMIN-NEXT: lui a0, %hi(.LCPI4_0) -; ZFHMIN-NEXT: flh fa0, %lo(.LCPI4_0)(a0) +; ZFHMIN-NEXT: li a0, 23 +; ZFHMIN-NEXT: slli a0, a0, 10 +; ZFHMIN-NEXT: fmv.h.x fa0, a0 ; ZFHMIN-NEXT: ret ret half 256.0 } @@ -86,8 +91,9 @@ define half @loadfpimm6() { ; ; ZFHMIN-LABEL: loadfpimm6: ; ZFHMIN: # %bb.0: -; ZFHMIN-NEXT: lui a0, %hi(.LCPI5_0) -; ZFHMIN-NEXT: flh fa0, %lo(.LCPI5_0)(a0) +; ZFHMIN-NEXT: li a0, 31 +; ZFHMIN-NEXT: slli a0, a0, 
10 +; ZFHMIN-NEXT: fmv.h.x fa0, a0 ; ZFHMIN-NEXT: ret ret half 0xH7C00 } @@ -100,8 +106,9 @@ define half @loadfpimm7() { ; ; ZFHMIN-LABEL: loadfpimm7: ; ZFHMIN: # %bb.0: -; ZFHMIN-NEXT: lui a0, %hi(.LCPI6_0) -; ZFHMIN-NEXT: flh fa0, %lo(.LCPI6_0)(a0) +; ZFHMIN-NEXT: lui a0, 8 +; ZFHMIN-NEXT: addi a0, a0, -512 +; ZFHMIN-NEXT: fmv.h.x fa0, a0 ; ZFHMIN-NEXT: ret ret half 0xH7E00 } @@ -123,14 +130,16 @@ define half @loadfpimm8() { define half @loadfpimm9() { ; CHECK-LABEL: loadfpimm9: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI8_0) -; CHECK-NEXT: flh fa0, %lo(.LCPI8_0)(a0) +; CHECK-NEXT: lui a0, 6 +; CHECK-NEXT: addi a0, a0, -1032 +; CHECK-NEXT: fmv.h.x fa0, a0 ; CHECK-NEXT: ret ; ; ZFHMIN-LABEL: loadfpimm9: ; ZFHMIN: # %bb.0: -; ZFHMIN-NEXT: lui a0, %hi(.LCPI8_0) -; ZFHMIN-NEXT: flh fa0, %lo(.LCPI8_0)(a0) +; ZFHMIN-NEXT: lui a0, 6 +; ZFHMIN-NEXT: addi a0, a0, -1032 +; ZFHMIN-NEXT: fmv.h.x fa0, a0 ; ZFHMIN-NEXT: ret ret half 255.0 } @@ -169,14 +178,16 @@ define half @loadfpimm11() { define half @loadfpimm12() { ; CHECK-LABEL: loadfpimm12: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI11_0) -; CHECK-NEXT: flh fa0, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: lui a0, 8 +; CHECK-NEXT: addi a0, a0, -1023 +; CHECK-NEXT: fmv.h.x fa0, a0 ; CHECK-NEXT: ret ; ; ZFHMIN-LABEL: loadfpimm12: ; ZFHMIN: # %bb.0: -; ZFHMIN-NEXT: lui a0, %hi(.LCPI11_0) -; ZFHMIN-NEXT: flh fa0, %lo(.LCPI11_0)(a0) +; ZFHMIN-NEXT: lui a0, 8 +; ZFHMIN-NEXT: addi a0, a0, -1023 +; ZFHMIN-NEXT: fmv.h.x fa0, a0 ; ZFHMIN-NEXT: ret ret half 0xH7c01 } @@ -189,8 +200,9 @@ define half @loadfpimm13() { ; ; ZFHMIN-LABEL: loadfpimm13: ; ZFHMIN: # %bb.0: -; ZFHMIN-NEXT: lui a0, %hi(.LCPI12_0) -; ZFHMIN-NEXT: flh fa0, %lo(.LCPI12_0)(a0) +; ZFHMIN-NEXT: li a0, -17 +; ZFHMIN-NEXT: slli a0, a0, 10 +; ZFHMIN-NEXT: fmv.h.x fa0, a0 ; ZFHMIN-NEXT: ret ret half -1.0 } @@ -222,8 +234,9 @@ define half @loadfpimm15() { ; ; ZFHMIN-LABEL: loadfpimm15: ; ZFHMIN: # %bb.0: -; ZFHMIN-NEXT: lui a0, %hi(.LCPI14_0) -; ZFHMIN-NEXT: flh fa0, %lo(.LCPI14_0)(a0) +; ZFHMIN-NEXT: li a0, -31 +; ZFHMIN-NEXT: slli a0, a0, 10 +; ZFHMIN-NEXT: fmv.h.x fa0, a0 ; ZFHMIN-NEXT: ret ret half 0xH8400 } diff --git a/llvm/test/CodeGen/RISCV/half-zfa.ll b/llvm/test/CodeGen/RISCV/half-zfa.ll index 960c7c4a73e4f..90c66e7fe2ca4 100644 --- a/llvm/test/CodeGen/RISCV/half-zfa.ll +++ b/llvm/test/CodeGen/RISCV/half-zfa.ll @@ -350,12 +350,15 @@ define half @select_loadfpimm(half %x) nounwind { ; ZFHMIN-NEXT: fcvt.s.h fa5, fa0 ; ZFHMIN-NEXT: fmv.w.x fa4, zero ; ZFHMIN-NEXT: fle.s a0, fa4, fa5 -; ZFHMIN-NEXT: xori a0, a0, 1 -; ZFHMIN-NEXT: slli a0, a0, 1 -; ZFHMIN-NEXT: lui a1, %hi(.LCPI17_0) -; ZFHMIN-NEXT: addi a1, a1, %lo(.LCPI17_0) -; ZFHMIN-NEXT: add a0, a1, a0 -; ZFHMIN-NEXT: flh fa0, 0(a0) +; ZFHMIN-NEXT: beqz a0, .LBB17_2 +; ZFHMIN-NEXT: # %bb.1: # %entry +; ZFHMIN-NEXT: li a0, 7 +; ZFHMIN-NEXT: j .LBB17_3 +; ZFHMIN-NEXT: .LBB17_2: +; ZFHMIN-NEXT: li a0, -9 +; ZFHMIN-NEXT: .LBB17_3: # %entry +; ZFHMIN-NEXT: slli a0, a0, 11 +; ZFHMIN-NEXT: fmv.h.x fa0, a0 ; ZFHMIN-NEXT: ret entry: %cmp = fcmp ult half %x, 0.000000e+00 diff --git a/llvm/test/CodeGen/RISCV/repeated-fp-divisors.ll b/llvm/test/CodeGen/RISCV/repeated-fp-divisors.ll index f183c936fc672..f3b4319ccc4fa 100644 --- a/llvm/test/CodeGen/RISCV/repeated-fp-divisors.ll +++ b/llvm/test/CodeGen/RISCV/repeated-fp-divisors.ll @@ -17,8 +17,9 @@ entry: define void @two_fdivs(double %a0, double %a1, double %a2, ptr %res) { ; CHECK-LABEL: two_fdivs: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lui a1, %hi(.LCPI1_0) -; CHECK-NEXT: fld fa5, 
%lo(.LCPI1_0)(a1) +; CHECK-NEXT: li a1, 1023 +; CHECK-NEXT: slli a1, a1, 52 +; CHECK-NEXT: fmv.d.x fa5, a1 ; CHECK-NEXT: fdiv.d fa5, fa5, fa0 ; CHECK-NEXT: fmul.d fa4, fa1, fa5 ; CHECK-NEXT: fmul.d fa5, fa2, fa5 diff --git a/llvm/test/CodeGen/RISCV/rv64-double-convert.ll b/llvm/test/CodeGen/RISCV/rv64-double-convert.ll index caa6c2f8ff96f..a919452389c43 100644 --- a/llvm/test/CodeGen/RISCV/rv64-double-convert.ll +++ b/llvm/test/CodeGen/RISCV/rv64-double-convert.ll @@ -122,9 +122,10 @@ define i128 @fptosi_sat_f64_to_i128(double %a) nounwind { ; RV64ID-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64ID-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64ID-NEXT: fsd fs0, 8(sp) # 8-byte Folded Spill -; RV64ID-NEXT: lui a0, %hi(.LCPI4_0) -; RV64ID-NEXT: fld fa5, %lo(.LCPI4_0)(a0) ; RV64ID-NEXT: fmv.d fs0, fa0 +; RV64ID-NEXT: li a0, -449 +; RV64ID-NEXT: slli a0, a0, 53 +; RV64ID-NEXT: fmv.d.x fa5, a0 ; RV64ID-NEXT: fle.d s0, fa5, fa0 ; RV64ID-NEXT: call __fixdfti ; RV64ID-NEXT: li a2, -1 @@ -132,8 +133,8 @@ define i128 @fptosi_sat_f64_to_i128(double %a) nounwind { ; RV64ID-NEXT: # %bb.1: ; RV64ID-NEXT: slli a1, a2, 63 ; RV64ID-NEXT: .LBB4_2: -; RV64ID-NEXT: lui a3, %hi(.LCPI4_1) -; RV64ID-NEXT: fld fa5, %lo(.LCPI4_1)(a3) +; RV64ID-NEXT: lui a3, %hi(.LCPI4_0) +; RV64ID-NEXT: fld fa5, %lo(.LCPI4_0)(a3) ; RV64ID-NEXT: flt.d a3, fa5, fs0 ; RV64ID-NEXT: beqz a3, .LBB4_4 ; RV64ID-NEXT: # %bb.3: @@ -170,16 +171,17 @@ define i128 @fptosi_sat_f64_to_i128(double %a) nounwind { ; RV64IDINX-NEXT: # %bb.1: ; RV64IDINX-NEXT: slli a1, a2, 63 ; RV64IDINX-NEXT: .LBB4_2: -; RV64IDINX-NEXT: lui a3, %hi(.LCPI4_0) -; RV64IDINX-NEXT: ld a3, %lo(.LCPI4_0)(a3) +; RV64IDINX-NEXT: li a3, 575 +; RV64IDINX-NEXT: slli a3, a3, 53 +; RV64IDINX-NEXT: addi a3, a3, -1 ; RV64IDINX-NEXT: flt.d a3, a3, s0 ; RV64IDINX-NEXT: beqz a3, .LBB4_4 ; RV64IDINX-NEXT: # %bb.3: ; RV64IDINX-NEXT: srli a1, a2, 1 ; RV64IDINX-NEXT: .LBB4_4: ; RV64IDINX-NEXT: feq.d a2, s0, s0 -; RV64IDINX-NEXT: neg a3, a3 ; RV64IDINX-NEXT: neg a4, s1 +; RV64IDINX-NEXT: neg a3, a3 ; RV64IDINX-NEXT: neg a2, a2 ; RV64IDINX-NEXT: and a0, a4, a0 ; RV64IDINX-NEXT: and a1, a2, a1 @@ -267,10 +269,11 @@ define i128 @fptoui_sat_f64_to_i128(double %a) nounwind { ; RV64IDINX-NEXT: neg s1, a0 ; RV64IDINX-NEXT: mv a0, s0 ; RV64IDINX-NEXT: call __fixunsdfti -; RV64IDINX-NEXT: lui a2, %hi(.LCPI5_0) -; RV64IDINX-NEXT: ld a2, %lo(.LCPI5_0)(a2) ; RV64IDINX-NEXT: and a0, s1, a0 +; RV64IDINX-NEXT: li a2, 1151 ; RV64IDINX-NEXT: and a1, s1, a1 +; RV64IDINX-NEXT: slli a2, a2, 52 +; RV64IDINX-NEXT: addi a2, a2, -1 ; RV64IDINX-NEXT: flt.d a2, a2, s0 ; RV64IDINX-NEXT: neg a2, a2 ; RV64IDINX-NEXT: or a0, a2, a0 diff --git a/llvm/test/CodeGen/RISCV/rv64-float-convert.ll b/llvm/test/CodeGen/RISCV/rv64-float-convert.ll index ebda78528810f..0af75a789f7a2 100644 --- a/llvm/test/CodeGen/RISCV/rv64-float-convert.ll +++ b/llvm/test/CodeGen/RISCV/rv64-float-convert.ll @@ -130,16 +130,17 @@ define i128 @fptosi_sat_f32_to_i128(float %a) nounwind { ; RV64IF-NEXT: # %bb.1: ; RV64IF-NEXT: slli a1, a2, 63 ; RV64IF-NEXT: .LBB4_2: -; RV64IF-NEXT: lui a3, %hi(.LCPI4_0) -; RV64IF-NEXT: flw fa5, %lo(.LCPI4_0)(a3) +; RV64IF-NEXT: lui a3, 520192 +; RV64IF-NEXT: addi a3, a3, -1 +; RV64IF-NEXT: fmv.w.x fa5, a3 ; RV64IF-NEXT: flt.s a3, fa5, fs0 ; RV64IF-NEXT: beqz a3, .LBB4_4 ; RV64IF-NEXT: # %bb.3: ; RV64IF-NEXT: srli a1, a2, 1 ; RV64IF-NEXT: .LBB4_4: ; RV64IF-NEXT: feq.s a2, fs0, fs0 -; RV64IF-NEXT: neg a3, a3 ; RV64IF-NEXT: neg a4, s0 +; RV64IF-NEXT: neg a3, a3 ; RV64IF-NEXT: neg a2, a2 ; RV64IF-NEXT: and a0, a4, a0 
; RV64IF-NEXT: and a1, a2, a1 @@ -235,10 +236,11 @@ define i128 @fptoui_sat_f32_to_i128(float %a) nounwind { ; RV64IF-NEXT: fle.s a0, fa5, fa0 ; RV64IF-NEXT: neg s0, a0 ; RV64IF-NEXT: call __fixunssfti -; RV64IF-NEXT: lui a2, %hi(.LCPI5_0) -; RV64IF-NEXT: flw fa5, %lo(.LCPI5_0)(a2) ; RV64IF-NEXT: and a0, s0, a0 +; RV64IF-NEXT: lui a2, 522240 ; RV64IF-NEXT: and a1, s0, a1 +; RV64IF-NEXT: addi a2, a2, -1 +; RV64IF-NEXT: fmv.w.x fa5, a2 ; RV64IF-NEXT: flt.s a2, fa5, fs0 ; RV64IF-NEXT: neg a2, a2 ; RV64IF-NEXT: or a0, a2, a0 diff --git a/llvm/test/CodeGen/RISCV/rv64-half-convert.ll b/llvm/test/CodeGen/RISCV/rv64-half-convert.ll index 648f3789953aa..d8f3816b85485 100644 --- a/llvm/test/CodeGen/RISCV/rv64-half-convert.ll +++ b/llvm/test/CodeGen/RISCV/rv64-half-convert.ll @@ -208,16 +208,17 @@ define i128 @fptosi_sat_f16_to_i128(half %a) nounwind { ; RV64IZFH-NEXT: # %bb.1: ; RV64IZFH-NEXT: slli a1, a2, 63 ; RV64IZFH-NEXT: .LBB4_2: -; RV64IZFH-NEXT: lui a3, %hi(.LCPI4_0) -; RV64IZFH-NEXT: flw fa5, %lo(.LCPI4_0)(a3) +; RV64IZFH-NEXT: lui a3, 520192 +; RV64IZFH-NEXT: addi a3, a3, -1 +; RV64IZFH-NEXT: fmv.w.x fa5, a3 ; RV64IZFH-NEXT: flt.s a3, fa5, fs0 ; RV64IZFH-NEXT: beqz a3, .LBB4_4 ; RV64IZFH-NEXT: # %bb.3: ; RV64IZFH-NEXT: srli a1, a2, 1 ; RV64IZFH-NEXT: .LBB4_4: ; RV64IZFH-NEXT: feq.s a2, fs0, fs0 -; RV64IZFH-NEXT: neg a3, a3 ; RV64IZFH-NEXT: neg a4, s0 +; RV64IZFH-NEXT: neg a3, a3 ; RV64IZFH-NEXT: neg a2, a2 ; RV64IZFH-NEXT: and a0, a4, a0 ; RV64IZFH-NEXT: and a1, a2, a1 @@ -308,23 +309,25 @@ define i128 @fptoui_sat_f16_to_i128(half %a) nounwind { ; RV64IZFH-NEXT: addi sp, sp, -32 ; RV64IZFH-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64IZFH-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; RV64IZFH-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64IZFH-NEXT: lui a0, %hi(.LCPI5_0) -; RV64IZFH-NEXT: flw fa5, %lo(.LCPI5_0)(a0) -; RV64IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV64IZFH-NEXT: fmv.w.x fa4, zero -; RV64IZFH-NEXT: fle.s a0, fa4, fa0 -; RV64IZFH-NEXT: flt.s a1, fa5, fa0 -; RV64IZFH-NEXT: neg s0, a1 -; RV64IZFH-NEXT: neg s1, a0 +; RV64IZFH-NEXT: fsw fs0, 12(sp) # 4-byte Folded Spill +; RV64IZFH-NEXT: fcvt.s.h fs0, fa0 +; RV64IZFH-NEXT: fmv.w.x fa5, zero +; RV64IZFH-NEXT: fle.s a0, fa5, fs0 +; RV64IZFH-NEXT: neg s0, a0 +; RV64IZFH-NEXT: fmv.s fa0, fs0 ; RV64IZFH-NEXT: call __fixunssfti -; RV64IZFH-NEXT: and a0, s1, a0 -; RV64IZFH-NEXT: and a1, s1, a1 -; RV64IZFH-NEXT: or a0, s0, a0 -; RV64IZFH-NEXT: or a1, s0, a1 +; RV64IZFH-NEXT: and a0, s0, a0 +; RV64IZFH-NEXT: lui a2, 522240 +; RV64IZFH-NEXT: and a1, s0, a1 +; RV64IZFH-NEXT: addi a2, a2, -1 +; RV64IZFH-NEXT: fmv.w.x fa5, a2 +; RV64IZFH-NEXT: flt.s a2, fa5, fs0 +; RV64IZFH-NEXT: neg a2, a2 +; RV64IZFH-NEXT: or a0, a2, a0 +; RV64IZFH-NEXT: or a1, a2, a1 ; RV64IZFH-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64IZFH-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; RV64IZFH-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64IZFH-NEXT: flw fs0, 12(sp) # 4-byte Folded Reload ; RV64IZFH-NEXT: addi sp, sp, 32 ; RV64IZFH-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll index 2fe8c8ce7975a..6507349f45a2f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll @@ -1,16 +1,16 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d 
-verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFH,RV32ZFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFH,RV64ZFH ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFHMIN,RV32ZFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFHMIN,RV64ZFH declare @llvm.vp.ceil.nxv1bf16(, , i32) @@ -407,10 +407,11 @@ declare @llvm.vp.ceil.nxv1f16(, @vp_ceil_vv_nxv1f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI12_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 3 @@ -453,10 +454,11 @@ define @vp_ceil_vv_nxv1f16( %va, @vp_ceil_vv_nxv1f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv1f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI13_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -493,10 +495,11 @@ declare @llvm.vp.ceil.nxv2f16(, @vp_ceil_vv_nxv2f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI14_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 3 @@ -539,10 +542,11 @@ define @vp_ceil_vv_nxv2f16( %va, @vp_ceil_vv_nxv2f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv2f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI15_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -579,10 +583,11 @@ declare @llvm.vp.ceil.nxv4f16(, @vp_ceil_vv_nxv4f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI16_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 3 @@ -625,10 +630,11 @@ define @vp_ceil_vv_nxv4f16( %va, @vp_ceil_vv_nxv4f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv4f16_unmasked: ; ZVFH: # 
%bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI17_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -667,9 +673,10 @@ define @vp_ceil_vv_nxv8f16( %va, @vp_ceil_vv_nxv8f16( %va, @vp_ceil_vv_nxv8f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv8f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI19_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t @@ -755,9 +763,10 @@ define @vp_ceil_vv_nxv16f16( %va, @vp_ceil_vv_nxv16f16( %va, @vp_ceil_vv_nxv16f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv16f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI21_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -843,9 +853,10 @@ define @vp_ceil_vv_nxv32f16( %va, @vp_ceil_vv_nxv32f16( %va, @vp_ceil_vv_nxv32f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv32f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI23_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -1210,41 +1222,75 @@ define @vp_ceil_vv_nxv16f32_unmasked( declare @llvm.vp.ceil.nxv1f64(, , i32) define @vp_ceil_vv_nxv1f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_ceil_vv_nxv1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI34_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32ZFH-LABEL: vp_ceil_vv_nxv1f64: +; RV32ZFH: # %bb.0: +; RV32ZFH-NEXT: lui a1, %hi(.LCPI34_0) +; RV32ZFH-NEXT: fld fa5, %lo(.LCPI34_0)(a1) +; RV32ZFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZFH-NEXT: vfabs.v v9, v8, v0.t +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV32ZFH-NEXT: fsrmi a0, 3 +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32ZFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZFH-NEXT: fsrm a0 +; RV32ZFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZFH-NEXT: ret +; +; RV64ZFH-LABEL: vp_ceil_vv_nxv1f64: +; RV64ZFH: # %bb.0: +; RV64ZFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZFH-NEXT: vfabs.v v9, v8, v0.t +; RV64ZFH-NEXT: li a0, 1075 +; RV64ZFH-NEXT: slli 
a0, a0, 52 +; RV64ZFH-NEXT: fmv.d.x fa5, a0 +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV64ZFH-NEXT: fsrmi a0, 3 +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64ZFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZFH-NEXT: fsrm a0 +; RV64ZFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZFH-NEXT: ret %v = call @llvm.vp.ceil.nxv1f64( %va, %m, i32 %evl) ret %v } define @vp_ceil_vv_nxv1f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_ceil_vv_nxv1f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI35_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32ZFH-LABEL: vp_ceil_vv_nxv1f64_unmasked: +; RV32ZFH: # %bb.0: +; RV32ZFH-NEXT: lui a1, %hi(.LCPI35_0) +; RV32ZFH-NEXT: fld fa5, %lo(.LCPI35_0)(a1) +; RV32ZFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZFH-NEXT: vfabs.v v9, v8 +; RV32ZFH-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZFH-NEXT: fsrmi a0, 3 +; RV32ZFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZFH-NEXT: fsrm a0 +; RV32ZFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZFH-NEXT: ret +; +; RV64ZFH-LABEL: vp_ceil_vv_nxv1f64_unmasked: +; RV64ZFH: # %bb.0: +; RV64ZFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZFH-NEXT: vfabs.v v9, v8 +; RV64ZFH-NEXT: li a0, 1075 +; RV64ZFH-NEXT: slli a0, a0, 52 +; RV64ZFH-NEXT: fmv.d.x fa5, a0 +; RV64ZFH-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZFH-NEXT: fsrmi a0, 3 +; RV64ZFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZFH-NEXT: fsrm a0 +; RV64ZFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZFH-NEXT: ret %v = call @llvm.vp.ceil.nxv1f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1252,43 +1298,79 @@ define @vp_ceil_vv_nxv1f64_unmasked( declare @llvm.vp.ceil.nxv2f64(, , i32) define @vp_ceil_vv_nxv2f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_ceil_vv_nxv2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI36_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a0) -; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32ZFH-LABEL: vp_ceil_vv_nxv2f64: +; RV32ZFH: # %bb.0: +; RV32ZFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZFH-NEXT: vmv1r.v v10, v0 +; RV32ZFH-NEXT: lui a0, %hi(.LCPI36_0) +; RV32ZFH-NEXT: fld fa5, %lo(.LCPI36_0)(a0) +; RV32ZFH-NEXT: vfabs.v v12, v8, v0.t +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZFH-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV32ZFH-NEXT: fsrmi a0, 3 +; RV32ZFH-NEXT: vmv1r.v v0, v10 +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32ZFH-NEXT: 
vfcvt.x.f.v v12, v8, v0.t +; RV32ZFH-NEXT: fsrm a0 +; RV32ZFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZFH-NEXT: ret +; +; RV64ZFH-LABEL: vp_ceil_vv_nxv2f64: +; RV64ZFH: # %bb.0: +; RV64ZFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZFH-NEXT: vmv1r.v v10, v0 +; RV64ZFH-NEXT: vfabs.v v12, v8, v0.t +; RV64ZFH-NEXT: li a0, 1075 +; RV64ZFH-NEXT: slli a0, a0, 52 +; RV64ZFH-NEXT: fmv.d.x fa5, a0 +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZFH-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV64ZFH-NEXT: fsrmi a0, 3 +; RV64ZFH-NEXT: vmv1r.v v0, v10 +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64ZFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZFH-NEXT: fsrm a0 +; RV64ZFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZFH-NEXT: ret %v = call @llvm.vp.ceil.nxv2f64( %va, %m, i32 %evl) ret %v } define @vp_ceil_vv_nxv2f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_ceil_vv_nxv2f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI37_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; RV32ZFH-LABEL: vp_ceil_vv_nxv2f64_unmasked: +; RV32ZFH: # %bb.0: +; RV32ZFH-NEXT: lui a1, %hi(.LCPI37_0) +; RV32ZFH-NEXT: fld fa5, %lo(.LCPI37_0)(a1) +; RV32ZFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZFH-NEXT: vfabs.v v10, v8 +; RV32ZFH-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZFH-NEXT: fsrmi a0, 3 +; RV32ZFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZFH-NEXT: fsrm a0 +; RV32ZFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZFH-NEXT: ret +; +; RV64ZFH-LABEL: vp_ceil_vv_nxv2f64_unmasked: +; RV64ZFH: # %bb.0: +; RV64ZFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZFH-NEXT: vfabs.v v10, v8 +; RV64ZFH-NEXT: li a0, 1075 +; RV64ZFH-NEXT: slli a0, a0, 52 +; RV64ZFH-NEXT: fmv.d.x fa5, a0 +; RV64ZFH-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZFH-NEXT: fsrmi a0, 3 +; RV64ZFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZFH-NEXT: fsrm a0 +; RV64ZFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZFH-NEXT: ret %v = call @llvm.vp.ceil.nxv2f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1296,43 +1378,79 @@ define @vp_ceil_vv_nxv2f64_unmasked( declare @llvm.vp.ceil.nxv4f64(, , i32) define @vp_ceil_vv_nxv4f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_ceil_vv_nxv4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI38_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a0) -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; 
CHECK-NEXT: ret +; RV32ZFH-LABEL: vp_ceil_vv_nxv4f64: +; RV32ZFH: # %bb.0: +; RV32ZFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZFH-NEXT: vmv1r.v v12, v0 +; RV32ZFH-NEXT: lui a0, %hi(.LCPI38_0) +; RV32ZFH-NEXT: fld fa5, %lo(.LCPI38_0)(a0) +; RV32ZFH-NEXT: vfabs.v v16, v8, v0.t +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZFH-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV32ZFH-NEXT: fsrmi a0, 3 +; RV32ZFH-NEXT: vmv1r.v v0, v12 +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32ZFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZFH-NEXT: fsrm a0 +; RV32ZFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZFH-NEXT: ret +; +; RV64ZFH-LABEL: vp_ceil_vv_nxv4f64: +; RV64ZFH: # %bb.0: +; RV64ZFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZFH-NEXT: vmv1r.v v12, v0 +; RV64ZFH-NEXT: vfabs.v v16, v8, v0.t +; RV64ZFH-NEXT: li a0, 1075 +; RV64ZFH-NEXT: slli a0, a0, 52 +; RV64ZFH-NEXT: fmv.d.x fa5, a0 +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZFH-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV64ZFH-NEXT: fsrmi a0, 3 +; RV64ZFH-NEXT: vmv1r.v v0, v12 +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64ZFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZFH-NEXT: fsrm a0 +; RV64ZFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZFH-NEXT: ret %v = call @llvm.vp.ceil.nxv4f64( %va, %m, i32 %evl) ret %v } define @vp_ceil_vv_nxv4f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_ceil_vv_nxv4f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI39_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32ZFH-LABEL: vp_ceil_vv_nxv4f64_unmasked: +; RV32ZFH: # %bb.0: +; RV32ZFH-NEXT: lui a1, %hi(.LCPI39_0) +; RV32ZFH-NEXT: fld fa5, %lo(.LCPI39_0)(a1) +; RV32ZFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZFH-NEXT: vfabs.v v12, v8 +; RV32ZFH-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZFH-NEXT: fsrmi a0, 3 +; RV32ZFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZFH-NEXT: fsrm a0 +; RV32ZFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZFH-NEXT: ret +; +; RV64ZFH-LABEL: vp_ceil_vv_nxv4f64_unmasked: +; RV64ZFH: # %bb.0: +; RV64ZFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZFH-NEXT: vfabs.v v12, v8 +; RV64ZFH-NEXT: li a0, 1075 +; RV64ZFH-NEXT: slli a0, a0, 52 +; RV64ZFH-NEXT: fmv.d.x fa5, a0 +; RV64ZFH-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZFH-NEXT: fsrmi a0, 3 +; RV64ZFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZFH-NEXT: fsrm a0 +; RV64ZFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZFH-NEXT: ret %v = call @llvm.vp.ceil.nxv4f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1340,43 +1458,79 @@ define @vp_ceil_vv_nxv4f64_unmasked( declare @llvm.vp.ceil.nxv7f64(, , i32) define @vp_ceil_vv_nxv7f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_ceil_vv_nxv7f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui 
a0, %hi(.LCPI40_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32ZFH-LABEL: vp_ceil_vv_nxv7f64: +; RV32ZFH: # %bb.0: +; RV32ZFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZFH-NEXT: vmv1r.v v16, v0 +; RV32ZFH-NEXT: lui a0, %hi(.LCPI40_0) +; RV32ZFH-NEXT: fld fa5, %lo(.LCPI40_0)(a0) +; RV32ZFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZFH-NEXT: fsrmi a0, 3 +; RV32ZFH-NEXT: vmv1r.v v0, v16 +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZFH-NEXT: fsrm a0 +; RV32ZFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZFH-NEXT: ret +; +; RV64ZFH-LABEL: vp_ceil_vv_nxv7f64: +; RV64ZFH: # %bb.0: +; RV64ZFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZFH-NEXT: vmv1r.v v16, v0 +; RV64ZFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZFH-NEXT: li a0, 1075 +; RV64ZFH-NEXT: slli a0, a0, 52 +; RV64ZFH-NEXT: fmv.d.x fa5, a0 +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZFH-NEXT: fsrmi a0, 3 +; RV64ZFH-NEXT: vmv1r.v v0, v16 +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZFH-NEXT: fsrm a0 +; RV64ZFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZFH-NEXT: ret %v = call @llvm.vp.ceil.nxv7f64( %va, %m, i32 %evl) ret %v } define @vp_ceil_vv_nxv7f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_ceil_vv_nxv7f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI41_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZFH-LABEL: vp_ceil_vv_nxv7f64_unmasked: +; RV32ZFH: # %bb.0: +; RV32ZFH-NEXT: lui a1, %hi(.LCPI41_0) +; RV32ZFH-NEXT: fld fa5, %lo(.LCPI41_0)(a1) +; RV32ZFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZFH-NEXT: vfabs.v v16, v8 +; RV32ZFH-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZFH-NEXT: fsrmi a0, 3 +; RV32ZFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZFH-NEXT: fsrm a0 +; RV32ZFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZFH-NEXT: ret +; +; RV64ZFH-LABEL: vp_ceil_vv_nxv7f64_unmasked: +; RV64ZFH: # %bb.0: +; RV64ZFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZFH-NEXT: vfabs.v v16, v8 +; RV64ZFH-NEXT: li a0, 1075 +; RV64ZFH-NEXT: slli a0, a0, 52 +; RV64ZFH-NEXT: fmv.d.x fa5, a0 +; RV64ZFH-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZFH-NEXT: fsrmi a0, 3 +; RV64ZFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZFH-NEXT: fsrm a0 +; RV64ZFH-NEXT: vfcvt.f.x.v v16, v16, 
v0.t +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZFH-NEXT: ret %v = call @llvm.vp.ceil.nxv7f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1384,43 +1538,79 @@ define @vp_ceil_vv_nxv7f64_unmasked( declare @llvm.vp.ceil.nxv8f64(, , i32) define @vp_ceil_vv_nxv8f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_ceil_vv_nxv8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI42_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32ZFH-LABEL: vp_ceil_vv_nxv8f64: +; RV32ZFH: # %bb.0: +; RV32ZFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZFH-NEXT: vmv1r.v v16, v0 +; RV32ZFH-NEXT: lui a0, %hi(.LCPI42_0) +; RV32ZFH-NEXT: fld fa5, %lo(.LCPI42_0)(a0) +; RV32ZFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZFH-NEXT: fsrmi a0, 3 +; RV32ZFH-NEXT: vmv1r.v v0, v16 +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZFH-NEXT: fsrm a0 +; RV32ZFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZFH-NEXT: ret +; +; RV64ZFH-LABEL: vp_ceil_vv_nxv8f64: +; RV64ZFH: # %bb.0: +; RV64ZFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZFH-NEXT: vmv1r.v v16, v0 +; RV64ZFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZFH-NEXT: li a0, 1075 +; RV64ZFH-NEXT: slli a0, a0, 52 +; RV64ZFH-NEXT: fmv.d.x fa5, a0 +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZFH-NEXT: fsrmi a0, 3 +; RV64ZFH-NEXT: vmv1r.v v0, v16 +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZFH-NEXT: fsrm a0 +; RV64ZFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZFH-NEXT: ret %v = call @llvm.vp.ceil.nxv8f64( %va, %m, i32 %evl) ret %v } define @vp_ceil_vv_nxv8f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_ceil_vv_nxv8f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI43_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZFH-LABEL: vp_ceil_vv_nxv8f64_unmasked: +; RV32ZFH: # %bb.0: +; RV32ZFH-NEXT: lui a1, %hi(.LCPI43_0) +; RV32ZFH-NEXT: fld fa5, %lo(.LCPI43_0)(a1) +; RV32ZFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZFH-NEXT: vfabs.v v16, v8 +; RV32ZFH-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZFH-NEXT: fsrmi a0, 3 +; RV32ZFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZFH-NEXT: fsrm a0 +; RV32ZFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZFH-NEXT: vsetvli zero, zero, 
e64, m8, ta, mu +; RV32ZFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZFH-NEXT: ret +; +; RV64ZFH-LABEL: vp_ceil_vv_nxv8f64_unmasked: +; RV64ZFH: # %bb.0: +; RV64ZFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZFH-NEXT: vfabs.v v16, v8 +; RV64ZFH-NEXT: li a0, 1075 +; RV64ZFH-NEXT: slli a0, a0, 52 +; RV64ZFH-NEXT: fmv.d.x fa5, a0 +; RV64ZFH-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZFH-NEXT: fsrmi a0, 3 +; RV64ZFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZFH-NEXT: fsrm a0 +; RV64ZFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZFH-NEXT: ret %v = call @llvm.vp.ceil.nxv8f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1429,87 +1619,167 @@ define @vp_ceil_vv_nxv8f64_unmasked( declare @llvm.vp.ceil.nxv16f64(, , i32) define @vp_ceil_vv_nxv16f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_ceil_vv_nxv16f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v7, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: lui a2, %hi(.LCPI44_0) -; CHECK-NEXT: srli a3, a1, 3 -; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a2) -; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: vslidedown.vx v6, v0, a3 -; CHECK-NEXT: sltu a3, a0, a2 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a2, 3 -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB44_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB44_2: -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32ZFH-LABEL: vp_ceil_vv_nxv16f64: +; RV32ZFH: # %bb.0: +; RV32ZFH-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV32ZFH-NEXT: vmv1r.v v7, v0 +; RV32ZFH-NEXT: csrr a1, vlenb +; RV32ZFH-NEXT: lui a2, %hi(.LCPI44_0) +; RV32ZFH-NEXT: srli a3, a1, 3 +; RV32ZFH-NEXT: fld fa5, %lo(.LCPI44_0)(a2) +; RV32ZFH-NEXT: sub a2, a0, a1 +; RV32ZFH-NEXT: vslidedown.vx v6, v0, a3 +; RV32ZFH-NEXT: sltu a3, a0, a2 +; RV32ZFH-NEXT: addi a3, a3, -1 +; RV32ZFH-NEXT: and a2, a3, a2 +; RV32ZFH-NEXT: vmv1r.v v0, v6 +; RV32ZFH-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32ZFH-NEXT: vfabs.v v24, v16, v0.t +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZFH-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV32ZFH-NEXT: fsrmi a2, 3 +; RV32ZFH-NEXT: vmv1r.v v0, v6 +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZFH-NEXT: fsrm a2 +; RV32ZFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZFH-NEXT: bltu a0, a1, .LBB44_2 +; RV32ZFH-NEXT: # %bb.1: +; 
RV32ZFH-NEXT: mv a0, a1 +; RV32ZFH-NEXT: .LBB44_2: +; RV32ZFH-NEXT: vmv1r.v v0, v7 +; RV32ZFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZFH-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV32ZFH-NEXT: fsrmi a0, 3 +; RV32ZFH-NEXT: vmv1r.v v0, v7 +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZFH-NEXT: fsrm a0 +; RV32ZFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZFH-NEXT: ret +; +; RV64ZFH-LABEL: vp_ceil_vv_nxv16f64: +; RV64ZFH: # %bb.0: +; RV64ZFH-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV64ZFH-NEXT: vmv1r.v v7, v0 +; RV64ZFH-NEXT: csrr a1, vlenb +; RV64ZFH-NEXT: li a2, 1075 +; RV64ZFH-NEXT: srli a3, a1, 3 +; RV64ZFH-NEXT: vslidedown.vx v6, v0, a3 +; RV64ZFH-NEXT: sub a3, a0, a1 +; RV64ZFH-NEXT: slli a2, a2, 52 +; RV64ZFH-NEXT: fmv.d.x fa5, a2 +; RV64ZFH-NEXT: sltu a2, a0, a3 +; RV64ZFH-NEXT: addi a2, a2, -1 +; RV64ZFH-NEXT: and a2, a2, a3 +; RV64ZFH-NEXT: vmv1r.v v0, v6 +; RV64ZFH-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64ZFH-NEXT: vfabs.v v24, v16, v0.t +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZFH-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV64ZFH-NEXT: fsrmi a2, 3 +; RV64ZFH-NEXT: vmv1r.v v0, v6 +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZFH-NEXT: fsrm a2 +; RV64ZFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZFH-NEXT: bltu a0, a1, .LBB44_2 +; RV64ZFH-NEXT: # %bb.1: +; RV64ZFH-NEXT: mv a0, a1 +; RV64ZFH-NEXT: .LBB44_2: +; RV64ZFH-NEXT: vmv1r.v v0, v7 +; RV64ZFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZFH-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV64ZFH-NEXT: fsrmi a0, 3 +; RV64ZFH-NEXT: vmv1r.v v0, v7 +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZFH-NEXT: fsrm a0 +; RV64ZFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZFH-NEXT: ret %v = call @llvm.vp.ceil.nxv16f64( %va, %m, i32 %evl) ret %v } define @vp_ceil_vv_nxv16f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_ceil_vv_nxv16f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: lui a2, %hi(.LCPI45_0) -; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: fld fa5, %lo(.LCPI45_0)(a2) -; CHECK-NEXT: sltu a2, a0, a3 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a2, a2, a3 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: fsrmi a2, 3 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB45_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB45_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; 
CHECK-NEXT: ret +; RV32ZFH-LABEL: vp_ceil_vv_nxv16f64_unmasked: +; RV32ZFH: # %bb.0: +; RV32ZFH-NEXT: csrr a1, vlenb +; RV32ZFH-NEXT: lui a2, %hi(.LCPI45_0) +; RV32ZFH-NEXT: sub a3, a0, a1 +; RV32ZFH-NEXT: fld fa5, %lo(.LCPI45_0)(a2) +; RV32ZFH-NEXT: sltu a2, a0, a3 +; RV32ZFH-NEXT: addi a2, a2, -1 +; RV32ZFH-NEXT: and a2, a2, a3 +; RV32ZFH-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32ZFH-NEXT: vfabs.v v24, v16 +; RV32ZFH-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZFH-NEXT: fsrmi a2, 3 +; RV32ZFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZFH-NEXT: fsrm a2 +; RV32ZFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZFH-NEXT: bltu a0, a1, .LBB45_2 +; RV32ZFH-NEXT: # %bb.1: +; RV32ZFH-NEXT: mv a0, a1 +; RV32ZFH-NEXT: .LBB45_2: +; RV32ZFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZFH-NEXT: vfabs.v v24, v8 +; RV32ZFH-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZFH-NEXT: fsrmi a0, 3 +; RV32ZFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZFH-NEXT: fsrm a0 +; RV32ZFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZFH-NEXT: ret +; +; RV64ZFH-LABEL: vp_ceil_vv_nxv16f64_unmasked: +; RV64ZFH: # %bb.0: +; RV64ZFH-NEXT: csrr a1, vlenb +; RV64ZFH-NEXT: li a2, 1075 +; RV64ZFH-NEXT: sub a3, a0, a1 +; RV64ZFH-NEXT: slli a2, a2, 52 +; RV64ZFH-NEXT: fmv.d.x fa5, a2 +; RV64ZFH-NEXT: sltu a2, a0, a3 +; RV64ZFH-NEXT: addi a2, a2, -1 +; RV64ZFH-NEXT: and a2, a2, a3 +; RV64ZFH-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64ZFH-NEXT: vfabs.v v24, v16 +; RV64ZFH-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZFH-NEXT: fsrmi a2, 3 +; RV64ZFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZFH-NEXT: fsrm a2 +; RV64ZFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZFH-NEXT: bltu a0, a1, .LBB45_2 +; RV64ZFH-NEXT: # %bb.1: +; RV64ZFH-NEXT: mv a0, a1 +; RV64ZFH-NEXT: .LBB45_2: +; RV64ZFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZFH-NEXT: vfabs.v v24, v8 +; RV64ZFH-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZFH-NEXT: fsrmi a0, 3 +; RV64ZFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZFH-NEXT: fsrm a0 +; RV64ZFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZFH-NEXT: ret %v = call @llvm.vp.ceil.nxv16f64( %va, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll index 8c63c2d4be8c1..51dc7b0714d7f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll @@ -32,10 +32,11 @@ define @trunc_nxv1f64_to_si8( %x) { ; ; RV64-LABEL: trunc_nxv1f64_to_si8: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI0_0) -; RV64-NEXT: fld fa5, %lo(.LCPI0_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 ; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -75,10 +76,11 @@ define @trunc_nxv1f64_to_ui8( %x) { ; ; RV64-LABEL: trunc_nxv1f64_to_ui8: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI1_0) -; RV64-NEXT: fld fa5, %lo(.LCPI1_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 ; 
RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -116,10 +118,11 @@ define @trunc_nxv1f64_to_si16( %x) { ; ; RV64-LABEL: trunc_nxv1f64_to_si16: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI2_0) -; RV64-NEXT: fld fa5, %lo(.LCPI2_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 ; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -155,10 +158,11 @@ define @trunc_nxv1f64_to_ui16( %x) { ; ; RV64-LABEL: trunc_nxv1f64_to_ui16: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI3_0) -; RV64-NEXT: fld fa5, %lo(.LCPI3_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 ; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -274,10 +278,11 @@ define @trunc_nxv4f64_to_si8( %x) { ; ; RV64-LABEL: trunc_nxv4f64_to_si8: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI8_0) -; RV64-NEXT: fld fa5, %lo(.LCPI8_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 ; RV64-NEXT: vmflt.vf v0, v12, fa5 ; RV64-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -317,10 +322,11 @@ define @trunc_nxv4f64_to_ui8( %x) { ; ; RV64-LABEL: trunc_nxv4f64_to_ui8: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI9_0) -; RV64-NEXT: fld fa5, %lo(.LCPI9_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 ; RV64-NEXT: vmflt.vf v0, v12, fa5 ; RV64-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -358,10 +364,11 @@ define @trunc_nxv4f64_to_si16( %x) { ; ; RV64-LABEL: trunc_nxv4f64_to_si16: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI10_0) -; RV64-NEXT: fld fa5, %lo(.LCPI10_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 ; RV64-NEXT: vmflt.vf v0, v12, fa5 ; RV64-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -397,10 +404,11 @@ define @trunc_nxv4f64_to_ui16( %x) { ; ; RV64-LABEL: trunc_nxv4f64_to_ui16: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI11_0) -; RV64-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 ; RV64-NEXT: vmflt.vf v0, v12, fa5 ; RV64-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -518,10 +526,11 @@ define @ceil_nxv1f64_to_si8( %x) { ; ; RV64-LABEL: ceil_nxv1f64_to_si8: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI16_0) -; RV64-NEXT: fld fa5, %lo(.LCPI16_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 ; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: fsrmi a0, 3 ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -565,10 +574,11 @@ define @ceil_nxv1f64_to_ui8( %x) { ; ; RV64-LABEL: ceil_nxv1f64_to_ui8: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI17_0) -; RV64-NEXT: fld fa5, 
%lo(.LCPI17_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 ; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: fsrmi a0, 3 ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -610,10 +620,11 @@ define @ceil_nxv1f64_to_si16( %x) { ; ; RV64-LABEL: ceil_nxv1f64_to_si16: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI18_0) -; RV64-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 ; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: fsrmi a0, 3 ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -653,10 +664,11 @@ define @ceil_nxv1f64_to_ui16( %x) { ; ; RV64-LABEL: ceil_nxv1f64_to_ui16: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI19_0) -; RV64-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 ; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: fsrmi a0, 3 ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -792,10 +804,11 @@ define @ceil_nxv4f64_to_si8( %x) { ; ; RV64-LABEL: ceil_nxv4f64_to_si8: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI24_0) -; RV64-NEXT: fld fa5, %lo(.LCPI24_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 ; RV64-NEXT: vmflt.vf v0, v12, fa5 ; RV64-NEXT: fsrmi a0, 3 ; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -839,10 +852,11 @@ define @ceil_nxv4f64_to_ui8( %x) { ; ; RV64-LABEL: ceil_nxv4f64_to_ui8: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI25_0) -; RV64-NEXT: fld fa5, %lo(.LCPI25_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 ; RV64-NEXT: vmflt.vf v0, v12, fa5 ; RV64-NEXT: fsrmi a0, 3 ; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -884,10 +898,11 @@ define @ceil_nxv4f64_to_si16( %x) { ; ; RV64-LABEL: ceil_nxv4f64_to_si16: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI26_0) -; RV64-NEXT: fld fa5, %lo(.LCPI26_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 ; RV64-NEXT: vmflt.vf v0, v12, fa5 ; RV64-NEXT: fsrmi a0, 3 ; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -927,10 +942,11 @@ define @ceil_nxv4f64_to_ui16( %x) { ; ; RV64-LABEL: ceil_nxv4f64_to_ui16: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI27_0) -; RV64-NEXT: fld fa5, %lo(.LCPI27_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 ; RV64-NEXT: vmflt.vf v0, v12, fa5 ; RV64-NEXT: fsrmi a0, 3 ; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -1064,10 +1080,11 @@ define @rint_nxv1f64_to_si8( %x) { ; ; RV64-LABEL: rint_nxv1f64_to_si8: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI32_0) -; RV64-NEXT: fld fa5, %lo(.LCPI32_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 ; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -1107,10 +1124,11 @@ define @rint_nxv1f64_to_ui8( %x) { ; ; RV64-LABEL: rint_nxv1f64_to_ui8: ; RV64: # 
%bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI33_0) -; RV64-NEXT: fld fa5, %lo(.LCPI33_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 ; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -1148,10 +1166,11 @@ define @rint_nxv1f64_to_si16( %x) { ; ; RV64-LABEL: rint_nxv1f64_to_si16: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI34_0) -; RV64-NEXT: fld fa5, %lo(.LCPI34_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 ; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -1187,10 +1206,11 @@ define @rint_nxv1f64_to_ui16( %x) { ; ; RV64-LABEL: rint_nxv1f64_to_ui16: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI35_0) -; RV64-NEXT: fld fa5, %lo(.LCPI35_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 ; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -1306,10 +1326,11 @@ define @rint_nxv4f64_to_si8( %x) { ; ; RV64-LABEL: rint_nxv4f64_to_si8: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI40_0) -; RV64-NEXT: fld fa5, %lo(.LCPI40_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 ; RV64-NEXT: vmflt.vf v0, v12, fa5 ; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -1349,10 +1370,11 @@ define @rint_nxv4f64_to_ui8( %x) { ; ; RV64-LABEL: rint_nxv4f64_to_ui8: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI41_0) -; RV64-NEXT: fld fa5, %lo(.LCPI41_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 ; RV64-NEXT: vmflt.vf v0, v12, fa5 ; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -1390,10 +1412,11 @@ define @rint_nxv4f64_to_si16( %x) { ; ; RV64-LABEL: rint_nxv4f64_to_si16: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI42_0) -; RV64-NEXT: fld fa5, %lo(.LCPI42_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 ; RV64-NEXT: vmflt.vf v0, v12, fa5 ; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -1429,10 +1452,11 @@ define @rint_nxv4f64_to_ui16( %x) { ; ; RV64-LABEL: rint_nxv4f64_to_ui16: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI43_0) -; RV64-NEXT: fld fa5, %lo(.LCPI43_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 ; RV64-NEXT: vmflt.vf v0, v12, fa5 ; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll index 1626b362fed15..316a84f98be2b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll @@ -1,18 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; 
RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV32 %s ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64 %s define @ceil_nxv1f16( %x) strictfp { ; CHECK-LABEL: ceil_nxv1f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma @@ -32,10 +33,11 @@ define @ceil_nxv2f16( %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma @@ -55,10 +57,11 @@ define @ceil_nxv4f16( %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -78,10 +81,11 @@ define @ceil_nxv8f16( %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma @@ -101,10 +105,11 @@ define @ceil_nxv16f16( %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma @@ -124,10 +129,11 @@ define @ceil_nxv32f16( %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI5_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma @@ -258,92 +264,168 @@ define @ceil_nxv16f32( %x) strictfp { declare @llvm.experimental.constrained.ceil.nxv16f32(, 
metadata) define @ceil_nxv1f64( %x) strictfp { -; CHECK-LABEL: ceil_nxv1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI11_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: ceil_nxv1f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI11_0) +; RV32-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv1f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: ret %a = call @llvm.experimental.constrained.ceil.nxv1f64( %x, metadata !"fpexcept.strict") ret %a } declare @llvm.experimental.constrained.ceil.nxv1f64(, metadata) define @ceil_nxv2f64( %x) strictfp { -; CHECK-LABEL: ceil_nxv2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: ceil_nxv2f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI12_0) +; RV32-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vmflt.vf v0, v10, fa5 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv2f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v10, v8 +; 
RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v10, fa5 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: ret %a = call @llvm.experimental.constrained.ceil.nxv2f64( %x, metadata !"fpexcept.strict") ret %a } declare @llvm.experimental.constrained.ceil.nxv2f64(, metadata) define @ceil_nxv4f64( %x) strictfp { -; CHECK-LABEL: ceil_nxv4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: ceil_nxv4f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI13_0) +; RV32-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, fa5 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv4f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v12, fa5 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: ret %a = call @llvm.experimental.constrained.ceil.nxv4f64( %x, metadata !"fpexcept.strict") ret %a } declare @llvm.experimental.constrained.ceil.nxv4f64(, metadata) define @ceil_nxv8f64( %x) strictfp { -; CHECK-LABEL: ceil_nxv8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI14_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: ceil_nxv8f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI14_0) +; RV32-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v16, v8 +; RV32-NEXT: vmflt.vf v0, v16, fa5 +; RV32-NEXT: fsrmi a0, 3 +; 
RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv8f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v16, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v16, fa5 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64-NEXT: ret %a = call @llvm.experimental.constrained.ceil.nxv8f64( %x, metadata !"fpexcept.strict") ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll index 4aca2d694dfbb..56edec1cc7a68 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll @@ -1,16 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFH,RV32ZFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFH,RV64ZFH ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFHMIN,RV32ZFHMIN ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFHMIN,RV64ZFHMIN define @ceil_nxv1bf16( %x) { ; CHECK-LABEL: ceil_nxv1bf16: @@ -167,10 +167,11 @@ define @ceil_nxv32bf16( %x) { define @ceil_nxv1f16( %x) { ; ZVFH-LABEL: ceil_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -206,10 +207,11 @@ declare @llvm.ceil.nxv1f16() define @ceil_nxv2f16( %x) { ; ZVFH-LABEL: ceil_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI7_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -245,10 +247,11 @@ declare @llvm.ceil.nxv2f16() define @ceil_nxv4f16( %x) { ; ZVFH-LABEL: ceil_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI8_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, 
a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -284,10 +287,11 @@ declare @llvm.ceil.nxv4f16() define @ceil_nxv8f16( %x) { ; ZVFH-LABEL: ceil_nxv8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI9_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t @@ -323,10 +327,11 @@ declare @llvm.ceil.nxv8f16() define @ceil_nxv16f16( %x) { ; ZVFH-LABEL: ceil_nxv16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI10_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -362,10 +367,11 @@ declare @llvm.ceil.nxv16f16() define @ceil_nxv32f16( %x) { ; ZVFH-LABEL: ceil_nxv32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI11_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -513,80 +519,268 @@ define @ceil_nxv16f32( %x) { declare @llvm.ceil.nxv16f32() define @ceil_nxv1f64( %x) { -; CHECK-LABEL: ceil_nxv1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI17_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32ZFH-LABEL: ceil_nxv1f64: +; RV32ZFH: # %bb.0: +; RV32ZFH-NEXT: lui a0, %hi(.LCPI17_0) +; RV32ZFH-NEXT: fld fa5, %lo(.LCPI17_0)(a0) +; RV32ZFH-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32ZFH-NEXT: vfabs.v v9, v8 +; RV32ZFH-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZFH-NEXT: fsrmi a0, 3 +; RV32ZFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZFH-NEXT: fsrm a0 +; RV32ZFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZFH-NEXT: ret +; +; RV64ZFH-LABEL: ceil_nxv1f64: +; RV64ZFH: # %bb.0: +; RV64ZFH-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64ZFH-NEXT: vfabs.v v9, v8 +; RV64ZFH-NEXT: li a0, 1075 +; RV64ZFH-NEXT: slli a0, a0, 52 +; RV64ZFH-NEXT: fmv.d.x fa5, a0 +; RV64ZFH-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZFH-NEXT: fsrmi a0, 3 +; RV64ZFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZFH-NEXT: fsrm a0 +; RV64ZFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZFH-NEXT: ret +; +; RV32ZFHMIN-LABEL: ceil_nxv1f64: +; RV32ZFHMIN: # %bb.0: +; RV32ZFHMIN-NEXT: lui a0, %hi(.LCPI17_0) +; RV32ZFHMIN-NEXT: fld fa5, %lo(.LCPI17_0)(a0) +; RV32ZFHMIN-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32ZFHMIN-NEXT: vfabs.v v9, v8 +; RV32ZFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZFHMIN-NEXT: fsrmi a0, 3 +; RV32ZFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZFHMIN-NEXT: fsrm a0 +; RV32ZFHMIN-NEXT: vfcvt.f.x.v v9, 
v9, v0.t +; RV32ZFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZFHMIN-NEXT: ret +; +; RV64ZFHMIN-LABEL: ceil_nxv1f64: +; RV64ZFHMIN: # %bb.0: +; RV64ZFHMIN-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64ZFHMIN-NEXT: vfabs.v v9, v8 +; RV64ZFHMIN-NEXT: li a0, 1075 +; RV64ZFHMIN-NEXT: slli a0, a0, 52 +; RV64ZFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZFHMIN-NEXT: fsrmi a0, 3 +; RV64ZFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZFHMIN-NEXT: fsrm a0 +; RV64ZFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZFHMIN-NEXT: ret %a = call @llvm.ceil.nxv1f64( %x) ret %a } declare @llvm.ceil.nxv1f64() define @ceil_nxv2f64( %x) { -; CHECK-LABEL: ceil_nxv2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI18_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; RV32ZFH-LABEL: ceil_nxv2f64: +; RV32ZFH: # %bb.0: +; RV32ZFH-NEXT: lui a0, %hi(.LCPI18_0) +; RV32ZFH-NEXT: fld fa5, %lo(.LCPI18_0)(a0) +; RV32ZFH-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32ZFH-NEXT: vfabs.v v10, v8 +; RV32ZFH-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZFH-NEXT: fsrmi a0, 3 +; RV32ZFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZFH-NEXT: fsrm a0 +; RV32ZFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZFH-NEXT: ret +; +; RV64ZFH-LABEL: ceil_nxv2f64: +; RV64ZFH: # %bb.0: +; RV64ZFH-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV64ZFH-NEXT: vfabs.v v10, v8 +; RV64ZFH-NEXT: li a0, 1075 +; RV64ZFH-NEXT: slli a0, a0, 52 +; RV64ZFH-NEXT: fmv.d.x fa5, a0 +; RV64ZFH-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZFH-NEXT: fsrmi a0, 3 +; RV64ZFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZFH-NEXT: fsrm a0 +; RV64ZFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZFH-NEXT: ret +; +; RV32ZFHMIN-LABEL: ceil_nxv2f64: +; RV32ZFHMIN: # %bb.0: +; RV32ZFHMIN-NEXT: lui a0, %hi(.LCPI18_0) +; RV32ZFHMIN-NEXT: fld fa5, %lo(.LCPI18_0)(a0) +; RV32ZFHMIN-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32ZFHMIN-NEXT: vfabs.v v10, v8 +; RV32ZFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZFHMIN-NEXT: fsrmi a0, 3 +; RV32ZFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZFHMIN-NEXT: fsrm a0 +; RV32ZFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZFHMIN-NEXT: ret +; +; RV64ZFHMIN-LABEL: ceil_nxv2f64: +; RV64ZFHMIN: # %bb.0: +; RV64ZFHMIN-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV64ZFHMIN-NEXT: vfabs.v v10, v8 +; RV64ZFHMIN-NEXT: li a0, 1075 +; RV64ZFHMIN-NEXT: slli a0, a0, 52 +; RV64ZFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZFHMIN-NEXT: fsrmi a0, 3 +; RV64ZFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZFHMIN-NEXT: fsrm a0 +; RV64ZFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZFHMIN-NEXT: ret %a = call @llvm.ceil.nxv2f64( %x) ret %a } declare 
@llvm.ceil.nxv2f64() define @ceil_nxv4f64( %x) { -; CHECK-LABEL: ceil_nxv4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI19_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32ZFH-LABEL: ceil_nxv4f64: +; RV32ZFH: # %bb.0: +; RV32ZFH-NEXT: lui a0, %hi(.LCPI19_0) +; RV32ZFH-NEXT: fld fa5, %lo(.LCPI19_0)(a0) +; RV32ZFH-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32ZFH-NEXT: vfabs.v v12, v8 +; RV32ZFH-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZFH-NEXT: fsrmi a0, 3 +; RV32ZFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZFH-NEXT: fsrm a0 +; RV32ZFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZFH-NEXT: ret +; +; RV64ZFH-LABEL: ceil_nxv4f64: +; RV64ZFH: # %bb.0: +; RV64ZFH-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64ZFH-NEXT: vfabs.v v12, v8 +; RV64ZFH-NEXT: li a0, 1075 +; RV64ZFH-NEXT: slli a0, a0, 52 +; RV64ZFH-NEXT: fmv.d.x fa5, a0 +; RV64ZFH-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZFH-NEXT: fsrmi a0, 3 +; RV64ZFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZFH-NEXT: fsrm a0 +; RV64ZFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZFH-NEXT: ret +; +; RV32ZFHMIN-LABEL: ceil_nxv4f64: +; RV32ZFHMIN: # %bb.0: +; RV32ZFHMIN-NEXT: lui a0, %hi(.LCPI19_0) +; RV32ZFHMIN-NEXT: fld fa5, %lo(.LCPI19_0)(a0) +; RV32ZFHMIN-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32ZFHMIN-NEXT: vfabs.v v12, v8 +; RV32ZFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZFHMIN-NEXT: fsrmi a0, 3 +; RV32ZFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZFHMIN-NEXT: fsrm a0 +; RV32ZFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZFHMIN-NEXT: ret +; +; RV64ZFHMIN-LABEL: ceil_nxv4f64: +; RV64ZFHMIN: # %bb.0: +; RV64ZFHMIN-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64ZFHMIN-NEXT: vfabs.v v12, v8 +; RV64ZFHMIN-NEXT: li a0, 1075 +; RV64ZFHMIN-NEXT: slli a0, a0, 52 +; RV64ZFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZFHMIN-NEXT: fsrmi a0, 3 +; RV64ZFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZFHMIN-NEXT: fsrm a0 +; RV64ZFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZFHMIN-NEXT: ret %a = call @llvm.ceil.nxv4f64( %x) ret %a } declare @llvm.ceil.nxv4f64() define @ceil_nxv8f64( %x) { -; CHECK-LABEL: ceil_nxv8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI20_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZFH-LABEL: ceil_nxv8f64: +; RV32ZFH: # %bb.0: +; RV32ZFH-NEXT: lui a0, %hi(.LCPI20_0) +; RV32ZFH-NEXT: fld fa5, %lo(.LCPI20_0)(a0) +; RV32ZFH-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32ZFH-NEXT: vfabs.v v16, 
v8 +; RV32ZFH-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZFH-NEXT: fsrmi a0, 3 +; RV32ZFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZFH-NEXT: fsrm a0 +; RV32ZFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZFH-NEXT: ret +; +; RV64ZFH-LABEL: ceil_nxv8f64: +; RV64ZFH: # %bb.0: +; RV64ZFH-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV64ZFH-NEXT: vfabs.v v16, v8 +; RV64ZFH-NEXT: li a0, 1075 +; RV64ZFH-NEXT: slli a0, a0, 52 +; RV64ZFH-NEXT: fmv.d.x fa5, a0 +; RV64ZFH-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZFH-NEXT: fsrmi a0, 3 +; RV64ZFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZFH-NEXT: fsrm a0 +; RV64ZFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZFH-NEXT: ret +; +; RV32ZFHMIN-LABEL: ceil_nxv8f64: +; RV32ZFHMIN: # %bb.0: +; RV32ZFHMIN-NEXT: lui a0, %hi(.LCPI20_0) +; RV32ZFHMIN-NEXT: fld fa5, %lo(.LCPI20_0)(a0) +; RV32ZFHMIN-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32ZFHMIN-NEXT: vfabs.v v16, v8 +; RV32ZFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZFHMIN-NEXT: fsrmi a0, 3 +; RV32ZFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZFHMIN-NEXT: fsrm a0 +; RV32ZFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZFHMIN-NEXT: ret +; +; RV64ZFHMIN-LABEL: ceil_nxv8f64: +; RV64ZFHMIN: # %bb.0: +; RV64ZFHMIN-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV64ZFHMIN-NEXT: vfabs.v v16, v8 +; RV64ZFHMIN-NEXT: li a0, 1075 +; RV64ZFHMIN-NEXT: slli a0, a0, 52 +; RV64ZFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZFHMIN-NEXT: fsrmi a0, 3 +; RV64ZFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZFHMIN-NEXT: fsrm a0 +; RV64ZFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZFHMIN-NEXT: ret %a = call @llvm.ceil.nxv8f64( %x) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll index d93f15ec44053..7045fc7c50847 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll @@ -1,18 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV32 %s ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64 %s define @floor_nxv1f16( %x) strictfp { ; CHECK-LABEL: floor_nxv1f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma @@ -32,10 +33,11 @@ define @floor_nxv2f16( %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, 
%lo(.LCPI1_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma @@ -55,10 +57,11 @@ define @floor_nxv4f16( %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -78,10 +81,11 @@ define @floor_nxv8f16( %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma @@ -101,10 +105,11 @@ define @floor_nxv16f16( %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma @@ -124,10 +129,11 @@ define @floor_nxv32f16( %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI5_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma @@ -258,92 +264,168 @@ define @floor_nxv16f32( %x) strictfp declare @llvm.experimental.constrained.floor.nxv16f32(, metadata) define @floor_nxv1f64( %x) strictfp { -; CHECK-LABEL: floor_nxv1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI11_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: floor_nxv1f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI11_0) +; RV32-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: fsrmi a0, 2 +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; 
RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: floor_nxv1f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: fsrmi a0, 2 +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: ret %a = call @llvm.experimental.constrained.floor.nxv1f64( %x, metadata !"fpexcept.strict") ret %a } declare @llvm.experimental.constrained.floor.nxv1f64(, metadata) define @floor_nxv2f64( %x) strictfp { -; CHECK-LABEL: floor_nxv2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: floor_nxv2f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI12_0) +; RV32-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vmflt.vf v0, v10, fa5 +; RV32-NEXT: fsrmi a0, 2 +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: floor_nxv2f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v10, fa5 +; RV64-NEXT: fsrmi a0, 2 +; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: ret %a = call @llvm.experimental.constrained.floor.nxv2f64( %x, metadata !"fpexcept.strict") ret %a } declare @llvm.experimental.constrained.floor.nxv2f64(, metadata) define @floor_nxv4f64( %x) strictfp { -; CHECK-LABEL: floor_nxv4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu 
-; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: floor_nxv4f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI13_0) +; RV32-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, fa5 +; RV32-NEXT: fsrmi a0, 2 +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: floor_nxv4f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v12, fa5 +; RV64-NEXT: fsrmi a0, 2 +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: ret %a = call @llvm.experimental.constrained.floor.nxv4f64( %x, metadata !"fpexcept.strict") ret %a } declare @llvm.experimental.constrained.floor.nxv4f64(, metadata) define @floor_nxv8f64( %x) strictfp { -; CHECK-LABEL: floor_nxv8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI14_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: floor_nxv8f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI14_0) +; RV32-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v16, v8 +; RV32-NEXT: vmflt.vf v0, v16, fa5 +; RV32-NEXT: fsrmi a0, 2 +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: floor_nxv8f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v16, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v16, fa5 +; RV64-NEXT: fsrmi a0, 2 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64-NEXT: ret %a = call @llvm.experimental.constrained.floor.nxv8f64( %x, metadata !"fpexcept.strict") ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll index 
010d7786c8891..9adbca55bcd01 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll @@ -1,16 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFH,RV32ZFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFH,RV64ZFH ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFHMIN,RV32ZFHMIN ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFHMIN,RV64ZFHMIN define @floor_nxv1bf16( %x) { ; CHECK-LABEL: floor_nxv1bf16: @@ -173,10 +173,11 @@ declare @llvm.floor.nxv32bf16() define @floor_nxv1f16( %x) { ; ZVFH-LABEL: floor_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -212,10 +213,11 @@ declare @llvm.floor.nxv1f16() define @floor_nxv2f16( %x) { ; ZVFH-LABEL: floor_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI7_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -251,10 +253,11 @@ declare @llvm.floor.nxv2f16() define @floor_nxv4f16( %x) { ; ZVFH-LABEL: floor_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI8_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -290,10 +293,11 @@ declare @llvm.floor.nxv4f16() define @floor_nxv8f16( %x) { ; ZVFH-LABEL: floor_nxv8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI9_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t @@ -329,10 +333,11 @@ declare @llvm.floor.nxv8f16() define @floor_nxv16f16( %x) { ; ZVFH-LABEL: floor_nxv16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI10_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -368,10 +373,11 @@ declare 
@llvm.floor.nxv16f16() define @floor_nxv32f16( %x) { ; ZVFH-LABEL: floor_nxv32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI11_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -519,80 +525,268 @@ define @floor_nxv16f32( %x) { declare @llvm.floor.nxv16f32() define @floor_nxv1f64( %x) { -; CHECK-LABEL: floor_nxv1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI17_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32ZFH-LABEL: floor_nxv1f64: +; RV32ZFH: # %bb.0: +; RV32ZFH-NEXT: lui a0, %hi(.LCPI17_0) +; RV32ZFH-NEXT: fld fa5, %lo(.LCPI17_0)(a0) +; RV32ZFH-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32ZFH-NEXT: vfabs.v v9, v8 +; RV32ZFH-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZFH-NEXT: fsrmi a0, 2 +; RV32ZFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZFH-NEXT: fsrm a0 +; RV32ZFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZFH-NEXT: ret +; +; RV64ZFH-LABEL: floor_nxv1f64: +; RV64ZFH: # %bb.0: +; RV64ZFH-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64ZFH-NEXT: vfabs.v v9, v8 +; RV64ZFH-NEXT: li a0, 1075 +; RV64ZFH-NEXT: slli a0, a0, 52 +; RV64ZFH-NEXT: fmv.d.x fa5, a0 +; RV64ZFH-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZFH-NEXT: fsrmi a0, 2 +; RV64ZFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZFH-NEXT: fsrm a0 +; RV64ZFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZFH-NEXT: ret +; +; RV32ZFHMIN-LABEL: floor_nxv1f64: +; RV32ZFHMIN: # %bb.0: +; RV32ZFHMIN-NEXT: lui a0, %hi(.LCPI17_0) +; RV32ZFHMIN-NEXT: fld fa5, %lo(.LCPI17_0)(a0) +; RV32ZFHMIN-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32ZFHMIN-NEXT: vfabs.v v9, v8 +; RV32ZFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZFHMIN-NEXT: fsrmi a0, 2 +; RV32ZFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZFHMIN-NEXT: fsrm a0 +; RV32ZFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZFHMIN-NEXT: ret +; +; RV64ZFHMIN-LABEL: floor_nxv1f64: +; RV64ZFHMIN: # %bb.0: +; RV64ZFHMIN-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64ZFHMIN-NEXT: vfabs.v v9, v8 +; RV64ZFHMIN-NEXT: li a0, 1075 +; RV64ZFHMIN-NEXT: slli a0, a0, 52 +; RV64ZFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZFHMIN-NEXT: fsrmi a0, 2 +; RV64ZFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZFHMIN-NEXT: fsrm a0 +; RV64ZFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZFHMIN-NEXT: ret %a = call @llvm.floor.nxv1f64( %x) ret %a } declare @llvm.floor.nxv1f64() define @floor_nxv2f64( %x) { -; CHECK-LABEL: floor_nxv2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI18_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: 
vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; RV32ZFH-LABEL: floor_nxv2f64: +; RV32ZFH: # %bb.0: +; RV32ZFH-NEXT: lui a0, %hi(.LCPI18_0) +; RV32ZFH-NEXT: fld fa5, %lo(.LCPI18_0)(a0) +; RV32ZFH-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32ZFH-NEXT: vfabs.v v10, v8 +; RV32ZFH-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZFH-NEXT: fsrmi a0, 2 +; RV32ZFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZFH-NEXT: fsrm a0 +; RV32ZFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZFH-NEXT: ret +; +; RV64ZFH-LABEL: floor_nxv2f64: +; RV64ZFH: # %bb.0: +; RV64ZFH-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV64ZFH-NEXT: vfabs.v v10, v8 +; RV64ZFH-NEXT: li a0, 1075 +; RV64ZFH-NEXT: slli a0, a0, 52 +; RV64ZFH-NEXT: fmv.d.x fa5, a0 +; RV64ZFH-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZFH-NEXT: fsrmi a0, 2 +; RV64ZFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZFH-NEXT: fsrm a0 +; RV64ZFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZFH-NEXT: ret +; +; RV32ZFHMIN-LABEL: floor_nxv2f64: +; RV32ZFHMIN: # %bb.0: +; RV32ZFHMIN-NEXT: lui a0, %hi(.LCPI18_0) +; RV32ZFHMIN-NEXT: fld fa5, %lo(.LCPI18_0)(a0) +; RV32ZFHMIN-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32ZFHMIN-NEXT: vfabs.v v10, v8 +; RV32ZFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZFHMIN-NEXT: fsrmi a0, 2 +; RV32ZFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZFHMIN-NEXT: fsrm a0 +; RV32ZFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZFHMIN-NEXT: ret +; +; RV64ZFHMIN-LABEL: floor_nxv2f64: +; RV64ZFHMIN: # %bb.0: +; RV64ZFHMIN-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV64ZFHMIN-NEXT: vfabs.v v10, v8 +; RV64ZFHMIN-NEXT: li a0, 1075 +; RV64ZFHMIN-NEXT: slli a0, a0, 52 +; RV64ZFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZFHMIN-NEXT: fsrmi a0, 2 +; RV64ZFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZFHMIN-NEXT: fsrm a0 +; RV64ZFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZFHMIN-NEXT: ret %a = call @llvm.floor.nxv2f64( %x) ret %a } declare @llvm.floor.nxv2f64() define @floor_nxv4f64( %x) { -; CHECK-LABEL: floor_nxv4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI19_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32ZFH-LABEL: floor_nxv4f64: +; RV32ZFH: # %bb.0: +; RV32ZFH-NEXT: lui a0, %hi(.LCPI19_0) +; RV32ZFH-NEXT: fld fa5, %lo(.LCPI19_0)(a0) +; RV32ZFH-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32ZFH-NEXT: vfabs.v v12, v8 +; RV32ZFH-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZFH-NEXT: fsrmi a0, 2 +; RV32ZFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZFH-NEXT: fsrm a0 +; RV32ZFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZFH-NEXT: vfsgnj.vv 
v8, v12, v8, v0.t +; RV32ZFH-NEXT: ret +; +; RV64ZFH-LABEL: floor_nxv4f64: +; RV64ZFH: # %bb.0: +; RV64ZFH-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64ZFH-NEXT: vfabs.v v12, v8 +; RV64ZFH-NEXT: li a0, 1075 +; RV64ZFH-NEXT: slli a0, a0, 52 +; RV64ZFH-NEXT: fmv.d.x fa5, a0 +; RV64ZFH-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZFH-NEXT: fsrmi a0, 2 +; RV64ZFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZFH-NEXT: fsrm a0 +; RV64ZFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZFH-NEXT: ret +; +; RV32ZFHMIN-LABEL: floor_nxv4f64: +; RV32ZFHMIN: # %bb.0: +; RV32ZFHMIN-NEXT: lui a0, %hi(.LCPI19_0) +; RV32ZFHMIN-NEXT: fld fa5, %lo(.LCPI19_0)(a0) +; RV32ZFHMIN-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32ZFHMIN-NEXT: vfabs.v v12, v8 +; RV32ZFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZFHMIN-NEXT: fsrmi a0, 2 +; RV32ZFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZFHMIN-NEXT: fsrm a0 +; RV32ZFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZFHMIN-NEXT: ret +; +; RV64ZFHMIN-LABEL: floor_nxv4f64: +; RV64ZFHMIN: # %bb.0: +; RV64ZFHMIN-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64ZFHMIN-NEXT: vfabs.v v12, v8 +; RV64ZFHMIN-NEXT: li a0, 1075 +; RV64ZFHMIN-NEXT: slli a0, a0, 52 +; RV64ZFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZFHMIN-NEXT: fsrmi a0, 2 +; RV64ZFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZFHMIN-NEXT: fsrm a0 +; RV64ZFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZFHMIN-NEXT: ret %a = call @llvm.floor.nxv4f64( %x) ret %a } declare @llvm.floor.nxv4f64() define @floor_nxv8f64( %x) { -; CHECK-LABEL: floor_nxv8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI20_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZFH-LABEL: floor_nxv8f64: +; RV32ZFH: # %bb.0: +; RV32ZFH-NEXT: lui a0, %hi(.LCPI20_0) +; RV32ZFH-NEXT: fld fa5, %lo(.LCPI20_0)(a0) +; RV32ZFH-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32ZFH-NEXT: vfabs.v v16, v8 +; RV32ZFH-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZFH-NEXT: fsrmi a0, 2 +; RV32ZFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZFH-NEXT: fsrm a0 +; RV32ZFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZFH-NEXT: ret +; +; RV64ZFH-LABEL: floor_nxv8f64: +; RV64ZFH: # %bb.0: +; RV64ZFH-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV64ZFH-NEXT: vfabs.v v16, v8 +; RV64ZFH-NEXT: li a0, 1075 +; RV64ZFH-NEXT: slli a0, a0, 52 +; RV64ZFH-NEXT: fmv.d.x fa5, a0 +; RV64ZFH-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZFH-NEXT: fsrmi a0, 2 +; RV64ZFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZFH-NEXT: fsrm a0 +; RV64ZFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZFH-NEXT: ret +; +; RV32ZFHMIN-LABEL: floor_nxv8f64: +; RV32ZFHMIN: # %bb.0: +; RV32ZFHMIN-NEXT: lui a0, %hi(.LCPI20_0) +; RV32ZFHMIN-NEXT: fld fa5, %lo(.LCPI20_0)(a0) +; RV32ZFHMIN-NEXT: 
vsetvli a0, zero, e64, m8, ta, ma +; RV32ZFHMIN-NEXT: vfabs.v v16, v8 +; RV32ZFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZFHMIN-NEXT: fsrmi a0, 2 +; RV32ZFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZFHMIN-NEXT: fsrm a0 +; RV32ZFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZFHMIN-NEXT: ret +; +; RV64ZFHMIN-LABEL: floor_nxv8f64: +; RV64ZFHMIN: # %bb.0: +; RV64ZFHMIN-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV64ZFHMIN-NEXT: vfabs.v v16, v8 +; RV64ZFHMIN-NEXT: li a0, 1075 +; RV64ZFHMIN-NEXT: slli a0, a0, 52 +; RV64ZFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZFHMIN-NEXT: fsrmi a0, 2 +; RV64ZFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZFHMIN-NEXT: fsrm a0 +; RV64ZFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZFHMIN-NEXT: ret %a = call @llvm.floor.nxv8f64( %x) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll index c6ff39ad10d6b..4b42c517379ad 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll @@ -1,22 +1,23 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64ZVFH ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32ZVFHMIN ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64ZVFHMIN declare <2 x half> @llvm.vp.ceil.v2f16(<2 x half>, <2 x i1>, i32) define <2 x half> @vp_ceil_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_v2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI0_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 3 @@ -59,10 +60,11 @@ define <2 x half> @vp_ceil_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) define <2 x half> @vp_ceil_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_v2f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI1_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -99,10 +101,11 @@ declare <4 x half> @llvm.vp.ceil.v4f16(<4 x half>, <4 x i1>, i32) 
define <4 x half> @vp_ceil_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_v4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI2_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 3 @@ -145,10 +148,11 @@ define <4 x half> @vp_ceil_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) define <4 x half> @vp_ceil_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_v4f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI3_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -185,10 +189,11 @@ declare <8 x half> @llvm.vp.ceil.v8f16(<8 x half>, <8 x i1>, i32) define <8 x half> @vp_ceil_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_v8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI4_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 3 @@ -231,10 +236,11 @@ define <8 x half> @vp_ceil_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) define <8 x half> @vp_ceil_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_v8f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI5_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -273,9 +279,10 @@ define <16 x half> @vp_ceil_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %e ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vmv1r.v v10, v0 -; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; ZVFH-NEXT: vfabs.v v12, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 3 @@ -319,10 +326,11 @@ define <16 x half> @vp_ceil_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %e define <16 x half> @vp_ceil_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_v16f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI7_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t @@ -529,41 +537,141 @@ define <16 x float> @vp_ceil_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl) declare <2 x double> @llvm.vp.ceil.v2f64(<2 x double>, <2 x i1>, i32) define <2 x double> @vp_ceil_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: 
vp_ceil_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI16_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI16_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_ceil_v2f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI16_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI16_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfabs.v v9, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 3 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_ceil_v2f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfabs.v v9, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 3 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_ceil_v2f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI16_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI16_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 3 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_ceil_v2f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 3 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <2 x double> @llvm.vp.ceil.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl) ret <2 x double> %v } define <2 x double> @vp_ceil_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_ceil_v2f64_unmasked: -; CHECK: 
# %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI17_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_ceil_v2f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI17_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI17_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfabs.v v9, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 3 +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_ceil_v2f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfabs.v v9, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 3 +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_ceil_v2f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI17_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI17_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 3 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_ceil_v2f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 3 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <2 x double> @llvm.vp.ceil.v2f64(<2 x double> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x double> %v } @@ -571,43 +679,149 @@ define <2 x double> @vp_ceil_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) declare <4 x double> @llvm.vp.ceil.v4f64(<4 x double>, <4 x i1>, i32) define <4 x double> @vp_ceil_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_ceil_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI18_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) -; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; 
CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_ceil_v4f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v10, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI18_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI18_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v12, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 3 +; RV32ZVFH-NEXT: vmv1r.v v0, v10 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_ceil_v4f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v10, v0 +; RV64ZVFH-NEXT: vfabs.v v12, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 3 +; RV64ZVFH-NEXT: vmv1r.v v0, v10 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_ceil_v4f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v10, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI18_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI18_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 3 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v10 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_ceil_v4f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v10, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 3 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v10 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <4 x double> @llvm.vp.ceil.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) ret <4 x double> %v } define <4 x double> @vp_ceil_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_ceil_v4f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI19_0) -; 
CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_ceil_v4f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI19_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI19_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFH-NEXT: vfabs.v v10, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 3 +; RV32ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_ceil_v4f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFH-NEXT: vfabs.v v10, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 3 +; RV64ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_ceil_v4f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI19_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI19_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 3 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_ceil_v4f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 3 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <4 x double> @llvm.vp.ceil.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v } @@ -615,43 +829,149 @@ define <4 x double> @vp_ceil_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) declare <8 x double> @llvm.vp.ceil.v8f64(<8 x double>, <8 x i1>, i32) define <8 x double> @vp_ceil_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_ceil_v8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI20_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: 
vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_ceil_v8f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v12, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI20_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI20_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v16, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 3 +; RV32ZVFH-NEXT: vmv1r.v v0, v12 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_ceil_v8f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v12, v0 +; RV64ZVFH-NEXT: vfabs.v v16, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 3 +; RV64ZVFH-NEXT: vmv1r.v v0, v12 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_ceil_v8f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v12, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI20_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI20_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 3 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v12 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_ceil_v8f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v12, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 3 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v12 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <8 x double> @llvm.vp.ceil.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) ret <8 x double> %v } define <8 x double> @vp_ceil_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_ceil_v8f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI21_0) -; CHECK-NEXT: fld fa5, 
%lo(.LCPI21_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_ceil_v8f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI21_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI21_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFH-NEXT: vfabs.v v12, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 3 +; RV32ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_ceil_v8f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFH-NEXT: vfabs.v v12, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 3 +; RV64ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_ceil_v8f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI21_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI21_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 3 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_ceil_v8f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 3 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <8 x double> @llvm.vp.ceil.v8f64(<8 x double> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x double> %v } @@ -659,43 +979,149 @@ define <8 x double> @vp_ceil_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) declare <15 x double> @llvm.vp.ceil.v15f64(<15 x double>, <15 x i1>, i32) define <15 x double> @vp_ceil_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_ceil_v15f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI22_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, 
v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_ceil_v15f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v16, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI22_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI22_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 3 +; RV32ZVFH-NEXT: vmv1r.v v0, v16 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_ceil_v15f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v16, v0 +; RV64ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 3 +; RV64ZVFH-NEXT: vmv1r.v v0, v16 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_ceil_v15f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI22_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI22_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 3 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_ceil_v15f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 3 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <15 x double> @llvm.vp.ceil.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) ret <15 x double> %v } define <15 x double> @vp_ceil_v15f64_unmasked(<15 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_ceil_v15f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI23_0) -; CHECK-NEXT: fld fa5, 
%lo(.LCPI23_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_ceil_v15f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI23_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v16, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 3 +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_ceil_v15f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v16, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 3 +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_ceil_v15f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI23_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 3 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_ceil_v15f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 3 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <15 x double> @llvm.vp.ceil.v15f64(<15 x double> %va, <15 x i1> splat (i1 true), i32 %evl) ret <15 x double> %v } @@ -703,43 +1129,149 @@ define <15 x double> @vp_ceil_v15f64_unmasked(<15 x double> %va, i32 zeroext %ev declare <16 x double> @llvm.vp.ceil.v16f64(<16 x double>, <16 x i1>, i32) define <16 x double> @vp_ceil_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_ceil_v16f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI24_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: 
vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_ceil_v16f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v16, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI24_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI24_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 3 +; RV32ZVFH-NEXT: vmv1r.v v0, v16 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_ceil_v16f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v16, v0 +; RV64ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 3 +; RV64ZVFH-NEXT: vmv1r.v v0, v16 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_ceil_v16f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI24_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI24_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 3 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_ceil_v16f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 3 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <16 x double> @llvm.vp.ceil.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) ret <16 x double> %v } define <16 x double> @vp_ceil_v16f64_unmasked(<16 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_ceil_v16f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI25_0) -; 
CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_ceil_v16f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI25_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v16, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 3 +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_ceil_v16f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v16, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 3 +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_ceil_v16f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI25_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 3 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_ceil_v16f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 3 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <16 x double> @llvm.vp.ceil.v16f64(<16 x double> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x double> %v } @@ -747,91 +1279,341 @@ define <16 x double> @vp_ceil_v16f64_unmasked(<16 x double> %va, i32 zeroext %ev declare <32 x double> @llvm.vp.ceil.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_ceil_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_ceil_v32f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v6, v0 -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vslidedown.vi v7, v0, 2 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB26_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; 
CHECK-NEXT: lui a1, %hi(.LCPI26_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a0, a0, a1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a1, 3 -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_ceil_v32f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v6, v0 +; RV32ZVFH-NEXT: li a2, 16 +; RV32ZVFH-NEXT: vslidedown.vi v7, v0, 2 +; RV32ZVFH-NEXT: mv a1, a0 +; RV32ZVFH-NEXT: bltu a0, a2, .LBB26_2 +; RV32ZVFH-NEXT: # %bb.1: +; RV32ZVFH-NEXT: li a1, 16 +; RV32ZVFH-NEXT: .LBB26_2: +; RV32ZVFH-NEXT: vmv1r.v v0, v6 +; RV32ZVFH-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI26_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; RV32ZVFH-NEXT: addi a1, a0, -16 +; RV32ZVFH-NEXT: sltu a0, a0, a1 +; RV32ZVFH-NEXT: addi a0, a0, -1 +; RV32ZVFH-NEXT: and a0, a0, a1 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a1, 3 +; RV32ZVFH-NEXT: vmv1r.v v0, v6 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a1 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: vmv1r.v v0, v7 +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 3 +; RV32ZVFH-NEXT: vmv1r.v v0, v7 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_ceil_v32f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v6, v0 +; RV64ZVFH-NEXT: li a2, 16 +; RV64ZVFH-NEXT: vslidedown.vi v7, v0, 2 +; RV64ZVFH-NEXT: mv a1, a0 +; RV64ZVFH-NEXT: bltu a0, a2, .LBB26_2 +; RV64ZVFH-NEXT: # %bb.1: +; RV64ZVFH-NEXT: li a1, 16 +; RV64ZVFH-NEXT: .LBB26_2: +; RV64ZVFH-NEXT: vmv1r.v v0, v6 +; RV64ZVFH-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFH-NEXT: li a1, 1075 +; RV64ZVFH-NEXT: slli a1, a1, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a1 +; RV64ZVFH-NEXT: addi a1, a0, -16 +; RV64ZVFH-NEXT: sltu a0, a0, a1 +; RV64ZVFH-NEXT: addi 
a0, a0, -1 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV64ZVFH-NEXT: and a0, a0, a1 +; RV64ZVFH-NEXT: fsrmi a1, 3 +; RV64ZVFH-NEXT: vmv1r.v v0, v6 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a1 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: vmv1r.v v0, v7 +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 3 +; RV64ZVFH-NEXT: vmv1r.v v0, v7 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_ceil_v32f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v6, v0 +; RV32ZVFHMIN-NEXT: li a2, 16 +; RV32ZVFHMIN-NEXT: vslidedown.vi v7, v0, 2 +; RV32ZVFHMIN-NEXT: mv a1, a0 +; RV32ZVFHMIN-NEXT: bltu a0, a2, .LBB26_2 +; RV32ZVFHMIN-NEXT: # %bb.1: +; RV32ZVFHMIN-NEXT: li a1, 16 +; RV32ZVFHMIN-NEXT: .LBB26_2: +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV32ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI26_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; RV32ZVFHMIN-NEXT: addi a1, a0, -16 +; RV32ZVFHMIN-NEXT: sltu a0, a0, a1 +; RV32ZVFHMIN-NEXT: addi a0, a0, -1 +; RV32ZVFHMIN-NEXT: and a0, a0, a1 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a1, 3 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a1 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 3 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_ceil_v32f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v6, v0 +; RV64ZVFHMIN-NEXT: li a2, 16 +; RV64ZVFHMIN-NEXT: vslidedown.vi v7, v0, 2 +; RV64ZVFHMIN-NEXT: mv a1, a0 +; RV64ZVFHMIN-NEXT: bltu a0, a2, .LBB26_2 +; RV64ZVFHMIN-NEXT: # %bb.1: +; RV64ZVFHMIN-NEXT: li a1, 16 +; RV64ZVFHMIN-NEXT: .LBB26_2: +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV64ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: li a1, 1075 +; RV64ZVFHMIN-NEXT: slli a1, a1, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x 
fa5, a1 +; RV64ZVFHMIN-NEXT: addi a1, a0, -16 +; RV64ZVFHMIN-NEXT: sltu a0, a0, a1 +; RV64ZVFHMIN-NEXT: addi a0, a0, -1 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: and a0, a0, a1 +; RV64ZVFHMIN-NEXT: fsrmi a1, 3 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a1 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 3 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <32 x double> @llvm.vp.ceil.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v } define <32 x double> @vp_ceil_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_ceil_v32f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB27_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB27_2: -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: lui a2, %hi(.LCPI27_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a2) -; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: sltu a0, a0, a2 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a2 -; CHECK-NEXT: fsrmi a2, 3 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v7, v24, fa5 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: fsrmi a1, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_ceil_v32f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: li a2, 16 +; RV32ZVFH-NEXT: mv a1, a0 +; RV32ZVFH-NEXT: bltu a0, a2, .LBB27_2 +; RV32ZVFH-NEXT: # %bb.1: +; RV32ZVFH-NEXT: li a1, 16 +; RV32ZVFH-NEXT: .LBB27_2: +; RV32ZVFH-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v8 +; RV32ZVFH-NEXT: lui a2, %hi(.LCPI27_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI27_0)(a2) +; RV32ZVFH-NEXT: addi a2, a0, -16 +; RV32ZVFH-NEXT: sltu a0, a0, a2 +; RV32ZVFH-NEXT: addi a0, a0, -1 +; RV32ZVFH-NEXT: and a0, a0, a2 +; RV32ZVFH-NEXT: fsrmi a2, 3 +; RV32ZVFH-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v16 +; RV32ZVFH-NEXT: vmflt.vf v7, v24, fa5 +; RV32ZVFH-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; 
RV32ZVFH-NEXT: fsrm a2 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: fsrmi a1, 3 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: vmv1r.v v0, v7 +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFH-NEXT: fsrm a1 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_ceil_v32f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: li a2, 16 +; RV64ZVFH-NEXT: mv a1, a0 +; RV64ZVFH-NEXT: bltu a0, a2, .LBB27_2 +; RV64ZVFH-NEXT: # %bb.1: +; RV64ZVFH-NEXT: li a1, 16 +; RV64ZVFH-NEXT: .LBB27_2: +; RV64ZVFH-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v8 +; RV64ZVFH-NEXT: li a2, 1075 +; RV64ZVFH-NEXT: slli a2, a2, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a2 +; RV64ZVFH-NEXT: addi a2, a0, -16 +; RV64ZVFH-NEXT: sltu a0, a0, a2 +; RV64ZVFH-NEXT: addi a0, a0, -1 +; RV64ZVFH-NEXT: and a0, a0, a2 +; RV64ZVFH-NEXT: fsrmi a2, 3 +; RV64ZVFH-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v16 +; RV64ZVFH-NEXT: vmflt.vf v7, v24, fa5 +; RV64ZVFH-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a2 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: fsrmi a1, 3 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: vmv1r.v v0, v7 +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFH-NEXT: fsrm a1 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_ceil_v32f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: li a2, 16 +; RV32ZVFHMIN-NEXT: mv a1, a0 +; RV32ZVFHMIN-NEXT: bltu a0, a2, .LBB27_2 +; RV32ZVFHMIN-NEXT: # %bb.1: +; RV32ZVFHMIN-NEXT: li a1, 16 +; RV32ZVFHMIN-NEXT: .LBB27_2: +; RV32ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8 +; RV32ZVFHMIN-NEXT: lui a2, %hi(.LCPI27_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI27_0)(a2) +; RV32ZVFHMIN-NEXT: addi a2, a0, -16 +; RV32ZVFHMIN-NEXT: sltu a0, a0, a2 +; RV32ZVFHMIN-NEXT: addi a0, a0, -1 +; RV32ZVFHMIN-NEXT: and a0, a0, a2 +; RV32ZVFHMIN-NEXT: fsrmi a2, 3 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v16 +; RV32ZVFHMIN-NEXT: vmflt.vf v7, v24, fa5 +; RV32ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a2 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a1, 3 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFHMIN-NEXT: fsrm a1 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_ceil_v32f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: li a2, 16 +; RV64ZVFHMIN-NEXT: mv a1, a0 +; RV64ZVFHMIN-NEXT: 
bltu a0, a2, .LBB27_2 +; RV64ZVFHMIN-NEXT: # %bb.1: +; RV64ZVFHMIN-NEXT: li a1, 16 +; RV64ZVFHMIN-NEXT: .LBB27_2: +; RV64ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8 +; RV64ZVFHMIN-NEXT: li a2, 1075 +; RV64ZVFHMIN-NEXT: slli a2, a2, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a2 +; RV64ZVFHMIN-NEXT: addi a2, a0, -16 +; RV64ZVFHMIN-NEXT: sltu a0, a0, a2 +; RV64ZVFHMIN-NEXT: addi a0, a0, -1 +; RV64ZVFHMIN-NEXT: and a0, a0, a2 +; RV64ZVFHMIN-NEXT: fsrmi a2, 3 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v16 +; RV64ZVFHMIN-NEXT: vmflt.vf v7, v24, fa5 +; RV64ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a2 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a1, 3 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFHMIN-NEXT: fsrm a1 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <32 x double> @llvm.vp.ceil.v32f64(<32 x double> %va, <32 x i1> splat (i1 true), i32 %evl) ret <32 x double> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll index ab2d00b9b9137..71b0624d91f22 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll @@ -1,18 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV32 %s ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64 %s define <1 x half> @ceil_v1f16(<1 x half> %x) strictfp { ; CHECK-LABEL: ceil_v1f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma @@ -32,10 +33,11 @@ define <2 x half> @ceil_v2f16(<2 x half> %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma @@ -55,10 +57,11 @@ define <4 x half> @ceil_v4f16(<4 x half> %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, 
%hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma @@ -78,10 +81,11 @@ define <8 x half> @ceil_v8f16(<8 x half> %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -101,10 +105,11 @@ define <16 x half> @ceil_v16f16(<16 x half> %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma @@ -123,11 +128,12 @@ define <32 x half> @ceil_v32f16(<32 x half> %x) strictfp { ; CHECK-LABEL: ceil_v32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: lui a1, %hi(.LCPI5_0) +; CHECK-NEXT: li a1, 25 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) +; CHECK-NEXT: slli a1, a1, 10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fmv.h.x fa5, a1 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 3 @@ -259,92 +265,168 @@ define <16 x float> @ceil_v16f32(<16 x float> %x) strictfp { declare <16 x float> @llvm.experimental.constrained.ceil.v16f32(<16 x float>, metadata) define <1 x double> @ceil_v1f64(<1 x double> %x) strictfp { -; CHECK-LABEL: ceil_v1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI11_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: ceil_v1f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI11_0) +; RV32-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_v1f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; 
RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: ret %a = call <1 x double> @llvm.experimental.constrained.ceil.v1f64(<1 x double> %x, metadata !"fpexcept.strict") ret <1 x double> %a } declare <1 x double> @llvm.experimental.constrained.ceil.v1f64(<1 x double>, metadata) define <2 x double> @ceil_v2f64(<2 x double> %x) strictfp { -; CHECK-LABEL: ceil_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: ceil_v2f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI12_0) +; RV32-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_v2f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: ret %a = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double> %x, metadata !"fpexcept.strict") ret <2 x double> %a } declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata) define <4 x double> @ceil_v4f64(<4 x double> %x) strictfp { -; CHECK-LABEL: ceil_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: ceil_v4f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, 
%hi(.LCPI13_0) +; RV32-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vmflt.vf v0, v10, fa5 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_v4f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v10, fa5 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: ret %a = call <4 x double> @llvm.experimental.constrained.ceil.v4f64(<4 x double> %x, metadata !"fpexcept.strict") ret <4 x double> %a } declare <4 x double> @llvm.experimental.constrained.ceil.v4f64(<4 x double>, metadata) define <8 x double> @ceil_v8f64(<8 x double> %x) strictfp { -; CHECK-LABEL: ceil_v8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI14_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: ceil_v8f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI14_0) +; RV32-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, fa5 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_v8f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v12, fa5 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: ret %a = call <8 x double> @llvm.experimental.constrained.ceil.v8f64(<8 x double> %x, metadata !"fpexcept.strict") ret <8 x double> %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll index c6ce7c1bbe8b4..9eca66eea865c 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll @@ -1,18 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV32 %s ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64 %s define <1 x half> @floor_v1f16(<1 x half> %x) strictfp { ; CHECK-LABEL: floor_v1f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma @@ -32,10 +33,11 @@ define <2 x half> @floor_v2f16(<2 x half> %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma @@ -55,10 +57,11 @@ define <4 x half> @floor_v4f16(<4 x half> %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma @@ -78,10 +81,11 @@ define <8 x half> @floor_v8f16(<8 x half> %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -101,10 +105,11 @@ define <16 x half> @floor_v16f16(<16 x half> %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma @@ -123,11 +128,12 @@ define <32 x half> @floor_v32f16(<32 x half> %x) strictfp { ; CHECK-LABEL: floor_v32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: lui a1, %hi(.LCPI5_0) +; CHECK-NEXT: li a1, 25 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: 
vmfne.vv v0, v8, v8 -; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) +; CHECK-NEXT: slli a1, a1, 10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fmv.h.x fa5, a1 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 2 @@ -259,92 +265,168 @@ define <16 x float> @floor_v16f32(<16 x float> %x) strictfp { declare <16 x float> @llvm.experimental.constrained.floor.v16f32(<16 x float>, metadata) define <1 x double> @floor_v1f64(<1 x double> %x) strictfp { -; CHECK-LABEL: floor_v1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI11_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: floor_v1f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI11_0) +; RV32-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: fsrmi a0, 2 +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: floor_v1f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: fsrmi a0, 2 +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: ret %a = call <1 x double> @llvm.experimental.constrained.floor.v1f64(<1 x double> %x, metadata !"fpexcept.strict") ret <1 x double> %a } declare <1 x double> @llvm.experimental.constrained.floor.v1f64(<1 x double>, metadata) define <2 x double> @floor_v2f64(<2 x double> %x) strictfp { -; CHECK-LABEL: floor_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: floor_v2f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI12_0) +; RV32-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: fsrmi a0, 2 +; RV32-NEXT: 
vsetvli zero, zero, e64, m1, ta, ma +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: floor_v2f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: fsrmi a0, 2 +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: ret %a = call <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double> %x, metadata !"fpexcept.strict") ret <2 x double> %a } declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata) define <4 x double> @floor_v4f64(<4 x double> %x) strictfp { -; CHECK-LABEL: floor_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: floor_v4f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI13_0) +; RV32-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vmflt.vf v0, v10, fa5 +; RV32-NEXT: fsrmi a0, 2 +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: floor_v4f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v10, fa5 +; RV64-NEXT: fsrmi a0, 2 +; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: ret %a = call <4 x double> @llvm.experimental.constrained.floor.v4f64(<4 x double> %x, metadata !"fpexcept.strict") ret <4 x double> %a } declare <4 x double> @llvm.experimental.constrained.floor.v4f64(<4 x double>, metadata) define <8 x double> @floor_v8f64(<8 x double> %x) strictfp { -; CHECK-LABEL: floor_v8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI14_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: 
vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: floor_v8f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI14_0) +; RV32-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, fa5 +; RV32-NEXT: fsrmi a0, 2 +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: floor_v8f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v12, fa5 +; RV64-NEXT: fsrmi a0, 2 +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: ret %a = call <8 x double> @llvm.experimental.constrained.floor.v8f64(<8 x double> %x, metadata !"fpexcept.strict") ret <8 x double> %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll index 6fc0165d7e77f..4494b97119403 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll @@ -1,22 +1,23 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64ZVFH ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32ZVFHMIN ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64ZVFHMIN declare <2 x half> @llvm.vp.floor.v2f16(<2 x half>, <2 x i1>, i32) define <2 x half> @vp_floor_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_v2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI0_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 
2 @@ -59,10 +60,11 @@ define <2 x half> @vp_floor_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) define <2 x half> @vp_floor_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_v2f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI1_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -99,10 +101,11 @@ declare <4 x half> @llvm.vp.floor.v4f16(<4 x half>, <4 x i1>, i32) define <4 x half> @vp_floor_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_v4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI2_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 2 @@ -145,10 +148,11 @@ define <4 x half> @vp_floor_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) define <4 x half> @vp_floor_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_v4f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI3_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -185,10 +189,11 @@ declare <8 x half> @llvm.vp.floor.v8f16(<8 x half>, <8 x i1>, i32) define <8 x half> @vp_floor_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_v8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI4_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 2 @@ -231,10 +236,11 @@ define <8 x half> @vp_floor_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) define <8 x half> @vp_floor_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_v8f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI5_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -273,9 +279,10 @@ define <16 x half> @vp_floor_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext % ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vmv1r.v v10, v0 -; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; ZVFH-NEXT: vfabs.v v12, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 2 @@ -319,10 +326,11 @@ define <16 x half> @vp_floor_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext % define <16 x half> @vp_floor_v16f16_unmasked(<16 x half> %va, i32 zeroext 
%evl) { ; ZVFH-LABEL: vp_floor_v16f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI7_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t @@ -529,41 +537,141 @@ define <16 x float> @vp_floor_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl declare <2 x double> @llvm.vp.floor.v2f64(<2 x double>, <2 x i1>, i32) define <2 x double> @vp_floor_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_floor_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI16_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI16_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_floor_v2f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI16_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI16_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfabs.v v9, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 2 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_floor_v2f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfabs.v v9, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 2 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_floor_v2f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI16_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI16_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 2 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_floor_v2f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 
+; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 2 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <2 x double> @llvm.vp.floor.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl) ret <2 x double> %v } define <2 x double> @vp_floor_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_floor_v2f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI17_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_floor_v2f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI17_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI17_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfabs.v v9, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 2 +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_floor_v2f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfabs.v v9, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 2 +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_floor_v2f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI17_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI17_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 2 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_floor_v2f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 2 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <2 x double> @llvm.vp.floor.v2f64(<2 x double> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x double> %v } @@ -571,43 +679,149 @@ 
define <2 x double> @vp_floor_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) declare <4 x double> @llvm.vp.floor.v4f64(<4 x double>, <4 x i1>, i32) define <4 x double> @vp_floor_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_floor_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI18_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) -; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_floor_v4f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v10, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI18_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI18_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v12, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 2 +; RV32ZVFH-NEXT: vmv1r.v v0, v10 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_floor_v4f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v10, v0 +; RV64ZVFH-NEXT: vfabs.v v12, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 2 +; RV64ZVFH-NEXT: vmv1r.v v0, v10 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_floor_v4f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v10, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI18_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI18_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 2 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v10 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_floor_v4f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v10, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf 
v10, v12, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 2 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v10 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <4 x double> @llvm.vp.floor.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) ret <4 x double> %v } define <4 x double> @vp_floor_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_floor_v4f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI19_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_floor_v4f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI19_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI19_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFH-NEXT: vfabs.v v10, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 2 +; RV32ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_floor_v4f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFH-NEXT: vfabs.v v10, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 2 +; RV64ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_floor_v4f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI19_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI19_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 2 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_floor_v4f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 2 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <4 x double> @llvm.vp.floor.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v } @@ -615,43 +829,149 @@ define <4 x 
double> @vp_floor_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) declare <8 x double> @llvm.vp.floor.v8f64(<8 x double>, <8 x i1>, i32) define <8 x double> @vp_floor_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_floor_v8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI20_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_floor_v8f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v12, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI20_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI20_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v16, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 2 +; RV32ZVFH-NEXT: vmv1r.v v0, v12 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_floor_v8f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v12, v0 +; RV64ZVFH-NEXT: vfabs.v v16, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 2 +; RV64ZVFH-NEXT: vmv1r.v v0, v12 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_floor_v8f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v12, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI20_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI20_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 2 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v12 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_floor_v8f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v12, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v12, v16, 
fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 2 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v12 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <8 x double> @llvm.vp.floor.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) ret <8 x double> %v } define <8 x double> @vp_floor_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_floor_v8f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI21_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_floor_v8f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI21_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI21_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFH-NEXT: vfabs.v v12, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 2 +; RV32ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_floor_v8f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFH-NEXT: vfabs.v v12, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 2 +; RV64ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_floor_v8f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI21_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI21_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 2 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_floor_v8f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 2 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <8 x double> @llvm.vp.floor.v8f64(<8 x double> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x double> %v } @@ -659,43 +979,149 @@ define <8 x double> 
@vp_floor_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) declare <15 x double> @llvm.vp.floor.v15f64(<15 x double>, <15 x i1>, i32) define <15 x double> @vp_floor_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_floor_v15f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI22_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_floor_v15f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v16, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI22_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI22_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 2 +; RV32ZVFH-NEXT: vmv1r.v v0, v16 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_floor_v15f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v16, v0 +; RV64ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 2 +; RV64ZVFH-NEXT: vmv1r.v v0, v16 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_floor_v15f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI22_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI22_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 2 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_floor_v15f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v16, 
v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 2 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <15 x double> @llvm.vp.floor.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) ret <15 x double> %v } define <15 x double> @vp_floor_v15f64_unmasked(<15 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_floor_v15f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI23_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_floor_v15f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI23_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v16, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 2 +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_floor_v15f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v16, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 2 +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_floor_v15f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI23_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 2 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_floor_v15f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 2 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <15 x double> @llvm.vp.floor.v15f64(<15 x double> %va, <15 x i1> splat (i1 true), i32 %evl) ret <15 x double> %v } @@ -703,43 +1129,149 @@ 
define <15 x double> @vp_floor_v15f64_unmasked(<15 x double> %va, i32 zeroext %e declare <16 x double> @llvm.vp.floor.v16f64(<16 x double>, <16 x i1>, i32) define <16 x double> @vp_floor_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_floor_v16f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI24_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_floor_v16f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v16, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI24_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI24_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 2 +; RV32ZVFH-NEXT: vmv1r.v v0, v16 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_floor_v16f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v16, v0 +; RV64ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 2 +; RV64ZVFH-NEXT: vmv1r.v v0, v16 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_floor_v16f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI24_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI24_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 2 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_floor_v16f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; 
RV64ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 2 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <16 x double> @llvm.vp.floor.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) ret <16 x double> %v } define <16 x double> @vp_floor_v16f64_unmasked(<16 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_floor_v16f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI25_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_floor_v16f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI25_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v16, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 2 +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_floor_v16f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v16, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 2 +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_floor_v16f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI25_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 2 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_floor_v16f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 2 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <16 x double> @llvm.vp.floor.v16f64(<16 x double> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x 
double> %v } @@ -747,91 +1279,341 @@ define <16 x double> @vp_floor_v16f64_unmasked(<16 x double> %va, i32 zeroext %e declare <32 x double> @llvm.vp.floor.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_floor_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_floor_v32f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v6, v0 -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vslidedown.vi v7, v0, 2 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB26_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: lui a1, %hi(.LCPI26_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a0, a0, a1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a1, 2 -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_floor_v32f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v6, v0 +; RV32ZVFH-NEXT: li a2, 16 +; RV32ZVFH-NEXT: vslidedown.vi v7, v0, 2 +; RV32ZVFH-NEXT: mv a1, a0 +; RV32ZVFH-NEXT: bltu a0, a2, .LBB26_2 +; RV32ZVFH-NEXT: # %bb.1: +; RV32ZVFH-NEXT: li a1, 16 +; RV32ZVFH-NEXT: .LBB26_2: +; RV32ZVFH-NEXT: vmv1r.v v0, v6 +; RV32ZVFH-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI26_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; RV32ZVFH-NEXT: addi a1, a0, -16 +; RV32ZVFH-NEXT: sltu a0, a0, a1 +; RV32ZVFH-NEXT: addi a0, a0, -1 +; RV32ZVFH-NEXT: and a0, a0, a1 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a1, 2 +; RV32ZVFH-NEXT: vmv1r.v v0, v6 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a1 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: vmv1r.v v0, v7 +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 2 +; RV32ZVFH-NEXT: vmv1r.v v0, v7 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: 
vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_floor_v32f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v6, v0 +; RV64ZVFH-NEXT: li a2, 16 +; RV64ZVFH-NEXT: vslidedown.vi v7, v0, 2 +; RV64ZVFH-NEXT: mv a1, a0 +; RV64ZVFH-NEXT: bltu a0, a2, .LBB26_2 +; RV64ZVFH-NEXT: # %bb.1: +; RV64ZVFH-NEXT: li a1, 16 +; RV64ZVFH-NEXT: .LBB26_2: +; RV64ZVFH-NEXT: vmv1r.v v0, v6 +; RV64ZVFH-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFH-NEXT: li a1, 1075 +; RV64ZVFH-NEXT: slli a1, a1, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a1 +; RV64ZVFH-NEXT: addi a1, a0, -16 +; RV64ZVFH-NEXT: sltu a0, a0, a1 +; RV64ZVFH-NEXT: addi a0, a0, -1 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV64ZVFH-NEXT: and a0, a0, a1 +; RV64ZVFH-NEXT: fsrmi a1, 2 +; RV64ZVFH-NEXT: vmv1r.v v0, v6 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a1 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: vmv1r.v v0, v7 +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 2 +; RV64ZVFH-NEXT: vmv1r.v v0, v7 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_floor_v32f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v6, v0 +; RV32ZVFHMIN-NEXT: li a2, 16 +; RV32ZVFHMIN-NEXT: vslidedown.vi v7, v0, 2 +; RV32ZVFHMIN-NEXT: mv a1, a0 +; RV32ZVFHMIN-NEXT: bltu a0, a2, .LBB26_2 +; RV32ZVFHMIN-NEXT: # %bb.1: +; RV32ZVFHMIN-NEXT: li a1, 16 +; RV32ZVFHMIN-NEXT: .LBB26_2: +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV32ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI26_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; RV32ZVFHMIN-NEXT: addi a1, a0, -16 +; RV32ZVFHMIN-NEXT: sltu a0, a0, a1 +; RV32ZVFHMIN-NEXT: addi a0, a0, -1 +; RV32ZVFHMIN-NEXT: and a0, a0, a1 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a1, 2 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a1 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 2 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli 
zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_floor_v32f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v6, v0 +; RV64ZVFHMIN-NEXT: li a2, 16 +; RV64ZVFHMIN-NEXT: vslidedown.vi v7, v0, 2 +; RV64ZVFHMIN-NEXT: mv a1, a0 +; RV64ZVFHMIN-NEXT: bltu a0, a2, .LBB26_2 +; RV64ZVFHMIN-NEXT: # %bb.1: +; RV64ZVFHMIN-NEXT: li a1, 16 +; RV64ZVFHMIN-NEXT: .LBB26_2: +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV64ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: li a1, 1075 +; RV64ZVFHMIN-NEXT: slli a1, a1, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a1 +; RV64ZVFHMIN-NEXT: addi a1, a0, -16 +; RV64ZVFHMIN-NEXT: sltu a0, a0, a1 +; RV64ZVFHMIN-NEXT: addi a0, a0, -1 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: and a0, a0, a1 +; RV64ZVFHMIN-NEXT: fsrmi a1, 2 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a1 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 2 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <32 x double> @llvm.vp.floor.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v } define <32 x double> @vp_floor_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_floor_v32f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB27_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB27_2: -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: lui a2, %hi(.LCPI27_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a2) -; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: sltu a0, a0, a2 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a2 -; CHECK-NEXT: fsrmi a2, 2 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v7, v24, fa5 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: fsrmi a1, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_floor_v32f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: li a2, 16 +; 
RV32ZVFH-NEXT: mv a1, a0 +; RV32ZVFH-NEXT: bltu a0, a2, .LBB27_2 +; RV32ZVFH-NEXT: # %bb.1: +; RV32ZVFH-NEXT: li a1, 16 +; RV32ZVFH-NEXT: .LBB27_2: +; RV32ZVFH-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v8 +; RV32ZVFH-NEXT: lui a2, %hi(.LCPI27_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI27_0)(a2) +; RV32ZVFH-NEXT: addi a2, a0, -16 +; RV32ZVFH-NEXT: sltu a0, a0, a2 +; RV32ZVFH-NEXT: addi a0, a0, -1 +; RV32ZVFH-NEXT: and a0, a0, a2 +; RV32ZVFH-NEXT: fsrmi a2, 2 +; RV32ZVFH-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v16 +; RV32ZVFH-NEXT: vmflt.vf v7, v24, fa5 +; RV32ZVFH-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a2 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: fsrmi a1, 2 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: vmv1r.v v0, v7 +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFH-NEXT: fsrm a1 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_floor_v32f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: li a2, 16 +; RV64ZVFH-NEXT: mv a1, a0 +; RV64ZVFH-NEXT: bltu a0, a2, .LBB27_2 +; RV64ZVFH-NEXT: # %bb.1: +; RV64ZVFH-NEXT: li a1, 16 +; RV64ZVFH-NEXT: .LBB27_2: +; RV64ZVFH-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v8 +; RV64ZVFH-NEXT: li a2, 1075 +; RV64ZVFH-NEXT: slli a2, a2, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a2 +; RV64ZVFH-NEXT: addi a2, a0, -16 +; RV64ZVFH-NEXT: sltu a0, a0, a2 +; RV64ZVFH-NEXT: addi a0, a0, -1 +; RV64ZVFH-NEXT: and a0, a0, a2 +; RV64ZVFH-NEXT: fsrmi a2, 2 +; RV64ZVFH-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v16 +; RV64ZVFH-NEXT: vmflt.vf v7, v24, fa5 +; RV64ZVFH-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a2 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: fsrmi a1, 2 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: vmv1r.v v0, v7 +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFH-NEXT: fsrm a1 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_floor_v32f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: li a2, 16 +; RV32ZVFHMIN-NEXT: mv a1, a0 +; RV32ZVFHMIN-NEXT: bltu a0, a2, .LBB27_2 +; RV32ZVFHMIN-NEXT: # %bb.1: +; RV32ZVFHMIN-NEXT: li a1, 16 +; RV32ZVFHMIN-NEXT: .LBB27_2: +; RV32ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8 +; RV32ZVFHMIN-NEXT: lui a2, %hi(.LCPI27_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI27_0)(a2) +; RV32ZVFHMIN-NEXT: addi a2, a0, -16 +; RV32ZVFHMIN-NEXT: sltu a0, a0, a2 +; RV32ZVFHMIN-NEXT: addi a0, a0, -1 +; RV32ZVFHMIN-NEXT: and a0, a0, a2 +; RV32ZVFHMIN-NEXT: fsrmi a2, 2 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v16 +; RV32ZVFHMIN-NEXT: vmflt.vf v7, v24, fa5 +; RV32ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; 
RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a2 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a1, 2 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFHMIN-NEXT: fsrm a1 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_floor_v32f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: li a2, 16 +; RV64ZVFHMIN-NEXT: mv a1, a0 +; RV64ZVFHMIN-NEXT: bltu a0, a2, .LBB27_2 +; RV64ZVFHMIN-NEXT: # %bb.1: +; RV64ZVFHMIN-NEXT: li a1, 16 +; RV64ZVFHMIN-NEXT: .LBB27_2: +; RV64ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8 +; RV64ZVFHMIN-NEXT: li a2, 1075 +; RV64ZVFHMIN-NEXT: slli a2, a2, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a2 +; RV64ZVFHMIN-NEXT: addi a2, a0, -16 +; RV64ZVFHMIN-NEXT: sltu a0, a0, a2 +; RV64ZVFHMIN-NEXT: addi a0, a0, -1 +; RV64ZVFHMIN-NEXT: and a0, a0, a2 +; RV64ZVFHMIN-NEXT: fsrmi a2, 2 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v16 +; RV64ZVFHMIN-NEXT: vmflt.vf v7, v24, fa5 +; RV64ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a2 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a1, 2 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFHMIN-NEXT: fsrm a1 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <32 x double> @llvm.vp.floor.v32f64(<32 x double> %va, <32 x i1> splat (i1 true), i32 %evl) ret <32 x double> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll index 3a7ded1537ef6..dd1b99bee6d55 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV32 %s ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64 %s declare <2 x half> @llvm.experimental.constrained.nearbyint.v2f16(<2 x half>, metadata, metadata) @@ -11,10 +11,11 @@ define <2 x half> @nearbyint_v2f16(<2 x half> %v) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; 
CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma @@ -35,10 +36,11 @@ define <4 x half> @nearbyint_v4f16(<4 x half> %v) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma @@ -59,10 +61,11 @@ define <8 x half> @nearbyint_v8f16(<8 x half> %v) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -83,10 +86,11 @@ define <16 x half> @nearbyint_v16f16(<16 x half> %v) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma @@ -106,11 +110,12 @@ define <32 x half> @nearbyint_v32f16(<32 x half> %v) strictfp { ; CHECK-LABEL: nearbyint_v32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: lui a1, %hi(.LCPI4_0) +; CHECK-NEXT: li a1, 25 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a1) +; CHECK-NEXT: slli a1, a1, 10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fmv.h.x fa5, a1 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: frflags a0 @@ -224,23 +229,42 @@ define <16 x float> @nearbyint_v16f32(<16 x float> %v) strictfp { declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata) define <2 x double> @nearbyint_v2f64(<2 x double> %v) strictfp { -; CHECK-LABEL: nearbyint_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI9_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI9_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: ret +; RV32-LABEL: nearbyint_v2f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI9_0) +; RV32-NEXT: fld fa5, %lo(.LCPI9_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: frflags a0 +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma 
+; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: fsflags a0 +; RV32-NEXT: ret +; +; RV64-LABEL: nearbyint_v2f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: frflags a0 +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: fsflags a0 +; RV64-NEXT: ret %r = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <2 x double> %r } @@ -248,23 +272,42 @@ define <2 x double> @nearbyint_v2f64(<2 x double> %v) strictfp { declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double>, metadata, metadata) define <4 x double> @nearbyint_v4f64(<4 x double> %v) strictfp { -; CHECK-LABEL: nearbyint_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI10_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI10_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: ret +; RV32-LABEL: nearbyint_v4f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI10_0) +; RV32-NEXT: fld fa5, %lo(.LCPI10_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vmflt.vf v0, v10, fa5 +; RV32-NEXT: frflags a0 +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: fsflags a0 +; RV32-NEXT: ret +; +; RV64-LABEL: nearbyint_v4f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v10, fa5 +; RV64-NEXT: frflags a0 +; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: fsflags a0 +; RV64-NEXT: ret %r = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <4 x double> %r } @@ -272,23 +315,42 @@ define <4 x double> @nearbyint_v4f64(<4 x double> %v) strictfp { declare <8 x double> @llvm.experimental.constrained.nearbyint.v8f64(<8 x double>, metadata, metadata) define <8 x double> @nearbyint_v8f64(<8 x double> %v) strictfp { -; CHECK-LABEL: nearbyint_v8f64: -; 
CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI11_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: ret +; RV32-LABEL: nearbyint_v8f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI11_0) +; RV32-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, fa5 +; RV32-NEXT: frflags a0 +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: fsflags a0 +; RV32-NEXT: ret +; +; RV64-LABEL: nearbyint_v8f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v12, fa5 +; RV64-NEXT: frflags a0 +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: fsflags a0 +; RV64-NEXT: ret %r = call <8 x double> @llvm.experimental.constrained.nearbyint.v8f64(<8 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <8 x double> %r } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll index 38df622998bf9..dd415116c2327 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32ZVFH +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64ZVFH +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32ZVFHMIN +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64ZVFHMIN 
define void @fadd_v8bf16(ptr %x, ptr %y) { @@ -3925,8 +3925,9 @@ define void @trunc_v8f16(ptr %x) { ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: lui a1, %hi(.LCPI171_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI171_0)(a1) +; ZVFH-NEXT: li a1, 25 +; ZVFH-NEXT: slli a1, a1, 10 +; ZVFH-NEXT: fmv.h.x fa5, a1 ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t @@ -3965,8 +3966,9 @@ define void @trunc_v6f16(ptr %x) { ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: lui a1, %hi(.LCPI172_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI172_0)(a1) +; ZVFH-NEXT: li a1, 25 +; ZVFH-NEXT: slli a1, a1, 10 +; ZVFH-NEXT: fmv.h.x fa5, a1 ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t @@ -4022,20 +4024,67 @@ define void @trunc_v4f32(ptr %x) { } define void @trunc_v2f64(ptr %x) { -; CHECK-LABEL: trunc_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: lui a1, %hi(.LCPI174_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI174_0)(a1) -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: vse64.v v8, (a0) -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: trunc_v2f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32ZVFH-NEXT: vle64.v v8, (a0) +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI174_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI174_0)(a1) +; RV32ZVFH-NEXT: vfabs.v v9, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: vse64.v v8, (a0) +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: trunc_v2f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64ZVFH-NEXT: vle64.v v8, (a0) +; RV64ZVFH-NEXT: li a1, 1075 +; RV64ZVFH-NEXT: slli a1, a1, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a1 +; RV64ZVFH-NEXT: vfabs.v v9, v8 +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: vse64.v v8, (a0) +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: trunc_v2f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vle64.v v8, (a0) +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI174_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI174_0)(a1) +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: vse64.v v8, (a0) +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: trunc_v2f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vle64.v v8, (a0) +; RV64ZVFHMIN-NEXT: li a1, 1075 +; RV64ZVFHMIN-NEXT: slli a1, a1, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a1 +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFHMIN-NEXT: 
vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: vse64.v v8, (a0) +; RV64ZVFHMIN-NEXT: ret %a = load <2 x double>, ptr %x %b = call <2 x double> @llvm.trunc.v2f64(<2 x double> %a) store <2 x double> %b, ptr %x @@ -4101,8 +4150,9 @@ define void @ceil_v8f16(ptr %x) { ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: lui a1, %hi(.LCPI177_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI177_0)(a1) +; ZVFH-NEXT: li a1, 25 +; ZVFH-NEXT: slli a1, a1, 10 +; ZVFH-NEXT: fmv.h.x fa5, a1 ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a1, 3 @@ -4145,8 +4195,9 @@ define void @ceil_v6f16(ptr %x) { ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: lui a1, %hi(.LCPI178_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI178_0)(a1) +; ZVFH-NEXT: li a1, 25 +; ZVFH-NEXT: slli a1, a1, 10 +; ZVFH-NEXT: fmv.h.x fa5, a1 ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a1, 3 @@ -4208,22 +4259,75 @@ define void @ceil_v4f32(ptr %x) { } define void @ceil_v2f64(ptr %x) { -; CHECK-LABEL: ceil_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: lui a1, %hi(.LCPI180_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI180_0)(a1) -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a1, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: vse64.v v8, (a0) -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: ceil_v2f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32ZVFH-NEXT: vle64.v v8, (a0) +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI180_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI180_0)(a1) +; RV32ZVFH-NEXT: vfabs.v v9, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFH-NEXT: fsrmi a1, 3 +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: fsrm a1 +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: vse64.v v8, (a0) +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: ceil_v2f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64ZVFH-NEXT: vle64.v v8, (a0) +; RV64ZVFH-NEXT: li a1, 1075 +; RV64ZVFH-NEXT: slli a1, a1, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a1 +; RV64ZVFH-NEXT: vfabs.v v9, v8 +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFH-NEXT: fsrmi a1, 3 +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: fsrm a1 +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: vse64.v v8, (a0) +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: ceil_v2f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vle64.v v8, (a0) +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI180_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI180_0)(a1) +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a1, 3 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a1 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, 
ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: vse64.v v8, (a0) +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: ceil_v2f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vle64.v v8, (a0) +; RV64ZVFHMIN-NEXT: li a1, 1075 +; RV64ZVFHMIN-NEXT: slli a1, a1, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a1 +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a1, 3 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a1 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: vse64.v v8, (a0) +; RV64ZVFHMIN-NEXT: ret %a = load <2 x double>, ptr %x %b = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a) store <2 x double> %b, ptr %x @@ -4289,8 +4393,9 @@ define void @floor_v8f16(ptr %x) { ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: lui a1, %hi(.LCPI183_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI183_0)(a1) +; ZVFH-NEXT: li a1, 25 +; ZVFH-NEXT: slli a1, a1, 10 +; ZVFH-NEXT: fmv.h.x fa5, a1 ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a1, 2 @@ -4333,8 +4438,9 @@ define void @floor_v6f16(ptr %x) { ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: lui a1, %hi(.LCPI184_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI184_0)(a1) +; ZVFH-NEXT: li a1, 25 +; ZVFH-NEXT: slli a1, a1, 10 +; ZVFH-NEXT: fmv.h.x fa5, a1 ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a1, 2 @@ -4396,22 +4502,75 @@ define void @floor_v4f32(ptr %x) { } define void @floor_v2f64(ptr %x) { -; CHECK-LABEL: floor_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: lui a1, %hi(.LCPI186_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI186_0)(a1) -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a1, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: vse64.v v8, (a0) -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: floor_v2f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32ZVFH-NEXT: vle64.v v8, (a0) +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI186_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI186_0)(a1) +; RV32ZVFH-NEXT: vfabs.v v9, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFH-NEXT: fsrmi a1, 2 +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: fsrm a1 +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: vse64.v v8, (a0) +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: floor_v2f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64ZVFH-NEXT: vle64.v v8, (a0) +; RV64ZVFH-NEXT: li a1, 1075 +; RV64ZVFH-NEXT: slli a1, a1, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a1 +; RV64ZVFH-NEXT: vfabs.v v9, v8 +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFH-NEXT: fsrmi a1, 2 +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: fsrm a1 +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: vse64.v v8, (a0) +; 
RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: floor_v2f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vle64.v v8, (a0) +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI186_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI186_0)(a1) +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a1, 2 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a1 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: vse64.v v8, (a0) +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: floor_v2f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vle64.v v8, (a0) +; RV64ZVFHMIN-NEXT: li a1, 1075 +; RV64ZVFHMIN-NEXT: slli a1, a1, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a1 +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a1, 2 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a1 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: vse64.v v8, (a0) +; RV64ZVFHMIN-NEXT: ret %a = load <2 x double>, ptr %x %b = call <2 x double> @llvm.floor.v2f64(<2 x double> %a) store <2 x double> %b, ptr %x @@ -4477,8 +4636,9 @@ define void @round_v8f16(ptr %x) { ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: lui a1, %hi(.LCPI189_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI189_0)(a1) +; ZVFH-NEXT: li a1, 25 +; ZVFH-NEXT: slli a1, a1, 10 +; ZVFH-NEXT: fmv.h.x fa5, a1 ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a1, 4 @@ -4521,8 +4681,9 @@ define void @round_v6f16(ptr %x) { ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: lui a1, %hi(.LCPI190_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI190_0)(a1) +; ZVFH-NEXT: li a1, 25 +; ZVFH-NEXT: slli a1, a1, 10 +; ZVFH-NEXT: fmv.h.x fa5, a1 ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a1, 4 @@ -4584,22 +4745,75 @@ define void @round_v4f32(ptr %x) { } define void @round_v2f64(ptr %x) { -; CHECK-LABEL: round_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: lui a1, %hi(.LCPI192_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI192_0)(a1) -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a1, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: vse64.v v8, (a0) -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: round_v2f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32ZVFH-NEXT: vle64.v v8, (a0) +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI192_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI192_0)(a1) +; RV32ZVFH-NEXT: vfabs.v v9, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFH-NEXT: fsrmi a1, 4 +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: fsrm a1 +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: vse64.v v8, (a0) +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: round_v2f64: +; RV64ZVFH: 
# %bb.0: +; RV64ZVFH-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64ZVFH-NEXT: vle64.v v8, (a0) +; RV64ZVFH-NEXT: li a1, 1075 +; RV64ZVFH-NEXT: slli a1, a1, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a1 +; RV64ZVFH-NEXT: vfabs.v v9, v8 +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFH-NEXT: fsrmi a1, 4 +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: fsrm a1 +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: vse64.v v8, (a0) +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: round_v2f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vle64.v v8, (a0) +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI192_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI192_0)(a1) +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a1, 4 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a1 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: vse64.v v8, (a0) +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: round_v2f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vle64.v v8, (a0) +; RV64ZVFHMIN-NEXT: li a1, 1075 +; RV64ZVFHMIN-NEXT: slli a1, a1, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a1 +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a1, 4 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a1 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: vse64.v v8, (a0) +; RV64ZVFHMIN-NEXT: ret %a = load <2 x double>, ptr %x %b = call <2 x double> @llvm.round.v2f64(<2 x double> %a) store <2 x double> %b, ptr %x @@ -4636,8 +4850,9 @@ define void @rint_v8f16(ptr %x) { ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: lui a1, %hi(.LCPI194_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI194_0)(a1) +; ZVFH-NEXT: li a1, 25 +; ZVFH-NEXT: slli a1, a1, 10 +; ZVFH-NEXT: fmv.h.x fa5, a1 ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -4693,20 +4908,67 @@ define void @rint_v4f32(ptr %x) { } define void @rint_v2f64(ptr %x) { -; CHECK-LABEL: rint_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: lui a1, %hi(.LCPI196_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI196_0)(a1) -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: vse64.v v8, (a0) -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: rint_v2f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32ZVFH-NEXT: vle64.v v8, (a0) +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI196_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI196_0)(a1) +; RV32ZVFH-NEXT: vfabs.v v9, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: vse64.v v8, (a0) +; RV32ZVFH-NEXT: ret +; 
+; RV64ZVFH-LABEL: rint_v2f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64ZVFH-NEXT: vle64.v v8, (a0) +; RV64ZVFH-NEXT: li a1, 1075 +; RV64ZVFH-NEXT: slli a1, a1, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a1 +; RV64ZVFH-NEXT: vfabs.v v9, v8 +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: vse64.v v8, (a0) +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: rint_v2f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vle64.v v8, (a0) +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI196_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI196_0)(a1) +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: vse64.v v8, (a0) +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: rint_v2f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vle64.v v8, (a0) +; RV64ZVFHMIN-NEXT: li a1, 1075 +; RV64ZVFHMIN-NEXT: slli a1, a1, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a1 +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: vse64.v v8, (a0) +; RV64ZVFHMIN-NEXT: ret %a = load <2 x double>, ptr %x %b = call <2 x double> @llvm.rint.v2f64(<2 x double> %a) store <2 x double> %b, ptr %x @@ -4745,8 +5007,9 @@ define void @nearbyint_v8f16(ptr %x) { ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: lui a1, %hi(.LCPI198_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI198_0)(a1) +; ZVFH-NEXT: li a1, 25 +; ZVFH-NEXT: slli a1, a1, 10 +; ZVFH-NEXT: fmv.h.x fa5, a1 ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: frflags a1 @@ -4808,22 +5071,75 @@ define void @nearbyint_v4f32(ptr %x) { } define void @nearbyint_v2f64(ptr %x) { -; CHECK-LABEL: nearbyint_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: lui a1, %hi(.LCPI200_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI200_0)(a1) -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: frflags a1 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a1 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: vse64.v v8, (a0) -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: nearbyint_v2f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32ZVFH-NEXT: vle64.v v8, (a0) +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI200_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI200_0)(a1) +; RV32ZVFH-NEXT: vfabs.v v9, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFH-NEXT: frflags a1 +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: fsflags a1 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: vse64.v v8, (a0) +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: 
nearbyint_v2f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64ZVFH-NEXT: vle64.v v8, (a0) +; RV64ZVFH-NEXT: li a1, 1075 +; RV64ZVFH-NEXT: slli a1, a1, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a1 +; RV64ZVFH-NEXT: vfabs.v v9, v8 +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFH-NEXT: frflags a1 +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: fsflags a1 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: vse64.v v8, (a0) +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: nearbyint_v2f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vle64.v v8, (a0) +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI200_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI200_0)(a1) +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFHMIN-NEXT: frflags a1 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: fsflags a1 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: vse64.v v8, (a0) +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: nearbyint_v2f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vle64.v v8, (a0) +; RV64ZVFHMIN-NEXT: li a1, 1075 +; RV64ZVFHMIN-NEXT: slli a1, a1, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a1 +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFHMIN-NEXT: frflags a1 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: fsflags a1 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: vse64.v v8, (a0) +; RV64ZVFHMIN-NEXT: ret %a = load <2 x double>, ptr %x %b = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a) store <2 x double> %b, ptr %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-constrained-sdnode.ll index be32c033fe373..c0b67dd603ebb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-constrained-sdnode.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV32 %s ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64 %s ; This file tests the code generation for `llvm.experimental.constrained.round.*` on scalable vector type. 
@@ -11,10 +11,11 @@ define <1 x half> @round_v1f16(<1 x half> %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma @@ -34,10 +35,11 @@ define <2 x half> @round_v2f16(<2 x half> %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma @@ -57,10 +59,11 @@ define <4 x half> @round_v4f16(<4 x half> %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma @@ -80,10 +83,11 @@ define <8 x half> @round_v8f16(<8 x half> %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -103,10 +107,11 @@ define <16 x half> @round_v16f16(<16 x half> %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma @@ -125,11 +130,12 @@ define <32 x half> @round_v32f16(<32 x half> %x) strictfp { ; CHECK-LABEL: round_v32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: lui a1, %hi(.LCPI5_0) +; CHECK-NEXT: li a1, 25 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) +; CHECK-NEXT: slli a1, a1, 10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fmv.h.x fa5, a1 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 @@ -261,92 +267,168 @@ define <16 x float> @round_v16f32(<16 x float> %x) strictfp { declare <16 x float> @llvm.experimental.constrained.round.v16f32(<16 x float>, metadata) define <1 x double> @round_v1f64(<1 x double> %x) strictfp { -; CHECK-LABEL: round_v1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, 
%hi(.LCPI11_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: round_v1f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI11_0) +; RV32-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: fsrmi a0, 4 +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: round_v1f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: fsrmi a0, 4 +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: ret %a = call <1 x double> @llvm.experimental.constrained.round.v1f64(<1 x double> %x, metadata !"fpexcept.strict") ret <1 x double> %a } declare <1 x double> @llvm.experimental.constrained.round.v1f64(<1 x double>, metadata) define <2 x double> @round_v2f64(<2 x double> %x) strictfp { -; CHECK-LABEL: round_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: round_v2f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI12_0) +; RV32-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: fsrmi a0, 4 +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: round_v2f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: fsrmi a0, 4 +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; 
RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: ret %a = call <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double> %x, metadata !"fpexcept.strict") ret <2 x double> %a } declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, metadata) define <4 x double> @round_v4f64(<4 x double> %x) strictfp { -; CHECK-LABEL: round_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: round_v4f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI13_0) +; RV32-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vmflt.vf v0, v10, fa5 +; RV32-NEXT: fsrmi a0, 4 +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: round_v4f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v10, fa5 +; RV64-NEXT: fsrmi a0, 4 +; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: ret %a = call <4 x double> @llvm.experimental.constrained.round.v4f64(<4 x double> %x, metadata !"fpexcept.strict") ret <4 x double> %a } declare <4 x double> @llvm.experimental.constrained.round.v4f64(<4 x double>, metadata) define <8 x double> @round_v8f64(<8 x double> %x) strictfp { -; CHECK-LABEL: round_v8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI14_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: round_v8f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI14_0) +; RV32-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, fa5 +; RV32-NEXT: 
fsrmi a0, 4 +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: round_v8f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v12, fa5 +; RV64-NEXT: fsrmi a0, 4 +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: ret %a = call <8 x double> @llvm.experimental.constrained.round.v8f64(<8 x double> %x, metadata !"fpexcept.strict") ret <8 x double> %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround.ll index 774ce5c7859c9..455dc0b83c03d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround.ll @@ -1,22 +1,23 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64ZVFH ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32ZVFHMIN ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64ZVFHMIN ; This file tests the code generation for `llvm.round.*` on fixed vector type. 
define <1 x half> @round_v1f16(<1 x half> %x) { ; ZVFH-LABEL: round_v1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI0_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; ZVFH-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -52,10 +53,11 @@ declare <1 x half> @llvm.round.v1f16(<1 x half>) define <2 x half> @round_v2f16(<2 x half> %x) { ; ZVFH-LABEL: round_v2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI1_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -91,10 +93,11 @@ declare <2 x half> @llvm.round.v2f16(<2 x half>) define <4 x half> @round_v4f16(<4 x half> %x) { ; ZVFH-LABEL: round_v4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI2_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -130,10 +133,11 @@ declare <4 x half> @llvm.round.v4f16(<4 x half>) define <8 x half> @round_v8f16(<8 x half> %x) { ; ZVFH-LABEL: round_v8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI3_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -169,10 +173,11 @@ declare <8 x half> @llvm.round.v8f16(<8 x half>) define <16 x half> @round_v16f16(<16 x half> %x) { ; ZVFH-LABEL: round_v16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI4_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t @@ -208,11 +213,12 @@ declare <16 x half> @llvm.round.v16f16(<16 x half>) define <32 x half> @round_v32f16(<32 x half> %x) { ; ZVFH-LABEL: round_v32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI5_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; ZVFH-NEXT: li a0, 32 +; ZVFH-NEXT: li a1, 25 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 +; ZVFH-NEXT: slli a1, a1, 10 +; ZVFH-NEXT: fmv.h.x fa5, a1 ; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -347,80 +353,268 @@ define <16 x float> @round_v16f32(<16 x float> %x) { declare <16 x float> @llvm.round.v16f32(<16 x float>) define <1 x double> @round_v1f64(<1 x double> %x) { -; CHECK-LABEL: round_v1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI11_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; 
CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: round_v1f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI11_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; RV32ZVFH-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfabs.v v9, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 4 +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: round_v1f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfabs.v v9, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 4 +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: round_v1f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI11_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; RV32ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 4 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: round_v1f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 4 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %a = call <1 x double> @llvm.round.v1f64(<1 x double> %x) ret <1 x double> %a } declare <1 x double> @llvm.round.v1f64(<1 x double>) define <2 x double> @round_v2f64(<2 x double> %x) { -; CHECK-LABEL: round_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: round_v2f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI12_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; RV32ZVFH-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfabs.v v9, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 4 +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: round_v2f64: +; RV64ZVFH: # %bb.0: +; 
RV64ZVFH-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfabs.v v9, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 4 +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: round_v2f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI12_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; RV32ZVFHMIN-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 4 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: round_v2f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 4 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %a = call <2 x double> @llvm.round.v2f64(<2 x double> %x) ret <2 x double> %a } declare <2 x double> @llvm.round.v2f64(<2 x double>) define <4 x double> @round_v4f64(<4 x double> %x) { -; CHECK-LABEL: round_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: round_v4f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI13_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; RV32ZVFH-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32ZVFH-NEXT: vfabs.v v10, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 4 +; RV32ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: round_v4f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64ZVFH-NEXT: vfabs.v v10, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 4 +; RV64ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: round_v4f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI13_0) +; RV32ZVFHMIN-NEXT: fld fa5, 
%lo(.LCPI13_0)(a0) +; RV32ZVFHMIN-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 4 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: round_v4f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 4 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %a = call <4 x double> @llvm.round.v4f64(<4 x double> %x) ret <4 x double> %a } declare <4 x double> @llvm.round.v4f64(<4 x double>) define <8 x double> @round_v8f64(<8 x double> %x) { -; CHECK-LABEL: round_v8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI14_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: round_v8f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI14_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; RV32ZVFH-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32ZVFH-NEXT: vfabs.v v12, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 4 +; RV32ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: round_v8f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64ZVFH-NEXT: vfabs.v v12, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 4 +; RV64ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: round_v8f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI14_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; RV32ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 4 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: round_v8f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV64ZVFHMIN-NEXT: li 
a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 4 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %a = call <8 x double> @llvm.round.v8f64(<8 x double> %x) ret <8 x double> %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll index 5c0279e133dfa..b1d35d3bcdc1d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV32 %s ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64 %s ; This file tests the code generation for `llvm.experimental.constrained.roundeven.*` on scalable vector type. @@ -11,10 +11,11 @@ define <1 x half> @roundeven_v1f16(<1 x half> %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma @@ -34,10 +35,11 @@ define <2 x half> @roundeven_v2f16(<2 x half> %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma @@ -57,10 +59,11 @@ define <4 x half> @roundeven_v4f16(<4 x half> %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma @@ -80,10 +83,11 @@ define <8 x half> @roundeven_v8f16(<8 x half> %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, 
zero, e16, m1, ta, ma @@ -103,10 +107,11 @@ define <16 x half> @roundeven_v16f16(<16 x half> %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma @@ -125,11 +130,12 @@ define <32 x half> @roundeven_v32f16(<32 x half> %x) strictfp { ; CHECK-LABEL: roundeven_v32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: lui a1, %hi(.LCPI5_0) +; CHECK-NEXT: li a1, 25 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) +; CHECK-NEXT: slli a1, a1, 10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fmv.h.x fa5, a1 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 @@ -261,92 +267,168 @@ define <16 x float> @roundeven_v16f32(<16 x float> %x) strictfp { declare <16 x float> @llvm.experimental.constrained.roundeven.v16f32(<16 x float>, metadata) define <1 x double> @roundeven_v1f64(<1 x double> %x) strictfp { -; CHECK-LABEL: roundeven_v1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI11_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: roundeven_v1f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI11_0) +; RV32-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: fsrmi a0, 0 +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: roundeven_v1f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: fsrmi a0, 0 +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: ret %a = call <1 x double> @llvm.experimental.constrained.roundeven.v1f64(<1 x double> %x, metadata !"fpexcept.strict") ret <1 x double> %a } declare <1 x double> @llvm.experimental.constrained.roundeven.v1f64(<1 x double>, metadata) define <2 x double> @roundeven_v2f64(<2 x double> %x) strictfp { -; CHECK-LABEL: roundeven_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, 
e64, m1, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: roundeven_v2f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI12_0) +; RV32-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: fsrmi a0, 0 +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: roundeven_v2f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: fsrmi a0, 0 +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: ret %a = call <2 x double> @llvm.experimental.constrained.roundeven.v2f64(<2 x double> %x, metadata !"fpexcept.strict") ret <2 x double> %a } declare <2 x double> @llvm.experimental.constrained.roundeven.v2f64(<2 x double>, metadata) define <4 x double> @roundeven_v4f64(<4 x double> %x) strictfp { -; CHECK-LABEL: roundeven_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: roundeven_v4f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI13_0) +; RV32-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vmflt.vf v0, v10, fa5 +; RV32-NEXT: fsrmi a0, 0 +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: roundeven_v4f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: fmv.d.x fa5, 
a0 +; RV64-NEXT: vmflt.vf v0, v10, fa5 +; RV64-NEXT: fsrmi a0, 0 +; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: ret %a = call <4 x double> @llvm.experimental.constrained.roundeven.v4f64(<4 x double> %x, metadata !"fpexcept.strict") ret <4 x double> %a } declare <4 x double> @llvm.experimental.constrained.roundeven.v4f64(<4 x double>, metadata) define <8 x double> @roundeven_v8f64(<8 x double> %x) strictfp { -; CHECK-LABEL: roundeven_v8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI14_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: roundeven_v8f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI14_0) +; RV32-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, fa5 +; RV32-NEXT: fsrmi a0, 0 +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: roundeven_v8f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v12, fa5 +; RV64-NEXT: fsrmi a0, 0 +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: ret %a = call <8 x double> @llvm.experimental.constrained.roundeven.v8f64(<8 x double> %x, metadata !"fpexcept.strict") ret <8 x double> %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven.ll index 0b6baad127643..f8b3cb5897dfa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven.ll @@ -1,22 +1,23 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64ZVFH ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: 
-verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32ZVFHMIN ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64ZVFHMIN ; This file tests the code generation for `llvm.roundeven.*` on fixed vector type. define <1 x half> @roundeven_v1f16(<1 x half> %x) { ; ZVFH-LABEL: roundeven_v1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI0_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; ZVFH-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -52,10 +53,11 @@ declare <1 x half> @llvm.roundeven.v1f16(<1 x half>) define <2 x half> @roundeven_v2f16(<2 x half> %x) { ; ZVFH-LABEL: roundeven_v2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI1_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -91,10 +93,11 @@ declare <2 x half> @llvm.roundeven.v2f16(<2 x half>) define <4 x half> @roundeven_v4f16(<4 x half> %x) { ; ZVFH-LABEL: roundeven_v4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI2_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -130,10 +133,11 @@ declare <4 x half> @llvm.roundeven.v4f16(<4 x half>) define <8 x half> @roundeven_v8f16(<8 x half> %x) { ; ZVFH-LABEL: roundeven_v8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI3_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -169,10 +173,11 @@ declare <8 x half> @llvm.roundeven.v8f16(<8 x half>) define <16 x half> @roundeven_v16f16(<16 x half> %x) { ; ZVFH-LABEL: roundeven_v16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI4_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t @@ -208,11 +213,12 @@ declare <16 x half> @llvm.roundeven.v16f16(<16 x half>) define <32 x half> @roundeven_v32f16(<32 x half> %x) { ; ZVFH-LABEL: roundeven_v32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI5_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; ZVFH-NEXT: li a0, 32 +; ZVFH-NEXT: li a1, 25 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 +; ZVFH-NEXT: slli a1, a1, 10 +; ZVFH-NEXT: fmv.h.x fa5, a1 ; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -347,80 +353,268 @@ define <16 x float> 
@roundeven_v16f32(<16 x float> %x) { declare <16 x float> @llvm.roundeven.v16f32(<16 x float>) define <1 x double> @roundeven_v1f64(<1 x double> %x) { -; CHECK-LABEL: roundeven_v1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI11_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: roundeven_v1f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI11_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; RV32ZVFH-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfabs.v v9, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 0 +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: roundeven_v1f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfabs.v v9, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 0 +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: roundeven_v1f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI11_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; RV32ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 0 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: roundeven_v1f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 0 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %a = call <1 x double> @llvm.roundeven.v1f64(<1 x double> %x) ret <1 x double> %a } declare <1 x double> @llvm.roundeven.v1f64(<1 x double>) define <2 x double> @roundeven_v2f64(<2 x double> %x) { -; CHECK-LABEL: roundeven_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t 
-; CHECK-NEXT: ret +; RV32ZVFH-LABEL: roundeven_v2f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI12_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; RV32ZVFH-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfabs.v v9, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 0 +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: roundeven_v2f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfabs.v v9, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 0 +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: roundeven_v2f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI12_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; RV32ZVFHMIN-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 0 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: roundeven_v2f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 0 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %a = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %x) ret <2 x double> %a } declare <2 x double> @llvm.roundeven.v2f64(<2 x double>) define <4 x double> @roundeven_v4f64(<4 x double> %x) { -; CHECK-LABEL: roundeven_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: roundeven_v4f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI13_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; RV32ZVFH-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32ZVFH-NEXT: vfabs.v v10, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 0 +; RV32ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: roundeven_v4f64: +; RV64ZVFH: # 
%bb.0: +; RV64ZVFH-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64ZVFH-NEXT: vfabs.v v10, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 0 +; RV64ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: roundeven_v4f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI13_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; RV32ZVFHMIN-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 0 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: roundeven_v4f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 0 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %a = call <4 x double> @llvm.roundeven.v4f64(<4 x double> %x) ret <4 x double> %a } declare <4 x double> @llvm.roundeven.v4f64(<4 x double>) define <8 x double> @roundeven_v8f64(<8 x double> %x) { -; CHECK-LABEL: roundeven_v8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI14_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: roundeven_v8f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI14_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; RV32ZVFH-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32ZVFH-NEXT: vfabs.v v12, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 0 +; RV32ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: roundeven_v8f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64ZVFH-NEXT: vfabs.v v12, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 0 +; RV64ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: roundeven_v8f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui 
a0, %hi(.LCPI14_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; RV32ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 0 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: roundeven_v8f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 0 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %a = call <8 x double> @llvm.roundeven.v8f64(<8 x double> %x) ret <8 x double> %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ftrunc-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ftrunc-constrained-sdnode.ll index 2173887e85417..b7cf84fba4210 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ftrunc-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ftrunc-constrained-sdnode.ll @@ -1,18 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV32 %s ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64 %s define <1 x half> @trunc_v1f16(<1 x half> %x) strictfp { ; CHECK-LABEL: trunc_v1f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t @@ -30,10 +31,11 @@ define <2 x half> @trunc_v2f16(<2 x half> %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t @@ -51,10 +53,11 @@ define <4 x half> @trunc_v4f16(<4 x half> %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; 
CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t @@ -72,10 +75,11 @@ define <8 x half> @trunc_v8f16(<8 x half> %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t @@ -93,10 +97,11 @@ define <16 x half> @trunc_v16f16(<16 x half> %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t @@ -113,11 +118,12 @@ define <32 x half> @trunc_v32f16(<32 x half> %x) strictfp { ; CHECK-LABEL: trunc_v32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: lui a1, %hi(.LCPI5_0) +; CHECK-NEXT: li a1, 25 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) +; CHECK-NEXT: slli a1, a1, 10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fmv.h.x fa5, a1 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma @@ -237,84 +243,152 @@ define <16 x float> @trunc_v16f32(<16 x float> %x) strictfp { declare <16 x float> @llvm.experimental.constrained.trunc.v16f32(<16 x float>, metadata) define <1 x double> @trunc_v1f64(<1 x double> %x) strictfp { -; CHECK-LABEL: trunc_v1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI11_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: trunc_v1f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI11_0) +; RV32-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_v1f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; 
RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: ret %a = call <1 x double> @llvm.experimental.constrained.trunc.v1f64(<1 x double> %x, metadata !"fpexcept.strict") ret <1 x double> %a } declare <1 x double> @llvm.experimental.constrained.trunc.v1f64(<1 x double>, metadata) define <2 x double> @trunc_v2f64(<2 x double> %x) strictfp { -; CHECK-LABEL: trunc_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: trunc_v2f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI12_0) +; RV32-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_v2f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: ret %a = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double> %x, metadata !"fpexcept.strict") ret <2 x double> %a } declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, metadata) define <4 x double> @trunc_v4f64(<4 x double> %x) strictfp { -; CHECK-LABEL: trunc_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: trunc_v4f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI13_0) +; RV32-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vmflt.vf v0, v10, fa5 +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_v4f64: +; RV64: # %bb.0: +; RV64-NEXT: 
vsetivli zero, 4, e64, m2, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v10, fa5 +; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: ret %a = call <4 x double> @llvm.experimental.constrained.trunc.v4f64(<4 x double> %x, metadata !"fpexcept.strict") ret <4 x double> %a } declare <4 x double> @llvm.experimental.constrained.trunc.v4f64(<4 x double>, metadata) define <8 x double> @trunc_v8f64(<8 x double> %x) strictfp { -; CHECK-LABEL: trunc_v8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI14_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: trunc_v8f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI14_0) +; RV32-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, fa5 +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_v8f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v12, fa5 +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: ret %a = call <8 x double> @llvm.experimental.constrained.trunc.v8f64(<8 x double> %x, metadata !"fpexcept.strict") ret <8 x double> %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll index b6c441290ee45..08da7d6bc50f7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll @@ -1,18 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV32 %s ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64 %s declare <2 x half> @llvm.vp.nearbyint.v2f16(<2 x half>, <2 x i1>, i32) define <2 x half> @vp_nearbyint_v2f16(<2 x half> %va, <2 x i1> %m, i32 
zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8, v0.t +; CHECK-NEXT: li a0, 25 +; CHECK-NEXT: slli a0, a0, 10 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: frflags a0 @@ -30,10 +31,11 @@ define <2 x half> @vp_nearbyint_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext % define <2 x half> @vp_nearbyint_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v2f16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: li a0, 25 +; CHECK-NEXT: slli a0, a0, 10 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -51,10 +53,11 @@ declare <4 x half> @llvm.vp.nearbyint.v4f16(<4 x half>, <4 x i1>, i32) define <4 x half> @vp_nearbyint_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8, v0.t +; CHECK-NEXT: li a0, 25 +; CHECK-NEXT: slli a0, a0, 10 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: frflags a0 @@ -72,10 +75,11 @@ define <4 x half> @vp_nearbyint_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext % define <4 x half> @vp_nearbyint_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v4f16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: li a0, 25 +; CHECK-NEXT: slli a0, a0, 10 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -93,10 +97,11 @@ declare <8 x half> @llvm.vp.nearbyint.v8f16(<8 x half>, <8 x i1>, i32) define <8 x half> @vp_nearbyint_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8, v0.t +; CHECK-NEXT: li a0, 25 +; CHECK-NEXT: slli a0, a0, 10 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: frflags a0 @@ -114,10 +119,11 @@ define <8 x half> @vp_nearbyint_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext % define <8 x half> @vp_nearbyint_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v8f16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI5_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: li a0, 25 +; CHECK-NEXT: slli a0, a0, 10 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -137,9 +143,10 @@ define <16 x half> @vp_nearbyint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroe ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, 
m2, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI6_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; CHECK-NEXT: vfabs.v v12, v8, v0.t +; CHECK-NEXT: li a0, 25 +; CHECK-NEXT: slli a0, a0, 10 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: frflags a0 @@ -158,10 +165,11 @@ define <16 x half> @vp_nearbyint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroe define <16 x half> @vp_nearbyint_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v16f16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI7_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: li a0, 25 +; CHECK-NEXT: slli a0, a0, 10 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t @@ -349,41 +357,75 @@ define <16 x float> @vp_nearbyint_v16f32_unmasked(<16 x float> %va, i32 zeroext declare <2 x double> @llvm.vp.nearbyint.v2f64(<2 x double>, <2 x i1>, i32) define <2 x double> @vp_nearbyint_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI16_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI16_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: ret +; RV32-LABEL: vp_nearbyint_v2f64: +; RV32: # %bb.0: +; RV32-NEXT: lui a1, %hi(.LCPI16_0) +; RV32-NEXT: fld fa5, %lo(.LCPI16_0)(a1) +; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV32-NEXT: frflags a0 +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: fsflags a0 +; RV32-NEXT: ret +; +; RV64-LABEL: vp_nearbyint_v2f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8, v0.t +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV64-NEXT: frflags a0 +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: fsflags a0 +; RV64-NEXT: ret %v = call <2 x double> @llvm.vp.nearbyint.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl) ret <2 x double> %v } define <2 x double> @vp_nearbyint_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_v2f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI17_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t 
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: ret +; RV32-LABEL: vp_nearbyint_v2f64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: lui a1, %hi(.LCPI17_0) +; RV32-NEXT: fld fa5, %lo(.LCPI17_0)(a1) +; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: frflags a0 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: fsflags a0 +; RV32-NEXT: ret +; +; RV64-LABEL: vp_nearbyint_v2f64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: frflags a0 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: fsflags a0 +; RV64-NEXT: ret %v = call <2 x double> @llvm.vp.nearbyint.v2f64(<2 x double> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x double> %v } @@ -391,43 +433,79 @@ define <2 x double> @vp_nearbyint_v2f64_unmasked(<2 x double> %va, i32 zeroext % declare <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double>, <4 x i1>, i32) define <4 x double> @vp_nearbyint_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI18_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) -; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: ret +; RV32-LABEL: vp_nearbyint_v4f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32-NEXT: vmv1r.v v10, v0 +; RV32-NEXT: lui a0, %hi(.LCPI18_0) +; RV32-NEXT: fld fa5, %lo(.LCPI18_0)(a0) +; RV32-NEXT: vfabs.v v12, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV32-NEXT: frflags a0 +; RV32-NEXT: vmv1r.v v0, v10 +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: fsflags a0 +; RV32-NEXT: ret +; +; RV64-LABEL: vp_nearbyint_v4f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64-NEXT: vmv1r.v v10, v0 +; RV64-NEXT: vfabs.v v12, v8, v0.t +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV64-NEXT: frflags a0 +; RV64-NEXT: vmv1r.v v0, v10 +; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: fsflags a0 +; 
RV64-NEXT: ret %v = call <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) ret <4 x double> %v } define <4 x double> @vp_nearbyint_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_v4f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI19_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: ret +; RV32-LABEL: vp_nearbyint_v4f64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: lui a1, %hi(.LCPI19_0) +; RV32-NEXT: fld fa5, %lo(.LCPI19_0)(a1) +; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vmflt.vf v0, v10, fa5 +; RV32-NEXT: frflags a0 +; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: fsflags a0 +; RV32-NEXT: ret +; +; RV64-LABEL: vp_nearbyint_v4f64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v10, fa5 +; RV64-NEXT: frflags a0 +; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: fsflags a0 +; RV64-NEXT: ret %v = call <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v } @@ -435,43 +513,79 @@ define <4 x double> @vp_nearbyint_v4f64_unmasked(<4 x double> %va, i32 zeroext % declare <8 x double> @llvm.vp.nearbyint.v8f64(<8 x double>, <8 x i1>, i32) define <8 x double> @vp_nearbyint_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_v8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI20_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: ret +; RV32-LABEL: vp_nearbyint_v8f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32-NEXT: vmv1r.v v12, v0 +; RV32-NEXT: lui a0, %hi(.LCPI20_0) +; RV32-NEXT: fld fa5, %lo(.LCPI20_0)(a0) +; RV32-NEXT: vfabs.v v16, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV32-NEXT: frflags a0 +; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32-NEXT: fsflags a0 +; RV32-NEXT: ret +; +; RV64-LABEL: vp_nearbyint_v8f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e64, m4, 
ta, ma +; RV64-NEXT: vmv1r.v v12, v0 +; RV64-NEXT: vfabs.v v16, v8, v0.t +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV64-NEXT: frflags a0 +; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64-NEXT: fsflags a0 +; RV64-NEXT: ret %v = call <8 x double> @llvm.vp.nearbyint.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) ret <8 x double> %v } define <8 x double> @vp_nearbyint_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_v8f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI21_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: ret +; RV32-LABEL: vp_nearbyint_v8f64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: lui a1, %hi(.LCPI21_0) +; RV32-NEXT: fld fa5, %lo(.LCPI21_0)(a1) +; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, fa5 +; RV32-NEXT: frflags a0 +; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: fsflags a0 +; RV32-NEXT: ret +; +; RV64-LABEL: vp_nearbyint_v8f64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v12, fa5 +; RV64-NEXT: frflags a0 +; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: fsflags a0 +; RV64-NEXT: ret %v = call <8 x double> @llvm.vp.nearbyint.v8f64(<8 x double> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x double> %v } @@ -479,43 +593,79 @@ define <8 x double> @vp_nearbyint_v8f64_unmasked(<8 x double> %va, i32 zeroext % declare <15 x double> @llvm.vp.nearbyint.v15f64(<15 x double>, <15 x i1>, i32) define <15 x double> @vp_nearbyint_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_v15f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI22_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: ret +; RV32-LABEL: vp_nearbyint_v15f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmv1r.v v16, v0 +; RV32-NEXT: lui a0, %hi(.LCPI22_0) +; RV32-NEXT: fld fa5, 
%lo(.LCPI22_0)(a0) +; RV32-NEXT: vfabs.v v24, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32-NEXT: frflags a0 +; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32-NEXT: fsflags a0 +; RV32-NEXT: ret +; +; RV64-LABEL: vp_nearbyint_v15f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vmv1r.v v16, v0 +; RV64-NEXT: vfabs.v v24, v8, v0.t +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64-NEXT: frflags a0 +; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64-NEXT: fsflags a0 +; RV64-NEXT: ret %v = call <15 x double> @llvm.vp.nearbyint.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) ret <15 x double> %v } define <15 x double> @vp_nearbyint_v15f64_unmasked(<15 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_v15f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI23_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: ret +; RV32-LABEL: vp_nearbyint_v15f64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: lui a1, %hi(.LCPI23_0) +; RV32-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vfabs.v v16, v8 +; RV32-NEXT: vmflt.vf v0, v16, fa5 +; RV32-NEXT: frflags a0 +; RV32-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32-NEXT: fsflags a0 +; RV32-NEXT: ret +; +; RV64-LABEL: vp_nearbyint_v15f64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vfabs.v v16, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v16, fa5 +; RV64-NEXT: frflags a0 +; RV64-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64-NEXT: fsflags a0 +; RV64-NEXT: ret %v = call <15 x double> @llvm.vp.nearbyint.v15f64(<15 x double> %va, <15 x i1> splat (i1 true), i32 %evl) ret <15 x double> %v } @@ -523,43 +673,79 @@ define <15 x double> @vp_nearbyint_v15f64_unmasked(<15 x double> %va, i32 zeroex declare <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double>, <16 x i1>, i32) define <16 x double> @vp_nearbyint_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_v16f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI24_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, 
zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: ret +; RV32-LABEL: vp_nearbyint_v16f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmv1r.v v16, v0 +; RV32-NEXT: lui a0, %hi(.LCPI24_0) +; RV32-NEXT: fld fa5, %lo(.LCPI24_0)(a0) +; RV32-NEXT: vfabs.v v24, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32-NEXT: frflags a0 +; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32-NEXT: fsflags a0 +; RV32-NEXT: ret +; +; RV64-LABEL: vp_nearbyint_v16f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vmv1r.v v16, v0 +; RV64-NEXT: vfabs.v v24, v8, v0.t +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64-NEXT: frflags a0 +; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64-NEXT: fsflags a0 +; RV64-NEXT: ret %v = call <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) ret <16 x double> %v } define <16 x double> @vp_nearbyint_v16f64_unmasked(<16 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_v16f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI25_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: ret +; RV32-LABEL: vp_nearbyint_v16f64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: lui a1, %hi(.LCPI25_0) +; RV32-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vfabs.v v16, v8 +; RV32-NEXT: vmflt.vf v0, v16, fa5 +; RV32-NEXT: frflags a0 +; RV32-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32-NEXT: fsflags a0 +; RV32-NEXT: ret +; +; RV64-LABEL: vp_nearbyint_v16f64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vfabs.v v16, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v16, fa5 +; RV64-NEXT: frflags a0 +; RV64-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64-NEXT: fsflags a0 +; RV64-NEXT: ret %v = call <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double> %va, <16 x i1> splat (i1 true), i32 %evl) 
ret <16 x double> %v } @@ -567,91 +753,175 @@ define <16 x double> @vp_nearbyint_v16f64_unmasked(<16 x double> %va, i32 zeroex declare <32 x double> @llvm.vp.nearbyint.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_nearbyint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_v32f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v6, v0 -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vslidedown.vi v7, v0, 2 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB26_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: lui a1, %hi(.LCPI26_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a0, a0, a1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t -; CHECK-NEXT: frflags a1 -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: fsflags a1 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: ret +; RV32-LABEL: vp_nearbyint_v32f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vmv1r.v v6, v0 +; RV32-NEXT: li a2, 16 +; RV32-NEXT: vslidedown.vi v7, v0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB26_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a1, 16 +; RV32-NEXT: .LBB26_2: +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vfabs.v v24, v8, v0.t +; RV32-NEXT: lui a1, %hi(.LCPI26_0) +; RV32-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; RV32-NEXT: addi a1, a0, -16 +; RV32-NEXT: sltu a0, a0, a1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV32-NEXT: frflags a1 +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32-NEXT: fsflags a1 +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vfabs.v v24, v16, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV32-NEXT: frflags a0 +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32-NEXT: fsflags a0 +; RV32-NEXT: ret +; +; RV64-LABEL: vp_nearbyint_v32f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, 
e8, mf4, ta, ma +; RV64-NEXT: vmv1r.v v6, v0 +; RV64-NEXT: li a2, 16 +; RV64-NEXT: vslidedown.vi v7, v0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: bltu a0, a2, .LBB26_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a1, 16 +; RV64-NEXT: .LBB26_2: +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vfabs.v v24, v8, v0.t +; RV64-NEXT: li a1, 1075 +; RV64-NEXT: slli a1, a1, 52 +; RV64-NEXT: fmv.d.x fa5, a1 +; RV64-NEXT: addi a1, a0, -16 +; RV64-NEXT: sltu a0, a0, a1 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: frflags a1 +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64-NEXT: fsflags a1 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vfabs.v v24, v16, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV64-NEXT: frflags a0 +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64-NEXT: fsflags a0 +; RV64-NEXT: ret %v = call <32 x double> @llvm.vp.nearbyint.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v } define <32 x double> @vp_nearbyint_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_v32f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB27_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB27_2: -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: lui a2, %hi(.LCPI27_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a2) -; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: sltu a0, a0, a2 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a2 -; CHECK-NEXT: frflags a2 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v7, v24, fa5 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: fsflags a2 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: frflags a1 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: fsflags a1 -; CHECK-NEXT: ret +; RV32-LABEL: vp_nearbyint_v32f64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: li a2, 16 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB27_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a1, 16 +; RV32-NEXT: .LBB27_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vfabs.v v24, v8 +; RV32-NEXT: lui a2, %hi(.LCPI27_0) +; RV32-NEXT: fld fa5, %lo(.LCPI27_0)(a2) +; RV32-NEXT: addi a2, a0, -16 +; RV32-NEXT: sltu a0, a0, a2 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: frflags a2 +; RV32-NEXT: vmflt.vf v0, v24, fa5 +; RV32-NEXT: vsetvli zero, a0, e64, 
m8, ta, ma +; RV32-NEXT: vfabs.v v24, v16 +; RV32-NEXT: vmflt.vf v7, v24, fa5 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32-NEXT: fsflags a2 +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32-NEXT: frflags a1 +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32-NEXT: fsflags a1 +; RV32-NEXT: ret +; +; RV64-LABEL: vp_nearbyint_v32f64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: li a2, 16 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: bltu a0, a2, .LBB27_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a1, 16 +; RV64-NEXT: .LBB27_2: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vfabs.v v24, v8 +; RV64-NEXT: li a2, 1075 +; RV64-NEXT: slli a2, a2, 52 +; RV64-NEXT: fmv.d.x fa5, a2 +; RV64-NEXT: addi a2, a0, -16 +; RV64-NEXT: sltu a0, a0, a2 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: frflags a2 +; RV64-NEXT: vmflt.vf v0, v24, fa5 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vfabs.v v24, v16 +; RV64-NEXT: vmflt.vf v7, v24, fa5 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64-NEXT: fsflags a2 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64-NEXT: frflags a1 +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64-NEXT: fsflags a1 +; RV64-NEXT: ret %v = call <32 x double> @llvm.vp.nearbyint.v32f64(<32 x double> %va, <32 x i1> splat (i1 true), i32 %evl) ret <32 x double> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll index a426f8c619e99..eec12212d0d37 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV32 %s +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64 %s declare half @llvm.vector.reduce.fadd.v1f16(half, <1 x half>) @@ -2083,21 +2083,38 @@ define float @vreduce_fminimum_v128f32_nonans(ptr %x) { declare double @llvm.vector.reduce.fminimum.v2f64(<2 x double>) define double @vreduce_fminimum_v2f64(ptr %x) { -; CHECK-LABEL: vreduce_fminimum_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfne.vv v9, v8, v8 -; CHECK-NEXT: vcpop.m a0, v9 -; CHECK-NEXT: beqz a0, .LBB123_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: lui a0, %hi(.LCPI123_0) -; CHECK-NEXT: fld fa0, %lo(.LCPI123_0)(a0) -; CHECK-NEXT: 
ret -; CHECK-NEXT: .LBB123_2: -; CHECK-NEXT: vfredmin.vs v8, v8, v8 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fminimum_v2f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vmfne.vv v9, v8, v8 +; RV32-NEXT: vcpop.m a0, v9 +; RV32-NEXT: beqz a0, .LBB123_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: lui a0, %hi(.LCPI123_0) +; RV32-NEXT: fld fa0, %lo(.LCPI123_0)(a0) +; RV32-NEXT: ret +; RV32-NEXT: .LBB123_2: +; RV32-NEXT: vfredmin.vs v8, v8, v8 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fminimum_v2f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vmfne.vv v9, v8, v8 +; RV64-NEXT: vcpop.m a0, v9 +; RV64-NEXT: beqz a0, .LBB123_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: lui a0, 4095 +; RV64-NEXT: slli a0, a0, 39 +; RV64-NEXT: fmv.d.x fa0, a0 +; RV64-NEXT: ret +; RV64-NEXT: .LBB123_2: +; RV64-NEXT: vfredmin.vs v8, v8, v8 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <2 x double>, ptr %x %red = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> %v) ret double %red @@ -2119,21 +2136,38 @@ define double @vreduce_fminimum_v2f64_nonans(ptr %x) { declare double @llvm.vector.reduce.fminimum.v4f64(<4 x double>) define double @vreduce_fminimum_v4f64(ptr %x) { -; CHECK-LABEL: vreduce_fminimum_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vcpop.m a0, v10 -; CHECK-NEXT: beqz a0, .LBB125_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: lui a0, %hi(.LCPI125_0) -; CHECK-NEXT: fld fa0, %lo(.LCPI125_0)(a0) -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB125_2: -; CHECK-NEXT: vfredmin.vs v8, v8, v8 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fminimum_v4f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vmfne.vv v10, v8, v8 +; RV32-NEXT: vcpop.m a0, v10 +; RV32-NEXT: beqz a0, .LBB125_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: lui a0, %hi(.LCPI125_0) +; RV32-NEXT: fld fa0, %lo(.LCPI125_0)(a0) +; RV32-NEXT: ret +; RV32-NEXT: .LBB125_2: +; RV32-NEXT: vfredmin.vs v8, v8, v8 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fminimum_v4f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vmfne.vv v10, v8, v8 +; RV64-NEXT: vcpop.m a0, v10 +; RV64-NEXT: beqz a0, .LBB125_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: lui a0, 4095 +; RV64-NEXT: slli a0, a0, 39 +; RV64-NEXT: fmv.d.x fa0, a0 +; RV64-NEXT: ret +; RV64-NEXT: .LBB125_2: +; RV64-NEXT: vfredmin.vs v8, v8, v8 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x double>, ptr %x %red = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> %v) ret double %red @@ -2155,21 +2189,38 @@ define double @vreduce_fminimum_v4f64_nonans(ptr %x) { declare double @llvm.vector.reduce.fminimum.v8f64(<8 x double>) define double @vreduce_fminimum_v8f64(ptr %x) { -; CHECK-LABEL: vreduce_fminimum_v8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vcpop.m a0, v12 -; CHECK-NEXT: beqz a0, .LBB127_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: lui a0, %hi(.LCPI127_0) -; CHECK-NEXT: fld fa0, %lo(.LCPI127_0)(a0) -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB127_2: -; CHECK-NEXT: vfredmin.vs v8, v8, v8 -; CHECK-NEXT: 
vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fminimum_v8f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vmfne.vv v12, v8, v8 +; RV32-NEXT: vcpop.m a0, v12 +; RV32-NEXT: beqz a0, .LBB127_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: lui a0, %hi(.LCPI127_0) +; RV32-NEXT: fld fa0, %lo(.LCPI127_0)(a0) +; RV32-NEXT: ret +; RV32-NEXT: .LBB127_2: +; RV32-NEXT: vfredmin.vs v8, v8, v8 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fminimum_v8f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vmfne.vv v12, v8, v8 +; RV64-NEXT: vcpop.m a0, v12 +; RV64-NEXT: beqz a0, .LBB127_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: lui a0, 4095 +; RV64-NEXT: slli a0, a0, 39 +; RV64-NEXT: fmv.d.x fa0, a0 +; RV64-NEXT: ret +; RV64-NEXT: .LBB127_2: +; RV64-NEXT: vfredmin.vs v8, v8, v8 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <8 x double>, ptr %x %red = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> %v) ret double %red @@ -2191,21 +2242,38 @@ define double @vreduce_fminimum_v8f64_nonans(ptr %x) { declare double @llvm.vector.reduce.fminimum.v16f64(<16 x double>) define double @vreduce_fminimum_v16f64(ptr %x) { -; CHECK-LABEL: vreduce_fminimum_v16f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vcpop.m a0, v16 -; CHECK-NEXT: beqz a0, .LBB129_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: lui a0, %hi(.LCPI129_0) -; CHECK-NEXT: fld fa0, %lo(.LCPI129_0)(a0) -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB129_2: -; CHECK-NEXT: vfredmin.vs v8, v8, v8 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fminimum_v16f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vmfne.vv v16, v8, v8 +; RV32-NEXT: vcpop.m a0, v16 +; RV32-NEXT: beqz a0, .LBB129_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: lui a0, %hi(.LCPI129_0) +; RV32-NEXT: fld fa0, %lo(.LCPI129_0)(a0) +; RV32-NEXT: ret +; RV32-NEXT: .LBB129_2: +; RV32-NEXT: vfredmin.vs v8, v8, v8 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fminimum_v16f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vmfne.vv v16, v8, v8 +; RV64-NEXT: vcpop.m a0, v16 +; RV64-NEXT: beqz a0, .LBB129_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: lui a0, 4095 +; RV64-NEXT: slli a0, a0, 39 +; RV64-NEXT: fmv.d.x fa0, a0 +; RV64-NEXT: ret +; RV64-NEXT: .LBB129_2: +; RV64-NEXT: vfredmin.vs v8, v8, v8 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <16 x double>, ptr %x %red = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> %v) ret double %red @@ -2227,29 +2295,54 @@ define double @vreduce_fminimum_v16f64_nonans(ptr %x) { declare double @llvm.vector.reduce.fminimum.v32f64(<32 x double>) define double @vreduce_fminimum_v32f64(ptr %x) { -; CHECK-LABEL: vreduce_fminimum_v32f64: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a0) -; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v7, v24, v24 -; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 -; CHECK-NEXT: vfmin.vv v8, v16, v8 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vcpop.m a0, v16 -; CHECK-NEXT: 
beqz a0, .LBB131_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: lui a0, %hi(.LCPI131_0) -; CHECK-NEXT: fld fa0, %lo(.LCPI131_0)(a0) -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB131_2: -; CHECK-NEXT: vfredmin.vs v8, v8, v8 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fminimum_v32f64: +; RV32: # %bb.0: +; RV32-NEXT: addi a1, a0, 128 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vle64.v v16, (a0) +; RV32-NEXT: vle64.v v24, (a1) +; RV32-NEXT: vmfeq.vv v0, v16, v16 +; RV32-NEXT: vmfeq.vv v7, v24, v24 +; RV32-NEXT: vmerge.vvm v8, v16, v24, v0 +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vmerge.vvm v16, v24, v16, v0 +; RV32-NEXT: vfmin.vv v8, v16, v8 +; RV32-NEXT: vmfne.vv v16, v8, v8 +; RV32-NEXT: vcpop.m a0, v16 +; RV32-NEXT: beqz a0, .LBB131_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: lui a0, %hi(.LCPI131_0) +; RV32-NEXT: fld fa0, %lo(.LCPI131_0)(a0) +; RV32-NEXT: ret +; RV32-NEXT: .LBB131_2: +; RV32-NEXT: vfredmin.vs v8, v8, v8 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fminimum_v32f64: +; RV64: # %bb.0: +; RV64-NEXT: addi a1, a0, 128 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vle64.v v16, (a0) +; RV64-NEXT: vle64.v v24, (a1) +; RV64-NEXT: vmfeq.vv v0, v16, v16 +; RV64-NEXT: vmfeq.vv v7, v24, v24 +; RV64-NEXT: vmerge.vvm v8, v16, v24, v0 +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vmerge.vvm v16, v24, v16, v0 +; RV64-NEXT: vfmin.vv v8, v16, v8 +; RV64-NEXT: vmfne.vv v16, v8, v8 +; RV64-NEXT: vcpop.m a0, v16 +; RV64-NEXT: beqz a0, .LBB131_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: lui a0, 4095 +; RV64-NEXT: slli a0, a0, 39 +; RV64-NEXT: fmv.d.x fa0, a0 +; RV64-NEXT: ret +; RV64-NEXT: .LBB131_2: +; RV64-NEXT: vfredmin.vs v8, v8, v8 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <32 x double>, ptr %x %red = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> %v) ret double %red @@ -2274,85 +2367,166 @@ define double @vreduce_fminimum_v32f64_nonans(ptr %x) { declare double @llvm.vector.reduce.fminimum.v64f64(<64 x double>) define double @vreduce_fminimum_v64f64(ptr %x) { -; CHECK-LABEL: vreduce_fminimum_v64f64: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: addi a1, a0, 384 -; CHECK-NEXT: vle64.v v16, (a1) -; CHECK-NEXT: addi a1, a0, 256 -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v24, v24 -; CHECK-NEXT: vmfeq.vv v7, v16, v16 -; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill -; CHECK-NEXT: vle64.v v8, (a1) -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 -; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vfmin.vv v24, v16, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v7, 
v8, v8 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vfmin.vv v16, v8, v16 -; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v7, v24, v24 -; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 -; CHECK-NEXT: vfmin.vv v8, v16, v8 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vcpop.m a0, v16 -; CHECK-NEXT: beqz a0, .LBB133_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: lui a0, %hi(.LCPI133_0) -; CHECK-NEXT: fld fa0, %lo(.LCPI133_0)(a0) -; CHECK-NEXT: j .LBB133_3 -; CHECK-NEXT: .LBB133_2: -; CHECK-NEXT: vfredmin.vs v8, v8, v8 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: .LBB133_3: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fminimum_v64f64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV32-NEXT: addi a1, a0, 128 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vle64.v v24, (a1) +; RV32-NEXT: addi a1, a0, 384 +; RV32-NEXT: vle64.v v16, (a1) +; RV32-NEXT: addi a1, a0, 256 +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vmfeq.vv v0, v24, v24 +; RV32-NEXT: vmfeq.vv v7, v16, v16 +; RV32-NEXT: vmerge.vvm v8, v24, v16, v0 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vle64.v v8, (a1) +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vmerge.vvm v16, v16, v24, v0 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vfmin.vv v24, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmfeq.vv v0, v16, v16 +; RV32-NEXT: vmfeq.vv v7, v8, v8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmerge.vvm v16, v16, v8, v0 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmerge.vvm v8, v8, v16, v0 +; RV32-NEXT: addi a0, sp, 16 +; 
RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vfmin.vv v16, v8, v16 +; RV32-NEXT: vmfeq.vv v0, v16, v16 +; RV32-NEXT: vmfeq.vv v7, v24, v24 +; RV32-NEXT: vmerge.vvm v8, v16, v24, v0 +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vmerge.vvm v16, v24, v16, v0 +; RV32-NEXT: vfmin.vv v8, v16, v8 +; RV32-NEXT: vmfne.vv v16, v8, v8 +; RV32-NEXT: vcpop.m a0, v16 +; RV32-NEXT: beqz a0, .LBB133_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: lui a0, %hi(.LCPI133_0) +; RV32-NEXT: fld fa0, %lo(.LCPI133_0)(a0) +; RV32-NEXT: j .LBB133_3 +; RV32-NEXT: .LBB133_2: +; RV32-NEXT: vfredmin.vs v8, v8, v8 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: .LBB133_3: +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: .cfi_def_cfa sp, 16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: .cfi_def_cfa_offset 0 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fminimum_v64f64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: addi a1, a0, 128 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vle64.v v24, (a1) +; RV64-NEXT: addi a1, a0, 384 +; RV64-NEXT: vle64.v v16, (a1) +; RV64-NEXT: addi a1, a0, 256 +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: vmfeq.vv v0, v24, v24 +; RV64-NEXT: vmfeq.vv v7, v16, v16 +; RV64-NEXT: vmerge.vvm v8, v24, v16, v0 +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: vle64.v v8, (a1) +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vmerge.vvm v16, v16, v24, v0 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vfmin.vv v24, v16, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vmfeq.vv v0, v16, v16 +; RV64-NEXT: vmfeq.vv v7, v8, v8 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vmerge.vvm v16, v16, v8, v0 +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vmerge.vvm v8, v8, v16, v0 +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vfmin.vv v16, v8, v16 +; RV64-NEXT: vmfeq.vv v0, v16, v16 +; RV64-NEXT: vmfeq.vv v7, v24, v24 +; RV64-NEXT: vmerge.vvm v8, v16, v24, v0 +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vmerge.vvm v16, v24, v16, v0 +; RV64-NEXT: vfmin.vv v8, v16, v8 +; RV64-NEXT: vmfne.vv v16, v8, v8 +; RV64-NEXT: vcpop.m a0, v16 +; RV64-NEXT: beqz a0, .LBB133_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: lui a0, 4095 +; RV64-NEXT: slli a0, a0, 39 +; RV64-NEXT: fmv.d.x fa0, a0 +; RV64-NEXT: j .LBB133_3 +; RV64-NEXT: .LBB133_2: +; RV64-NEXT: vfredmin.vs v8, v8, v8 +; RV64-NEXT: vfmv.f.s fa0, v8 +; 
RV64-NEXT: .LBB133_3: +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: .cfi_def_cfa sp, 16 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: .cfi_def_cfa_offset 0 +; RV64-NEXT: ret %v = load <64 x double>, ptr %x %red = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> %v) ret double %red @@ -2765,21 +2939,38 @@ define float @vreduce_fmaximum_v128f32_nonans(ptr %x) { declare double @llvm.vector.reduce.fmaximum.v2f64(<2 x double>) define double @vreduce_fmaximum_v2f64(ptr %x) { -; CHECK-LABEL: vreduce_fmaximum_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfne.vv v9, v8, v8 -; CHECK-NEXT: vcpop.m a0, v9 -; CHECK-NEXT: beqz a0, .LBB151_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: lui a0, %hi(.LCPI151_0) -; CHECK-NEXT: fld fa0, %lo(.LCPI151_0)(a0) -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB151_2: -; CHECK-NEXT: vfredmax.vs v8, v8, v8 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmaximum_v2f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vmfne.vv v9, v8, v8 +; RV32-NEXT: vcpop.m a0, v9 +; RV32-NEXT: beqz a0, .LBB151_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: lui a0, %hi(.LCPI151_0) +; RV32-NEXT: fld fa0, %lo(.LCPI151_0)(a0) +; RV32-NEXT: ret +; RV32-NEXT: .LBB151_2: +; RV32-NEXT: vfredmax.vs v8, v8, v8 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmaximum_v2f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vmfne.vv v9, v8, v8 +; RV64-NEXT: vcpop.m a0, v9 +; RV64-NEXT: beqz a0, .LBB151_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: lui a0, 4095 +; RV64-NEXT: slli a0, a0, 39 +; RV64-NEXT: fmv.d.x fa0, a0 +; RV64-NEXT: ret +; RV64-NEXT: .LBB151_2: +; RV64-NEXT: vfredmax.vs v8, v8, v8 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <2 x double>, ptr %x %red = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> %v) ret double %red @@ -2801,21 +2992,38 @@ define double @vreduce_fmaximum_v2f64_nonans(ptr %x) { declare double @llvm.vector.reduce.fmaximum.v4f64(<4 x double>) define double @vreduce_fmaximum_v4f64(ptr %x) { -; CHECK-LABEL: vreduce_fmaximum_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vcpop.m a0, v10 -; CHECK-NEXT: beqz a0, .LBB153_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: lui a0, %hi(.LCPI153_0) -; CHECK-NEXT: fld fa0, %lo(.LCPI153_0)(a0) -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB153_2: -; CHECK-NEXT: vfredmax.vs v8, v8, v8 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmaximum_v4f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vmfne.vv v10, v8, v8 +; RV32-NEXT: vcpop.m a0, v10 +; RV32-NEXT: beqz a0, .LBB153_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: lui a0, %hi(.LCPI153_0) +; RV32-NEXT: fld fa0, %lo(.LCPI153_0)(a0) +; RV32-NEXT: ret +; RV32-NEXT: .LBB153_2: +; RV32-NEXT: vfredmax.vs v8, v8, v8 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmaximum_v4f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vmfne.vv v10, v8, v8 +; RV64-NEXT: vcpop.m a0, v10 +; RV64-NEXT: beqz a0, .LBB153_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: lui a0, 4095 +; RV64-NEXT: slli a0, a0, 39 +; RV64-NEXT: 
fmv.d.x fa0, a0 +; RV64-NEXT: ret +; RV64-NEXT: .LBB153_2: +; RV64-NEXT: vfredmax.vs v8, v8, v8 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x double>, ptr %x %red = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> %v) ret double %red @@ -2837,21 +3045,38 @@ define double @vreduce_fmaximum_v4f64_nonans(ptr %x) { declare double @llvm.vector.reduce.fmaximum.v8f64(<8 x double>) define double @vreduce_fmaximum_v8f64(ptr %x) { -; CHECK-LABEL: vreduce_fmaximum_v8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vcpop.m a0, v12 -; CHECK-NEXT: beqz a0, .LBB155_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: lui a0, %hi(.LCPI155_0) -; CHECK-NEXT: fld fa0, %lo(.LCPI155_0)(a0) -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB155_2: -; CHECK-NEXT: vfredmax.vs v8, v8, v8 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmaximum_v8f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vmfne.vv v12, v8, v8 +; RV32-NEXT: vcpop.m a0, v12 +; RV32-NEXT: beqz a0, .LBB155_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: lui a0, %hi(.LCPI155_0) +; RV32-NEXT: fld fa0, %lo(.LCPI155_0)(a0) +; RV32-NEXT: ret +; RV32-NEXT: .LBB155_2: +; RV32-NEXT: vfredmax.vs v8, v8, v8 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmaximum_v8f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vmfne.vv v12, v8, v8 +; RV64-NEXT: vcpop.m a0, v12 +; RV64-NEXT: beqz a0, .LBB155_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: lui a0, 4095 +; RV64-NEXT: slli a0, a0, 39 +; RV64-NEXT: fmv.d.x fa0, a0 +; RV64-NEXT: ret +; RV64-NEXT: .LBB155_2: +; RV64-NEXT: vfredmax.vs v8, v8, v8 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <8 x double>, ptr %x %red = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> %v) ret double %red @@ -2873,21 +3098,38 @@ define double @vreduce_fmaximum_v8f64_nonans(ptr %x) { declare double @llvm.vector.reduce.fmaximum.v16f64(<16 x double>) define double @vreduce_fmaximum_v16f64(ptr %x) { -; CHECK-LABEL: vreduce_fmaximum_v16f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vcpop.m a0, v16 -; CHECK-NEXT: beqz a0, .LBB157_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: lui a0, %hi(.LCPI157_0) -; CHECK-NEXT: fld fa0, %lo(.LCPI157_0)(a0) -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB157_2: -; CHECK-NEXT: vfredmax.vs v8, v8, v8 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmaximum_v16f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vmfne.vv v16, v8, v8 +; RV32-NEXT: vcpop.m a0, v16 +; RV32-NEXT: beqz a0, .LBB157_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: lui a0, %hi(.LCPI157_0) +; RV32-NEXT: fld fa0, %lo(.LCPI157_0)(a0) +; RV32-NEXT: ret +; RV32-NEXT: .LBB157_2: +; RV32-NEXT: vfredmax.vs v8, v8, v8 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmaximum_v16f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vmfne.vv v16, v8, v8 +; RV64-NEXT: vcpop.m a0, v16 +; RV64-NEXT: beqz a0, .LBB157_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: lui a0, 4095 +; RV64-NEXT: slli a0, a0, 39 +; RV64-NEXT: fmv.d.x fa0, a0 +; RV64-NEXT: ret +; RV64-NEXT: .LBB157_2: +; RV64-NEXT: 
vfredmax.vs v8, v8, v8 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <16 x double>, ptr %x %red = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> %v) ret double %red @@ -2909,29 +3151,54 @@ define double @vreduce_fmaximum_v16f64_nonans(ptr %x) { declare double @llvm.vector.reduce.fmaximum.v32f64(<32 x double>) define double @vreduce_fmaximum_v32f64(ptr %x) { -; CHECK-LABEL: vreduce_fmaximum_v32f64: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a0) -; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v7, v24, v24 -; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 -; CHECK-NEXT: vfmax.vv v8, v16, v8 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vcpop.m a0, v16 -; CHECK-NEXT: beqz a0, .LBB159_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: lui a0, %hi(.LCPI159_0) -; CHECK-NEXT: fld fa0, %lo(.LCPI159_0)(a0) -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB159_2: -; CHECK-NEXT: vfredmax.vs v8, v8, v8 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmaximum_v32f64: +; RV32: # %bb.0: +; RV32-NEXT: addi a1, a0, 128 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vle64.v v16, (a0) +; RV32-NEXT: vle64.v v24, (a1) +; RV32-NEXT: vmfeq.vv v0, v16, v16 +; RV32-NEXT: vmfeq.vv v7, v24, v24 +; RV32-NEXT: vmerge.vvm v8, v16, v24, v0 +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vmerge.vvm v16, v24, v16, v0 +; RV32-NEXT: vfmax.vv v8, v16, v8 +; RV32-NEXT: vmfne.vv v16, v8, v8 +; RV32-NEXT: vcpop.m a0, v16 +; RV32-NEXT: beqz a0, .LBB159_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: lui a0, %hi(.LCPI159_0) +; RV32-NEXT: fld fa0, %lo(.LCPI159_0)(a0) +; RV32-NEXT: ret +; RV32-NEXT: .LBB159_2: +; RV32-NEXT: vfredmax.vs v8, v8, v8 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmaximum_v32f64: +; RV64: # %bb.0: +; RV64-NEXT: addi a1, a0, 128 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vle64.v v16, (a0) +; RV64-NEXT: vle64.v v24, (a1) +; RV64-NEXT: vmfeq.vv v0, v16, v16 +; RV64-NEXT: vmfeq.vv v7, v24, v24 +; RV64-NEXT: vmerge.vvm v8, v16, v24, v0 +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vmerge.vvm v16, v24, v16, v0 +; RV64-NEXT: vfmax.vv v8, v16, v8 +; RV64-NEXT: vmfne.vv v16, v8, v8 +; RV64-NEXT: vcpop.m a0, v16 +; RV64-NEXT: beqz a0, .LBB159_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: lui a0, 4095 +; RV64-NEXT: slli a0, a0, 39 +; RV64-NEXT: fmv.d.x fa0, a0 +; RV64-NEXT: ret +; RV64-NEXT: .LBB159_2: +; RV64-NEXT: vfredmax.vs v8, v8, v8 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <32 x double>, ptr %x %red = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> %v) ret double %red @@ -2956,85 +3223,166 @@ define double @vreduce_fmaximum_v32f64_nonans(ptr %x) { declare double @llvm.vector.reduce.fmaximum.v64f64(<64 x double>) define double @vreduce_fmaximum_v64f64(ptr %x) { -; CHECK-LABEL: vreduce_fmaximum_v64f64: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: addi a1, a0, 384 -; CHECK-NEXT: vle64.v v16, (a1) -; CHECK-NEXT: 
addi a1, a0, 256 -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v24, v24 -; CHECK-NEXT: vmfeq.vv v7, v16, v16 -; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill -; CHECK-NEXT: vle64.v v8, (a1) -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 -; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vfmax.vv v24, v16, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v7, v8, v8 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vfmax.vv v16, v8, v16 -; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v7, v24, v24 -; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 -; CHECK-NEXT: vfmax.vv v8, v16, v8 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vcpop.m a0, v16 -; CHECK-NEXT: beqz a0, .LBB161_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: lui a0, %hi(.LCPI161_0) -; CHECK-NEXT: fld fa0, %lo(.LCPI161_0)(a0) -; CHECK-NEXT: j .LBB161_3 -; CHECK-NEXT: .LBB161_2: -; CHECK-NEXT: vfredmax.vs v8, v8, v8 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: .LBB161_3: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmaximum_v64f64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV32-NEXT: addi a1, a0, 128 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vle64.v v24, (a1) +; RV32-NEXT: addi a1, a0, 384 +; RV32-NEXT: vle64.v v16, (a1) +; RV32-NEXT: addi a1, a0, 256 +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vmfeq.vv v0, v24, v24 +; RV32-NEXT: vmfeq.vv v7, v16, v16 +; RV32-NEXT: vmerge.vvm v8, v24, v16, v0 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vle64.v v8, (a1) +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vmerge.vvm v16, v16, v24, v0 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded 
Reload +; RV32-NEXT: vfmax.vv v24, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmfeq.vv v0, v16, v16 +; RV32-NEXT: vmfeq.vv v7, v8, v8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmerge.vvm v16, v16, v8, v0 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmerge.vvm v8, v8, v16, v0 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vfmax.vv v16, v8, v16 +; RV32-NEXT: vmfeq.vv v0, v16, v16 +; RV32-NEXT: vmfeq.vv v7, v24, v24 +; RV32-NEXT: vmerge.vvm v8, v16, v24, v0 +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vmerge.vvm v16, v24, v16, v0 +; RV32-NEXT: vfmax.vv v8, v16, v8 +; RV32-NEXT: vmfne.vv v16, v8, v8 +; RV32-NEXT: vcpop.m a0, v16 +; RV32-NEXT: beqz a0, .LBB161_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: lui a0, %hi(.LCPI161_0) +; RV32-NEXT: fld fa0, %lo(.LCPI161_0)(a0) +; RV32-NEXT: j .LBB161_3 +; RV32-NEXT: .LBB161_2: +; RV32-NEXT: vfredmax.vs v8, v8, v8 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: .LBB161_3: +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: .cfi_def_cfa sp, 16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: .cfi_def_cfa_offset 0 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmaximum_v64f64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: addi a1, a0, 128 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vle64.v v24, (a1) +; RV64-NEXT: addi a1, a0, 384 +; RV64-NEXT: vle64.v v16, (a1) +; RV64-NEXT: addi a1, a0, 256 +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: vmfeq.vv v0, v24, v24 +; RV64-NEXT: vmfeq.vv v7, v16, v16 +; RV64-NEXT: vmerge.vvm v8, v24, v16, v0 +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: vle64.v v8, (a1) +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vmerge.vvm v16, v16, v24, v0 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vfmax.vv v24, v16, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vmfeq.vv v0, v16, v16 +; RV64-NEXT: vmfeq.vv v7, v8, v8 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vmerge.vvm v16, v16, v8, v0 +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: 
csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vmerge.vvm v8, v8, v16, v0 +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vfmax.vv v16, v8, v16 +; RV64-NEXT: vmfeq.vv v0, v16, v16 +; RV64-NEXT: vmfeq.vv v7, v24, v24 +; RV64-NEXT: vmerge.vvm v8, v16, v24, v0 +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vmerge.vvm v16, v24, v16, v0 +; RV64-NEXT: vfmax.vv v8, v16, v8 +; RV64-NEXT: vmfne.vv v16, v8, v8 +; RV64-NEXT: vcpop.m a0, v16 +; RV64-NEXT: beqz a0, .LBB161_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: lui a0, 4095 +; RV64-NEXT: slli a0, a0, 39 +; RV64-NEXT: fmv.d.x fa0, a0 +; RV64-NEXT: j .LBB161_3 +; RV64-NEXT: .LBB161_2: +; RV64-NEXT: vfredmax.vs v8, v8, v8 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: .LBB161_3: +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: .cfi_def_cfa sp, 16 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: .cfi_def_cfa_offset 0 +; RV64-NEXT: ret %v = load <64 x double>, ptr %x %red = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> %v) ret double %red diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll index 35cd789acfcc8..97cf7e6902e32 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll @@ -1,18 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV32 %s ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64 %s declare <2 x half> @llvm.vp.rint.v2f16(<2 x half>, <2 x i1>, i32) define <2 x half> @vp_rint_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8, v0.t +; CHECK-NEXT: li a0, 25 +; CHECK-NEXT: slli a0, a0, 10 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma @@ -28,10 +29,11 @@ define <2 x half> @vp_rint_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) define <2 x half> @vp_rint_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v2f16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: li a0, 25 +; CHECK-NEXT: slli a0, a0, 10 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -47,10 +49,11 @@ declare <4 x half> @llvm.vp.rint.v4f16(<4 x half>, <4 x i1>, i32) define <4 x half> @vp_rint_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfabs.v 
v9, v8, v0.t +; CHECK-NEXT: li a0, 25 +; CHECK-NEXT: slli a0, a0, 10 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma @@ -66,10 +69,11 @@ define <4 x half> @vp_rint_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) define <4 x half> @vp_rint_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v4f16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: li a0, 25 +; CHECK-NEXT: slli a0, a0, 10 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -85,10 +89,11 @@ declare <8 x half> @llvm.vp.rint.v8f16(<8 x half>, <8 x i1>, i32) define <8 x half> @vp_rint_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8, v0.t +; CHECK-NEXT: li a0, 25 +; CHECK-NEXT: slli a0, a0, 10 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -104,10 +109,11 @@ define <8 x half> @vp_rint_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) define <8 x half> @vp_rint_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v8f16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI5_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: li a0, 25 +; CHECK-NEXT: slli a0, a0, 10 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -125,9 +131,10 @@ define <16 x half> @vp_rint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %e ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI6_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; CHECK-NEXT: vfabs.v v12, v8, v0.t +; CHECK-NEXT: li a0, 25 +; CHECK-NEXT: slli a0, a0, 10 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 @@ -144,10 +151,11 @@ define <16 x half> @vp_rint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %e define <16 x half> @vp_rint_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v16f16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI7_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: li a0, 25 +; CHECK-NEXT: slli a0, a0, 10 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -317,37 +325,67 @@ define <16 x float> @vp_rint_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl) declare <2 x double> @llvm.vp.rint.v2f64(<2 x double>, <2 x i1>, i32) define <2 x double> @vp_rint_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI16_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI16_0)(a1) -; 
CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: vp_rint_v2f64: +; RV32: # %bb.0: +; RV32-NEXT: lui a1, %hi(.LCPI16_0) +; RV32-NEXT: fld fa5, %lo(.LCPI16_0)(a1) +; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vp_rint_v2f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8, v0.t +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: ret %v = call <2 x double> @llvm.vp.rint.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl) ret <2 x double> %v } define <2 x double> @vp_rint_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_v2f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI17_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: vp_rint_v2f64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: lui a1, %hi(.LCPI17_0) +; RV32-NEXT: fld fa5, %lo(.LCPI17_0)(a1) +; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vp_rint_v2f64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: ret %v = call <2 x double> @llvm.vp.rint.v2f64(<2 x double> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x double> %v } @@ -355,39 +393,71 @@ define <2 x double> @vp_rint_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) declare <4 x double> @llvm.vp.rint.v4f64(<4 x double>, <4 x i1>, i32) define <4 x double> @vp_rint_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a0, 
%hi(.LCPI18_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) -; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: vp_rint_v4f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32-NEXT: vmv1r.v v10, v0 +; RV32-NEXT: lui a0, %hi(.LCPI18_0) +; RV32-NEXT: fld fa5, %lo(.LCPI18_0)(a0) +; RV32-NEXT: vfabs.v v12, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV32-NEXT: vmv1r.v v0, v10 +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vp_rint_v4f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64-NEXT: vmv1r.v v10, v0 +; RV64-NEXT: vfabs.v v12, v8, v0.t +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV64-NEXT: vmv1r.v v0, v10 +; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: ret %v = call <4 x double> @llvm.vp.rint.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) ret <4 x double> %v } define <4 x double> @vp_rint_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_v4f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI19_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: vp_rint_v4f64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: lui a1, %hi(.LCPI19_0) +; RV32-NEXT: fld fa5, %lo(.LCPI19_0)(a1) +; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vmflt.vf v0, v10, fa5 +; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vp_rint_v4f64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v10, fa5 +; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: ret %v = call <4 x double> @llvm.vp.rint.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v } @@ -395,39 +465,71 @@ define <4 x double> @vp_rint_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) declare <8 x double> @llvm.vp.rint.v8f64(<8 x double>, <8 x i1>, i32) define <8 x double> @vp_rint_v8f64(<8 x 
double> %va, <8 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_v8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI20_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: vp_rint_v8f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32-NEXT: vmv1r.v v12, v0 +; RV32-NEXT: lui a0, %hi(.LCPI20_0) +; RV32-NEXT: fld fa5, %lo(.LCPI20_0)(a0) +; RV32-NEXT: vfabs.v v16, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vp_rint_v8f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64-NEXT: vmv1r.v v12, v0 +; RV64-NEXT: vfabs.v v16, v8, v0.t +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64-NEXT: ret %v = call <8 x double> @llvm.vp.rint.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) ret <8 x double> %v } define <8 x double> @vp_rint_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_v8f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI21_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: vp_rint_v8f64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: lui a1, %hi(.LCPI21_0) +; RV32-NEXT: fld fa5, %lo(.LCPI21_0)(a1) +; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, fa5 +; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vp_rint_v8f64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v12, fa5 +; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: ret %v = call <8 x double> @llvm.vp.rint.v8f64(<8 x double> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x double> %v } @@ -435,39 
+537,71 @@ define <8 x double> @vp_rint_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) declare <15 x double> @llvm.vp.rint.v15f64(<15 x double>, <15 x i1>, i32) define <15 x double> @vp_rint_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_v15f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI22_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: vp_rint_v15f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmv1r.v v16, v0 +; RV32-NEXT: lui a0, %hi(.LCPI22_0) +; RV32-NEXT: fld fa5, %lo(.LCPI22_0)(a0) +; RV32-NEXT: vfabs.v v24, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vp_rint_v15f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vmv1r.v v16, v0 +; RV64-NEXT: vfabs.v v24, v8, v0.t +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64-NEXT: ret %v = call <15 x double> @llvm.vp.rint.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) ret <15 x double> %v } define <15 x double> @vp_rint_v15f64_unmasked(<15 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_v15f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI23_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: vp_rint_v15f64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: lui a1, %hi(.LCPI23_0) +; RV32-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vfabs.v v16, v8 +; RV32-NEXT: vmflt.vf v0, v16, fa5 +; RV32-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vp_rint_v15f64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vfabs.v v16, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v16, fa5 +; RV64-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64-NEXT: vsetvli 
zero, zero, e64, m8, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64-NEXT: ret %v = call <15 x double> @llvm.vp.rint.v15f64(<15 x double> %va, <15 x i1> splat (i1 true), i32 %evl) ret <15 x double> %v } @@ -475,39 +609,71 @@ define <15 x double> @vp_rint_v15f64_unmasked(<15 x double> %va, i32 zeroext %ev declare <16 x double> @llvm.vp.rint.v16f64(<16 x double>, <16 x i1>, i32) define <16 x double> @vp_rint_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_v16f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI24_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: vp_rint_v16f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmv1r.v v16, v0 +; RV32-NEXT: lui a0, %hi(.LCPI24_0) +; RV32-NEXT: fld fa5, %lo(.LCPI24_0)(a0) +; RV32-NEXT: vfabs.v v24, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vp_rint_v16f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vmv1r.v v16, v0 +; RV64-NEXT: vfabs.v v24, v8, v0.t +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64-NEXT: ret %v = call <16 x double> @llvm.vp.rint.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) ret <16 x double> %v } define <16 x double> @vp_rint_v16f64_unmasked(<16 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_v16f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI25_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: vp_rint_v16f64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: lui a1, %hi(.LCPI25_0) +; RV32-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vfabs.v v16, v8 +; RV32-NEXT: vmflt.vf v0, v16, fa5 +; RV32-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vp_rint_v16f64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vfabs.v v16, v8 
+; RV64-NEXT: li a0, 1075 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v16, fa5 +; RV64-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64-NEXT: ret %v = call <16 x double> @llvm.vp.rint.v16f64(<16 x double> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x double> %v } @@ -515,83 +681,159 @@ define <16 x double> @vp_rint_v16f64_unmasked(<16 x double> %va, i32 zeroext %ev declare <32 x double> @llvm.vp.rint.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_rint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_v32f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v6, v0 -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vslidedown.vi v7, v0, 2 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB26_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: lui a1, %hi(.LCPI26_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a0, a0, a1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t -; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: vp_rint_v32f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vmv1r.v v6, v0 +; RV32-NEXT: li a2, 16 +; RV32-NEXT: vslidedown.vi v7, v0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB26_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a1, 16 +; RV32-NEXT: .LBB26_2: +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vfabs.v v24, v8, v0.t +; RV32-NEXT: lui a1, %hi(.LCPI26_0) +; RV32-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; RV32-NEXT: addi a1, a0, -16 +; RV32-NEXT: sltu a0, a0, a1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vfabs.v v24, v16, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32-NEXT: 
vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vp_rint_v32f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: vmv1r.v v6, v0 +; RV64-NEXT: li a2, 16 +; RV64-NEXT: vslidedown.vi v7, v0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: bltu a0, a2, .LBB26_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a1, 16 +; RV64-NEXT: .LBB26_2: +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vfabs.v v24, v8, v0.t +; RV64-NEXT: li a1, 1075 +; RV64-NEXT: slli a1, a1, 52 +; RV64-NEXT: fmv.d.x fa5, a1 +; RV64-NEXT: addi a1, a0, -16 +; RV64-NEXT: sltu a0, a0, a1 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vfabs.v v24, v16, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64-NEXT: ret %v = call <32 x double> @llvm.vp.rint.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v } define <32 x double> @vp_rint_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_v32f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB27_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB27_2: -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: lui a2, %hi(.LCPI27_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a2) -; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: sltu a0, a0, a2 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a2 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v7, v24, fa5 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: vp_rint_v32f64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: li a2, 16 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB27_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a1, 16 +; RV32-NEXT: .LBB27_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vfabs.v v24, v8 +; RV32-NEXT: lui a2, %hi(.LCPI27_0) +; RV32-NEXT: fld fa5, %lo(.LCPI27_0)(a2) +; RV32-NEXT: addi a2, a0, -16 +; RV32-NEXT: sltu a0, a0, a2 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: vmflt.vf v0, v24, fa5 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vfabs.v v24, v16 +; RV32-NEXT: vmflt.vf v7, 
v24, fa5 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vp_rint_v32f64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: li a2, 16 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: bltu a0, a2, .LBB27_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a1, 16 +; RV64-NEXT: .LBB27_2: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vfabs.v v24, v8 +; RV64-NEXT: li a2, 1075 +; RV64-NEXT: slli a2, a2, 52 +; RV64-NEXT: fmv.d.x fa5, a2 +; RV64-NEXT: addi a2, a0, -16 +; RV64-NEXT: sltu a0, a0, a2 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: vmflt.vf v0, v24, fa5 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vfabs.v v24, v16 +; RV64-NEXT: vmflt.vf v7, v24, fa5 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64-NEXT: ret %v = call <32 x double> @llvm.vp.rint.v32f64(<32 x double> %va, <32 x i1> splat (i1 true), i32 %evl) ret <32 x double> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll index d8ff7062f033e..16c8b2b9da682 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll @@ -1,22 +1,23 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64ZVFH ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32ZVFHMIN ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64ZVFHMIN declare <2 x half> @llvm.vp.round.v2f16(<2 x half>, <2 x i1>, i32) define <2 x half> @vp_round_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_v2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI0_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; 
ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 4 @@ -59,10 +60,11 @@ define <2 x half> @vp_round_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) define <2 x half> @vp_round_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_v2f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI1_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -99,10 +101,11 @@ declare <4 x half> @llvm.vp.round.v4f16(<4 x half>, <4 x i1>, i32) define <4 x half> @vp_round_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_v4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI2_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 4 @@ -145,10 +148,11 @@ define <4 x half> @vp_round_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) define <4 x half> @vp_round_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_v4f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI3_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -185,10 +189,11 @@ declare <8 x half> @llvm.vp.round.v8f16(<8 x half>, <8 x i1>, i32) define <8 x half> @vp_round_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_v8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI4_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 4 @@ -231,10 +236,11 @@ define <8 x half> @vp_round_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) define <8 x half> @vp_round_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_v8f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI5_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -273,9 +279,10 @@ define <16 x half> @vp_round_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext % ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vmv1r.v v10, v0 -; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; ZVFH-NEXT: vfabs.v v12, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 4 @@ -319,10 +326,11 @@ define <16 x half> @vp_round_v16f16(<16 x half> 
%va, <16 x i1> %m, i32 zeroext % define <16 x half> @vp_round_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_v16f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI7_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t @@ -529,41 +537,141 @@ define <16 x float> @vp_round_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl declare <2 x double> @llvm.vp.round.v2f64(<2 x double>, <2 x i1>, i32) define <2 x double> @vp_round_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI16_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI16_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_round_v2f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI16_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI16_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfabs.v v9, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 4 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_round_v2f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfabs.v v9, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 4 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_round_v2f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI16_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI16_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 4 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_round_v2f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t 
+; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 4 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <2 x double> @llvm.vp.round.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl) ret <2 x double> %v } define <2 x double> @vp_round_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_v2f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI17_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_round_v2f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI17_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI17_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfabs.v v9, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 4 +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_round_v2f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfabs.v v9, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 4 +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_round_v2f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI17_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI17_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 4 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_round_v2f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 4 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <2 x double> 
@llvm.vp.round.v2f64(<2 x double> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x double> %v } @@ -571,43 +679,149 @@ define <2 x double> @vp_round_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) declare <4 x double> @llvm.vp.round.v4f64(<4 x double>, <4 x i1>, i32) define <4 x double> @vp_round_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI18_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) -; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_round_v4f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v10, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI18_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI18_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v12, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 4 +; RV32ZVFH-NEXT: vmv1r.v v0, v10 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_round_v4f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v10, v0 +; RV64ZVFH-NEXT: vfabs.v v12, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 4 +; RV64ZVFH-NEXT: vmv1r.v v0, v10 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_round_v4f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v10, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI18_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI18_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 4 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v10 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_round_v4f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v10, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; 
RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 4 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v10 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <4 x double> @llvm.vp.round.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) ret <4 x double> %v } define <4 x double> @vp_round_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_v4f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI19_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_round_v4f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI19_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI19_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFH-NEXT: vfabs.v v10, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 4 +; RV32ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_round_v4f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFH-NEXT: vfabs.v v10, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 4 +; RV64ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_round_v4f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI19_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI19_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 4 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_round_v4f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 4 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <4 x double> 
@llvm.vp.round.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v } @@ -615,43 +829,149 @@ define <4 x double> @vp_round_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) declare <8 x double> @llvm.vp.round.v8f64(<8 x double>, <8 x i1>, i32) define <8 x double> @vp_round_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_v8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI20_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_round_v8f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v12, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI20_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI20_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v16, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 4 +; RV32ZVFH-NEXT: vmv1r.v v0, v12 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_round_v8f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v12, v0 +; RV64ZVFH-NEXT: vfabs.v v16, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 4 +; RV64ZVFH-NEXT: vmv1r.v v0, v12 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_round_v8f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v12, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI20_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI20_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 4 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v12 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_round_v8f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v12, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; 
RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 4 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v12 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <8 x double> @llvm.vp.round.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) ret <8 x double> %v } define <8 x double> @vp_round_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_v8f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI21_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_round_v8f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI21_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI21_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFH-NEXT: vfabs.v v12, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 4 +; RV32ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_round_v8f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFH-NEXT: vfabs.v v12, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 4 +; RV64ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_round_v8f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI21_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI21_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 4 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_round_v8f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 4 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <8 x double> 
@llvm.vp.round.v8f64(<8 x double> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x double> %v } @@ -659,43 +979,149 @@ define <8 x double> @vp_round_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) declare <15 x double> @llvm.vp.round.v15f64(<15 x double>, <15 x i1>, i32) define <15 x double> @vp_round_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_v15f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI22_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_round_v15f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v16, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI22_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI22_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 4 +; RV32ZVFH-NEXT: vmv1r.v v0, v16 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_round_v15f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v16, v0 +; RV64ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 4 +; RV64ZVFH-NEXT: vmv1r.v v0, v16 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_round_v15f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI22_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI22_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 4 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_round_v15f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli 
a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 4 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <15 x double> @llvm.vp.round.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) ret <15 x double> %v } define <15 x double> @vp_round_v15f64_unmasked(<15 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_v15f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI23_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_round_v15f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI23_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v16, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 4 +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_round_v15f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v16, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 4 +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_round_v15f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI23_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 4 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_round_v15f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 4 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = 
call <15 x double> @llvm.vp.round.v15f64(<15 x double> %va, <15 x i1> splat (i1 true), i32 %evl) ret <15 x double> %v } @@ -703,43 +1129,149 @@ define <15 x double> @vp_round_v15f64_unmasked(<15 x double> %va, i32 zeroext %e declare <16 x double> @llvm.vp.round.v16f64(<16 x double>, <16 x i1>, i32) define <16 x double> @vp_round_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_v16f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI24_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_round_v16f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v16, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI24_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI24_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 4 +; RV32ZVFH-NEXT: vmv1r.v v0, v16 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_round_v16f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v16, v0 +; RV64ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 4 +; RV64ZVFH-NEXT: vmv1r.v v0, v16 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_round_v16f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI24_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI24_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 4 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_round_v16f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; 
RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 4 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <16 x double> @llvm.vp.round.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) ret <16 x double> %v } define <16 x double> @vp_round_v16f64_unmasked(<16 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_v16f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI25_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_round_v16f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI25_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v16, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 4 +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_round_v16f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v16, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 4 +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_round_v16f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI25_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 4 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_round_v16f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 4 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; 
RV64ZVFHMIN-NEXT: ret %v = call <16 x double> @llvm.vp.round.v16f64(<16 x double> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x double> %v } @@ -747,91 +1279,341 @@ define <16 x double> @vp_round_v16f64_unmasked(<16 x double> %va, i32 zeroext %e declare <32 x double> @llvm.vp.round.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_round_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_v32f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v6, v0 -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vslidedown.vi v7, v0, 2 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB26_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: lui a1, %hi(.LCPI26_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a0, a0, a1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a1, 4 -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_round_v32f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v6, v0 +; RV32ZVFH-NEXT: li a2, 16 +; RV32ZVFH-NEXT: vslidedown.vi v7, v0, 2 +; RV32ZVFH-NEXT: mv a1, a0 +; RV32ZVFH-NEXT: bltu a0, a2, .LBB26_2 +; RV32ZVFH-NEXT: # %bb.1: +; RV32ZVFH-NEXT: li a1, 16 +; RV32ZVFH-NEXT: .LBB26_2: +; RV32ZVFH-NEXT: vmv1r.v v0, v6 +; RV32ZVFH-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI26_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; RV32ZVFH-NEXT: addi a1, a0, -16 +; RV32ZVFH-NEXT: sltu a0, a0, a1 +; RV32ZVFH-NEXT: addi a0, a0, -1 +; RV32ZVFH-NEXT: and a0, a0, a1 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a1, 4 +; RV32ZVFH-NEXT: vmv1r.v v0, v6 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a1 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: vmv1r.v v0, v7 +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 4 +; RV32ZVFH-NEXT: vmv1r.v v0, v7 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; 
RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_round_v32f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v6, v0 +; RV64ZVFH-NEXT: li a2, 16 +; RV64ZVFH-NEXT: vslidedown.vi v7, v0, 2 +; RV64ZVFH-NEXT: mv a1, a0 +; RV64ZVFH-NEXT: bltu a0, a2, .LBB26_2 +; RV64ZVFH-NEXT: # %bb.1: +; RV64ZVFH-NEXT: li a1, 16 +; RV64ZVFH-NEXT: .LBB26_2: +; RV64ZVFH-NEXT: vmv1r.v v0, v6 +; RV64ZVFH-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFH-NEXT: li a1, 1075 +; RV64ZVFH-NEXT: slli a1, a1, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a1 +; RV64ZVFH-NEXT: addi a1, a0, -16 +; RV64ZVFH-NEXT: sltu a0, a0, a1 +; RV64ZVFH-NEXT: addi a0, a0, -1 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV64ZVFH-NEXT: and a0, a0, a1 +; RV64ZVFH-NEXT: fsrmi a1, 4 +; RV64ZVFH-NEXT: vmv1r.v v0, v6 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a1 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: vmv1r.v v0, v7 +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 4 +; RV64ZVFH-NEXT: vmv1r.v v0, v7 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_round_v32f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v6, v0 +; RV32ZVFHMIN-NEXT: li a2, 16 +; RV32ZVFHMIN-NEXT: vslidedown.vi v7, v0, 2 +; RV32ZVFHMIN-NEXT: mv a1, a0 +; RV32ZVFHMIN-NEXT: bltu a0, a2, .LBB26_2 +; RV32ZVFHMIN-NEXT: # %bb.1: +; RV32ZVFHMIN-NEXT: li a1, 16 +; RV32ZVFHMIN-NEXT: .LBB26_2: +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV32ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI26_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; RV32ZVFHMIN-NEXT: addi a1, a0, -16 +; RV32ZVFHMIN-NEXT: sltu a0, a0, a1 +; RV32ZVFHMIN-NEXT: addi a0, a0, -1 +; RV32ZVFHMIN-NEXT: and a0, a0, a1 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a1, 4 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a1 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 4 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; 
RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_round_v32f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v6, v0 +; RV64ZVFHMIN-NEXT: li a2, 16 +; RV64ZVFHMIN-NEXT: vslidedown.vi v7, v0, 2 +; RV64ZVFHMIN-NEXT: mv a1, a0 +; RV64ZVFHMIN-NEXT: bltu a0, a2, .LBB26_2 +; RV64ZVFHMIN-NEXT: # %bb.1: +; RV64ZVFHMIN-NEXT: li a1, 16 +; RV64ZVFHMIN-NEXT: .LBB26_2: +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV64ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: li a1, 1075 +; RV64ZVFHMIN-NEXT: slli a1, a1, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a1 +; RV64ZVFHMIN-NEXT: addi a1, a0, -16 +; RV64ZVFHMIN-NEXT: sltu a0, a0, a1 +; RV64ZVFHMIN-NEXT: addi a0, a0, -1 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: and a0, a0, a1 +; RV64ZVFHMIN-NEXT: fsrmi a1, 4 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a1 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 4 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <32 x double> @llvm.vp.round.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v } define <32 x double> @vp_round_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_v32f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB27_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB27_2: -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: lui a2, %hi(.LCPI27_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a2) -; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: sltu a0, a0, a2 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a2 -; CHECK-NEXT: fsrmi a2, 4 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v7, v24, fa5 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: fsrmi a1, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: 
vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_round_v32f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: li a2, 16 +; RV32ZVFH-NEXT: mv a1, a0 +; RV32ZVFH-NEXT: bltu a0, a2, .LBB27_2 +; RV32ZVFH-NEXT: # %bb.1: +; RV32ZVFH-NEXT: li a1, 16 +; RV32ZVFH-NEXT: .LBB27_2: +; RV32ZVFH-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v8 +; RV32ZVFH-NEXT: lui a2, %hi(.LCPI27_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI27_0)(a2) +; RV32ZVFH-NEXT: addi a2, a0, -16 +; RV32ZVFH-NEXT: sltu a0, a0, a2 +; RV32ZVFH-NEXT: addi a0, a0, -1 +; RV32ZVFH-NEXT: and a0, a0, a2 +; RV32ZVFH-NEXT: fsrmi a2, 4 +; RV32ZVFH-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v16 +; RV32ZVFH-NEXT: vmflt.vf v7, v24, fa5 +; RV32ZVFH-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a2 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: fsrmi a1, 4 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: vmv1r.v v0, v7 +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFH-NEXT: fsrm a1 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_round_v32f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: li a2, 16 +; RV64ZVFH-NEXT: mv a1, a0 +; RV64ZVFH-NEXT: bltu a0, a2, .LBB27_2 +; RV64ZVFH-NEXT: # %bb.1: +; RV64ZVFH-NEXT: li a1, 16 +; RV64ZVFH-NEXT: .LBB27_2: +; RV64ZVFH-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v8 +; RV64ZVFH-NEXT: li a2, 1075 +; RV64ZVFH-NEXT: slli a2, a2, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a2 +; RV64ZVFH-NEXT: addi a2, a0, -16 +; RV64ZVFH-NEXT: sltu a0, a0, a2 +; RV64ZVFH-NEXT: addi a0, a0, -1 +; RV64ZVFH-NEXT: and a0, a0, a2 +; RV64ZVFH-NEXT: fsrmi a2, 4 +; RV64ZVFH-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v16 +; RV64ZVFH-NEXT: vmflt.vf v7, v24, fa5 +; RV64ZVFH-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a2 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: fsrmi a1, 4 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: vmv1r.v v0, v7 +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFH-NEXT: fsrm a1 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_round_v32f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: li a2, 16 +; RV32ZVFHMIN-NEXT: mv a1, a0 +; RV32ZVFHMIN-NEXT: bltu a0, a2, .LBB27_2 +; RV32ZVFHMIN-NEXT: # %bb.1: +; RV32ZVFHMIN-NEXT: li a1, 16 +; RV32ZVFHMIN-NEXT: .LBB27_2: +; RV32ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8 +; RV32ZVFHMIN-NEXT: lui a2, %hi(.LCPI27_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI27_0)(a2) +; RV32ZVFHMIN-NEXT: addi a2, a0, -16 +; RV32ZVFHMIN-NEXT: sltu a0, a0, a2 +; RV32ZVFHMIN-NEXT: addi a0, a0, -1 +; RV32ZVFHMIN-NEXT: and a0, a0, a2 +; RV32ZVFHMIN-NEXT: fsrmi a2, 4 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; 
RV32ZVFHMIN-NEXT: vfabs.v v24, v16 +; RV32ZVFHMIN-NEXT: vmflt.vf v7, v24, fa5 +; RV32ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a2 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a1, 4 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFHMIN-NEXT: fsrm a1 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_round_v32f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: li a2, 16 +; RV64ZVFHMIN-NEXT: mv a1, a0 +; RV64ZVFHMIN-NEXT: bltu a0, a2, .LBB27_2 +; RV64ZVFHMIN-NEXT: # %bb.1: +; RV64ZVFHMIN-NEXT: li a1, 16 +; RV64ZVFHMIN-NEXT: .LBB27_2: +; RV64ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8 +; RV64ZVFHMIN-NEXT: li a2, 1075 +; RV64ZVFHMIN-NEXT: slli a2, a2, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a2 +; RV64ZVFHMIN-NEXT: addi a2, a0, -16 +; RV64ZVFHMIN-NEXT: sltu a0, a0, a2 +; RV64ZVFHMIN-NEXT: addi a0, a0, -1 +; RV64ZVFHMIN-NEXT: and a0, a0, a2 +; RV64ZVFHMIN-NEXT: fsrmi a2, 4 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v16 +; RV64ZVFHMIN-NEXT: vmflt.vf v7, v24, fa5 +; RV64ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a2 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a1, 4 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFHMIN-NEXT: fsrm a1 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <32 x double> @llvm.vp.round.v32f64(<32 x double> %va, <32 x i1> splat (i1 true), i32 %evl) ret <32 x double> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll index 2649f234375d2..14c550d555cf7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll @@ -1,22 +1,23 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64ZVFH ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32ZVFHMIN ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -target-abi=lp64d \ -; RUN: 
-verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64ZVFHMIN declare <2 x half> @llvm.vp.roundeven.v2f16(<2 x half>, <2 x i1>, i32) define <2 x half> @vp_roundeven_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_v2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI0_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 0 @@ -59,10 +60,11 @@ define <2 x half> @vp_roundeven_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext % define <2 x half> @vp_roundeven_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_v2f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI1_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -99,10 +101,11 @@ declare <4 x half> @llvm.vp.roundeven.v4f16(<4 x half>, <4 x i1>, i32) define <4 x half> @vp_roundeven_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_v4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI2_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 0 @@ -145,10 +148,11 @@ define <4 x half> @vp_roundeven_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext % define <4 x half> @vp_roundeven_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_v4f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI3_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -185,10 +189,11 @@ declare <8 x half> @llvm.vp.roundeven.v8f16(<8 x half>, <8 x i1>, i32) define <8 x half> @vp_roundeven_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_v8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI4_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 0 @@ -231,10 +236,11 @@ define <8 x half> @vp_roundeven_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext % define <8 x half> @vp_roundeven_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_v8f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI5_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; 
ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -273,9 +279,10 @@ define <16 x half> @vp_roundeven_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroe ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vmv1r.v v10, v0 -; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; ZVFH-NEXT: vfabs.v v12, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 0 @@ -319,10 +326,11 @@ define <16 x half> @vp_roundeven_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroe define <16 x half> @vp_roundeven_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_v16f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI7_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t @@ -529,41 +537,141 @@ define <16 x float> @vp_roundeven_v16f32_unmasked(<16 x float> %va, i32 zeroext declare <2 x double> @llvm.vp.roundeven.v2f64(<2 x double>, <2 x i1>, i32) define <2 x double> @vp_roundeven_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI16_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI16_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundeven_v2f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI16_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI16_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfabs.v v9, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 0 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundeven_v2f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfabs.v v9, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundeven_v2f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI16_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI16_0)(a1) +; 
RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 0 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundeven_v2f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <2 x double> @llvm.vp.roundeven.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl) ret <2 x double> %v } define <2 x double> @vp_roundeven_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_v2f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI17_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundeven_v2f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI17_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI17_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfabs.v v9, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 0 +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundeven_v2f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfabs.v v9, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 0 +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundeven_v2f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI17_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI17_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 0 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: vsetvli 
zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundeven_v2f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 0 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <2 x double> @llvm.vp.roundeven.v2f64(<2 x double> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x double> %v } @@ -571,43 +679,149 @@ define <2 x double> @vp_roundeven_v2f64_unmasked(<2 x double> %va, i32 zeroext % declare <4 x double> @llvm.vp.roundeven.v4f64(<4 x double>, <4 x i1>, i32) define <4 x double> @vp_roundeven_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI18_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) -; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundeven_v4f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v10, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI18_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI18_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v12, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 0 +; RV32ZVFH-NEXT: vmv1r.v v0, v10 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundeven_v4f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v10, v0 +; RV64ZVFH-NEXT: vfabs.v v12, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 0 +; RV64ZVFH-NEXT: vmv1r.v v0, v10 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundeven_v4f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v10, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI18_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI18_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v12, v8, v0.t +; 
RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 0 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v10 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundeven_v4f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v10, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 0 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v10 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <4 x double> @llvm.vp.roundeven.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) ret <4 x double> %v } define <4 x double> @vp_roundeven_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_v4f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI19_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundeven_v4f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI19_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI19_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFH-NEXT: vfabs.v v10, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 0 +; RV32ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundeven_v4f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFH-NEXT: vfabs.v v10, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 0 +; RV64ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundeven_v4f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI19_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI19_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 0 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: 
vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundeven_v4f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 0 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <4 x double> @llvm.vp.roundeven.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v } @@ -615,43 +829,149 @@ define <4 x double> @vp_roundeven_v4f64_unmasked(<4 x double> %va, i32 zeroext % declare <8 x double> @llvm.vp.roundeven.v8f64(<8 x double>, <8 x i1>, i32) define <8 x double> @vp_roundeven_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_v8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI20_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundeven_v8f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v12, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI20_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI20_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v16, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 0 +; RV32ZVFH-NEXT: vmv1r.v v0, v12 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundeven_v8f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v12, v0 +; RV64ZVFH-NEXT: vfabs.v v16, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 0 +; RV64ZVFH-NEXT: vmv1r.v v0, v12 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundeven_v8f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v12, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI20_0) +; RV32ZVFHMIN-NEXT: fld fa5, 
%lo(.LCPI20_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 0 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v12 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundeven_v8f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v12, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 0 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v12 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <8 x double> @llvm.vp.roundeven.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) ret <8 x double> %v } define <8 x double> @vp_roundeven_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_v8f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI21_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundeven_v8f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI21_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI21_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFH-NEXT: vfabs.v v12, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 0 +; RV32ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundeven_v8f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFH-NEXT: vfabs.v v12, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 0 +; RV64ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundeven_v8f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI21_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI21_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 0 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, 
v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundeven_v8f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 0 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <8 x double> @llvm.vp.roundeven.v8f64(<8 x double> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x double> %v } @@ -659,43 +979,149 @@ define <8 x double> @vp_roundeven_v8f64_unmasked(<8 x double> %va, i32 zeroext % declare <15 x double> @llvm.vp.roundeven.v15f64(<15 x double>, <15 x i1>, i32) define <15 x double> @vp_roundeven_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_v15f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI22_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundeven_v15f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v16, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI22_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI22_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 0 +; RV32ZVFH-NEXT: vmv1r.v v0, v16 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundeven_v15f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v16, v0 +; RV64ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 0 +; RV64ZVFH-NEXT: vmv1r.v v0, v16 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundeven_v15f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v16, v0 +; 
RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI22_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI22_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 0 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundeven_v15f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 0 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <15 x double> @llvm.vp.roundeven.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) ret <15 x double> %v } define <15 x double> @vp_roundeven_v15f64_unmasked(<15 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_v15f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI23_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundeven_v15f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI23_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v16, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 0 +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundeven_v15f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v16, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 0 +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundeven_v15f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI23_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, 
v16, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 0 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundeven_v15f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 0 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <15 x double> @llvm.vp.roundeven.v15f64(<15 x double> %va, <15 x i1> splat (i1 true), i32 %evl) ret <15 x double> %v } @@ -703,43 +1129,149 @@ define <15 x double> @vp_roundeven_v15f64_unmasked(<15 x double> %va, i32 zeroex declare <16 x double> @llvm.vp.roundeven.v16f64(<16 x double>, <16 x i1>, i32) define <16 x double> @vp_roundeven_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_v16f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI24_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundeven_v16f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v16, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI24_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI24_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 0 +; RV32ZVFH-NEXT: vmv1r.v v0, v16 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundeven_v16f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v16, v0 +; RV64ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 0 +; RV64ZVFH-NEXT: vmv1r.v v0, v16 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundeven_v16f64: +; RV32ZVFHMIN: # %bb.0: +; 
RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI24_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI24_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 0 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundeven_v16f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 0 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <16 x double> @llvm.vp.roundeven.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) ret <16 x double> %v } define <16 x double> @vp_roundeven_v16f64_unmasked(<16 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_v16f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI25_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundeven_v16f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI25_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v16, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 0 +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundeven_v16f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v16, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 0 +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundeven_v16f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI25_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, 
a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 0 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundeven_v16f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 0 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <16 x double> @llvm.vp.roundeven.v16f64(<16 x double> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x double> %v } @@ -747,91 +1279,341 @@ define <16 x double> @vp_roundeven_v16f64_unmasked(<16 x double> %va, i32 zeroex declare <32 x double> @llvm.vp.roundeven.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_roundeven_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_v32f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v6, v0 -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vslidedown.vi v7, v0, 2 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB26_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: lui a1, %hi(.LCPI26_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a0, a0, a1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a1, 0 -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundeven_v32f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v6, v0 +; RV32ZVFH-NEXT: li a2, 16 +; RV32ZVFH-NEXT: vslidedown.vi v7, v0, 2 +; RV32ZVFH-NEXT: mv a1, a0 +; RV32ZVFH-NEXT: bltu a0, a2, .LBB26_2 +; RV32ZVFH-NEXT: # %bb.1: +; RV32ZVFH-NEXT: li a1, 16 +; RV32ZVFH-NEXT: .LBB26_2: +; RV32ZVFH-NEXT: vmv1r.v v0, v6 +; RV32ZVFH-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI26_0) +; 
RV32ZVFH-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; RV32ZVFH-NEXT: addi a1, a0, -16 +; RV32ZVFH-NEXT: sltu a0, a0, a1 +; RV32ZVFH-NEXT: addi a0, a0, -1 +; RV32ZVFH-NEXT: and a0, a0, a1 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a1, 0 +; RV32ZVFH-NEXT: vmv1r.v v0, v6 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a1 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: vmv1r.v v0, v7 +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 0 +; RV32ZVFH-NEXT: vmv1r.v v0, v7 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundeven_v32f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v6, v0 +; RV64ZVFH-NEXT: li a2, 16 +; RV64ZVFH-NEXT: vslidedown.vi v7, v0, 2 +; RV64ZVFH-NEXT: mv a1, a0 +; RV64ZVFH-NEXT: bltu a0, a2, .LBB26_2 +; RV64ZVFH-NEXT: # %bb.1: +; RV64ZVFH-NEXT: li a1, 16 +; RV64ZVFH-NEXT: .LBB26_2: +; RV64ZVFH-NEXT: vmv1r.v v0, v6 +; RV64ZVFH-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFH-NEXT: li a1, 1075 +; RV64ZVFH-NEXT: slli a1, a1, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a1 +; RV64ZVFH-NEXT: addi a1, a0, -16 +; RV64ZVFH-NEXT: sltu a0, a0, a1 +; RV64ZVFH-NEXT: addi a0, a0, -1 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV64ZVFH-NEXT: and a0, a0, a1 +; RV64ZVFH-NEXT: fsrmi a1, 0 +; RV64ZVFH-NEXT: vmv1r.v v0, v6 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a1 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: vmv1r.v v0, v7 +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 0 +; RV64ZVFH-NEXT: vmv1r.v v0, v7 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundeven_v32f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v6, v0 +; RV32ZVFHMIN-NEXT: li a2, 16 +; RV32ZVFHMIN-NEXT: vslidedown.vi v7, v0, 2 +; RV32ZVFHMIN-NEXT: mv a1, a0 +; RV32ZVFHMIN-NEXT: bltu a0, a2, .LBB26_2 +; RV32ZVFHMIN-NEXT: # %bb.1: +; RV32ZVFHMIN-NEXT: li a1, 16 +; RV32ZVFHMIN-NEXT: .LBB26_2: +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV32ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI26_0) +; RV32ZVFHMIN-NEXT: fld fa5, 
%lo(.LCPI26_0)(a1) +; RV32ZVFHMIN-NEXT: addi a1, a0, -16 +; RV32ZVFHMIN-NEXT: sltu a0, a0, a1 +; RV32ZVFHMIN-NEXT: addi a0, a0, -1 +; RV32ZVFHMIN-NEXT: and a0, a0, a1 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a1, 0 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a1 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 0 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundeven_v32f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v6, v0 +; RV64ZVFHMIN-NEXT: li a2, 16 +; RV64ZVFHMIN-NEXT: vslidedown.vi v7, v0, 2 +; RV64ZVFHMIN-NEXT: mv a1, a0 +; RV64ZVFHMIN-NEXT: bltu a0, a2, .LBB26_2 +; RV64ZVFHMIN-NEXT: # %bb.1: +; RV64ZVFHMIN-NEXT: li a1, 16 +; RV64ZVFHMIN-NEXT: .LBB26_2: +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV64ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: li a1, 1075 +; RV64ZVFHMIN-NEXT: slli a1, a1, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a1 +; RV64ZVFHMIN-NEXT: addi a1, a0, -16 +; RV64ZVFHMIN-NEXT: sltu a0, a0, a1 +; RV64ZVFHMIN-NEXT: addi a0, a0, -1 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: and a0, a0, a1 +; RV64ZVFHMIN-NEXT: fsrmi a1, 0 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a1 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 0 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <32 x double> @llvm.vp.roundeven.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v } define <32 x double> @vp_roundeven_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_v32f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB27_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a1, 16 -; 
CHECK-NEXT: .LBB27_2: -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: lui a2, %hi(.LCPI27_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a2) -; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: sltu a0, a0, a2 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a2 -; CHECK-NEXT: fsrmi a2, 0 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v7, v24, fa5 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: fsrmi a1, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundeven_v32f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: li a2, 16 +; RV32ZVFH-NEXT: mv a1, a0 +; RV32ZVFH-NEXT: bltu a0, a2, .LBB27_2 +; RV32ZVFH-NEXT: # %bb.1: +; RV32ZVFH-NEXT: li a1, 16 +; RV32ZVFH-NEXT: .LBB27_2: +; RV32ZVFH-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v8 +; RV32ZVFH-NEXT: lui a2, %hi(.LCPI27_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI27_0)(a2) +; RV32ZVFH-NEXT: addi a2, a0, -16 +; RV32ZVFH-NEXT: sltu a0, a0, a2 +; RV32ZVFH-NEXT: addi a0, a0, -1 +; RV32ZVFH-NEXT: and a0, a0, a2 +; RV32ZVFH-NEXT: fsrmi a2, 0 +; RV32ZVFH-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v16 +; RV32ZVFH-NEXT: vmflt.vf v7, v24, fa5 +; RV32ZVFH-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a2 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: fsrmi a1, 0 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: vmv1r.v v0, v7 +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFH-NEXT: fsrm a1 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundeven_v32f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: li a2, 16 +; RV64ZVFH-NEXT: mv a1, a0 +; RV64ZVFH-NEXT: bltu a0, a2, .LBB27_2 +; RV64ZVFH-NEXT: # %bb.1: +; RV64ZVFH-NEXT: li a1, 16 +; RV64ZVFH-NEXT: .LBB27_2: +; RV64ZVFH-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v8 +; RV64ZVFH-NEXT: li a2, 1075 +; RV64ZVFH-NEXT: slli a2, a2, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a2 +; RV64ZVFH-NEXT: addi a2, a0, -16 +; RV64ZVFH-NEXT: sltu a0, a0, a2 +; RV64ZVFH-NEXT: addi a0, a0, -1 +; RV64ZVFH-NEXT: and a0, a0, a2 +; RV64ZVFH-NEXT: fsrmi a2, 0 +; RV64ZVFH-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v16 +; RV64ZVFH-NEXT: vmflt.vf v7, v24, fa5 +; RV64ZVFH-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a2 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: fsrmi a1, 0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: vmv1r.v v0, v7 +; 
RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFH-NEXT: fsrm a1 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundeven_v32f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: li a2, 16 +; RV32ZVFHMIN-NEXT: mv a1, a0 +; RV32ZVFHMIN-NEXT: bltu a0, a2, .LBB27_2 +; RV32ZVFHMIN-NEXT: # %bb.1: +; RV32ZVFHMIN-NEXT: li a1, 16 +; RV32ZVFHMIN-NEXT: .LBB27_2: +; RV32ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8 +; RV32ZVFHMIN-NEXT: lui a2, %hi(.LCPI27_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI27_0)(a2) +; RV32ZVFHMIN-NEXT: addi a2, a0, -16 +; RV32ZVFHMIN-NEXT: sltu a0, a0, a2 +; RV32ZVFHMIN-NEXT: addi a0, a0, -1 +; RV32ZVFHMIN-NEXT: and a0, a0, a2 +; RV32ZVFHMIN-NEXT: fsrmi a2, 0 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v16 +; RV32ZVFHMIN-NEXT: vmflt.vf v7, v24, fa5 +; RV32ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a2 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a1, 0 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFHMIN-NEXT: fsrm a1 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundeven_v32f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: li a2, 16 +; RV64ZVFHMIN-NEXT: mv a1, a0 +; RV64ZVFHMIN-NEXT: bltu a0, a2, .LBB27_2 +; RV64ZVFHMIN-NEXT: # %bb.1: +; RV64ZVFHMIN-NEXT: li a1, 16 +; RV64ZVFHMIN-NEXT: .LBB27_2: +; RV64ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8 +; RV64ZVFHMIN-NEXT: li a2, 1075 +; RV64ZVFHMIN-NEXT: slli a2, a2, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a2 +; RV64ZVFHMIN-NEXT: addi a2, a0, -16 +; RV64ZVFHMIN-NEXT: sltu a0, a0, a2 +; RV64ZVFHMIN-NEXT: addi a0, a0, -1 +; RV64ZVFHMIN-NEXT: and a0, a0, a2 +; RV64ZVFHMIN-NEXT: fsrmi a2, 0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v16 +; RV64ZVFHMIN-NEXT: vmflt.vf v7, v24, fa5 +; RV64ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a2 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a1, 0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFHMIN-NEXT: fsrm a1 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <32 x double> @llvm.vp.roundeven.v32f64(<32 x double> %va, <32 x i1> splat (i1 true), i32 %evl) ret <32 x double> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll index 
50e65b62e7848..16f04f14721d0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll @@ -1,22 +1,23 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64ZVFH ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32ZVFHMIN ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64ZVFHMIN declare <2 x half> @llvm.vp.roundtozero.v2f16(<2 x half>, <2 x i1>, i32) define <2 x half> @vp_roundtozero_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_v2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI0_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 1 @@ -59,10 +60,11 @@ define <2 x half> @vp_roundtozero_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext define <2 x half> @vp_roundtozero_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_v2f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI1_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -99,10 +101,11 @@ declare <4 x half> @llvm.vp.roundtozero.v4f16(<4 x half>, <4 x i1>, i32) define <4 x half> @vp_roundtozero_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_v4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI2_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 1 @@ -145,10 +148,11 @@ define <4 x half> @vp_roundtozero_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext define <4 x half> @vp_roundtozero_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_v4f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI3_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ 
-185,10 +189,11 @@ declare <8 x half> @llvm.vp.roundtozero.v8f16(<8 x half>, <8 x i1>, i32) define <8 x half> @vp_roundtozero_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_v8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI4_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 1 @@ -231,10 +236,11 @@ define <8 x half> @vp_roundtozero_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext define <8 x half> @vp_roundtozero_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_v8f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI5_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -273,9 +279,10 @@ define <16 x half> @vp_roundtozero_v16f16(<16 x half> %va, <16 x i1> %m, i32 zer ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vmv1r.v v10, v0 -; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; ZVFH-NEXT: vfabs.v v12, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 1 @@ -319,10 +326,11 @@ define <16 x half> @vp_roundtozero_v16f16(<16 x half> %va, <16 x i1> %m, i32 zer define <16 x half> @vp_roundtozero_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_v16f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI7_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t @@ -529,41 +537,141 @@ define <16 x float> @vp_roundtozero_v16f32_unmasked(<16 x float> %va, i32 zeroex declare <2 x double> @llvm.vp.roundtozero.v2f64(<2 x double>, <2 x i1>, i32) define <2 x double> @vp_roundtozero_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI16_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI16_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundtozero_v2f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI16_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI16_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfabs.v v9, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 1 +; RV32ZVFH-NEXT: vsetvli zero, 
zero, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundtozero_v2f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfabs.v v9, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 1 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundtozero_v2f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI16_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI16_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 1 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundtozero_v2f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 1 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <2 x double> @llvm.vp.roundtozero.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl) ret <2 x double> %v } define <2 x double> @vp_roundtozero_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_v2f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI17_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundtozero_v2f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI17_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI17_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfabs.v v9, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 1 +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; 
RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundtozero_v2f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfabs.v v9, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 1 +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundtozero_v2f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI17_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI17_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 1 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundtozero_v2f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 1 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <2 x double> @llvm.vp.roundtozero.v2f64(<2 x double> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x double> %v } @@ -571,43 +679,149 @@ define <2 x double> @vp_roundtozero_v2f64_unmasked(<2 x double> %va, i32 zeroext declare <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double>, <4 x i1>, i32) define <4 x double> @vp_roundtozero_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI18_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) -; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundtozero_v4f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v10, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI18_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI18_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v12, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 1 +; RV32ZVFH-NEXT: vmv1r.v v0, v10 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, 
ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundtozero_v4f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v10, v0 +; RV64ZVFH-NEXT: vfabs.v v12, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 1 +; RV64ZVFH-NEXT: vmv1r.v v0, v10 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundtozero_v4f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v10, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI18_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI18_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 1 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v10 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundtozero_v4f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v10, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 1 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v10 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) ret <4 x double> %v } define <4 x double> @vp_roundtozero_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_v4f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI19_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundtozero_v4f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI19_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI19_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFH-NEXT: vfabs.v v10, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 1 +; RV32ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t 
+; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundtozero_v4f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFH-NEXT: vfabs.v v10, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 1 +; RV64ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundtozero_v4f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI19_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI19_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 1 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundtozero_v4f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 1 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v } @@ -615,43 +829,149 @@ define <4 x double> @vp_roundtozero_v4f64_unmasked(<4 x double> %va, i32 zeroext declare <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double>, <8 x i1>, i32) define <8 x double> @vp_roundtozero_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_v8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI20_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundtozero_v8f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v12, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI20_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI20_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v16, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 1 +; RV32ZVFH-NEXT: vmv1r.v v0, v12 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; 
RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundtozero_v8f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v12, v0 +; RV64ZVFH-NEXT: vfabs.v v16, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 1 +; RV64ZVFH-NEXT: vmv1r.v v0, v12 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundtozero_v8f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v12, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI20_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI20_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 1 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v12 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundtozero_v8f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v12, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 1 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v12 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) ret <8 x double> %v } define <8 x double> @vp_roundtozero_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_v8f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI21_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundtozero_v8f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI21_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI21_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFH-NEXT: vfabs.v v12, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 1 +; RV32ZVFH-NEXT: 
vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundtozero_v8f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFH-NEXT: vfabs.v v12, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 1 +; RV64ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundtozero_v8f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI21_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI21_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 1 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundtozero_v8f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 1 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x double> %v } @@ -659,43 +979,149 @@ define <8 x double> @vp_roundtozero_v8f64_unmasked(<8 x double> %va, i32 zeroext declare <15 x double> @llvm.vp.roundtozero.v15f64(<15 x double>, <15 x i1>, i32) define <15 x double> @vp_roundtozero_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_v15f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI22_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundtozero_v15f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v16, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI22_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI22_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 1 +; RV32ZVFH-NEXT: vmv1r.v v0, v16 +; RV32ZVFH-NEXT: vsetvli zero, 
zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundtozero_v15f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v16, v0 +; RV64ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 1 +; RV64ZVFH-NEXT: vmv1r.v v0, v16 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundtozero_v15f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI22_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI22_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 1 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundtozero_v15f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 1 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <15 x double> @llvm.vp.roundtozero.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) ret <15 x double> %v } define <15 x double> @vp_roundtozero_v15f64_unmasked(<15 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_v15f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI23_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundtozero_v15f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI23_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: 
vfabs.v v16, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 1 +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundtozero_v15f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v16, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 1 +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundtozero_v15f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI23_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 1 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundtozero_v15f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 1 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <15 x double> @llvm.vp.roundtozero.v15f64(<15 x double> %va, <15 x i1> splat (i1 true), i32 %evl) ret <15 x double> %v } @@ -703,43 +1129,149 @@ define <15 x double> @vp_roundtozero_v15f64_unmasked(<15 x double> %va, i32 zero declare <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double>, <16 x i1>, i32) define <16 x double> @vp_roundtozero_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_v16f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI24_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundtozero_v16f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v16, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI24_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI24_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf 
v16, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 1 +; RV32ZVFH-NEXT: vmv1r.v v0, v16 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundtozero_v16f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v16, v0 +; RV64ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 1 +; RV64ZVFH-NEXT: vmv1r.v v0, v16 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundtozero_v16f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI24_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI24_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 1 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundtozero_v16f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 1 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) ret <16 x double> %v } define <16 x double> @vp_roundtozero_v16f64_unmasked(<16 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_v16f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI25_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundtozero_v16f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI25_0) +; 
RV32ZVFH-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v16, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 1 +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundtozero_v16f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v16, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 1 +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundtozero_v16f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI25_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 1 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundtozero_v16f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 1 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x double> %v } @@ -747,91 +1279,341 @@ define <16 x double> @vp_roundtozero_v16f64_unmasked(<16 x double> %va, i32 zero declare <32 x double> @llvm.vp.roundtozero.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_roundtozero_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_v32f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v6, v0 -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vslidedown.vi v7, v0, 2 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB26_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: lui a1, %hi(.LCPI26_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a0, a0, a1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a1, 1 -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a1 -; 
CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundtozero_v32f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v6, v0 +; RV32ZVFH-NEXT: li a2, 16 +; RV32ZVFH-NEXT: vslidedown.vi v7, v0, 2 +; RV32ZVFH-NEXT: mv a1, a0 +; RV32ZVFH-NEXT: bltu a0, a2, .LBB26_2 +; RV32ZVFH-NEXT: # %bb.1: +; RV32ZVFH-NEXT: li a1, 16 +; RV32ZVFH-NEXT: .LBB26_2: +; RV32ZVFH-NEXT: vmv1r.v v0, v6 +; RV32ZVFH-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI26_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; RV32ZVFH-NEXT: addi a1, a0, -16 +; RV32ZVFH-NEXT: sltu a0, a0, a1 +; RV32ZVFH-NEXT: addi a0, a0, -1 +; RV32ZVFH-NEXT: and a0, a0, a1 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a1, 1 +; RV32ZVFH-NEXT: vmv1r.v v0, v6 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a1 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: vmv1r.v v0, v7 +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 1 +; RV32ZVFH-NEXT: vmv1r.v v0, v7 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundtozero_v32f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v6, v0 +; RV64ZVFH-NEXT: li a2, 16 +; RV64ZVFH-NEXT: vslidedown.vi v7, v0, 2 +; RV64ZVFH-NEXT: mv a1, a0 +; RV64ZVFH-NEXT: bltu a0, a2, .LBB26_2 +; RV64ZVFH-NEXT: # %bb.1: +; RV64ZVFH-NEXT: li a1, 16 +; RV64ZVFH-NEXT: .LBB26_2: +; RV64ZVFH-NEXT: vmv1r.v v0, v6 +; RV64ZVFH-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFH-NEXT: li a1, 1075 +; RV64ZVFH-NEXT: slli a1, a1, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a1 +; RV64ZVFH-NEXT: addi a1, a0, -16 +; RV64ZVFH-NEXT: sltu a0, a0, a1 +; RV64ZVFH-NEXT: addi a0, a0, -1 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV64ZVFH-NEXT: and a0, a0, a1 +; RV64ZVFH-NEXT: fsrmi a1, 1 +; RV64ZVFH-NEXT: vmv1r.v v0, v6 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a1 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv 
v8, v24, v8, v0.t +; RV64ZVFH-NEXT: vmv1r.v v0, v7 +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 1 +; RV64ZVFH-NEXT: vmv1r.v v0, v7 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundtozero_v32f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v6, v0 +; RV32ZVFHMIN-NEXT: li a2, 16 +; RV32ZVFHMIN-NEXT: vslidedown.vi v7, v0, 2 +; RV32ZVFHMIN-NEXT: mv a1, a0 +; RV32ZVFHMIN-NEXT: bltu a0, a2, .LBB26_2 +; RV32ZVFHMIN-NEXT: # %bb.1: +; RV32ZVFHMIN-NEXT: li a1, 16 +; RV32ZVFHMIN-NEXT: .LBB26_2: +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV32ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI26_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; RV32ZVFHMIN-NEXT: addi a1, a0, -16 +; RV32ZVFHMIN-NEXT: sltu a0, a0, a1 +; RV32ZVFHMIN-NEXT: addi a0, a0, -1 +; RV32ZVFHMIN-NEXT: and a0, a0, a1 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a1, 1 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a1 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 1 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundtozero_v32f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v6, v0 +; RV64ZVFHMIN-NEXT: li a2, 16 +; RV64ZVFHMIN-NEXT: vslidedown.vi v7, v0, 2 +; RV64ZVFHMIN-NEXT: mv a1, a0 +; RV64ZVFHMIN-NEXT: bltu a0, a2, .LBB26_2 +; RV64ZVFHMIN-NEXT: # %bb.1: +; RV64ZVFHMIN-NEXT: li a1, 16 +; RV64ZVFHMIN-NEXT: .LBB26_2: +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV64ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: li a1, 1075 +; RV64ZVFHMIN-NEXT: slli a1, a1, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a1 +; RV64ZVFHMIN-NEXT: addi a1, a0, -16 +; RV64ZVFHMIN-NEXT: sltu a0, a0, a1 +; RV64ZVFHMIN-NEXT: addi a0, a0, -1 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: and a0, a0, a1 +; RV64ZVFHMIN-NEXT: fsrmi a1, 1 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; 
RV64ZVFHMIN-NEXT: fsrm a1 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 1 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <32 x double> @llvm.vp.roundtozero.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v } define <32 x double> @vp_roundtozero_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_v32f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB27_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB27_2: -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: lui a2, %hi(.LCPI27_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a2) -; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: sltu a0, a0, a2 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a2 -; CHECK-NEXT: fsrmi a2, 1 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v7, v24, fa5 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: fsrmi a1, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundtozero_v32f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: li a2, 16 +; RV32ZVFH-NEXT: mv a1, a0 +; RV32ZVFH-NEXT: bltu a0, a2, .LBB27_2 +; RV32ZVFH-NEXT: # %bb.1: +; RV32ZVFH-NEXT: li a1, 16 +; RV32ZVFH-NEXT: .LBB27_2: +; RV32ZVFH-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v8 +; RV32ZVFH-NEXT: lui a2, %hi(.LCPI27_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI27_0)(a2) +; RV32ZVFH-NEXT: addi a2, a0, -16 +; RV32ZVFH-NEXT: sltu a0, a0, a2 +; RV32ZVFH-NEXT: addi a0, a0, -1 +; RV32ZVFH-NEXT: and a0, a0, a2 +; RV32ZVFH-NEXT: fsrmi a2, 1 +; RV32ZVFH-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v16 +; RV32ZVFH-NEXT: vmflt.vf v7, v24, fa5 +; RV32ZVFH-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a2 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: fsrmi a1, 1 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: vmv1r.v v0, v7 +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFH-NEXT: fsrm a1 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; 
RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundtozero_v32f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: li a2, 16 +; RV64ZVFH-NEXT: mv a1, a0 +; RV64ZVFH-NEXT: bltu a0, a2, .LBB27_2 +; RV64ZVFH-NEXT: # %bb.1: +; RV64ZVFH-NEXT: li a1, 16 +; RV64ZVFH-NEXT: .LBB27_2: +; RV64ZVFH-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v8 +; RV64ZVFH-NEXT: li a2, 1075 +; RV64ZVFH-NEXT: slli a2, a2, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a2 +; RV64ZVFH-NEXT: addi a2, a0, -16 +; RV64ZVFH-NEXT: sltu a0, a0, a2 +; RV64ZVFH-NEXT: addi a0, a0, -1 +; RV64ZVFH-NEXT: and a0, a0, a2 +; RV64ZVFH-NEXT: fsrmi a2, 1 +; RV64ZVFH-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v16 +; RV64ZVFH-NEXT: vmflt.vf v7, v24, fa5 +; RV64ZVFH-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a2 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: fsrmi a1, 1 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: vmv1r.v v0, v7 +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFH-NEXT: fsrm a1 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundtozero_v32f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: li a2, 16 +; RV32ZVFHMIN-NEXT: mv a1, a0 +; RV32ZVFHMIN-NEXT: bltu a0, a2, .LBB27_2 +; RV32ZVFHMIN-NEXT: # %bb.1: +; RV32ZVFHMIN-NEXT: li a1, 16 +; RV32ZVFHMIN-NEXT: .LBB27_2: +; RV32ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8 +; RV32ZVFHMIN-NEXT: lui a2, %hi(.LCPI27_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI27_0)(a2) +; RV32ZVFHMIN-NEXT: addi a2, a0, -16 +; RV32ZVFHMIN-NEXT: sltu a0, a0, a2 +; RV32ZVFHMIN-NEXT: addi a0, a0, -1 +; RV32ZVFHMIN-NEXT: and a0, a0, a2 +; RV32ZVFHMIN-NEXT: fsrmi a2, 1 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v16 +; RV32ZVFHMIN-NEXT: vmflt.vf v7, v24, fa5 +; RV32ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a2 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a1, 1 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFHMIN-NEXT: fsrm a1 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundtozero_v32f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: li a2, 16 +; RV64ZVFHMIN-NEXT: mv a1, a0 +; RV64ZVFHMIN-NEXT: bltu a0, a2, .LBB27_2 +; RV64ZVFHMIN-NEXT: # %bb.1: +; RV64ZVFHMIN-NEXT: li a1, 16 +; RV64ZVFHMIN-NEXT: .LBB27_2: +; RV64ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8 +; RV64ZVFHMIN-NEXT: li a2, 1075 +; RV64ZVFHMIN-NEXT: slli a2, a2, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a2 +; RV64ZVFHMIN-NEXT: addi a2, a0, -16 +; RV64ZVFHMIN-NEXT: sltu a0, 
a0, a2 +; RV64ZVFHMIN-NEXT: addi a0, a0, -1 +; RV64ZVFHMIN-NEXT: and a0, a0, a2 +; RV64ZVFHMIN-NEXT: fsrmi a2, 1 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v16 +; RV64ZVFHMIN-NEXT: vmflt.vf v7, v24, fa5 +; RV64ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a2 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a1, 1 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFHMIN-NEXT: fsrm a1 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call <32 x double> @llvm.vp.roundtozero.v32f64(<32 x double> %va, <32 x i1> splat (i1 true), i32 %evl) ret <32 x double> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-fp.ll index af79ace04cf54..965d0b0fe0f9b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-fp.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV32 %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64 %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV32 %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64 %s define <4 x bfloat> @shuffle_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) { ; CHECK-LABEL: shuffle_v4bf16: @@ -39,29 +39,49 @@ define <8 x float> @shuffle_v8f32(<8 x float> %x, <8 x float> %y) { } define <4 x double> @shuffle_fv_v4f64(<4 x double> %x) { -; CHECK-LABEL: shuffle_fv_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI3_0)(a0) -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v0, 9 -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vfmerge.vfm v8, v8, fa5, v0 -; CHECK-NEXT: ret +; RV32-LABEL: shuffle_fv_v4f64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI3_0) +; RV32-NEXT: fld fa5, %lo(.LCPI3_0)(a0) +; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV32-NEXT: vmv.v.i v0, 9 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vfmerge.vfm v8, v8, fa5, v0 +; RV32-NEXT: ret +; +; RV64-LABEL: shuffle_fv_v4f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV64-NEXT: vmv.v.i v0, 9 +; RV64-NEXT: li a0, 1 +; RV64-NEXT: slli a0, a0, 62 +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 
+; RV64-NEXT: ret %s = shufflevector <4 x double> , <4 x double> %x, <4 x i32> ret <4 x double> %s } define <4 x double> @shuffle_vf_v4f64(<4 x double> %x) { -; CHECK-LABEL: shuffle_vf_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI4_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI4_0)(a0) -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v0, 6 -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vfmerge.vfm v8, v8, fa5, v0 -; CHECK-NEXT: ret +; RV32-LABEL: shuffle_vf_v4f64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI4_0) +; RV32-NEXT: fld fa5, %lo(.LCPI4_0)(a0) +; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV32-NEXT: vmv.v.i v0, 6 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vfmerge.vfm v8, v8, fa5, v0 +; RV32-NEXT: ret +; +; RV64-LABEL: shuffle_vf_v4f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV64-NEXT: vmv.v.i v0, 6 +; RV64-NEXT: li a0, 1 +; RV64-NEXT: slli a0, a0, 62 +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 +; RV64-NEXT: ret %s = shufflevector <4 x double> %x, <4 x double> , <4 x i32> ret <4 x double> %s } @@ -79,15 +99,25 @@ define <4 x float> @vfmerge_constant_v4f32(<4 x float> %x) { } define <4 x double> @vfmerge_constant_v4f64(<4 x double> %x) { -; CHECK-LABEL: vfmerge_constant_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI6_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI6_0)(a0) -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v0, 6 -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vfmerge.vfm v8, v8, fa5, v0 -; CHECK-NEXT: ret +; RV32-LABEL: vfmerge_constant_v4f64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI6_0) +; RV32-NEXT: fld fa5, %lo(.LCPI6_0)(a0) +; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV32-NEXT: vmv.v.i v0, 6 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vfmerge.vfm v8, v8, fa5, v0 +; RV32-NEXT: ret +; +; RV64-LABEL: vfmerge_constant_v4f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV64-NEXT: vmv.v.i v0, 6 +; RV64-NEXT: lui a0, 4101 +; RV64-NEXT: slli a0, a0, 38 +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 +; RV64-NEXT: ret %s = shufflevector <4 x double> %x, <4 x double> , <4 x i32> ret <4 x double> %s } @@ -161,40 +191,71 @@ define <4 x double> @vrgather_shuffle_vv_v4f64(<4 x double> %x, <4 x double> %y) } define <4 x double> @vrgather_shuffle_xv_v4f64(<4 x double> %x) { -; CHECK-LABEL: vrgather_shuffle_xv_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v0, 8 -; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) -; CHECK-NEXT: vmv2r.v v10, v8 -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: vslideup.vi v10, v8, 2, v0.t -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v0, 12 -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa5 -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 -; CHECK-NEXT: ret +; RV32-LABEL: vrgather_shuffle_xv_v4f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV32-NEXT: vmv.v.i v0, 8 +; RV32-NEXT: lui a0, %hi(.LCPI12_0) +; RV32-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; RV32-NEXT: vmv2r.v v10, v8 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV32-NEXT: vslideup.vi v10, v8, 2, v0.t +; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV32-NEXT: vmv.v.i v0, 12 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: 
vfmv.v.f v8, fa5 +; RV32-NEXT: vmerge.vvm v8, v8, v10, v0 +; RV32-NEXT: ret +; +; RV64-LABEL: vrgather_shuffle_xv_v4f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV64-NEXT: vmv.v.i v0, 8 +; RV64-NEXT: vmv2r.v v10, v8 +; RV64-NEXT: li a0, 1 +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV64-NEXT: vslideup.vi v10, v8, 2, v0.t +; RV64-NEXT: slli a0, a0, 62 +; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV64-NEXT: vmv.v.i v0, 12 +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vmv.v.x v8, a0 +; RV64-NEXT: vmerge.vvm v8, v8, v10, v0 +; RV64-NEXT: ret %s = shufflevector <4 x double> , <4 x double> %x, <4 x i32> ret <4 x double> %s } define <4 x double> @vrgather_shuffle_vx_v4f64(<4 x double> %x) { -; CHECK-LABEL: vrgather_shuffle_vx_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v0, 2 -; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: vslidedown.vi v8, v8, 2, v0.t -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v0, 3 -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vfmv.v.f v10, fa5 -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: ret +; RV32-LABEL: vrgather_shuffle_vx_v4f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV32-NEXT: vmv.v.i v0, 2 +; RV32-NEXT: lui a0, %hi(.LCPI13_0) +; RV32-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV32-NEXT: vslidedown.vi v8, v8, 2, v0.t +; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV32-NEXT: vmv.v.i v0, 3 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vfmv.v.f v10, fa5 +; RV32-NEXT: vmerge.vvm v8, v10, v8, v0 +; RV32-NEXT: ret +; +; RV64-LABEL: vrgather_shuffle_vx_v4f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV64-NEXT: vmv.v.i v0, 2 +; RV64-NEXT: li a0, 1 +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV64-NEXT: vslidedown.vi v8, v8, 2, v0.t +; RV64-NEXT: slli a0, a0, 62 +; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV64-NEXT: vmv.v.i v0, 3 +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vmv.v.x v10, a0 +; RV64-NEXT: vmerge.vvm v8, v10, v8, v0 +; RV64-NEXT: ret %s = shufflevector <4 x double> %x, <4 x double> , <4 x i32> ret <4 x double> %s } diff --git a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll index 6ebb03ff0297e..8f2aec3140e9d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll @@ -1,16 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFH,RV32ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFH,RV64ZVFH ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFHMIN,RV32ZVFHMIN ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: 
--check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFHMIN,RV64ZVFHMIN declare @llvm.vp.floor.nxv1bf16(, , i32) @@ -407,10 +407,11 @@ declare @llvm.vp.floor.nxv1f16(, @vp_floor_nxv1f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI12_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 2 @@ -453,10 +454,11 @@ define @vp_floor_nxv1f16( %va, @vp_floor_nxv1f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv1f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI13_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -493,10 +495,11 @@ declare @llvm.vp.floor.nxv2f16(, @vp_floor_nxv2f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI14_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 2 @@ -539,10 +542,11 @@ define @vp_floor_nxv2f16( %va, @vp_floor_nxv2f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv2f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI15_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -579,10 +583,11 @@ declare @llvm.vp.floor.nxv4f16(, @vp_floor_nxv4f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI16_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 2 @@ -625,10 +630,11 @@ define @vp_floor_nxv4f16( %va, @vp_floor_nxv4f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv4f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI17_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -667,9 +673,10 @@ define @vp_floor_nxv8f16( %va, @vp_floor_nxv8f16( %va, @vp_floor_nxv8f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv8f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI19_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; 
ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t @@ -755,9 +763,10 @@ define @vp_floor_nxv16f16( %va, @vp_floor_nxv16f16( %va, @vp_floor_nxv16f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv16f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI21_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -843,9 +853,10 @@ define @vp_floor_nxv32f16( %va, @vp_floor_nxv32f16( %va, @vp_floor_nxv32f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv32f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI23_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -1210,41 +1222,141 @@ define @vp_floor_nxv16f32_unmasked( % declare @llvm.vp.floor.nxv1f64(, , i32) define @vp_floor_nxv1f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_floor_nxv1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI34_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_floor_nxv1f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI34_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI34_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfabs.v v9, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 2 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_floor_nxv1f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfabs.v v9, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 2 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_floor_nxv1f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI34_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI34_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli 
zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 2 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_floor_nxv1f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 2 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.floor.nxv1f64( %va, %m, i32 %evl) ret %v } define @vp_floor_nxv1f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_floor_nxv1f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI35_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_floor_nxv1f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI35_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI35_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfabs.v v9, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 2 +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_floor_nxv1f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfabs.v v9, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 2 +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_floor_nxv1f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI35_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI35_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 2 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_floor_nxv1f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, 
a0, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 2 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.floor.nxv1f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1252,43 +1364,149 @@ define @vp_floor_nxv1f64_unmasked( %v declare @llvm.vp.floor.nxv2f64(, , i32) define @vp_floor_nxv2f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_floor_nxv2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI36_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a0) -; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_floor_nxv2f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v10, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI36_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI36_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v12, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 2 +; RV32ZVFH-NEXT: vmv1r.v v0, v10 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_floor_nxv2f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v10, v0 +; RV64ZVFH-NEXT: vfabs.v v12, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 2 +; RV64ZVFH-NEXT: vmv1r.v v0, v10 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_floor_nxv2f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v10, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI36_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI36_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 2 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v10 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, 
zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_floor_nxv2f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v10, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 2 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v10 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.floor.nxv2f64( %va, %m, i32 %evl) ret %v } define @vp_floor_nxv2f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_floor_nxv2f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI37_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_floor_nxv2f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI37_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI37_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFH-NEXT: vfabs.v v10, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 2 +; RV32ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_floor_nxv2f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFH-NEXT: vfabs.v v10, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 2 +; RV64ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_floor_nxv2f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI37_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI37_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 2 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_floor_nxv2f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; 
RV64ZVFHMIN-NEXT: fsrmi a0, 2 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.floor.nxv2f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1296,43 +1514,149 @@ define @vp_floor_nxv2f64_unmasked( %v declare @llvm.vp.floor.nxv4f64(, , i32) define @vp_floor_nxv4f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_floor_nxv4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI38_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a0) -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_floor_nxv4f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v12, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI38_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI38_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v16, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 2 +; RV32ZVFH-NEXT: vmv1r.v v0, v12 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_floor_nxv4f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v12, v0 +; RV64ZVFH-NEXT: vfabs.v v16, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 2 +; RV64ZVFH-NEXT: vmv1r.v v0, v12 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_floor_nxv4f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v12, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI38_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI38_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 2 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v12 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_floor_nxv4f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, 
e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v12, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 2 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v12 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.floor.nxv4f64( %va, %m, i32 %evl) ret %v } define @vp_floor_nxv4f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_floor_nxv4f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI39_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_floor_nxv4f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI39_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI39_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFH-NEXT: vfabs.v v12, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 2 +; RV32ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_floor_nxv4f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFH-NEXT: vfabs.v v12, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 2 +; RV64ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_floor_nxv4f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI39_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI39_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 2 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_floor_nxv4f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 2 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, 
mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.floor.nxv4f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1340,43 +1664,149 @@ define @vp_floor_nxv4f64_unmasked( %v declare @llvm.vp.floor.nxv7f64(, , i32) define @vp_floor_nxv7f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_floor_nxv7f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI40_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_floor_nxv7f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v16, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI40_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI40_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 2 +; RV32ZVFH-NEXT: vmv1r.v v0, v16 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_floor_nxv7f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v16, v0 +; RV64ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 2 +; RV64ZVFH-NEXT: vmv1r.v v0, v16 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_floor_nxv7f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI40_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI40_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 2 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_floor_nxv7f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; 
RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 2 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.floor.nxv7f64( %va, %m, i32 %evl) ret %v } define @vp_floor_nxv7f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_floor_nxv7f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI41_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_floor_nxv7f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI41_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI41_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v16, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 2 +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_floor_nxv7f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v16, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 2 +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_floor_nxv7f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI41_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI41_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 2 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_floor_nxv7f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 2 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.floor.nxv7f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1384,43 +1814,149 @@ define 
@vp_floor_nxv7f64_unmasked( %v declare @llvm.vp.floor.nxv8f64(, , i32) define @vp_floor_nxv8f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_floor_nxv8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI42_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_floor_nxv8f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v16, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI42_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI42_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 2 +; RV32ZVFH-NEXT: vmv1r.v v0, v16 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_floor_nxv8f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v16, v0 +; RV64ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 2 +; RV64ZVFH-NEXT: vmv1r.v v0, v16 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_floor_nxv8f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI42_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI42_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 2 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_floor_nxv8f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 2 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v16 +; 
RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.floor.nxv8f64( %va, %m, i32 %evl) ret %v } define @vp_floor_nxv8f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_floor_nxv8f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI43_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_floor_nxv8f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI43_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI43_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v16, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 2 +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_floor_nxv8f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v16, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 2 +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_floor_nxv8f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI43_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI43_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 2 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_floor_nxv8f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 2 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.floor.nxv8f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1429,87 +1965,325 @@ define @vp_floor_nxv8f64_unmasked( %v declare @llvm.vp.floor.nxv16f64(, , i32) define @vp_floor_nxv16f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_floor_nxv16f64: -; CHECK: # %bb.0: -; CHECK-NEXT: 
vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v7, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: lui a2, %hi(.LCPI44_0) -; CHECK-NEXT: srli a3, a1, 3 -; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a2) -; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: vslidedown.vx v6, v0, a3 -; CHECK-NEXT: sltu a3, a0, a2 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a2, 2 -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB44_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB44_2: -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_floor_nxv16f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v7, v0 +; RV32ZVFH-NEXT: csrr a1, vlenb +; RV32ZVFH-NEXT: lui a2, %hi(.LCPI44_0) +; RV32ZVFH-NEXT: srli a3, a1, 3 +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI44_0)(a2) +; RV32ZVFH-NEXT: sub a2, a0, a1 +; RV32ZVFH-NEXT: vslidedown.vx v6, v0, a3 +; RV32ZVFH-NEXT: sltu a3, a0, a2 +; RV32ZVFH-NEXT: addi a3, a3, -1 +; RV32ZVFH-NEXT: and a2, a3, a2 +; RV32ZVFH-NEXT: vmv1r.v v0, v6 +; RV32ZVFH-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a2, 2 +; RV32ZVFH-NEXT: vmv1r.v v0, v6 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFH-NEXT: fsrm a2 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFH-NEXT: bltu a0, a1, .LBB44_2 +; RV32ZVFH-NEXT: # %bb.1: +; RV32ZVFH-NEXT: mv a0, a1 +; RV32ZVFH-NEXT: .LBB44_2: +; RV32ZVFH-NEXT: vmv1r.v v0, v7 +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 2 +; RV32ZVFH-NEXT: vmv1r.v v0, v7 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_floor_nxv16f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v7, v0 +; RV64ZVFH-NEXT: csrr a1, vlenb +; RV64ZVFH-NEXT: li a2, 1075 +; RV64ZVFH-NEXT: srli a3, a1, 3 +; RV64ZVFH-NEXT: vslidedown.vx v6, 
v0, a3 +; RV64ZVFH-NEXT: sub a3, a0, a1 +; RV64ZVFH-NEXT: slli a2, a2, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a2 +; RV64ZVFH-NEXT: sltu a2, a0, a3 +; RV64ZVFH-NEXT: addi a2, a2, -1 +; RV64ZVFH-NEXT: and a2, a2, a3 +; RV64ZVFH-NEXT: vmv1r.v v0, v6 +; RV64ZVFH-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a2, 2 +; RV64ZVFH-NEXT: vmv1r.v v0, v6 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFH-NEXT: fsrm a2 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFH-NEXT: bltu a0, a1, .LBB44_2 +; RV64ZVFH-NEXT: # %bb.1: +; RV64ZVFH-NEXT: mv a0, a1 +; RV64ZVFH-NEXT: .LBB44_2: +; RV64ZVFH-NEXT: vmv1r.v v0, v7 +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 2 +; RV64ZVFH-NEXT: vmv1r.v v0, v7 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_floor_nxv16f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v7, v0 +; RV32ZVFHMIN-NEXT: csrr a1, vlenb +; RV32ZVFHMIN-NEXT: lui a2, %hi(.LCPI44_0) +; RV32ZVFHMIN-NEXT: srli a3, a1, 3 +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI44_0)(a2) +; RV32ZVFHMIN-NEXT: sub a2, a0, a1 +; RV32ZVFHMIN-NEXT: vslidedown.vx v6, v0, a3 +; RV32ZVFHMIN-NEXT: sltu a3, a0, a2 +; RV32ZVFHMIN-NEXT: addi a3, a3, -1 +; RV32ZVFHMIN-NEXT: and a2, a3, a2 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV32ZVFHMIN-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a2, 2 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFHMIN-NEXT: fsrm a2 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFHMIN-NEXT: bltu a0, a1, .LBB44_2 +; RV32ZVFHMIN-NEXT: # %bb.1: +; RV32ZVFHMIN-NEXT: mv a0, a1 +; RV32ZVFHMIN-NEXT: .LBB44_2: +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 2 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_floor_nxv16f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v7, v0 +; RV64ZVFHMIN-NEXT: csrr a1, vlenb +; 
RV64ZVFHMIN-NEXT: li a2, 1075 +; RV64ZVFHMIN-NEXT: srli a3, a1, 3 +; RV64ZVFHMIN-NEXT: vslidedown.vx v6, v0, a3 +; RV64ZVFHMIN-NEXT: sub a3, a0, a1 +; RV64ZVFHMIN-NEXT: slli a2, a2, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a2 +; RV64ZVFHMIN-NEXT: sltu a2, a0, a3 +; RV64ZVFHMIN-NEXT: addi a2, a2, -1 +; RV64ZVFHMIN-NEXT: and a2, a2, a3 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV64ZVFHMIN-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a2, 2 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFHMIN-NEXT: fsrm a2 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFHMIN-NEXT: bltu a0, a1, .LBB44_2 +; RV64ZVFHMIN-NEXT: # %bb.1: +; RV64ZVFHMIN-NEXT: mv a0, a1 +; RV64ZVFHMIN-NEXT: .LBB44_2: +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 2 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.floor.nxv16f64( %va, %m, i32 %evl) ret %v } define @vp_floor_nxv16f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_floor_nxv16f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: lui a2, %hi(.LCPI45_0) -; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: fld fa5, %lo(.LCPI45_0)(a2) -; CHECK-NEXT: sltu a2, a0, a3 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a2, a2, a3 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: fsrmi a2, 2 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB45_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB45_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_floor_nxv16f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: csrr a1, vlenb +; RV32ZVFH-NEXT: lui a2, %hi(.LCPI45_0) +; RV32ZVFH-NEXT: sub a3, a0, a1 +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI45_0)(a2) +; RV32ZVFH-NEXT: sltu a2, a0, a3 +; RV32ZVFH-NEXT: addi a2, a2, -1 +; RV32ZVFH-NEXT: and a2, a2, a3 +; RV32ZVFH-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v16 +; RV32ZVFH-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZVFH-NEXT: fsrmi a2, 2 +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFH-NEXT: fsrm a2 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli 
zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFH-NEXT: bltu a0, a1, .LBB45_2 +; RV32ZVFH-NEXT: # %bb.1: +; RV32ZVFH-NEXT: mv a0, a1 +; RV32ZVFH-NEXT: .LBB45_2: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 2 +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_floor_nxv16f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: csrr a1, vlenb +; RV64ZVFH-NEXT: li a2, 1075 +; RV64ZVFH-NEXT: sub a3, a0, a1 +; RV64ZVFH-NEXT: slli a2, a2, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a2 +; RV64ZVFH-NEXT: sltu a2, a0, a3 +; RV64ZVFH-NEXT: addi a2, a2, -1 +; RV64ZVFH-NEXT: and a2, a2, a3 +; RV64ZVFH-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v16 +; RV64ZVFH-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZVFH-NEXT: fsrmi a2, 2 +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFH-NEXT: fsrm a2 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFH-NEXT: bltu a0, a1, .LBB45_2 +; RV64ZVFH-NEXT: # %bb.1: +; RV64ZVFH-NEXT: mv a0, a1 +; RV64ZVFH-NEXT: .LBB45_2: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v8 +; RV64ZVFH-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 2 +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_floor_nxv16f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: csrr a1, vlenb +; RV32ZVFHMIN-NEXT: lui a2, %hi(.LCPI45_0) +; RV32ZVFHMIN-NEXT: sub a3, a0, a1 +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI45_0)(a2) +; RV32ZVFHMIN-NEXT: sltu a2, a0, a3 +; RV32ZVFHMIN-NEXT: addi a2, a2, -1 +; RV32ZVFHMIN-NEXT: and a2, a2, a3 +; RV32ZVFHMIN-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v16 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a2, 2 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFHMIN-NEXT: fsrm a2 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFHMIN-NEXT: bltu a0, a1, .LBB45_2 +; RV32ZVFHMIN-NEXT: # %bb.1: +; RV32ZVFHMIN-NEXT: mv a0, a1 +; RV32ZVFHMIN-NEXT: .LBB45_2: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 2 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_floor_nxv16f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: csrr a1, vlenb +; RV64ZVFHMIN-NEXT: li a2, 1075 +; RV64ZVFHMIN-NEXT: sub a3, a0, a1 +; RV64ZVFHMIN-NEXT: slli a2, a2, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a2 +; RV64ZVFHMIN-NEXT: sltu a2, a0, a3 +; RV64ZVFHMIN-NEXT: addi a2, a2, -1 +; RV64ZVFHMIN-NEXT: and a2, a2, a3 +; RV64ZVFHMIN-NEXT: vsetvli zero, a2, e64, m8, ta, ma 
+; RV64ZVFHMIN-NEXT: vfabs.v v24, v16 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a2, 2 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFHMIN-NEXT: fsrm a2 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFHMIN-NEXT: bltu a0, a1, .LBB45_2 +; RV64ZVFHMIN-NEXT: # %bb.1: +; RV64ZVFHMIN-NEXT: mv a0, a1 +; RV64ZVFHMIN-NEXT: .LBB45_2: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 2 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.floor.nxv16f64( %va, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll index 7a4695d1c25c1..409235f7e1b2c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV32 %s ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64 %s declare @llvm.experimental.constrained.nearbyint.nxv1f16(, metadata, metadata) @@ -11,10 +11,11 @@ define @nearbyint_nxv1f16( %v) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma @@ -35,10 +36,11 @@ define @nearbyint_nxv2f16( %v) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma @@ -59,10 +61,11 @@ define @nearbyint_nxv4f16( %v) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -83,10 +86,11 @@ define @nearbyint_nxv8f16( %v) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; 
CHECK-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma @@ -107,10 +111,11 @@ define @nearbyint_nxv16f16( %v) strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma @@ -131,10 +136,11 @@ define @nearbyint_nxv32f16( %v) strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI5_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma @@ -271,23 +277,42 @@ define @nearbyint_nxv16f32( %v) stric declare @llvm.experimental.constrained.nearbyint.nxv1f64(, metadata, metadata) define @nearbyint_nxv1f64( %v) strictfp { -; CHECK-LABEL: nearbyint_nxv1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI11_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: ret +; RV32-LABEL: nearbyint_nxv1f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI11_0) +; RV32-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: frflags a0 +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: fsflags a0 +; RV32-NEXT: ret +; +; RV64-LABEL: nearbyint_nxv1f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: frflags a0 +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: fsflags a0 +; RV64-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv1f64( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r } @@ -295,23 +320,42 @@ define 
@nearbyint_nxv1f64( %v) strict declare @llvm.experimental.constrained.nearbyint.nxv2f64(, metadata, metadata) define @nearbyint_nxv2f64( %v) strictfp { -; CHECK-LABEL: nearbyint_nxv2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: ret +; RV32-LABEL: nearbyint_nxv2f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI12_0) +; RV32-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vmflt.vf v0, v10, fa5 +; RV32-NEXT: frflags a0 +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: fsflags a0 +; RV32-NEXT: ret +; +; RV64-LABEL: nearbyint_nxv2f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v10, fa5 +; RV64-NEXT: frflags a0 +; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: fsflags a0 +; RV64-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv2f64( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r } @@ -319,23 +363,42 @@ define @nearbyint_nxv2f64( %v) strict declare @llvm.experimental.constrained.nearbyint.nxv4f64(, metadata, metadata) define @nearbyint_nxv4f64( %v) strictfp { -; CHECK-LABEL: nearbyint_nxv4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: ret +; RV32-LABEL: nearbyint_nxv4f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI13_0) +; RV32-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, fa5 +; RV32-NEXT: frflags a0 +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: fsflags a0 +; RV32-NEXT: 
ret +; +; RV64-LABEL: nearbyint_nxv4f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v12, fa5 +; RV64-NEXT: frflags a0 +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: fsflags a0 +; RV64-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv4f64( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r } @@ -343,23 +406,42 @@ define @nearbyint_nxv4f64( %v) strict declare @llvm.experimental.constrained.nearbyint.nxv8f64(, metadata, metadata) define @nearbyint_nxv8f64( %v) strictfp { -; CHECK-LABEL: nearbyint_nxv8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI14_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: ret +; RV32-LABEL: nearbyint_nxv8f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI14_0) +; RV32-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v16, v8 +; RV32-NEXT: vmflt.vf v0, v16, fa5 +; RV32-NEXT: frflags a0 +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32-NEXT: fsflags a0 +; RV32-NEXT: ret +; +; RV64-LABEL: nearbyint_nxv8f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v16, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v16, fa5 +; RV64-NEXT: frflags a0 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64-NEXT: fsflags a0 +; RV64-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv8f64( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r } diff --git a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll index 4ea3269cec0b1..97e65f4e4b53a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll @@ -1,16 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFH,RV32ZVFH ; RUN: llc -mtriple=riscv64 
-mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFH,RV64ZVFH ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFHMIN,RV32ZVFHMIN ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFHMIN,RV64ZVFHMIN define @nearbyint_nxv1bf16( %x) { ; CHECK-LABEL: nearbyint_nxv1bf16: @@ -167,10 +167,11 @@ define @nearbyint_nxv32bf16( %x) { define @nearbyint_nxv1f16( %x) { ; ZVFH-LABEL: nearbyint_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: frflags a0 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -206,10 +207,11 @@ declare @llvm.nearbyint.nxv1f16() define @nearbyint_nxv2f16( %x) { ; ZVFH-LABEL: nearbyint_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI7_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: frflags a0 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -245,10 +247,11 @@ declare @llvm.nearbyint.nxv2f16() define @nearbyint_nxv4f16( %x) { ; ZVFH-LABEL: nearbyint_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI8_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: frflags a0 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -284,10 +287,11 @@ declare @llvm.nearbyint.nxv4f16() define @nearbyint_nxv8f16( %x) { ; ZVFH-LABEL: nearbyint_nxv8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI9_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: frflags a0 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t @@ -323,10 +327,11 @@ declare @llvm.nearbyint.nxv8f16() define @nearbyint_nxv16f16( %x) { ; ZVFH-LABEL: nearbyint_nxv16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI10_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: frflags a0 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -362,10 +367,11 @@ declare @llvm.nearbyint.nxv16f16() define @nearbyint_nxv32f16( %x) { ; ZVFH-LABEL: nearbyint_nxv32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI11_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: frflags a0 ; 
ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -513,80 +519,268 @@ define @nearbyint_nxv16f32( %x) { declare @llvm.nearbyint.nxv16f32() define @nearbyint_nxv1f64( %x) { -; CHECK-LABEL: nearbyint_nxv1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI17_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: nearbyint_nxv1f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI17_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI17_0)(a0) +; RV32ZVFH-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfabs.v v9, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFH-NEXT: frflags a0 +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: fsflags a0 +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: nearbyint_nxv1f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfabs.v v9, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFH-NEXT: frflags a0 +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: fsflags a0 +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: nearbyint_nxv1f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI17_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI17_0)(a0) +; RV32ZVFHMIN-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFHMIN-NEXT: frflags a0 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: fsflags a0 +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: nearbyint_nxv1f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFHMIN-NEXT: frflags a0 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: fsflags a0 +; RV64ZVFHMIN-NEXT: ret %a = call @llvm.nearbyint.nxv1f64( %x) ret %a } declare @llvm.nearbyint.nxv1f64() define @nearbyint_nxv2f64( %x) { -; CHECK-LABEL: nearbyint_nxv2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI18_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: 
nearbyint_nxv2f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI18_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI18_0)(a0) +; RV32ZVFH-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32ZVFH-NEXT: vfabs.v v10, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFH-NEXT: frflags a0 +; RV32ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFH-NEXT: fsflags a0 +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: nearbyint_nxv2f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV64ZVFH-NEXT: vfabs.v v10, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFH-NEXT: frflags a0 +; RV64ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFH-NEXT: fsflags a0 +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: nearbyint_nxv2f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI18_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI18_0)(a0) +; RV32ZVFHMIN-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFHMIN-NEXT: frflags a0 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFHMIN-NEXT: fsflags a0 +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: nearbyint_nxv2f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFHMIN-NEXT: frflags a0 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFHMIN-NEXT: fsflags a0 +; RV64ZVFHMIN-NEXT: ret %a = call @llvm.nearbyint.nxv2f64( %x) ret %a } declare @llvm.nearbyint.nxv2f64() define @nearbyint_nxv4f64( %x) { -; CHECK-LABEL: nearbyint_nxv4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI19_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: nearbyint_nxv4f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI19_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI19_0)(a0) +; RV32ZVFH-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32ZVFH-NEXT: vfabs.v v12, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFH-NEXT: frflags a0 +; RV32ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFH-NEXT: fsflags a0 +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: nearbyint_nxv4f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; 
RV64ZVFH-NEXT: vfabs.v v12, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFH-NEXT: frflags a0 +; RV64ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFH-NEXT: fsflags a0 +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: nearbyint_nxv4f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI19_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI19_0)(a0) +; RV32ZVFHMIN-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFHMIN-NEXT: frflags a0 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFHMIN-NEXT: fsflags a0 +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: nearbyint_nxv4f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFHMIN-NEXT: frflags a0 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFHMIN-NEXT: fsflags a0 +; RV64ZVFHMIN-NEXT: ret %a = call @llvm.nearbyint.nxv4f64( %x) ret %a } declare @llvm.nearbyint.nxv4f64() define @nearbyint_nxv8f64( %x) { -; CHECK-LABEL: nearbyint_nxv8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI20_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: nearbyint_nxv8f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI20_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI20_0)(a0) +; RV32ZVFH-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v16, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFH-NEXT: frflags a0 +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: fsflags a0 +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: nearbyint_nxv8f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v16, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFH-NEXT: frflags a0 +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: fsflags a0 +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: nearbyint_nxv8f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI20_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI20_0)(a0) +; RV32ZVFHMIN-NEXT: vsetvli a0, zero, e64, m8, ta, 
ma +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFHMIN-NEXT: frflags a0 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: fsflags a0 +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: nearbyint_nxv8f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFHMIN-NEXT: frflags a0 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: fsflags a0 +; RV64ZVFHMIN-NEXT: ret %a = call @llvm.nearbyint.nxv8f64( %x) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll index 5fe59f3b3933d..5ed921d39590d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll @@ -1,16 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFH,RV32ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFH,RV64ZVFH ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFHMIN,RV32ZVFHMIN ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFHMIN,RV64ZVFHMIN define @rint_nxv1bf16( %x) { ; CHECK-LABEL: rint_nxv1bf16: @@ -153,10 +153,11 @@ define @rint_nxv32bf16( %x) { define @rint_nxv1f16( %x) { ; ZVFH-LABEL: rint_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -188,10 +189,11 @@ declare @llvm.rint.nxv1f16() define @rint_nxv2f16( %x) { ; ZVFH-LABEL: rint_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI7_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -223,10 +225,11 @@ declare @llvm.rint.nxv2f16() define @rint_nxv4f16( %x) { ; ZVFH-LABEL: rint_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI8_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 
+; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -258,10 +261,11 @@ declare @llvm.rint.nxv4f16() define @rint_nxv8f16( %x) { ; ZVFH-LABEL: rint_nxv8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI9_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -293,10 +297,11 @@ declare @llvm.rint.nxv8f16() define @rint_nxv16f16( %x) { ; ZVFH-LABEL: rint_nxv16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI10_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -328,10 +333,11 @@ declare @llvm.rint.nxv16f16() define @rint_nxv32f16( %x) { ; ZVFH-LABEL: rint_nxv32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI11_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -463,72 +469,236 @@ define @rint_nxv16f32( %x) { declare @llvm.rint.nxv16f32() define @rint_nxv1f64( %x) { -; CHECK-LABEL: rint_nxv1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI17_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: rint_nxv1f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI17_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI17_0)(a0) +; RV32ZVFH-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfabs.v v9, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: rint_nxv1f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfabs.v v9, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: rint_nxv1f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI17_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI17_0)(a0) +; RV32ZVFHMIN-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, 
zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: rint_nxv1f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %a = call @llvm.rint.nxv1f64( %x) ret %a } declare @llvm.rint.nxv1f64() define @rint_nxv2f64( %x) { -; CHECK-LABEL: rint_nxv2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI18_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: rint_nxv2f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI18_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI18_0)(a0) +; RV32ZVFH-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32ZVFH-NEXT: vfabs.v v10, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: rint_nxv2f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV64ZVFH-NEXT: vfabs.v v10, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: rint_nxv2f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI18_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI18_0)(a0) +; RV32ZVFHMIN-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: rint_nxv2f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %a = call @llvm.rint.nxv2f64( %x) ret %a } declare @llvm.rint.nxv2f64() define @rint_nxv4f64( %x) { -; CHECK-LABEL: rint_nxv4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI19_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: 
vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: rint_nxv4f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI19_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI19_0)(a0) +; RV32ZVFH-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32ZVFH-NEXT: vfabs.v v12, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: rint_nxv4f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64ZVFH-NEXT: vfabs.v v12, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: rint_nxv4f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI19_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI19_0)(a0) +; RV32ZVFHMIN-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: rint_nxv4f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %a = call @llvm.rint.nxv4f64( %x) ret %a } declare @llvm.rint.nxv4f64() define @rint_nxv8f64( %x) { -; CHECK-LABEL: rint_nxv8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI20_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: rint_nxv8f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI20_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI20_0)(a0) +; RV32ZVFH-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v16, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: rint_nxv8f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v16, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; 
RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: rint_nxv8f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI20_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI20_0)(a0) +; RV32ZVFHMIN-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: rint_nxv8f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %a = call @llvm.rint.nxv8f64( %x) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/fround-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fround-constrained-sdnode.ll index 3d992aa13e379..295c264e7d924 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fround-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fround-constrained-sdnode.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV32 %s ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64 %s ; This file tests the code generation for `llvm.experimental.constrained.round.*` on scalable vector type. 
@@ -11,10 +11,11 @@ define @round_nxv1f16( %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma @@ -34,10 +35,11 @@ define @round_nxv2f16( %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma @@ -57,10 +59,11 @@ define @round_nxv4f16( %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -80,10 +83,11 @@ define @round_nxv8f16( %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma @@ -103,10 +107,11 @@ define @round_nxv16f16( %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma @@ -126,10 +131,11 @@ define @round_nxv32f16( %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI5_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma @@ -260,92 +266,168 @@ define @round_nxv16f32( %x) strictfp declare @llvm.experimental.constrained.round.nxv16f32(, metadata) define @round_nxv1f64( %x) strictfp { -; CHECK-LABEL: round_nxv1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI11_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 
-; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: round_nxv1f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI11_0) +; RV32-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: fsrmi a0, 4 +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: round_nxv1f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: fsrmi a0, 4 +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: ret %a = call @llvm.experimental.constrained.round.nxv1f64( %x, metadata !"fpexcept.strict") ret %a } declare @llvm.experimental.constrained.round.nxv1f64(, metadata) define @round_nxv2f64( %x) strictfp { -; CHECK-LABEL: round_nxv2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: round_nxv2f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI12_0) +; RV32-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vmflt.vf v0, v10, fa5 +; RV32-NEXT: fsrmi a0, 4 +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: round_nxv2f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v10, fa5 +; RV64-NEXT: fsrmi a0, 4 +; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: ret %a = call 
@llvm.experimental.constrained.round.nxv2f64( %x, metadata !"fpexcept.strict") ret %a } declare @llvm.experimental.constrained.round.nxv2f64(, metadata) define @round_nxv4f64( %x) strictfp { -; CHECK-LABEL: round_nxv4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: round_nxv4f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI13_0) +; RV32-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, fa5 +; RV32-NEXT: fsrmi a0, 4 +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: round_nxv4f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v12, fa5 +; RV64-NEXT: fsrmi a0, 4 +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: ret %a = call @llvm.experimental.constrained.round.nxv4f64( %x, metadata !"fpexcept.strict") ret %a } declare @llvm.experimental.constrained.round.nxv4f64(, metadata) define @round_nxv8f64( %x) strictfp { -; CHECK-LABEL: round_nxv8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI14_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: round_nxv8f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI14_0) +; RV32-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v16, v8 +; RV32-NEXT: vmflt.vf v0, v16, fa5 +; RV32-NEXT: fsrmi a0, 4 +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: round_nxv8f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, mu 
+; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v16, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v16, fa5 +; RV64-NEXT: fsrmi a0, 4 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64-NEXT: ret %a = call @llvm.experimental.constrained.round.nxv8f64( %x, metadata !"fpexcept.strict") ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll index f7422b279149f..d420636a573fe 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll @@ -1,16 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFH,RV32ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFH,RV64ZVFH ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFHMIN,RV32ZVFHMIN ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFHMIN,RV64ZVFHMIN ; This file tests the code generation for `llvm.round.*` on scalable vector type. 
@@ -169,10 +169,11 @@ define @round_nxv32bf16( %x) { define @round_nxv1f16( %x) { ; ZVFH-LABEL: round_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -208,10 +209,11 @@ declare @llvm.round.nxv1f16() define @round_nxv2f16( %x) { ; ZVFH-LABEL: round_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI7_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -247,10 +249,11 @@ declare @llvm.round.nxv2f16() define @round_nxv4f16( %x) { ; ZVFH-LABEL: round_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI8_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -286,10 +289,11 @@ declare @llvm.round.nxv4f16() define @round_nxv8f16( %x) { ; ZVFH-LABEL: round_nxv8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI9_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t @@ -325,10 +329,11 @@ declare @llvm.round.nxv8f16() define @round_nxv16f16( %x) { ; ZVFH-LABEL: round_nxv16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI10_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -364,10 +369,11 @@ declare @llvm.round.nxv16f16() define @round_nxv32f16( %x) { ; ZVFH-LABEL: round_nxv32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI11_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -515,80 +521,268 @@ define @round_nxv16f32( %x) { declare @llvm.round.nxv16f32() define @round_nxv1f64( %x) { -; CHECK-LABEL: round_nxv1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI17_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: round_nxv1f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI17_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI17_0)(a0) +; RV32ZVFH-NEXT: vsetvli a0, 
zero, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfabs.v v9, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 4 +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: round_nxv1f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfabs.v v9, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 4 +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: round_nxv1f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI17_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI17_0)(a0) +; RV32ZVFHMIN-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 4 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: round_nxv1f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 4 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %a = call @llvm.round.nxv1f64( %x) ret %a } declare @llvm.round.nxv1f64() define @round_nxv2f64( %x) { -; CHECK-LABEL: round_nxv2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI18_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: round_nxv2f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI18_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI18_0)(a0) +; RV32ZVFH-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32ZVFH-NEXT: vfabs.v v10, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 4 +; RV32ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: round_nxv2f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV64ZVFH-NEXT: vfabs.v v10, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 4 +; RV64ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t 
+; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: round_nxv2f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI18_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI18_0)(a0) +; RV32ZVFHMIN-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 4 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: round_nxv2f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 4 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %a = call @llvm.round.nxv2f64( %x) ret %a } declare @llvm.round.nxv2f64() define @round_nxv4f64( %x) { -; CHECK-LABEL: round_nxv4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI19_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: round_nxv4f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI19_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI19_0)(a0) +; RV32ZVFH-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32ZVFH-NEXT: vfabs.v v12, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 4 +; RV32ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: round_nxv4f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64ZVFH-NEXT: vfabs.v v12, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 4 +; RV64ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: round_nxv4f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI19_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI19_0)(a0) +; RV32ZVFHMIN-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 4 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, 
ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: round_nxv4f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 4 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %a = call @llvm.round.nxv4f64( %x) ret %a } declare @llvm.round.nxv4f64() define @round_nxv8f64( %x) { -; CHECK-LABEL: round_nxv8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI20_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: round_nxv8f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI20_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI20_0)(a0) +; RV32ZVFH-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v16, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 4 +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: round_nxv8f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v16, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 4 +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: round_nxv8f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI20_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI20_0)(a0) +; RV32ZVFHMIN-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 4 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: round_nxv8f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 4 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %a = call 
@llvm.round.nxv8f64( %x) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll index c293ac91b63bf..de766895c734f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV32 %s ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64 %s ; This file tests the code generation for `llvm.experimental.constrained.roundeven.*` on scalable vector type. @@ -11,10 +11,11 @@ define @roundeven_nxv1f16( %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma @@ -34,10 +35,11 @@ define @roundeven_nxv2f16( %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma @@ -57,10 +59,11 @@ define @roundeven_nxv4f16( %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -80,10 +83,11 @@ define @roundeven_nxv8f16( %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma @@ -103,10 +107,11 @@ define @roundeven_nxv16f16( %x) strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma @@ -126,10 +131,11 @@ define @roundeven_nxv32f16( %x) strictf ; 
CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI5_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma @@ -260,92 +266,168 @@ define @roundeven_nxv16f32( %x) stric declare @llvm.experimental.constrained.roundeven.nxv16f32(, metadata) define @roundeven_nxv1f64( %x) strictfp { -; CHECK-LABEL: roundeven_nxv1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI11_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: roundeven_nxv1f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI11_0) +; RV32-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: fsrmi a0, 0 +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: roundeven_nxv1f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: fsrmi a0, 0 +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: ret %a = call @llvm.experimental.constrained.roundeven.nxv1f64( %x, metadata !"fpexcept.strict") ret %a } declare @llvm.experimental.constrained.roundeven.nxv1f64(, metadata) define @roundeven_nxv2f64( %x) strictfp { -; CHECK-LABEL: roundeven_nxv2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: roundeven_nxv2f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI12_0) +; RV32-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; 
RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vmflt.vf v0, v10, fa5 +; RV32-NEXT: fsrmi a0, 0 +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: roundeven_nxv2f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v10, fa5 +; RV64-NEXT: fsrmi a0, 0 +; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: ret %a = call @llvm.experimental.constrained.roundeven.nxv2f64( %x, metadata !"fpexcept.strict") ret %a } declare @llvm.experimental.constrained.roundeven.nxv2f64(, metadata) define @roundeven_nxv4f64( %x) strictfp { -; CHECK-LABEL: roundeven_nxv4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: roundeven_nxv4f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI13_0) +; RV32-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, fa5 +; RV32-NEXT: fsrmi a0, 0 +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: roundeven_nxv4f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v12, fa5 +; RV64-NEXT: fsrmi a0, 0 +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: ret %a = call @llvm.experimental.constrained.roundeven.nxv4f64( %x, metadata !"fpexcept.strict") ret %a } declare @llvm.experimental.constrained.roundeven.nxv4f64(, metadata) define @roundeven_nxv8f64( %x) strictfp { -; CHECK-LABEL: roundeven_nxv8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI14_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: 
vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: roundeven_nxv8f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI14_0) +; RV32-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v16, v8 +; RV32-NEXT: vmflt.vf v0, v16, fa5 +; RV32-NEXT: fsrmi a0, 0 +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: roundeven_nxv8f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v16, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v16, fa5 +; RV64-NEXT: fsrmi a0, 0 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64-NEXT: ret %a = call @llvm.experimental.constrained.roundeven.nxv8f64( %x, metadata !"fpexcept.strict") ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll index 865531b77eb29..b9121c55684ee 100644 --- a/llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll @@ -1,16 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFH,RV32ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFH,RV64ZVFH ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFHMIN,RV32ZVFHMIN ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFHMIN,RV64ZVFHMIN ; This file tests the code generation for `llvm.roundeven.*` on scalable vector type. 
define @roundeven_nxv1bf16( %x) { @@ -168,10 +168,11 @@ define @roundeven_nxv32bf16( %x) { define @roundeven_nxv1f16( %x) { ; ZVFH-LABEL: roundeven_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -207,10 +208,11 @@ declare @llvm.roundeven.nxv1f16() define @roundeven_nxv2f16( %x) { ; ZVFH-LABEL: roundeven_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI7_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -246,10 +248,11 @@ declare @llvm.roundeven.nxv2f16() define @roundeven_nxv4f16( %x) { ; ZVFH-LABEL: roundeven_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI8_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -285,10 +288,11 @@ declare @llvm.roundeven.nxv4f16() define @roundeven_nxv8f16( %x) { ; ZVFH-LABEL: roundeven_nxv8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI9_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t @@ -324,10 +328,11 @@ declare @llvm.roundeven.nxv8f16() define @roundeven_nxv16f16( %x) { ; ZVFH-LABEL: roundeven_nxv16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI10_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -363,10 +368,11 @@ declare @llvm.roundeven.nxv16f16() define @roundeven_nxv32f16( %x) { ; ZVFH-LABEL: roundeven_nxv32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI11_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -514,80 +520,268 @@ define @roundeven_nxv16f32( %x) { declare @llvm.roundeven.nxv16f32() define @roundeven_nxv1f64( %x) { -; CHECK-LABEL: roundeven_nxv1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI17_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: roundeven_nxv1f64: +; RV32ZVFH: 
# %bb.0: +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI17_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI17_0)(a0) +; RV32ZVFH-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfabs.v v9, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 0 +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: roundeven_nxv1f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfabs.v v9, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 0 +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: roundeven_nxv1f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI17_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI17_0)(a0) +; RV32ZVFHMIN-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 0 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: roundeven_nxv1f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 0 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %a = call @llvm.roundeven.nxv1f64( %x) ret %a } declare @llvm.roundeven.nxv1f64() define @roundeven_nxv2f64( %x) { -; CHECK-LABEL: roundeven_nxv2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI18_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: roundeven_nxv2f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI18_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI18_0)(a0) +; RV32ZVFH-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32ZVFH-NEXT: vfabs.v v10, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 0 +; RV32ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: roundeven_nxv2f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV64ZVFH-NEXT: vfabs.v v10, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: 
slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 0 +; RV64ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: roundeven_nxv2f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI18_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI18_0)(a0) +; RV32ZVFHMIN-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 0 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: roundeven_nxv2f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 0 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %a = call @llvm.roundeven.nxv2f64( %x) ret %a } declare @llvm.roundeven.nxv2f64() define @roundeven_nxv4f64( %x) { -; CHECK-LABEL: roundeven_nxv4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI19_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: roundeven_nxv4f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI19_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI19_0)(a0) +; RV32ZVFH-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32ZVFH-NEXT: vfabs.v v12, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 0 +; RV32ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: roundeven_nxv4f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64ZVFH-NEXT: vfabs.v v12, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 0 +; RV64ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: roundeven_nxv4f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI19_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI19_0)(a0) +; RV32ZVFHMIN-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; 
RV32ZVFHMIN-NEXT: fsrmi a0, 0 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: roundeven_nxv4f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 0 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %a = call @llvm.roundeven.nxv4f64( %x) ret %a } declare @llvm.roundeven.nxv4f64() define @roundeven_nxv8f64( %x) { -; CHECK-LABEL: roundeven_nxv8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI20_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: roundeven_nxv8f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI20_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI20_0)(a0) +; RV32ZVFH-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v16, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 0 +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: roundeven_nxv8f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v16, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 0 +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: roundeven_nxv8f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI20_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI20_0)(a0) +; RV32ZVFHMIN-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 0 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: roundeven_nxv8f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 0 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v 
v16, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %a = call @llvm.roundeven.nxv8f64( %x) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/ftrunc-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ftrunc-constrained-sdnode.ll index 8a5f118d8f6ac..63cb72e8795e1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ftrunc-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ftrunc-constrained-sdnode.ll @@ -1,18 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV32 %s ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64 %s define @trunc_nxv1f16( %x) strictfp { ; CHECK-LABEL: trunc_nxv1f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t @@ -30,10 +31,11 @@ define @trunc_nxv2f16( %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t @@ -51,10 +53,11 @@ define @trunc_nxv4f16( %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t @@ -72,10 +75,11 @@ define @trunc_nxv8f16( %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t @@ -93,10 +97,11 @@ define @trunc_nxv16f16( %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; 
CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t @@ -114,10 +119,11 @@ define @trunc_nxv32f16( %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI5_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: li a0, 25 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: slli a0, a0, 10 ; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t @@ -236,84 +242,152 @@ define @trunc_nxv16f32( %x) strictfp declare @llvm.experimental.constrained.trunc.nxv16f32(, metadata) define @trunc_nxv1f64( %x) strictfp { -; CHECK-LABEL: trunc_nxv1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI11_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: trunc_nxv1f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI11_0) +; RV32-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv1f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: ret %a = call @llvm.experimental.constrained.trunc.nxv1f64( %x, metadata !"fpexcept.strict") ret %a } declare @llvm.experimental.constrained.trunc.nxv1f64(, metadata) define @trunc_nxv2f64( %x) strictfp { -; CHECK-LABEL: trunc_nxv2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: trunc_nxv2f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI12_0) +; RV32-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v 
v10, v8 +; RV32-NEXT: vmflt.vf v0, v10, fa5 +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv2f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v10, fa5 +; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: ret %a = call @llvm.experimental.constrained.trunc.nxv2f64( %x, metadata !"fpexcept.strict") ret %a } declare @llvm.experimental.constrained.trunc.nxv2f64(, metadata) define @trunc_nxv4f64( %x) strictfp { -; CHECK-LABEL: trunc_nxv4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: trunc_nxv4f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI13_0) +; RV32-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, fa5 +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv4f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v12, fa5 +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: ret %a = call @llvm.experimental.constrained.trunc.nxv4f64( %x, metadata !"fpexcept.strict") ret %a } declare @llvm.experimental.constrained.trunc.nxv4f64(, metadata) define @trunc_nxv8f64( %x) strictfp { -; CHECK-LABEL: trunc_nxv8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: lui a0, %hi(.LCPI14_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; 
RV32-LABEL: trunc_nxv8f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV32-NEXT: vmfne.vv v0, v8, v8 +; RV32-NEXT: lui a0, %hi(.LCPI14_0) +; RV32-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; RV32-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV32-NEXT: vfabs.v v16, v8 +; RV32-NEXT: vmflt.vf v0, v16, fa5 +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv8f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV64-NEXT: vmfne.vv v0, v8, v8 +; RV64-NEXT: li a0, 1075 +; RV64-NEXT: vfadd.vv v8, v8, v8, v0.t +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: vfabs.v v16, v8 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v16, fa5 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64-NEXT: ret %a = call @llvm.experimental.constrained.trunc.nxv8f64( %x, metadata !"fpexcept.strict") ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll index d597e166be4ee..34b3e8d2849b7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll @@ -1,16 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFH,RV32ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFH,RV64ZVFH ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFHMIN,RV32ZVFHMIN ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFHMIN,RV64ZVFHMIN define @trunc_nxv1bf16( %x) { ; CHECK-LABEL: trunc_nxv1bf16: @@ -153,10 +153,11 @@ define @trunc_nxv32bf16( %x) { define @trunc_nxv1f16( %x) { ; ZVFH-LABEL: trunc_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -188,10 +189,11 @@ declare @llvm.trunc.nxv1f16() define @trunc_nxv2f16( %x) { ; ZVFH-LABEL: trunc_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI7_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -223,10 +225,11 @@ declare @llvm.trunc.nxv2f16() 
define @trunc_nxv4f16( %x) { ; ZVFH-LABEL: trunc_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI8_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -258,10 +261,11 @@ declare @llvm.trunc.nxv4f16() define @trunc_nxv8f16( %x) { ; ZVFH-LABEL: trunc_nxv8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI9_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -293,10 +297,11 @@ declare @llvm.trunc.nxv8f16() define @trunc_nxv16f16( %x) { ; ZVFH-LABEL: trunc_nxv16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI10_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -328,10 +333,11 @@ declare @llvm.trunc.nxv16f16() define @trunc_nxv32f16( %x) { ; ZVFH-LABEL: trunc_nxv32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI11_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -463,72 +469,236 @@ define @trunc_nxv16f32( %x) { declare @llvm.trunc.nxv16f32() define @trunc_nxv1f64( %x) { -; CHECK-LABEL: trunc_nxv1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI17_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: trunc_nxv1f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI17_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI17_0)(a0) +; RV32ZVFH-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfabs.v v9, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: trunc_nxv1f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfabs.v v9, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: trunc_nxv1f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI17_0) +; RV32ZVFHMIN-NEXT: fld fa5, 
%lo(.LCPI17_0)(a0) +; RV32ZVFHMIN-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: trunc_nxv1f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %a = call @llvm.trunc.nxv1f64( %x) ret %a } declare @llvm.trunc.nxv1f64() define @trunc_nxv2f64( %x) { -; CHECK-LABEL: trunc_nxv2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI18_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: trunc_nxv2f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI18_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI18_0)(a0) +; RV32ZVFH-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32ZVFH-NEXT: vfabs.v v10, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFH-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: trunc_nxv2f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV64ZVFH-NEXT: vfabs.v v10, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFH-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: trunc_nxv2f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI18_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI18_0)(a0) +; RV32ZVFHMIN-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: trunc_nxv2f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %a = call @llvm.trunc.nxv2f64( %x) ret %a } declare 
@llvm.trunc.nxv2f64() define @trunc_nxv4f64( %x) { -; CHECK-LABEL: trunc_nxv4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI19_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: trunc_nxv4f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI19_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI19_0)(a0) +; RV32ZVFH-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32ZVFH-NEXT: vfabs.v v12, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFH-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: trunc_nxv4f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64ZVFH-NEXT: vfabs.v v12, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFH-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: trunc_nxv4f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI19_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI19_0)(a0) +; RV32ZVFHMIN-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: trunc_nxv4f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %a = call @llvm.trunc.nxv4f64( %x) ret %a } declare @llvm.trunc.nxv4f64() define @trunc_nxv8f64( %x) { -; CHECK-LABEL: trunc_nxv8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI20_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: trunc_nxv8f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI20_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI20_0)(a0) +; RV32ZVFH-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v16, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFH-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t 
+; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: trunc_nxv8f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v16, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFH-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: trunc_nxv8f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI20_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI20_0)(a0) +; RV32ZVFHMIN-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: trunc_nxv8f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %a = call @llvm.trunc.nxv8f64( %x) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll index ae0542fb5b74f..d7bf566b9b5f4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll @@ -83,10 +83,11 @@ define @trunc_nxv1f16_to_ui32( %x) { define @trunc_nxv1f16_to_si64( %x) { ; CHECK-LABEL: trunc_nxv1f16_to_si64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI6_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: li a0, 25 +; CHECK-NEXT: slli a0, a0, 10 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -104,10 +105,11 @@ define @trunc_nxv1f16_to_si64( %x) { define @trunc_nxv1f16_to_ui64( %x) { ; CHECK-LABEL: trunc_nxv1f16_to_ui64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI7_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: li a0, 25 +; CHECK-NEXT: slli a0, a0, 10 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -201,10 +203,11 @@ define @trunc_nxv4f16_to_ui32( %x) { define @trunc_nxv4f16_to_si64( %x) { ; CHECK-LABEL: trunc_nxv4f16_to_si64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI14_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: li a0, 25 +; CHECK-NEXT: slli a0, a0, 10 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -222,10 +225,11 @@ define @trunc_nxv4f16_to_si64( %x) { define @trunc_nxv4f16_to_ui64( %x) { ; CHECK-LABEL: trunc_nxv4f16_to_ui64: ; 
CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI15_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI15_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: li a0, 25 +; CHECK-NEXT: slli a0, a0, 10 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -331,10 +335,11 @@ define @ceil_nxv1f16_to_ui32( %x) { define @ceil_nxv1f16_to_si64( %x) { ; CHECK-LABEL: ceil_nxv1f16_to_si64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI22_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI22_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: li a0, 25 +; CHECK-NEXT: slli a0, a0, 10 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -388,10 +393,11 @@ define @ceil_nxv1f16_to_si64( %x) { define @ceil_nxv1f16_to_ui64( %x) { ; CHECK-LABEL: ceil_nxv1f16_to_ui64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI23_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI23_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: li a0, 25 +; CHECK-NEXT: slli a0, a0, 10 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -533,10 +539,11 @@ define @ceil_nxv4f16_to_ui32( %x) { define @ceil_nxv4f16_to_si64( %x) { ; CHECK-LABEL: ceil_nxv4f16_to_si64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI30_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI30_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: li a0, 25 +; CHECK-NEXT: slli a0, a0, 10 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -590,10 +597,11 @@ define @ceil_nxv4f16_to_si64( %x) { define @ceil_nxv4f16_to_ui64( %x) { ; CHECK-LABEL: ceil_nxv4f16_to_ui64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI31_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI31_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: li a0, 25 +; CHECK-NEXT: slli a0, a0, 10 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -723,10 +731,11 @@ define @rint_nxv1f16_to_ui32( %x) { define @rint_nxv1f16_to_si64( %x) { ; CHECK-LABEL: rint_nxv1f16_to_si64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI38_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI38_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: li a0, 25 +; CHECK-NEXT: slli a0, a0, 10 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -778,10 +787,11 @@ define @rint_nxv1f16_to_si64( %x) { define @rint_nxv1f16_to_ui64( %x) { ; CHECK-LABEL: rint_nxv1f16_to_ui64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI39_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI39_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: li a0, 25 +; CHECK-NEXT: slli a0, a0, 10 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -909,10 +919,11 @@ define @rint_nxv4f16_to_ui32( %x) { define @rint_nxv4f16_to_si64( %x) { ; CHECK-LABEL: rint_nxv4f16_to_si64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 
%hi(.LCPI46_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI46_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: li a0, 25 +; CHECK-NEXT: slli a0, a0, 10 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -964,10 +975,11 @@ define @rint_nxv4f16_to_si64( %x) { define @rint_nxv4f16_to_ui64( %x) { ; CHECK-LABEL: rint_nxv4f16_to_ui64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI47_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI47_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: li a0, 25 +; CHECK-NEXT: slli a0, a0, 10 +; CHECK-NEXT: fmv.h.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll index 9bb5717d6fc25..64e305f130dd7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll @@ -1,16 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFH,RV32ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFH,RV64ZVFH ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFHMIN,RV32ZVFHMIN ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFHMIN,RV64ZVFHMIN declare @llvm.vp.nearbyint.nxv1bf16(, , i32) @@ -407,10 +407,11 @@ declare @llvm.vp.nearbyint.nxv1f16(, @vp_nearbyint_nxv1f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI12_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: frflags a0 @@ -453,10 +454,11 @@ define @vp_nearbyint_nxv1f16( %va, @vp_nearbyint_nxv1f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv1f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI13_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: frflags a0 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -493,10 +495,11 @@ declare @llvm.vp.nearbyint.nxv2f16(, @vp_nearbyint_nxv2f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI14_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; 
ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: frflags a0 @@ -539,10 +542,11 @@ define @vp_nearbyint_nxv2f16( %va, @vp_nearbyint_nxv2f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv2f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI15_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: frflags a0 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -579,10 +583,11 @@ declare @llvm.vp.nearbyint.nxv4f16(, @vp_nearbyint_nxv4f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI16_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: frflags a0 @@ -625,10 +630,11 @@ define @vp_nearbyint_nxv4f16( %va, @vp_nearbyint_nxv4f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv4f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI17_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: frflags a0 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -667,9 +673,10 @@ define @vp_nearbyint_nxv8f16( %va, @vp_nearbyint_nxv8f16( %va, @vp_nearbyint_nxv8f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv8f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI19_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: frflags a0 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t @@ -755,9 +763,10 @@ define @vp_nearbyint_nxv16f16( %va, @vp_nearbyint_nxv16f16( %va, @vp_nearbyint_nxv16f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv16f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI21_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: frflags a0 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -843,9 +853,10 @@ define @vp_nearbyint_nxv32f16( %va, @vp_nearbyint_nxv32f16( %va, @vp_nearbyint_nxv32f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv32f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI23_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: frflags a0 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -1210,41 +1222,141 @@ define @vp_nearbyint_nxv16f32_unmasked( @llvm.vp.nearbyint.nxv1f64(, , i32) define @vp_nearbyint_nxv1f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_nxv1f64: -; CHECK: # 
%bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI34_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_nearbyint_nxv1f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI34_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI34_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfabs.v v9, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV32ZVFH-NEXT: frflags a0 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: fsflags a0 +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_nearbyint_nxv1f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfabs.v v9, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV64ZVFH-NEXT: frflags a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: fsflags a0 +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_nearbyint_nxv1f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI34_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI34_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV32ZVFHMIN-NEXT: frflags a0 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: fsflags a0 +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_nearbyint_nxv1f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV64ZVFHMIN-NEXT: frflags a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: fsflags a0 +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.nearbyint.nxv1f64( %va, %m, i32 %evl) ret %v } define @vp_nearbyint_nxv1f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_nxv1f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, 
%hi(.LCPI35_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_nearbyint_nxv1f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI35_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI35_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfabs.v v9, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFH-NEXT: frflags a0 +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: fsflags a0 +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_nearbyint_nxv1f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfabs.v v9, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFH-NEXT: frflags a0 +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: fsflags a0 +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_nearbyint_nxv1f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI35_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI35_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFHMIN-NEXT: frflags a0 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: fsflags a0 +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_nearbyint_nxv1f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFHMIN-NEXT: frflags a0 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: fsflags a0 +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.nearbyint.nxv1f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1252,43 +1364,149 @@ define @vp_nearbyint_nxv1f64_unmasked( @llvm.vp.nearbyint.nxv2f64(, , i32) define @vp_nearbyint_nxv2f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_nxv2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI36_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a0) -; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, 
mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_nearbyint_nxv2f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v10, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI36_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI36_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v12, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV32ZVFH-NEXT: frflags a0 +; RV32ZVFH-NEXT: vmv1r.v v0, v10 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFH-NEXT: fsflags a0 +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_nearbyint_nxv2f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v10, v0 +; RV64ZVFH-NEXT: vfabs.v v12, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV64ZVFH-NEXT: frflags a0 +; RV64ZVFH-NEXT: vmv1r.v v0, v10 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFH-NEXT: fsflags a0 +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_nearbyint_nxv2f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v10, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI36_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI36_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV32ZVFHMIN-NEXT: frflags a0 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v10 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFHMIN-NEXT: fsflags a0 +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_nearbyint_nxv2f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v10, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV64ZVFHMIN-NEXT: frflags a0 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v10 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFHMIN-NEXT: fsflags a0 +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.nearbyint.nxv2f64( %va, %m, i32 %evl) ret %v } define @vp_nearbyint_nxv2f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_nxv2f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI37_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, 
v10, fa5 -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_nearbyint_nxv2f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI37_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI37_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFH-NEXT: vfabs.v v10, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFH-NEXT: frflags a0 +; RV32ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFH-NEXT: fsflags a0 +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_nearbyint_nxv2f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFH-NEXT: vfabs.v v10, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFH-NEXT: frflags a0 +; RV64ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFH-NEXT: fsflags a0 +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_nearbyint_nxv2f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI37_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI37_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFHMIN-NEXT: frflags a0 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFHMIN-NEXT: fsflags a0 +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_nearbyint_nxv2f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFHMIN-NEXT: frflags a0 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFHMIN-NEXT: fsflags a0 +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.nearbyint.nxv2f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1296,43 +1514,149 @@ define @vp_nearbyint_nxv2f64_unmasked( @llvm.vp.nearbyint.nxv4f64(, , i32) define @vp_nearbyint_nxv4f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_nxv4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI38_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a0) -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_nearbyint_nxv4f64: +; 
RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v12, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI38_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI38_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v16, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV32ZVFH-NEXT: frflags a0 +; RV32ZVFH-NEXT: vmv1r.v v0, v12 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: fsflags a0 +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_nearbyint_nxv4f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v12, v0 +; RV64ZVFH-NEXT: vfabs.v v16, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV64ZVFH-NEXT: frflags a0 +; RV64ZVFH-NEXT: vmv1r.v v0, v12 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: fsflags a0 +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_nearbyint_nxv4f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v12, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI38_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI38_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV32ZVFHMIN-NEXT: frflags a0 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v12 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: fsflags a0 +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_nearbyint_nxv4f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v12, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV64ZVFHMIN-NEXT: frflags a0 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v12 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: fsflags a0 +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.nearbyint.nxv4f64( %va, %m, i32 %evl) ret %v } define @vp_nearbyint_nxv4f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_nxv4f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI39_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: 
vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_nearbyint_nxv4f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI39_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI39_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFH-NEXT: vfabs.v v12, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFH-NEXT: frflags a0 +; RV32ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFH-NEXT: fsflags a0 +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_nearbyint_nxv4f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFH-NEXT: vfabs.v v12, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFH-NEXT: frflags a0 +; RV64ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFH-NEXT: fsflags a0 +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_nearbyint_nxv4f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI39_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI39_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFHMIN-NEXT: frflags a0 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFHMIN-NEXT: fsflags a0 +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_nearbyint_nxv4f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFHMIN-NEXT: frflags a0 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFHMIN-NEXT: fsflags a0 +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.nearbyint.nxv4f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1340,43 +1664,149 @@ define @vp_nearbyint_nxv4f64_unmasked( @llvm.vp.nearbyint.nxv7f64(, , i32) define @vp_nearbyint_nxv7f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_nxv7f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI40_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_nearbyint_nxv7f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v16, v0 +; RV32ZVFH-NEXT: lui a0, 
%hi(.LCPI40_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI40_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFH-NEXT: frflags a0 +; RV32ZVFH-NEXT: vmv1r.v v0, v16 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: fsflags a0 +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_nearbyint_nxv7f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v16, v0 +; RV64ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFH-NEXT: frflags a0 +; RV64ZVFH-NEXT: vmv1r.v v0, v16 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: fsflags a0 +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_nearbyint_nxv7f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI40_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI40_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: frflags a0 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsflags a0 +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_nearbyint_nxv7f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: frflags a0 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsflags a0 +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.nearbyint.nxv7f64( %va, %m, i32 %evl) ret %v } define @vp_nearbyint_nxv7f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_nxv7f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI41_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: ret +; 
RV32ZVFH-LABEL: vp_nearbyint_nxv7f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI41_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI41_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v16, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFH-NEXT: frflags a0 +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: fsflags a0 +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_nearbyint_nxv7f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v16, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFH-NEXT: frflags a0 +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: fsflags a0 +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_nearbyint_nxv7f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI41_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI41_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFHMIN-NEXT: frflags a0 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: fsflags a0 +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_nearbyint_nxv7f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFHMIN-NEXT: frflags a0 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: fsflags a0 +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.nearbyint.nxv7f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1384,43 +1814,149 @@ define @vp_nearbyint_nxv7f64_unmasked( @llvm.vp.nearbyint.nxv8f64(, , i32) define @vp_nearbyint_nxv8f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_nxv8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI42_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_nearbyint_nxv8f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v16, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI42_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI42_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFH-NEXT: vsetvli 
zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFH-NEXT: frflags a0 +; RV32ZVFH-NEXT: vmv1r.v v0, v16 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: fsflags a0 +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_nearbyint_nxv8f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v16, v0 +; RV64ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFH-NEXT: frflags a0 +; RV64ZVFH-NEXT: vmv1r.v v0, v16 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: fsflags a0 +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_nearbyint_nxv8f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI42_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI42_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: frflags a0 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsflags a0 +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_nearbyint_nxv8f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: frflags a0 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsflags a0 +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.nearbyint.nxv8f64( %va, %m, i32 %evl) ret %v } define @vp_nearbyint_nxv8f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_nxv8f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI43_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_nearbyint_nxv8f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI43_0) +; RV32ZVFH-NEXT: fld 
fa5, %lo(.LCPI43_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v16, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFH-NEXT: frflags a0 +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: fsflags a0 +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_nearbyint_nxv8f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v16, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFH-NEXT: frflags a0 +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: fsflags a0 +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_nearbyint_nxv8f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI43_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI43_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFHMIN-NEXT: frflags a0 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: fsflags a0 +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_nearbyint_nxv8f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFHMIN-NEXT: frflags a0 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: fsflags a0 +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.nearbyint.nxv8f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1429,87 +1965,325 @@ define @vp_nearbyint_nxv8f64_unmasked( @llvm.vp.nearbyint.nxv16f64(, , i32) define @vp_nearbyint_nxv16f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_nxv16f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v7, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: lui a2, %hi(.LCPI44_0) -; CHECK-NEXT: srli a3, a1, 3 -; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a2) -; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: vslidedown.vx v6, v0, a3 -; CHECK-NEXT: sltu a3, a0, a2 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t -; CHECK-NEXT: frflags a2 -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: fsflags a2 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB44_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB44_2: -; CHECK-NEXT: vmv1r.v v0, v7 -; 
CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_nearbyint_nxv16f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v7, v0 +; RV32ZVFH-NEXT: csrr a1, vlenb +; RV32ZVFH-NEXT: lui a2, %hi(.LCPI44_0) +; RV32ZVFH-NEXT: srli a3, a1, 3 +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI44_0)(a2) +; RV32ZVFH-NEXT: sub a2, a0, a1 +; RV32ZVFH-NEXT: vslidedown.vx v6, v0, a3 +; RV32ZVFH-NEXT: sltu a3, a0, a2 +; RV32ZVFH-NEXT: addi a3, a3, -1 +; RV32ZVFH-NEXT: and a2, a3, a2 +; RV32ZVFH-NEXT: vmv1r.v v0, v6 +; RV32ZVFH-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV32ZVFH-NEXT: frflags a2 +; RV32ZVFH-NEXT: vmv1r.v v0, v6 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: fsflags a2 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFH-NEXT: bltu a0, a1, .LBB44_2 +; RV32ZVFH-NEXT: # %bb.1: +; RV32ZVFH-NEXT: mv a0, a1 +; RV32ZVFH-NEXT: .LBB44_2: +; RV32ZVFH-NEXT: vmv1r.v v0, v7 +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV32ZVFH-NEXT: frflags a0 +; RV32ZVFH-NEXT: vmv1r.v v0, v7 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: fsflags a0 +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_nearbyint_nxv16f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v7, v0 +; RV64ZVFH-NEXT: csrr a1, vlenb +; RV64ZVFH-NEXT: li a2, 1075 +; RV64ZVFH-NEXT: srli a3, a1, 3 +; RV64ZVFH-NEXT: vslidedown.vx v6, v0, a3 +; RV64ZVFH-NEXT: sub a3, a0, a1 +; RV64ZVFH-NEXT: slli a2, a2, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a2 +; RV64ZVFH-NEXT: sltu a2, a0, a3 +; RV64ZVFH-NEXT: addi a2, a2, -1 +; RV64ZVFH-NEXT: and a2, a2, a3 +; RV64ZVFH-NEXT: vmv1r.v v0, v6 +; RV64ZVFH-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV64ZVFH-NEXT: frflags a2 +; RV64ZVFH-NEXT: vmv1r.v v0, v6 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: fsflags a2 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFH-NEXT: bltu a0, a1, .LBB44_2 +; RV64ZVFH-NEXT: # %bb.1: +; RV64ZVFH-NEXT: mv a0, a1 +; RV64ZVFH-NEXT: .LBB44_2: +; RV64ZVFH-NEXT: vmv1r.v v0, v7 +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v8, 
v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV64ZVFH-NEXT: frflags a0 +; RV64ZVFH-NEXT: vmv1r.v v0, v7 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: fsflags a0 +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_nearbyint_nxv16f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v7, v0 +; RV32ZVFHMIN-NEXT: csrr a1, vlenb +; RV32ZVFHMIN-NEXT: lui a2, %hi(.LCPI44_0) +; RV32ZVFHMIN-NEXT: srli a3, a1, 3 +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI44_0)(a2) +; RV32ZVFHMIN-NEXT: sub a2, a0, a1 +; RV32ZVFHMIN-NEXT: vslidedown.vx v6, v0, a3 +; RV32ZVFHMIN-NEXT: sltu a3, a0, a2 +; RV32ZVFHMIN-NEXT: addi a3, a3, -1 +; RV32ZVFHMIN-NEXT: and a2, a3, a2 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV32ZVFHMIN-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: frflags a2 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: fsflags a2 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFHMIN-NEXT: bltu a0, a1, .LBB44_2 +; RV32ZVFHMIN-NEXT: # %bb.1: +; RV32ZVFHMIN-NEXT: mv a0, a1 +; RV32ZVFHMIN-NEXT: .LBB44_2: +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: frflags a0 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsflags a0 +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_nearbyint_nxv16f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v7, v0 +; RV64ZVFHMIN-NEXT: csrr a1, vlenb +; RV64ZVFHMIN-NEXT: li a2, 1075 +; RV64ZVFHMIN-NEXT: srli a3, a1, 3 +; RV64ZVFHMIN-NEXT: vslidedown.vx v6, v0, a3 +; RV64ZVFHMIN-NEXT: sub a3, a0, a1 +; RV64ZVFHMIN-NEXT: slli a2, a2, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a2 +; RV64ZVFHMIN-NEXT: sltu a2, a0, a3 +; RV64ZVFHMIN-NEXT: addi a2, a2, -1 +; RV64ZVFHMIN-NEXT: and a2, a2, a3 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV64ZVFHMIN-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: frflags a2 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: fsflags a2 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFHMIN-NEXT: bltu a0, a1, .LBB44_2 +; RV64ZVFHMIN-NEXT: # %bb.1: +; 
RV64ZVFHMIN-NEXT: mv a0, a1 +; RV64ZVFHMIN-NEXT: .LBB44_2: +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: frflags a0 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsflags a0 +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.nearbyint.nxv16f64( %va, %m, i32 %evl) ret %v } define @vp_nearbyint_nxv16f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_nxv16f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: lui a2, %hi(.LCPI45_0) -; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: fld fa5, %lo(.LCPI45_0)(a2) -; CHECK-NEXT: sltu a2, a0, a3 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a2, a2, a3 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: frflags a2 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: fsflags a2 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB45_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB45_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_nearbyint_nxv16f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: csrr a1, vlenb +; RV32ZVFH-NEXT: lui a2, %hi(.LCPI45_0) +; RV32ZVFH-NEXT: sub a3, a0, a1 +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI45_0)(a2) +; RV32ZVFH-NEXT: sltu a2, a0, a3 +; RV32ZVFH-NEXT: addi a2, a2, -1 +; RV32ZVFH-NEXT: and a2, a2, a3 +; RV32ZVFH-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v16 +; RV32ZVFH-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZVFH-NEXT: frflags a2 +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: fsflags a2 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFH-NEXT: bltu a0, a1, .LBB45_2 +; RV32ZVFH-NEXT: # %bb.1: +; RV32ZVFH-NEXT: mv a0, a1 +; RV32ZVFH-NEXT: .LBB45_2: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZVFH-NEXT: frflags a0 +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: fsflags a0 +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_nearbyint_nxv16f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: csrr a1, vlenb +; RV64ZVFH-NEXT: li a2, 1075 +; RV64ZVFH-NEXT: sub a3, a0, a1 +; RV64ZVFH-NEXT: slli a2, a2, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a2 +; RV64ZVFH-NEXT: sltu a2, a0, a3 +; RV64ZVFH-NEXT: addi a2, a2, -1 +; RV64ZVFH-NEXT: and a2, a2, a3 +; RV64ZVFH-NEXT: vsetvli zero, a2, e64, m8, ta, ma 
+; RV64ZVFH-NEXT: vfabs.v v24, v16 +; RV64ZVFH-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZVFH-NEXT: frflags a2 +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: fsflags a2 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFH-NEXT: bltu a0, a1, .LBB45_2 +; RV64ZVFH-NEXT: # %bb.1: +; RV64ZVFH-NEXT: mv a0, a1 +; RV64ZVFH-NEXT: .LBB45_2: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v8 +; RV64ZVFH-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZVFH-NEXT: frflags a0 +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: fsflags a0 +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_nearbyint_nxv16f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: csrr a1, vlenb +; RV32ZVFHMIN-NEXT: lui a2, %hi(.LCPI45_0) +; RV32ZVFHMIN-NEXT: sub a3, a0, a1 +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI45_0)(a2) +; RV32ZVFHMIN-NEXT: sltu a2, a0, a3 +; RV32ZVFHMIN-NEXT: addi a2, a2, -1 +; RV32ZVFHMIN-NEXT: and a2, a2, a3 +; RV32ZVFHMIN-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v16 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZVFHMIN-NEXT: frflags a2 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: fsflags a2 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFHMIN-NEXT: bltu a0, a1, .LBB45_2 +; RV32ZVFHMIN-NEXT: # %bb.1: +; RV32ZVFHMIN-NEXT: mv a0, a1 +; RV32ZVFHMIN-NEXT: .LBB45_2: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZVFHMIN-NEXT: frflags a0 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsflags a0 +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_nearbyint_nxv16f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: csrr a1, vlenb +; RV64ZVFHMIN-NEXT: li a2, 1075 +; RV64ZVFHMIN-NEXT: sub a3, a0, a1 +; RV64ZVFHMIN-NEXT: slli a2, a2, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a2 +; RV64ZVFHMIN-NEXT: sltu a2, a0, a3 +; RV64ZVFHMIN-NEXT: addi a2, a2, -1 +; RV64ZVFHMIN-NEXT: and a2, a2, a3 +; RV64ZVFHMIN-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v16 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZVFHMIN-NEXT: frflags a2 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: fsflags a2 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFHMIN-NEXT: bltu a0, a1, .LBB45_2 +; RV64ZVFHMIN-NEXT: # %bb.1: +; RV64ZVFHMIN-NEXT: mv a0, a1 +; RV64ZVFHMIN-NEXT: .LBB45_2: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZVFHMIN-NEXT: frflags a0 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsflags a0 +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.nearbyint.nxv16f64( %va, splat (i1 
true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll index a9505dca97529..091caa6c65fd2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll @@ -1,16 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFH,RV32ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFH,RV64ZVFH ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFHMIN,RV32ZVFMIN ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFHMIN,RV64ZVFHMIN declare @llvm.vp.rint.nxv1bf16(, , i32) @@ -379,10 +379,11 @@ declare @llvm.vp.rint.nxv1f16(, @vp_rint_nxv1f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_rint_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI12_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma @@ -421,10 +422,11 @@ define @vp_rint_nxv1f16( %va, @vp_rint_nxv1f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_rint_nxv1f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI13_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -457,10 +459,11 @@ declare @llvm.vp.rint.nxv2f16(, @vp_rint_nxv2f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_rint_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI14_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma @@ -499,10 +502,11 @@ define @vp_rint_nxv2f16( %va, @vp_rint_nxv2f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_rint_nxv2f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI15_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -535,10 +539,11 @@ declare @llvm.vp.rint.nxv4f16(, @vp_rint_nxv4f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_rint_nxv4f16: ; ZVFH: # 
%bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI16_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -577,10 +582,11 @@ define @vp_rint_nxv4f16( %va, @vp_rint_nxv4f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_rint_nxv4f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI17_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -615,9 +621,10 @@ define @vp_rint_nxv8f16( %va, @vp_rint_nxv8f16( %va, @vp_rint_nxv8f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_rint_nxv8f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI19_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -695,9 +703,10 @@ define @vp_rint_nxv16f16( %va, @vp_rint_nxv16f16( %va, @vp_rint_nxv16f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_rint_nxv16f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI21_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -775,9 +785,10 @@ define @vp_rint_nxv32f16( %va, @vp_rint_nxv32f16( %va, @vp_rint_nxv32f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_rint_nxv32f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI23_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -1110,37 +1122,125 @@ define @vp_rint_nxv16f32_unmasked( %v declare @llvm.vp.rint.nxv1f64(, , i32) define @vp_rint_nxv1f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_nxv1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI34_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_rint_nxv1f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI34_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI34_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfabs.v v9, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5, 
v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_rint_nxv1f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfabs.v v9, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFMIN-LABEL: vp_rint_nxv1f64: +; RV32ZVFMIN: # %bb.0: +; RV32ZVFMIN-NEXT: lui a1, %hi(.LCPI34_0) +; RV32ZVFMIN-NEXT: fld fa5, %lo(.LCPI34_0)(a1) +; RV32ZVFMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFMIN-NEXT: vfabs.v v9, v8, v0.t +; RV32ZVFMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV32ZVFMIN-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32ZVFMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_rint_nxv1f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.rint.nxv1f64( %va, %m, i32 %evl) ret %v } define @vp_rint_nxv1f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_nxv1f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI35_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_rint_nxv1f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI35_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI35_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfabs.v v9, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_rint_nxv1f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfabs.v v9, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; 
RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFMIN-LABEL: vp_rint_nxv1f64_unmasked: +; RV32ZVFMIN: # %bb.0: +; RV32ZVFMIN-NEXT: lui a1, %hi(.LCPI35_0) +; RV32ZVFMIN-NEXT: fld fa5, %lo(.LCPI35_0)(a1) +; RV32ZVFMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFMIN-NEXT: vfabs.v v9, v8 +; RV32ZVFMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_rint_nxv1f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.rint.nxv1f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1148,39 +1248,133 @@ define @vp_rint_nxv1f64_unmasked( %va declare @llvm.vp.rint.nxv2f64(, , i32) define @vp_rint_nxv2f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_nxv2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI36_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a0) -; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_rint_nxv2f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v10, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI36_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI36_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v12, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV32ZVFH-NEXT: vmv1r.v v0, v10 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_rint_nxv2f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v10, v0 +; RV64ZVFH-NEXT: vfabs.v v12, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV64ZVFH-NEXT: vmv1r.v v0, v10 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFMIN-LABEL: vp_rint_nxv2f64: +; RV32ZVFMIN: # %bb.0: +; RV32ZVFMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; 
RV32ZVFMIN-NEXT: vmv1r.v v10, v0 +; RV32ZVFMIN-NEXT: lui a0, %hi(.LCPI36_0) +; RV32ZVFMIN-NEXT: fld fa5, %lo(.LCPI36_0)(a0) +; RV32ZVFMIN-NEXT: vfabs.v v12, v8, v0.t +; RV32ZVFMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFMIN-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV32ZVFMIN-NEXT: vmv1r.v v0, v10 +; RV32ZVFMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32ZVFMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_rint_nxv2f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v10, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v10 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.rint.nxv2f64( %va, %m, i32 %evl) ret %v } define @vp_rint_nxv2f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_nxv2f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI37_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_rint_nxv2f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI37_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI37_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFH-NEXT: vfabs.v v10, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_rint_nxv2f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFH-NEXT: vfabs.v v10, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFMIN-LABEL: vp_rint_nxv2f64_unmasked: +; RV32ZVFMIN: # %bb.0: +; RV32ZVFMIN-NEXT: lui a1, %hi(.LCPI37_0) +; RV32ZVFMIN-NEXT: fld fa5, %lo(.LCPI37_0)(a1) +; RV32ZVFMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFMIN-NEXT: vfabs.v v10, v8 +; RV32ZVFMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZVFMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_rint_nxv2f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; 
RV64ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.rint.nxv2f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1188,39 +1382,133 @@ define @vp_rint_nxv2f64_unmasked( %va declare @llvm.vp.rint.nxv4f64(, , i32) define @vp_rint_nxv4f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_nxv4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI38_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a0) -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_rint_nxv4f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v12, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI38_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI38_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v16, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV32ZVFH-NEXT: vmv1r.v v0, v12 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_rint_nxv4f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v12, v0 +; RV64ZVFH-NEXT: vfabs.v v16, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV64ZVFH-NEXT: vmv1r.v v0, v12 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFMIN-LABEL: vp_rint_nxv4f64: +; RV32ZVFMIN: # %bb.0: +; RV32ZVFMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFMIN-NEXT: vmv1r.v v12, v0 +; RV32ZVFMIN-NEXT: lui a0, %hi(.LCPI38_0) +; RV32ZVFMIN-NEXT: fld fa5, %lo(.LCPI38_0)(a0) +; RV32ZVFMIN-NEXT: vfabs.v v16, v8, v0.t +; RV32ZVFMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFMIN-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV32ZVFMIN-NEXT: vmv1r.v v0, v12 +; RV32ZVFMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32ZVFMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_rint_nxv4f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v12, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 
1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v12 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.rint.nxv4f64( %va, %m, i32 %evl) ret %v } define @vp_rint_nxv4f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_nxv4f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI39_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_rint_nxv4f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI39_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI39_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFH-NEXT: vfabs.v v12, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_rint_nxv4f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFH-NEXT: vfabs.v v12, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFMIN-LABEL: vp_rint_nxv4f64_unmasked: +; RV32ZVFMIN: # %bb.0: +; RV32ZVFMIN-NEXT: lui a1, %hi(.LCPI39_0) +; RV32ZVFMIN-NEXT: fld fa5, %lo(.LCPI39_0)(a1) +; RV32ZVFMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFMIN-NEXT: vfabs.v v12, v8 +; RV32ZVFMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_rint_nxv4f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.rint.nxv4f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1228,39 +1516,133 @@ define @vp_rint_nxv4f64_unmasked( %va declare @llvm.vp.rint.nxv7f64(, , i32) define @vp_rint_nxv7f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_nxv7f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI40_0) -; CHECK-NEXT: 
fld fa5, %lo(.LCPI40_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_rint_nxv7f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v16, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI40_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI40_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFH-NEXT: vmv1r.v v0, v16 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_rint_nxv7f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v16, v0 +; RV64ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFH-NEXT: vmv1r.v v0, v16 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFMIN-LABEL: vp_rint_nxv7f64: +; RV32ZVFMIN: # %bb.0: +; RV32ZVFMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFMIN-NEXT: vmv1r.v v16, v0 +; RV32ZVFMIN-NEXT: lui a0, %hi(.LCPI40_0) +; RV32ZVFMIN-NEXT: fld fa5, %lo(.LCPI40_0)(a0) +; RV32ZVFMIN-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFMIN-NEXT: vmv1r.v v0, v16 +; RV32ZVFMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_rint_nxv7f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.rint.nxv7f64( %va, %m, i32 %evl) ret %v } define @vp_rint_nxv7f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_nxv7f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI41_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; 
CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_rint_nxv7f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI41_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI41_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v16, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_rint_nxv7f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v16, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFMIN-LABEL: vp_rint_nxv7f64_unmasked: +; RV32ZVFMIN: # %bb.0: +; RV32ZVFMIN-NEXT: lui a1, %hi(.LCPI41_0) +; RV32ZVFMIN-NEXT: fld fa5, %lo(.LCPI41_0)(a1) +; RV32ZVFMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFMIN-NEXT: vfabs.v v16, v8 +; RV32ZVFMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_rint_nxv7f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.rint.nxv7f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1268,39 +1650,133 @@ define @vp_rint_nxv7f64_unmasked( %va declare @llvm.vp.rint.nxv8f64(, , i32) define @vp_rint_nxv8f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_nxv8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI42_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_rint_nxv8f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v16, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI42_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI42_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; 
RV32ZVFH-NEXT: vmv1r.v v0, v16 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_rint_nxv8f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v16, v0 +; RV64ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFH-NEXT: vmv1r.v v0, v16 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFMIN-LABEL: vp_rint_nxv8f64: +; RV32ZVFMIN: # %bb.0: +; RV32ZVFMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFMIN-NEXT: vmv1r.v v16, v0 +; RV32ZVFMIN-NEXT: lui a0, %hi(.LCPI42_0) +; RV32ZVFMIN-NEXT: fld fa5, %lo(.LCPI42_0)(a0) +; RV32ZVFMIN-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFMIN-NEXT: vmv1r.v v0, v16 +; RV32ZVFMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_rint_nxv8f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.rint.nxv8f64( %va, %m, i32 %evl) ret %v } define @vp_rint_nxv8f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_nxv8f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI43_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_rint_nxv8f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI43_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI43_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v16, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_rint_nxv8f64_unmasked: +; RV64ZVFH: # 
%bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v16, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFMIN-LABEL: vp_rint_nxv8f64_unmasked: +; RV32ZVFMIN: # %bb.0: +; RV32ZVFMIN-NEXT: lui a1, %hi(.LCPI43_0) +; RV32ZVFMIN-NEXT: fld fa5, %lo(.LCPI43_0)(a1) +; RV32ZVFMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFMIN-NEXT: vfabs.v v16, v8 +; RV32ZVFMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_rint_nxv8f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.rint.nxv8f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1309,79 +1785,293 @@ define @vp_rint_nxv8f64_unmasked( %va declare @llvm.vp.rint.nxv16f64(, , i32) define @vp_rint_nxv16f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_nxv16f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v7, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: lui a2, %hi(.LCPI44_0) -; CHECK-NEXT: srli a3, a1, 3 -; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a2) -; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: vslidedown.vx v6, v0, a3 -; CHECK-NEXT: sltu a3, a0, a2 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB44_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB44_2: -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_rint_nxv16f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v7, v0 +; RV32ZVFH-NEXT: csrr a1, vlenb +; RV32ZVFH-NEXT: lui a2, %hi(.LCPI44_0) +; RV32ZVFH-NEXT: srli a3, a1, 3 +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI44_0)(a2) +; 
RV32ZVFH-NEXT: sub a2, a0, a1 +; RV32ZVFH-NEXT: vslidedown.vx v6, v0, a3 +; RV32ZVFH-NEXT: sltu a3, a0, a2 +; RV32ZVFH-NEXT: addi a3, a3, -1 +; RV32ZVFH-NEXT: and a2, a3, a2 +; RV32ZVFH-NEXT: vmv1r.v v0, v6 +; RV32ZVFH-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV32ZVFH-NEXT: vmv1r.v v0, v6 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFH-NEXT: bltu a0, a1, .LBB44_2 +; RV32ZVFH-NEXT: # %bb.1: +; RV32ZVFH-NEXT: mv a0, a1 +; RV32ZVFH-NEXT: .LBB44_2: +; RV32ZVFH-NEXT: vmv1r.v v0, v7 +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV32ZVFH-NEXT: vmv1r.v v0, v7 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_rint_nxv16f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v7, v0 +; RV64ZVFH-NEXT: csrr a1, vlenb +; RV64ZVFH-NEXT: li a2, 1075 +; RV64ZVFH-NEXT: srli a3, a1, 3 +; RV64ZVFH-NEXT: vslidedown.vx v6, v0, a3 +; RV64ZVFH-NEXT: sub a3, a0, a1 +; RV64ZVFH-NEXT: slli a2, a2, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a2 +; RV64ZVFH-NEXT: sltu a2, a0, a3 +; RV64ZVFH-NEXT: addi a2, a2, -1 +; RV64ZVFH-NEXT: and a2, a2, a3 +; RV64ZVFH-NEXT: vmv1r.v v0, v6 +; RV64ZVFH-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV64ZVFH-NEXT: vmv1r.v v0, v6 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFH-NEXT: bltu a0, a1, .LBB44_2 +; RV64ZVFH-NEXT: # %bb.1: +; RV64ZVFH-NEXT: mv a0, a1 +; RV64ZVFH-NEXT: .LBB44_2: +; RV64ZVFH-NEXT: vmv1r.v v0, v7 +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV64ZVFH-NEXT: vmv1r.v v0, v7 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFMIN-LABEL: vp_rint_nxv16f64: +; RV32ZVFMIN: # %bb.0: +; RV32ZVFMIN-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV32ZVFMIN-NEXT: vmv1r.v v7, v0 +; RV32ZVFMIN-NEXT: csrr a1, vlenb +; RV32ZVFMIN-NEXT: lui a2, %hi(.LCPI44_0) +; RV32ZVFMIN-NEXT: srli a3, a1, 3 +; RV32ZVFMIN-NEXT: fld fa5, %lo(.LCPI44_0)(a2) +; RV32ZVFMIN-NEXT: sub a2, a0, a1 +; RV32ZVFMIN-NEXT: vslidedown.vx v6, v0, a3 +; RV32ZVFMIN-NEXT: sltu a3, a0, a2 +; RV32ZVFMIN-NEXT: addi a3, a3, -1 +; RV32ZVFMIN-NEXT: and a2, a3, a2 +; RV32ZVFMIN-NEXT: vmv1r.v v0, v6 +; RV32ZVFMIN-NEXT: vsetvli zero, a2, e64, m8, ta, 
ma +; RV32ZVFMIN-NEXT: vfabs.v v24, v16, v0.t +; RV32ZVFMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFMIN-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV32ZVFMIN-NEXT: vmv1r.v v0, v6 +; RV32ZVFMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFMIN-NEXT: bltu a0, a1, .LBB44_2 +; RV32ZVFMIN-NEXT: # %bb.1: +; RV32ZVFMIN-NEXT: mv a0, a1 +; RV32ZVFMIN-NEXT: .LBB44_2: +; RV32ZVFMIN-NEXT: vmv1r.v v0, v7 +; RV32ZVFMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFMIN-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFMIN-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV32ZVFMIN-NEXT: vmv1r.v v0, v7 +; RV32ZVFMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_rint_nxv16f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v7, v0 +; RV64ZVFHMIN-NEXT: csrr a1, vlenb +; RV64ZVFHMIN-NEXT: li a2, 1075 +; RV64ZVFHMIN-NEXT: srli a3, a1, 3 +; RV64ZVFHMIN-NEXT: vslidedown.vx v6, v0, a3 +; RV64ZVFHMIN-NEXT: sub a3, a0, a1 +; RV64ZVFHMIN-NEXT: slli a2, a2, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a2 +; RV64ZVFHMIN-NEXT: sltu a2, a0, a3 +; RV64ZVFHMIN-NEXT: addi a2, a2, -1 +; RV64ZVFHMIN-NEXT: and a2, a2, a3 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV64ZVFHMIN-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFHMIN-NEXT: bltu a0, a1, .LBB44_2 +; RV64ZVFHMIN-NEXT: # %bb.1: +; RV64ZVFHMIN-NEXT: mv a0, a1 +; RV64ZVFHMIN-NEXT: .LBB44_2: +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.rint.nxv16f64( %va, %m, i32 %evl) ret %v } define @vp_rint_nxv16f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_nxv16f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: lui a2, %hi(.LCPI45_0) -; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: fld fa5, %lo(.LCPI45_0)(a2) -; CHECK-NEXT: sltu a2, a0, a3 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a2, a2, a3 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; 
CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB45_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB45_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_rint_nxv16f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: csrr a1, vlenb +; RV32ZVFH-NEXT: lui a2, %hi(.LCPI45_0) +; RV32ZVFH-NEXT: sub a3, a0, a1 +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI45_0)(a2) +; RV32ZVFH-NEXT: sltu a2, a0, a3 +; RV32ZVFH-NEXT: addi a2, a2, -1 +; RV32ZVFH-NEXT: and a2, a2, a3 +; RV32ZVFH-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v16 +; RV32ZVFH-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFH-NEXT: bltu a0, a1, .LBB45_2 +; RV32ZVFH-NEXT: # %bb.1: +; RV32ZVFH-NEXT: mv a0, a1 +; RV32ZVFH-NEXT: .LBB45_2: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_rint_nxv16f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: csrr a1, vlenb +; RV64ZVFH-NEXT: li a2, 1075 +; RV64ZVFH-NEXT: sub a3, a0, a1 +; RV64ZVFH-NEXT: slli a2, a2, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a2 +; RV64ZVFH-NEXT: sltu a2, a0, a3 +; RV64ZVFH-NEXT: addi a2, a2, -1 +; RV64ZVFH-NEXT: and a2, a2, a3 +; RV64ZVFH-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v16 +; RV64ZVFH-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFH-NEXT: bltu a0, a1, .LBB45_2 +; RV64ZVFH-NEXT: # %bb.1: +; RV64ZVFH-NEXT: mv a0, a1 +; RV64ZVFH-NEXT: .LBB45_2: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v8 +; RV64ZVFH-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFMIN-LABEL: vp_rint_nxv16f64_unmasked: +; RV32ZVFMIN: # %bb.0: +; RV32ZVFMIN-NEXT: csrr a1, vlenb +; RV32ZVFMIN-NEXT: lui a2, %hi(.LCPI45_0) +; RV32ZVFMIN-NEXT: sub a3, a0, a1 +; RV32ZVFMIN-NEXT: fld fa5, %lo(.LCPI45_0)(a2) +; RV32ZVFMIN-NEXT: sltu a2, a0, a3 +; RV32ZVFMIN-NEXT: addi a2, a2, -1 +; RV32ZVFMIN-NEXT: and a2, a2, a3 +; RV32ZVFMIN-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32ZVFMIN-NEXT: vfabs.v v24, v16 +; RV32ZVFMIN-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZVFMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFMIN-NEXT: bltu a0, a1, .LBB45_2 +; RV32ZVFMIN-NEXT: # %bb.1: +; RV32ZVFMIN-NEXT: mv a0, a1 +; RV32ZVFMIN-NEXT: .LBB45_2: +; RV32ZVFMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; 
RV32ZVFMIN-NEXT: vfabs.v v24, v8 +; RV32ZVFMIN-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZVFMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_rint_nxv16f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: csrr a1, vlenb +; RV64ZVFHMIN-NEXT: li a2, 1075 +; RV64ZVFHMIN-NEXT: sub a3, a0, a1 +; RV64ZVFHMIN-NEXT: slli a2, a2, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a2 +; RV64ZVFHMIN-NEXT: sltu a2, a0, a3 +; RV64ZVFHMIN-NEXT: addi a2, a2, -1 +; RV64ZVFHMIN-NEXT: and a2, a2, a3 +; RV64ZVFHMIN-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v16 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFHMIN-NEXT: bltu a0, a1, .LBB45_2 +; RV64ZVFHMIN-NEXT: # %bb.1: +; RV64ZVFHMIN-NEXT: mv a0, a1 +; RV64ZVFHMIN-NEXT: .LBB45_2: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.rint.nxv16f64( %va, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll index ccbc0ebb3b73e..d1ea5aa76268a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll @@ -1,16 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFH,RV32ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFH,RV64ZVFH ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFHMIN,RV32ZVFHMIN ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFHMIN,RV64ZVFHMIN declare @llvm.vp.round.nxv1bf16(, , i32) @@ -407,10 +407,11 @@ declare @llvm.vp.round.nxv1f16(, @vp_round_nxv1f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI12_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 4 @@ -453,10 +454,11 @@ define @vp_round_nxv1f16( %va, @vp_round_nxv1f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv1f16_unmasked: ; ZVFH: # %bb.0: -; 
ZVFH-NEXT: lui a1, %hi(.LCPI13_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -493,10 +495,11 @@ declare @llvm.vp.round.nxv2f16(, @vp_round_nxv2f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI14_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 4 @@ -539,10 +542,11 @@ define @vp_round_nxv2f16( %va, @vp_round_nxv2f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv2f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI15_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -579,10 +583,11 @@ declare @llvm.vp.round.nxv4f16(, @vp_round_nxv4f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI16_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 4 @@ -625,10 +630,11 @@ define @vp_round_nxv4f16( %va, @vp_round_nxv4f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv4f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI17_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -667,9 +673,10 @@ define @vp_round_nxv8f16( %va, @vp_round_nxv8f16( %va, @vp_round_nxv8f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv8f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI19_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t @@ -755,9 +763,10 @@ define @vp_round_nxv16f16( %va, @vp_round_nxv16f16( %va, @vp_round_nxv16f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv16f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI21_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -843,9 +853,10 @@ define @vp_round_nxv32f16( %va, @vp_round_nxv32f16( %va, @vp_round_nxv32f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: 
vp_round_nxv32f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI23_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -1210,41 +1222,141 @@ define @vp_round_nxv16f32_unmasked( % declare @llvm.vp.round.nxv1f64(, , i32) define @vp_round_nxv1f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_nxv1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI34_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_round_nxv1f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI34_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI34_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfabs.v v9, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 4 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_round_nxv1f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfabs.v v9, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 4 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_round_nxv1f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI34_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI34_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 4 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_round_nxv1f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; 
RV64ZVFHMIN-NEXT: fsrmi a0, 4 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.round.nxv1f64( %va, %m, i32 %evl) ret %v } define @vp_round_nxv1f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_nxv1f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI35_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_round_nxv1f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI35_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI35_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfabs.v v9, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 4 +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_round_nxv1f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfabs.v v9, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 4 +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_round_nxv1f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI35_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI35_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 4 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_round_nxv1f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 4 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.round.nxv1f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1252,43 +1364,149 @@ define @vp_round_nxv1f64_unmasked( %v declare @llvm.vp.round.nxv2f64(, , i32) define @vp_round_nxv2f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_nxv2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli 
zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI36_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a0) -; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_round_nxv2f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v10, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI36_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI36_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v12, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 4 +; RV32ZVFH-NEXT: vmv1r.v v0, v10 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_round_nxv2f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v10, v0 +; RV64ZVFH-NEXT: vfabs.v v12, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 4 +; RV64ZVFH-NEXT: vmv1r.v v0, v10 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_round_nxv2f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v10, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI36_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI36_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 4 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v10 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_round_nxv2f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v10, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 4 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v10 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFHMIN-NEXT: vsetvli 
zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.round.nxv2f64( %va, %m, i32 %evl) ret %v } define @vp_round_nxv2f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_nxv2f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI37_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_round_nxv2f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI37_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI37_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFH-NEXT: vfabs.v v10, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 4 +; RV32ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_round_nxv2f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFH-NEXT: vfabs.v v10, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 4 +; RV64ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_round_nxv2f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI37_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI37_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 4 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_round_nxv2f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 4 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.round.nxv2f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1296,43 +1514,149 @@ define @vp_round_nxv2f64_unmasked( %v declare @llvm.vp.round.nxv4f64(, , i32) define @vp_round_nxv4f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_nxv4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI38_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a0) -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, 
zero, e64, m4, ta, mu -; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_round_nxv4f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v12, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI38_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI38_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v16, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 4 +; RV32ZVFH-NEXT: vmv1r.v v0, v12 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_round_nxv4f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v12, v0 +; RV64ZVFH-NEXT: vfabs.v v16, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 4 +; RV64ZVFH-NEXT: vmv1r.v v0, v12 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_round_nxv4f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v12, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI38_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI38_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 4 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v12 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_round_nxv4f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v12, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 4 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v12 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.round.nxv4f64( %va, %m, i32 %evl) ret %v } define @vp_round_nxv4f64_unmasked( %va, i32 
zeroext %evl) { -; CHECK-LABEL: vp_round_nxv4f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI39_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_round_nxv4f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI39_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI39_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFH-NEXT: vfabs.v v12, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 4 +; RV32ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_round_nxv4f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFH-NEXT: vfabs.v v12, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 4 +; RV64ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_round_nxv4f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI39_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI39_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 4 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_round_nxv4f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 4 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.round.nxv4f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1340,43 +1664,149 @@ define @vp_round_nxv4f64_unmasked( %v declare @llvm.vp.round.nxv7f64(, , i32) define @vp_round_nxv7f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_nxv7f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI40_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, 
v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_round_nxv7f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v16, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI40_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI40_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 4 +; RV32ZVFH-NEXT: vmv1r.v v0, v16 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_round_nxv7f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v16, v0 +; RV64ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 4 +; RV64ZVFH-NEXT: vmv1r.v v0, v16 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_round_nxv7f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI40_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI40_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 4 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_round_nxv7f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 4 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.round.nxv7f64( %va, %m, i32 %evl) ret %v } define @vp_round_nxv7f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_nxv7f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI41_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma 
-; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_round_nxv7f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI41_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI41_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v16, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 4 +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_round_nxv7f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v16, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 4 +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_round_nxv7f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI41_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI41_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 4 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_round_nxv7f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 4 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.round.nxv7f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1384,43 +1814,149 @@ define @vp_round_nxv7f64_unmasked( %v declare @llvm.vp.round.nxv8f64(, , i32) define @vp_round_nxv8f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_nxv8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI42_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: 
vp_round_nxv8f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v16, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI42_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI42_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 4 +; RV32ZVFH-NEXT: vmv1r.v v0, v16 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_round_nxv8f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v16, v0 +; RV64ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 4 +; RV64ZVFH-NEXT: vmv1r.v v0, v16 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_round_nxv8f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI42_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI42_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 4 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_round_nxv8f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 4 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.round.nxv8f64( %va, %m, i32 %evl) ret %v } define @vp_round_nxv8f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_nxv8f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI43_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; 
CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_round_nxv8f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI43_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI43_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v16, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 4 +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_round_nxv8f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v16, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 4 +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_round_nxv8f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI43_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI43_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 4 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_round_nxv8f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 4 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.round.nxv8f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1429,87 +1965,325 @@ define @vp_round_nxv8f64_unmasked( %v declare @llvm.vp.round.nxv16f64(, , i32) define @vp_round_nxv16f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_nxv16f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v7, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: lui a2, %hi(.LCPI44_0) -; CHECK-NEXT: srli a3, a1, 3 -; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a2) -; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: vslidedown.vx v6, v0, a3 -; CHECK-NEXT: sltu a3, a0, a2 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a2, 4 -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, 
e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB44_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB44_2: -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_round_nxv16f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v7, v0 +; RV32ZVFH-NEXT: csrr a1, vlenb +; RV32ZVFH-NEXT: lui a2, %hi(.LCPI44_0) +; RV32ZVFH-NEXT: srli a3, a1, 3 +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI44_0)(a2) +; RV32ZVFH-NEXT: sub a2, a0, a1 +; RV32ZVFH-NEXT: vslidedown.vx v6, v0, a3 +; RV32ZVFH-NEXT: sltu a3, a0, a2 +; RV32ZVFH-NEXT: addi a3, a3, -1 +; RV32ZVFH-NEXT: and a2, a3, a2 +; RV32ZVFH-NEXT: vmv1r.v v0, v6 +; RV32ZVFH-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a2, 4 +; RV32ZVFH-NEXT: vmv1r.v v0, v6 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFH-NEXT: fsrm a2 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFH-NEXT: bltu a0, a1, .LBB44_2 +; RV32ZVFH-NEXT: # %bb.1: +; RV32ZVFH-NEXT: mv a0, a1 +; RV32ZVFH-NEXT: .LBB44_2: +; RV32ZVFH-NEXT: vmv1r.v v0, v7 +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 4 +; RV32ZVFH-NEXT: vmv1r.v v0, v7 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_round_nxv16f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v7, v0 +; RV64ZVFH-NEXT: csrr a1, vlenb +; RV64ZVFH-NEXT: li a2, 1075 +; RV64ZVFH-NEXT: srli a3, a1, 3 +; RV64ZVFH-NEXT: vslidedown.vx v6, v0, a3 +; RV64ZVFH-NEXT: sub a3, a0, a1 +; RV64ZVFH-NEXT: slli a2, a2, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a2 +; RV64ZVFH-NEXT: sltu a2, a0, a3 +; RV64ZVFH-NEXT: addi a2, a2, -1 +; RV64ZVFH-NEXT: and a2, a2, a3 +; RV64ZVFH-NEXT: vmv1r.v v0, v6 +; RV64ZVFH-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a2, 4 +; RV64ZVFH-NEXT: vmv1r.v v0, v6 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFH-NEXT: fsrm a2 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFH-NEXT: bltu a0, a1, .LBB44_2 +; RV64ZVFH-NEXT: 
# %bb.1: +; RV64ZVFH-NEXT: mv a0, a1 +; RV64ZVFH-NEXT: .LBB44_2: +; RV64ZVFH-NEXT: vmv1r.v v0, v7 +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 4 +; RV64ZVFH-NEXT: vmv1r.v v0, v7 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_round_nxv16f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v7, v0 +; RV32ZVFHMIN-NEXT: csrr a1, vlenb +; RV32ZVFHMIN-NEXT: lui a2, %hi(.LCPI44_0) +; RV32ZVFHMIN-NEXT: srli a3, a1, 3 +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI44_0)(a2) +; RV32ZVFHMIN-NEXT: sub a2, a0, a1 +; RV32ZVFHMIN-NEXT: vslidedown.vx v6, v0, a3 +; RV32ZVFHMIN-NEXT: sltu a3, a0, a2 +; RV32ZVFHMIN-NEXT: addi a3, a3, -1 +; RV32ZVFHMIN-NEXT: and a2, a3, a2 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV32ZVFHMIN-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a2, 4 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFHMIN-NEXT: fsrm a2 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFHMIN-NEXT: bltu a0, a1, .LBB44_2 +; RV32ZVFHMIN-NEXT: # %bb.1: +; RV32ZVFHMIN-NEXT: mv a0, a1 +; RV32ZVFHMIN-NEXT: .LBB44_2: +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 4 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_round_nxv16f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v7, v0 +; RV64ZVFHMIN-NEXT: csrr a1, vlenb +; RV64ZVFHMIN-NEXT: li a2, 1075 +; RV64ZVFHMIN-NEXT: srli a3, a1, 3 +; RV64ZVFHMIN-NEXT: vslidedown.vx v6, v0, a3 +; RV64ZVFHMIN-NEXT: sub a3, a0, a1 +; RV64ZVFHMIN-NEXT: slli a2, a2, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a2 +; RV64ZVFHMIN-NEXT: sltu a2, a0, a3 +; RV64ZVFHMIN-NEXT: addi a2, a2, -1 +; RV64ZVFHMIN-NEXT: and a2, a2, a3 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV64ZVFHMIN-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a2, 4 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFHMIN-NEXT: fsrm a2 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; 
RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFHMIN-NEXT: bltu a0, a1, .LBB44_2 +; RV64ZVFHMIN-NEXT: # %bb.1: +; RV64ZVFHMIN-NEXT: mv a0, a1 +; RV64ZVFHMIN-NEXT: .LBB44_2: +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 4 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.round.nxv16f64( %va, %m, i32 %evl) ret %v } define @vp_round_nxv16f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_nxv16f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: lui a2, %hi(.LCPI45_0) -; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: fld fa5, %lo(.LCPI45_0)(a2) -; CHECK-NEXT: sltu a2, a0, a3 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a2, a2, a3 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: fsrmi a2, 4 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB45_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB45_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_round_nxv16f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: csrr a1, vlenb +; RV32ZVFH-NEXT: lui a2, %hi(.LCPI45_0) +; RV32ZVFH-NEXT: sub a3, a0, a1 +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI45_0)(a2) +; RV32ZVFH-NEXT: sltu a2, a0, a3 +; RV32ZVFH-NEXT: addi a2, a2, -1 +; RV32ZVFH-NEXT: and a2, a2, a3 +; RV32ZVFH-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v16 +; RV32ZVFH-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZVFH-NEXT: fsrmi a2, 4 +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFH-NEXT: fsrm a2 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFH-NEXT: bltu a0, a1, .LBB45_2 +; RV32ZVFH-NEXT: # %bb.1: +; RV32ZVFH-NEXT: mv a0, a1 +; RV32ZVFH-NEXT: .LBB45_2: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 4 +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_round_nxv16f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: csrr a1, vlenb +; RV64ZVFH-NEXT: li a2, 1075 +; RV64ZVFH-NEXT: sub a3, a0, a1 +; RV64ZVFH-NEXT: slli a2, a2, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a2 
+; RV64ZVFH-NEXT: sltu a2, a0, a3 +; RV64ZVFH-NEXT: addi a2, a2, -1 +; RV64ZVFH-NEXT: and a2, a2, a3 +; RV64ZVFH-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v16 +; RV64ZVFH-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZVFH-NEXT: fsrmi a2, 4 +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFH-NEXT: fsrm a2 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFH-NEXT: bltu a0, a1, .LBB45_2 +; RV64ZVFH-NEXT: # %bb.1: +; RV64ZVFH-NEXT: mv a0, a1 +; RV64ZVFH-NEXT: .LBB45_2: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v8 +; RV64ZVFH-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 4 +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_round_nxv16f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: csrr a1, vlenb +; RV32ZVFHMIN-NEXT: lui a2, %hi(.LCPI45_0) +; RV32ZVFHMIN-NEXT: sub a3, a0, a1 +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI45_0)(a2) +; RV32ZVFHMIN-NEXT: sltu a2, a0, a3 +; RV32ZVFHMIN-NEXT: addi a2, a2, -1 +; RV32ZVFHMIN-NEXT: and a2, a2, a3 +; RV32ZVFHMIN-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v16 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a2, 4 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFHMIN-NEXT: fsrm a2 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFHMIN-NEXT: bltu a0, a1, .LBB45_2 +; RV32ZVFHMIN-NEXT: # %bb.1: +; RV32ZVFHMIN-NEXT: mv a0, a1 +; RV32ZVFHMIN-NEXT: .LBB45_2: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 4 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_round_nxv16f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: csrr a1, vlenb +; RV64ZVFHMIN-NEXT: li a2, 1075 +; RV64ZVFHMIN-NEXT: sub a3, a0, a1 +; RV64ZVFHMIN-NEXT: slli a2, a2, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a2 +; RV64ZVFHMIN-NEXT: sltu a2, a0, a3 +; RV64ZVFHMIN-NEXT: addi a2, a2, -1 +; RV64ZVFHMIN-NEXT: and a2, a2, a3 +; RV64ZVFHMIN-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v16 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a2, 4 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFHMIN-NEXT: fsrm a2 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFHMIN-NEXT: bltu a0, a1, .LBB45_2 +; RV64ZVFHMIN-NEXT: # %bb.1: +; RV64ZVFHMIN-NEXT: mv a0, a1 +; RV64ZVFHMIN-NEXT: .LBB45_2: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 4 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, 
mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.round.nxv16f64( %va, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll index 3975423e6f985..23d0e97c1c82b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll @@ -1,16 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFH,RV32ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFH,RV64ZVFH ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFHMIN,RV32ZVFHMIN ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFHMIN,RV64ZVFHMIN declare @llvm.vp.roundeven.nxv1bf16(, , i32) @@ -407,10 +407,11 @@ declare @llvm.vp.roundeven.nxv1f16(, @vp_roundeven_nxv1f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI12_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 0 @@ -453,10 +454,11 @@ define @vp_roundeven_nxv1f16( %va, @vp_roundeven_nxv1f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv1f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI13_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -493,10 +495,11 @@ declare @llvm.vp.roundeven.nxv2f16(, @vp_roundeven_nxv2f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI14_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 0 @@ -539,10 +542,11 @@ define @vp_roundeven_nxv2f16( %va, @vp_roundeven_nxv2f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv2f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI15_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ 
-579,10 +583,11 @@ declare @llvm.vp.roundeven.nxv4f16(, @vp_roundeven_nxv4f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI16_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 0 @@ -625,10 +630,11 @@ define @vp_roundeven_nxv4f16( %va, @vp_roundeven_nxv4f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv4f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI17_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -667,9 +673,10 @@ define @vp_roundeven_nxv8f16( %va, @vp_roundeven_nxv8f16( %va, @vp_roundeven_nxv8f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv8f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI19_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t @@ -755,9 +763,10 @@ define @vp_roundeven_nxv16f16( %va, @vp_roundeven_nxv16f16( %va, @vp_roundeven_nxv16f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv16f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI21_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -843,9 +853,10 @@ define @vp_roundeven_nxv32f16( %va, @vp_roundeven_nxv32f16( %va, @vp_roundeven_nxv32f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv32f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI23_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -1210,41 +1222,141 @@ define @vp_roundeven_nxv16f32_unmasked( @llvm.vp.roundeven.nxv1f64(, , i32) define @vp_roundeven_nxv1f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_nxv1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI34_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundeven_nxv1f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI34_0) +; RV32ZVFH-NEXT: 
fld fa5, %lo(.LCPI34_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfabs.v v9, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 0 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundeven_nxv1f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfabs.v v9, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundeven_nxv1f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI34_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI34_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 0 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundeven_nxv1f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundeven.nxv1f64( %va, %m, i32 %evl) ret %v } define @vp_roundeven_nxv1f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_nxv1f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI35_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundeven_nxv1f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI35_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI35_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfabs.v v9, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5 
+; RV32ZVFH-NEXT: fsrmi a0, 0 +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundeven_nxv1f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfabs.v v9, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 0 +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundeven_nxv1f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI35_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI35_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 0 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundeven_nxv1f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 0 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundeven.nxv1f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1252,43 +1364,149 @@ define @vp_roundeven_nxv1f64_unmasked( @llvm.vp.roundeven.nxv2f64(, , i32) define @vp_roundeven_nxv2f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_nxv2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI36_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a0) -; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundeven_nxv2f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v10, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI36_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI36_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v12, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 0 +; RV32ZVFH-NEXT: vmv1r.v v0, v10 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; 
RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundeven_nxv2f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v10, v0 +; RV64ZVFH-NEXT: vfabs.v v12, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 0 +; RV64ZVFH-NEXT: vmv1r.v v0, v10 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundeven_nxv2f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v10, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI36_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI36_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 0 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v10 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundeven_nxv2f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v10, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 0 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v10 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundeven.nxv2f64( %va, %m, i32 %evl) ret %v } define @vp_roundeven_nxv2f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_nxv2f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI37_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundeven_nxv2f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI37_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI37_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFH-NEXT: vfabs.v v10, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 0 +; RV32ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFH-NEXT: vsetvli 
zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundeven_nxv2f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFH-NEXT: vfabs.v v10, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 0 +; RV64ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundeven_nxv2f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI37_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI37_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 0 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundeven_nxv2f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 0 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundeven.nxv2f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1296,43 +1514,149 @@ define @vp_roundeven_nxv2f64_unmasked( @llvm.vp.roundeven.nxv4f64(, , i32) define @vp_roundeven_nxv4f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_nxv4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI38_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a0) -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundeven_nxv4f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v12, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI38_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI38_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v16, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 0 +; RV32ZVFH-NEXT: vmv1r.v v0, v12 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: 
vp_roundeven_nxv4f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v12, v0 +; RV64ZVFH-NEXT: vfabs.v v16, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 0 +; RV64ZVFH-NEXT: vmv1r.v v0, v12 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundeven_nxv4f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v12, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI38_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI38_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 0 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v12 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundeven_nxv4f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v12, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 0 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v12 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundeven.nxv4f64( %va, %m, i32 %evl) ret %v } define @vp_roundeven_nxv4f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_nxv4f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI39_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundeven_nxv4f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI39_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI39_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFH-NEXT: vfabs.v v12, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 0 +; RV32ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: 
vp_roundeven_nxv4f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFH-NEXT: vfabs.v v12, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 0 +; RV64ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundeven_nxv4f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI39_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI39_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 0 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundeven_nxv4f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 0 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundeven.nxv4f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1340,43 +1664,149 @@ define @vp_roundeven_nxv4f64_unmasked( @llvm.vp.roundeven.nxv7f64(, , i32) define @vp_roundeven_nxv7f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_nxv7f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI40_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundeven_nxv7f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v16, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI40_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI40_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 0 +; RV32ZVFH-NEXT: vmv1r.v v0, v16 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundeven_nxv7f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v16, v0 +; 
RV64ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 0 +; RV64ZVFH-NEXT: vmv1r.v v0, v16 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundeven_nxv7f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI40_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI40_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 0 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundeven_nxv7f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 0 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundeven.nxv7f64( %va, %m, i32 %evl) ret %v } define @vp_roundeven_nxv7f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_nxv7f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI41_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundeven_nxv7f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI41_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI41_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v16, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 0 +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundeven_nxv7f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v16, v8 +; RV64ZVFH-NEXT: 
li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 0 +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundeven_nxv7f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI41_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI41_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 0 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundeven_nxv7f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 0 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundeven.nxv7f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1384,43 +1814,149 @@ define @vp_roundeven_nxv7f64_unmasked( @llvm.vp.roundeven.nxv8f64(, , i32) define @vp_roundeven_nxv8f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_nxv8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI42_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundeven_nxv8f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v16, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI42_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI42_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 0 +; RV32ZVFH-NEXT: vmv1r.v v0, v16 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundeven_nxv8f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v16, v0 +; RV64ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: 
vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 0 +; RV64ZVFH-NEXT: vmv1r.v v0, v16 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundeven_nxv8f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI42_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI42_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 0 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundeven_nxv8f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 0 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundeven.nxv8f64( %va, %m, i32 %evl) ret %v } define @vp_roundeven_nxv8f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_nxv8f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI43_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundeven_nxv8f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI43_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI43_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v16, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 0 +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundeven_nxv8f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v16, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 0 +; 
RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundeven_nxv8f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI43_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI43_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 0 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundeven_nxv8f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 0 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundeven.nxv8f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1429,87 +1965,325 @@ define @vp_roundeven_nxv8f64_unmasked( @llvm.vp.roundeven.nxv16f64(, , i32) define @vp_roundeven_nxv16f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_nxv16f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v7, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: lui a2, %hi(.LCPI44_0) -; CHECK-NEXT: srli a3, a1, 3 -; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a2) -; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: vslidedown.vx v6, v0, a3 -; CHECK-NEXT: sltu a3, a0, a2 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a2, 0 -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB44_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB44_2: -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundeven_nxv16f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v7, v0 +; RV32ZVFH-NEXT: csrr a1, vlenb +; RV32ZVFH-NEXT: lui a2, %hi(.LCPI44_0) +; RV32ZVFH-NEXT: srli a3, a1, 3 +; 
RV32ZVFH-NEXT: fld fa5, %lo(.LCPI44_0)(a2) +; RV32ZVFH-NEXT: sub a2, a0, a1 +; RV32ZVFH-NEXT: vslidedown.vx v6, v0, a3 +; RV32ZVFH-NEXT: sltu a3, a0, a2 +; RV32ZVFH-NEXT: addi a3, a3, -1 +; RV32ZVFH-NEXT: and a2, a3, a2 +; RV32ZVFH-NEXT: vmv1r.v v0, v6 +; RV32ZVFH-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a2, 0 +; RV32ZVFH-NEXT: vmv1r.v v0, v6 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFH-NEXT: fsrm a2 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFH-NEXT: bltu a0, a1, .LBB44_2 +; RV32ZVFH-NEXT: # %bb.1: +; RV32ZVFH-NEXT: mv a0, a1 +; RV32ZVFH-NEXT: .LBB44_2: +; RV32ZVFH-NEXT: vmv1r.v v0, v7 +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 0 +; RV32ZVFH-NEXT: vmv1r.v v0, v7 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundeven_nxv16f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v7, v0 +; RV64ZVFH-NEXT: csrr a1, vlenb +; RV64ZVFH-NEXT: li a2, 1075 +; RV64ZVFH-NEXT: srli a3, a1, 3 +; RV64ZVFH-NEXT: vslidedown.vx v6, v0, a3 +; RV64ZVFH-NEXT: sub a3, a0, a1 +; RV64ZVFH-NEXT: slli a2, a2, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a2 +; RV64ZVFH-NEXT: sltu a2, a0, a3 +; RV64ZVFH-NEXT: addi a2, a2, -1 +; RV64ZVFH-NEXT: and a2, a2, a3 +; RV64ZVFH-NEXT: vmv1r.v v0, v6 +; RV64ZVFH-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a2, 0 +; RV64ZVFH-NEXT: vmv1r.v v0, v6 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFH-NEXT: fsrm a2 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFH-NEXT: bltu a0, a1, .LBB44_2 +; RV64ZVFH-NEXT: # %bb.1: +; RV64ZVFH-NEXT: mv a0, a1 +; RV64ZVFH-NEXT: .LBB44_2: +; RV64ZVFH-NEXT: vmv1r.v v0, v7 +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 0 +; RV64ZVFH-NEXT: vmv1r.v v0, v7 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundeven_nxv16f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v7, v0 +; RV32ZVFHMIN-NEXT: csrr a1, vlenb +; RV32ZVFHMIN-NEXT: lui a2, %hi(.LCPI44_0) +; RV32ZVFHMIN-NEXT: srli a3, a1, 3 +; RV32ZVFHMIN-NEXT: fld fa5, 
%lo(.LCPI44_0)(a2) +; RV32ZVFHMIN-NEXT: sub a2, a0, a1 +; RV32ZVFHMIN-NEXT: vslidedown.vx v6, v0, a3 +; RV32ZVFHMIN-NEXT: sltu a3, a0, a2 +; RV32ZVFHMIN-NEXT: addi a3, a3, -1 +; RV32ZVFHMIN-NEXT: and a2, a3, a2 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV32ZVFHMIN-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a2, 0 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFHMIN-NEXT: fsrm a2 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFHMIN-NEXT: bltu a0, a1, .LBB44_2 +; RV32ZVFHMIN-NEXT: # %bb.1: +; RV32ZVFHMIN-NEXT: mv a0, a1 +; RV32ZVFHMIN-NEXT: .LBB44_2: +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 0 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundeven_nxv16f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v7, v0 +; RV64ZVFHMIN-NEXT: csrr a1, vlenb +; RV64ZVFHMIN-NEXT: li a2, 1075 +; RV64ZVFHMIN-NEXT: srli a3, a1, 3 +; RV64ZVFHMIN-NEXT: vslidedown.vx v6, v0, a3 +; RV64ZVFHMIN-NEXT: sub a3, a0, a1 +; RV64ZVFHMIN-NEXT: slli a2, a2, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a2 +; RV64ZVFHMIN-NEXT: sltu a2, a0, a3 +; RV64ZVFHMIN-NEXT: addi a2, a2, -1 +; RV64ZVFHMIN-NEXT: and a2, a2, a3 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV64ZVFHMIN-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a2, 0 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFHMIN-NEXT: fsrm a2 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFHMIN-NEXT: bltu a0, a1, .LBB44_2 +; RV64ZVFHMIN-NEXT: # %bb.1: +; RV64ZVFHMIN-NEXT: mv a0, a1 +; RV64ZVFHMIN-NEXT: .LBB44_2: +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 0 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundeven.nxv16f64( %va, %m, i32 %evl) ret %v } define 
@vp_roundeven_nxv16f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_nxv16f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: lui a2, %hi(.LCPI45_0) -; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: fld fa5, %lo(.LCPI45_0)(a2) -; CHECK-NEXT: sltu a2, a0, a3 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a2, a2, a3 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: fsrmi a2, 0 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB45_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB45_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundeven_nxv16f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: csrr a1, vlenb +; RV32ZVFH-NEXT: lui a2, %hi(.LCPI45_0) +; RV32ZVFH-NEXT: sub a3, a0, a1 +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI45_0)(a2) +; RV32ZVFH-NEXT: sltu a2, a0, a3 +; RV32ZVFH-NEXT: addi a2, a2, -1 +; RV32ZVFH-NEXT: and a2, a2, a3 +; RV32ZVFH-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v16 +; RV32ZVFH-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZVFH-NEXT: fsrmi a2, 0 +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFH-NEXT: fsrm a2 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFH-NEXT: bltu a0, a1, .LBB45_2 +; RV32ZVFH-NEXT: # %bb.1: +; RV32ZVFH-NEXT: mv a0, a1 +; RV32ZVFH-NEXT: .LBB45_2: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 0 +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundeven_nxv16f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: csrr a1, vlenb +; RV64ZVFH-NEXT: li a2, 1075 +; RV64ZVFH-NEXT: sub a3, a0, a1 +; RV64ZVFH-NEXT: slli a2, a2, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a2 +; RV64ZVFH-NEXT: sltu a2, a0, a3 +; RV64ZVFH-NEXT: addi a2, a2, -1 +; RV64ZVFH-NEXT: and a2, a2, a3 +; RV64ZVFH-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v16 +; RV64ZVFH-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZVFH-NEXT: fsrmi a2, 0 +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFH-NEXT: fsrm a2 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFH-NEXT: bltu a0, a1, .LBB45_2 +; RV64ZVFH-NEXT: # %bb.1: +; RV64ZVFH-NEXT: mv a0, a1 +; RV64ZVFH-NEXT: .LBB45_2: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v8 +; RV64ZVFH-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 0 +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, 
ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundeven_nxv16f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: csrr a1, vlenb +; RV32ZVFHMIN-NEXT: lui a2, %hi(.LCPI45_0) +; RV32ZVFHMIN-NEXT: sub a3, a0, a1 +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI45_0)(a2) +; RV32ZVFHMIN-NEXT: sltu a2, a0, a3 +; RV32ZVFHMIN-NEXT: addi a2, a2, -1 +; RV32ZVFHMIN-NEXT: and a2, a2, a3 +; RV32ZVFHMIN-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v16 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a2, 0 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFHMIN-NEXT: fsrm a2 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFHMIN-NEXT: bltu a0, a1, .LBB45_2 +; RV32ZVFHMIN-NEXT: # %bb.1: +; RV32ZVFHMIN-NEXT: mv a0, a1 +; RV32ZVFHMIN-NEXT: .LBB45_2: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 0 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundeven_nxv16f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: csrr a1, vlenb +; RV64ZVFHMIN-NEXT: li a2, 1075 +; RV64ZVFHMIN-NEXT: sub a3, a0, a1 +; RV64ZVFHMIN-NEXT: slli a2, a2, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a2 +; RV64ZVFHMIN-NEXT: sltu a2, a0, a3 +; RV64ZVFHMIN-NEXT: addi a2, a2, -1 +; RV64ZVFHMIN-NEXT: and a2, a2, a3 +; RV64ZVFHMIN-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v16 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a2, 0 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFHMIN-NEXT: fsrm a2 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFHMIN-NEXT: bltu a0, a1, .LBB45_2 +; RV64ZVFHMIN-NEXT: # %bb.1: +; RV64ZVFHMIN-NEXT: mv a0, a1 +; RV64ZVFHMIN-NEXT: .LBB45_2: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 0 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundeven.nxv16f64( %va, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll index 7f617f48862c4..4d8066d12c9ad 100644 --- a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll @@ -1,16 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFH,RV32ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: 
--check-prefixes=CHECK,ZVFH +; RUN: --check-prefixes=CHECK,ZVFH,RV64ZVFH ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFHMIN,RV32ZVFHMIN ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: --check-prefixes=CHECK,ZVFHMIN,RV64ZVFHMIN declare @llvm.vp.roundtozero.nxv1bf16(, , i32) @@ -407,10 +407,11 @@ declare @llvm.vp.roundtozero.nxv1f16(, @vp_roundtozero_nxv1f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI12_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 1 @@ -453,10 +454,11 @@ define @vp_roundtozero_nxv1f16( %va, @vp_roundtozero_nxv1f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv1f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI13_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -493,10 +495,11 @@ declare @llvm.vp.roundtozero.nxv2f16(, @vp_roundtozero_nxv2f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI14_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 1 @@ -539,10 +542,11 @@ define @vp_roundtozero_nxv2f16( %va, @vp_roundtozero_nxv2f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv2f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI15_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -579,10 +583,11 @@ declare @llvm.vp.roundtozero.nxv4f16(, @vp_roundtozero_nxv4f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI16_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 1 @@ -625,10 +630,11 @@ define @vp_roundtozero_nxv4f16( %va, @vp_roundtozero_nxv4f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv4f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI17_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, 
v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -667,9 +673,10 @@ define @vp_roundtozero_nxv8f16( %va, @vp_roundtozero_nxv8f16( %va, @vp_roundtozero_nxv8f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv8f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI19_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t @@ -755,9 +763,10 @@ define @vp_roundtozero_nxv16f16( %va, < ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vmv1r.v v12, v0 -; ZVFH-NEXT: lui a0, %hi(.LCPI20_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a0) ; ZVFH-NEXT: vfabs.v v16, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 1 @@ -801,10 +810,11 @@ define @vp_roundtozero_nxv16f16( %va, < define @vp_roundtozero_nxv16f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv16f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI21_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -843,9 +853,10 @@ define @vp_roundtozero_nxv32f16( %va, < ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vmv1r.v v16, v0 -; ZVFH-NEXT: lui a0, %hi(.LCPI22_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a0) ; ZVFH-NEXT: vfabs.v v24, v8, v0.t +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 1 @@ -922,10 +933,11 @@ define @vp_roundtozero_nxv32f16( %va, < define @vp_roundtozero_nxv32f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv32f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI23_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 +; ZVFH-NEXT: li a0, 25 +; ZVFH-NEXT: slli a0, a0, 10 +; ZVFH-NEXT: fmv.h.x fa5, a0 ; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -1210,41 +1222,141 @@ define @vp_roundtozero_nxv16f32_unmasked( @llvm.vp.roundtozero.nxv1f64(, , i32) define @vp_roundtozero_nxv1f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_nxv1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI34_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundtozero_nxv1f64: 
+; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI34_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI34_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfabs.v v9, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 1 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundtozero_nxv1f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfabs.v v9, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 1 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundtozero_nxv1f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI34_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI34_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 1 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundtozero_nxv1f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 1 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundtozero.nxv1f64( %va, %m, i32 %evl) ret %v } define @vp_roundtozero_nxv1f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_nxv1f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI35_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundtozero_nxv1f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI35_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI35_0)(a1) +; RV32ZVFH-NEXT: vsetvli 
zero, a0, e64, m1, ta, ma +; RV32ZVFH-NEXT: vfabs.v v9, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 1 +; RV32ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundtozero_nxv1f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFH-NEXT: vfabs.v v9, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 1 +; RV64ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundtozero_nxv1f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI35_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI35_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 1 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundtozero_nxv1f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v9, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 1 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundtozero.nxv1f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1252,43 +1364,149 @@ define @vp_roundtozero_nxv1f64_unmasked( @llvm.vp.roundtozero.nxv2f64(, , i32) define @vp_roundtozero_nxv2f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_nxv2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI36_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a0) -; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundtozero_nxv2f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v10, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI36_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI36_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v12, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 1 +; RV32ZVFH-NEXT: vmv1r.v v0, v10 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; 
RV32ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundtozero_nxv2f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v10, v0 +; RV64ZVFH-NEXT: vfabs.v v12, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 1 +; RV64ZVFH-NEXT: vmv1r.v v0, v10 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundtozero_nxv2f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v10, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI36_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI36_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 1 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v10 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundtozero_nxv2f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v10, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 1 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v10 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundtozero.nxv2f64( %va, %m, i32 %evl) ret %v } define @vp_roundtozero_nxv2f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_nxv2f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI37_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundtozero_nxv2f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI37_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI37_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFH-NEXT: vfabs.v v10, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 1 +; 
RV32ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundtozero_nxv2f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFH-NEXT: vfabs.v v10, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 1 +; RV64ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundtozero_nxv2f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI37_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI37_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 1 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundtozero_nxv2f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v10, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 1 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundtozero.nxv2f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1296,43 +1514,149 @@ define @vp_roundtozero_nxv2f64_unmasked( @llvm.vp.roundtozero.nxv4f64(, , i32) define @vp_roundtozero_nxv4f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_nxv4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI38_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a0) -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundtozero_nxv4f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v12, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI38_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI38_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v16, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 1 +; RV32ZVFH-NEXT: vmv1r.v v0, v12 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; 
RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundtozero_nxv4f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v12, v0 +; RV64ZVFH-NEXT: vfabs.v v16, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 1 +; RV64ZVFH-NEXT: vmv1r.v v0, v12 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundtozero_nxv4f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v12, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI38_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI38_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 1 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v12 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundtozero_nxv4f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v12, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 1 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v12 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundtozero.nxv4f64( %va, %m, i32 %evl) ret %v } define @vp_roundtozero_nxv4f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_nxv4f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI39_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundtozero_nxv4f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI39_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI39_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFH-NEXT: vfabs.v v12, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 1 +; RV32ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; 
RV32ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundtozero_nxv4f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFH-NEXT: vfabs.v v12, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 1 +; RV64ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundtozero_nxv4f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI39_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI39_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 1 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundtozero_nxv4f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v12, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 1 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundtozero.nxv4f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1340,43 +1664,149 @@ define @vp_roundtozero_nxv4f64_unmasked( @llvm.vp.roundtozero.nxv7f64(, , i32) define @vp_roundtozero_nxv7f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_nxv7f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI40_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundtozero_nxv7f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v16, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI40_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI40_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 1 +; RV32ZVFH-NEXT: vmv1r.v v0, v16 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: 
ret +; +; RV64ZVFH-LABEL: vp_roundtozero_nxv7f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v16, v0 +; RV64ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 1 +; RV64ZVFH-NEXT: vmv1r.v v0, v16 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundtozero_nxv7f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI40_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI40_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 1 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundtozero_nxv7f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 1 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundtozero.nxv7f64( %va, %m, i32 %evl) ret %v } define @vp_roundtozero_nxv7f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_nxv7f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI41_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundtozero_nxv7f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI41_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI41_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v16, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 1 +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; 
RV64ZVFH-LABEL: vp_roundtozero_nxv7f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v16, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 1 +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundtozero_nxv7f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI41_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI41_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 1 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundtozero_nxv7f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 1 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundtozero.nxv7f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1384,43 +1814,149 @@ define @vp_roundtozero_nxv7f64_unmasked( @llvm.vp.roundtozero.nxv8f64(, , i32) define @vp_roundtozero_nxv8f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_nxv8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI42_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundtozero_nxv8f64: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v16, v0 +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI42_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI42_0)(a0) +; RV32ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 1 +; RV32ZVFH-NEXT: vmv1r.v v0, v16 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundtozero_nxv8f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma 
+; RV64ZVFH-NEXT: vmv1r.v v16, v0 +; RV64ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 1 +; RV64ZVFH-NEXT: vmv1r.v v0, v16 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundtozero_nxv8f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV32ZVFHMIN-NEXT: lui a0, %hi(.LCPI42_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI42_0)(a0) +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 1 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundtozero_nxv8f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v16, v0 +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 1 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v16 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundtozero.nxv8f64( %va, %m, i32 %evl) ret %v } define @vp_roundtozero_nxv8f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_nxv8f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI43_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundtozero_nxv8f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a1, %hi(.LCPI43_0) +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI43_0)(a1) +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v16, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 1 +; RV32ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundtozero_nxv8f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma 
+; RV64ZVFH-NEXT: vfabs.v v16, v8 +; RV64ZVFH-NEXT: li a0, 1075 +; RV64ZVFH-NEXT: slli a0, a0, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a0 +; RV64ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 1 +; RV64ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundtozero_nxv8f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a1, %hi(.LCPI43_0) +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI43_0)(a1) +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 1 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundtozero_nxv8f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v16, v8 +; RV64ZVFHMIN-NEXT: li a0, 1075 +; RV64ZVFHMIN-NEXT: slli a0, a0, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a0 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 1 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundtozero.nxv8f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1429,87 +1965,325 @@ define @vp_roundtozero_nxv8f64_unmasked( @llvm.vp.roundtozero.nxv16f64(, , i32) define @vp_roundtozero_nxv16f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_nxv16f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v7, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: lui a2, %hi(.LCPI44_0) -; CHECK-NEXT: srli a3, a1, 3 -; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a2) -; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: vslidedown.vx v6, v0, a3 -; CHECK-NEXT: sltu a3, a0, a2 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a2, 1 -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB44_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB44_2: -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundtozero_nxv16f64: 
+; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV32ZVFH-NEXT: vmv1r.v v7, v0 +; RV32ZVFH-NEXT: csrr a1, vlenb +; RV32ZVFH-NEXT: lui a2, %hi(.LCPI44_0) +; RV32ZVFH-NEXT: srli a3, a1, 3 +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI44_0)(a2) +; RV32ZVFH-NEXT: sub a2, a0, a1 +; RV32ZVFH-NEXT: vslidedown.vx v6, v0, a3 +; RV32ZVFH-NEXT: sltu a3, a0, a2 +; RV32ZVFH-NEXT: addi a3, a3, -1 +; RV32ZVFH-NEXT: and a2, a3, a2 +; RV32ZVFH-NEXT: vmv1r.v v0, v6 +; RV32ZVFH-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v16, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a2, 1 +; RV32ZVFH-NEXT: vmv1r.v v0, v6 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFH-NEXT: fsrm a2 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFH-NEXT: bltu a0, a1, .LBB44_2 +; RV32ZVFH-NEXT: # %bb.1: +; RV32ZVFH-NEXT: mv a0, a1 +; RV32ZVFH-NEXT: .LBB44_2: +; RV32ZVFH-NEXT: vmv1r.v v0, v7 +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV32ZVFH-NEXT: fsrmi a0, 1 +; RV32ZVFH-NEXT: vmv1r.v v0, v7 +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundtozero_nxv16f64: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV64ZVFH-NEXT: vmv1r.v v7, v0 +; RV64ZVFH-NEXT: csrr a1, vlenb +; RV64ZVFH-NEXT: li a2, 1075 +; RV64ZVFH-NEXT: srli a3, a1, 3 +; RV64ZVFH-NEXT: vslidedown.vx v6, v0, a3 +; RV64ZVFH-NEXT: sub a3, a0, a1 +; RV64ZVFH-NEXT: slli a2, a2, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a2 +; RV64ZVFH-NEXT: sltu a2, a0, a3 +; RV64ZVFH-NEXT: addi a2, a2, -1 +; RV64ZVFH-NEXT: and a2, a2, a3 +; RV64ZVFH-NEXT: vmv1r.v v0, v6 +; RV64ZVFH-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v16, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a2, 1 +; RV64ZVFH-NEXT: vmv1r.v v0, v6 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFH-NEXT: fsrm a2 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFH-NEXT: bltu a0, a1, .LBB44_2 +; RV64ZVFH-NEXT: # %bb.1: +; RV64ZVFH-NEXT: mv a0, a1 +; RV64ZVFH-NEXT: .LBB44_2: +; RV64ZVFH-NEXT: vmv1r.v v0, v7 +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV64ZVFH-NEXT: fsrmi a0, 1 +; RV64ZVFH-NEXT: vmv1r.v v0, v7 +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundtozero_nxv16f64: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: 
vsetvli a1, zero, e8, mf4, ta, ma +; RV32ZVFHMIN-NEXT: vmv1r.v v7, v0 +; RV32ZVFHMIN-NEXT: csrr a1, vlenb +; RV32ZVFHMIN-NEXT: lui a2, %hi(.LCPI44_0) +; RV32ZVFHMIN-NEXT: srli a3, a1, 3 +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI44_0)(a2) +; RV32ZVFHMIN-NEXT: sub a2, a0, a1 +; RV32ZVFHMIN-NEXT: vslidedown.vx v6, v0, a3 +; RV32ZVFHMIN-NEXT: sltu a3, a0, a2 +; RV32ZVFHMIN-NEXT: addi a3, a3, -1 +; RV32ZVFHMIN-NEXT: and a2, a3, a2 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV32ZVFHMIN-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a2, 1 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFHMIN-NEXT: fsrm a2 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFHMIN-NEXT: bltu a0, a1, .LBB44_2 +; RV32ZVFHMIN-NEXT: # %bb.1: +; RV32ZVFHMIN-NEXT: mv a0, a1 +; RV32ZVFHMIN-NEXT: .LBB44_2: +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV32ZVFHMIN-NEXT: fsrmi a0, 1 +; RV32ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundtozero_nxv16f64: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV64ZVFHMIN-NEXT: vmv1r.v v7, v0 +; RV64ZVFHMIN-NEXT: csrr a1, vlenb +; RV64ZVFHMIN-NEXT: li a2, 1075 +; RV64ZVFHMIN-NEXT: srli a3, a1, 3 +; RV64ZVFHMIN-NEXT: vslidedown.vx v6, v0, a3 +; RV64ZVFHMIN-NEXT: sub a3, a0, a1 +; RV64ZVFHMIN-NEXT: slli a2, a2, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a2 +; RV64ZVFHMIN-NEXT: sltu a2, a0, a3 +; RV64ZVFHMIN-NEXT: addi a2, a2, -1 +; RV64ZVFHMIN-NEXT: and a2, a2, a3 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV64ZVFHMIN-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v6, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a2, 1 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v6 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFHMIN-NEXT: fsrm a2 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFHMIN-NEXT: bltu a0, a1, .LBB44_2 +; RV64ZVFHMIN-NEXT: # %bb.1: +; RV64ZVFHMIN-NEXT: mv a0, a1 +; RV64ZVFHMIN-NEXT: .LBB44_2: +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vmflt.vf v7, v24, fa5, v0.t +; RV64ZVFHMIN-NEXT: fsrmi a0, 1 +; RV64ZVFHMIN-NEXT: vmv1r.v v0, v7 +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; 
RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundtozero.nxv16f64( %va, %m, i32 %evl) ret %v } define @vp_roundtozero_nxv16f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_nxv16f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: lui a2, %hi(.LCPI45_0) -; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: fld fa5, %lo(.LCPI45_0)(a2) -; CHECK-NEXT: sltu a2, a0, a3 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a2, a2, a3 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: fsrmi a2, 1 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB45_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB45_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vp_roundtozero_nxv16f64_unmasked: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: csrr a1, vlenb +; RV32ZVFH-NEXT: lui a2, %hi(.LCPI45_0) +; RV32ZVFH-NEXT: sub a3, a0, a1 +; RV32ZVFH-NEXT: fld fa5, %lo(.LCPI45_0)(a2) +; RV32ZVFH-NEXT: sltu a2, a0, a3 +; RV32ZVFH-NEXT: addi a2, a2, -1 +; RV32ZVFH-NEXT: and a2, a2, a3 +; RV32ZVFH-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v16 +; RV32ZVFH-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZVFH-NEXT: fsrmi a2, 1 +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFH-NEXT: fsrm a2 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFH-NEXT: bltu a0, a1, .LBB45_2 +; RV32ZVFH-NEXT: # %bb.1: +; RV32ZVFH-NEXT: mv a0, a1 +; RV32ZVFH-NEXT: .LBB45_2: +; RV32ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFH-NEXT: vfabs.v v24, v8 +; RV32ZVFH-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZVFH-NEXT: fsrmi a0, 1 +; RV32ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFH-NEXT: fsrm a0 +; RV32ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_roundtozero_nxv16f64_unmasked: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: csrr a1, vlenb +; RV64ZVFH-NEXT: li a2, 1075 +; RV64ZVFH-NEXT: sub a3, a0, a1 +; RV64ZVFH-NEXT: slli a2, a2, 52 +; RV64ZVFH-NEXT: fmv.d.x fa5, a2 +; RV64ZVFH-NEXT: sltu a2, a0, a3 +; RV64ZVFH-NEXT: addi a2, a2, -1 +; RV64ZVFH-NEXT: and a2, a2, a3 +; RV64ZVFH-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v16 +; RV64ZVFH-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZVFH-NEXT: fsrmi a2, 1 +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFH-NEXT: fsrm a2 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFH-NEXT: bltu a0, a1, .LBB45_2 +; RV64ZVFH-NEXT: # %bb.1: +; RV64ZVFH-NEXT: mv a0, a1 +; RV64ZVFH-NEXT: .LBB45_2: +; RV64ZVFH-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFH-NEXT: vfabs.v v24, v8 +; RV64ZVFH-NEXT: 
vmflt.vf v0, v24, fa5 +; RV64ZVFH-NEXT: fsrmi a0, 1 +; RV64ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFH-NEXT: fsrm a0 +; RV64ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFH-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_roundtozero_nxv16f64_unmasked: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: csrr a1, vlenb +; RV32ZVFHMIN-NEXT: lui a2, %hi(.LCPI45_0) +; RV32ZVFHMIN-NEXT: sub a3, a0, a1 +; RV32ZVFHMIN-NEXT: fld fa5, %lo(.LCPI45_0)(a2) +; RV32ZVFHMIN-NEXT: sltu a2, a0, a3 +; RV32ZVFHMIN-NEXT: addi a2, a2, -1 +; RV32ZVFHMIN-NEXT: and a2, a2, a3 +; RV32ZVFHMIN-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v16 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a2, 1 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV32ZVFHMIN-NEXT: fsrm a2 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV32ZVFHMIN-NEXT: bltu a0, a1, .LBB45_2 +; RV32ZVFHMIN-NEXT: # %bb.1: +; RV32ZVFHMIN-NEXT: mv a0, a1 +; RV32ZVFHMIN-NEXT: .LBB45_2: +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32ZVFHMIN-NEXT: vfabs.v v24, v8 +; RV32ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; RV32ZVFHMIN-NEXT: fsrmi a0, 1 +; RV32ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV32ZVFHMIN-NEXT: fsrm a0 +; RV32ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_roundtozero_nxv16f64_unmasked: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: csrr a1, vlenb +; RV64ZVFHMIN-NEXT: li a2, 1075 +; RV64ZVFHMIN-NEXT: sub a3, a0, a1 +; RV64ZVFHMIN-NEXT: slli a2, a2, 52 +; RV64ZVFHMIN-NEXT: fmv.d.x fa5, a2 +; RV64ZVFHMIN-NEXT: sltu a2, a0, a3 +; RV64ZVFHMIN-NEXT: addi a2, a2, -1 +; RV64ZVFHMIN-NEXT: and a2, a2, a3 +; RV64ZVFHMIN-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v16 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a2, 1 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; RV64ZVFHMIN-NEXT: fsrm a2 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; RV64ZVFHMIN-NEXT: bltu a0, a1, .LBB45_2 +; RV64ZVFHMIN-NEXT: # %bb.1: +; RV64ZVFHMIN-NEXT: mv a0, a1 +; RV64ZVFHMIN-NEXT: .LBB45_2: +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64ZVFHMIN-NEXT: vfabs.v v24, v8 +; RV64ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; RV64ZVFHMIN-NEXT: fsrmi a0, 1 +; RV64ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; RV64ZVFHMIN-NEXT: fsrm a0 +; RV64ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; RV64ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundtozero.nxv16f64( %va, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfma-vp-combine.ll b/llvm/test/CodeGen/RISCV/rvv/vfma-vp-combine.ll index 03e6e6b7a624d..7e580d1057525 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfma-vp-combine.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfma-vp-combine.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v,+m -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck 
--check-prefixes=CHECK,RV32 %s ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v,+m -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64 %s declare @llvm.vp.fma.nxv1f64(, , , , i32) declare @llvm.vp.fneg.nxv1f64(, , i32) @@ -24,17 +24,30 @@ define @test1( %a, (fmul x, c1+c2) define @test2( %a, %m, i32 zeroext %evl) { -; CHECK-LABEL: test2: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI1_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI1_0)(a1) -; CHECK-NEXT: lui a1, %hi(.LCPI1_1) -; CHECK-NEXT: fld fa4, %lo(.LCPI1_1)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vfmv.v.f v9, fa5 -; CHECK-NEXT: vfadd.vf v9, v9, fa4, v0.t -; CHECK-NEXT: vfmul.vv v8, v8, v9, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: test2: +; RV32: # %bb.0: +; RV32-NEXT: lui a1, %hi(.LCPI1_0) +; RV32-NEXT: fld fa5, %lo(.LCPI1_0)(a1) +; RV32-NEXT: lui a1, %hi(.LCPI1_1) +; RV32-NEXT: fld fa4, %lo(.LCPI1_1)(a1) +; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32-NEXT: vfmv.v.f v9, fa5 +; RV32-NEXT: vfadd.vf v9, v9, fa4, v0.t +; RV32-NEXT: vfmul.vv v8, v8, v9, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: test2: +; RV64: # %bb.0: +; RV64-NEXT: li a1, 1025 +; RV64-NEXT: slli a1, a1, 52 +; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v9, a1 +; RV64-NEXT: li a0, 1 +; RV64-NEXT: slli a0, a0, 62 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vfadd.vf v9, v9, fa5, v0.t +; RV64-NEXT: vfmul.vv v8, v8, v9, v0.t +; RV64-NEXT: ret %t = call @llvm.vp.fmul.nxv1f64( %a, splat (double 2.0), %m, i32 %evl) %v = call fast @llvm.vp.fma.nxv1f64( %a, splat (double 4.0), %t, %m, i32 %evl) ret %v @@ -42,18 +55,32 @@ define @test2( %a, ; (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) define @test3( %a, %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: test3: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI2_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI2_0)(a1) -; CHECK-NEXT: lui a1, %hi(.LCPI2_1) -; CHECK-NEXT: fld fa4, %lo(.LCPI2_1)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vfmv.v.f v10, fa5 -; CHECK-NEXT: vfmul.vf v10, v10, fa4, v0.t -; CHECK-NEXT: vfmadd.vv v10, v8, v9, v0.t -; CHECK-NEXT: vmv.v.v v8, v10 -; CHECK-NEXT: ret +; RV32-LABEL: test3: +; RV32: # %bb.0: +; RV32-NEXT: lui a1, %hi(.LCPI2_0) +; RV32-NEXT: fld fa5, %lo(.LCPI2_0)(a1) +; RV32-NEXT: lui a1, %hi(.LCPI2_1) +; RV32-NEXT: fld fa4, %lo(.LCPI2_1)(a1) +; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32-NEXT: vfmv.v.f v10, fa5 +; RV32-NEXT: vfmul.vf v10, v10, fa4, v0.t +; RV32-NEXT: vfmadd.vv v10, v8, v9, v0.t +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: test3: +; RV64: # %bb.0: +; RV64-NEXT: li a1, 1025 +; RV64-NEXT: slli a1, a1, 52 +; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v10, a1 +; RV64-NEXT: li a0, 1 +; RV64-NEXT: slli a0, a0, 62 +; RV64-NEXT: fmv.d.x fa5, a0 +; RV64-NEXT: vfmul.vf v10, v10, fa5, v0.t +; RV64-NEXT: vfmadd.vv v10, v8, v9, v0.t +; RV64-NEXT: vmv.v.v v8, v10 +; RV64-NEXT: ret %t = call @llvm.vp.fmul.nxv1f64( %a, splat (double 2.0), %m, i32 %evl) %v = call fast @llvm.vp.fma.nxv1f64( %t, splat (double 4.0), %b, %m, i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-f16.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-f16.ll index e269b13137d44..93b12ad14d7e1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-f16.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-f16.ll @@ -100,8 +100,9 @@ define half @vreduce_fminimum_nxv4f16( %val) { ; 
ZVFH-NEXT: vcpop.m a0, v9 ; ZVFH-NEXT: beqz a0, .LBB4_2 ; ZVFH-NEXT: # %bb.1: -; ZVFH-NEXT: lui a0, %hi(.LCPI4_0) -; ZVFH-NEXT: flh fa0, %lo(.LCPI4_0)(a0) +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: addi a0, a0, -512 +; ZVFH-NEXT: fmv.h.x fa0, a0 ; ZVFH-NEXT: ret ; ZVFH-NEXT: .LBB4_2: ; ZVFH-NEXT: vfredmin.vs v8, v8, v8 @@ -138,8 +139,9 @@ define half @vreduce_fmaximum_nxv4f16( %val) { ; ZVFH-NEXT: vcpop.m a0, v9 ; ZVFH-NEXT: beqz a0, .LBB5_2 ; ZVFH-NEXT: # %bb.1: -; ZVFH-NEXT: lui a0, %hi(.LCPI5_0) -; ZVFH-NEXT: flh fa0, %lo(.LCPI5_0)(a0) +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: addi a0, a0, -512 +; ZVFH-NEXT: fmv.h.x fa0, a0 ; ZVFH-NEXT: ret ; ZVFH-NEXT: .LBB5_2: ; ZVFH-NEXT: vfredmax.vs v8, v8, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll index 78aae96242fd3..861998a2ba51a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll @@ -998,13 +998,13 @@ declare half @llvm.vector.reduce.fmin.nxv10f16() define half @vreduce_fmin_nxv10f16( %v) { ; CHECK-LABEL: vreduce_fmin_nxv10f16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI73_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI73_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v12, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a1, a0, 2 ; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: lui a1, 8 +; CHECK-NEXT: addi a1, a1, -512 +; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vmv.s.x v12, a1 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfredmin.vs v12, v8, v12 ; CHECK-NEXT: vfmv.f.s fa0, v12 diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-f16.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-f16.ll index 8993bf8a767d8..7fb26fb6f6258 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-f16.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-f16.ll @@ -124,8 +124,9 @@ define half @vpreduce_fminimum_nxv4f16(half %start, %val, %val, @test4(i64 %avl, i8 zeroext %cond, @test6(i64 %avl, i8 zeroext %cond,