170 changes: 77 additions & 93 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll
@@ -610,29 +610,29 @@ define <8 x i64> @vp_bswap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
; RV32-NEXT: vand.vx v16, v8, a2, v0.t
; RV32-NEXT: li a3, 40
; RV32-NEXT: vsll.vx v16, v16, a3, v0.t
; RV32-NEXT: vor.vv v16, v12, v16, v0.t
; RV32-NEXT: vor.vv v12, v12, v16, v0.t
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vand.vx v12, v8, a4, v0.t
; RV32-NEXT: vsll.vi v20, v12, 24, v0.t
; RV32-NEXT: vand.vx v16, v8, a4, v0.t
; RV32-NEXT: vsll.vi v16, v16, 24, v0.t
; RV32-NEXT: addi a5, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a5), zero
; RV32-NEXT: vlse64.v v20, (a5), zero
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vand.vv v24, v8, v12, v0.t
; RV32-NEXT: vand.vv v24, v8, v20, v0.t
; RV32-NEXT: vsll.vi v24, v24, 8, v0.t
; RV32-NEXT: vor.vv v20, v20, v24, v0.t
; RV32-NEXT: vor.vv v16, v16, v20, v0.t
; RV32-NEXT: vsrl.vx v20, v8, a1, v0.t
; RV32-NEXT: vor.vv v16, v16, v24, v0.t
; RV32-NEXT: vor.vv v12, v12, v16, v0.t
; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t
; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t
; RV32-NEXT: vand.vx v24, v24, a2, v0.t
; RV32-NEXT: vor.vv v20, v24, v20, v0.t
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t
; RV32-NEXT: vand.vx v24, v24, a4, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
; RV32-NEXT: vand.vv v8, v8, v12, v0.t
; RV32-NEXT: vand.vv v8, v8, v20, v0.t
; RV32-NEXT: vor.vv v8, v8, v24, v0.t
; RV32-NEXT: vor.vv v8, v8, v20, v0.t
; RV32-NEXT: vor.vv v8, v16, v8, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vor.vv v8, v12, v8, v0.t
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
@@ -781,7 +781,9 @@ define <15 x i64> @vp_bswap_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vand.vx v16, v8, a4, v0.t
; RV32-NEXT: vsll.vi v24, v16, 24, v0.t
; RV32-NEXT: vsll.vi v16, v16, 24, v0.t
; RV32-NEXT: addi a5, sp, 16
; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
; RV32-NEXT: addi a5, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a5), zero
@@ -791,36 +793,38 @@ define <15 x i64> @vp_bswap_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev
; RV32-NEXT: addi a5, a5, 16
; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: vand.vv v24, v8, v16, v0.t
; RV32-NEXT: vsll.vi v24, v24, 8, v0.t
; RV32-NEXT: addi a0, sp, 16
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v24, v16, v24, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v24, v16, v24, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t
; RV32-NEXT: vand.vx v16, v24, a2, v0.t
; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t
; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t
; RV32-NEXT: vand.vx v16, v16, a2, v0.t
; RV32-NEXT: vor.vv v16, v16, v24, v0.t
; RV32-NEXT: addi a0, sp, 16
; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t
; RV32-NEXT: vand.vx v24, v24, a4, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: vor.vv v8, v8, v24, v0.t
; RV32-NEXT: vand.vv v16, v24, v16, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
; RV32-NEXT: vand.vx v8, v8, a4, v0.t
; RV32-NEXT: vor.vv v8, v16, v8, v0.t
; RV32-NEXT: addi a0, sp, 16
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
@@ -857,27 +861,27 @@ define <15 x i64> @vp_bswap_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev
; RV64-NEXT: addi a2, sp, 16
; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV64-NEXT: li a2, 56
; RV64-NEXT: vsll.vx v24, v8, a2, v0.t
; RV64-NEXT: lui a3, 16
; RV64-NEXT: addiw a3, a3, -256
; RV64-NEXT: vand.vx v24, v8, a3, v0.t
; RV64-NEXT: li a4, 40
; RV64-NEXT: vand.vx v16, v8, a3, v0.t
; RV64-NEXT: vsll.vx v16, v16, a4, v0.t
; RV64-NEXT: vor.vv v16, v24, v16, v0.t
; RV64-NEXT: vsll.vx v24, v24, a4, v0.t
; RV64-NEXT: vsll.vx v16, v8, a2, v0.t
; RV64-NEXT: vor.vv v16, v16, v24, v0.t
; RV64-NEXT: addi a5, sp, 16
; RV64-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
; RV64-NEXT: vor.vv v16, v16, v24, v0.t
; RV64-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
; RV64-NEXT: vsrl.vx v24, v8, a2, v0.t
; RV64-NEXT: vsrl.vx v16, v8, a4, v0.t
; RV64-NEXT: vand.vx v16, v16, a3, v0.t
; RV64-NEXT: vor.vv v24, v16, v24, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 24, v0.t
; RV64-NEXT: vand.vx v16, v16, a1, v0.t
; RV64-NEXT: vsrl.vx v24, v8, a4, v0.t
; RV64-NEXT: vand.vx v24, v24, a3, v0.t
; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t
; RV64-NEXT: vor.vv v16, v24, v16, v0.t
; RV64-NEXT: vsrl.vi v24, v8, 24, v0.t
; RV64-NEXT: vand.vx v24, v24, a1, v0.t
; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
; RV64-NEXT: vand.vx v8, v8, a0, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vor.vv v8, v8, v24, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: addi a0, sp, 16
; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV64-NEXT: vor.vv v8, v16, v8, v0.t
@@ -1012,7 +1016,9 @@ define <16 x i64> @vp_bswap_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vand.vx v16, v8, a4, v0.t
; RV32-NEXT: vsll.vi v24, v16, 24, v0.t
; RV32-NEXT: vsll.vi v16, v16, 24, v0.t
; RV32-NEXT: addi a5, sp, 16
; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
; RV32-NEXT: addi a5, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a5), zero
@@ -1022,36 +1028,38 @@ define <16 x i64> @vp_bswap_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev
; RV32-NEXT: addi a5, a5, 16
; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: vand.vv v24, v8, v16, v0.t
; RV32-NEXT: vsll.vi v24, v24, 8, v0.t
; RV32-NEXT: addi a0, sp, 16
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v24, v16, v24, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v24, v16, v24, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t
; RV32-NEXT: vand.vx v16, v24, a2, v0.t
; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t
; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t
; RV32-NEXT: vand.vx v16, v16, a2, v0.t
; RV32-NEXT: vor.vv v16, v16, v24, v0.t
; RV32-NEXT: addi a0, sp, 16
; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t
; RV32-NEXT: vand.vx v24, v24, a4, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: vor.vv v8, v8, v24, v0.t
; RV32-NEXT: vand.vv v16, v24, v16, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
; RV32-NEXT: vand.vx v8, v8, a4, v0.t
; RV32-NEXT: vor.vv v8, v16, v8, v0.t
; RV32-NEXT: addi a0, sp, 16
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
@@ -1088,27 +1096,27 @@ define <16 x i64> @vp_bswap_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev
; RV64-NEXT: addi a2, sp, 16
; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV64-NEXT: li a2, 56
; RV64-NEXT: vsll.vx v24, v8, a2, v0.t
; RV64-NEXT: lui a3, 16
; RV64-NEXT: addiw a3, a3, -256
; RV64-NEXT: vand.vx v24, v8, a3, v0.t
; RV64-NEXT: li a4, 40
; RV64-NEXT: vand.vx v16, v8, a3, v0.t
; RV64-NEXT: vsll.vx v16, v16, a4, v0.t
; RV64-NEXT: vor.vv v16, v24, v16, v0.t
; RV64-NEXT: vsll.vx v24, v24, a4, v0.t
; RV64-NEXT: vsll.vx v16, v8, a2, v0.t
; RV64-NEXT: vor.vv v16, v16, v24, v0.t
; RV64-NEXT: addi a5, sp, 16
; RV64-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
; RV64-NEXT: vor.vv v16, v16, v24, v0.t
; RV64-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
; RV64-NEXT: vsrl.vx v24, v8, a2, v0.t
; RV64-NEXT: vsrl.vx v16, v8, a4, v0.t
; RV64-NEXT: vand.vx v16, v16, a3, v0.t
; RV64-NEXT: vor.vv v24, v16, v24, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 24, v0.t
; RV64-NEXT: vand.vx v16, v16, a1, v0.t
; RV64-NEXT: vsrl.vx v24, v8, a4, v0.t
; RV64-NEXT: vand.vx v24, v24, a3, v0.t
; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t
; RV64-NEXT: vor.vv v16, v24, v16, v0.t
; RV64-NEXT: vsrl.vi v24, v8, 24, v0.t
; RV64-NEXT: vand.vx v24, v24, a1, v0.t
; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
; RV64-NEXT: vand.vx v8, v8, a0, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vor.vv v8, v8, v24, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: addi a0, sp, 16
; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV64-NEXT: vor.vv v8, v16, v8, v0.t
@@ -1217,51 +1225,27 @@ declare <128 x i16> @llvm.vp.bswap.v128i16(<128 x i16>, <128 x i1>, i32)
define <128 x i16> @vp_bswap_v128i16(<128 x i16> %va, <128 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bswap_v128i16:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
; CHECK-NEXT: li a2, 64
; CHECK-NEXT: vslidedown.vi v24, v0, 8
; CHECK-NEXT: vslidedown.vi v7, v0, 8
; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB26_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t
; CHECK-NEXT: vsrl.vi v24, v8, 8, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vor.vv v8, v8, v24, v0.t
; CHECK-NEXT: addi a1, a0, -64
; CHECK-NEXT: sltu a0, a0, a1
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
; CHECK-NEXT: vor.vv v16, v8, v16, v0.t
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vsrl.vi v24, v16, 8, v0.t
; CHECK-NEXT: vsll.vi v16, v16, 8, v0.t
; CHECK-NEXT: vor.vv v16, v16, v24, v0.t
; CHECK-NEXT: ret
%v = call <128 x i16> @llvm.vp.bswap.v128i16(<128 x i16> %va, <128 x i1> %m, i32 %evl)
ret <128 x i16> %v
@@ -126,14 +126,14 @@ define <4 x i32> @udiv_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-NEXT: vsub.vv v10, v8, v9
; CHECK-NEXT: vmv.v.i v11, 0
; CHECK-NEXT: lui a0, 524288
; CHECK-NEXT: lui a1, %hi(.LCPI4_1)
; CHECK-NEXT: addi a1, a1, %lo(.LCPI4_1)
; CHECK-NEXT: vle32.v v12, (a1)
; CHECK-NEXT: vslide1down.vx v11, v11, a0
; CHECK-NEXT: lui a0, %hi(.LCPI4_1)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI4_1)
; CHECK-NEXT: vle32.v v12, (a0)
; CHECK-NEXT: vmulhu.vv v10, v10, v11
; CHECK-NEXT: vadd.vv v9, v10, v9
; CHECK-NEXT: vmv.v.i v0, 4
; CHECK-NEXT: vsrl.vv v9, v9, v12
; CHECK-NEXT: vmv.v.i v0, 4
; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: ret
%e0 = udiv i32 %a, 23
182 changes: 79 additions & 103 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll
@@ -194,23 +194,23 @@ define <8 x half> @vp_ceil_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl)
;
; ZVFHMIN-LABEL: vp_ceil_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vmv1r.v v9, v0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t
; ZVFHMIN-NEXT: vfabs.v v8, v10, v0.t
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT: vmflt.vf v9, v12, fa5, v0.t
; ZVFHMIN-NEXT: vmv1r.v v13, v0
; ZVFHMIN-NEXT: vmflt.vf v13, v8, fa5, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: fsrmi a0, 3
; ZVFHMIN-NEXT: vmv1r.v v0, v9
; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t
; ZVFHMIN-NEXT: vmv1r.v v0, v13
; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
; ZVFHMIN-NEXT: fsrm a0
; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT: vfsgnj.vv v10, v12, v10, v0.t
; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
@@ -261,42 +261,42 @@ declare <16 x half> @llvm.vp.ceil.v16f16(<16 x half>, <16 x i1>, i32)
define <16 x half> @vp_ceil_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_ceil_v16f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI6_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8, v0.t
; ZVFH-NEXT: vfabs.v v10, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t
; ZVFH-NEXT: vmv1r.v v13, v0
; ZVFH-NEXT: vmflt.vf v13, v10, fa5, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFH-NEXT: fsrmi a0, 3
; ZVFH-NEXT: vmv1r.v v0, v10
; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t
; ZVFH-NEXT: vmv1r.v v0, v13
; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t
; ZVFH-NEXT: fsrm a0
; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t
; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vp_ceil_v16f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vmv1r.v v10, v0
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t
; ZVFHMIN-NEXT: vfabs.v v8, v12, v0.t
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; ZVFHMIN-NEXT: vmflt.vf v10, v16, fa5, v0.t
; ZVFHMIN-NEXT: vmv1r.v v17, v0
; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT: fsrmi a0, 3
; ZVFHMIN-NEXT: vmv1r.v v0, v10
; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t
; ZVFHMIN-NEXT: vmv1r.v v0, v17
; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t
; ZVFHMIN-NEXT: fsrm a0
; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v12, v0.t
; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
@@ -431,21 +431,21 @@ declare <8 x float> @llvm.vp.ceil.v8f32(<8 x float>, <8 x i1>, i32)
define <8 x float> @vp_ceil_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vfabs.v v10, v8, v0.t
; CHECK-NEXT: lui a0, 307200
; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t
; CHECK-NEXT: vmv1r.v v13, v0
; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v13
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%v = call <8 x float> @llvm.vp.ceil.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
ret <8 x float> %v
@@ -475,21 +475,21 @@ declare <16 x float> @llvm.vp.ceil.v16f32(<16 x float>, <16 x i1>, i32)
define <16 x float> @vp_ceil_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: lui a0, 307200
; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t
; CHECK-NEXT: vmv1r.v v17, v0
; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v17
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <16 x float> @llvm.vp.ceil.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
ret <16 x float> %v
@@ -561,21 +561,21 @@ declare <4 x double> @llvm.vp.ceil.v4f64(<4 x double>, <4 x i1>, i32)
define <4 x double> @vp_ceil_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI18_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vfabs.v v10, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t
; CHECK-NEXT: vmv1r.v v13, v0
; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v13
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%v = call <4 x double> @llvm.vp.ceil.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
ret <4 x double> %v
@@ -605,21 +605,21 @@ declare <8 x double> @llvm.vp.ceil.v8f64(<8 x double>, <8 x i1>, i32)
define <8 x double> @vp_ceil_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI20_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t
; CHECK-NEXT: vmv1r.v v17, v0
; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v17
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <8 x double> @llvm.vp.ceil.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
ret <8 x double> %v
@@ -649,21 +649,21 @@ declare <15 x double> @llvm.vp.ceil.v15f64(<15 x double>, <15 x i1>, i32)
define <15 x double> @vp_ceil_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v15f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT: vmv1r.v v25, v0
; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <15 x double> @llvm.vp.ceil.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
ret <15 x double> %v
@@ -693,21 +693,21 @@ declare <16 x double> @llvm.vp.ceil.v16f64(<16 x double>, <16 x i1>, i32)
define <16 x double> @vp_ceil_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT: vmv1r.v v25, v0
; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <16 x double> @llvm.vp.ceil.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
ret <16 x double> %v
@@ -737,21 +737,9 @@ declare <32 x double> @llvm.vp.ceil.v32f64(<32 x double>, <32 x i1>, i32)
define <32 x double> @vp_ceil_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v32f64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT: vmv1r.v v25, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: vslidedown.vi v24, v0, 2
; CHECK-NEXT: vslidedown.vi v7, v0, 2
; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB26_2
; CHECK-NEXT: # %bb.1:
@@ -760,48 +748,36 @@ define <32 x double> @vp_ceil_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroex
; CHECK-NEXT: lui a2, %hi(.LCPI26_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t
; CHECK-NEXT: vmv1r.v v5, v0
; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: fsrmi a1, 3
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v5
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a1
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: addi a1, a0, -16
; CHECK-NEXT: sltu a0, a0, a1
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfabs.v v8, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vfabs.v v24, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v24, v8, fa5, v0.t
; CHECK-NEXT: vmv1r.v v6, v7
; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: ret
%v = call <32 x double> @llvm.vp.ceil.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
ret <32 x double> %v
616 changes: 194 additions & 422 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll

Large diffs are not rendered by default.

140 changes: 47 additions & 93 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll
@@ -1528,72 +1528,50 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev
; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV32-NEXT: addi a2, sp, 40
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: li a4, 24
; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vlse64.v v8, (a2), zero
; RV32-NEXT: vlse64.v v24, (a2), zero
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 5
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: li a3, 24
; RV32-NEXT: mul a2, a2, a3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vsub.vv v24, v8, v16, v0.t
; RV32-NEXT: vand.vv v16, v16, v24, v0.t
; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
; RV32-NEXT: addi a2, sp, 32
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v8, (a2), zero
; RV32-NEXT: vlse64.v v16, (a2), zero
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: li a3, 24
; RV32-NEXT: mul a2, a2, a3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v24, v8, v0.t
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 4
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 4
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vadd.vv v16, v8, v16, v0.t
; RV32-NEXT: vsrl.vi v8, v16, 4, v0.t
; RV32-NEXT: vadd.vv v16, v16, v8, v0.t
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vand.vv v24, v8, v16, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: vadd.vv v8, v24, v8, v0.t
; RV32-NEXT: vsrl.vi v24, v8, 4, v0.t
; RV32-NEXT: vadd.vv v8, v8, v24, v0.t
; RV32-NEXT: addi a2, sp, 24
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v8, (a2), zero
; RV32-NEXT: vlse64.v v16, (a2), zero
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 4
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vand.vv v8, v16, v8, v0.t
; RV32-NEXT: vand.vv v24, v8, v16, v0.t
; RV32-NEXT: addi a2, sp, 16
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a2), zero
; RV32-NEXT: vlse64.v v8, (a2), zero
; RV32-NEXT: addi a2, sp, 48
; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
; RV32-NEXT: vmul.vv v24, v24, v8, v0.t
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t
; RV32-NEXT: vsrl.vx v8, v24, a1, v0.t
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 3
; RV32-NEXT: add a2, sp, a2
@@ -1610,25 +1588,25 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev
; RV32-NEXT: mul a0, a0, a2
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v8, v24, v8, v0.t
; RV32-NEXT: vsub.vv v8, v16, v8, v0.t
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v24, v24, v16, v0.t
; RV32-NEXT: vsub.vv v24, v8, v24, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a2, 24
; RV32-NEXT: mul a0, a0, a2
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v24, v8, v16, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: vadd.vv v8, v24, v8, v0.t
; RV32-NEXT: vand.vv v8, v24, v16, v0.t
; RV32-NEXT: vsrl.vi v24, v24, 2, v0.t
; RV32-NEXT: vand.vv v16, v24, v16, v0.t
; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
@@ -1655,43 +1633,32 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev
;
; RV64-LABEL: vp_ctpop_v32i64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 4
; RV64-NEXT: sub sp, sp, a1
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT: li a2, 16
; RV64-NEXT: vslidedown.vi v24, v0, 2
; RV64-NEXT: vslidedown.vi v7, v0, 2
; RV64-NEXT: mv a1, a0
; RV64-NEXT: bltu a0, a2, .LBB34_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 16
; RV64-NEXT: .LBB34_2:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT: vsrl.vi v24, v8, 1, v0.t
; RV64-NEXT: lui a1, 349525
; RV64-NEXT: addiw a1, a1, 1365
; RV64-NEXT: slli a2, a1, 32
; RV64-NEXT: add a1, a1, a2
; RV64-NEXT: vand.vx v16, v16, a1, v0.t
; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
; RV64-NEXT: vand.vx v24, v24, a1, v0.t
; RV64-NEXT: vsub.vv v8, v8, v24, v0.t
; RV64-NEXT: lui a2, 209715
; RV64-NEXT: addiw a2, a2, 819
; RV64-NEXT: slli a3, a2, 32
; RV64-NEXT: add a2, a2, a3
; RV64-NEXT: vand.vx v16, v8, a2, v0.t
; RV64-NEXT: vand.vx v24, v8, a2, v0.t
; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT: vand.vx v8, v8, a2, v0.t
; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
; RV64-NEXT: vadd.vv v8, v24, v8, v0.t
; RV64-NEXT: vsrl.vi v24, v8, 4, v0.t
; RV64-NEXT: vadd.vv v8, v8, v24, v0.t
; RV64-NEXT: lui a3, 61681
; RV64-NEXT: addiw a3, a3, -241
; RV64-NEXT: slli a4, a3, 32
@@ -1704,37 +1671,24 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev
; RV64-NEXT: vmul.vx v8, v8, a4, v0.t
; RV64-NEXT: li a5, 56
; RV64-NEXT: vsrl.vx v8, v8, a5, v0.t
; RV64-NEXT: addi a6, sp, 16
; RV64-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill
; RV64-NEXT: addi a6, a0, -16
; RV64-NEXT: sltu a0, a0, a6
; RV64-NEXT: addi a0, a0, -1
; RV64-NEXT: and a0, a0, a6
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: add a0, sp, a0
; RV64-NEXT: addi a0, a0, 16
; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT: vand.vx v16, v16, a1, v0.t
; RV64-NEXT: vsub.vv v16, v8, v16, v0.t
; RV64-NEXT: vand.vx v8, v16, a2, v0.t
; RV64-NEXT: vmv1r.v v0, v7
; RV64-NEXT: vsrl.vi v24, v16, 1, v0.t
; RV64-NEXT: vand.vx v24, v24, a1, v0.t
; RV64-NEXT: vsub.vv v16, v16, v24, v0.t
; RV64-NEXT: vand.vx v24, v16, a2, v0.t
; RV64-NEXT: vsrl.vi v16, v16, 2, v0.t
; RV64-NEXT: vand.vx v16, v16, a2, v0.t
; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
; RV64-NEXT: vand.vx v8, v8, a3, v0.t
; RV64-NEXT: vmul.vx v8, v8, a4, v0.t
; RV64-NEXT: vsrl.vx v16, v8, a5, v0.t
; RV64-NEXT: addi a0, sp, 16
; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 4
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: vadd.vv v16, v24, v16, v0.t
; RV64-NEXT: vsrl.vi v24, v16, 4, v0.t
; RV64-NEXT: vadd.vv v16, v16, v24, v0.t
; RV64-NEXT: vand.vx v16, v16, a3, v0.t
; RV64-NEXT: vmul.vx v16, v16, a4, v0.t
; RV64-NEXT: vsrl.vx v16, v16, a5, v0.t
; RV64-NEXT: ret
%v = call <32 x i64> @llvm.vp.ctpop.v32i64(<32 x i64> %va, <32 x i1> %m, i32 %evl)
ret <32 x i64> %v
596 changes: 184 additions & 412 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll

Large diffs are not rendered by default.

16 changes: 8 additions & 8 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
@@ -45,9 +45,9 @@ define void @cttz_v16i8(ptr %x, ptr %y) nounwind {
; RVF-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RVF-NEXT: vnsrl.wi v9, v10, 0
; RVF-NEXT: li a1, 127
; RVF-NEXT: vsub.vx v9, v9, a1
; RVF-NEXT: vmseq.vi v0, v8, 0
; RVF-NEXT: vsub.vx v8, v9, a1
; RVF-NEXT: vmerge.vim v8, v8, 8, v0
; RVF-NEXT: vmerge.vim v8, v9, 8, v0
; RVF-NEXT: vse8.v v8, (a0)
; RVF-NEXT: ret
;
@@ -64,9 +64,9 @@ define void @cttz_v16i8(ptr %x, ptr %y) nounwind {
; RVD-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RVD-NEXT: vnsrl.wi v9, v10, 0
; RVD-NEXT: li a1, 127
; RVD-NEXT: vsub.vx v9, v9, a1
; RVD-NEXT: vmseq.vi v0, v8, 0
; RVD-NEXT: vsub.vx v8, v9, a1
; RVD-NEXT: vmerge.vim v8, v8, 8, v0
; RVD-NEXT: vmerge.vim v8, v9, 8, v0
; RVD-NEXT: vse8.v v8, (a0)
; RVD-NEXT: ret
;
@@ -444,9 +444,9 @@ define void @cttz_v32i8(ptr %x, ptr %y) nounwind {
; RVF-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; RVF-NEXT: vnsrl.wi v10, v12, 0
; RVF-NEXT: li a1, 127
; RVF-NEXT: vsub.vx v10, v10, a1
; RVF-NEXT: vmseq.vi v0, v8, 0
; RVF-NEXT: vsub.vx v8, v10, a1
; RVF-NEXT: vmerge.vim v8, v8, 8, v0
; RVF-NEXT: vmerge.vim v8, v10, 8, v0
; RVF-NEXT: vse8.v v8, (a0)
; RVF-NEXT: ret
;
@@ -464,9 +464,9 @@ define void @cttz_v32i8(ptr %x, ptr %y) nounwind {
; RVD-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; RVD-NEXT: vnsrl.wi v10, v12, 0
; RVD-NEXT: li a1, 127
; RVD-NEXT: vsub.vx v10, v10, a1
; RVD-NEXT: vmseq.vi v0, v8, 0
; RVD-NEXT: vsub.vx v8, v10, a1
; RVD-NEXT: vmerge.vim v8, v8, 8, v0
; RVD-NEXT: vmerge.vim v8, v10, 8, v0
; RVD-NEXT: vse8.v v8, (a0)
; RVD-NEXT: ret
;
76 changes: 38 additions & 38 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
@@ -10,9 +10,9 @@ define i1 @extractelt_v1i1(ptr %x, i64 %idx) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vmseq.vi v0, v8, 0
; CHECK-NEXT: vmv.s.x v8, zero
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vmerge.vim v8, v9, 1, v0
; CHECK-NEXT: vslidedown.vx v8, v8, a1
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -328,17 +328,17 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
; RV32-NEXT: li a2, 128
; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; RV32-NEXT: addi a2, a0, 128
; RV32-NEXT: vle8.v v16, (a2)
; RV32-NEXT: vle8.v v24, (a0)
; RV32-NEXT: vle8.v v8, (a2)
; RV32-NEXT: vle8.v v16, (a0)
; RV32-NEXT: mv a0, sp
; RV32-NEXT: add a1, a0, a1
; RV32-NEXT: vmseq.vi v8, v16, 0
; RV32-NEXT: vmseq.vi v0, v24, 0
; RV32-NEXT: vmv.v.i v16, 0
; RV32-NEXT: vmerge.vim v24, v16, 1, v0
; RV32-NEXT: vse8.v v24, (a0)
; RV32-NEXT: vmv1r.v v0, v8
; RV32-NEXT: vmerge.vim v8, v16, 1, v0
; RV32-NEXT: vmseq.vi v24, v8, 0
; RV32-NEXT: vmseq.vi v0, v16, 0
; RV32-NEXT: vmv.v.i v8, 0
; RV32-NEXT: vmerge.vim v16, v8, 1, v0
; RV32-NEXT: vse8.v v16, (a0)
; RV32-NEXT: vmv1r.v v0, v24
; RV32-NEXT: vmerge.vim v8, v8, 1, v0
; RV32-NEXT: addi a0, sp, 128
; RV32-NEXT: vse8.v v8, (a0)
; RV32-NEXT: lbu a0, 0(a1)
@@ -359,17 +359,17 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
; RV64-NEXT: li a2, 128
; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; RV64-NEXT: addi a2, a0, 128
; RV64-NEXT: vle8.v v16, (a2)
; RV64-NEXT: vle8.v v24, (a0)
; RV64-NEXT: vle8.v v8, (a2)
; RV64-NEXT: vle8.v v16, (a0)
; RV64-NEXT: mv a0, sp
; RV64-NEXT: add a1, a0, a1
; RV64-NEXT: vmseq.vi v8, v16, 0
; RV64-NEXT: vmseq.vi v0, v24, 0
; RV64-NEXT: vmv.v.i v16, 0
; RV64-NEXT: vmerge.vim v24, v16, 1, v0
; RV64-NEXT: vse8.v v24, (a0)
; RV64-NEXT: vmv1r.v v0, v8
; RV64-NEXT: vmerge.vim v8, v16, 1, v0
; RV64-NEXT: vmseq.vi v24, v8, 0
; RV64-NEXT: vmseq.vi v0, v16, 0
; RV64-NEXT: vmv.v.i v8, 0
; RV64-NEXT: vmerge.vim v16, v8, 1, v0
; RV64-NEXT: vse8.v v16, (a0)
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-NEXT: addi a0, sp, 128
; RV64-NEXT: vse8.v v8, (a0)
; RV64-NEXT: lbu a0, 0(a1)
@@ -390,17 +390,17 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
; RV32ZBS-NEXT: li a2, 128
; RV32ZBS-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; RV32ZBS-NEXT: addi a2, a0, 128
; RV32ZBS-NEXT: vle8.v v16, (a2)
; RV32ZBS-NEXT: vle8.v v24, (a0)
; RV32ZBS-NEXT: vle8.v v8, (a2)
; RV32ZBS-NEXT: vle8.v v16, (a0)
; RV32ZBS-NEXT: mv a0, sp
; RV32ZBS-NEXT: add a1, a0, a1
; RV32ZBS-NEXT: vmseq.vi v8, v16, 0
; RV32ZBS-NEXT: vmseq.vi v0, v24, 0
; RV32ZBS-NEXT: vmv.v.i v16, 0
; RV32ZBS-NEXT: vmerge.vim v24, v16, 1, v0
; RV32ZBS-NEXT: vse8.v v24, (a0)
; RV32ZBS-NEXT: vmv1r.v v0, v8
; RV32ZBS-NEXT: vmerge.vim v8, v16, 1, v0
; RV32ZBS-NEXT: vmseq.vi v24, v8, 0
; RV32ZBS-NEXT: vmseq.vi v0, v16, 0
; RV32ZBS-NEXT: vmv.v.i v8, 0
; RV32ZBS-NEXT: vmerge.vim v16, v8, 1, v0
; RV32ZBS-NEXT: vse8.v v16, (a0)
; RV32ZBS-NEXT: vmv1r.v v0, v24
; RV32ZBS-NEXT: vmerge.vim v8, v8, 1, v0
; RV32ZBS-NEXT: addi a0, sp, 128
; RV32ZBS-NEXT: vse8.v v8, (a0)
; RV32ZBS-NEXT: lbu a0, 0(a1)
@@ -421,17 +421,17 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
; RV64ZBS-NEXT: li a2, 128
; RV64ZBS-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; RV64ZBS-NEXT: addi a2, a0, 128
; RV64ZBS-NEXT: vle8.v v16, (a2)
; RV64ZBS-NEXT: vle8.v v24, (a0)
; RV64ZBS-NEXT: vle8.v v8, (a2)
; RV64ZBS-NEXT: vle8.v v16, (a0)
; RV64ZBS-NEXT: mv a0, sp
; RV64ZBS-NEXT: add a1, a0, a1
; RV64ZBS-NEXT: vmseq.vi v8, v16, 0
; RV64ZBS-NEXT: vmseq.vi v0, v24, 0
; RV64ZBS-NEXT: vmv.v.i v16, 0
; RV64ZBS-NEXT: vmerge.vim v24, v16, 1, v0
; RV64ZBS-NEXT: vse8.v v24, (a0)
; RV64ZBS-NEXT: vmv1r.v v0, v8
; RV64ZBS-NEXT: vmerge.vim v8, v16, 1, v0
; RV64ZBS-NEXT: vmseq.vi v24, v8, 0
; RV64ZBS-NEXT: vmseq.vi v0, v16, 0
; RV64ZBS-NEXT: vmv.v.i v8, 0
; RV64ZBS-NEXT: vmerge.vim v16, v8, 1, v0
; RV64ZBS-NEXT: vse8.v v16, (a0)
; RV64ZBS-NEXT: vmv1r.v v0, v24
; RV64ZBS-NEXT: vmerge.vim v8, v8, 1, v0
; RV64ZBS-NEXT: addi a0, sp, 128
; RV64ZBS-NEXT: vse8.v v8, (a0)
; RV64ZBS-NEXT: lbu a0, 0(a1)
@@ -8,9 +8,9 @@ define <1 x half> @ceil_v1f16(<1 x half> %x) strictfp {
; CHECK-LABEL: ceil_v1f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: lui a0, %hi(.LCPI0_0)
; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0)
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, fa5
@@ -30,9 +30,9 @@ define <2 x half> @ceil_v2f16(<2 x half> %x) strictfp {
; CHECK-LABEL: ceil_v2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: lui a0, %hi(.LCPI1_0)
; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0)
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, fa5
@@ -52,9 +52,9 @@ define <4 x half> @ceil_v4f16(<4 x half> %x) strictfp {
; CHECK-LABEL: ceil_v4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: lui a0, %hi(.LCPI2_0)
; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0)
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, fa5
@@ -74,9 +74,9 @@ define <8 x half> @ceil_v8f16(<8 x half> %x) strictfp {
; CHECK-LABEL: ceil_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: lui a0, %hi(.LCPI3_0)
; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0)
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, fa5
@@ -96,9 +96,9 @@ define <16 x half> @ceil_v16f16(<16 x half> %x) strictfp {
; CHECK-LABEL: ceil_v16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: lui a0, %hi(.LCPI4_0)
; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0)
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, fa5
@@ -119,9 +119,9 @@ define <32 x half> @ceil_v32f16(<32 x half> %x) strictfp {
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: lui a0, %hi(.LCPI5_0)
; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0)
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, fa5
@@ -251,9 +251,9 @@ define <1 x double> @ceil_v1f64(<1 x double> %x) strictfp {
; CHECK-LABEL: ceil_v1f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: lui a0, %hi(.LCPI11_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0)
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, fa5
@@ -273,9 +273,9 @@ define <2 x double> @ceil_v2f64(<2 x double> %x) strictfp {
; CHECK-LABEL: ceil_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: lui a0, %hi(.LCPI12_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0)
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, fa5
@@ -295,9 +295,9 @@ define <4 x double> @ceil_v4f64(<4 x double> %x) strictfp {
; CHECK-LABEL: ceil_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: lui a0, %hi(.LCPI13_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0)
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, fa5
@@ -317,9 +317,9 @@ define <8 x double> @ceil_v8f64(<8 x double> %x) strictfp {
; CHECK-LABEL: ceil_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: lui a0, %hi(.LCPI14_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0)
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, fa5
@@ -8,9 +8,9 @@ define <1 x half> @floor_v1f16(<1 x half> %x) strictfp {
; CHECK-LABEL: floor_v1f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: lui a0, %hi(.LCPI0_0)
; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0)
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, fa5
@@ -30,9 +30,9 @@ define <2 x half> @floor_v2f16(<2 x half> %x) strictfp {
; CHECK-LABEL: floor_v2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: lui a0, %hi(.LCPI1_0)
; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0)
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, fa5
@@ -52,9 +52,9 @@ define <4 x half> @floor_v4f16(<4 x half> %x) strictfp {
; CHECK-LABEL: floor_v4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: lui a0, %hi(.LCPI2_0)
; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0)
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, fa5
@@ -74,9 +74,9 @@ define <8 x half> @floor_v8f16(<8 x half> %x) strictfp {
; CHECK-LABEL: floor_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: lui a0, %hi(.LCPI3_0)
; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0)
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, fa5
@@ -96,9 +96,9 @@ define <16 x half> @floor_v16f16(<16 x half> %x) strictfp {
; CHECK-LABEL: floor_v16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: lui a0, %hi(.LCPI4_0)
; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0)
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, fa5
@@ -119,9 +119,9 @@ define <32 x half> @floor_v32f16(<32 x half> %x) strictfp {
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: lui a0, %hi(.LCPI5_0)
; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0)
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, fa5
@@ -251,9 +251,9 @@ define <1 x double> @floor_v1f64(<1 x double> %x) strictfp {
; CHECK-LABEL: floor_v1f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: lui a0, %hi(.LCPI11_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0)
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, fa5
@@ -273,9 +273,9 @@ define <2 x double> @floor_v2f64(<2 x double> %x) strictfp {
; CHECK-LABEL: floor_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: lui a0, %hi(.LCPI12_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0)
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, fa5
@@ -295,9 +295,9 @@ define <4 x double> @floor_v4f64(<4 x double> %x) strictfp {
; CHECK-LABEL: floor_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: lui a0, %hi(.LCPI13_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0)
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, fa5
@@ -317,9 +317,9 @@ define <8 x double> @floor_v8f64(<8 x double> %x) strictfp {
; CHECK-LABEL: floor_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: lui a0, %hi(.LCPI14_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0)
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, fa5
182 changes: 79 additions & 103 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll
@@ -194,23 +194,23 @@ define <8 x half> @vp_floor_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl)
;
; ZVFHMIN-LABEL: vp_floor_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vmv1r.v v9, v0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t
; ZVFHMIN-NEXT: vfabs.v v8, v10, v0.t
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT: vmflt.vf v9, v12, fa5, v0.t
; ZVFHMIN-NEXT: vmv1r.v v13, v0
; ZVFHMIN-NEXT: vmflt.vf v13, v8, fa5, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: fsrmi a0, 2
; ZVFHMIN-NEXT: vmv1r.v v0, v9
; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t
; ZVFHMIN-NEXT: vmv1r.v v0, v13
; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
; ZVFHMIN-NEXT: fsrm a0
; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT: vfsgnj.vv v10, v12, v10, v0.t
; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
@@ -261,42 +261,42 @@ declare <16 x half> @llvm.vp.floor.v16f16(<16 x half>, <16 x i1>, i32)
define <16 x half> @vp_floor_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_floor_v16f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI6_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8, v0.t
; ZVFH-NEXT: vfabs.v v10, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t
; ZVFH-NEXT: vmv1r.v v13, v0
; ZVFH-NEXT: vmflt.vf v13, v10, fa5, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFH-NEXT: fsrmi a0, 2
; ZVFH-NEXT: vmv1r.v v0, v10
; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t
; ZVFH-NEXT: vmv1r.v v0, v13
; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t
; ZVFH-NEXT: fsrm a0
; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t
; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vp_floor_v16f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vmv1r.v v10, v0
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t
; ZVFHMIN-NEXT: vfabs.v v8, v12, v0.t
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; ZVFHMIN-NEXT: vmflt.vf v10, v16, fa5, v0.t
; ZVFHMIN-NEXT: vmv1r.v v17, v0
; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT: fsrmi a0, 2
; ZVFHMIN-NEXT: vmv1r.v v0, v10
; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t
; ZVFHMIN-NEXT: vmv1r.v v0, v17
; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t
; ZVFHMIN-NEXT: fsrm a0
; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v12, v0.t
; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
@@ -431,21 +431,21 @@ declare <8 x float> @llvm.vp.floor.v8f32(<8 x float>, <8 x i1>, i32)
define <8 x float> @vp_floor_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vfabs.v v10, v8, v0.t
; CHECK-NEXT: lui a0, 307200
; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t
; CHECK-NEXT: vmv1r.v v13, v0
; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v13
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%v = call <8 x float> @llvm.vp.floor.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
ret <8 x float> %v
@@ -475,21 +475,21 @@ declare <16 x float> @llvm.vp.floor.v16f32(<16 x float>, <16 x i1>, i32)
define <16 x float> @vp_floor_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: lui a0, 307200
; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t
; CHECK-NEXT: vmv1r.v v17, v0
; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v17
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <16 x float> @llvm.vp.floor.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
ret <16 x float> %v
Expand Down Expand Up @@ -561,21 +561,21 @@ declare <4 x double> @llvm.vp.floor.v4f64(<4 x double>, <4 x i1>, i32)
define <4 x double> @vp_floor_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI18_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vfabs.v v10, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t
; CHECK-NEXT: vmv1r.v v13, v0
; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v13
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%v = call <4 x double> @llvm.vp.floor.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
ret <4 x double> %v
Expand Down Expand Up @@ -605,21 +605,21 @@ declare <8 x double> @llvm.vp.floor.v8f64(<8 x double>, <8 x i1>, i32)
define <8 x double> @vp_floor_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI20_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t
; CHECK-NEXT: vmv1r.v v17, v0
; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v17
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <8 x double> @llvm.vp.floor.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
ret <8 x double> %v
Expand Down Expand Up @@ -649,21 +649,21 @@ declare <15 x double> @llvm.vp.floor.v15f64(<15 x double>, <15 x i1>, i32)
define <15 x double> @vp_floor_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v15f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT: vmv1r.v v25, v0
; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <15 x double> @llvm.vp.floor.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
ret <15 x double> %v
Expand Down Expand Up @@ -693,21 +693,21 @@ declare <16 x double> @llvm.vp.floor.v16f64(<16 x double>, <16 x i1>, i32)
define <16 x double> @vp_floor_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT: vmv1r.v v25, v0
; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <16 x double> @llvm.vp.floor.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
ret <16 x double> %v
Expand Down Expand Up @@ -737,21 +737,9 @@ declare <32 x double> @llvm.vp.floor.v32f64(<32 x double>, <32 x i1>, i32)
define <32 x double> @vp_floor_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v32f64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT: vmv1r.v v25, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: vslidedown.vi v24, v0, 2
; CHECK-NEXT: vslidedown.vi v7, v0, 2
; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB26_2
; CHECK-NEXT: # %bb.1:
@@ -760,48 +748,36 @@ define <32 x double> @vp_floor_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe
; CHECK-NEXT: lui a2, %hi(.LCPI26_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t
; CHECK-NEXT: vmv1r.v v5, v0
; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: fsrmi a1, 2
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v5
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a1
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: addi a1, a0, -16
; CHECK-NEXT: sltu a0, a0, a1
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfabs.v v8, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vfabs.v v24, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v24, v8, fa5, v0.t
; CHECK-NEXT: vmv1r.v v6, v7
; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: ret
%v = call <32 x double> @llvm.vp.floor.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
ret <32 x double> %v
158 changes: 69 additions & 89 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
@@ -15,11 +15,10 @@ define <2 x half> @vfmax_v2f16_vv(<2 x half> %a, <2 x half> %b) {
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmfeq.vv v10, v9, v9
; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT: vmv1r.v v0, v10
; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0
; ZVFH-NEXT: vmfeq.vv v0, v9, v9
; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT: vfmax.vv v8, v8, v11
; ZVFH-NEXT: vfmax.vv v8, v8, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_v2f16_vv:
@@ -29,11 +28,10 @@ define <2 x half> @vfmax_v2f16_vv(<2 x half> %a, <2 x half> %b) {
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9
; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10
; ZVFHMIN-NEXT: vmerge.vvm v11, v9, v10, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v9, v0
; ZVFHMIN-NEXT: vfmax.vv v9, v8, v11
; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v10, v0
; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10
; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v9, v0
; ZVFHMIN-NEXT: vfmax.vv v9, v9, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
@@ -48,11 +46,10 @@ define <4 x half> @vfmax_v4f16_vv(<4 x half> %a, <4 x half> %b) {
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmfeq.vv v10, v9, v9
; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT: vmv1r.v v0, v10
; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0
; ZVFH-NEXT: vmfeq.vv v0, v9, v9
; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT: vfmax.vv v8, v8, v11
; ZVFH-NEXT: vfmax.vv v8, v8, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_v4f16_vv:
@@ -62,11 +59,10 @@ define <4 x half> @vfmax_v4f16_vv(<4 x half> %a, <4 x half> %b) {
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9
; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10
; ZVFHMIN-NEXT: vmerge.vvm v11, v9, v10, v0
; ZVFHMIN-NEXT: vmv.v.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v9, v0
; ZVFHMIN-NEXT: vfmax.vv v9, v8, v11
; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v10, v0
; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10
; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v9, v0
; ZVFHMIN-NEXT: vfmax.vv v9, v9, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
@@ -81,11 +77,10 @@ define <8 x half> @vfmax_v8f16_vv(<8 x half> %a, <8 x half> %b) {
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmfeq.vv v10, v9, v9
; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT: vmv.v.v v0, v10
; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0
; ZVFH-NEXT: vmfeq.vv v0, v9, v9
; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT: vfmax.vv v8, v8, v11
; ZVFH-NEXT: vfmax.vv v8, v8, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_v8f16_vv:
@@ -95,11 +90,10 @@ define <8 x half> @vfmax_v8f16_vv(<8 x half> %a, <8 x half> %b) {
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12
; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10
; ZVFHMIN-NEXT: vmerge.vvm v14, v12, v10, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0
; ZVFHMIN-NEXT: vfmax.vv v10, v8, v14
; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v10, v0
; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10
; ZVFHMIN-NEXT: vmerge.vvm v10, v10, v12, v0
; ZVFHMIN-NEXT: vfmax.vv v10, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
@@ -114,11 +108,10 @@ define <16 x half> @vfmax_v16f16_vv(<16 x half> %a, <16 x half> %b) {
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmfeq.vv v12, v10, v10
; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0
; ZVFH-NEXT: vmv1r.v v0, v12
; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0
; ZVFH-NEXT: vmfeq.vv v0, v10, v10
; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0
; ZVFH-NEXT: vfmax.vv v8, v8, v14
; ZVFH-NEXT: vfmax.vv v8, v8, v12
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_v16f16_vv:
@@ -128,11 +121,10 @@ define <16 x half> @vfmax_v16f16_vv(<16 x half> %a, <16 x half> %b) {
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16
; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12
; ZVFHMIN-NEXT: vmerge.vvm v20, v16, v12, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0
; ZVFHMIN-NEXT: vfmax.vv v12, v8, v20
; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v12, v0
; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12
; ZVFHMIN-NEXT: vmerge.vvm v12, v12, v16, v0
; ZVFHMIN-NEXT: vfmax.vv v12, v12, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
@@ -147,11 +139,10 @@ define <2 x float> @vfmax_v2f32_vv(<2 x float> %a, <2 x float> %b) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v10, v9, v9
; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0
; CHECK-NEXT: vmfeq.vv v0, v9, v9
; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v11
; CHECK-NEXT: vfmax.vv v8, v8, v10
; CHECK-NEXT: ret
%v = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b)
ret <2 x float> %v
@@ -164,11 +155,10 @@ define <4 x float> @vfmax_v4f32_vv(<4 x float> %a, <4 x float> %b) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v10, v9, v9
; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT: vmv.v.v v0, v10
; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0
; CHECK-NEXT: vmfeq.vv v0, v9, v9
; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v11
; CHECK-NEXT: vfmax.vv v8, v8, v10
; CHECK-NEXT: ret
%v = call <4 x float> @llvm.maximum.v4f32(<4 x float> %a, <4 x float> %b)
ret <4 x float> %v
@@ -181,11 +171,10 @@ define <8 x float> @vfmax_v8f32_vv(<8 x float> %a, <8 x float> %b) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v12, v10, v10
; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0
; CHECK-NEXT: vmfeq.vv v0, v10, v10
; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v14
; CHECK-NEXT: vfmax.vv v8, v8, v12
; CHECK-NEXT: ret
%v = call <8 x float> @llvm.maximum.v8f32(<8 x float> %a, <8 x float> %b)
ret <8 x float> %v
@@ -198,11 +187,10 @@ define <16 x float> @vfmax_v16f32_vv(<16 x float> %a, <16 x float> %b) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v16, v12, v12
; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0
; CHECK-NEXT: vmfeq.vv v0, v12, v12
; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v20
; CHECK-NEXT: vfmax.vv v8, v8, v16
; CHECK-NEXT: ret
%v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %a, <16 x float> %b)
ret <16 x float> %v
@@ -215,11 +203,10 @@ define <2 x double> @vfmax_v2f64_vv(<2 x double> %a, <2 x double> %b) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v10, v9, v9
; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT: vmv.v.v v0, v10
; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0
; CHECK-NEXT: vmfeq.vv v0, v9, v9
; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v11
; CHECK-NEXT: vfmax.vv v8, v8, v10
; CHECK-NEXT: ret
%v = call <2 x double> @llvm.maximum.v2f64(<2 x double> %a, <2 x double> %b)
ret <2 x double> %v
@@ -232,11 +219,10 @@ define <4 x double> @vfmax_v4f64_vv(<4 x double> %a, <4 x double> %b) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v12, v10, v10
; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0
; CHECK-NEXT: vmfeq.vv v0, v10, v10
; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v14
; CHECK-NEXT: vfmax.vv v8, v8, v12
; CHECK-NEXT: ret
%v = call <4 x double> @llvm.maximum.v4f64(<4 x double> %a, <4 x double> %b)
ret <4 x double> %v
@@ -249,11 +235,10 @@ define <8 x double> @vfmax_v8f64_vv(<8 x double> %a, <8 x double> %b) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v16, v12, v12
; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0
; CHECK-NEXT: vmfeq.vv v0, v12, v12
; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v20
; CHECK-NEXT: vfmax.vv v8, v8, v16
; CHECK-NEXT: ret
%v = call <8 x double> @llvm.maximum.v8f64(<8 x double> %a, <8 x double> %b)
ret <8 x double> %v
@@ -266,9 +251,8 @@ define <16 x double> @vfmax_v16f64_vv(<16 x double> %a, <16 x double> %b) nounwi
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v7, v16, v16
; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vmfeq.vv v0, v16, v16
; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v24
; CHECK-NEXT: ret
@@ -302,13 +286,12 @@ define <2 x half> @vfmax_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) {
; ZVFH-LABEL: vfmax_v2f16_vv_nnana:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFH-NEXT: vfadd.vv v10, v8, v8
; ZVFH-NEXT: vfadd.vv v8, v8, v8
; ZVFH-NEXT: vmfeq.vv v0, v9, v9
; ZVFH-NEXT: vmfeq.vv v8, v10, v10
; ZVFH-NEXT: vmerge.vvm v11, v9, v10, v0
; ZVFH-NEXT: vmv1r.v v0, v8
; ZVFH-NEXT: vmerge.vvm v8, v10, v9, v0
; ZVFH-NEXT: vfmax.vv v8, v11, v8
; ZVFH-NEXT: vmerge.vvm v10, v9, v8, v0
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmerge.vvm v8, v8, v9, v0
; ZVFH-NEXT: vfmax.vv v8, v10, v8
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_v2f16_vv_nnana:
@@ -319,16 +302,15 @@ define <2 x half> @vfmax_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) {
; ZVFHMIN-NEXT: vfadd.vv v8, v10, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11
; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v8, v9, v9
; ZVFHMIN-NEXT: vmerge.vvm v10, v11, v9, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v11, v0
; ZVFHMIN-NEXT: vmerge.vvm v10, v8, v9, v0
; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9
; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v8, v0
; ZVFHMIN-NEXT: vfmax.vv v9, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
@@ -343,13 +325,12 @@ define <2 x half> @vfmax_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) {
; ZVFH-LABEL: vfmax_v2f16_vv_nnanb:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFH-NEXT: vfadd.vv v10, v9, v9
; ZVFH-NEXT: vfadd.vv v9, v9, v9
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmfeq.vv v9, v10, v10
; ZVFH-NEXT: vmerge.vvm v11, v8, v10, v0
; ZVFH-NEXT: vmv1r.v v0, v9
; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0
; ZVFH-NEXT: vfmax.vv v8, v8, v11
; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0
; ZVFH-NEXT: vmfeq.vv v0, v9, v9
; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT: vfmax.vv v8, v8, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_v2f16_vv_nnanb:
@@ -364,12 +345,11 @@ define <2 x half> @vfmax_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11
; ZVFHMIN-NEXT: vmerge.vvm v10, v9, v11, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v9, v0
; ZVFHMIN-NEXT: vmerge.vvm v10, v9, v8, v0
; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v9, v0
; ZVFHMIN-NEXT: vfmax.vv v9, v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
158 changes: 69 additions & 89 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
@@ -15,11 +15,10 @@ define <2 x half> @vfmin_v2f16_vv(<2 x half> %a, <2 x half> %b) {
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmfeq.vv v10, v9, v9
; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT: vmv1r.v v0, v10
; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0
; ZVFH-NEXT: vmfeq.vv v0, v9, v9
; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT: vfmin.vv v8, v8, v11
; ZVFH-NEXT: vfmin.vv v8, v8, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmin_v2f16_vv:
@@ -29,11 +28,10 @@ define <2 x half> @vfmin_v2f16_vv(<2 x half> %a, <2 x half> %b) {
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9
; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10
; ZVFHMIN-NEXT: vmerge.vvm v11, v9, v10, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v9, v0
; ZVFHMIN-NEXT: vfmin.vv v9, v8, v11
; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v10, v0
; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10
; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v9, v0
; ZVFHMIN-NEXT: vfmin.vv v9, v9, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
@@ -48,11 +46,10 @@ define <4 x half> @vfmin_v4f16_vv(<4 x half> %a, <4 x half> %b) {
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmfeq.vv v10, v9, v9
; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT: vmv1r.v v0, v10
; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0
; ZVFH-NEXT: vmfeq.vv v0, v9, v9
; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT: vfmin.vv v8, v8, v11
; ZVFH-NEXT: vfmin.vv v8, v8, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmin_v4f16_vv:
@@ -62,11 +59,10 @@ define <4 x half> @vfmin_v4f16_vv(<4 x half> %a, <4 x half> %b) {
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9
; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10
; ZVFHMIN-NEXT: vmerge.vvm v11, v9, v10, v0
; ZVFHMIN-NEXT: vmv.v.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v9, v0
; ZVFHMIN-NEXT: vfmin.vv v9, v8, v11
; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v10, v0
; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10
; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v9, v0
; ZVFHMIN-NEXT: vfmin.vv v9, v9, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
@@ -81,11 +77,10 @@ define <8 x half> @vfmin_v8f16_vv(<8 x half> %a, <8 x half> %b) {
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmfeq.vv v10, v9, v9
; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT: vmv.v.v v0, v10
; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0
; ZVFH-NEXT: vmfeq.vv v0, v9, v9
; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT: vfmin.vv v8, v8, v11
; ZVFH-NEXT: vfmin.vv v8, v8, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmin_v8f16_vv:
@@ -95,11 +90,10 @@ define <8 x half> @vfmin_v8f16_vv(<8 x half> %a, <8 x half> %b) {
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12
; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10
; ZVFHMIN-NEXT: vmerge.vvm v14, v12, v10, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0
; ZVFHMIN-NEXT: vfmin.vv v10, v8, v14
; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v10, v0
; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10
; ZVFHMIN-NEXT: vmerge.vvm v10, v10, v12, v0
; ZVFHMIN-NEXT: vfmin.vv v10, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
@@ -114,11 +108,10 @@ define <16 x half> @vfmin_v16f16_vv(<16 x half> %a, <16 x half> %b) {
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmfeq.vv v12, v10, v10
; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0
; ZVFH-NEXT: vmv1r.v v0, v12
; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0
; ZVFH-NEXT: vmfeq.vv v0, v10, v10
; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0
; ZVFH-NEXT: vfmin.vv v8, v8, v14
; ZVFH-NEXT: vfmin.vv v8, v8, v12
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmin_v16f16_vv:
@@ -128,11 +121,10 @@ define <16 x half> @vfmin_v16f16_vv(<16 x half> %a, <16 x half> %b) {
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16
; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12
; ZVFHMIN-NEXT: vmerge.vvm v20, v16, v12, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0
; ZVFHMIN-NEXT: vfmin.vv v12, v8, v20
; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v12, v0
; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12
; ZVFHMIN-NEXT: vmerge.vvm v12, v12, v16, v0
; ZVFHMIN-NEXT: vfmin.vv v12, v12, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
@@ -147,11 +139,10 @@ define <2 x float> @vfmin_v2f32_vv(<2 x float> %a, <2 x float> %b) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v10, v9, v9
; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0
; CHECK-NEXT: vmfeq.vv v0, v9, v9
; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: vfmin.vv v8, v8, v11
; CHECK-NEXT: vfmin.vv v8, v8, v10
; CHECK-NEXT: ret
%v = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> %b)
ret <2 x float> %v
@@ -164,11 +155,10 @@ define <4 x float> @vfmin_v4f32_vv(<4 x float> %a, <4 x float> %b) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v10, v9, v9
; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT: vmv.v.v v0, v10
; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0
; CHECK-NEXT: vmfeq.vv v0, v9, v9
; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: vfmin.vv v8, v8, v11
; CHECK-NEXT: vfmin.vv v8, v8, v10
; CHECK-NEXT: ret
%v = call <4 x float> @llvm.minimum.v4f32(<4 x float> %a, <4 x float> %b)
ret <4 x float> %v
@@ -181,11 +171,10 @@ define <8 x float> @vfmin_v8f32_vv(<8 x float> %a, <8 x float> %b) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v12, v10, v10
; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0
; CHECK-NEXT: vmfeq.vv v0, v10, v10
; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT: vfmin.vv v8, v8, v14
; CHECK-NEXT: vfmin.vv v8, v8, v12
; CHECK-NEXT: ret
%v = call <8 x float> @llvm.minimum.v8f32(<8 x float> %a, <8 x float> %b)
ret <8 x float> %v
@@ -198,11 +187,10 @@ define <16 x float> @vfmin_v16f32_vv(<16 x float> %a, <16 x float> %b) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v16, v12, v12
; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0
; CHECK-NEXT: vmfeq.vv v0, v12, v12
; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT: vfmin.vv v8, v8, v20
; CHECK-NEXT: vfmin.vv v8, v8, v16
; CHECK-NEXT: ret
%v = call <16 x float> @llvm.minimum.v16f32(<16 x float> %a, <16 x float> %b)
ret <16 x float> %v
@@ -215,11 +203,10 @@ define <2 x double> @vfmin_v2f64_vv(<2 x double> %a, <2 x double> %b) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v10, v9, v9
; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT: vmv.v.v v0, v10
; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0
; CHECK-NEXT: vmfeq.vv v0, v9, v9
; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: vfmin.vv v8, v8, v11
; CHECK-NEXT: vfmin.vv v8, v8, v10
; CHECK-NEXT: ret
%v = call <2 x double> @llvm.minimum.v2f64(<2 x double> %a, <2 x double> %b)
ret <2 x double> %v
@@ -232,11 +219,10 @@ define <4 x double> @vfmin_v4f64_vv(<4 x double> %a, <4 x double> %b) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v12, v10, v10
; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0
; CHECK-NEXT: vmfeq.vv v0, v10, v10
; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT: vfmin.vv v8, v8, v14
; CHECK-NEXT: vfmin.vv v8, v8, v12
; CHECK-NEXT: ret
%v = call <4 x double> @llvm.minimum.v4f64(<4 x double> %a, <4 x double> %b)
ret <4 x double> %v
@@ -249,11 +235,10 @@ define <8 x double> @vfmin_v8f64_vv(<8 x double> %a, <8 x double> %b) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v16, v12, v12
; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0
; CHECK-NEXT: vmfeq.vv v0, v12, v12
; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT: vfmin.vv v8, v8, v20
; CHECK-NEXT: vfmin.vv v8, v8, v16
; CHECK-NEXT: ret
%v = call <8 x double> @llvm.minimum.v8f64(<8 x double> %a, <8 x double> %b)
ret <8 x double> %v
@@ -266,9 +251,8 @@ define <16 x double> @vfmin_v16f64_vv(<16 x double> %a, <16 x double> %b) nounwi
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v7, v16, v16
; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vmfeq.vv v0, v16, v16
; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT: vfmin.vv v8, v8, v24
; CHECK-NEXT: ret
@@ -302,13 +286,12 @@ define <2 x half> @vfmin_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) {
; ZVFH-LABEL: vfmin_v2f16_vv_nnana:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFH-NEXT: vfadd.vv v10, v8, v8
; ZVFH-NEXT: vfadd.vv v8, v8, v8
; ZVFH-NEXT: vmfeq.vv v0, v9, v9
; ZVFH-NEXT: vmfeq.vv v8, v10, v10
; ZVFH-NEXT: vmerge.vvm v11, v9, v10, v0
; ZVFH-NEXT: vmv1r.v v0, v8
; ZVFH-NEXT: vmerge.vvm v8, v10, v9, v0
; ZVFH-NEXT: vfmin.vv v8, v11, v8
; ZVFH-NEXT: vmerge.vvm v10, v9, v8, v0
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmerge.vvm v8, v8, v9, v0
; ZVFH-NEXT: vfmin.vv v8, v10, v8
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmin_v2f16_vv_nnana:
@@ -319,16 +302,15 @@ define <2 x half> @vfmin_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) {
; ZVFHMIN-NEXT: vfadd.vv v8, v10, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11
; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v8, v9, v9
; ZVFHMIN-NEXT: vmerge.vvm v10, v11, v9, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v11, v0
; ZVFHMIN-NEXT: vmerge.vvm v10, v8, v9, v0
; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9
; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v8, v0
; ZVFHMIN-NEXT: vfmin.vv v9, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
@@ -343,13 +325,12 @@ define <2 x half> @vfmin_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) {
; ZVFH-LABEL: vfmin_v2f16_vv_nnanb:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFH-NEXT: vfadd.vv v10, v9, v9
; ZVFH-NEXT: vfadd.vv v9, v9, v9
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmfeq.vv v9, v10, v10
; ZVFH-NEXT: vmerge.vvm v11, v8, v10, v0
; ZVFH-NEXT: vmv1r.v v0, v9
; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0
; ZVFH-NEXT: vfmin.vv v8, v8, v11
; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0
; ZVFH-NEXT: vmfeq.vv v0, v9, v9
; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT: vfmin.vv v8, v8, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmin_v2f16_vv_nnanb:
@@ -364,12 +345,11 @@ define <2 x half> @vfmin_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11
; ZVFHMIN-NEXT: vmerge.vvm v10, v9, v11, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v9, v0
; ZVFHMIN-NEXT: vmerge.vvm v10, v9, v8, v0
; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v9, v0
; ZVFHMIN-NEXT: vfmin.vv v9, v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
@@ -10,9 +10,9 @@ define <2 x half> @nearbyint_v2f16(<2 x half> %v) strictfp {
; CHECK-LABEL: nearbyint_v2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: lui a0, %hi(.LCPI0_0)
; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0)
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, fa5
@@ -33,9 +33,9 @@ define <4 x half> @nearbyint_v4f16(<4 x half> %v) strictfp {
; CHECK-LABEL: nearbyint_v4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: lui a0, %hi(.LCPI1_0)
; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0)
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, fa5
@@ -56,9 +56,9 @@ define <8 x half> @nearbyint_v8f16(<8 x half> %v) strictfp {
; CHECK-LABEL: nearbyint_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: lui a0, %hi(.LCPI2_0)
; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0)
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, fa5
@@ -79,9 +79,9 @@ define <16 x half> @nearbyint_v16f16(<16 x half> %v) strictfp {
; CHECK-LABEL: nearbyint_v16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: lui a0, %hi(.LCPI3_0)
; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0)
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, fa5
@@ -103,9 +103,9 @@ define <32 x half> @nearbyint_v32f16(<32 x half> %v) strictfp {
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: lui a0, %hi(.LCPI4_0)
; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0)
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, fa5
@@ -218,9 +218,9 @@ define <2 x double> @nearbyint_v2f64(<2 x double> %v) strictfp {
; CHECK-LABEL: nearbyint_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: lui a0, %hi(.LCPI9_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI9_0)(a0)
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, fa5
@@ -241,9 +241,9 @@ define <4 x double> @nearbyint_v4f64(<4 x double> %v) strictfp {
; CHECK-LABEL: nearbyint_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: lui a0, %hi(.LCPI10_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI10_0)(a0)
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, fa5
@@ -264,9 +264,9 @@ define <8 x double> @nearbyint_v8f64(<8 x double> %v) strictfp {
; CHECK-LABEL: nearbyint_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: lui a0, %hi(.LCPI11_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0)
; CHECK-NEXT: vmfne.vv v0, v8, v8
; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, fa5
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1407,8 +1407,8 @@ define <8 x float> @buildvec_v8f32_zvl256(float %e0, float %e1, float %e2, float
; CHECK-NEXT: vfmv.v.f v8, fa4
; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
; CHECK-NEXT: ret
%v0 = insertelement <8 x float> poison, float %e0, i64 0
@@ -1458,8 +1458,8 @@ define <8 x double> @buildvec_v8f64_zvl512(double %e0, double %e1, double %e2, d
; CHECK-NEXT: vfmv.v.f v8, fa4
; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
; CHECK-NEXT: ret
%v0 = insertelement <8 x double> poison, double %e0, i64 0