190 changes: 87 additions & 103 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll
@@ -631,59 +631,46 @@ define <256 x i1> @icmp_eq_vv_v256i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1>
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a4, 24
; CHECK-NEXT: mul a1, a1, a4
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: li a4, 128
; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma
; CHECK-NEXT: vle8.v v24, (a0)
; CHECK-NEXT: vmv1r.v v2, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: mv a1, a3
; CHECK-NEXT: bltu a3, a4, .LBB51_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: li a1, 128
; CHECK-NEXT: .LBB51_2:
; CHECK-NEXT: li a4, 0
; CHECK-NEXT: vlm.v v24, (a2)
; CHECK-NEXT: vle8.v v16, (a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: addi a0, a3, -128
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vmseq.vv v1, v8, v16, v0.t
; CHECK-NEXT: bltu a3, a0, .LBB51_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: mv a4, a0
; CHECK-NEXT: .LBB51_4:
; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vlm.v v0, (a2)
; CHECK-NEXT: addi a2, a0, 128
; CHECK-NEXT: vle8.v v8, (a2)
; CHECK-NEXT: addi a2, a3, -128
; CHECK-NEXT: sltu a4, a3, a2
; CHECK-NEXT: addi a4, a4, -1
; CHECK-NEXT: vle8.v v24, (a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: and a0, a4, a2
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT: vmseq.vv v1, v16, v8, v0.t
; CHECK-NEXT: bltu a3, a1, .LBB51_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a3, 128
; CHECK-NEXT: .LBB51_2:
; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v2
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmseq.vv v8, v16, v24, v0.t
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vmseq.vv v16, v8, v24, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vmv1r.v v8, v1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 24
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
@@ -694,23 +681,20 @@ define <256 x i1> @icmp_eq_vv_v256i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1>
define <256 x i1> @icmp_eq_vx_v256i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_eq_vx_v256i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: li a3, 128
; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
; CHECK-NEXT: vlm.v v25, (a1)
; CHECK-NEXT: addi a4, a2, -128
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: bltu a2, a4, .LBB52_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a4
; CHECK-NEXT: .LBB52_2:
; CHECK-NEXT: vlm.v v0, (a1)
; CHECK-NEXT: addi a1, a2, -128
; CHECK-NEXT: sltu a4, a2, a1
; CHECK-NEXT: addi a4, a4, -1
; CHECK-NEXT: and a1, a4, a1
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vmseq.vx v25, v16, a0, v0.t
; CHECK-NEXT: bltu a2, a3, .LBB52_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: bltu a2, a3, .LBB52_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a2, 128
; CHECK-NEXT: .LBB52_4:
; CHECK-NEXT: .LBB52_2:
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t
@@ -726,23 +710,20 @@ define <256 x i1> @icmp_eq_vx_v256i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 z
define <256 x i1> @icmp_eq_vx_swap_v256i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_eq_vx_swap_v256i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: li a3, 128
; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
; CHECK-NEXT: vlm.v v25, (a1)
; CHECK-NEXT: addi a4, a2, -128
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: bltu a2, a4, .LBB53_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a4
; CHECK-NEXT: .LBB53_2:
; CHECK-NEXT: vlm.v v0, (a1)
; CHECK-NEXT: addi a1, a2, -128
; CHECK-NEXT: sltu a4, a2, a1
; CHECK-NEXT: addi a4, a4, -1
; CHECK-NEXT: and a1, a4, a1
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vmseq.vx v25, v16, a0, v0.t
; CHECK-NEXT: bltu a2, a3, .LBB53_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: bltu a2, a3, .LBB53_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a2, 128
; CHECK-NEXT: .LBB53_4:
; CHECK-NEXT: .LBB53_2:
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t
@@ -1338,41 +1319,48 @@ define <64 x i1> @icmp_eq_vv_v64i32(<64 x i32> %va, <64 x i32> %vb, <64 x i1> %m
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: vmv1r.v v1, v0
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: li a3, 0
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: addi a4, a0, 128
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v24, (a4)
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: addi a3, sp, 16
; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-NEXT: addi a4, a2, -32
; CHECK-NEXT: addi a0, a2, -32
; CHECK-NEXT: sltu a3, a2, a0
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a0, a3, a0
; CHECK-NEXT: vslidedown.vi v0, v0, 4
; CHECK-NEXT: bltu a2, a4, .LBB99_2
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vmseq.vv v1, v16, v8, v0.t
; CHECK-NEXT: bltu a2, a1, .LBB99_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a3, a4
; CHECK-NEXT: .LBB99_2:
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT: vmseq.vv v2, v16, v24, v0.t
; CHECK-NEXT: bltu a2, a1, .LBB99_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a2, 32
; CHECK-NEXT: .LBB99_4:
; CHECK-NEXT: .LBB99_2:
; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmseq.vv v16, v24, v8, v0.t
; CHECK-NEXT: vmseq.vv v16, v8, v24, v0.t
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vi v16, v2, 4
; CHECK-NEXT: vslideup.vi v16, v1, 4
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
@@ -1384,21 +1372,19 @@ define <64 x i1> @icmp_eq_vx_v64i32(<64 x i32> %va, i32 %b, <64 x i1> %m, i32 ze
; CHECK-LABEL: icmp_eq_vx_v64i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-NEXT: addi a3, a1, -32
; CHECK-NEXT: vslidedown.vi v0, v0, 4
; CHECK-NEXT: bltu a1, a3, .LBB100_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a2, a3
; CHECK-NEXT: .LBB100_2:
; CHECK-NEXT: addi a2, a1, -32
; CHECK-NEXT: sltu a3, a1, a2
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT: li a2, 32
; CHECK-NEXT: vmseq.vx v25, v16, a0, v0.t
; CHECK-NEXT: bltu a1, a2, .LBB100_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: bltu a1, a2, .LBB100_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: .LBB100_4:
; CHECK-NEXT: .LBB100_2:
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t
@@ -1416,21 +1402,19 @@ define <64 x i1> @icmp_eq_vx_swap_v64i32(<64 x i32> %va, i32 %b, <64 x i1> %m, i
; CHECK-LABEL: icmp_eq_vx_swap_v64i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-NEXT: addi a3, a1, -32
; CHECK-NEXT: vslidedown.vi v0, v0, 4
; CHECK-NEXT: bltu a1, a3, .LBB101_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a2, a3
; CHECK-NEXT: .LBB101_2:
; CHECK-NEXT: addi a2, a1, -32
; CHECK-NEXT: sltu a3, a1, a2
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT: li a2, 32
; CHECK-NEXT: vmseq.vx v25, v16, a0, v0.t
; CHECK-NEXT: bltu a1, a2, .LBB101_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: bltu a1, a2, .LBB101_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: .LBB101_4:
; CHECK-NEXT: .LBB101_2:
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t
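Note on the recurring pattern above (a reviewer-added aside, not part of the patch): the old output computed the second half's EVL with a branch (`li ..., 0` / `bltu` / `mv`), while the new output folds it into a branchless saturating subtract, e.g. `addi a2, a3, -128; sltu a4, a3, a2; addi a4, a4, -1; and a0, a4, a2`. Below is a minimal C sketch of that idiom using the 128-lane split from icmp_eq_vv_v256i8 above; the function name and the C rendering are illustrative assumptions, not code taken from LLVM.

```c
#include <stdint.h>

/* Branchless max(evl - 128, 0), mirroring the new CHECK lines:
 *   addi a2, a3, -128   -> d = evl - 128 (may wrap)
 *   sltu a4, a3, a2     -> borrow = (evl < d), i.e. the subtraction wrapped
 *   addi a4, a4, -1     -> mask = borrow ? 0 : ~0
 *   and  a0, a4, a2     -> result = wrapped ? 0 : d
 */
static inline uint64_t evl_high_half(uint64_t evl) {
    uint64_t d = evl - 128;
    uint64_t borrow = (evl < d); /* 1 iff the subtraction wrapped past zero */
    uint64_t mask = borrow - 1;  /* 0 when it wrapped, all-ones otherwise */
    return mask & d;             /* 0 when evl < 128, evl - 128 otherwise */
}
```

The same shape appears with -16 in the v32/v64-element tests below; the net effect is one conditional branch per function instead of two, which is why the .LBB*_4 labels collapse to .LBB*_2 in the new output.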
32 changes: 14 additions & 18 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll
@@ -154,23 +154,21 @@ define <32 x i64> @vsext_v32i64_v32i32(<32 x i32> %va, <32 x i1> %m, i32 zeroext
; CHECK-LABEL: vsext_v32i64_v32i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v1, v0
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: addi a2, a0, -16
; CHECK-NEXT: vslidedown.vi v0, v0, 2
; CHECK-NEXT: bltu a0, a2, .LBB12_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a2
; CHECK-NEXT: .LBB12_2:
; CHECK-NEXT: addi a1, a0, -16
; CHECK-NEXT: sltu a2, a0, a1
; CHECK-NEXT: addi a2, a2, -1
; CHECK-NEXT: and a1, a2, a1
; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT: vslidedown.vi v24, v8, 16
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vsext.vf2 v16, v24, v0.t
; CHECK-NEXT: bltu a0, a1, .LBB12_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: bltu a0, a1, .LBB12_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB12_4:
; CHECK-NEXT: .LBB12_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vsext.vf2 v24, v8, v0.t
@@ -183,21 +181,19 @@ define <32 x i64> @vsext_v32i64_v32i32_unmasked(<32 x i32> %va, i32 zeroext %evl
define <32 x i64> @vsext_v32i64_v32i32_unmasked(<32 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vsext_v32i64_v32i32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a2, a0, -16
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: bltu a0, a2, .LBB13_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a2
; CHECK-NEXT: .LBB13_2:
; CHECK-NEXT: addi a1, a0, -16
; CHECK-NEXT: sltu a2, a0, a1
; CHECK-NEXT: addi a2, a2, -1
; CHECK-NEXT: and a1, a2, a1
; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT: vslidedown.vi v24, v8, 16
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vsext.vf2 v16, v24
; CHECK-NEXT: bltu a0, a1, .LBB13_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: bltu a0, a1, .LBB13_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB13_4:
; CHECK-NEXT: .LBB13_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vsext.vf2 v24, v8
; CHECK-NEXT: vmv.v.v v8, v24
39 changes: 18 additions & 21 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll
@@ -309,21 +309,19 @@ define <32 x double> @vsitofp_v32f64_v32i64(<32 x i64> %va, <32 x i1> %m, i32 ze
; CHECK-LABEL: vsitofp_v32f64_v32i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: addi a2, a0, -16
; CHECK-NEXT: vslidedown.vi v0, v0, 2
; CHECK-NEXT: bltu a0, a2, .LBB25_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a2
; CHECK-NEXT: .LBB25_2:
; CHECK-NEXT: addi a1, a0, -16
; CHECK-NEXT: sltu a2, a0, a1
; CHECK-NEXT: addi a2, a2, -1
; CHECK-NEXT: and a1, a2, a1
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: bltu a0, a1, .LBB25_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: bltu a0, a1, .LBB25_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB25_4:
; CHECK-NEXT: .LBB25_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
@@ -335,21 +333,20 @@ define <32 x double> @vsitofp_v32f64_v32i64(<32 x i64> %va, <32 x i1> %m, i32 ze
define <32 x double> @vsitofp_v32f64_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
; CHECK-LABEL: vsitofp_v32f64_v32i64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, a0, -16
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: bltu a0, a1, .LBB26_2
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB26_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vfcvt.f.x.v v16, v16
; CHECK-NEXT: bltu a0, a1, .LBB26_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB26_4:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfcvt.f.x.v v8, v8
; CHECK-NEXT: addi a1, a0, -16
; CHECK-NEXT: sltu a0, a0, a1
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfcvt.f.x.v v16, v16
; CHECK-NEXT: ret
%v = call <32 x double> @llvm.vp.sitofp.v32f64.v32i64(<32 x i64> %va, <32 x i1> shufflevector (<32 x i1> insertelement (<32 x i1> undef, i1 true, i32 0), <32 x i1> undef, <32 x i32> zeroinitializer), i32 %evl)
ret <32 x double> %v
194 changes: 90 additions & 104 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
@@ -605,50 +605,48 @@ declare <3 x double> @llvm.experimental.vp.strided.load.v3f64.p0f64.i32(double*,
define <32 x double> @strided_vpload_v32f64(double* %ptr, i32 signext %stride, <32 x i1> %m, i32 zeroext %evl) nounwind {
; CHECK-RV32-LABEL: strided_vpload_v32f64:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: addi a4, a2, -16
; CHECK-RV32-NEXT: li a4, 16
; CHECK-RV32-NEXT: vmv1r.v v8, v0
; CHECK-RV32-NEXT: li a3, 0
; CHECK-RV32-NEXT: mv a3, a2
; CHECK-RV32-NEXT: bltu a2, a4, .LBB33_2
; CHECK-RV32-NEXT: # %bb.1:
; CHECK-RV32-NEXT: mv a3, a4
; CHECK-RV32-NEXT: li a3, 16
; CHECK-RV32-NEXT: .LBB33_2:
; CHECK-RV32-NEXT: li a4, 16
; CHECK-RV32-NEXT: bltu a2, a4, .LBB33_4
; CHECK-RV32-NEXT: # %bb.3:
; CHECK-RV32-NEXT: li a2, 16
; CHECK-RV32-NEXT: .LBB33_4:
; CHECK-RV32-NEXT: mul a4, a2, a1
; CHECK-RV32-NEXT: mul a4, a3, a1
; CHECK-RV32-NEXT: add a4, a0, a4
; CHECK-RV32-NEXT: addi a5, a2, -16
; CHECK-RV32-NEXT: sltu a2, a2, a5
; CHECK-RV32-NEXT: addi a2, a2, -1
; CHECK-RV32-NEXT: and a2, a2, a5
; CHECK-RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 2
; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV32-NEXT: vlse64.v v16, (a4), a1, v0.t
; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-RV32-NEXT: vlse64.v v16, (a4), a1, v0.t
; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV32-NEXT: vmv1r.v v0, v8
; CHECK-RV32-NEXT: vlse64.v v8, (a0), a1, v0.t
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64-LABEL: strided_vpload_v32f64:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: addi a4, a2, -16
; CHECK-RV64-NEXT: li a4, 16
; CHECK-RV64-NEXT: vmv1r.v v8, v0
; CHECK-RV64-NEXT: li a3, 0
; CHECK-RV64-NEXT: mv a3, a2
; CHECK-RV64-NEXT: bltu a2, a4, .LBB33_2
; CHECK-RV64-NEXT: # %bb.1:
; CHECK-RV64-NEXT: mv a3, a4
; CHECK-RV64-NEXT: li a3, 16
; CHECK-RV64-NEXT: .LBB33_2:
; CHECK-RV64-NEXT: li a4, 16
; CHECK-RV64-NEXT: bltu a2, a4, .LBB33_4
; CHECK-RV64-NEXT: # %bb.3:
; CHECK-RV64-NEXT: li a2, 16
; CHECK-RV64-NEXT: .LBB33_4:
; CHECK-RV64-NEXT: mul a4, a2, a1
; CHECK-RV64-NEXT: mul a4, a3, a1
; CHECK-RV64-NEXT: add a4, a0, a4
; CHECK-RV64-NEXT: addi a5, a2, -16
; CHECK-RV64-NEXT: sltu a2, a2, a5
; CHECK-RV64-NEXT: addi a2, a2, -1
; CHECK-RV64-NEXT: and a2, a2, a5
; CHECK-RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 2
; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV64-NEXT: vlse64.v v16, (a4), a1, v0.t
; CHECK-RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-RV64-NEXT: vlse64.v v16, (a4), a1, v0.t
; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV64-NEXT: vmv1r.v v0, v8
; CHECK-RV64-NEXT: vlse64.v v8, (a0), a1, v0.t
; CHECK-RV64-NEXT: ret
@@ -659,43 +657,41 @@ define <32 x double> @strided_vpload_v32f64(double* %ptr, i32 signext %stride, <
define <32 x double> @strided_vpload_v32f64_allones_mask(double* %ptr, i32 signext %stride, i32 zeroext %evl) nounwind {
; CHECK-RV32-LABEL: strided_vpload_v32f64_allones_mask:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: addi a4, a2, -16
; CHECK-RV32-NEXT: li a3, 0
; CHECK-RV32-NEXT: li a4, 16
; CHECK-RV32-NEXT: mv a3, a2
; CHECK-RV32-NEXT: bltu a2, a4, .LBB34_2
; CHECK-RV32-NEXT: # %bb.1:
; CHECK-RV32-NEXT: mv a3, a4
; CHECK-RV32-NEXT: li a3, 16
; CHECK-RV32-NEXT: .LBB34_2:
; CHECK-RV32-NEXT: li a4, 16
; CHECK-RV32-NEXT: bltu a2, a4, .LBB34_4
; CHECK-RV32-NEXT: # %bb.3:
; CHECK-RV32-NEXT: li a2, 16
; CHECK-RV32-NEXT: .LBB34_4:
; CHECK-RV32-NEXT: mul a4, a2, a1
; CHECK-RV32-NEXT: mul a4, a3, a1
; CHECK-RV32-NEXT: add a4, a0, a4
; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV32-NEXT: vlse64.v v16, (a4), a1
; CHECK-RV32-NEXT: addi a5, a2, -16
; CHECK-RV32-NEXT: sltu a2, a2, a5
; CHECK-RV32-NEXT: addi a2, a2, -1
; CHECK-RV32-NEXT: and a2, a2, a5
; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-RV32-NEXT: vlse64.v v16, (a4), a1
; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV32-NEXT: vlse64.v v8, (a0), a1
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64-LABEL: strided_vpload_v32f64_allones_mask:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: addi a4, a2, -16
; CHECK-RV64-NEXT: li a3, 0
; CHECK-RV64-NEXT: li a4, 16
; CHECK-RV64-NEXT: mv a3, a2
; CHECK-RV64-NEXT: bltu a2, a4, .LBB34_2
; CHECK-RV64-NEXT: # %bb.1:
; CHECK-RV64-NEXT: mv a3, a4
; CHECK-RV64-NEXT: li a3, 16
; CHECK-RV64-NEXT: .LBB34_2:
; CHECK-RV64-NEXT: li a4, 16
; CHECK-RV64-NEXT: bltu a2, a4, .LBB34_4
; CHECK-RV64-NEXT: # %bb.3:
; CHECK-RV64-NEXT: li a2, 16
; CHECK-RV64-NEXT: .LBB34_4:
; CHECK-RV64-NEXT: mul a4, a2, a1
; CHECK-RV64-NEXT: mul a4, a3, a1
; CHECK-RV64-NEXT: add a4, a0, a4
; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV64-NEXT: vlse64.v v16, (a4), a1
; CHECK-RV64-NEXT: addi a5, a2, -16
; CHECK-RV64-NEXT: sltu a2, a2, a5
; CHECK-RV64-NEXT: addi a2, a2, -1
; CHECK-RV64-NEXT: and a2, a2, a5
; CHECK-RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-RV64-NEXT: vlse64.v v16, (a4), a1
; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV64-NEXT: vlse64.v v8, (a0), a1
; CHECK-RV64-NEXT: ret
%one = insertelement <32 x i1> poison, i1 true, i32 0
@@ -717,51 +713,46 @@ define <33 x double> @strided_load_v33f64(double* %ptr, i64 %stride, <33 x i1> %
; CHECK-RV32-NEXT: # %bb.1:
; CHECK-RV32-NEXT: li a3, 32
; CHECK-RV32-NEXT: .LBB35_2:
; CHECK-RV32-NEXT: addi a5, a3, -16
; CHECK-RV32-NEXT: li a7, 0
; CHECK-RV32-NEXT: bltu a3, a5, .LBB35_4
; CHECK-RV32-NEXT: mul a5, a3, a2
; CHECK-RV32-NEXT: addi a6, a4, -32
; CHECK-RV32-NEXT: sltu a4, a4, a6
; CHECK-RV32-NEXT: addi a4, a4, -1
; CHECK-RV32-NEXT: and a6, a4, a6
; CHECK-RV32-NEXT: li a4, 16
; CHECK-RV32-NEXT: add a5, a1, a5
; CHECK-RV32-NEXT: bltu a6, a4, .LBB35_4
; CHECK-RV32-NEXT: # %bb.3:
; CHECK-RV32-NEXT: mv a7, a5
; CHECK-RV32-NEXT: .LBB35_4:
; CHECK-RV32-NEXT: li a6, 16
; CHECK-RV32-NEXT: mv a5, a3
; CHECK-RV32-NEXT: bltu a3, a6, .LBB35_6
; CHECK-RV32-NEXT: .LBB35_4:
; CHECK-RV32-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 4
; CHECK-RV32-NEXT: vsetvli zero, a6, e64, m8, ta, ma
; CHECK-RV32-NEXT: vlse64.v v16, (a5), a2, v0.t
; CHECK-RV32-NEXT: addi a5, a3, -16
; CHECK-RV32-NEXT: sltu a6, a3, a5
; CHECK-RV32-NEXT: addi a6, a6, -1
; CHECK-RV32-NEXT: and a5, a6, a5
; CHECK-RV32-NEXT: bltu a3, a4, .LBB35_6
; CHECK-RV32-NEXT: # %bb.5:
; CHECK-RV32-NEXT: li a5, 16
; CHECK-RV32-NEXT: li a3, 16
; CHECK-RV32-NEXT: .LBB35_6:
; CHECK-RV32-NEXT: mul t0, a5, a2
; CHECK-RV32-NEXT: add t0, a1, t0
; CHECK-RV32-NEXT: mul a4, a3, a2
; CHECK-RV32-NEXT: add a4, a1, a4
; CHECK-RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 2
; CHECK-RV32-NEXT: vsetvli zero, a7, e64, m8, ta, ma
; CHECK-RV32-NEXT: vlse64.v v16, (t0), a2, v0.t
; CHECK-RV32-NEXT: addi t0, a4, -32
; CHECK-RV32-NEXT: li a7, 0
; CHECK-RV32-NEXT: bltu a4, t0, .LBB35_8
; CHECK-RV32-NEXT: # %bb.7:
; CHECK-RV32-NEXT: mv a7, t0
; CHECK-RV32-NEXT: .LBB35_8:
; CHECK-RV32-NEXT: bltu a7, a6, .LBB35_10
; CHECK-RV32-NEXT: # %bb.9:
; CHECK-RV32-NEXT: li a7, 16
; CHECK-RV32-NEXT: .LBB35_10:
; CHECK-RV32-NEXT: mul a3, a3, a2
; CHECK-RV32-NEXT: add a3, a1, a3
; CHECK-RV32-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 4
; CHECK-RV32-NEXT: vsetvli zero, a7, e64, m8, ta, ma
; CHECK-RV32-NEXT: vlse64.v v24, (a3), a2, v0.t
; CHECK-RV32-NEXT: vsetvli zero, a5, e64, m8, ta, ma
; CHECK-RV32-NEXT: vlse64.v v24, (a4), a2, v0.t
; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV32-NEXT: vmv1r.v v0, v8
; CHECK-RV32-NEXT: vlse64.v v8, (a1), a2, v0.t
; CHECK-RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-RV32-NEXT: vse64.v v8, (a0)
; CHECK-RV32-NEXT: addi a1, a0, 256
; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
; CHECK-RV32-NEXT: vse64.v v24, (a1)
; CHECK-RV32-NEXT: vse64.v v16, (a1)
; CHECK-RV32-NEXT: addi a0, a0, 128
; CHECK-RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-RV32-NEXT: vse64.v v16, (a0)
; CHECK-RV32-NEXT: vse64.v v24, (a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64-LABEL: strided_load_v33f64:
Expand All @@ -773,51 +764,46 @@ define <33 x double> @strided_load_v33f64(double* %ptr, i64 %stride, <33 x i1> %
; CHECK-RV64-NEXT: # %bb.1:
; CHECK-RV64-NEXT: li a4, 32
; CHECK-RV64-NEXT: .LBB35_2:
; CHECK-RV64-NEXT: addi a5, a4, -16
; CHECK-RV64-NEXT: li a7, 0
; CHECK-RV64-NEXT: bltu a4, a5, .LBB35_4
; CHECK-RV64-NEXT: mul a5, a4, a2
; CHECK-RV64-NEXT: addi a6, a3, -32
; CHECK-RV64-NEXT: sltu a3, a3, a6
; CHECK-RV64-NEXT: addi a3, a3, -1
; CHECK-RV64-NEXT: and a6, a3, a6
; CHECK-RV64-NEXT: li a3, 16
; CHECK-RV64-NEXT: add a5, a1, a5
; CHECK-RV64-NEXT: bltu a6, a3, .LBB35_4
; CHECK-RV64-NEXT: # %bb.3:
; CHECK-RV64-NEXT: mv a7, a5
; CHECK-RV64-NEXT: .LBB35_4:
; CHECK-RV64-NEXT: li a6, 16
; CHECK-RV64-NEXT: mv a5, a4
; CHECK-RV64-NEXT: bltu a4, a6, .LBB35_6
; CHECK-RV64-NEXT: .LBB35_4:
; CHECK-RV64-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 4
; CHECK-RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
; CHECK-RV64-NEXT: vlse64.v v16, (a5), a2, v0.t
; CHECK-RV64-NEXT: addi a5, a4, -16
; CHECK-RV64-NEXT: sltu a6, a4, a5
; CHECK-RV64-NEXT: addi a6, a6, -1
; CHECK-RV64-NEXT: and a5, a6, a5
; CHECK-RV64-NEXT: bltu a4, a3, .LBB35_6
; CHECK-RV64-NEXT: # %bb.5:
; CHECK-RV64-NEXT: li a5, 16
; CHECK-RV64-NEXT: li a4, 16
; CHECK-RV64-NEXT: .LBB35_6:
; CHECK-RV64-NEXT: mul t0, a5, a2
; CHECK-RV64-NEXT: add t0, a1, t0
; CHECK-RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 2
; CHECK-RV64-NEXT: vsetvli zero, a7, e64, m8, ta, ma
; CHECK-RV64-NEXT: vlse64.v v16, (t0), a2, v0.t
; CHECK-RV64-NEXT: addi t0, a3, -32
; CHECK-RV64-NEXT: li a7, 0
; CHECK-RV64-NEXT: bltu a3, t0, .LBB35_8
; CHECK-RV64-NEXT: # %bb.7:
; CHECK-RV64-NEXT: mv a7, t0
; CHECK-RV64-NEXT: .LBB35_8:
; CHECK-RV64-NEXT: bltu a7, a6, .LBB35_10
; CHECK-RV64-NEXT: # %bb.9:
; CHECK-RV64-NEXT: li a7, 16
; CHECK-RV64-NEXT: .LBB35_10:
; CHECK-RV64-NEXT: mul a3, a4, a2
; CHECK-RV64-NEXT: add a3, a1, a3
; CHECK-RV64-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 4
; CHECK-RV64-NEXT: vsetvli zero, a7, e64, m8, ta, ma
; CHECK-RV64-NEXT: vlse64.v v24, (a3), a2, v0.t
; CHECK-RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 2
; CHECK-RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma
; CHECK-RV64-NEXT: vlse64.v v24, (a3), a2, v0.t
; CHECK-RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma
; CHECK-RV64-NEXT: vmv1r.v v0, v8
; CHECK-RV64-NEXT: vlse64.v v8, (a1), a2, v0.t
; CHECK-RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-RV64-NEXT: vse64.v v8, (a0)
; CHECK-RV64-NEXT: addi a1, a0, 256
; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m8, ta, ma
; CHECK-RV64-NEXT: vse64.v v24, (a1)
; CHECK-RV64-NEXT: vse64.v v16, (a1)
; CHECK-RV64-NEXT: addi a0, a0, 128
; CHECK-RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-RV64-NEXT: vse64.v v16, (a0)
; CHECK-RV64-NEXT: vse64.v v24, (a0)
; CHECK-RV64-NEXT: ret
%v = call <33 x double> @llvm.experimental.vp.strided.load.v33f64.p0f64.i64(double* %ptr, i64 %stride, <33 x i1> %mask, i32 %evl)
ret <33 x double> %v
64 changes: 28 additions & 36 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll
@@ -503,19 +503,17 @@ define void @strided_store_v32f64(<32 x double> %v, double* %ptr, i32 signext %s
; CHECK-RV32-NEXT: # %bb.1:
; CHECK-RV32-NEXT: li a3, 16
; CHECK-RV32-NEXT: .LBB27_2:
; CHECK-RV32-NEXT: li a4, 0
; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV32-NEXT: addi a5, a2, -16
; CHECK-RV32-NEXT: vsse64.v v8, (a0), a1, v0.t
; CHECK-RV32-NEXT: bltu a2, a5, .LBB27_4
; CHECK-RV32-NEXT: # %bb.3:
; CHECK-RV32-NEXT: mv a4, a5
; CHECK-RV32-NEXT: .LBB27_4:
; CHECK-RV32-NEXT: mul a2, a3, a1
; CHECK-RV32-NEXT: add a0, a0, a2
; CHECK-RV32-NEXT: mul a3, a3, a1
; CHECK-RV32-NEXT: add a0, a0, a3
; CHECK-RV32-NEXT: addi a3, a2, -16
; CHECK-RV32-NEXT: sltu a2, a2, a3
; CHECK-RV32-NEXT: addi a2, a2, -1
; CHECK-RV32-NEXT: and a2, a2, a3
; CHECK-RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-RV32-NEXT: vslidedown.vi v0, v0, 2
; CHECK-RV32-NEXT: vsetvli zero, a4, e64, m8, ta, ma
; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-RV32-NEXT: vsse64.v v16, (a0), a1, v0.t
; CHECK-RV32-NEXT: ret
;
@@ -527,19 +525,17 @@ define void @strided_store_v32f64(<32 x double> %v, double* %ptr, i32 signext %s
; CHECK-RV64-NEXT: # %bb.1:
; CHECK-RV64-NEXT: li a3, 16
; CHECK-RV64-NEXT: .LBB27_2:
; CHECK-RV64-NEXT: li a4, 0
; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV64-NEXT: addi a5, a2, -16
; CHECK-RV64-NEXT: vsse64.v v8, (a0), a1, v0.t
; CHECK-RV64-NEXT: bltu a2, a5, .LBB27_4
; CHECK-RV64-NEXT: # %bb.3:
; CHECK-RV64-NEXT: mv a4, a5
; CHECK-RV64-NEXT: .LBB27_4:
; CHECK-RV64-NEXT: mul a2, a3, a1
; CHECK-RV64-NEXT: add a0, a0, a2
; CHECK-RV64-NEXT: mul a3, a3, a1
; CHECK-RV64-NEXT: add a0, a0, a3
; CHECK-RV64-NEXT: addi a3, a2, -16
; CHECK-RV64-NEXT: sltu a2, a2, a3
; CHECK-RV64-NEXT: addi a2, a2, -1
; CHECK-RV64-NEXT: and a2, a2, a3
; CHECK-RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-RV64-NEXT: vslidedown.vi v0, v0, 2
; CHECK-RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma
; CHECK-RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-RV64-NEXT: vsse64.v v16, (a0), a1, v0.t
; CHECK-RV64-NEXT: ret
call void @llvm.experimental.vp.strided.store.v32f64.p0f64.i32(<32 x double> %v, double* %ptr, i32 %stride, <32 x i1> %mask, i32 %evl)
@@ -555,17 +551,15 @@ define void @strided_store_v32f64_allones_mask(<32 x double> %v, double *%ptr, i
; CHECK-RV32-NEXT: # %bb.1:
; CHECK-RV32-NEXT: li a3, 16
; CHECK-RV32-NEXT: .LBB28_2:
; CHECK-RV32-NEXT: li a4, 0
; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV32-NEXT: addi a5, a2, -16
; CHECK-RV32-NEXT: vsse64.v v8, (a0), a1
; CHECK-RV32-NEXT: bltu a2, a5, .LBB28_4
; CHECK-RV32-NEXT: # %bb.3:
; CHECK-RV32-NEXT: mv a4, a5
; CHECK-RV32-NEXT: .LBB28_4:
; CHECK-RV32-NEXT: mul a2, a3, a1
; CHECK-RV32-NEXT: add a0, a0, a2
; CHECK-RV32-NEXT: vsetvli zero, a4, e64, m8, ta, ma
; CHECK-RV32-NEXT: mul a3, a3, a1
; CHECK-RV32-NEXT: add a0, a0, a3
; CHECK-RV32-NEXT: addi a3, a2, -16
; CHECK-RV32-NEXT: sltu a2, a2, a3
; CHECK-RV32-NEXT: addi a2, a2, -1
; CHECK-RV32-NEXT: and a2, a2, a3
; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-RV32-NEXT: vsse64.v v16, (a0), a1
; CHECK-RV32-NEXT: ret
;
@@ -577,17 +571,15 @@ define void @strided_store_v32f64_allones_mask(<32 x double> %v, double *%ptr, i
; CHECK-RV64-NEXT: # %bb.1:
; CHECK-RV64-NEXT: li a3, 16
; CHECK-RV64-NEXT: .LBB28_2:
; CHECK-RV64-NEXT: li a4, 0
; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV64-NEXT: addi a5, a2, -16
; CHECK-RV64-NEXT: vsse64.v v8, (a0), a1
; CHECK-RV64-NEXT: bltu a2, a5, .LBB28_4
; CHECK-RV64-NEXT: # %bb.3:
; CHECK-RV64-NEXT: mv a4, a5
; CHECK-RV64-NEXT: .LBB28_4:
; CHECK-RV64-NEXT: mul a2, a3, a1
; CHECK-RV64-NEXT: add a0, a0, a2
; CHECK-RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma
; CHECK-RV64-NEXT: mul a3, a3, a1
; CHECK-RV64-NEXT: add a0, a0, a3
; CHECK-RV64-NEXT: addi a3, a2, -16
; CHECK-RV64-NEXT: sltu a2, a2, a3
; CHECK-RV64-NEXT: addi a2, a2, -1
; CHECK-RV64-NEXT: and a2, a2, a3
; CHECK-RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-RV64-NEXT: vsse64.v v16, (a0), a1
; CHECK-RV64-NEXT: ret
%one = insertelement <32 x i1> poison, i1 true, i32 0
39 changes: 18 additions & 21 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll
@@ -309,21 +309,19 @@ define <32 x double> @vuitofp_v32f64_v32i64(<32 x i64> %va, <32 x i1> %m, i32 ze
; CHECK-LABEL: vuitofp_v32f64_v32i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: addi a2, a0, -16
; CHECK-NEXT: vslidedown.vi v0, v0, 2
; CHECK-NEXT: bltu a0, a2, .LBB25_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a2
; CHECK-NEXT: .LBB25_2:
; CHECK-NEXT: addi a1, a0, -16
; CHECK-NEXT: sltu a2, a0, a1
; CHECK-NEXT: addi a2, a2, -1
; CHECK-NEXT: and a1, a2, a1
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vfcvt.f.xu.v v16, v16, v0.t
; CHECK-NEXT: bltu a0, a1, .LBB25_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: bltu a0, a1, .LBB25_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB25_4:
; CHECK-NEXT: .LBB25_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
@@ -335,21 +333,20 @@ define <32 x double> @vuitofp_v32f64_v32i64(<32 x i64> %va, <32 x i1> %m, i32 ze
define <32 x double> @vuitofp_v32f64_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
; CHECK-LABEL: vuitofp_v32f64_v32i64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, a0, -16
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: bltu a0, a1, .LBB26_2
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB26_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vfcvt.f.xu.v v16, v16
; CHECK-NEXT: bltu a0, a1, .LBB26_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB26_4:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfcvt.f.xu.v v8, v8
; CHECK-NEXT: addi a1, a0, -16
; CHECK-NEXT: sltu a0, a0, a1
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfcvt.f.xu.v v16, v16
; CHECK-NEXT: ret
%v = call <32 x double> @llvm.vp.uitofp.v32f64.v32i64(<32 x i64> %va, <32 x i1> shufflevector (<32 x i1> insertelement (<32 x i1> undef, i1 true, i32 0), <32 x i1> undef, <32 x i32> zeroinitializer), i32 %evl)
ret <32 x double> %v
126 changes: 58 additions & 68 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll
@@ -413,23 +413,20 @@ declare <256 x i8> @llvm.vp.add.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, i32)
define <256 x i8> @vadd_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vadd_vi_v258i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: li a2, 128
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vlm.v v25, (a0)
; CHECK-NEXT: addi a3, a1, -128
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: li a0, 0
; CHECK-NEXT: bltu a1, a3, .LBB32_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a3
; CHECK-NEXT: .LBB32_2:
; CHECK-NEXT: vlm.v v0, (a0)
; CHECK-NEXT: addi a0, a1, -128
; CHECK-NEXT: sltu a3, a1, a0
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a0, a3, a0
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t
; CHECK-NEXT: bltu a1, a2, .LBB32_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: bltu a1, a2, .LBB32_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a1, 128
; CHECK-NEXT: .LBB32_4:
; CHECK-NEXT: .LBB32_2:
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
@@ -443,21 +440,20 @@ define <256 x i8> @vadd_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %ev
define <256 x i8> @vadd_vi_v258i8_unmasked(<256 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vadd_vi_v258i8_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, a0, -128
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: bltu a0, a1, .LBB33_2
; CHECK-NEXT: li a2, 128
; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB33_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: .LBB33_2:
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: li a1, 128
; CHECK-NEXT: vadd.vi v16, v16, -1
; CHECK-NEXT: bltu a0, a1, .LBB33_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a0, 128
; CHECK-NEXT: .LBB33_4:
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT: .LBB33_2:
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vadd.vi v8, v8, -1
; CHECK-NEXT: addi a1, a0, -128
; CHECK-NEXT: sltu a0, a0, a1
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT: vadd.vi v16, v16, -1
; CHECK-NEXT: ret
%elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
%vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
@@ -1533,24 +1529,22 @@ define <32 x i64> @vadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
; RV32-LABEL: vadd_vx_v32i64:
; RV32: # %bb.0:
; RV32-NEXT: vmv1r.v v1, v0
; RV32-NEXT: li a1, 0
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: li a2, 32
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: addi a2, a0, -16
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT: vmv.v.i v24, -1
; RV32-NEXT: bltu a0, a2, .LBB108_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a1, a2
; RV32-NEXT: .LBB108_2:
; RV32-NEXT: addi a1, a0, -16
; RV32-NEXT: sltu a2, a0, a1
; RV32-NEXT: addi a2, a2, -1
; RV32-NEXT: and a1, a2, a1
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: li a1, 16
; RV32-NEXT: vadd.vv v16, v16, v24, v0.t
; RV32-NEXT: bltu a0, a1, .LBB108_4
; RV32-NEXT: # %bb.3:
; RV32-NEXT: bltu a0, a1, .LBB108_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a0, 16
; RV32-NEXT: .LBB108_4:
; RV32-NEXT: .LBB108_2:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vmv1r.v v0, v1
; RV32-NEXT: vadd.vv v8, v8, v24, v0.t
@@ -1559,21 +1553,19 @@ define <32 x i64> @vadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
; RV64-LABEL: vadd_vx_v32i64:
; RV64: # %bb.0:
; RV64-NEXT: vmv1r.v v24, v0
; RV64-NEXT: li a1, 0
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT: addi a2, a0, -16
; RV64-NEXT: vslidedown.vi v0, v0, 2
; RV64-NEXT: bltu a0, a2, .LBB108_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a1, a2
; RV64-NEXT: .LBB108_2:
; RV64-NEXT: addi a1, a0, -16
; RV64-NEXT: sltu a2, a0, a1
; RV64-NEXT: addi a2, a2, -1
; RV64-NEXT: and a1, a2, a1
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: li a1, 16
; RV64-NEXT: vadd.vi v16, v16, -1, v0.t
; RV64-NEXT: bltu a0, a1, .LBB108_4
; RV64-NEXT: # %bb.3:
; RV64-NEXT: bltu a0, a1, .LBB108_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a0, 16
; RV64-NEXT: .LBB108_4:
; RV64-NEXT: .LBB108_2:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: vadd.vi v8, v8, -1, v0.t
@@ -1587,43 +1579,41 @@ define <32 x i64> @vadd_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
define <32 x i64> @vadd_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vadd_vi_v32i64_unmasked:
; RV32: # %bb.0:
; RV32-NEXT: li a1, 0
; RV32-NEXT: li a2, 32
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: addi a2, a0, -16
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT: li a2, 16
; RV32-NEXT: vmv.v.i v24, -1
; RV32-NEXT: mv a1, a0
; RV32-NEXT: bltu a0, a2, .LBB109_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a1, a2
; RV32-NEXT: li a1, 16
; RV32-NEXT: .LBB109_2:
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: li a1, 16
; RV32-NEXT: vadd.vv v16, v16, v24
; RV32-NEXT: bltu a0, a1, .LBB109_4
; RV32-NEXT: # %bb.3:
; RV32-NEXT: li a0, 16
; RV32-NEXT: .LBB109_4:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vadd.vv v8, v8, v24
; RV32-NEXT: addi a1, a0, -16
; RV32-NEXT: sltu a0, a0, a1
; RV32-NEXT: addi a0, a0, -1
; RV32-NEXT: and a0, a0, a1
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vadd.vv v16, v16, v24
; RV32-NEXT: ret
;
; RV64-LABEL: vadd_vi_v32i64_unmasked:
; RV64: # %bb.0:
; RV64-NEXT: addi a1, a0, -16
; RV64-NEXT: li a2, 0
; RV64-NEXT: bltu a0, a1, .LBB109_2
; RV64-NEXT: li a2, 16
; RV64-NEXT: mv a1, a0
; RV64-NEXT: bltu a0, a2, .LBB109_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a2, a1
; RV64-NEXT: .LBB109_2:
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: li a1, 16
; RV64-NEXT: vadd.vi v16, v16, -1
; RV64-NEXT: bltu a0, a1, .LBB109_4
; RV64-NEXT: # %bb.3:
; RV64-NEXT: li a0, 16
; RV64-NEXT: .LBB109_4:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: .LBB109_2:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vadd.vi v8, v8, -1
; RV64-NEXT: addi a1, a0, -16
; RV64-NEXT: sltu a0, a0, a1
; RV64-NEXT: addi a0, a0, -1
; RV64-NEXT: and a0, a0, a1
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vadd.vi v16, v16, -1
; RV64-NEXT: ret
%elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
%vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
79 changes: 32 additions & 47 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll
@@ -324,59 +324,45 @@ define <32 x double> @vfsgnj_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a3, 24
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: vmv1r.v v1, v0
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v0, v0, 2
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v24, (a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: addi a3, a2, -16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: bltu a2, a3, .LBB26_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a3
; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v0, v0, 2
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: vle64.v v8, (a1)
; CHECK-NEXT: addi a1, a2, -16
; CHECK-NEXT: sltu a3, a2, a1
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a1, a3, a1
; CHECK-NEXT: vle64.v v24, (a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vfsgnj.vv v16, v16, v8, v0.t
; CHECK-NEXT: bltu a2, a0, .LBB26_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: bltu a2, a0, .LBB26_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: .LBB26_4:
; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfsgnj.vv v8, v8, v24, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 24
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
@@ -390,22 +376,21 @@ define <32 x double> @vfsgnj_vv_v32f64_unmasked(<32 x double> %va, <32 x double>
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v24, (a1)
; CHECK-NEXT: addi a3, a2, -16
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: bltu a2, a3, .LBB27_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a3
; CHECK-NEXT: .LBB27_2:
; CHECK-NEXT: vle64.v v0, (a0)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: bltu a2, a1, .LBB27_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: vfsgnj.vv v16, v16, v24
; CHECK-NEXT: bltu a2, a0, .LBB27_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: .LBB27_4:
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: .LBB27_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfsgnj.vv v8, v8, v0
; CHECK-NEXT: addi a0, a2, -16
; CHECK-NEXT: sltu a1, a2, a0
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: and a0, a1, a0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfsgnj.vv v16, v16, v24
; CHECK-NEXT: ret
%head = insertelement <32 x i1> poison, i1 true, i32 0
%m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
39 changes: 18 additions & 21 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll
@@ -322,21 +322,19 @@ define <32 x double> @vfabs_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe
; CHECK-LABEL: vfabs_vv_v32f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: addi a2, a0, -16
; CHECK-NEXT: vslidedown.vi v0, v0, 2
; CHECK-NEXT: bltu a0, a2, .LBB26_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a2
; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: addi a1, a0, -16
; CHECK-NEXT: sltu a2, a0, a1
; CHECK-NEXT: addi a2, a2, -1
; CHECK-NEXT: and a1, a2, a1
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vfabs.v v16, v16, v0.t
; CHECK-NEXT: bltu a0, a1, .LBB26_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: bltu a0, a1, .LBB26_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB26_4:
; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfabs.v v8, v8, v0.t
@@ -348,21 +346,20 @@ define <32 x double> @vfabs_vv_v32f64_unmasked(<32 x double> %va, i32 zeroext %e
define <32 x double> @vfabs_vv_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_v32f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, a0, -16
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: bltu a0, a1, .LBB27_2
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB27_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: .LBB27_2:
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vfabs.v v16, v16
; CHECK-NEXT: bltu a0, a1, .LBB27_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB27_4:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: .LBB27_2:
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v8, v8
; CHECK-NEXT: addi a1, a0, -16
; CHECK-NEXT: sltu a0, a0, a1
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v16, v16
; CHECK-NEXT: ret
%head = insertelement <32 x i1> poison, i1 true, i32 0
%m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
156 changes: 63 additions & 93 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll
@@ -657,109 +657,80 @@ define <32 x double> @vfma_vv_v32f64(<32 x double> %va, <32 x double> %b, <32 x
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a3, 48
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: slli a1, a1, 5
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: vmv1r.v v1, v0
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v0, v0, 2
; CHECK-NEXT: addi a1, a2, 128
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v24, (a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a3, 24
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: vle64.v v24, (a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a3, 40
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: addi a3, a4, -16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 5
; CHECK-NEXT: li a3, 24
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: bltu a4, a3, .LBB50_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a3
; CHECK-NEXT: .LBB50_2:
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v0, v0, 2
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v8, (a2)
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 4
; CHECK-NEXT: add a2, sp, a2
; CHECK-NEXT: addi a2, a2, 16
; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a2, 24
; CHECK-NEXT: mul a1, a1, a2
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: addi a1, a2, 128
; CHECK-NEXT: addi a2, a4, -16
; CHECK-NEXT: sltu a3, a4, a2
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: addi a3, a0, 128
; CHECK-NEXT: vle64.v v16, (a1)
; CHECK-NEXT: vle64.v v8, (a3)
; CHECK-NEXT: vle64.v v24, (a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a2, 40
; CHECK-NEXT: mul a1, a1, a2
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vfmadd.vv v16, v24, v8, v0.t
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a2, 40
; CHECK-NEXT: mul a1, a1, a2
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: bltu a4, a0, .LBB50_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: bltu a4, a0, .LBB50_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a4, 16
; CHECK-NEXT: .LBB50_4:
; CHECK-NEXT: .LBB50_2:
; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 5
; CHECK-NEXT: li a1, 24
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t
; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 40
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 48
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: slli a0, a0, 5
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
@@ -780,50 +751,49 @@ define <32 x double> @vfma_vv_v32f64_unmasked(<32 x double> %va, <32 x double> %
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v24, (a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: vle64.v v24, (a1)
; CHECK-NEXT: addi a3, a4, -16
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vle64.v v24, (a2)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: bltu a4, a3, .LBB51_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a3
; CHECK-NEXT: .LBB51_2:
; CHECK-NEXT: vle64.v v8, (a2)
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vle64.v v0, (a0)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: mv a0, a4
; CHECK-NEXT: bltu a4, a1, .LBB51_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vfmadd.vv v24, v16, v8
; CHECK-NEXT: bltu a4, a0, .LBB51_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a4, 16
; CHECK-NEXT: .LBB51_4:
; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma
; CHECK-NEXT: .LBB51_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfmadd.vv v0, v8, v24
; CHECK-NEXT: addi a0, a4, -16
; CHECK-NEXT: sltu a1, a4, a0
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: and a0, a1, a0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfmadd.vv v0, v16, v8
; CHECK-NEXT: vmv.v.v v8, v0
; CHECK-NEXT: vmv8r.v v16, v24
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfmadd.vv v24, v16, v8
; CHECK-NEXT: vmv8r.v v8, v0
; CHECK-NEXT: vmv.v.v v16, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 24
; CHECK-NEXT: mul a0, a0, a1
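One more detail worth calling out from the vfma_vv_v32f64 diff above (again a reviewer note, not part of the patch): the prologue's spill area shrinks from 48*vlenb bytes (`li a3, 48; mul a1, a1, a3`) to 32*vlenb bytes (`slli a1, a1, 5`), i.e. two fewer m8 spill slots of 8*vlenb each. The sketch below just recomputes both sizes; the helper names and the example VLEN are assumptions for illustration only.

```c
#include <stdint.h>

/* Spill-area sizes implied by the old and new prologues of vfma_vv_v32f64. */
static inline uint64_t old_frame_bytes(uint64_t vlenb) { return 48 * vlenb; } /* li a3, 48; mul a1, a1, a3 */
static inline uint64_t new_frame_bytes(uint64_t vlenb) { return vlenb << 5; } /* slli a1, a1, 5 == 32 * vlenb */

/* Example: with VLEN = 128 bits, vlenb = 16, so 768 bytes before vs. 512 bytes after. */
```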
79 changes: 32 additions & 47 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll
@@ -324,59 +324,45 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a3, 24
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: vmv1r.v v1, v0
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v0, v0, 2
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v24, (a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: addi a3, a2, -16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: bltu a2, a3, .LBB26_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a3
; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v0, v0, 2
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: vle64.v v8, (a1)
; CHECK-NEXT: addi a1, a2, -16
; CHECK-NEXT: sltu a3, a2, a1
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a1, a3, a1
; CHECK-NEXT: vle64.v v24, (a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vfmax.vv v16, v16, v8, v0.t
; CHECK-NEXT: bltu a2, a0, .LBB26_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: bltu a2, a0, .LBB26_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: .LBB26_4:
; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfmax.vv v8, v24, v8, v0.t
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfmax.vv v8, v8, v24, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 24
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
@@ -390,22 +376,21 @@ define <32 x double> @vfmax_vv_v32f64_unmasked(<32 x double> %va, <32 x double>
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v24, (a1)
; CHECK-NEXT: addi a3, a2, -16
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: bltu a2, a3, .LBB27_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a3
; CHECK-NEXT: .LBB27_2:
; CHECK-NEXT: vle64.v v0, (a0)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: bltu a2, a1, .LBB27_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: vfmax.vv v16, v16, v24
; CHECK-NEXT: bltu a2, a0, .LBB27_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: .LBB27_4:
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: .LBB27_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfmax.vv v8, v8, v0
; CHECK-NEXT: addi a0, a2, -16
; CHECK-NEXT: sltu a1, a2, a0
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: and a0, a1, a0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfmax.vv v16, v16, v24
; CHECK-NEXT: ret
%head = insertelement <32 x i1> poison, i1 true, i32 0
%m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
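A note on the recurring pattern in the updated CHECK lines above: the old output picked the tail element count with a compare-and-branch (li 0 / bltu / mv), while the new output computes it branchlessly with addi / sltu / addi -1 / and. The sketch below is a minimal C rendering of what that four-instruction sequence evaluates for the 16 x f64 tail; the function and variable names are invented for illustration and are not taken from the tests.

```c
#include <assert.h>
#include <stdint.h>

/* Branchless clamp: returns evl - 16 when evl >= 16, otherwise 0.
 * This is what the new "addi / sltu / addi -1 / and" CHECK lines
 * compute for the second 16 x f64 half. */
static uint64_t high_half_evl(uint64_t evl) {
    uint64_t t = evl - 16;       /* wraps around when evl < 16           */
    uint64_t wrapped = evl < t;  /* sltu: 1 iff the subtraction wrapped  */
    return (wrapped - 1) & t;    /* wrapped: mask 0; otherwise mask ~0   */
}

int main(void) {
    assert(high_half_evl(10) == 0);   /* evl below 16: empty tail */
    assert(high_half_evl(16) == 0);
    assert(high_half_evl(25) == 9);
    return 0;
}
```

The same clamp appears with 128 in place of 16 in the 256-element i8 tests further down in the diff.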
79 changes: 32 additions & 47 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll
@@ -324,59 +324,45 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a3, 24
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: vmv1r.v v1, v0
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v0, v0, 2
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v24, (a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: addi a3, a2, -16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: bltu a2, a3, .LBB26_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a3
; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v0, v0, 2
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: vle64.v v8, (a1)
; CHECK-NEXT: addi a1, a2, -16
; CHECK-NEXT: sltu a3, a2, a1
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a1, a3, a1
; CHECK-NEXT: vle64.v v24, (a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vfmin.vv v16, v16, v8, v0.t
; CHECK-NEXT: bltu a2, a0, .LBB26_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: bltu a2, a0, .LBB26_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: .LBB26_4:
; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfmin.vv v8, v24, v8, v0.t
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfmin.vv v8, v8, v24, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 24
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
@@ -390,22 +376,21 @@ define <32 x double> @vfmin_vv_v32f64_unmasked(<32 x double> %va, <32 x double>
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v24, (a1)
; CHECK-NEXT: addi a3, a2, -16
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: bltu a2, a3, .LBB27_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a3
; CHECK-NEXT: .LBB27_2:
; CHECK-NEXT: vle64.v v0, (a0)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: bltu a2, a1, .LBB27_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: vfmin.vv v16, v16, v24
; CHECK-NEXT: bltu a2, a0, .LBB27_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: .LBB27_4:
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: .LBB27_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfmin.vv v8, v8, v0
; CHECK-NEXT: addi a0, a2, -16
; CHECK-NEXT: sltu a1, a2, a0
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: and a0, a1, a0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfmin.vv v16, v16, v24
; CHECK-NEXT: ret
%head = insertelement <32 x i1> poison, i1 true, i32 0
%m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
156 changes: 63 additions & 93 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll
@@ -657,109 +657,80 @@ define <32 x double> @vfma_vv_v32f64(<32 x double> %va, <32 x double> %b, <32 x
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a3, 48
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: slli a1, a1, 5
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: vmv1r.v v1, v0
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v0, v0, 2
; CHECK-NEXT: addi a1, a2, 128
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v24, (a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a3, 24
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: vle64.v v24, (a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a3, 40
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: addi a3, a4, -16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 5
; CHECK-NEXT: li a3, 24
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: bltu a4, a3, .LBB50_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a3
; CHECK-NEXT: .LBB50_2:
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v0, v0, 2
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v8, (a2)
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 4
; CHECK-NEXT: add a2, sp, a2
; CHECK-NEXT: addi a2, a2, 16
; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a2, 24
; CHECK-NEXT: mul a1, a1, a2
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: addi a1, a2, 128
; CHECK-NEXT: addi a2, a4, -16
; CHECK-NEXT: sltu a3, a4, a2
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: addi a3, a0, 128
; CHECK-NEXT: vle64.v v16, (a1)
; CHECK-NEXT: vle64.v v8, (a3)
; CHECK-NEXT: vle64.v v24, (a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a2, 40
; CHECK-NEXT: mul a1, a1, a2
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vfmadd.vv v16, v24, v8, v0.t
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a2, 40
; CHECK-NEXT: mul a1, a1, a2
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: bltu a4, a0, .LBB50_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: bltu a4, a0, .LBB50_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a4, 16
; CHECK-NEXT: .LBB50_4:
; CHECK-NEXT: .LBB50_2:
; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 5
; CHECK-NEXT: li a1, 24
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t
; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 40
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 48
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: slli a0, a0, 5
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
@@ -780,50 +751,49 @@ define <32 x double> @vfma_vv_v32f64_unmasked(<32 x double> %va, <32 x double> %
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v24, (a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: vle64.v v24, (a1)
; CHECK-NEXT: addi a3, a4, -16
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vle64.v v24, (a2)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: bltu a4, a3, .LBB51_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a3
; CHECK-NEXT: .LBB51_2:
; CHECK-NEXT: vle64.v v8, (a2)
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vle64.v v0, (a0)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: mv a0, a4
; CHECK-NEXT: bltu a4, a1, .LBB51_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vfmadd.vv v24, v16, v8
; CHECK-NEXT: bltu a4, a0, .LBB51_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a4, 16
; CHECK-NEXT: .LBB51_4:
; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma
; CHECK-NEXT: .LBB51_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfmadd.vv v0, v8, v24
; CHECK-NEXT: addi a0, a4, -16
; CHECK-NEXT: sltu a1, a4, a0
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: and a0, a1, a0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfmadd.vv v0, v16, v8
; CHECK-NEXT: vmv.v.v v8, v0
; CHECK-NEXT: vmv8r.v v16, v24
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfmadd.vv v24, v16, v8
; CHECK-NEXT: vmv8r.v v8, v0
; CHECK-NEXT: vmv.v.v v16, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 24
; CHECK-NEXT: mul a0, a0, a1
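The reload addresses in the vfmuladd checks above are all formed the same way: csrr vlenb, a shift or multiply, add sp, addi 16. Assuming the usual layout where one LMUL=8 register group occupies 8 * vlenb bytes, the arithmetic amounts to the sketch below; the helper name and the "slot index" framing are my own and are not something the test asserts.

```c
#include <stdint.h>

/* Illustrative only: each m8 register group spilled to the stack takes
 * 8 * vlenb bytes, so the k-th spill slot is addressed as
 *   sp + 16 + k * (vlenb << 3)
 * matching the "csrr vlenb; slli/mul; add sp; addi 16" sequences. */
static inline uintptr_t m8_spill_slot(uintptr_t sp, uint64_t vlenb, unsigned k) {
    return sp + 16 + (uintptr_t)(vlenb << 3) * k;
}
```

Under that reading, the plain addi sp, 16 form is slot 0, slli 3 is slot 1, slli 4 is slot 2, and the mul-by-24 form is slot 3.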
39 changes: 18 additions & 21 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll
@@ -322,21 +322,19 @@ define <32 x double> @vfneg_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe
; CHECK-LABEL: vfneg_vv_v32f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: addi a2, a0, -16
; CHECK-NEXT: vslidedown.vi v0, v0, 2
; CHECK-NEXT: bltu a0, a2, .LBB26_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a2
; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: addi a1, a0, -16
; CHECK-NEXT: sltu a2, a0, a1
; CHECK-NEXT: addi a2, a2, -1
; CHECK-NEXT: and a1, a2, a1
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vfneg.v v16, v16, v0.t
; CHECK-NEXT: bltu a0, a1, .LBB26_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: bltu a0, a1, .LBB26_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB26_4:
; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfneg.v v8, v8, v0.t
@@ -348,21 +346,20 @@ define <32 x double> @vfneg_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe
define <32 x double> @vfneg_vv_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfneg_vv_v32f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, a0, -16
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: bltu a0, a1, .LBB27_2
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB27_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: .LBB27_2:
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vfneg.v v16, v16
; CHECK-NEXT: bltu a0, a1, .LBB27_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB27_4:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: .LBB27_2:
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfneg.v v8, v8
; CHECK-NEXT: addi a1, a0, -16
; CHECK-NEXT: sltu a0, a0, a1
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfneg.v v16, v16
; CHECK-NEXT: ret
%head = insertelement <32 x i1> poison, i1 true, i32 0
%m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
39 changes: 18 additions & 21 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll
@@ -322,21 +322,19 @@ define <32 x double> @vfsqrt_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zero
; CHECK-LABEL: vfsqrt_vv_v32f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: addi a2, a0, -16
; CHECK-NEXT: vslidedown.vi v0, v0, 2
; CHECK-NEXT: bltu a0, a2, .LBB26_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a2
; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: addi a1, a0, -16
; CHECK-NEXT: sltu a2, a0, a1
; CHECK-NEXT: addi a2, a2, -1
; CHECK-NEXT: and a1, a2, a1
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vfsqrt.v v16, v16, v0.t
; CHECK-NEXT: bltu a0, a1, .LBB26_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: bltu a0, a1, .LBB26_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB26_4:
; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfsqrt.v v8, v8, v0.t
@@ -348,21 +346,20 @@ define <32 x double> @vfsqrt_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zero
define <32 x double> @vfsqrt_vv_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v32f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, a0, -16
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: bltu a0, a1, .LBB27_2
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB27_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: .LBB27_2:
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vfsqrt.v v16, v16
; CHECK-NEXT: bltu a0, a1, .LBB27_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB27_4:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: .LBB27_2:
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfsqrt.v v8, v8
; CHECK-NEXT: addi a1, a0, -16
; CHECK-NEXT: sltu a0, a0, a1
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfsqrt.v v16, v16
; CHECK-NEXT: ret
%head = insertelement <32 x i1> poison, i1 true, i32 0
%m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
80 changes: 36 additions & 44 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll
@@ -288,23 +288,20 @@ declare <256 x i8> @llvm.vp.smax.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, i32)
define <256 x i8> @vmax_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmax_vx_v258i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: li a3, 128
; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
; CHECK-NEXT: vlm.v v25, (a1)
; CHECK-NEXT: addi a4, a2, -128
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: bltu a2, a4, .LBB22_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a4
; CHECK-NEXT: .LBB22_2:
; CHECK-NEXT: vlm.v v0, (a1)
; CHECK-NEXT: addi a1, a2, -128
; CHECK-NEXT: sltu a4, a2, a1
; CHECK-NEXT: addi a4, a4, -1
; CHECK-NEXT: and a1, a4, a1
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vmax.vx v16, v16, a0, v0.t
; CHECK-NEXT: bltu a2, a3, .LBB22_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: bltu a2, a3, .LBB22_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a2, 128
; CHECK-NEXT: .LBB22_4:
; CHECK-NEXT: .LBB22_2:
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t
@@ -318,21 +315,20 @@ define <256 x i8> @vmax_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zero
define <256 x i8> @vmax_vx_v258i8_unmasked(<256 x i8> %va, i8 %b, i32 zeroext %evl) {
; CHECK-LABEL: vmax_vx_v258i8_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a2, a1, -128
; CHECK-NEXT: li a3, 0
; CHECK-NEXT: bltu a1, a2, .LBB23_2
; CHECK-NEXT: li a3, 128
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: bltu a1, a3, .LBB23_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a3, a2
; CHECK-NEXT: .LBB23_2:
; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
; CHECK-NEXT: li a2, 128
; CHECK-NEXT: vmax.vx v16, v16, a0
; CHECK-NEXT: bltu a1, a2, .LBB23_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a1, 128
; CHECK-NEXT: .LBB23_4:
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: .LBB23_2:
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vmax.vx v8, v8, a0
; CHECK-NEXT: addi a2, a1, -128
; CHECK-NEXT: sltu a1, a1, a2
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: and a1, a1, a2
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vmax.vx v16, v16, a0
; CHECK-NEXT: ret
%elt.head = insertelement <256 x i8> poison, i8 %b, i32 0
%vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
@@ -1100,24 +1096,22 @@ define <32 x i64> @vmax_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
; RV32-LABEL: vmax_vx_v32i64:
; RV32: # %bb.0:
; RV32-NEXT: vmv1r.v v1, v0
; RV32-NEXT: li a1, 0
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: li a2, 32
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: addi a2, a0, -16
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT: vmv.v.i v24, -1
; RV32-NEXT: bltu a0, a2, .LBB74_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a1, a2
; RV32-NEXT: .LBB74_2:
; RV32-NEXT: addi a1, a0, -16
; RV32-NEXT: sltu a2, a0, a1
; RV32-NEXT: addi a2, a2, -1
; RV32-NEXT: and a1, a2, a1
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: li a1, 16
; RV32-NEXT: vmax.vv v16, v16, v24, v0.t
; RV32-NEXT: bltu a0, a1, .LBB74_4
; RV32-NEXT: # %bb.3:
; RV32-NEXT: bltu a0, a1, .LBB74_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a0, 16
; RV32-NEXT: .LBB74_4:
; RV32-NEXT: .LBB74_2:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vmv1r.v v0, v1
; RV32-NEXT: vmax.vv v8, v8, v24, v0.t
@@ -1126,22 +1120,20 @@ define <32 x i64> @vmax_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
; RV64-LABEL: vmax_vx_v32i64:
; RV64: # %bb.0:
; RV64-NEXT: vmv1r.v v24, v0
; RV64-NEXT: li a2, 0
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT: addi a1, a0, -16
; RV64-NEXT: vslidedown.vi v0, v0, 2
; RV64-NEXT: bltu a0, a1, .LBB74_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a2, a1
; RV64-NEXT: .LBB74_2:
; RV64-NEXT: addi a1, a0, -16
; RV64-NEXT: sltu a2, a0, a1
; RV64-NEXT: addi a2, a2, -1
; RV64-NEXT: and a2, a2, a1
; RV64-NEXT: li a1, -1
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: li a2, 16
; RV64-NEXT: vmax.vx v16, v16, a1, v0.t
; RV64-NEXT: bltu a0, a2, .LBB74_4
; RV64-NEXT: # %bb.3:
; RV64-NEXT: bltu a0, a2, .LBB74_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a0, 16
; RV64-NEXT: .LBB74_4:
; RV64-NEXT: .LBB74_2:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: vmax.vx v8, v8, a1, v0.t
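For the 256-element i8 tests in vmax-vp.ll above (and the analogous umax, smin, and umin files that follow), the rewritten unmasked sequences split the EVL the same way: the low half is capped at 128 with a branch, and the high half uses the branchless clamp. The C sketch below renders that split under this reading; the function and variable names are invented for illustration.

```c
#include <stdint.h>
#include <stdio.h>

/* Sketch of the EVL split in the new v258i8 output: low half capped at
 * 128 (bltu/li), high half clamped branchlessly (addi/sltu/addi -1/and). */
static void split_evl_256(uint64_t evl, uint64_t *lo, uint64_t *hi) {
    uint64_t t = evl - 128;        /* wraps around when evl < 128     */
    uint64_t wrapped = evl < t;    /* sltu                            */
    *hi = (wrapped - 1) & t;       /* 0 if wrapped, else evl - 128    */
    *lo = evl < 128 ? evl : 128;   /* min(evl, 128)                   */
}

int main(void) {
    uint64_t lo, hi;
    split_evl_256(200, &lo, &hi);
    printf("lo=%llu hi=%llu\n",
           (unsigned long long)lo, (unsigned long long)hi); /* lo=128 hi=72 */
    return 0;
}
```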
80 changes: 36 additions & 44 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll
@@ -287,23 +287,20 @@ declare <256 x i8> @llvm.vp.umax.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, i32)
define <256 x i8> @vmaxu_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmaxu_vx_v258i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: li a3, 128
; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
; CHECK-NEXT: vlm.v v25, (a1)
; CHECK-NEXT: addi a4, a2, -128
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: bltu a2, a4, .LBB22_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a4
; CHECK-NEXT: .LBB22_2:
; CHECK-NEXT: vlm.v v0, (a1)
; CHECK-NEXT: addi a1, a2, -128
; CHECK-NEXT: sltu a4, a2, a1
; CHECK-NEXT: addi a4, a4, -1
; CHECK-NEXT: and a1, a4, a1
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vmaxu.vx v16, v16, a0, v0.t
; CHECK-NEXT: bltu a2, a3, .LBB22_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: bltu a2, a3, .LBB22_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a2, 128
; CHECK-NEXT: .LBB22_4:
; CHECK-NEXT: .LBB22_2:
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t
@@ -317,21 +314,20 @@ define <256 x i8> @vmaxu_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zer
define <256 x i8> @vmaxu_vx_v258i8_unmasked(<256 x i8> %va, i8 %b, i32 zeroext %evl) {
; CHECK-LABEL: vmaxu_vx_v258i8_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a2, a1, -128
; CHECK-NEXT: li a3, 0
; CHECK-NEXT: bltu a1, a2, .LBB23_2
; CHECK-NEXT: li a3, 128
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: bltu a1, a3, .LBB23_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a3, a2
; CHECK-NEXT: .LBB23_2:
; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
; CHECK-NEXT: li a2, 128
; CHECK-NEXT: vmaxu.vx v16, v16, a0
; CHECK-NEXT: bltu a1, a2, .LBB23_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a1, 128
; CHECK-NEXT: .LBB23_4:
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: .LBB23_2:
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vmaxu.vx v8, v8, a0
; CHECK-NEXT: addi a2, a1, -128
; CHECK-NEXT: sltu a1, a1, a2
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: and a1, a1, a2
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vmaxu.vx v16, v16, a0
; CHECK-NEXT: ret
%elt.head = insertelement <256 x i8> poison, i8 %b, i32 0
%vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
@@ -1099,24 +1095,22 @@ define <32 x i64> @vmaxu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev
; RV32-LABEL: vmaxu_vx_v32i64:
; RV32: # %bb.0:
; RV32-NEXT: vmv1r.v v1, v0
; RV32-NEXT: li a1, 0
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: li a2, 32
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: addi a2, a0, -16
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT: vmv.v.i v24, -1
; RV32-NEXT: bltu a0, a2, .LBB74_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a1, a2
; RV32-NEXT: .LBB74_2:
; RV32-NEXT: addi a1, a0, -16
; RV32-NEXT: sltu a2, a0, a1
; RV32-NEXT: addi a2, a2, -1
; RV32-NEXT: and a1, a2, a1
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: li a1, 16
; RV32-NEXT: vmaxu.vv v16, v16, v24, v0.t
; RV32-NEXT: bltu a0, a1, .LBB74_4
; RV32-NEXT: # %bb.3:
; RV32-NEXT: bltu a0, a1, .LBB74_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a0, 16
; RV32-NEXT: .LBB74_4:
; RV32-NEXT: .LBB74_2:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vmv1r.v v0, v1
; RV32-NEXT: vmaxu.vv v8, v8, v24, v0.t
@@ -1125,22 +1119,20 @@ define <32 x i64> @vmaxu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev
; RV64-LABEL: vmaxu_vx_v32i64:
; RV64: # %bb.0:
; RV64-NEXT: vmv1r.v v24, v0
; RV64-NEXT: li a2, 0
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT: addi a1, a0, -16
; RV64-NEXT: vslidedown.vi v0, v0, 2
; RV64-NEXT: bltu a0, a1, .LBB74_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a2, a1
; RV64-NEXT: .LBB74_2:
; RV64-NEXT: addi a1, a0, -16
; RV64-NEXT: sltu a2, a0, a1
; RV64-NEXT: addi a2, a2, -1
; RV64-NEXT: and a2, a2, a1
; RV64-NEXT: li a1, -1
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: li a2, 16
; RV64-NEXT: vmaxu.vx v16, v16, a1, v0.t
; RV64-NEXT: bltu a0, a2, .LBB74_4
; RV64-NEXT: # %bb.3:
; RV64-NEXT: bltu a0, a2, .LBB74_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a0, 16
; RV64-NEXT: .LBB74_4:
; RV64-NEXT: .LBB74_2:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: vmaxu.vx v8, v8, a1, v0.t
80 changes: 36 additions & 44 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll
@@ -288,23 +288,20 @@ declare <256 x i8> @llvm.vp.smin.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, i32)
define <256 x i8> @vmin_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmin_vx_v258i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: li a3, 128
; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
; CHECK-NEXT: vlm.v v25, (a1)
; CHECK-NEXT: addi a4, a2, -128
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: bltu a2, a4, .LBB22_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a4
; CHECK-NEXT: .LBB22_2:
; CHECK-NEXT: vlm.v v0, (a1)
; CHECK-NEXT: addi a1, a2, -128
; CHECK-NEXT: sltu a4, a2, a1
; CHECK-NEXT: addi a4, a4, -1
; CHECK-NEXT: and a1, a4, a1
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vmin.vx v16, v16, a0, v0.t
; CHECK-NEXT: bltu a2, a3, .LBB22_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: bltu a2, a3, .LBB22_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a2, 128
; CHECK-NEXT: .LBB22_4:
; CHECK-NEXT: .LBB22_2:
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t
@@ -318,21 +315,20 @@ define <256 x i8> @vmin_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zero
define <256 x i8> @vmin_vx_v258i8_unmasked(<256 x i8> %va, i8 %b, i32 zeroext %evl) {
; CHECK-LABEL: vmin_vx_v258i8_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a2, a1, -128
; CHECK-NEXT: li a3, 0
; CHECK-NEXT: bltu a1, a2, .LBB23_2
; CHECK-NEXT: li a3, 128
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: bltu a1, a3, .LBB23_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a3, a2
; CHECK-NEXT: .LBB23_2:
; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
; CHECK-NEXT: li a2, 128
; CHECK-NEXT: vmin.vx v16, v16, a0
; CHECK-NEXT: bltu a1, a2, .LBB23_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a1, 128
; CHECK-NEXT: .LBB23_4:
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: .LBB23_2:
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vmin.vx v8, v8, a0
; CHECK-NEXT: addi a2, a1, -128
; CHECK-NEXT: sltu a1, a1, a2
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: and a1, a1, a2
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vmin.vx v16, v16, a0
; CHECK-NEXT: ret
%elt.head = insertelement <256 x i8> poison, i8 %b, i32 0
%vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
@@ -1100,24 +1096,22 @@ define <32 x i64> @vmin_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
; RV32-LABEL: vmin_vx_v32i64:
; RV32: # %bb.0:
; RV32-NEXT: vmv1r.v v1, v0
; RV32-NEXT: li a1, 0
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: li a2, 32
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: addi a2, a0, -16
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT: vmv.v.i v24, -1
; RV32-NEXT: bltu a0, a2, .LBB74_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a1, a2
; RV32-NEXT: .LBB74_2:
; RV32-NEXT: addi a1, a0, -16
; RV32-NEXT: sltu a2, a0, a1
; RV32-NEXT: addi a2, a2, -1
; RV32-NEXT: and a1, a2, a1
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: li a1, 16
; RV32-NEXT: vmin.vv v16, v16, v24, v0.t
; RV32-NEXT: bltu a0, a1, .LBB74_4
; RV32-NEXT: # %bb.3:
; RV32-NEXT: bltu a0, a1, .LBB74_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a0, 16
; RV32-NEXT: .LBB74_4:
; RV32-NEXT: .LBB74_2:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vmv1r.v v0, v1
; RV32-NEXT: vmin.vv v8, v8, v24, v0.t
@@ -1126,22 +1120,20 @@ define <32 x i64> @vmin_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
; RV64-LABEL: vmin_vx_v32i64:
; RV64: # %bb.0:
; RV64-NEXT: vmv1r.v v24, v0
; RV64-NEXT: li a2, 0
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT: addi a1, a0, -16
; RV64-NEXT: vslidedown.vi v0, v0, 2
; RV64-NEXT: bltu a0, a1, .LBB74_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a2, a1
; RV64-NEXT: .LBB74_2:
; RV64-NEXT: addi a1, a0, -16
; RV64-NEXT: sltu a2, a0, a1
; RV64-NEXT: addi a2, a2, -1
; RV64-NEXT: and a2, a2, a1
; RV64-NEXT: li a1, -1
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: li a2, 16
; RV64-NEXT: vmin.vx v16, v16, a1, v0.t
; RV64-NEXT: bltu a0, a2, .LBB74_4
; RV64-NEXT: # %bb.3:
; RV64-NEXT: bltu a0, a2, .LBB74_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a0, 16
; RV64-NEXT: .LBB74_4:
; RV64-NEXT: .LBB74_2:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: vmin.vx v8, v8, a1, v0.t
80 changes: 36 additions & 44 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll
@@ -287,23 +287,20 @@ declare <256 x i8> @llvm.vp.umin.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, i32)
define <256 x i8> @vminu_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vminu_vx_v258i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: li a3, 128
; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
; CHECK-NEXT: vlm.v v25, (a1)
; CHECK-NEXT: addi a4, a2, -128
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: bltu a2, a4, .LBB22_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a4
; CHECK-NEXT: .LBB22_2:
; CHECK-NEXT: vlm.v v0, (a1)
; CHECK-NEXT: addi a1, a2, -128
; CHECK-NEXT: sltu a4, a2, a1
; CHECK-NEXT: addi a4, a4, -1
; CHECK-NEXT: and a1, a4, a1
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vminu.vx v16, v16, a0, v0.t
; CHECK-NEXT: bltu a2, a3, .LBB22_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: bltu a2, a3, .LBB22_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a2, 128
; CHECK-NEXT: .LBB22_4:
; CHECK-NEXT: .LBB22_2:
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
@@ -317,21 +314,20 @@ define <256 x i8> @vminu_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zer
define <256 x i8> @vminu_vx_v258i8_unmasked(<256 x i8> %va, i8 %b, i32 zeroext %evl) {
; CHECK-LABEL: vminu_vx_v258i8_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a2, a1, -128
; CHECK-NEXT: li a3, 0
; CHECK-NEXT: bltu a1, a2, .LBB23_2
; CHECK-NEXT: li a3, 128
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: bltu a1, a3, .LBB23_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a3, a2
; CHECK-NEXT: .LBB23_2:
; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
; CHECK-NEXT: li a2, 128
; CHECK-NEXT: vminu.vx v16, v16, a0
; CHECK-NEXT: bltu a1, a2, .LBB23_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a1, 128
; CHECK-NEXT: .LBB23_4:
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: .LBB23_2:
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vminu.vx v8, v8, a0
; CHECK-NEXT: addi a2, a1, -128
; CHECK-NEXT: sltu a1, a1, a2
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: and a1, a1, a2
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vminu.vx v16, v16, a0
; CHECK-NEXT: ret
%elt.head = insertelement <256 x i8> poison, i8 %b, i32 0
%vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
@@ -1099,24 +1095,22 @@ define <32 x i64> @vminu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev
; RV32-LABEL: vminu_vx_v32i64:
; RV32: # %bb.0:
; RV32-NEXT: vmv1r.v v1, v0
; RV32-NEXT: li a1, 0
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: li a2, 32
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: addi a2, a0, -16
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT: vmv.v.i v24, -1
; RV32-NEXT: bltu a0, a2, .LBB74_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a1, a2
; RV32-NEXT: .LBB74_2:
; RV32-NEXT: addi a1, a0, -16
; RV32-NEXT: sltu a2, a0, a1
; RV32-NEXT: addi a2, a2, -1
; RV32-NEXT: and a1, a2, a1
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: li a1, 16
; RV32-NEXT: vminu.vv v16, v16, v24, v0.t
; RV32-NEXT: bltu a0, a1, .LBB74_4
; RV32-NEXT: # %bb.3:
; RV32-NEXT: bltu a0, a1, .LBB74_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a0, 16
; RV32-NEXT: .LBB74_4:
; RV32-NEXT: .LBB74_2:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vmv1r.v v0, v1
; RV32-NEXT: vminu.vv v8, v8, v24, v0.t
@@ -1125,22 +1119,20 @@ define <32 x i64> @vminu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev
; RV64-LABEL: vminu_vx_v32i64:
; RV64: # %bb.0:
; RV64-NEXT: vmv1r.v v24, v0
; RV64-NEXT: li a2, 0
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT: addi a1, a0, -16
; RV64-NEXT: vslidedown.vi v0, v0, 2
; RV64-NEXT: bltu a0, a1, .LBB74_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a2, a1
; RV64-NEXT: .LBB74_2:
; RV64-NEXT: addi a1, a0, -16
; RV64-NEXT: sltu a2, a0, a1
; RV64-NEXT: addi a2, a2, -1
; RV64-NEXT: and a2, a2, a1
; RV64-NEXT: li a1, -1
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: li a2, 16
; RV64-NEXT: vminu.vx v16, v16, a1, v0.t
; RV64-NEXT: bltu a0, a2, .LBB74_4
; RV64-NEXT: # %bb.3:
; RV64-NEXT: bltu a0, a2, .LBB74_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a0, 16
; RV64-NEXT: .LBB74_4:
; RV64-NEXT: .LBB74_2:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: vminu.vx v8, v8, a1, v0.t