1,118 changes: 637 additions & 481 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-explodevector.ll

Large diffs are not rendered by default.

337 changes: 2 additions & 335 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v,+m,+zbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v,+m,+zbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s

define i32 @reduce_sum_2xi32(<2 x i32> %v) {
; CHECK-LABEL: reduce_sum_2xi32:
@@ -448,336 +448,3 @@ define i32 @reduce_sum_16xi32_prefix15(ptr %p) {
%add13 = add i32 %add12, %e14
ret i32 %add13
}

;; Most of the corner cases are exercised above; the following just
;; makes sure that other opcodes work as expected.

define i32 @reduce_xor_16xi32_prefix2(ptr %p) {
; CHECK-LABEL: reduce_xor_16xi32_prefix2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <16 x i32>, ptr %p, align 256
%e0 = extractelement <16 x i32> %v, i32 0
%e1 = extractelement <16 x i32> %v, i32 1
%xor0 = xor i32 %e0, %e1
ret i32 %xor0
}

define i32 @reduce_xor_16xi32_prefix5(ptr %p) {
; CHECK-LABEL: reduce_xor_16xi32_prefix5:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 224
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: vmv.v.i v8, -1
; CHECK-NEXT: vmerge.vim v8, v8, 0, v0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v10, (a0)
; CHECK-NEXT: vsext.vf4 v12, v8
; CHECK-NEXT: vand.vv v8, v10, v12
; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vredxor.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <16 x i32>, ptr %p, align 256
%e0 = extractelement <16 x i32> %v, i32 0
%e1 = extractelement <16 x i32> %v, i32 1
%e2 = extractelement <16 x i32> %v, i32 2
%e3 = extractelement <16 x i32> %v, i32 3
%e4 = extractelement <16 x i32> %v, i32 4
%xor0 = xor i32 %e0, %e1
%xor1 = xor i32 %xor0, %e2
%xor2 = xor i32 %xor1, %e3
%xor3 = xor i32 %xor2, %e4
ret i32 %xor3
}

define i32 @reduce_and_16xi32_prefix2(ptr %p) {
; CHECK-LABEL: reduce_and_16xi32_prefix2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vredand.vs v8, v8, v8
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <16 x i32>, ptr %p, align 256
%e0 = extractelement <16 x i32> %v, i32 0
%e1 = extractelement <16 x i32> %v, i32 1
%and0 = and i32 %e0, %e1
ret i32 %and0
}

define i32 @reduce_and_16xi32_prefix5(ptr %p) {
; CHECK-LABEL: reduce_and_16xi32_prefix5:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, -1
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v10, (a0)
; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; CHECK-NEXT: vslideup.vi v10, v8, 5
; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; CHECK-NEXT: vslideup.vi v10, v8, 6
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vslideup.vi v10, v8, 7
; CHECK-NEXT: vredand.vs v8, v10, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <16 x i32>, ptr %p, align 256
%e0 = extractelement <16 x i32> %v, i32 0
%e1 = extractelement <16 x i32> %v, i32 1
%e2 = extractelement <16 x i32> %v, i32 2
%e3 = extractelement <16 x i32> %v, i32 3
%e4 = extractelement <16 x i32> %v, i32 4
%and0 = and i32 %e0, %e1
%and1 = and i32 %and0, %e2
%and2 = and i32 %and1, %e3
%and3 = and i32 %and2, %e4
ret i32 %and3
}

define i32 @reduce_or_16xi32_prefix2(ptr %p) {
; CHECK-LABEL: reduce_or_16xi32_prefix2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vredor.vs v8, v8, v8
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <16 x i32>, ptr %p, align 256
%e0 = extractelement <16 x i32> %v, i32 0
%e1 = extractelement <16 x i32> %v, i32 1
%or0 = or i32 %e0, %e1
ret i32 %or0
}

define i32 @reduce_or_16xi32_prefix5(ptr %p) {
; CHECK-LABEL: reduce_or_16xi32_prefix5:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 224
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: vmv.v.i v8, -1
; CHECK-NEXT: vmerge.vim v8, v8, 0, v0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v10, (a0)
; CHECK-NEXT: vsext.vf4 v12, v8
; CHECK-NEXT: vand.vv v8, v10, v12
; CHECK-NEXT: vredor.vs v8, v8, v8
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <16 x i32>, ptr %p, align 256
%e0 = extractelement <16 x i32> %v, i32 0
%e1 = extractelement <16 x i32> %v, i32 1
%e2 = extractelement <16 x i32> %v, i32 2
%e3 = extractelement <16 x i32> %v, i32 3
%e4 = extractelement <16 x i32> %v, i32 4
%or0 = or i32 %e0, %e1
%or1 = or i32 %or0, %e2
%or2 = or i32 %or1, %e3
%or3 = or i32 %or2, %e4
ret i32 %or3
}

declare i32 @llvm.smax.i32(i32 %a, i32 %b)
declare i32 @llvm.smin.i32(i32 %a, i32 %b)
declare i32 @llvm.umax.i32(i32 %a, i32 %b)
declare i32 @llvm.umin.i32(i32 %a, i32 %b)

define i32 @reduce_smax_16xi32_prefix2(ptr %p) {
; CHECK-LABEL: reduce_smax_16xi32_prefix2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vredmax.vs v8, v8, v8
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <16 x i32>, ptr %p, align 256
%e0 = extractelement <16 x i32> %v, i32 0
%e1 = extractelement <16 x i32> %v, i32 1
%smax0 = call i32 @llvm.smax.i32(i32 %e0, i32 %e1)
ret i32 %smax0
}

define i32 @reduce_smax_16xi32_prefix5(ptr %p) {
; CHECK-LABEL: reduce_smax_16xi32_prefix5:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, 524288
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vmv.s.x v10, a1
; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; CHECK-NEXT: vslideup.vi v8, v10, 5
; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; CHECK-NEXT: vslideup.vi v8, v10, 6
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vslideup.vi v8, v10, 7
; CHECK-NEXT: vredmax.vs v8, v8, v8
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <16 x i32>, ptr %p, align 256
%e0 = extractelement <16 x i32> %v, i32 0
%e1 = extractelement <16 x i32> %v, i32 1
%e2 = extractelement <16 x i32> %v, i32 2
%e3 = extractelement <16 x i32> %v, i32 3
%e4 = extractelement <16 x i32> %v, i32 4
%smax0 = call i32 @llvm.smax.i32(i32 %e0, i32 %e1)
%smax1 = call i32 @llvm.smax.i32(i32 %smax0, i32 %e2)
%smax2 = call i32 @llvm.smax.i32(i32 %smax1, i32 %e3)
%smax3 = call i32 @llvm.smax.i32(i32 %smax2, i32 %e4)
ret i32 %smax3
}

define i32 @reduce_smin_16xi32_prefix2(ptr %p) {
; CHECK-LABEL: reduce_smin_16xi32_prefix2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vredmin.vs v8, v8, v8
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <16 x i32>, ptr %p, align 256
%e0 = extractelement <16 x i32> %v, i32 0
%e1 = extractelement <16 x i32> %v, i32 1
%smin0 = call i32 @llvm.smin.i32(i32 %e0, i32 %e1)
ret i32 %smin0
}

define i32 @reduce_smin_16xi32_prefix5(ptr %p) {
; RV32-LABEL: reduce_smin_16xi32_prefix5:
; RV32: # %bb.0:
; RV32-NEXT: lui a1, 524288
; RV32-NEXT: addi a1, a1, -1
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vle32.v v8, (a0)
; RV32-NEXT: vmv.s.x v10, a1
; RV32-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV32-NEXT: vslideup.vi v8, v10, 5
; RV32-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV32-NEXT: vslideup.vi v8, v10, 6
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vslideup.vi v8, v10, 7
; RV32-NEXT: vredmin.vs v8, v8, v8
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: ret
;
; RV64-LABEL: reduce_smin_16xi32_prefix5:
; RV64: # %bb.0:
; RV64-NEXT: lui a1, 524288
; RV64-NEXT: addiw a1, a1, -1
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT: vle32.v v8, (a0)
; RV64-NEXT: vmv.s.x v10, a1
; RV64-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64-NEXT: vslideup.vi v8, v10, 5
; RV64-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64-NEXT: vslideup.vi v8, v10, 6
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT: vslideup.vi v8, v10, 7
; RV64-NEXT: vredmin.vs v8, v8, v8
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
%v = load <16 x i32>, ptr %p, align 256
%e0 = extractelement <16 x i32> %v, i32 0
%e1 = extractelement <16 x i32> %v, i32 1
%e2 = extractelement <16 x i32> %v, i32 2
%e3 = extractelement <16 x i32> %v, i32 3
%e4 = extractelement <16 x i32> %v, i32 4
%smin0 = call i32 @llvm.smin.i32(i32 %e0, i32 %e1)
%smin1 = call i32 @llvm.smin.i32(i32 %smin0, i32 %e2)
%smin2 = call i32 @llvm.smin.i32(i32 %smin1, i32 %e3)
%smin3 = call i32 @llvm.smin.i32(i32 %smin2, i32 %e4)
ret i32 %smin3
}

define i32 @reduce_umax_16xi32_prefix2(ptr %p) {
; CHECK-LABEL: reduce_umax_16xi32_prefix2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vredmaxu.vs v8, v8, v8
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <16 x i32>, ptr %p, align 256
%e0 = extractelement <16 x i32> %v, i32 0
%e1 = extractelement <16 x i32> %v, i32 1
%umax0 = call i32 @llvm.umax.i32(i32 %e0, i32 %e1)
ret i32 %umax0
}

define i32 @reduce_umax_16xi32_prefix5(ptr %p) {
; CHECK-LABEL: reduce_umax_16xi32_prefix5:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 224
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: vmv.v.i v8, -1
; CHECK-NEXT: vmerge.vim v8, v8, 0, v0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v10, (a0)
; CHECK-NEXT: vsext.vf4 v12, v8
; CHECK-NEXT: vand.vv v8, v10, v12
; CHECK-NEXT: vredmaxu.vs v8, v8, v8
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <16 x i32>, ptr %p, align 256
%e0 = extractelement <16 x i32> %v, i32 0
%e1 = extractelement <16 x i32> %v, i32 1
%e2 = extractelement <16 x i32> %v, i32 2
%e3 = extractelement <16 x i32> %v, i32 3
%e4 = extractelement <16 x i32> %v, i32 4
%umax0 = call i32 @llvm.umax.i32(i32 %e0, i32 %e1)
%umax1 = call i32 @llvm.umax.i32(i32 %umax0, i32 %e2)
%umax2 = call i32 @llvm.umax.i32(i32 %umax1, i32 %e3)
%umax3 = call i32 @llvm.umax.i32(i32 %umax2, i32 %e4)
ret i32 %umax3
}

define i32 @reduce_umin_16xi32_prefix2(ptr %p) {
; CHECK-LABEL: reduce_umin_16xi32_prefix2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vredminu.vs v8, v8, v8
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <16 x i32>, ptr %p, align 256
%e0 = extractelement <16 x i32> %v, i32 0
%e1 = extractelement <16 x i32> %v, i32 1
%umin0 = call i32 @llvm.umin.i32(i32 %e0, i32 %e1)
ret i32 %umin0
}

define i32 @reduce_umin_16xi32_prefix5(ptr %p) {
; CHECK-LABEL: reduce_umin_16xi32_prefix5:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, -1
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v10, (a0)
; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; CHECK-NEXT: vslideup.vi v10, v8, 5
; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; CHECK-NEXT: vslideup.vi v10, v8, 6
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vslideup.vi v10, v8, 7
; CHECK-NEXT: vredminu.vs v8, v10, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <16 x i32>, ptr %p, align 256
%e0 = extractelement <16 x i32> %v, i32 0
%e1 = extractelement <16 x i32> %v, i32 1
%e2 = extractelement <16 x i32> %v, i32 2
%e3 = extractelement <16 x i32> %v, i32 3
%e4 = extractelement <16 x i32> %v, i32 4
%umin0 = call i32 @llvm.umin.i32(i32 %e0, i32 %e1)
%umin1 = call i32 @llvm.umin.i32(i32 %umin0, i32 %e2)
%umin2 = call i32 @llvm.umin.i32(i32 %umin1, i32 %e3)
%umin3 = call i32 @llvm.umin.i32(i32 %umin2, i32 %e4)
ret i32 %umin3
}