409 changes: 40 additions & 369 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -12849,63 +12849,11 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m

; TODO: This should be a strided load with zero stride
define <4 x i32> @mgather_broadcast_load_unmasked(ptr %base) {
; RV32-LABEL: mgather_broadcast_load_unmasked:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vlse32.v v8, (a0), zero
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_broadcast_load_unmasked:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64V-NEXT: vmv.v.i v10, 0
; RV64V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64V-NEXT: vluxei64.v v8, (a0), v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_broadcast_load_unmasked:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmset.m v8
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: # implicit-def: $v8
; RV64ZVE32F-NEXT: beqz zero, .LBB99_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB99_6
; RV64ZVE32F-NEXT: .LBB99_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB99_7
; RV64ZVE32F-NEXT: .LBB99_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB99_8
; RV64ZVE32F-NEXT: .LBB99_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB99_5: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vlse32.v v8, (a0), zero
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB99_2
; RV64ZVE32F-NEXT: .LBB99_6: # %cond.load1
; RV64ZVE32F-NEXT: lw a2, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB99_3
; RV64ZVE32F-NEXT: .LBB99_7: # %cond.load4
; RV64ZVE32F-NEXT: lw a2, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB99_4
; RV64ZVE32F-NEXT: .LBB99_8: # %cond.load7
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: ret
; CHECK-LABEL: mgather_broadcast_load_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), zero
; CHECK-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> zeroinitializer
@@ -12929,63 +12877,11 @@ define <4 x i32> @mgather_broadcast_load_unmasked2(ptr %base) {
}
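For reference, the pattern behind the broadcast tests above is a gather whose pointer vector is a splat of %base (a GEP with an all-zero index vector): every lane reads the same word, so with an all-true mask the whole gather collapses to a single zero-stride vlse32.v, which is what the consolidated CHECK lines expect. A minimal standalone reproducer, shown only as an illustrative sketch (the function name is invented; it is not part of the test file):

; Illustrative sketch: all four pointers alias %base, so the all-true gather
; is equivalent to one scalar load broadcast to every lane (zero-stride load).
define <4 x i32> @broadcast_gather_sketch(ptr %base) {
  %head = insertelement <4 x i1> poison, i1 true, i32 0
  %allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
  ; A zero index vector makes %ptrs a splat of %base.
  %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> zeroinitializer
  %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %allones, <4 x i32> poison)
  ret <4 x i32> %v
}
declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>)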

define <4 x i32> @mgather_broadcast_load_masked(ptr %base, <4 x i1> %m) {
; RV32-LABEL: mgather_broadcast_load_masked:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vlse32.v v8, (a0), zero, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_broadcast_load_masked:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64V-NEXT: vmv.v.i v10, 0
; RV64V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64V-NEXT: vluxei64.v v8, (a0), v10, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_broadcast_load_masked:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: # implicit-def: $v8
; RV64ZVE32F-NEXT: bnez a2, .LBB101_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB101_6
; RV64ZVE32F-NEXT: .LBB101_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB101_7
; RV64ZVE32F-NEXT: .LBB101_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB101_8
; RV64ZVE32F-NEXT: .LBB101_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB101_5: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vlse32.v v8, (a0), zero
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB101_2
; RV64ZVE32F-NEXT: .LBB101_6: # %cond.load1
; RV64ZVE32F-NEXT: lw a2, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB101_3
; RV64ZVE32F-NEXT: .LBB101_7: # %cond.load4
; RV64ZVE32F-NEXT: lw a2, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB101_4
; RV64ZVE32F-NEXT: .LBB101_8: # %cond.load7
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: ret
; CHECK-LABEL: mgather_broadcast_load_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), zero, v0.t
; CHECK-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> zeroinitializer
@@ -12996,64 +12892,11 @@ define <4 x i32> @mgather_broadcast_load_masked(ptr %base, <4 x i1> %m) {

; TODO: Should be recognized as a unit stride load
define <4 x i32> @mgather_unit_stride_load(ptr %base) {
; RV32-LABEL: mgather_unit_stride_load:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vle32.v v8, (a0)
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_unit_stride_load:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64V-NEXT: vle32.v v8, (a0)
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_unit_stride_load:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmset.m v8
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: # implicit-def: $v8
; RV64ZVE32F-NEXT: beqz zero, .LBB102_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB102_6
; RV64ZVE32F-NEXT: .LBB102_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB102_7
; RV64ZVE32F-NEXT: .LBB102_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB102_8
; RV64ZVE32F-NEXT: .LBB102_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB102_5: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vlse32.v v8, (a0), zero
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB102_2
; RV64ZVE32F-NEXT: .LBB102_6: # %cond.load1
; RV64ZVE32F-NEXT: addi a2, a0, 4
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB102_3
; RV64ZVE32F-NEXT: .LBB102_7: # %cond.load4
; RV64ZVE32F-NEXT: addi a2, a0, 8
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB102_4
; RV64ZVE32F-NEXT: .LBB102_8: # %cond.load7
; RV64ZVE32F-NEXT: addi a0, a0, 12
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: ret
; CHECK-LABEL: mgather_unit_stride_load:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -13063,71 +12906,12 @@ define <4 x i32> @mgather_unit_stride_load(ptr %base) {

; TODO: Recognize as unit stride load with offset 16b
define <4 x i32> @mgather_unit_stride_load_with_offset(ptr %base) {
; RV32-LABEL: mgather_unit_stride_load_with_offset:
; RV32: # %bb.0:
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vle32.v v8, (a0)
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_unit_stride_load_with_offset:
; RV64V: # %bb.0:
; RV64V-NEXT: lui a1, 115073
; RV64V-NEXT: addiw a1, a1, 1040
; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64V-NEXT: vmv.s.x v8, a1
; RV64V-NEXT: vsext.vf8 v10, v8
; RV64V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64V-NEXT: vluxei64.v v8, (a0), v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_unit_stride_load_with_offset:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmset.m v8
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: # implicit-def: $v8
; RV64ZVE32F-NEXT: beqz zero, .LBB103_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB103_6
; RV64ZVE32F-NEXT: .LBB103_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB103_7
; RV64ZVE32F-NEXT: .LBB103_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB103_8
; RV64ZVE32F-NEXT: .LBB103_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB103_5: # %cond.load
; RV64ZVE32F-NEXT: addi a2, a0, 16
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vlse32.v v8, (a2), zero
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB103_2
; RV64ZVE32F-NEXT: .LBB103_6: # %cond.load1
; RV64ZVE32F-NEXT: addi a2, a0, 20
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB103_3
; RV64ZVE32F-NEXT: .LBB103_7: # %cond.load4
; RV64ZVE32F-NEXT: addi a2, a0, 24
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB103_4
; RV64ZVE32F-NEXT: .LBB103_8: # %cond.load7
; RV64ZVE32F-NEXT: addi a0, a0, 28
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: ret
; CHECK-LABEL: mgather_unit_stride_load_with_offset:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -13136,64 +12920,11 @@ define <4 x i32> @mgather_unit_stride_load_with_offset(ptr %base) {
}
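The indices <4, 5, 6, 7> over i32 elements address 16 consecutive bytes starting at %base + 16, so with an all-true mask the gather above is semantically just a vector load from the offset base. A hedged sketch of that equivalence (illustrative only, not part of the test file; the function name is invented):

; Sketch: the offset unit-stride gather equals a plain <4 x i32> load
; from %base + 16 bytes when the mask is all ones.
define <4 x i32> @offset_unit_stride_sketch(ptr %base) {
  %p = getelementptr inbounds i32, ptr %base, i64 4   ; %base + 16 bytes
  %v = load <4 x i32>, ptr %p, align 4
  ret <4 x i32> %v
}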

define <4 x i32> @mgather_unit_stride_load_narrow_idx(ptr %base) {
; RV32-LABEL: mgather_unit_stride_load_narrow_idx:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vle32.v v8, (a0)
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_unit_stride_load_narrow_idx:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64V-NEXT: vle32.v v8, (a0)
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_unit_stride_load_narrow_idx:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmset.m v8
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: # implicit-def: $v8
; RV64ZVE32F-NEXT: beqz zero, .LBB104_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB104_6
; RV64ZVE32F-NEXT: .LBB104_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB104_7
; RV64ZVE32F-NEXT: .LBB104_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB104_8
; RV64ZVE32F-NEXT: .LBB104_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB104_5: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vlse32.v v8, (a0), zero
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB104_2
; RV64ZVE32F-NEXT: .LBB104_6: # %cond.load1
; RV64ZVE32F-NEXT: addi a2, a0, 4
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB104_3
; RV64ZVE32F-NEXT: .LBB104_7: # %cond.load4
; RV64ZVE32F-NEXT: addi a2, a0, 8
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB104_4
; RV64ZVE32F-NEXT: .LBB104_8: # %cond.load7
; RV64ZVE32F-NEXT: addi a0, a0, 12
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: ret
; CHECK-LABEL: mgather_unit_stride_load_narrow_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%ptrs = getelementptr inbounds i32, ptr %base, <4 x i8> <i8 0, i8 1, i8 2, i8 3>
@@ -13202,64 +12933,11 @@ define <4 x i32> @mgather_unit_stride_load_narrow_idx(ptr %base) {
}

define <4 x i32> @mgather_unit_stride_load_wide_idx(ptr %base) {
; RV32-LABEL: mgather_unit_stride_load_wide_idx:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vle32.v v8, (a0)
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_unit_stride_load_wide_idx:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64V-NEXT: vle32.v v8, (a0)
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_unit_stride_load_wide_idx:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmset.m v8
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: # implicit-def: $v8
; RV64ZVE32F-NEXT: beqz zero, .LBB105_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB105_6
; RV64ZVE32F-NEXT: .LBB105_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB105_7
; RV64ZVE32F-NEXT: .LBB105_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB105_8
; RV64ZVE32F-NEXT: .LBB105_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB105_5: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vlse32.v v8, (a0), zero
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB105_2
; RV64ZVE32F-NEXT: .LBB105_6: # %cond.load1
; RV64ZVE32F-NEXT: addi a2, a0, 4
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB105_3
; RV64ZVE32F-NEXT: .LBB105_7: # %cond.load4
; RV64ZVE32F-NEXT: addi a2, a0, 8
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB105_4
; RV64ZVE32F-NEXT: .LBB105_8: # %cond.load7
; RV64ZVE32F-NEXT: addi a0, a0, 12
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: ret
; CHECK-LABEL: mgather_unit_stride_load_wide_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%ptrs = getelementptr inbounds i32, ptr %base, <4 x i128> <i128 0, i128 1, i128 2, i128 3>
@@ -13577,22 +13255,13 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
}

define <8 x i16> @mgather_shuffle_reverse(ptr %base) {
; RV32-LABEL: mgather_shuffle_reverse:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vle16.v v9, (a0)
; RV32-NEXT: vid.v v8
; RV32-NEXT: vrsub.vi v10, v8, 7
; RV32-NEXT: vrgather.vv v8, v9, v10
; RV32-NEXT: ret
;
; RV64-LABEL: mgather_shuffle_reverse:
; RV64: # %bb.0:
; RV64-NEXT: addi a0, a0, 14
; RV64-NEXT: li a1, -2
; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT: vlse16.v v8, (a0), a1
; RV64-NEXT: ret
; CHECK-LABEL: mgather_shuffle_reverse:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, a0, 14
; CHECK-NEXT: li a1, -2
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vlse16.v v8, (a0), a1
; CHECK-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i16 0
%allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>
@@ -13831,3 +13500,5 @@ define <8 x i16> @mgather_shuffle_vrgather(ptr %base) {
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison)
ret <8 x i16> %v
}
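The mgather_shuffle_reverse case above reads i16 elements 7 down to 0, i.e. the contiguous <8 x i16> at %base traversed backwards, which the lowering encodes as a strided load from %base + 14 with a stride of -2 bytes. An equivalent formulation as a plain load plus a lane reversal, given only as an illustrative sketch (not part of the test file):

; Sketch: with an all-true mask, the reverse-order gather equals one
; contiguous <8 x i16> load followed by a reversing shuffle.
define <8 x i16> @reverse_gather_sketch(ptr %base) {
  %v = load <8 x i16>, ptr %base, align 2
  %rev = shufflevector <8 x i16> %v, <8 x i16> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i16> %rev
}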
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV64: {{.*}}
88 changes: 20 additions & 68 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -11294,25 +11294,12 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
}

define void @mscatter_unit_stride(<8 x i16> %val, ptr %base) {
; RV32-LABEL: mscatter_unit_stride:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vse16.v v8, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_unit_stride:
; RV64: # %bb.0:
; RV64-NEXT: li a1, 2
; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT: vsse16.v v8, (a0), a1
; RV64-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_unit_stride:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: li a1, 2
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vsse16.v v8, (a0), a1
; RV64ZVE32F-NEXT: ret
; CHECK-LABEL: mscatter_unit_stride:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 2
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vsse16.v v8, (a0), a1
; CHECK-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i16 0
%allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
@@ -11321,31 +11308,13 @@ define void @mscatter_unit_stride(<8 x i16> %val, ptr %base) {
}
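With an all-true mask and indices 0 through 7 over i16, mscatter_unit_stride writes 16 contiguous bytes, so it is semantically a plain vector store (the consolidated lowering above still emits it as vsse16.v with a 2-byte stride). The equivalent store, as an illustrative sketch only (not part of the test file):

; Sketch: the all-true unit-stride scatter is equivalent to one
; contiguous <8 x i16> store at %base.
define void @unit_stride_scatter_sketch(<8 x i16> %val, ptr %base) {
  store <8 x i16> %val, ptr %base, align 2
  ret void
}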

define void @mscatter_unit_stride_with_offset(<8 x i16> %val, ptr %base) {
; RV32-LABEL: mscatter_unit_stride_with_offset:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vid.v v10
; RV32-NEXT: vadd.vv v10, v10, v10
; RV32-NEXT: vadd.vi v10, v10, 10
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v10
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_unit_stride_with_offset:
; RV64: # %bb.0:
; RV64-NEXT: addi a0, a0, 10
; RV64-NEXT: li a1, 2
; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT: vsse16.v v8, (a0), a1
; RV64-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_unit_stride_with_offset:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: addi a0, a0, 10
; RV64ZVE32F-NEXT: li a1, 2
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vsse16.v v8, (a0), a1
; RV64ZVE32F-NEXT: ret
; CHECK-LABEL: mscatter_unit_stride_with_offset:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, a0, 10
; CHECK-NEXT: li a1, 2
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vsse16.v v8, (a0), a1
; CHECK-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i16 0
%allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12>
@@ -11354,30 +11323,13 @@ define void @mscatter_unit_stride_with_offset(<8 x i16> %val, ptr %base) {
}
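Likewise, indices 5 through 12 over i16 begin 10 bytes into %base, so the offset scatter above stores 16 contiguous bytes starting at %base + 10, matching the addi a0, a0, 10 plus strided vsse16.v in the CHECK lines. A hedged sketch of the equivalent contiguous store (illustrative only, not part of the test file):

; Sketch: the all-true offset scatter equals one contiguous <8 x i16>
; store at %base + 10 bytes.
define void @offset_scatter_sketch(<8 x i16> %val, ptr %base) {
  %p = getelementptr inbounds i16, ptr %base, i64 5   ; %base + 10 bytes
  store <8 x i16> %val, ptr %p, align 2
  ret void
}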

define void @mscatter_shuffle_reverse(<8 x i16> %val, ptr %base) {
; RV32-LABEL: mscatter_shuffle_reverse:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vid.v v9
; RV32-NEXT: vrsub.vi v9, v9, 7
; RV32-NEXT: vrgather.vv v10, v8, v9
; RV32-NEXT: vse16.v v10, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_shuffle_reverse:
; RV64: # %bb.0:
; RV64-NEXT: addi a0, a0, 14
; RV64-NEXT: li a1, -2
; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT: vsse16.v v8, (a0), a1
; RV64-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_shuffle_reverse:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: addi a0, a0, 14
; RV64ZVE32F-NEXT: li a1, -2
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vsse16.v v8, (a0), a1
; RV64ZVE32F-NEXT: ret
; CHECK-LABEL: mscatter_shuffle_reverse:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, a0, 14
; CHECK-NEXT: li a1, -2
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vsse16.v v8, (a0), a1
; CHECK-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i16 0
%allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>