diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index cb3ee899dde7d..fd51949c6023a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -13193,12 +13193,155 @@ define <4 x i32> @mgather_unit_stride_load_with_offset(ptr %base) {
   ret <4 x i32> %v
 }
 
+define <4 x i32> @mgather_unit_stride_load_narrow_idx(ptr %base) {
+; RV32-LABEL: mgather_unit_stride_load_narrow_idx:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vid.v v8
+; RV32-NEXT: vsll.vi v8, v8, 2
+; RV32-NEXT: vluxei32.v v8, (a0), v8
+; RV32-NEXT: ret
+;
+; RV64V-LABEL: mgather_unit_stride_load_narrow_idx:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64V-NEXT: vid.v v8
+; RV64V-NEXT: vsll.vi v10, v8, 2
+; RV64V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64V-NEXT: vluxei64.v v8, (a0), v10
+; RV64V-NEXT: ret
+;
+; RV64ZVE32F-LABEL: mgather_unit_stride_load_narrow_idx:
+; RV64ZVE32F: # %bb.0:
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vmset.m v8
+; RV64ZVE32F-NEXT: vmv.x.s a1, v8
+; RV64ZVE32F-NEXT: # implicit-def: $v8
+; RV64ZVE32F-NEXT: beqz zero, .LBB104_5
+; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: andi a2, a1, 2
+; RV64ZVE32F-NEXT: bnez a2, .LBB104_6
+; RV64ZVE32F-NEXT: .LBB104_2: # %else2
+; RV64ZVE32F-NEXT: andi a2, a1, 4
+; RV64ZVE32F-NEXT: bnez a2, .LBB104_7
+; RV64ZVE32F-NEXT: .LBB104_3: # %else5
+; RV64ZVE32F-NEXT: andi a1, a1, 8
+; RV64ZVE32F-NEXT: bnez a1, .LBB104_8
+; RV64ZVE32F-NEXT: .LBB104_4: # %else8
+; RV64ZVE32F-NEXT: ret
+; RV64ZVE32F-NEXT: .LBB104_5: # %cond.load
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vlse32.v v8, (a0), zero
+; RV64ZVE32F-NEXT: andi a2, a1, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB104_2
+; RV64ZVE32F-NEXT: .LBB104_6: # %cond.load1
+; RV64ZVE32F-NEXT: addi a2, a0, 4
+; RV64ZVE32F-NEXT: lw a2, 0(a2)
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
+; RV64ZVE32F-NEXT: andi a2, a1, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB104_3
+; RV64ZVE32F-NEXT: .LBB104_7: # %cond.load4
+; RV64ZVE32F-NEXT: addi a2, a0, 8
+; RV64ZVE32F-NEXT: lw a2, 0(a2)
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
+; RV64ZVE32F-NEXT: andi a1, a1, 8
+; RV64ZVE32F-NEXT: beqz a1, .LBB104_4
+; RV64ZVE32F-NEXT: .LBB104_8: # %cond.load7
+; RV64ZVE32F-NEXT: addi a0, a0, 12
+; RV64ZVE32F-NEXT: lw a0, 0(a0)
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vmv.s.x v9, a0
+; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
+; RV64ZVE32F-NEXT: ret
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %ptrs = getelementptr inbounds i32, ptr %base, <4 x i8> <i8 0, i8 1, i8 2, i8 3>
+  %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %allones, <4 x i32> poison)
+  ret <4 x i32> %v
+}
+
+define <4 x i32> @mgather_unit_stride_load_wide_idx(ptr %base) {
+; RV32-LABEL: mgather_unit_stride_load_wide_idx:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vid.v v8
+; RV32-NEXT: vsll.vi v8, v8, 2
+; RV32-NEXT: vluxei32.v v8, (a0), v8
+; RV32-NEXT: ret
+;
+; RV64V-LABEL: mgather_unit_stride_load_wide_idx:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64V-NEXT: vid.v v8
+; RV64V-NEXT: vsll.vi v10, v8, 2
+; RV64V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64V-NEXT: vluxei64.v v8, (a0), v10
+; RV64V-NEXT: ret
+;
+; RV64ZVE32F-LABEL: mgather_unit_stride_load_wide_idx:
+; RV64ZVE32F: # %bb.0:
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vmset.m v8
+; RV64ZVE32F-NEXT: vmv.x.s a1, v8
+; RV64ZVE32F-NEXT: # implicit-def: $v8
+; RV64ZVE32F-NEXT: beqz zero, .LBB105_5
+; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: andi a2, a1, 2
+; RV64ZVE32F-NEXT: bnez a2, .LBB105_6
+; RV64ZVE32F-NEXT: .LBB105_2: # %else2
+; RV64ZVE32F-NEXT: andi a2, a1, 4
+; RV64ZVE32F-NEXT: bnez a2, .LBB105_7
+; RV64ZVE32F-NEXT: .LBB105_3: # %else5
+; RV64ZVE32F-NEXT: andi a1, a1, 8
+; RV64ZVE32F-NEXT: bnez a1, .LBB105_8
+; RV64ZVE32F-NEXT: .LBB105_4: # %else8
+; RV64ZVE32F-NEXT: ret
+; RV64ZVE32F-NEXT: .LBB105_5: # %cond.load
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vlse32.v v8, (a0), zero
+; RV64ZVE32F-NEXT: andi a2, a1, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB105_2
+; RV64ZVE32F-NEXT: .LBB105_6: # %cond.load1
+; RV64ZVE32F-NEXT: addi a2, a0, 4
+; RV64ZVE32F-NEXT: lw a2, 0(a2)
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
+; RV64ZVE32F-NEXT: andi a2, a1, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB105_3
+; RV64ZVE32F-NEXT: .LBB105_7: # %cond.load4
+; RV64ZVE32F-NEXT: addi a2, a0, 8
+; RV64ZVE32F-NEXT: lw a2, 0(a2)
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
+; RV64ZVE32F-NEXT: andi a1, a1, 8
+; RV64ZVE32F-NEXT: beqz a1, .LBB105_4
+; RV64ZVE32F-NEXT: .LBB105_8: # %cond.load7
+; RV64ZVE32F-NEXT: addi a0, a0, 12
+; RV64ZVE32F-NEXT: lw a0, 0(a0)
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vmv.s.x v9, a0
+; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
+; RV64ZVE32F-NEXT: ret
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %ptrs = getelementptr inbounds i32, ptr %base, <4 x i128> <i128 0, i128 1, i128 2, i128 3>
+  %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %allones, <4 x i32> poison)
+  ret <4 x i32> %v
+}
+
+
 ; TODO: Recognize as strided load with SEW=32
 define <8 x i16> @mgather_strided_2xSEW(ptr %base) {
 ; RV32-LABEL: mgather_strided_2xSEW:
 ; RV32: # %bb.0:
-; RV32-NEXT: lui a1, %hi(.LCPI104_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI104_0)
+; RV32-NEXT: lui a1, %hi(.LCPI106_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI106_0)
 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
 ; RV32-NEXT: vle32.v v10, (a1)
 ; RV32-NEXT: vluxei32.v v8, (a0), v10
@@ -13206,8 +13349,8 @@ define <8 x i16> @mgather_strided_2xSEW(ptr %base) {
 ;
 ; RV64V-LABEL: mgather_strided_2xSEW:
 ; RV64V: # %bb.0:
-; RV64V-NEXT: lui a1, %hi(.LCPI104_0)
-; RV64V-NEXT: addi a1, a1, %lo(.LCPI104_0)
+; RV64V-NEXT: lui a1, %hi(.LCPI106_0)
+; RV64V-NEXT: addi a1, a1, %lo(.LCPI106_0)
 ; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
 ; RV64V-NEXT: vle64.v v12, (a1)
 ; RV64V-NEXT: vluxei64.v v8, (a0), v12
@@ -13219,35 +13362,35 @@ define <8 x i16> @mgather_strided_2xSEW(ptr %base) {
 ; RV64ZVE32F-NEXT: vmset.m v8
 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
 ; RV64ZVE32F-NEXT: # implicit-def: $v8
-; RV64ZVE32F-NEXT: beqz zero, .LBB104_9
+; RV64ZVE32F-NEXT: beqz zero, .LBB106_9
 ; RV64ZVE32F-NEXT: # %bb.1: # %else
 ; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB104_10
-; RV64ZVE32F-NEXT: .LBB104_2: # %else2
+; RV64ZVE32F-NEXT: bnez a2, .LBB106_10
+; RV64ZVE32F-NEXT: .LBB106_2: # %else2
 ; RV64ZVE32F-NEXT: andi a2, a1, 4
-; RV64ZVE32F-NEXT: bnez a2, .LBB104_11
-; RV64ZVE32F-NEXT: .LBB104_3: # %else5
+; RV64ZVE32F-NEXT: bnez a2, .LBB106_11
+; RV64ZVE32F-NEXT: .LBB106_3: # %else5
 ; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB104_12
-; RV64ZVE32F-NEXT: .LBB104_4: # %else8
+; RV64ZVE32F-NEXT: bnez a2, .LBB106_12
+; RV64ZVE32F-NEXT: .LBB106_4: # %else8
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB104_13
-; RV64ZVE32F-NEXT: .LBB104_5: # %else11
+; RV64ZVE32F-NEXT: bnez a2, .LBB106_13
+; RV64ZVE32F-NEXT: .LBB106_5: # %else11
 ; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB104_14
-; RV64ZVE32F-NEXT: .LBB104_6: # %else14
+; RV64ZVE32F-NEXT: bnez a2, .LBB106_14
+; RV64ZVE32F-NEXT: .LBB106_6: # %else14
 ; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: bnez a2, .LBB104_15
-; RV64ZVE32F-NEXT: .LBB104_7: # %else17
+; RV64ZVE32F-NEXT: bnez a2, .LBB106_15
+; RV64ZVE32F-NEXT: .LBB106_7: # %else17
 ; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB104_16
-; RV64ZVE32F-NEXT: .LBB104_8: # %else20
+; RV64ZVE32F-NEXT: bnez a1, .LBB106_16
+; RV64ZVE32F-NEXT: .LBB106_8: # %else20
 ; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB104_9: # %cond.load
+; RV64ZVE32F-NEXT: .LBB106_9: # %cond.load
 ; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero
 ; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB104_2
-; RV64ZVE32F-NEXT: .LBB104_10: # %cond.load1
+; RV64ZVE32F-NEXT: beqz a2, .LBB106_2
+; RV64ZVE32F-NEXT: .LBB106_10: # %cond.load1
 ; RV64ZVE32F-NEXT: addi a2, a0, 2
 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
@@ -13255,48 +13398,48 @@ define <8 x i16> @mgather_strided_2xSEW(ptr %base) {
 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
 ; RV64ZVE32F-NEXT: andi a2, a1, 4
-; RV64ZVE32F-NEXT: beqz a2, .LBB104_3
-; RV64ZVE32F-NEXT: .LBB104_11: # %cond.load4
+; RV64ZVE32F-NEXT: beqz a2, .LBB106_3
+; RV64ZVE32F-NEXT: .LBB106_11: # %cond.load4
 ; RV64ZVE32F-NEXT: addi a2, a0, 8
 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
 ; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB104_4
-; RV64ZVE32F-NEXT: .LBB104_12: # %cond.load7
+; RV64ZVE32F-NEXT: beqz a2, .LBB106_4
+; RV64ZVE32F-NEXT: .LBB106_12: # %cond.load7
 ; RV64ZVE32F-NEXT: addi a2, a0, 10
 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB104_5
-; RV64ZVE32F-NEXT: .LBB104_13: # %cond.load10
+; RV64ZVE32F-NEXT: beqz a2, .LBB106_5
+; RV64ZVE32F-NEXT: .LBB106_13: # %cond.load10
 ; RV64ZVE32F-NEXT: addi a2, a0, 16
 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
 ; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB104_6
-; RV64ZVE32F-NEXT: .LBB104_14: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a2, .LBB106_6
+; RV64ZVE32F-NEXT: .LBB106_14: # %cond.load13
 ; RV64ZVE32F-NEXT: addi a2, a0, 18
 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
 ; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: beqz a2, .LBB104_7
-; RV64ZVE32F-NEXT: .LBB104_15: # %cond.load16
+; RV64ZVE32F-NEXT: beqz a2, .LBB106_7
+; RV64ZVE32F-NEXT: .LBB106_15: # %cond.load16
 ; RV64ZVE32F-NEXT: addi a2, a0, 24
 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
 ; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB104_8
-; RV64ZVE32F-NEXT: .LBB104_16: # %cond.load19
+; RV64ZVE32F-NEXT: beqz a1, .LBB106_8
+; RV64ZVE32F-NEXT: .LBB106_16: # %cond.load19
 ; RV64ZVE32F-NEXT: addi a0, a0, 26
 ; RV64ZVE32F-NEXT: lh a0, 0(a0)
 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
@@ -13314,8 +13457,8 @@ define <8 x i16> @mgather_strided_2xSEW(ptr %base) {
 define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
 ; RV32-LABEL: mgather_gather_2xSEW:
 ; RV32: # %bb.0:
-; RV32-NEXT: lui a1, %hi(.LCPI105_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI105_0)
+; RV32-NEXT: lui a1, %hi(.LCPI107_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI107_0)
 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
 ; RV32-NEXT: vle32.v v10, (a1)
 ; RV32-NEXT: vluxei32.v v8, (a0), v10
@@ -13323,8 +13466,8 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
 ;
 ; RV64V-LABEL: mgather_gather_2xSEW:
 ; RV64V: # %bb.0:
-; RV64V-NEXT: lui a1, %hi(.LCPI105_0)
-; RV64V-NEXT: addi a1, a1, %lo(.LCPI105_0)
+; RV64V-NEXT: lui a1, %hi(.LCPI107_0)
+; RV64V-NEXT: addi a1, a1, %lo(.LCPI107_0)
 ; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
 ; RV64V-NEXT: vle64.v v12, (a1)
 ; RV64V-NEXT: vluxei64.v v8, (a0), v12
@@ -13336,35 +13479,35 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
 ; RV64ZVE32F-NEXT: vmset.m v8
 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
 ; RV64ZVE32F-NEXT: # implicit-def: $v8
-; RV64ZVE32F-NEXT: beqz zero, .LBB105_9
+; RV64ZVE32F-NEXT: beqz zero, .LBB107_9
 ; RV64ZVE32F-NEXT: # %bb.1: # %else
 ; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB105_10
-; RV64ZVE32F-NEXT: .LBB105_2: # %else2
+; RV64ZVE32F-NEXT: bnez a2, .LBB107_10
+; RV64ZVE32F-NEXT: .LBB107_2: # %else2
 ; RV64ZVE32F-NEXT: andi a2, a1, 4
-; RV64ZVE32F-NEXT: bnez a2, .LBB105_11
-; RV64ZVE32F-NEXT: .LBB105_3: # %else5
+; RV64ZVE32F-NEXT: bnez a2, .LBB107_11
+; RV64ZVE32F-NEXT: .LBB107_3: # %else5
 ; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB105_12
-; RV64ZVE32F-NEXT: .LBB105_4: # %else8
+; RV64ZVE32F-NEXT: bnez a2, .LBB107_12
+; RV64ZVE32F-NEXT: .LBB107_4: # %else8
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB105_13
-; RV64ZVE32F-NEXT: .LBB105_5: # %else11
+; RV64ZVE32F-NEXT: bnez a2, .LBB107_13
+; RV64ZVE32F-NEXT: .LBB107_5: # %else11
 ; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB105_14
-; RV64ZVE32F-NEXT: .LBB105_6: # %else14
+; RV64ZVE32F-NEXT: bnez a2, .LBB107_14
+; RV64ZVE32F-NEXT: .LBB107_6: # %else14
 ; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: bnez a2, .LBB105_15
-; RV64ZVE32F-NEXT: .LBB105_7: # %else17
+; RV64ZVE32F-NEXT: bnez a2, .LBB107_15
+; RV64ZVE32F-NEXT: .LBB107_7: # %else17
 ; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB105_16
-; RV64ZVE32F-NEXT: .LBB105_8: # %else20
+; RV64ZVE32F-NEXT: bnez a1, .LBB107_16
+; RV64ZVE32F-NEXT: .LBB107_8: # %else20
 ; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB105_9: # %cond.load
+; RV64ZVE32F-NEXT: .LBB107_9: # %cond.load
 ; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero
 ; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB105_2
-; RV64ZVE32F-NEXT: .LBB105_10: # %cond.load1
+; RV64ZVE32F-NEXT: beqz a2, .LBB107_2
+; RV64ZVE32F-NEXT: .LBB107_10: # %cond.load1
 ; RV64ZVE32F-NEXT: addi a2, a0, 2
 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
@@ -13372,48 +13515,48 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
 ; RV64ZVE32F-NEXT: andi a2, a1, 4
-; RV64ZVE32F-NEXT: beqz a2, .LBB105_3
-; RV64ZVE32F-NEXT: .LBB105_11: # %cond.load4
+; RV64ZVE32F-NEXT: beqz a2, .LBB107_3
+; RV64ZVE32F-NEXT: .LBB107_11: # %cond.load4
 ; RV64ZVE32F-NEXT: addi a2, a0, 4
 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
 ; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB105_4
-; RV64ZVE32F-NEXT: .LBB105_12: # %cond.load7
+; RV64ZVE32F-NEXT: beqz a2, .LBB107_4
+; RV64ZVE32F-NEXT: .LBB107_12: # %cond.load7
 ; RV64ZVE32F-NEXT: addi a2, a0, 6
 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB105_5
-; RV64ZVE32F-NEXT: .LBB105_13: # %cond.load10
+; RV64ZVE32F-NEXT: beqz a2, .LBB107_5
+; RV64ZVE32F-NEXT: .LBB107_13: # %cond.load10
 ; RV64ZVE32F-NEXT: addi a2, a0, 16
 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
 ; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB105_6
-; RV64ZVE32F-NEXT: .LBB105_14: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a2, .LBB107_6
+; RV64ZVE32F-NEXT: .LBB107_14: # %cond.load13
 ; RV64ZVE32F-NEXT: addi a2, a0, 18
 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
 ; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: beqz a2, .LBB105_7
-; RV64ZVE32F-NEXT: .LBB105_15: # %cond.load16
+; RV64ZVE32F-NEXT: beqz a2, .LBB107_7
+; RV64ZVE32F-NEXT: .LBB107_15: # %cond.load16
 ; RV64ZVE32F-NEXT: addi a2, a0, 20
 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
 ; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB105_8
-; RV64ZVE32F-NEXT: .LBB105_16: # %cond.load19
+; RV64ZVE32F-NEXT: beqz a1, .LBB107_8
+; RV64ZVE32F-NEXT: .LBB107_16: # %cond.load19
 ; RV64ZVE32F-NEXT: addi a0, a0, 22
 ; RV64ZVE32F-NEXT: lh a0, 0(a0)
 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store.ll
index e449b6f2280e2..b359f71be0e67 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store.ll
@@ -963,3 +963,47 @@ entry:
 }
 
 declare <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr>, i32 immarg, <8 x i1>, <8 x i8>)
+
+define void @gather_narrow_idx(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) {
+; CHECK-LABEL: @gather_narrow_idx(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <32 x i16> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[I:%.*]] = mul nuw nsw <32 x i16> [[VEC_IND]],
+; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], <32 x i16> [[I]]
+; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> [[I1]], i32 1, <32 x i1> , <32 x i8> undef)
+; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1
+; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]]
+; CHECK-NEXT: store <32 x i8> [[I4]], ptr [[I2]], align 1
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i16> [[VEC_IND]],
+; CHECK-NEXT: [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; CHECK-NEXT: br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %vector.body
+
+vector.body: ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %vec.ind = phi <32 x i16> [ , %entry ], [ %vec.ind.next, %vector.body ]
+  %i = mul nuw nsw <32 x i16> %vec.ind,
+  %i1 = getelementptr inbounds i8, ptr %B, <32 x i16> %i
+  %wide.masked.gather = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %i1, i32 1, <32 x i1> , <32 x i8> undef)
+  %i2 = getelementptr inbounds i8, ptr %A, i64 %index
+  %wide.load = load <32 x i8>, ptr %i2, align 1
+  %i4 = add <32 x i8> %wide.load, %wide.masked.gather
+  store <32 x i8> %i4, ptr %i2, align 1
+  %index.next = add nuw i64 %index, 32
+  %vec.ind.next = add <32 x i16> %vec.ind,
+  %i6 = icmp eq i64 %index.next, 1024
+  br i1 %i6, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup: ; preds = %vector.body
+  ret void
+}
+