diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index bf128500f6005..93f6e39b56ab6 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -596,8 +596,26 @@ static std::pair<Value *, APInt> getMask(Value *WideMask, unsigned Factor,

   if (auto *IMI = dyn_cast<IntrinsicInst>(WideMask)) {
     if (unsigned F = getInterleaveIntrinsicFactor(IMI->getIntrinsicID());
-        F && F == Factor && llvm::all_equal(IMI->args())) {
-      return {IMI->getArgOperand(0), GapMask};
+        F && F == Factor) {
+      Value *RefArg = nullptr;
+      // Check if all the intrinsic arguments are the same, except those that
+      // are zeros, which we mark as gaps in the gap mask.
+      for (auto [Idx, Arg] : enumerate(IMI->args())) {
+        if (auto *C = dyn_cast<Constant>(Arg); C && C->isZeroValue()) {
+          GapMask.clearBit(Idx);
+          continue;
+        }
+
+        if (!RefArg)
+          RefArg = Arg;
+        else if (RefArg != Arg)
+          return {nullptr, GapMask};
+      }
+
+      // On a very rare occasion, all the intrinsic arguments might be zeros,
+      // in which case we still want to return an all-zeros constant instead of
+      // nullptr.
+      return {RefArg ? RefArg : IMI->getArgOperand(0), GapMask};
     }
   }

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index 470e3095d418d..c426ee7b7d2b1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -205,6 +205,23 @@ define {<4 x i32>, <4 x i32>} @vpload_factor2_interleaved_mask_intrinsic(ptr %pt
   ret {<4 x i32>, <4 x i32>} %res1
 }

+; mask = %m, skip the last two fields.
+define {<2 x i32>, <2 x i32>} @vpload_factor4_interleaved_mask_intrinsic_skip_fields(ptr %ptr, <2 x i1> %m) {
+; CHECK-LABEL: vpload_factor4_interleaved_mask_intrinsic_skip_fields:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 16
+; CHECK-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
+; CHECK-NEXT:    vlsseg2e32.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+  %interleaved.mask = call <8 x i1> @llvm.vector.interleave4(<2 x i1> %m, <2 x i1> %m, <2 x i1> splat (i1 false), <2 x i1> splat (i1 false))
+  %interleaved.vec = tail call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> %interleaved.mask, i32 8)
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <2 x i32> <i32 0, i32 4>
+  %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <2 x i32> <i32 1, i32 5>
+  %res0 = insertvalue {<2 x i32>, <2 x i32>} undef, <2 x i32> %v0, 0
+  %res1 = insertvalue {<2 x i32>, <2 x i32>} %res0, <2 x i32> %v1, 1
+  ret {<2 x i32>, <2 x i32>} %res1
+}
+
 define {<4 x i32>, <4 x i32>} @vpload_factor2_interleaved_mask_shuffle(ptr %ptr, <4 x i1> %m) {
 ; CHECK-LABEL: vpload_factor2_interleaved_mask_shuffle:
 ; CHECK:       # %bb.0:
@@ -532,8 +549,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    li a2, 32
 ; RV32-NEXT:    lui a3, 12
 ; RV32-NEXT:    lui a6, 12291
-; RV32-NEXT:    lui a7, %hi(.LCPI26_0)
-; RV32-NEXT:    addi a7, a7, %lo(.LCPI26_0)
+; RV32-NEXT:    lui a7, %hi(.LCPI27_0)
+; RV32-NEXT:    addi a7, a7, %lo(.LCPI27_0)
 ; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
 ; RV32-NEXT:    vle32.v v24, (a5)
 ; RV32-NEXT:    vmv.s.x v0, a3
@@ -618,12 +635,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
 ; RV32-NEXT:    lui a7, 49164
-; RV32-NEXT:    lui a1, %hi(.LCPI26_1)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI26_1)
+; RV32-NEXT:    lui a1, %hi(.LCPI27_1)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI27_1)
 ; RV32-NEXT:    lui t2, 3
 ; RV32-NEXT:    lui t1, 196656
-; RV32-NEXT:    lui a4, %hi(.LCPI26_3)
-; RV32-NEXT:    addi a4, a4, %lo(.LCPI26_3)
+; RV32-NEXT:    lui a4, %hi(.LCPI27_3)
+; RV32-NEXT:    addi a4, a4, %lo(.LCPI27_3)
 ; RV32-NEXT:    lui t0, 786624
 ; RV32-NEXT:    li a5, 48
 ; RV32-NEXT:    lui a6, 768
@@ -802,8 +819,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vrgatherei16.vv v24, v8, v2
-; RV32-NEXT:    lui a1, %hi(.LCPI26_2)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI26_2)
+; RV32-NEXT:    lui a1, %hi(.LCPI27_2)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI27_2)
 ; RV32-NEXT:    lui a3, 3073
 ; RV32-NEXT:    addi a3, a3, -1024
 ; RV32-NEXT:    vmv.s.x v0, a3
@@ -867,16 +884,16 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vrgatherei16.vv v28, v8, v3
 ; RV32-NEXT:    vsetivli zero, 10, e32, m4, tu, ma
 ; RV32-NEXT:    vmv.v.v v28, v24
-; RV32-NEXT:    lui a1, %hi(.LCPI26_4)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI26_4)
-; RV32-NEXT:    lui a2, %hi(.LCPI26_5)
-; RV32-NEXT:    addi a2, a2, %lo(.LCPI26_5)
+; RV32-NEXT:    lui a1, %hi(.LCPI27_4)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI27_4)
+; RV32-NEXT:    lui a2, %hi(.LCPI27_5)
+; RV32-NEXT:    addi a2, a2, %lo(.LCPI27_5)
 ; RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; RV32-NEXT:    vle16.v v24, (a2)
 ; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; RV32-NEXT:    vle16.v v8, (a1)
-; RV32-NEXT:    lui a1, %hi(.LCPI26_7)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI26_7)
+; RV32-NEXT:    lui a1, %hi(.LCPI27_7)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI27_7)
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT:    vle16.v v10, (a1)
 ; RV32-NEXT:    csrr a1, vlenb
@@ -904,14 +921,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vl8r.v v0, (a1) # vscale x 64-byte Folded Reload
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT:    vrgatherei16.vv v16, v0, v10
-; RV32-NEXT:    lui a1, %hi(.LCPI26_6)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI26_6)
-; RV32-NEXT:    lui a2, %hi(.LCPI26_8)
-; RV32-NEXT:    addi a2, a2, %lo(.LCPI26_8)
+; RV32-NEXT:    lui a1, %hi(.LCPI27_6)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI27_6)
+; RV32-NEXT:    lui a2, %hi(.LCPI27_8)
+; RV32-NEXT:    addi a2, a2, %lo(.LCPI27_8)
 ; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; RV32-NEXT:    vle16.v v4, (a1)
-; RV32-NEXT:    lui a1, %hi(.LCPI26_9)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI26_9)
+; RV32-NEXT:    lui a1, %hi(.LCPI27_9)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI27_9)
 ; RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; RV32-NEXT:    vle16.v v6, (a1)
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
@@ -998,8 +1015,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT:    li a4, 128
 ; RV64-NEXT:    lui a1, 1
 ; RV64-NEXT:    vle64.v v8, (a3)
-; RV64-NEXT:    lui a3, %hi(.LCPI26_0)
-; RV64-NEXT:    addi a3, a3, %lo(.LCPI26_0)
+; RV64-NEXT:    lui a3, %hi(.LCPI27_0)
+; RV64-NEXT:    addi a3, a3, %lo(.LCPI27_0)
 ; RV64-NEXT:    vmv.s.x v0, a4
 ; RV64-NEXT:    csrr a4, vlenb
 ; RV64-NEXT:    li a5, 61
@@ -1187,8 +1204,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV64-NEXT:    vslideup.vi v12, v16, 1, v0.t
-; RV64-NEXT:    lui a2, %hi(.LCPI26_1)
-; RV64-NEXT:    addi a2, a2, %lo(.LCPI26_1)
+; RV64-NEXT:    lui a2, %hi(.LCPI27_1)
+; RV64-NEXT:    addi a2, a2, %lo(.LCPI27_1)
 ; RV64-NEXT:    li a3, 192
 ; RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; RV64-NEXT:    vle16.v v6, (a2)
@@ -1222,8 +1239,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT:    vrgatherei16.vv v24, v16, v6
 ; RV64-NEXT:    addi a2, sp, 16
 ; RV64-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    lui a2, %hi(.LCPI26_2)
-; RV64-NEXT:    addi a2, a2, %lo(.LCPI26_2)
+; RV64-NEXT:    lui a2, %hi(.LCPI27_2)
+; RV64-NEXT:    addi a2, a2, %lo(.LCPI27_2)
 ; RV64-NEXT:    li a3, 1040
 ; RV64-NEXT:    vmv.s.x v0, a3
 ; RV64-NEXT:    addi a1, a1, -2016
@@ -1307,12 +1324,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
 ; RV64-NEXT:    vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui a1, %hi(.LCPI26_3)
-; RV64-NEXT:    addi a1, a1, %lo(.LCPI26_3)
+; RV64-NEXT:    lui a1, %hi(.LCPI27_3)
+; RV64-NEXT:    addi a1, a1, %lo(.LCPI27_3)
 ; RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; RV64-NEXT:    vle16.v v20, (a1)
-; RV64-NEXT:    lui a1, %hi(.LCPI26_4)
-; RV64-NEXT:    addi a1, a1, %lo(.LCPI26_4)
+; RV64-NEXT:    lui a1, %hi(.LCPI27_4)
+; RV64-NEXT:    addi a1, a1, %lo(.LCPI27_4)
 ; RV64-NEXT:    vle16.v v8, (a1)
 ; RV64-NEXT:    csrr a1, vlenb
 ; RV64-NEXT:    li a2, 77
@@ -1363,8 +1380,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT:    vl2r.v v8, (a1) # vscale x 16-byte Folded Reload
 ; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV64-NEXT:    vrgatherei16.vv v0, v16, v8
-; RV64-NEXT:    lui a1, %hi(.LCPI26_5)
-; RV64-NEXT:    addi a1, a1, %lo(.LCPI26_5)
+; RV64-NEXT:    lui a1, %hi(.LCPI27_5)
+; RV64-NEXT:    addi a1, a1, %lo(.LCPI27_5)
 ; RV64-NEXT:    vle16.v v20, (a1)
 ; RV64-NEXT:    csrr a1, vlenb
 ; RV64-NEXT:    li a2, 61
@@ -1981,8 +1998,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_mask(ptr %ptr) {
 ; RV32-NEXT:    vle32.v v12, (a0), v0.t
 ; RV32-NEXT:    li a0, 36
 ; RV32-NEXT:    vmv.s.x v20, a1
-; RV32-NEXT:    lui a1, %hi(.LCPI62_0)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI62_0)
+; RV32-NEXT:    lui a1, %hi(.LCPI63_0)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI63_0)
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vle16.v v21, (a1)
 ; RV32-NEXT:    vcompress.vm v8, v12, v11
@@ -2057,8 +2074,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_evl(ptr %ptr) {
 ; RV32-NEXT:    vmv.s.x v10, a0
 ; RV32-NEXT:    li a0, 146
 ; RV32-NEXT:    vmv.s.x v11, a0
-; RV32-NEXT:    lui a0, %hi(.LCPI63_0)
-; RV32-NEXT:    addi a0, a0, %lo(.LCPI63_0)
+; RV32-NEXT:    lui a0, %hi(.LCPI64_0)
+; RV32-NEXT:    addi a0, a0, %lo(.LCPI64_0)
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vle16.v v20, (a0)
 ; RV32-NEXT:    li a0, 36
@@ -2277,8 +2294,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @maskedload_factor3_invalid_skip_field(
 ; RV32-NEXT:    vle32.v v12, (a0), v0.t
 ; RV32-NEXT:    li a0, 36
 ; RV32-NEXT:    vmv.s.x v20, a1
-; RV32-NEXT:    lui a1, %hi(.LCPI72_0)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI72_0)
+; RV32-NEXT:    lui a1, %hi(.LCPI73_0)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI73_0)
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vle16.v v21, (a1)
 ; RV32-NEXT:    vcompress.vm v8, v12, v11
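
Illustrative note (not part of the patch): the gap-mask handling added to getMask() above can be sketched as a standalone C++17 program. The helper name getMaskWithGaps and the int/uint64_t stand-ins for LLVM's Value*/APInt are hypothetical, chosen only to show the control flow under the same assumptions as the patch: a zero argument clears the corresponding bit of the gap mask, mixing two different non-zero arguments gives up, and an all-zero argument list still yields a (zero) value rather than a null result.

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <optional>
#include <utility>
#include <vector>

// Sketch of the argument scan in getMask(): Args models the operands of an
// llvm.vector.interleaveN mask, with 0 standing in for an all-false constant
// (Args is assumed non-empty). Returns {reference argument, gap bitmask}, or
// std::nullopt when two different non-zero arguments are mixed.
static std::optional<std::pair<int, uint64_t>>
getMaskWithGaps(const std::vector<int> &Args) {
  uint64_t GapMask = (uint64_t(1) << Args.size()) - 1; // all fields active
  std::optional<int> RefArg;
  for (std::size_t Idx = 0; Idx < Args.size(); ++Idx) {
    if (Args[Idx] == 0) {
      GapMask &= ~(uint64_t(1) << Idx); // zero argument => field Idx is a gap
      continue;
    }
    if (!RefArg)
      RefArg = Args[Idx];
    else if (*RefArg != Args[Idx])
      return std::nullopt;
  }
  // All-zero case: still return a (zero) value rather than failing.
  return std::make_pair(RefArg.value_or(Args.front()), GapMask);
}

int main() {
  // Mirrors the new test: interleave4(%m, %m, zero, zero) leaves fields 0 and
  // 1 active, so the gap mask is 0b0011.
  if (auto R = getMaskWithGaps({7, 7, 0, 0}))
    std::printf("ref=%d gaps=0x%llx\n", R->first,
                static_cast<unsigned long long>(R->second));
  return 0;
}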