Skip to content

Commit 7da91fa

Browse files
authored
[CodeGen] Fix failing assert in interleaved access pass (#156457)
In the InterleavedAccessPass, the function getMask assumes that shufflevector operations always operate on fixed-width vectors. That isn't true, because shufflevector is also used to splat scalable vectors. This patch fixes the failing assert by making getMask bail out when the shuffle's operand is not a fixed-width vector.
1 parent b169302 commit 7da91fa

File tree

2 files changed

+20
-1
lines changed

2 files changed

+20
-1
lines changed

llvm/lib/CodeGen/InterleavedAccessPass.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -660,6 +660,10 @@ static std::pair<Value *, APInt> getMask(Value *WideMask, unsigned Factor,
660660
}
661661

662662
if (auto *SVI = dyn_cast<ShuffleVectorInst>(WideMask)) {
663+
Type *Op1Ty = SVI->getOperand(1)->getType();
664+
if (!isa<FixedVectorType>(Op1Ty))
665+
return {nullptr, GapMask};
666+
663667
// Check that the shuffle mask is: a) an interleave, b) all of the same
664668
// set of the elements, and c) contained by the first source. (c) could
665669
// be relaxed if desired.

llvm/test/Transforms/InterleavedAccess/AArch64/scalable-deinterleave-intrinsics.ll

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
2-
; RUN: opt < %s -interleaved-access -S | FileCheck %s
32
; RUN: opt < %s -passes=interleaved-access -S | FileCheck %s
43

54
target triple = "aarch64-linux-gnu"
@@ -186,6 +185,22 @@ define void @interleave_nxptr_factor2(ptr %ptr, <vscale x 2 x ptr> %l, <vscale x
186185
ret void
187186
}
188187

188+
define void @interleave_nxi8_factor2_masked_store_splatmask(ptr %ptr, <vscale x 16 x i8> %l, <vscale x 16 x i8> %r, i1 %mask) #0 {
189+
; CHECK-LABEL: define void @interleave_nxi8_factor2_masked_store_splatmask
190+
; CHECK-SAME: (ptr [[PTR:%.*]], <vscale x 16 x i8> [[L:%.*]], <vscale x 16 x i8> [[R:%.*]], i1 [[MASK:%.*]]) #[[ATTR0]] {
191+
; CHECK-NEXT: [[INTERLEAVE:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.interleave2.nxv32i8(<vscale x 16 x i8> [[L]], <vscale x 16 x i8> [[R]])
192+
; CHECK-NEXT: [[MASK_INS:%.*]] = insertelement <vscale x 32 x i1> poison, i1 [[MASK]], i64 0
193+
; CHECK-NEXT: [[MASK_SPLAT:%.*]] = shufflevector <vscale x 32 x i1> [[MASK_INS]], <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
194+
; CHECK-NEXT: tail call void @llvm.masked.store.nxv32i8.p0(<vscale x 32 x i8> [[INTERLEAVE]], ptr [[PTR]], i32 1, <vscale x 32 x i1> [[MASK_SPLAT]])
195+
; CHECK-NEXT: ret void
196+
;
197+
%interleave = tail call <vscale x 32 x i8> @llvm.vector.interleave2.nxv32i8(<vscale x 16 x i8> %l, <vscale x 16 x i8> %r)
198+
%mask.ins = insertelement <vscale x 32 x i1> poison, i1 %mask, i64 0
199+
%mask.splat = shufflevector <vscale x 32 x i1> %mask.ins, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
200+
tail call void @llvm.masked.store.nxv32i8.p0(<vscale x 32 x i8> %interleave, ptr %ptr, i32 1, <vscale x 32 x i1> %mask.splat)
201+
ret void
202+
}
203+
189204
;;; Check that we 'legalize' operations that are wider than the target supports.
190205

191206
define void @deinterleave_wide_nxi32_factor2(ptr %ptr) #0 {

0 commit comments

Comments
 (0)