From 576ddd60de91ef95d2987c3ce7fb94e0bb9c2f38 Mon Sep 17 00:00:00 2001
From: David Sherwood
Date: Tue, 2 Sep 2025 13:59:26 +0000
Subject: [PATCH] [CodeGen] Fix failing assert in interleaved access pass

In the InterleavedAccessPass the function getMask assumes that
shufflevector operations are always fixed width, which isn't true
because we also use them for splats of scalable vectors. This patch
fixes the code by bailing out for scalable vectors.
---
 llvm/lib/CodeGen/InterleavedAccessPass.cpp       |  4 ++++
 .../AArch64/scalable-deinterleave-intrinsics.ll  | 17 ++++++++++++++++-
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index c5e97037be336..e3ded12a1847b 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -660,6 +660,10 @@ static std::pair<Value *, APInt> getMask(Value *WideMask, unsigned Factor,
   }
 
   if (auto *SVI = dyn_cast<ShuffleVectorInst>(WideMask)) {
+    Type *Op1Ty = SVI->getOperand(1)->getType();
+    if (!isa<FixedVectorType>(Op1Ty))
+      return {nullptr, GapMask};
+
     // Check that the shuffle mask is: a) an interleave, b) all of the same
     // set of the elements, and c) contained by the first source. (c) could
     // be relaxed if desired.
diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/scalable-deinterleave-intrinsics.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/scalable-deinterleave-intrinsics.ll
index d7649801ea2fc..ed9fba3a01965 100644
--- a/llvm/test/Transforms/InterleavedAccess/AArch64/scalable-deinterleave-intrinsics.ll
+++ b/llvm/test/Transforms/InterleavedAccess/AArch64/scalable-deinterleave-intrinsics.ll
@@ -1,5 +1,4 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
-; RUN: opt < %s -interleaved-access -S | FileCheck %s
 ; RUN: opt < %s -passes=interleaved-access -S | FileCheck %s
 
 target triple = "aarch64-linux-gnu"
@@ -186,6 +185,22 @@ define void @interleave_nxptr_factor2(ptr %ptr, <vscale x 2 x ptr> %l, <vscale
   ret void
 }
 
+define void @interleave_nxi8_factor2_masked_store_splatmask(ptr %ptr, <vscale x 16 x i8> %l, <vscale x 16 x i8> %r, i1 %mask) #0 {
+; CHECK-LABEL: define void @interleave_nxi8_factor2_masked_store_splatmask
+; CHECK-SAME: (ptr [[PTR:%.*]], <vscale x 16 x i8> [[L:%.*]], <vscale x 16 x i8> [[R:%.*]], i1 [[MASK:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[INTERLEAVE:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.interleave2.nxv32i8(<vscale x 16 x i8> [[L]], <vscale x 16 x i8> [[R]])
+; CHECK-NEXT:    [[MASK_INS:%.*]] = insertelement <vscale x 32 x i1> poison, i1 [[MASK]], i64 0
+; CHECK-NEXT:    [[MASK_SPLAT:%.*]] = shufflevector <vscale x 32 x i1> [[MASK_INS]], <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+; CHECK-NEXT:    tail call void @llvm.masked.store.nxv32i8.p0(<vscale x 32 x i8> [[INTERLEAVE]], ptr [[PTR]], i32 1, <vscale x 32 x i1> [[MASK_SPLAT]])
+; CHECK-NEXT:    ret void
+;
+  %interleave = tail call <vscale x 32 x i8> @llvm.vector.interleave2.nxv32i8(<vscale x 16 x i8> %l, <vscale x 16 x i8> %r)
+  %mask.ins = insertelement <vscale x 32 x i1> poison, i1 %mask, i64 0
+  %mask.splat = shufflevector <vscale x 32 x i1> %mask.ins, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+  tail call void @llvm.masked.store.nxv32i8.p0(<vscale x 32 x i8> %interleave, ptr %ptr, i32 1, <vscale x 32 x i1> %mask.splat)
+  ret void
+}
+
 ;;; Check that we 'legalize' operations that are wider than the target supports.
 
 define void @deinterleave_wide_nxi32_factor2(ptr %ptr) #0 {
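
P.S. For illustration, the shufflevector that previously reached getMask's
fixed-width assumption is the canonical scalable splat idiom, reproduced
here from the new test:

  %mask.ins = insertelement <vscale x 32 x i1> poison, i1 %mask, i64 0
  %mask.splat = shufflevector <vscale x 32 x i1> %mask.ins, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer

In this case SVI->getOperand(1) has the scalable type <vscale x 32 x i1>,
so the added !isa<FixedVectorType>(Op1Ty) guard makes getMask return
{nullptr, GapMask} before any of the fixed-width shuffle-mask analysis runs.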