From 576ddd60de91ef95d2987c3ce7fb94e0bb9c2f38 Mon Sep 17 00:00:00 2001
From: David Sherwood
Date: Tue, 2 Sep 2025 13:59:26 +0000
Subject: [PATCH] [CodeGen] Fix failing assert in interleaved access pass

In the InterleavedAccessPass the function getMask assumes that
shufflevector operations are always fixed width, which isn't true
because we also use them for splats of scalable vectors. This patch
fixes the code by bailing out for scalable vectors.
---
 llvm/lib/CodeGen/InterleavedAccessPass.cpp       |  4 ++++
 .../AArch64/scalable-deinterleave-intrinsics.ll  | 17 ++++++++++++++++-
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index c5e97037be336..e3ded12a1847b 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -660,6 +660,10 @@ static std::pair<Value *, APInt> getMask(Value *WideMask, unsigned Factor,
   }
 
   if (auto *SVI = dyn_cast<ShuffleVectorInst>(WideMask)) {
+    Type *Op1Ty = SVI->getOperand(1)->getType();
+    if (!isa<FixedVectorType>(Op1Ty))
+      return {nullptr, GapMask};
+
     // Check that the shuffle mask is: a) an interleave, b) all of the same
     // set of the elements, and c) contained by the first source. (c) could
     // be relaxed if desired.
diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/scalable-deinterleave-intrinsics.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/scalable-deinterleave-intrinsics.ll
index d7649801ea2fc..ed9fba3a01965 100644
--- a/llvm/test/Transforms/InterleavedAccess/AArch64/scalable-deinterleave-intrinsics.ll
+++ b/llvm/test/Transforms/InterleavedAccess/AArch64/scalable-deinterleave-intrinsics.ll
@@ -1,5 +1,4 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
-; RUN: opt < %s -interleaved-access -S | FileCheck %s
 ; RUN: opt < %s -passes=interleaved-access -S | FileCheck %s
 
 target triple = "aarch64-linux-gnu"
@@ -186,6 +185,22 @@ define void @interleave_nxptr_factor2(ptr %ptr, <vscale x 2 x ptr> %l, <vscale
   ret void
 }
 
+define void @interleave_nxi8_factor2_masked_store_splatmask(ptr %ptr, <vscale x 16 x i8> %l, <vscale x 16 x i8> %r, i1 %mask) #0 {
+; CHECK-LABEL: define void @interleave_nxi8_factor2_masked_store_splatmask
+; CHECK-SAME: (ptr [[PTR:%.*]], <vscale x 16 x i8> [[L:%.*]], <vscale x 16 x i8> [[R:%.*]], i1 [[MASK:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[INTERLEAVE:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.interleave2.nxv32i8(<vscale x 16 x i8> [[L]], <vscale x 16 x i8> [[R]])
+; CHECK-NEXT:    [[MASK_INS:%.*]] = insertelement <vscale x 32 x i1> poison, i1 [[MASK]], i64 0
+; CHECK-NEXT:    [[MASK_SPLAT:%.*]] = shufflevector <vscale x 32 x i1> [[MASK_INS]], <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+; CHECK-NEXT:    tail call void @llvm.masked.store.nxv32i8.p0(<vscale x 32 x i8> [[INTERLEAVE]], ptr [[PTR]], i32 1, <vscale x 32 x i1> [[MASK_SPLAT]])
+; CHECK-NEXT:    ret void
+;
+  %interleave = tail call <vscale x 32 x i8> @llvm.vector.interleave2.nxv32i8(<vscale x 16 x i8> %l, <vscale x 16 x i8> %r)
+  %mask.ins = insertelement <vscale x 32 x i1> poison, i1 %mask, i64 0
+  %mask.splat = shufflevector <vscale x 32 x i1> %mask.ins, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+  tail call void @llvm.masked.store.nxv32i8.p0(<vscale x 32 x i8> %interleave, ptr %ptr, i32 1, <vscale x 32 x i1> %mask.splat)
+  ret void
+}
+
 ;;; Check that we 'legalize' operations that are wider than the target supports.
 
 define void @deinterleave_wide_nxi32_factor2(ptr %ptr) #0 {
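
P.S. For illustration, the shufflevector that previously reached getMask's
fixed-width assumption is the canonical scalable splat idiom, reproduced
here from the new test:

  %mask.ins = insertelement <vscale x 32 x i1> poison, i1 %mask, i64 0
  %mask.splat = shufflevector <vscale x 32 x i1> %mask.ins, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer

In this case SVI->getOperand(1) has the scalable type <vscale x 32 x i1>,
so the added !isa<FixedVectorType>(Op1Ty) guard makes getMask return
{nullptr, GapMask} before any of the fixed-width shuffle-mask analysis runs.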