Skip to content

DAG: Avoid creating illegal extract_subvector in legalizer #154100

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from

Conversation

arsenm
Copy link
Contributor

@arsenm arsenm commented Aug 18, 2025

Fixes #153808

Copy link
Contributor Author

arsenm commented Aug 18, 2025

@llvmbot
Copy link
Member

llvmbot commented Aug 18, 2025

@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-llvm-selectiondag

Author: Matt Arsenault (arsenm)

Changes

Fixes #153808


Full diff: https://github.com/llvm/llvm-project/pull/154100.diff

2 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (+23-4)
  • (added) llvm/test/CodeGen/AMDGPU/issue153808-extract-subvector-legalize.ll (+51)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index bc2dbfb4cbaae..a252d911a1d4d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -3842,13 +3842,32 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
   uint64_t LoEltsMin = Lo.getValueType().getVectorMinNumElements();
   uint64_t IdxVal = Idx->getAsZExtVal();
 
+  unsigned NumResultElts = SubVT.getVectorMinNumElements();
+
   if (IdxVal < LoEltsMin) {
-    assert(IdxVal + SubVT.getVectorMinNumElements() <= LoEltsMin &&
+    assert(IdxVal + NumResultElts <= LoEltsMin &&
            "Extracted subvector crosses vector split!");
     return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);
-  } else if (SubVT.isScalableVector() ==
-             N->getOperand(0).getValueType().isScalableVector())
-    return DAG.getExtractSubvector(dl, SubVT, Hi, IdxVal - LoEltsMin);
+  }
+
+  EVT SrcVT = N->getOperand(0).getValueType();
+  if (SubVT.isScalableVector() == SrcVT.isScalableVector()) {
+    uint64_t ExtractIdx = IdxVal - LoEltsMin;
+    if (ExtractIdx % NumResultElts == 0)
+      return DAG.getExtractSubvector(dl, SubVT, Hi, ExtractIdx);
+
+    // We cannot create an extract_subvector that isn't a multiple of the result
+    // size, which may go out of bounds for the last elements. Shuffle the
+    // desired elements down to 0 and do a simple 0 extract.
+    EVT HiVT = Hi.getValueType();
+    SmallVector<int, 8> Mask(HiVT.getVectorNumElements(), -1);
+    for (int I = 0; I != static_cast<int>(NumResultElts); ++I)
+      Mask[I] = ExtractIdx + I;
+
+    SDValue Shuffle =
+        DAG.getVectorShuffle(HiVT, dl, Hi, DAG.getPOISON(HiVT), Mask);
+    return DAG.getExtractSubvector(dl, SubVT, Shuffle, 0);
+  }
 
   // After this point the DAG node only permits extracting fixed-width
   // subvectors from scalable vectors.
diff --git a/llvm/test/CodeGen/AMDGPU/issue153808-extract-subvector-legalize.ll b/llvm/test/CodeGen/AMDGPU/issue153808-extract-subvector-legalize.ll
new file mode 100644
index 0000000000000..f1b1ea3fbd6d7
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/issue153808-extract-subvector-legalize.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX942 %s
+
+define <3 x float> @issue153808_vector_extract_assert(ptr addrspace(1) %ptr) #0 {
+; GFX900-LABEL: issue153808_vector_extract_assert:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_mov_b32_e32 v4, v1
+; GFX900-NEXT:    v_mov_b32_e32 v3, v0
+; GFX900-NEXT:    global_load_dwordx4 v[5:8], v[3:4], off
+; GFX900-NEXT:    global_load_dwordx3 v[0:2], v[3:4], off offset:192
+; GFX900-NEXT:    s_mov_b32 s4, 0
+; GFX900-NEXT:    s_mov_b32 s5, s4
+; GFX900-NEXT:    s_mov_b32 s6, s4
+; GFX900-NEXT:    s_mov_b32 s7, s4
+; GFX900-NEXT:    s_waitcnt vmcnt(1)
+; GFX900-NEXT:    buffer_store_dwordx4 v[5:8], off, s[4:7], 0
+; GFX900-NEXT:    s_waitcnt vmcnt(0)
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: issue153808_vector_extract_assert:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off
+; GFX942-NEXT:    global_load_dwordx3 v[2:4], v[0:1], off offset:192
+; GFX942-NEXT:    s_mov_b32 s0, 0
+; GFX942-NEXT:    s_mov_b32 s1, s0
+; GFX942-NEXT:    s_mov_b32 s2, s0
+; GFX942-NEXT:    s_mov_b32 s3, s0
+; GFX942-NEXT:    s_waitcnt vmcnt(1)
+; GFX942-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0
+; GFX942-NEXT:    s_waitcnt vmcnt(1)
+; GFX942-NEXT:    v_mov_b32_e32 v0, v2
+; GFX942-NEXT:    v_mov_b32_e32 v1, v3
+; GFX942-NEXT:    v_mov_b32_e32 v2, v4
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+  %val = load <51 x float>, ptr addrspace(1) %ptr, align 4
+  %val.slice.0 = shufflevector <51 x float> %val, <51 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %val.slice.0, ptr addrspace(8) null, i32 0, i32 0, i32 0)
+  %val.slice.48 = shufflevector <51 x float> %val, <51 x float> poison, <3 x i32> <i32 48, i32 49, i32 50>
+  ret <3 x float> %val.slice.48
+}
+
+declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8) writeonly captures(none), i32, i32, i32 immarg) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX9: {{.*}}

arsenm added 4 commits August 19, 2025 00:30
Verify it's a multiple of the result vector element count
instead of asserting this in random combines.

The testcase in #153808 fails in the wrong point. Add an
assert to getNode so the invalid extract asserts at construction
instead of use.
@arsenm arsenm force-pushed the users/arsenm/issue153808/avoid-non-multiple-extract-vector-split branch from 8db5851 to 6dca7d1 Compare August 19, 2025 23:52
@arsenm arsenm force-pushed the users/arsenm/dag/add-assert-getNode-EXTRACT_SUBVECTOR-index-multiple-vector-elts branch from e092268 to 3fd9c57 Compare August 19, 2025 23:52
Base automatically changed from users/arsenm/dag/add-assert-getNode-EXTRACT_SUBVECTOR-index-multiple-vector-elts to main August 20, 2025 00:55
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:AMDGPU llvm:SelectionDAG SelectionDAGISel as well
Projects
None yet
Development

Successfully merging this pull request may close these issues.

Type legalizing emits invalid extract_subvector node
2 participants