Skip to content

release/20.x: [RISCV] Check isFixedLengthVector before calling getVectorNumElements in getSingleShuffleSrc. (#125455) #125590

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 10, 2025

Conversation

llvmbot
Copy link
Member

@llvmbot llvmbot commented Feb 3, 2025

Backport 7c5100d

Requested by: @topperc

@llvmbot
Copy link
Member Author

llvmbot commented Feb 3, 2025

@llvm/pr-subscribers-backend-risc-v

Author: None (llvmbot)

Changes

Backport 7c5100d

Requested by: @topperc


Full diff: https://github.com/llvm/llvm-project/pull/125590.diff

2 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+2-1)
  • (added) llvm/test/CodeGen/RISCV/rvv/pr125306.ll (+118)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8d09e534b1858b..4ff333b1ff2f7a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4512,7 +4512,8 @@ static SDValue getSingleShuffleSrc(MVT VT, MVT ContainerVT, SDValue V1,
 
   // Src needs to have twice the number of elements.
   unsigned NumElts = VT.getVectorNumElements();
-  if (Src.getValueType().getVectorNumElements() != (NumElts * 2))
+  if (!Src.getValueType().isFixedLengthVector() ||
+      Src.getValueType().getVectorNumElements() != (NumElts * 2))
     return SDValue();
 
   // The extracts must extract the two halves of the source.
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr125306.ll b/llvm/test/CodeGen/RISCV/rvv/pr125306.ll
new file mode 100644
index 00000000000000..111f87de220dbf
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/pr125306.ll
@@ -0,0 +1,118 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v | FileCheck %s
+
+; Test for an "Invalid size request on a scalable vector". Attempts to reduce
+; the test further were not successful. The failure requires a shuffle with 2
+; scalable->fixed extracts from the same vector. 0 is the only valid index for a
+; scalable->fixed extract so the 2 extracts must be the same. Shuffles with the
+; same source are aggressively canonicalized to a unary shuffle so it requires
+; the extracts to become identical through other optimizations without the
+; shuffle being canonicalized before it is lowered.
+
+define <2 x i32> @main(ptr %0) {
+; CHECK-LABEL: main:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vse32.v v8, (zero)
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vmv.v.i v10, 0
+; CHECK-NEXT:    li a2, 64
+; CHECK-NEXT:    sw zero, 80(zero)
+; CHECK-NEXT:    lui a1, 7
+; CHECK-NEXT:    lui a3, 1
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vid.v v11
+; CHECK-NEXT:    li a4, 16
+; CHECK-NEXT:    lui a5, 2
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vse32.v v10, (a2)
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.i v10, 0
+; CHECK-NEXT:    li a2, 24
+; CHECK-NEXT:    sh zero, -392(a3)
+; CHECK-NEXT:    sh zero, 534(a3)
+; CHECK-NEXT:    sh zero, 1460(a3)
+; CHECK-NEXT:    li a3, 32
+; CHECK-NEXT:    vse32.v v10, (a2)
+; CHECK-NEXT:    li a2, 40
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    sh zero, -1710(a5)
+; CHECK-NEXT:    sh zero, -784(a5)
+; CHECK-NEXT:    sh zero, 142(a5)
+; CHECK-NEXT:    lw a5, -304(a1)
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vadd.vi v9, v11, -1
+; CHECK-NEXT:    vse32.v v10, (a3)
+; CHECK-NEXT:    sh zero, 0(a0)
+; CHECK-NEXT:    lw a0, -188(a1)
+; CHECK-NEXT:    vse32.v v10, (a2)
+; CHECK-NEXT:    lw a2, -188(a1)
+; CHECK-NEXT:    lw a3, 1244(a1)
+; CHECK-NEXT:    vmv.v.x v8, a0
+; CHECK-NEXT:    lw a0, 1244(a1)
+; CHECK-NEXT:    lw a1, -304(a1)
+; CHECK-NEXT:    vmv.v.x v10, a3
+; CHECK-NEXT:    vmv.v.x v11, a5
+; CHECK-NEXT:    vslide1down.vx v8, v8, zero
+; CHECK-NEXT:    vslide1down.vx v10, v10, zero
+; CHECK-NEXT:    vmin.vv v8, v10, v8
+; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    vslide1down.vx v11, v11, zero
+; CHECK-NEXT:    vmin.vx v10, v10, a2
+; CHECK-NEXT:    vmin.vx v10, v10, a1
+; CHECK-NEXT:    vmin.vv v11, v8, v11
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    vand.vv v9, v11, v9
+; CHECK-NEXT:    vslideup.vi v8, v10, 1
+; CHECK-NEXT:    vse32.v v9, (a4)
+; CHECK-NEXT:    sh zero, 0(zero)
+; CHECK-NEXT:    ret
+entry:
+  store <16 x i32> zeroinitializer, ptr null, align 4
+  store <8 x i32> zeroinitializer, ptr %0, align 4
+  store <4 x i32> zeroinitializer, ptr getelementptr inbounds nuw (i8, ptr null, i64 64), align 4
+  store i32 0, ptr getelementptr inbounds nuw (i8, ptr null, i64 80), align 4
+  %1 = load i32, ptr getelementptr inbounds nuw (i8, ptr null, i64 29916), align 4
+  %broadcast.splatinsert53 = insertelement <4 x i32> zeroinitializer, i32 %1, i64 0
+  %2 = load i32, ptr getelementptr inbounds nuw (i8, ptr null, i64 28484), align 4
+  %broadcast.splatinsert55 = insertelement <4 x i32> zeroinitializer, i32 %2, i64 0
+  %3 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %broadcast.splatinsert53, <4 x i32> %broadcast.splatinsert55)
+  %4 = load i32, ptr getelementptr inbounds nuw (i8, ptr null, i64 28368), align 4
+  %broadcast.splatinsert57 = insertelement <4 x i32> zeroinitializer, i32 %4, i64 0
+  %5 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %3, <4 x i32> %broadcast.splatinsert57)
+  store i16 0, ptr getelementptr inbounds nuw (i8, ptr null, i64 3704), align 2
+  store i16 0, ptr getelementptr inbounds nuw (i8, ptr null, i64 4630), align 2
+  %6 = shufflevector <4 x i32> %5, <4 x i32> zeroinitializer, <2 x i32> <i32 0, i32 4>
+  store <2 x i32> %6, ptr getelementptr inbounds nuw (i8, ptr null, i64 16), align 4
+  store i16 0, ptr getelementptr inbounds nuw (i8, ptr null, i64 5556), align 2
+  store i16 0, ptr getelementptr inbounds nuw (i8, ptr null, i64 6482), align 2
+  store <2 x i32> zeroinitializer, ptr getelementptr inbounds nuw (i8, ptr null, i64 24), align 4
+  store i16 0, ptr getelementptr inbounds nuw (i8, ptr null, i64 7408), align 2
+  store i16 0, ptr getelementptr inbounds nuw (i8, ptr null, i64 8334), align 2
+  store <2 x i32> zeroinitializer, ptr getelementptr inbounds nuw (i8, ptr null, i64 32), align 4
+  store i16 0, ptr %0, align 2
+  store <2 x i32> zeroinitializer, ptr getelementptr inbounds nuw (i8, ptr null, i64 40), align 4
+  %7 = load i32, ptr getelementptr inbounds nuw (i8, ptr null, i64 29916), align 4
+  %broadcast.splatinsert165 = insertelement <4 x i32> poison, i32 %7, i64 0
+  %8 = load i32, ptr getelementptr inbounds nuw (i8, ptr null, i64 28484), align 4
+  %broadcast.splatinsert167 = insertelement <4 x i32> poison, i32 %8, i64 0
+  %9 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %broadcast.splatinsert165, <4 x i32> %broadcast.splatinsert167)
+  %10 = load i32, ptr getelementptr inbounds nuw (i8, ptr null, i64 28368), align 4
+  %broadcast.splatinsert169 = insertelement <4 x i32> poison, i32 %10, i64 0
+  %11 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %9, <4 x i32> %broadcast.splatinsert169)
+  store i16 0, ptr null, align 2
+  %12 = load i32, ptr getelementptr inbounds nuw (i8, ptr null, i64 29916), align 4
+  %broadcast.splatinsert179 = insertelement <4 x i32> poison, i32 %12, i64 0
+  %13 = load i32, ptr getelementptr inbounds nuw (i8, ptr null, i64 28484), align 4
+  %broadcast.splatinsert181 = insertelement <4 x i32> poison, i32 %13, i64 0
+  %14 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %broadcast.splatinsert179, <4 x i32> %broadcast.splatinsert181)
+  %15 = load i32, ptr getelementptr inbounds nuw (i8, ptr null, i64 28368), align 4
+  %broadcast.splatinsert183 = insertelement <4 x i32> poison, i32 %15, i64 0
+  %16 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %14, <4 x i32> %broadcast.splatinsert183)
+  %17 = shufflevector <4 x i32> %11, <4 x i32> %16, <2 x i32> <i32 0, i32 4>
+  ret <2 x i32> %17
+}

@tstellar tstellar requested review from lukel97 and preames February 10, 2025 20:45
Copy link
Collaborator

@preames preames left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

… in getSingleShuffleSrc. (llvm#125455)

I have been unsuccessful at further reducing the test. The
failure requires a shuffle with 2 scalable->fixed extracts with
the same source. 0 is the only valid index for a scalable->fixed
extract so the 2 sources must be the same extract. Shuffles with
the same source are aggressively canonicalized to a unary shuffle.
So it requires the extracts to become identical through other
optimizations without the shuffle being canonicalized before it is
lowered.

Fixes llvm#125306.

(cherry picked from commit 7c5100d)
@tstellar tstellar merged commit 820c8c7 into llvm:release/20.x Feb 10, 2025
6 of 8 checks passed
Copy link

@topperc (or anyone else): if you would like to add a note about this fix to the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
Development

Successfully merging this pull request may close these issues.

4 participants