-
Notifications
You must be signed in to change notification settings - Fork 14.8k
release/20.x: [RISCV] Check isFixedLengthVector before calling getVectorNumElements in getSingleShuffleSrc. (#125455) #125590
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-risc-v Author: None (llvmbot) ChangesBackport 7c5100d Requested by: @topperc Full diff: https://github.com/llvm/llvm-project/pull/125590.diff 2 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8d09e534b1858b..4ff333b1ff2f7a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4512,7 +4512,8 @@ static SDValue getSingleShuffleSrc(MVT VT, MVT ContainerVT, SDValue V1,
// Src needs to have twice the number of elements.
unsigned NumElts = VT.getVectorNumElements();
- if (Src.getValueType().getVectorNumElements() != (NumElts * 2))
+ if (!Src.getValueType().isFixedLengthVector() ||
+ Src.getValueType().getVectorNumElements() != (NumElts * 2))
return SDValue();
// The extracts must extract the two halves of the source.
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr125306.ll b/llvm/test/CodeGen/RISCV/rvv/pr125306.ll
new file mode 100644
index 00000000000000..111f87de220dbf
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/pr125306.ll
@@ -0,0 +1,118 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v | FileCheck %s
+
+; Test for an "Invalid size request on a scalable vector". Attempts to reduce
+; the test faurther were not successful. The failure requires a shuffle with 2
+; scalable->fixed extracts from the same vector. 0 is the only valid index for a
+; scalable->fixed extract so the 2 extract must be the same. Shuffles with the
+; same source are aggressively canonicalized to a unary shuffle so it requires
+; the extracts to become identical through other optimizations without the
+; shuffle being canonicalized before it is lowered.
+
+define <2 x i32> @main(ptr %0) {
+; CHECK-LABEL: main:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vse32.v v8, (zero)
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: li a2, 64
+; CHECK-NEXT: sw zero, 80(zero)
+; CHECK-NEXT: lui a1, 7
+; CHECK-NEXT: lui a3, 1
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vid.v v11
+; CHECK-NEXT: li a4, 16
+; CHECK-NEXT: lui a5, 2
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vse32.v v10, (a2)
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: li a2, 24
+; CHECK-NEXT: sh zero, -392(a3)
+; CHECK-NEXT: sh zero, 534(a3)
+; CHECK-NEXT: sh zero, 1460(a3)
+; CHECK-NEXT: li a3, 32
+; CHECK-NEXT: vse32.v v10, (a2)
+; CHECK-NEXT: li a2, 40
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: sh zero, -1710(a5)
+; CHECK-NEXT: sh zero, -784(a5)
+; CHECK-NEXT: sh zero, 142(a5)
+; CHECK-NEXT: lw a5, -304(a1)
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vadd.vi v9, v11, -1
+; CHECK-NEXT: vse32.v v10, (a3)
+; CHECK-NEXT: sh zero, 0(a0)
+; CHECK-NEXT: lw a0, -188(a1)
+; CHECK-NEXT: vse32.v v10, (a2)
+; CHECK-NEXT: lw a2, -188(a1)
+; CHECK-NEXT: lw a3, 1244(a1)
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: lw a0, 1244(a1)
+; CHECK-NEXT: lw a1, -304(a1)
+; CHECK-NEXT: vmv.v.x v10, a3
+; CHECK-NEXT: vmv.v.x v11, a5
+; CHECK-NEXT: vslide1down.vx v8, v8, zero
+; CHECK-NEXT: vslide1down.vx v10, v10, zero
+; CHECK-NEXT: vmin.vv v8, v10, v8
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vslide1down.vx v11, v11, zero
+; CHECK-NEXT: vmin.vx v10, v10, a2
+; CHECK-NEXT: vmin.vx v10, v10, a1
+; CHECK-NEXT: vmin.vv v11, v8, v11
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: vand.vv v9, v11, v9
+; CHECK-NEXT: vslideup.vi v8, v10, 1
+; CHECK-NEXT: vse32.v v9, (a4)
+; CHECK-NEXT: sh zero, 0(zero)
+; CHECK-NEXT: ret
+entry:
+ store <16 x i32> zeroinitializer, ptr null, align 4
+ store <8 x i32> zeroinitializer, ptr %0, align 4
+ store <4 x i32> zeroinitializer, ptr getelementptr inbounds nuw (i8, ptr null, i64 64), align 4
+ store i32 0, ptr getelementptr inbounds nuw (i8, ptr null, i64 80), align 4
+ %1 = load i32, ptr getelementptr inbounds nuw (i8, ptr null, i64 29916), align 4
+ %broadcast.splatinsert53 = insertelement <4 x i32> zeroinitializer, i32 %1, i64 0
+ %2 = load i32, ptr getelementptr inbounds nuw (i8, ptr null, i64 28484), align 4
+ %broadcast.splatinsert55 = insertelement <4 x i32> zeroinitializer, i32 %2, i64 0
+ %3 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %broadcast.splatinsert53, <4 x i32> %broadcast.splatinsert55)
+ %4 = load i32, ptr getelementptr inbounds nuw (i8, ptr null, i64 28368), align 4
+ %broadcast.splatinsert57 = insertelement <4 x i32> zeroinitializer, i32 %4, i64 0
+ %5 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %3, <4 x i32> %broadcast.splatinsert57)
+ store i16 0, ptr getelementptr inbounds nuw (i8, ptr null, i64 3704), align 2
+ store i16 0, ptr getelementptr inbounds nuw (i8, ptr null, i64 4630), align 2
+ %6 = shufflevector <4 x i32> %5, <4 x i32> zeroinitializer, <2 x i32> <i32 0, i32 4>
+ store <2 x i32> %6, ptr getelementptr inbounds nuw (i8, ptr null, i64 16), align 4
+ store i16 0, ptr getelementptr inbounds nuw (i8, ptr null, i64 5556), align 2
+ store i16 0, ptr getelementptr inbounds nuw (i8, ptr null, i64 6482), align 2
+ store <2 x i32> zeroinitializer, ptr getelementptr inbounds nuw (i8, ptr null, i64 24), align 4
+ store i16 0, ptr getelementptr inbounds nuw (i8, ptr null, i64 7408), align 2
+ store i16 0, ptr getelementptr inbounds nuw (i8, ptr null, i64 8334), align 2
+ store <2 x i32> zeroinitializer, ptr getelementptr inbounds nuw (i8, ptr null, i64 32), align 4
+ store i16 0, ptr %0, align 2
+ store <2 x i32> zeroinitializer, ptr getelementptr inbounds nuw (i8, ptr null, i64 40), align 4
+ %7 = load i32, ptr getelementptr inbounds nuw (i8, ptr null, i64 29916), align 4
+ %broadcast.splatinsert165 = insertelement <4 x i32> poison, i32 %7, i64 0
+ %8 = load i32, ptr getelementptr inbounds nuw (i8, ptr null, i64 28484), align 4
+ %broadcast.splatinsert167 = insertelement <4 x i32> poison, i32 %8, i64 0
+ %9 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %broadcast.splatinsert165, <4 x i32> %broadcast.splatinsert167)
+ %10 = load i32, ptr getelementptr inbounds nuw (i8, ptr null, i64 28368), align 4
+ %broadcast.splatinsert169 = insertelement <4 x i32> poison, i32 %10, i64 0
+ %11 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %9, <4 x i32> %broadcast.splatinsert169)
+ store i16 0, ptr null, align 2
+ %12 = load i32, ptr getelementptr inbounds nuw (i8, ptr null, i64 29916), align 4
+ %broadcast.splatinsert179 = insertelement <4 x i32> poison, i32 %12, i64 0
+ %13 = load i32, ptr getelementptr inbounds nuw (i8, ptr null, i64 28484), align 4
+ %broadcast.splatinsert181 = insertelement <4 x i32> poison, i32 %13, i64 0
+ %14 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %broadcast.splatinsert179, <4 x i32> %broadcast.splatinsert181)
+ %15 = load i32, ptr getelementptr inbounds nuw (i8, ptr null, i64 28368), align 4
+ %broadcast.splatinsert183 = insertelement <4 x i32> poison, i32 %15, i64 0
+ %16 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %14, <4 x i32> %broadcast.splatinsert183)
+ %17 = shufflevector <4 x i32> %11, <4 x i32> %16, <2 x i32> <i32 0, i32 4>
+ ret <2 x i32> %17
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
… in getSingleShuffleSrc. (llvm#125455) I have been unsuccessful at further reducing the test. The failure requires a shuffle with 2 scalable->fixed extracts with the same source. 0 is the only valid index for a scalable->fixed extract so the 2 sources must be the same extract. Shuffles with the same source are aggressively canonicalized to a unary shuffle. So it requires the extracts to become identical through other optimizations without the shuffle being canonicalized before it is lowered. Fixes llvm#125306. (cherry picked from commit 7c5100d)
@topperc (or anyone else). If you would like to add a note about this fix in the release notes (completely optional). Please reply to this comment with a one or two sentence description of the fix. When you are done, please add the release:note label to this PR. |
Backport 7c5100d
Requested by: @topperc