-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[RISCV] Disable slideup optimization on the inconsistent element type of EVec and ContainerVT #159373
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-risc-v Author: Hongyu Chen (XChy). Changes: Fixes #159294. Full diff: https://github.com/llvm/llvm-project/pull/159373.diff — 2 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index af9430a23e2c9..4e32d0cec2d2a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4562,6 +4562,13 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
EVec = convertToScalableVector(EVecContainerVT, EVec, DAG, Subtarget);
}
+ // Adapt the element type of EVec into ContainerVT.
+ MVT EVecEltVT = EVecContainerVT.getVectorElementType();
+ MVT ContainerEltVT = ContainerVT.getVectorElementType();
+ if (EVecEltVT != ContainerEltVT)
+ EVec = DAG.getAnyExtOrTrunc(
+ EVec, DL, EVecContainerVT.changeVectorElementType(ContainerEltVT));
+
// Adapt EVec's type into ContainerVT.
if (EVecContainerVT.getVectorMinNumElements() <
ContainerVT.getVectorMinNumElements())
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index 4bec67d91847d..657d62cb6aed2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -3597,5 +3597,130 @@ define <4 x i32> @buildvec_vredmax_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8
ret <4 x i32> %255
}
+define <16 x i16> @PR159294(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) {
+; RV32-ONLY-LABEL: PR159294:
+; RV32-ONLY: # %bb.0: # %entry
+; RV32-ONLY-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-ONLY-NEXT: vmv.x.s a0, v8
+; RV32-ONLY-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; RV32-ONLY-NEXT: vnsrl.wi v8, v9, 0
+; RV32-ONLY-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; RV32-ONLY-NEXT: vslide1up.vx v12, v8, a0
+; RV32-ONLY-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV32-ONLY-NEXT: vmv.x.s a0, v10
+; RV32-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV32-ONLY-NEXT: vslide1up.vx v8, v12, a0
+; RV32-ONLY-NEXT: ret
+;
+; RV32VB-LABEL: PR159294:
+; RV32VB: # %bb.0: # %entry
+; RV32VB-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32VB-NEXT: vmv.x.s a0, v8
+; RV32VB-NEXT: vmv.x.s a1, v10
+; RV32VB-NEXT: slli a0, a0, 16
+; RV32VB-NEXT: zext.h a1, a1
+; RV32VB-NEXT: or a0, a1, a0
+; RV32VB-NEXT: vmv.x.s a1, v9
+; RV32VB-NEXT: vmv.v.i v8, 0
+; RV32VB-NEXT: zext.h a1, a1
+; RV32VB-NEXT: vsetvli zero, zero, e32, m2, tu, ma
+; RV32VB-NEXT: vmv.s.x v8, a0
+; RV32VB-NEXT: vmv.s.x v10, a1
+; RV32VB-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; RV32VB-NEXT: vslideup.vi v8, v10, 1
+; RV32VB-NEXT: ret
+;
+; RV32VB-PACK-LABEL: PR159294:
+; RV32VB-PACK: # %bb.0: # %entry
+; RV32VB-PACK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32VB-PACK-NEXT: vmv.x.s a0, v8
+; RV32VB-PACK-NEXT: vmv.x.s a1, v10
+; RV32VB-PACK-NEXT: vmv.x.s a2, v9
+; RV32VB-PACK-NEXT: pack a0, a1, a0
+; RV32VB-PACK-NEXT: pack a1, a0, a0
+; RV32VB-PACK-NEXT: vmv.v.x v8, a1
+; RV32VB-PACK-NEXT: pack a1, a2, a0
+; RV32VB-PACK-NEXT: vsetvli zero, zero, e32, m2, tu, ma
+; RV32VB-PACK-NEXT: vmv.s.x v8, a0
+; RV32VB-PACK-NEXT: vmv.s.x v10, a1
+; RV32VB-PACK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; RV32VB-PACK-NEXT: vslideup.vi v8, v10, 1
+; RV32VB-PACK-NEXT: ret
+;
+; RV64V-ONLY-LABEL: PR159294:
+; RV64V-ONLY: # %bb.0: # %entry
+; RV64V-ONLY-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64V-ONLY-NEXT: vmv.x.s a0, v8
+; RV64V-ONLY-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; RV64V-ONLY-NEXT: vnsrl.wi v8, v9, 0
+; RV64V-ONLY-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; RV64V-ONLY-NEXT: vslide1up.vx v12, v8, a0
+; RV64V-ONLY-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV64V-ONLY-NEXT: vmv.x.s a0, v10
+; RV64V-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64V-ONLY-NEXT: vslide1up.vx v8, v12, a0
+; RV64V-ONLY-NEXT: ret
+;
+; RVA22U64-LABEL: PR159294:
+; RVA22U64: # %bb.0: # %entry
+; RVA22U64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RVA22U64-NEXT: vmv.x.s a0, v8
+; RVA22U64-NEXT: vmv.x.s a1, v10
+; RVA22U64-NEXT: slli a0, a0, 16
+; RVA22U64-NEXT: zext.h a1, a1
+; RVA22U64-NEXT: or a0, a0, a1
+; RVA22U64-NEXT: vmv.x.s a1, v9
+; RVA22U64-NEXT: vmv.v.i v8, 0
+; RVA22U64-NEXT: zext.h a1, a1
+; RVA22U64-NEXT: vsetvli zero, zero, e32, m2, tu, ma
+; RVA22U64-NEXT: vmv.s.x v8, a0
+; RVA22U64-NEXT: vmv.s.x v10, a1
+; RVA22U64-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; RVA22U64-NEXT: vslideup.vi v8, v10, 1
+; RVA22U64-NEXT: ret
+;
+; RVA22U64-PACK-LABEL: PR159294:
+; RVA22U64-PACK: # %bb.0: # %entry
+; RVA22U64-PACK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RVA22U64-PACK-NEXT: vmv.x.s a0, v8
+; RVA22U64-PACK-NEXT: vmv.x.s a1, v10
+; RVA22U64-PACK-NEXT: vmv.x.s a2, v9
+; RVA22U64-PACK-NEXT: packw a0, a1, a0
+; RVA22U64-PACK-NEXT: packw a1, a0, a0
+; RVA22U64-PACK-NEXT: vmv.v.x v8, a1
+; RVA22U64-PACK-NEXT: packw a1, a2, a0
+; RVA22U64-PACK-NEXT: vsetvli zero, zero, e32, m2, tu, ma
+; RVA22U64-PACK-NEXT: vmv.s.x v8, a0
+; RVA22U64-PACK-NEXT: vmv.s.x v10, a1
+; RVA22U64-PACK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; RVA22U64-PACK-NEXT: vslideup.vi v8, v10, 1
+; RVA22U64-PACK-NEXT: ret
+;
+; RV64ZVE32-LABEL: PR159294:
+; RV64ZVE32: # %bb.0: # %entry
+; RV64ZVE32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32-NEXT: vmv.x.s a0, v8
+; RV64ZVE32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; RV64ZVE32-NEXT: vnsrl.wi v8, v9, 0
+; RV64ZVE32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; RV64ZVE32-NEXT: vslide1up.vx v12, v8, a0
+; RV64ZVE32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV64ZVE32-NEXT: vmv.x.s a0, v10
+; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64ZVE32-NEXT: vslide1up.vx v8, v12, a0
+; RV64ZVE32-NEXT: ret
+entry:
+ %vecext3 = extractelement <2 x i32> %a, i32 0
+ %conv4 = trunc i32 %vecext3 to i16
+ %vecinit5 = insertelement <16 x i16> <i16 0, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>, i16 %conv4, i32 1
+ %vecext7 = extractelement <2 x i32> %b, i32 0
+ %conv8 = trunc i32 %vecext7 to i16
+ %vecinit9 = insertelement <16 x i16> %vecinit5, i16 %conv8, i32 2
+ %vecext59 = extractelement <2 x i32> %c, i32 0
+ %conv60 = trunc i32 %vecext59 to i16
+ %vecinit61 = insertelement <16 x i16> %vecinit9, i16 %conv60, i32 0
+ ret <16 x i16> %vecinit61
+}
+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV64: {{.*}}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am not an expert in the cost of RVV instructions. Looks like it's better to enable the slideup optimization. For your information, you can view the diff of the new commit. |
✅ With the latest revision this PR passed the C/C++ code formatter. |
With an additional element type cast instruction, it's almost certain that the optimization added in #154847 — which folded extract_element + vslide1up into a vslideup of 1 and thus eliminated most of the vslide1up instructions — is no longer profitable. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Perhaps you also want to update the PR title and description
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Fixes #159294
The element types of EVecContainerVT and ContainerVT can differ after integer type promotion.
This patch disables the slideup optimization in that case.