-
Notifications
You must be signed in to change notification settings - Fork 15.2k
DAG: Use poison for some vector result widening #168290
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/arsenm/dag/use-poison-vector-shuffle-result-split
Are you sure you want to change the base?
DAG: Use poison for some vector result widening #168290
Conversation
|
Warning: This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite.
This stack of pull requests is managed by Graphite. Learn more about stacking. |
|
@llvm/pr-subscribers-backend-powerpc @llvm/pr-subscribers-llvm-selectiondag
Author: Matt Arsenault (arsenm)
Changes
Patch is 76.41 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/168290.diff
6 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index ef53ee6df9f06..10d5f7a9b4f65 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -5654,7 +5654,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
// Widen the input and call convert on the widened input vector.
unsigned NumConcat =
WidenEC.getKnownMinValue() / InVTEC.getKnownMinValue();
- SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));
+ SmallVector<SDValue, 16> Ops(NumConcat, DAG.getPOISON(InVT));
Ops[0] = InOp;
SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);
if (N->getNumOperands() == 1)
@@ -5673,7 +5673,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
// Otherwise unroll into some nasty scalar code and rebuild the vector.
EVT EltVT = WidenVT.getVectorElementType();
- SmallVector<SDValue, 16> Ops(WidenEC.getFixedValue(), DAG.getUNDEF(EltVT));
+ SmallVector<SDValue, 16> Ops(WidenEC.getFixedValue(), DAG.getPOISON(EltVT));
// Use the original element count so we don't do more scalar opts than
// necessary.
unsigned MinElts = N->getValueType(0).getVectorNumElements();
@@ -5756,7 +5756,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) {
// Otherwise unroll into some nasty scalar code and rebuild the vector.
EVT EltVT = WidenVT.getVectorElementType();
std::array<EVT, 2> EltVTs = {{EltVT, MVT::Other}};
- SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
+ SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getPOISON(EltVT));
SmallVector<SDValue, 32> OpChains;
// Use the original element count so we don't do more scalar opts than
// necessary.
@@ -5819,7 +5819,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) {
}
while (Ops.size() != WidenNumElts)
- Ops.push_back(DAG.getUNDEF(WidenSVT));
+ Ops.push_back(DAG.getPOISON(WidenSVT));
return DAG.getBuildVector(WidenVT, DL, Ops);
}
@@ -6026,7 +6026,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
// input and then widening it. To avoid this, we widen the input only if
// it results in a legal type.
if (WidenSize % InSize == 0) {
- SmallVector<SDValue, 16> Ops(NewNumParts, DAG.getUNDEF(InVT));
+ SmallVector<SDValue, 16> Ops(NewNumParts, DAG.getPOISON(InVT));
Ops[0] = InOp;
NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops);
@@ -6034,7 +6034,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
SmallVector<SDValue, 16> Ops;
DAG.ExtractVectorElements(InOp, Ops);
Ops.append(WidenSize / InScalarSize - Ops.size(),
- DAG.getUNDEF(InVT.getVectorElementType()));
+ DAG.getPOISON(InVT.getVectorElementType()));
NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, Ops);
}
@@ -6088,7 +6088,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
if (WidenNumElts % NumInElts == 0) {
// Add undef vectors to widen to correct length.
unsigned NumConcat = WidenNumElts / NumInElts;
- SDValue UndefVal = DAG.getUNDEF(InVT);
+ SDValue UndefVal = DAG.getPOISON(InVT);
SmallVector<SDValue, 16> Ops(NumConcat);
for (unsigned i=0; i < NumOperands; ++i)
Ops[i] = N->getOperand(i);
@@ -6146,7 +6146,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
for (unsigned j = 0; j < NumInElts; ++j)
Ops[Idx++] = DAG.getExtractVectorElt(dl, EltVT, InOp, j);
}
- SDValue UndefVal = DAG.getUNDEF(EltVT);
+ SDValue UndefVal = DAG.getPOISON(EltVT);
for (; Idx < WidenNumElts; ++Idx)
Ops[Idx] = UndefVal;
return DAG.getBuildVector(WidenVT, dl, Ops);
@@ -6213,7 +6213,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
Parts.push_back(
DAG.getExtractSubvector(dl, PartVT, InOp, IdxVal + I * GCD));
for (; I < WidenNumElts / GCD; ++I)
- Parts.push_back(DAG.getUNDEF(PartVT));
+ Parts.push_back(DAG.getPOISON(PartVT));
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts);
}
@@ -6229,7 +6229,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
for (i = 0; i < VTNumElts; ++i)
Ops[i] = DAG.getExtractVectorElt(dl, EltVT, InOp, IdxVal + i);
- SDValue UndefVal = DAG.getUNDEF(EltVT);
+ SDValue UndefVal = DAG.getPOISON(EltVT);
for (; i < WidenNumElts; ++i)
Ops[i] = UndefVal;
return DAG.getBuildVector(WidenVT, dl, Ops);
@@ -6903,7 +6903,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_REVERSE(SDNode *N) {
Parts.push_back(
DAG.getExtractSubvector(dl, PartVT, ReverseVal, IdxVal + i * GCD));
for (; i < WidenNumElts / GCD; ++i)
- Parts.push_back(DAG.getUNDEF(PartVT));
+ Parts.push_back(DAG.getPOISON(PartVT));
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts);
}
@@ -6992,7 +6992,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_STRICT_FSETCC(SDNode *N) {
EVT TmpEltVT = LHS.getValueType().getVectorElementType();
// Fully unroll and reassemble.
- SmallVector<SDValue, 8> Scalars(WidenNumElts, DAG.getUNDEF(EltVT));
+ SmallVector<SDValue, 8> Scalars(WidenNumElts, DAG.getPOISON(EltVT));
SmallVector<SDValue, 8> Chains(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
SDValue LHSElem = DAG.getExtractVectorElt(dl, TmpEltVT, LHS, i);
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
index f6251ff66299e..8fc27248abac3 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
@@ -612,13 +612,6 @@ define <vscale x 14 x i8> @extract_nxv14i8_nxv28i8_14(<vscale x 28 x i8> %in) {
; CHECK-NEXT: uunpkhi z3.d, z3.s
; CHECK-NEXT: uzp1 z1.s, z1.s, z3.s
; CHECK-NEXT: uzp1 z1.h, z2.h, z1.h
-; CHECK-NEXT: uzp1 z1.b, z0.b, z1.b
-; CHECK-NEXT: uunpkhi z1.h, z1.b
-; CHECK-NEXT: uunpkhi z2.s, z1.h
-; CHECK-NEXT: uunpklo z1.s, z1.h
-; CHECK-NEXT: uunpklo z2.d, z2.s
-; CHECK-NEXT: uzp1 z2.s, z2.s, z0.s
-; CHECK-NEXT: uzp1 z1.h, z1.h, z2.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b
; CHECK-NEXT: ret
%res = call <vscale x 14 x i8> @llvm.vector.extract.nxv14i8.nxv28i8(<vscale x 28 x i8> %in, i64 14)
diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
index 71c3069a406fe..08ca1d153248e 100644
--- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
@@ -5286,16 +5286,16 @@ entry:
define <3 x i32> @constrained_vector_fptosi_v3i32_v3f32(<3 x float> %x) #0 {
; PC64LE-LABEL: constrained_vector_fptosi_v3i32_v3f32:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: xxsldwi 0, 34, 34, 3
-; PC64LE-NEXT: xxswapd 1, 34
+; PC64LE-NEXT: xxswapd 0, 34
+; PC64LE-NEXT: xxsldwi 1, 34, 34, 3
; PC64LE-NEXT: xscvspdpn 0, 0
; PC64LE-NEXT: xscvspdpn 1, 1
; PC64LE-NEXT: xxsldwi 2, 34, 34, 1
; PC64LE-NEXT: xscvdpsxws 0, 0
; PC64LE-NEXT: xscvdpsxws 1, 1
; PC64LE-NEXT: mffprwz 3, 0
-; PC64LE-NEXT: mtfprwz 0, 3
-; PC64LE-NEXT: mffprwz 3, 1
+; PC64LE-NEXT: mffprwz 4, 1
+; PC64LE-NEXT: mtfprwz 0, 4
; PC64LE-NEXT: mtfprwz 1, 3
; PC64LE-NEXT: addis 3, 2, .LCPI97_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI97_0@toc@l
@@ -5311,25 +5311,25 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f32(<3 x float> %x) #0 {
;
; PC64LE9-LABEL: constrained_vector_fptosi_v3i32_v3f32:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: xxsldwi 0, 34, 34, 3
-; PC64LE9-NEXT: xxswapd 1, 34
+; PC64LE9-NEXT: xxsldwi 0, 34, 34, 1
; PC64LE9-NEXT: xscvspdpn 0, 0
-; PC64LE9-NEXT: xscvspdpn 1, 1
; PC64LE9-NEXT: xscvdpsxws 0, 0
-; PC64LE9-NEXT: xscvdpsxws 1, 1
; PC64LE9-NEXT: mffprwz 3, 0
-; PC64LE9-NEXT: mtfprwz 0, 3
-; PC64LE9-NEXT: mffprwz 3, 1
-; PC64LE9-NEXT: mtfprwz 1, 3
-; PC64LE9-NEXT: addis 3, 2, .LCPI97_0@toc@ha
-; PC64LE9-NEXT: xxmrghw 35, 1, 0
-; PC64LE9-NEXT: xxsldwi 1, 34, 34, 1
-; PC64LE9-NEXT: addi 3, 3, .LCPI97_0@toc@l
-; PC64LE9-NEXT: lxv 0, 0(3)
-; PC64LE9-NEXT: xscvspdpn 1, 1
-; PC64LE9-NEXT: xscvdpsxws 1, 1
-; PC64LE9-NEXT: mffprwz 3, 1
+; PC64LE9-NEXT: xxswapd 0, 34
+; PC64LE9-NEXT: xscvspdpn 0, 0
+; PC64LE9-NEXT: xscvdpsxws 0, 0
+; PC64LE9-NEXT: mffprwz 4, 0
+; PC64LE9-NEXT: xxsldwi 0, 34, 34, 3
; PC64LE9-NEXT: mtvsrwz 34, 3
+; PC64LE9-NEXT: mtfprwz 1, 4
+; PC64LE9-NEXT: addis 4, 2, .LCPI97_0@toc@ha
+; PC64LE9-NEXT: xscvspdpn 0, 0
+; PC64LE9-NEXT: addi 4, 4, .LCPI97_0@toc@l
+; PC64LE9-NEXT: xscvdpsxws 0, 0
+; PC64LE9-NEXT: mffprwz 5, 0
+; PC64LE9-NEXT: mtfprwz 0, 5
+; PC64LE9-NEXT: xxmrghw 35, 1, 0
+; PC64LE9-NEXT: lxv 0, 0(4)
; PC64LE9-NEXT: xxperm 34, 35, 0
; PC64LE9-NEXT: blr
entry:
@@ -5558,11 +5558,11 @@ entry:
define <3 x i32> @constrained_vector_fptosi_v3i32_v3f64(<3 x double> %x) #0 {
; PC64LE-LABEL: constrained_vector_fptosi_v3i32_v3f64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: xscvdpsxws 0, 1
-; PC64LE-NEXT: xscvdpsxws 1, 2
+; PC64LE-NEXT: xscvdpsxws 0, 2
+; PC64LE-NEXT: xscvdpsxws 1, 1
; PC64LE-NEXT: mffprwz 3, 0
-; PC64LE-NEXT: mtfprwz 0, 3
-; PC64LE-NEXT: mffprwz 3, 1
+; PC64LE-NEXT: mffprwz 4, 1
+; PC64LE-NEXT: mtfprwz 0, 4
; PC64LE-NEXT: mtfprwz 1, 3
; PC64LE-NEXT: addis 3, 2, .LCPI105_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI105_0@toc@l
@@ -5577,19 +5577,19 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f64(<3 x double> %x) #0 {
;
; PC64LE9-LABEL: constrained_vector_fptosi_v3i32_v3f64:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: xscvdpsxws 0, 1
-; PC64LE9-NEXT: xscvdpsxws 1, 2
+; PC64LE9-NEXT: xscvdpsxws 0, 3
; PC64LE9-NEXT: mffprwz 3, 0
-; PC64LE9-NEXT: mtfprwz 0, 3
-; PC64LE9-NEXT: mffprwz 3, 1
-; PC64LE9-NEXT: mtfprwz 1, 3
-; PC64LE9-NEXT: addis 3, 2, .LCPI105_0@toc@ha
-; PC64LE9-NEXT: xxmrghw 35, 1, 0
-; PC64LE9-NEXT: xscvdpsxws 1, 3
-; PC64LE9-NEXT: addi 3, 3, .LCPI105_0@toc@l
-; PC64LE9-NEXT: lxv 0, 0(3)
-; PC64LE9-NEXT: mffprwz 3, 1
+; PC64LE9-NEXT: xscvdpsxws 0, 2
; PC64LE9-NEXT: mtvsrwz 34, 3
+; PC64LE9-NEXT: mffprwz 4, 0
+; PC64LE9-NEXT: xscvdpsxws 0, 1
+; PC64LE9-NEXT: mtfprwz 1, 4
+; PC64LE9-NEXT: addis 4, 2, .LCPI105_0@toc@ha
+; PC64LE9-NEXT: addi 4, 4, .LCPI105_0@toc@l
+; PC64LE9-NEXT: mffprwz 5, 0
+; PC64LE9-NEXT: mtfprwz 0, 5
+; PC64LE9-NEXT: xxmrghw 35, 1, 0
+; PC64LE9-NEXT: lxv 0, 0(4)
; PC64LE9-NEXT: xxperm 34, 35, 0
; PC64LE9-NEXT: blr
entry:
@@ -5783,16 +5783,16 @@ entry:
define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32(<3 x float> %x) #0 {
; PC64LE-LABEL: constrained_vector_fptoui_v3i32_v3f32:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: xxsldwi 0, 34, 34, 3
-; PC64LE-NEXT: xxswapd 1, 34
+; PC64LE-NEXT: xxswapd 0, 34
+; PC64LE-NEXT: xxsldwi 1, 34, 34, 3
; PC64LE-NEXT: xscvspdpn 0, 0
; PC64LE-NEXT: xscvspdpn 1, 1
; PC64LE-NEXT: xxsldwi 2, 34, 34, 1
; PC64LE-NEXT: xscvdpuxws 0, 0
; PC64LE-NEXT: xscvdpuxws 1, 1
; PC64LE-NEXT: mffprwz 3, 0
-; PC64LE-NEXT: mtfprwz 0, 3
-; PC64LE-NEXT: mffprwz 3, 1
+; PC64LE-NEXT: mffprwz 4, 1
+; PC64LE-NEXT: mtfprwz 0, 4
; PC64LE-NEXT: mtfprwz 1, 3
; PC64LE-NEXT: addis 3, 2, .LCPI113_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI113_0@toc@l
@@ -5808,25 +5808,25 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32(<3 x float> %x) #0 {
;
; PC64LE9-LABEL: constrained_vector_fptoui_v3i32_v3f32:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: xxsldwi 0, 34, 34, 3
-; PC64LE9-NEXT: xxswapd 1, 34
+; PC64LE9-NEXT: xxsldwi 0, 34, 34, 1
; PC64LE9-NEXT: xscvspdpn 0, 0
-; PC64LE9-NEXT: xscvspdpn 1, 1
; PC64LE9-NEXT: xscvdpuxws 0, 0
-; PC64LE9-NEXT: xscvdpuxws 1, 1
; PC64LE9-NEXT: mffprwz 3, 0
-; PC64LE9-NEXT: mtfprwz 0, 3
-; PC64LE9-NEXT: mffprwz 3, 1
-; PC64LE9-NEXT: mtfprwz 1, 3
-; PC64LE9-NEXT: addis 3, 2, .LCPI113_0@toc@ha
-; PC64LE9-NEXT: xxmrghw 35, 1, 0
-; PC64LE9-NEXT: xxsldwi 1, 34, 34, 1
-; PC64LE9-NEXT: addi 3, 3, .LCPI113_0@toc@l
-; PC64LE9-NEXT: lxv 0, 0(3)
-; PC64LE9-NEXT: xscvspdpn 1, 1
-; PC64LE9-NEXT: xscvdpuxws 1, 1
-; PC64LE9-NEXT: mffprwz 3, 1
+; PC64LE9-NEXT: xxswapd 0, 34
+; PC64LE9-NEXT: xscvspdpn 0, 0
+; PC64LE9-NEXT: xscvdpuxws 0, 0
+; PC64LE9-NEXT: mffprwz 4, 0
+; PC64LE9-NEXT: xxsldwi 0, 34, 34, 3
; PC64LE9-NEXT: mtvsrwz 34, 3
+; PC64LE9-NEXT: mtfprwz 1, 4
+; PC64LE9-NEXT: addis 4, 2, .LCPI113_0@toc@ha
+; PC64LE9-NEXT: xscvspdpn 0, 0
+; PC64LE9-NEXT: addi 4, 4, .LCPI113_0@toc@l
+; PC64LE9-NEXT: xscvdpuxws 0, 0
+; PC64LE9-NEXT: mffprwz 5, 0
+; PC64LE9-NEXT: mtfprwz 0, 5
+; PC64LE9-NEXT: xxmrghw 35, 1, 0
+; PC64LE9-NEXT: lxv 0, 0(4)
; PC64LE9-NEXT: xxperm 34, 35, 0
; PC64LE9-NEXT: blr
entry:
@@ -6054,11 +6054,11 @@ entry:
define <3 x i32> @constrained_vector_fptoui_v3i32_v3f64(<3 x double> %x) #0 {
; PC64LE-LABEL: constrained_vector_fptoui_v3i32_v3f64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: xscvdpuxws 0, 1
-; PC64LE-NEXT: xscvdpuxws 1, 2
+; PC64LE-NEXT: xscvdpuxws 0, 2
+; PC64LE-NEXT: xscvdpuxws 1, 1
; PC64LE-NEXT: mffprwz 3, 0
-; PC64LE-NEXT: mtfprwz 0, 3
-; PC64LE-NEXT: mffprwz 3, 1
+; PC64LE-NEXT: mffprwz 4, 1
+; PC64LE-NEXT: mtfprwz 0, 4
; PC64LE-NEXT: mtfprwz 1, 3
; PC64LE-NEXT: addis 3, 2, .LCPI121_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI121_0@toc@l
@@ -6073,19 +6073,19 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f64(<3 x double> %x) #0 {
;
; PC64LE9-LABEL: constrained_vector_fptoui_v3i32_v3f64:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: xscvdpuxws 0, 1
-; PC64LE9-NEXT: xscvdpuxws 1, 2
+; PC64LE9-NEXT: xscvdpuxws 0, 3
; PC64LE9-NEXT: mffprwz 3, 0
-; PC64LE9-NEXT: mtfprwz 0, 3
-; PC64LE9-NEXT: mffprwz 3, 1
-; PC64LE9-NEXT: mtfprwz 1, 3
-; PC64LE9-NEXT: addis 3, 2, .LCPI121_0@toc@ha
-; PC64LE9-NEXT: xxmrghw 35, 1, 0
-; PC64LE9-NEXT: xscvdpuxws 1, 3
-; PC64LE9-NEXT: addi 3, 3, .LCPI121_0@toc@l
-; PC64LE9-NEXT: lxv 0, 0(3)
-; PC64LE9-NEXT: mffprwz 3, 1
+; PC64LE9-NEXT: xscvdpuxws 0, 2
; PC64LE9-NEXT: mtvsrwz 34, 3
+; PC64LE9-NEXT: mffprwz 4, 0
+; PC64LE9-NEXT: xscvdpuxws 0, 1
+; PC64LE9-NEXT: mtfprwz 1, 4
+; PC64LE9-NEXT: addis 4, 2, .LCPI121_0@toc@ha
+; PC64LE9-NEXT: addi 4, 4, .LCPI121_0@toc@l
+; PC64LE9-NEXT: mffprwz 5, 0
+; PC64LE9-NEXT: mtfprwz 0, 5
+; PC64LE9-NEXT: xxmrghw 35, 1, 0
+; PC64LE9-NEXT: lxv 0, 0(4)
; PC64LE9-NEXT: xxperm 34, 35, 0
; PC64LE9-NEXT: blr
entry:
@@ -6269,33 +6269,33 @@ entry:
define <3 x float> @constrained_vector_fptrunc_v3f64(<3 x double> %x) #0 {
; PC64LE-LABEL: constrained_vector_fptrunc_v3f64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: xsrsp 0, 1
-; PC64LE-NEXT: xsrsp 1, 2
+; PC64LE-NEXT: xsrsp 0, 3
+; PC64LE-NEXT: xsrsp 2, 2
; PC64LE-NEXT: addis 3, 2, .LCPI129_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI129_0@toc@l
-; PC64LE-NEXT: xscvdpspn 0, 0
+; PC64LE-NEXT: xsrsp 1, 1
; PC64LE-NEXT: xscvdpspn 1, 1
-; PC64LE-NEXT: xxmrghw 34, 1, 0
-; PC64LE-NEXT: lxvd2x 0, 0, 3
-; PC64LE-NEXT: xxswapd 35, 0
-; PC64LE-NEXT: xsrsp 0, 3
+; PC64LE-NEXT: xscvdpspn 2, 2
; PC64LE-NEXT: xscvdpspn 36, 0
+; PC64LE-NEXT: xxmrghw 34, 2, 1
+; PC64LE-NEXT: lxvd2x 1, 0, 3
+; PC64LE-NEXT: xxswapd 35, 1
; PC64LE-NEXT: vperm 2, 4, 2, 3
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_fptrunc_v3f64:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: xsrsp 0, 1
-; PC64LE9-NEXT: xsrsp 1, 2
+; PC64LE9-NEXT: xsrsp 0, 3
+; PC64LE9-NEXT: xsrsp 2, 2
+; PC64LE9-NEXT: xsrsp 1, 1
; PC64LE9-NEXT: addis 3, 2, .LCPI129_0@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI129_0@toc@l
-; PC64LE9-NEXT: xscvdpspn 0, 0
; PC64LE9-NEXT: xscvdpspn 1, 1
-; PC64LE9-NEXT: xxmrghw 35, 1, 0
-; PC64LE9-NEXT: xsrsp 1, 3
-; PC64LE9-NEXT: lxv 0, 0(3)
-; PC64LE9-NEXT: xscvdpspn 34, 1
-; PC64LE9-NEXT: xxperm 34, 35, 0
+; PC64LE9-NEXT: xscvdpspn 2, 2
+; PC64LE9-NEXT: xscvdpspn 34, 0
+; PC64LE9-NEXT: xxmrghw 35, 2, 1
+; PC64LE9-NEXT: lxv 1, 0(3)
+; PC64LE9-NEXT: xxperm 34, 35, 1
; PC64LE9-NEXT: blr
entry:
%result = call <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(
@@ -7142,8 +7142,8 @@ entry:
define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
; PC64LE-LABEL: constrained_vector_sitofp_v3f32_v3i32:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: xxswapd 0, 34
-; PC64LE-NEXT: xxsldwi 1, 34, 34, 1
+; PC64LE-NEXT: xxsldwi 0, 34, 34, 1
+; PC64LE-NEXT: xxswapd 1, 34
; PC64LE-NEXT: mffprwz 3, 0
; PC64LE-NEXT: mtfprwa 0, 3
; PC64LE-NEXT: mffprwz 3, 1
@@ -7154,7 +7154,7 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
; PC64LE-NEXT: xscvsxdsp 1, 1
; PC64LE-NEXT: xscvdpspn 0, 0
; PC64LE-NEXT: xscvdpspn 1, 1
-; PC64LE-NEXT: xxmrghw 35, 1, 0
+; PC64LE-NEXT: xxmrghw 35, 0, 1
; PC64LE-NEXT: lxvd2x 0, 0, 3
; PC64LE-NEXT: mfvsrwz 3, 34
; PC64LE-NEXT: xxswapd 36, 0
@@ -7166,24 +7166,24 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
;
; PC64LE9-LABEL: constrained_vector_sitofp_v3f32_v3i32:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: li 3, 0
+; PC64LE9-NEXT: li 3, 4
; PC64LE9-NEXT: vextuwrx 3, 3, 2
; PC64LE9-NEXT: mtfprwa 0, 3
-; PC64LE9-NEXT: li 3, 4
+; PC64LE9-NEXT: li 3, 0
; PC64LE9-NEXT: vextuwrx 3, 3, 2
; PC64LE9-NEXT: xscvsxdsp 0, 0
; PC64LE9-NEXT: mtfprwa 1, 3
-; PC64LE9-NEXT: addis 3, 2, .LCPI161_0@toc@ha
+; PC64LE9-NEXT: mfvsrwz 3, 34
; PC64LE9-NEXT: xscvsxdsp 1, 1
-; PC64LE9-NEXT: addi 3, 3, .LCPI161_0@toc@l
+; PC64LE9-NEXT: mtfprwa 2, 3
+; PC64LE9-NEXT: addis 3, 2, .LCPI161_0@toc@ha
+; PC64LE9-NEXT: xscvsxdsp 2, 2
; PC64LE9-NEXT: xscvdpspn 0, 0
+; PC64LE9-NEXT: addi 3, 3, .LCPI161_0@toc@l
; PC64LE9-NEXT: xscvdpspn 1, 1
-; PC64LE9-NEXT: xxmrghw 35, 1, 0
+; PC64LE9-NEXT: xscvdpspn 34, 2
+; PC64LE9-NEXT: xxmrghw 35, 0, 1
; PC64LE9-NEXT: lxv 0, 0(3)
-; PC64LE9-NEXT: mfvsrwz 3, 34
-; PC64LE9-NEXT: mtfprwa 1, 3
-; PC64LE9-NEXT: xscvsxdsp 1, 1
-; PC64LE9-NEXT: xscvdpspn 34, 1
; PC64LE9-NEXT: xxperm 34, 35, 0
; PC64LE9-NEXT: blr
entry:
@@ -7225,15 +7225,15 @@ entry:
define <3 x float> @constrained_vector_sitofp_v3f32_v3i64(<3 x i64> %x) #0 {
; PC64LE-LABEL: constrained_vector_sitofp_v3f32_v3i64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: mtfprd 0, 3
-; PC64LE-NEXT: mtfprd 1, 4
+; PC64LE-NEXT: mtfprd 0, 4
+; PC64LE-NEXT: mtfprd 1, 3
; PC64LE-NEXT: addis 3, 2, .LCPI163_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI163_0@toc@l
; PC64LE-NEXT: xscvsxdsp 0, 0
; PC64LE-NEXT: xscvsxdsp 1, 1
-; PC64LE-NEXT: xscvdpspn 0, 0
; PC64LE-NEXT: xscvdpspn 1, 1
-; PC64LE-NEXT: xxmrghw 34, 1, 0
+; PC64LE-NEXT: xscvdpspn 0, 0
+; PC64LE-NEXT: xxmrghw 34, 0, 1
; PC64LE-NEXT: lxvd2x 0, 0, 3
; PC64LE-NEXT: xxswapd 35, 0
; PC64LE-NEXT: mtfprd 0, 5
@@ -7244,20 +7244,20 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i64(<3 x i64> %x) #0 {
;
; PC64LE9-LABEL: constrained_vector_sitofp_v3f32_v3i64:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: mtfprd 0, 3
; PC64LE9-NEXT: mtfprd 1, 4
+; PC64LE9-NEXT: mtfprd 2, 3
+; PC64LE9-NEXT: mtfprd 0, 5
; PC64LE9-NEXT: addis 3, 2, .LCPI163_0@toc@ha
-; PC64LE9-NEXT: xscvsxdsp 0, 0
; PC64LE9-NEXT: xscvsxdsp 1, 1
+; PC64LE9-NEXT: xscvsxdsp 2, 2
+; PC64LE9-NEXT: xscvsxdsp 0, 0
; PC64LE9-NEXT: addi 3, 3, .LCPI163_0@toc@l
-; PC64LE9-NEXT: xscvdpspn 0, 0
+; PC64LE9-NEXT: xscvdpspn 2, 2
; PC64LE9-NEXT: xscvdpspn 1, 1
-; PC64LE9-NEXT: xxmrghw 35, 1, 0
-; PC64LE9-NEXT: mtfprd 1, 5
-; PC64LE9-NEXT: lxv 0, 0(3)
-; PC64LE9-NEXT: xscvsxdsp 1, 1
-; PC64LE9-NEXT: ...
[truncated]
|
|
@llvm/pr-subscribers-backend-x86
Author: Matt Arsenault (arsenm)
Changes
Patch is 76.41 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/168290.diff
6 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index ef53ee6df9f06..10d5f7a9b4f65 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -5654,7 +5654,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
// Widen the input and call convert on the widened input vector.
unsigned NumConcat =
WidenEC.getKnownMinValue() / InVTEC.getKnownMinValue();
- SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));
+ SmallVector<SDValue, 16> Ops(NumConcat, DAG.getPOISON(InVT));
Ops[0] = InOp;
SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);
if (N->getNumOperands() == 1)
@@ -5673,7 +5673,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
// Otherwise unroll into some nasty scalar code and rebuild the vector.
EVT EltVT = WidenVT.getVectorElementType();
- SmallVector<SDValue, 16> Ops(WidenEC.getFixedValue(), DAG.getUNDEF(EltVT));
+ SmallVector<SDValue, 16> Ops(WidenEC.getFixedValue(), DAG.getPOISON(EltVT));
// Use the original element count so we don't do more scalar opts than
// necessary.
unsigned MinElts = N->getValueType(0).getVectorNumElements();
@@ -5756,7 +5756,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) {
// Otherwise unroll into some nasty scalar code and rebuild the vector.
EVT EltVT = WidenVT.getVectorElementType();
std::array<EVT, 2> EltVTs = {{EltVT, MVT::Other}};
- SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
+ SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getPOISON(EltVT));
SmallVector<SDValue, 32> OpChains;
// Use the original element count so we don't do more scalar opts than
// necessary.
@@ -5819,7 +5819,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) {
}
while (Ops.size() != WidenNumElts)
- Ops.push_back(DAG.getUNDEF(WidenSVT));
+ Ops.push_back(DAG.getPOISON(WidenSVT));
return DAG.getBuildVector(WidenVT, DL, Ops);
}
@@ -6026,7 +6026,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
// input and then widening it. To avoid this, we widen the input only if
// it results in a legal type.
if (WidenSize % InSize == 0) {
- SmallVector<SDValue, 16> Ops(NewNumParts, DAG.getUNDEF(InVT));
+ SmallVector<SDValue, 16> Ops(NewNumParts, DAG.getPOISON(InVT));
Ops[0] = InOp;
NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops);
@@ -6034,7 +6034,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
SmallVector<SDValue, 16> Ops;
DAG.ExtractVectorElements(InOp, Ops);
Ops.append(WidenSize / InScalarSize - Ops.size(),
- DAG.getUNDEF(InVT.getVectorElementType()));
+ DAG.getPOISON(InVT.getVectorElementType()));
NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, Ops);
}
@@ -6088,7 +6088,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
if (WidenNumElts % NumInElts == 0) {
// Add undef vectors to widen to correct length.
unsigned NumConcat = WidenNumElts / NumInElts;
- SDValue UndefVal = DAG.getUNDEF(InVT);
+ SDValue UndefVal = DAG.getPOISON(InVT);
SmallVector<SDValue, 16> Ops(NumConcat);
for (unsigned i=0; i < NumOperands; ++i)
Ops[i] = N->getOperand(i);
@@ -6146,7 +6146,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
for (unsigned j = 0; j < NumInElts; ++j)
Ops[Idx++] = DAG.getExtractVectorElt(dl, EltVT, InOp, j);
}
- SDValue UndefVal = DAG.getUNDEF(EltVT);
+ SDValue UndefVal = DAG.getPOISON(EltVT);
for (; Idx < WidenNumElts; ++Idx)
Ops[Idx] = UndefVal;
return DAG.getBuildVector(WidenVT, dl, Ops);
@@ -6213,7 +6213,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
Parts.push_back(
DAG.getExtractSubvector(dl, PartVT, InOp, IdxVal + I * GCD));
for (; I < WidenNumElts / GCD; ++I)
- Parts.push_back(DAG.getUNDEF(PartVT));
+ Parts.push_back(DAG.getPOISON(PartVT));
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts);
}
@@ -6229,7 +6229,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
for (i = 0; i < VTNumElts; ++i)
Ops[i] = DAG.getExtractVectorElt(dl, EltVT, InOp, IdxVal + i);
- SDValue UndefVal = DAG.getUNDEF(EltVT);
+ SDValue UndefVal = DAG.getPOISON(EltVT);
for (; i < WidenNumElts; ++i)
Ops[i] = UndefVal;
return DAG.getBuildVector(WidenVT, dl, Ops);
@@ -6903,7 +6903,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_REVERSE(SDNode *N) {
Parts.push_back(
DAG.getExtractSubvector(dl, PartVT, ReverseVal, IdxVal + i * GCD));
for (; i < WidenNumElts / GCD; ++i)
- Parts.push_back(DAG.getUNDEF(PartVT));
+ Parts.push_back(DAG.getPOISON(PartVT));
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts);
}
@@ -6992,7 +6992,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_STRICT_FSETCC(SDNode *N) {
EVT TmpEltVT = LHS.getValueType().getVectorElementType();
// Fully unroll and reassemble.
- SmallVector<SDValue, 8> Scalars(WidenNumElts, DAG.getUNDEF(EltVT));
+ SmallVector<SDValue, 8> Scalars(WidenNumElts, DAG.getPOISON(EltVT));
SmallVector<SDValue, 8> Chains(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
SDValue LHSElem = DAG.getExtractVectorElt(dl, TmpEltVT, LHS, i);
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
index f6251ff66299e..8fc27248abac3 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
@@ -612,13 +612,6 @@ define <vscale x 14 x i8> @extract_nxv14i8_nxv28i8_14(<vscale x 28 x i8> %in) {
; CHECK-NEXT: uunpkhi z3.d, z3.s
; CHECK-NEXT: uzp1 z1.s, z1.s, z3.s
; CHECK-NEXT: uzp1 z1.h, z2.h, z1.h
-; CHECK-NEXT: uzp1 z1.b, z0.b, z1.b
-; CHECK-NEXT: uunpkhi z1.h, z1.b
-; CHECK-NEXT: uunpkhi z2.s, z1.h
-; CHECK-NEXT: uunpklo z1.s, z1.h
-; CHECK-NEXT: uunpklo z2.d, z2.s
-; CHECK-NEXT: uzp1 z2.s, z2.s, z0.s
-; CHECK-NEXT: uzp1 z1.h, z1.h, z2.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b
; CHECK-NEXT: ret
%res = call <vscale x 14 x i8> @llvm.vector.extract.nxv14i8.nxv28i8(<vscale x 28 x i8> %in, i64 14)
diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
index 71c3069a406fe..08ca1d153248e 100644
--- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
@@ -5286,16 +5286,16 @@ entry:
define <3 x i32> @constrained_vector_fptosi_v3i32_v3f32(<3 x float> %x) #0 {
; PC64LE-LABEL: constrained_vector_fptosi_v3i32_v3f32:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: xxsldwi 0, 34, 34, 3
-; PC64LE-NEXT: xxswapd 1, 34
+; PC64LE-NEXT: xxswapd 0, 34
+; PC64LE-NEXT: xxsldwi 1, 34, 34, 3
; PC64LE-NEXT: xscvspdpn 0, 0
; PC64LE-NEXT: xscvspdpn 1, 1
; PC64LE-NEXT: xxsldwi 2, 34, 34, 1
; PC64LE-NEXT: xscvdpsxws 0, 0
; PC64LE-NEXT: xscvdpsxws 1, 1
; PC64LE-NEXT: mffprwz 3, 0
-; PC64LE-NEXT: mtfprwz 0, 3
-; PC64LE-NEXT: mffprwz 3, 1
+; PC64LE-NEXT: mffprwz 4, 1
+; PC64LE-NEXT: mtfprwz 0, 4
; PC64LE-NEXT: mtfprwz 1, 3
; PC64LE-NEXT: addis 3, 2, .LCPI97_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI97_0@toc@l
@@ -5311,25 +5311,25 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f32(<3 x float> %x) #0 {
;
; PC64LE9-LABEL: constrained_vector_fptosi_v3i32_v3f32:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: xxsldwi 0, 34, 34, 3
-; PC64LE9-NEXT: xxswapd 1, 34
+; PC64LE9-NEXT: xxsldwi 0, 34, 34, 1
; PC64LE9-NEXT: xscvspdpn 0, 0
-; PC64LE9-NEXT: xscvspdpn 1, 1
; PC64LE9-NEXT: xscvdpsxws 0, 0
-; PC64LE9-NEXT: xscvdpsxws 1, 1
; PC64LE9-NEXT: mffprwz 3, 0
-; PC64LE9-NEXT: mtfprwz 0, 3
-; PC64LE9-NEXT: mffprwz 3, 1
-; PC64LE9-NEXT: mtfprwz 1, 3
-; PC64LE9-NEXT: addis 3, 2, .LCPI97_0@toc@ha
-; PC64LE9-NEXT: xxmrghw 35, 1, 0
-; PC64LE9-NEXT: xxsldwi 1, 34, 34, 1
-; PC64LE9-NEXT: addi 3, 3, .LCPI97_0@toc@l
-; PC64LE9-NEXT: lxv 0, 0(3)
-; PC64LE9-NEXT: xscvspdpn 1, 1
-; PC64LE9-NEXT: xscvdpsxws 1, 1
-; PC64LE9-NEXT: mffprwz 3, 1
+; PC64LE9-NEXT: xxswapd 0, 34
+; PC64LE9-NEXT: xscvspdpn 0, 0
+; PC64LE9-NEXT: xscvdpsxws 0, 0
+; PC64LE9-NEXT: mffprwz 4, 0
+; PC64LE9-NEXT: xxsldwi 0, 34, 34, 3
; PC64LE9-NEXT: mtvsrwz 34, 3
+; PC64LE9-NEXT: mtfprwz 1, 4
+; PC64LE9-NEXT: addis 4, 2, .LCPI97_0@toc@ha
+; PC64LE9-NEXT: xscvspdpn 0, 0
+; PC64LE9-NEXT: addi 4, 4, .LCPI97_0@toc@l
+; PC64LE9-NEXT: xscvdpsxws 0, 0
+; PC64LE9-NEXT: mffprwz 5, 0
+; PC64LE9-NEXT: mtfprwz 0, 5
+; PC64LE9-NEXT: xxmrghw 35, 1, 0
+; PC64LE9-NEXT: lxv 0, 0(4)
; PC64LE9-NEXT: xxperm 34, 35, 0
; PC64LE9-NEXT: blr
entry:
@@ -5558,11 +5558,11 @@ entry:
define <3 x i32> @constrained_vector_fptosi_v3i32_v3f64(<3 x double> %x) #0 {
; PC64LE-LABEL: constrained_vector_fptosi_v3i32_v3f64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: xscvdpsxws 0, 1
-; PC64LE-NEXT: xscvdpsxws 1, 2
+; PC64LE-NEXT: xscvdpsxws 0, 2
+; PC64LE-NEXT: xscvdpsxws 1, 1
; PC64LE-NEXT: mffprwz 3, 0
-; PC64LE-NEXT: mtfprwz 0, 3
-; PC64LE-NEXT: mffprwz 3, 1
+; PC64LE-NEXT: mffprwz 4, 1
+; PC64LE-NEXT: mtfprwz 0, 4
; PC64LE-NEXT: mtfprwz 1, 3
; PC64LE-NEXT: addis 3, 2, .LCPI105_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI105_0@toc@l
@@ -5577,19 +5577,19 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f64(<3 x double> %x) #0 {
;
; PC64LE9-LABEL: constrained_vector_fptosi_v3i32_v3f64:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: xscvdpsxws 0, 1
-; PC64LE9-NEXT: xscvdpsxws 1, 2
+; PC64LE9-NEXT: xscvdpsxws 0, 3
; PC64LE9-NEXT: mffprwz 3, 0
-; PC64LE9-NEXT: mtfprwz 0, 3
-; PC64LE9-NEXT: mffprwz 3, 1
-; PC64LE9-NEXT: mtfprwz 1, 3
-; PC64LE9-NEXT: addis 3, 2, .LCPI105_0@toc@ha
-; PC64LE9-NEXT: xxmrghw 35, 1, 0
-; PC64LE9-NEXT: xscvdpsxws 1, 3
-; PC64LE9-NEXT: addi 3, 3, .LCPI105_0@toc@l
-; PC64LE9-NEXT: lxv 0, 0(3)
-; PC64LE9-NEXT: mffprwz 3, 1
+; PC64LE9-NEXT: xscvdpsxws 0, 2
; PC64LE9-NEXT: mtvsrwz 34, 3
+; PC64LE9-NEXT: mffprwz 4, 0
+; PC64LE9-NEXT: xscvdpsxws 0, 1
+; PC64LE9-NEXT: mtfprwz 1, 4
+; PC64LE9-NEXT: addis 4, 2, .LCPI105_0@toc@ha
+; PC64LE9-NEXT: addi 4, 4, .LCPI105_0@toc@l
+; PC64LE9-NEXT: mffprwz 5, 0
+; PC64LE9-NEXT: mtfprwz 0, 5
+; PC64LE9-NEXT: xxmrghw 35, 1, 0
+; PC64LE9-NEXT: lxv 0, 0(4)
; PC64LE9-NEXT: xxperm 34, 35, 0
; PC64LE9-NEXT: blr
entry:
@@ -5783,16 +5783,16 @@ entry:
define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32(<3 x float> %x) #0 {
; PC64LE-LABEL: constrained_vector_fptoui_v3i32_v3f32:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: xxsldwi 0, 34, 34, 3
-; PC64LE-NEXT: xxswapd 1, 34
+; PC64LE-NEXT: xxswapd 0, 34
+; PC64LE-NEXT: xxsldwi 1, 34, 34, 3
; PC64LE-NEXT: xscvspdpn 0, 0
; PC64LE-NEXT: xscvspdpn 1, 1
; PC64LE-NEXT: xxsldwi 2, 34, 34, 1
; PC64LE-NEXT: xscvdpuxws 0, 0
; PC64LE-NEXT: xscvdpuxws 1, 1
; PC64LE-NEXT: mffprwz 3, 0
-; PC64LE-NEXT: mtfprwz 0, 3
-; PC64LE-NEXT: mffprwz 3, 1
+; PC64LE-NEXT: mffprwz 4, 1
+; PC64LE-NEXT: mtfprwz 0, 4
; PC64LE-NEXT: mtfprwz 1, 3
; PC64LE-NEXT: addis 3, 2, .LCPI113_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI113_0@toc@l
@@ -5808,25 +5808,25 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32(<3 x float> %x) #0 {
;
; PC64LE9-LABEL: constrained_vector_fptoui_v3i32_v3f32:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: xxsldwi 0, 34, 34, 3
-; PC64LE9-NEXT: xxswapd 1, 34
+; PC64LE9-NEXT: xxsldwi 0, 34, 34, 1
; PC64LE9-NEXT: xscvspdpn 0, 0
-; PC64LE9-NEXT: xscvspdpn 1, 1
; PC64LE9-NEXT: xscvdpuxws 0, 0
-; PC64LE9-NEXT: xscvdpuxws 1, 1
; PC64LE9-NEXT: mffprwz 3, 0
-; PC64LE9-NEXT: mtfprwz 0, 3
-; PC64LE9-NEXT: mffprwz 3, 1
-; PC64LE9-NEXT: mtfprwz 1, 3
-; PC64LE9-NEXT: addis 3, 2, .LCPI113_0@toc@ha
-; PC64LE9-NEXT: xxmrghw 35, 1, 0
-; PC64LE9-NEXT: xxsldwi 1, 34, 34, 1
-; PC64LE9-NEXT: addi 3, 3, .LCPI113_0@toc@l
-; PC64LE9-NEXT: lxv 0, 0(3)
-; PC64LE9-NEXT: xscvspdpn 1, 1
-; PC64LE9-NEXT: xscvdpuxws 1, 1
-; PC64LE9-NEXT: mffprwz 3, 1
+; PC64LE9-NEXT: xxswapd 0, 34
+; PC64LE9-NEXT: xscvspdpn 0, 0
+; PC64LE9-NEXT: xscvdpuxws 0, 0
+; PC64LE9-NEXT: mffprwz 4, 0
+; PC64LE9-NEXT: xxsldwi 0, 34, 34, 3
; PC64LE9-NEXT: mtvsrwz 34, 3
+; PC64LE9-NEXT: mtfprwz 1, 4
+; PC64LE9-NEXT: addis 4, 2, .LCPI113_0@toc@ha
+; PC64LE9-NEXT: xscvspdpn 0, 0
+; PC64LE9-NEXT: addi 4, 4, .LCPI113_0@toc@l
+; PC64LE9-NEXT: xscvdpuxws 0, 0
+; PC64LE9-NEXT: mffprwz 5, 0
+; PC64LE9-NEXT: mtfprwz 0, 5
+; PC64LE9-NEXT: xxmrghw 35, 1, 0
+; PC64LE9-NEXT: lxv 0, 0(4)
; PC64LE9-NEXT: xxperm 34, 35, 0
; PC64LE9-NEXT: blr
entry:
@@ -6054,11 +6054,11 @@ entry:
define <3 x i32> @constrained_vector_fptoui_v3i32_v3f64(<3 x double> %x) #0 {
; PC64LE-LABEL: constrained_vector_fptoui_v3i32_v3f64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: xscvdpuxws 0, 1
-; PC64LE-NEXT: xscvdpuxws 1, 2
+; PC64LE-NEXT: xscvdpuxws 0, 2
+; PC64LE-NEXT: xscvdpuxws 1, 1
; PC64LE-NEXT: mffprwz 3, 0
-; PC64LE-NEXT: mtfprwz 0, 3
-; PC64LE-NEXT: mffprwz 3, 1
+; PC64LE-NEXT: mffprwz 4, 1
+; PC64LE-NEXT: mtfprwz 0, 4
; PC64LE-NEXT: mtfprwz 1, 3
; PC64LE-NEXT: addis 3, 2, .LCPI121_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI121_0@toc@l
@@ -6073,19 +6073,19 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f64(<3 x double> %x) #0 {
;
; PC64LE9-LABEL: constrained_vector_fptoui_v3i32_v3f64:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: xscvdpuxws 0, 1
-; PC64LE9-NEXT: xscvdpuxws 1, 2
+; PC64LE9-NEXT: xscvdpuxws 0, 3
; PC64LE9-NEXT: mffprwz 3, 0
-; PC64LE9-NEXT: mtfprwz 0, 3
-; PC64LE9-NEXT: mffprwz 3, 1
-; PC64LE9-NEXT: mtfprwz 1, 3
-; PC64LE9-NEXT: addis 3, 2, .LCPI121_0@toc@ha
-; PC64LE9-NEXT: xxmrghw 35, 1, 0
-; PC64LE9-NEXT: xscvdpuxws 1, 3
-; PC64LE9-NEXT: addi 3, 3, .LCPI121_0@toc@l
-; PC64LE9-NEXT: lxv 0, 0(3)
-; PC64LE9-NEXT: mffprwz 3, 1
+; PC64LE9-NEXT: xscvdpuxws 0, 2
; PC64LE9-NEXT: mtvsrwz 34, 3
+; PC64LE9-NEXT: mffprwz 4, 0
+; PC64LE9-NEXT: xscvdpuxws 0, 1
+; PC64LE9-NEXT: mtfprwz 1, 4
+; PC64LE9-NEXT: addis 4, 2, .LCPI121_0@toc@ha
+; PC64LE9-NEXT: addi 4, 4, .LCPI121_0@toc@l
+; PC64LE9-NEXT: mffprwz 5, 0
+; PC64LE9-NEXT: mtfprwz 0, 5
+; PC64LE9-NEXT: xxmrghw 35, 1, 0
+; PC64LE9-NEXT: lxv 0, 0(4)
; PC64LE9-NEXT: xxperm 34, 35, 0
; PC64LE9-NEXT: blr
entry:
@@ -6269,33 +6269,33 @@ entry:
define <3 x float> @constrained_vector_fptrunc_v3f64(<3 x double> %x) #0 {
; PC64LE-LABEL: constrained_vector_fptrunc_v3f64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: xsrsp 0, 1
-; PC64LE-NEXT: xsrsp 1, 2
+; PC64LE-NEXT: xsrsp 0, 3
+; PC64LE-NEXT: xsrsp 2, 2
; PC64LE-NEXT: addis 3, 2, .LCPI129_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI129_0@toc@l
-; PC64LE-NEXT: xscvdpspn 0, 0
+; PC64LE-NEXT: xsrsp 1, 1
; PC64LE-NEXT: xscvdpspn 1, 1
-; PC64LE-NEXT: xxmrghw 34, 1, 0
-; PC64LE-NEXT: lxvd2x 0, 0, 3
-; PC64LE-NEXT: xxswapd 35, 0
-; PC64LE-NEXT: xsrsp 0, 3
+; PC64LE-NEXT: xscvdpspn 2, 2
; PC64LE-NEXT: xscvdpspn 36, 0
+; PC64LE-NEXT: xxmrghw 34, 2, 1
+; PC64LE-NEXT: lxvd2x 1, 0, 3
+; PC64LE-NEXT: xxswapd 35, 1
; PC64LE-NEXT: vperm 2, 4, 2, 3
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_fptrunc_v3f64:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: xsrsp 0, 1
-; PC64LE9-NEXT: xsrsp 1, 2
+; PC64LE9-NEXT: xsrsp 0, 3
+; PC64LE9-NEXT: xsrsp 2, 2
+; PC64LE9-NEXT: xsrsp 1, 1
; PC64LE9-NEXT: addis 3, 2, .LCPI129_0@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI129_0@toc@l
-; PC64LE9-NEXT: xscvdpspn 0, 0
; PC64LE9-NEXT: xscvdpspn 1, 1
-; PC64LE9-NEXT: xxmrghw 35, 1, 0
-; PC64LE9-NEXT: xsrsp 1, 3
-; PC64LE9-NEXT: lxv 0, 0(3)
-; PC64LE9-NEXT: xscvdpspn 34, 1
-; PC64LE9-NEXT: xxperm 34, 35, 0
+; PC64LE9-NEXT: xscvdpspn 2, 2
+; PC64LE9-NEXT: xscvdpspn 34, 0
+; PC64LE9-NEXT: xxmrghw 35, 2, 1
+; PC64LE9-NEXT: lxv 1, 0(3)
+; PC64LE9-NEXT: xxperm 34, 35, 1
; PC64LE9-NEXT: blr
entry:
%result = call <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(
@@ -7142,8 +7142,8 @@ entry:
define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
; PC64LE-LABEL: constrained_vector_sitofp_v3f32_v3i32:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: xxswapd 0, 34
-; PC64LE-NEXT: xxsldwi 1, 34, 34, 1
+; PC64LE-NEXT: xxsldwi 0, 34, 34, 1
+; PC64LE-NEXT: xxswapd 1, 34
; PC64LE-NEXT: mffprwz 3, 0
; PC64LE-NEXT: mtfprwa 0, 3
; PC64LE-NEXT: mffprwz 3, 1
@@ -7154,7 +7154,7 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
; PC64LE-NEXT: xscvsxdsp 1, 1
; PC64LE-NEXT: xscvdpspn 0, 0
; PC64LE-NEXT: xscvdpspn 1, 1
-; PC64LE-NEXT: xxmrghw 35, 1, 0
+; PC64LE-NEXT: xxmrghw 35, 0, 1
; PC64LE-NEXT: lxvd2x 0, 0, 3
; PC64LE-NEXT: mfvsrwz 3, 34
; PC64LE-NEXT: xxswapd 36, 0
@@ -7166,24 +7166,24 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
;
; PC64LE9-LABEL: constrained_vector_sitofp_v3f32_v3i32:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: li 3, 0
+; PC64LE9-NEXT: li 3, 4
; PC64LE9-NEXT: vextuwrx 3, 3, 2
; PC64LE9-NEXT: mtfprwa 0, 3
-; PC64LE9-NEXT: li 3, 4
+; PC64LE9-NEXT: li 3, 0
; PC64LE9-NEXT: vextuwrx 3, 3, 2
; PC64LE9-NEXT: xscvsxdsp 0, 0
; PC64LE9-NEXT: mtfprwa 1, 3
-; PC64LE9-NEXT: addis 3, 2, .LCPI161_0@toc@ha
+; PC64LE9-NEXT: mfvsrwz 3, 34
; PC64LE9-NEXT: xscvsxdsp 1, 1
-; PC64LE9-NEXT: addi 3, 3, .LCPI161_0@toc@l
+; PC64LE9-NEXT: mtfprwa 2, 3
+; PC64LE9-NEXT: addis 3, 2, .LCPI161_0@toc@ha
+; PC64LE9-NEXT: xscvsxdsp 2, 2
; PC64LE9-NEXT: xscvdpspn 0, 0
+; PC64LE9-NEXT: addi 3, 3, .LCPI161_0@toc@l
; PC64LE9-NEXT: xscvdpspn 1, 1
-; PC64LE9-NEXT: xxmrghw 35, 1, 0
+; PC64LE9-NEXT: xscvdpspn 34, 2
+; PC64LE9-NEXT: xxmrghw 35, 0, 1
; PC64LE9-NEXT: lxv 0, 0(3)
-; PC64LE9-NEXT: mfvsrwz 3, 34
-; PC64LE9-NEXT: mtfprwa 1, 3
-; PC64LE9-NEXT: xscvsxdsp 1, 1
-; PC64LE9-NEXT: xscvdpspn 34, 1
; PC64LE9-NEXT: xxperm 34, 35, 0
; PC64LE9-NEXT: blr
entry:
@@ -7225,15 +7225,15 @@ entry:
define <3 x float> @constrained_vector_sitofp_v3f32_v3i64(<3 x i64> %x) #0 {
; PC64LE-LABEL: constrained_vector_sitofp_v3f32_v3i64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: mtfprd 0, 3
-; PC64LE-NEXT: mtfprd 1, 4
+; PC64LE-NEXT: mtfprd 0, 4
+; PC64LE-NEXT: mtfprd 1, 3
; PC64LE-NEXT: addis 3, 2, .LCPI163_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI163_0@toc@l
; PC64LE-NEXT: xscvsxdsp 0, 0
; PC64LE-NEXT: xscvsxdsp 1, 1
-; PC64LE-NEXT: xscvdpspn 0, 0
; PC64LE-NEXT: xscvdpspn 1, 1
-; PC64LE-NEXT: xxmrghw 34, 1, 0
+; PC64LE-NEXT: xscvdpspn 0, 0
+; PC64LE-NEXT: xxmrghw 34, 0, 1
; PC64LE-NEXT: lxvd2x 0, 0, 3
; PC64LE-NEXT: xxswapd 35, 0
; PC64LE-NEXT: mtfprd 0, 5
@@ -7244,20 +7244,20 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i64(<3 x i64> %x) #0 {
;
; PC64LE9-LABEL: constrained_vector_sitofp_v3f32_v3i64:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: mtfprd 0, 3
; PC64LE9-NEXT: mtfprd 1, 4
+; PC64LE9-NEXT: mtfprd 2, 3
+; PC64LE9-NEXT: mtfprd 0, 5
; PC64LE9-NEXT: addis 3, 2, .LCPI163_0@toc@ha
-; PC64LE9-NEXT: xscvsxdsp 0, 0
; PC64LE9-NEXT: xscvsxdsp 1, 1
+; PC64LE9-NEXT: xscvsxdsp 2, 2
+; PC64LE9-NEXT: xscvsxdsp 0, 0
; PC64LE9-NEXT: addi 3, 3, .LCPI163_0@toc@l
-; PC64LE9-NEXT: xscvdpspn 0, 0
+; PC64LE9-NEXT: xscvdpspn 2, 2
; PC64LE9-NEXT: xscvdpspn 1, 1
-; PC64LE9-NEXT: xxmrghw 35, 1, 0
-; PC64LE9-NEXT: mtfprd 1, 5
-; PC64LE9-NEXT: lxv 0, 0(3)
-; PC64LE9-NEXT: xscvsxdsp 1, 1
-; PC64LE9-NEXT: ...
[truncated]
|

No description provided.