Skip to content

Conversation

@arsenm
Copy link
Contributor

@arsenm arsenm commented Nov 16, 2025

No description provided.

Copy link
Contributor Author

arsenm commented Nov 16, 2025

Warning

This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite.
Learn more

This stack of pull requests is managed by Graphite. Learn more about stacking.

@arsenm arsenm marked this pull request as ready for review November 16, 2025 21:41
@llvmbot
Copy link
Member

llvmbot commented Nov 16, 2025

@llvm/pr-subscribers-backend-powerpc
@llvm/pr-subscribers-backend-aarch64

@llvm/pr-subscribers-llvm-selectiondag

Author: Matt Arsenault (arsenm)

Changes

Patch is 76.41 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/168290.diff

6 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (+12-12)
  • (modified) llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll (-7)
  • (modified) llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll (+133-133)
  • (modified) llvm/test/CodeGen/X86/half.ll (+64-69)
  • (modified) llvm/test/CodeGen/X86/matrix-multiply.ll (+38-36)
  • (modified) llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll (+216-218)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index ef53ee6df9f06..10d5f7a9b4f65 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -5654,7 +5654,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
       // Widen the input and call convert on the widened input vector.
       unsigned NumConcat =
           WidenEC.getKnownMinValue() / InVTEC.getKnownMinValue();
-      SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));
+      SmallVector<SDValue, 16> Ops(NumConcat, DAG.getPOISON(InVT));
       Ops[0] = InOp;
       SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);
       if (N->getNumOperands() == 1)
@@ -5673,7 +5673,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
 
   // Otherwise unroll into some nasty scalar code and rebuild the vector.
   EVT EltVT = WidenVT.getVectorElementType();
-  SmallVector<SDValue, 16> Ops(WidenEC.getFixedValue(), DAG.getUNDEF(EltVT));
+  SmallVector<SDValue, 16> Ops(WidenEC.getFixedValue(), DAG.getPOISON(EltVT));
   // Use the original element count so we don't do more scalar opts than
   // necessary.
   unsigned MinElts = N->getValueType(0).getVectorNumElements();
@@ -5756,7 +5756,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) {
   // Otherwise unroll into some nasty scalar code and rebuild the vector.
   EVT EltVT = WidenVT.getVectorElementType();
   std::array<EVT, 2> EltVTs = {{EltVT, MVT::Other}};
-  SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
+  SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getPOISON(EltVT));
   SmallVector<SDValue, 32> OpChains;
   // Use the original element count so we don't do more scalar opts than
   // necessary.
@@ -5819,7 +5819,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) {
   }
 
   while (Ops.size() != WidenNumElts)
-    Ops.push_back(DAG.getUNDEF(WidenSVT));
+    Ops.push_back(DAG.getPOISON(WidenSVT));
 
   return DAG.getBuildVector(WidenVT, DL, Ops);
 }
@@ -6026,7 +6026,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
         // input and then widening it. To avoid this, we widen the input only if
         // it results in a legal type.
         if (WidenSize % InSize == 0) {
-          SmallVector<SDValue, 16> Ops(NewNumParts, DAG.getUNDEF(InVT));
+          SmallVector<SDValue, 16> Ops(NewNumParts, DAG.getPOISON(InVT));
           Ops[0] = InOp;
 
           NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops);
@@ -6034,7 +6034,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
           SmallVector<SDValue, 16> Ops;
           DAG.ExtractVectorElements(InOp, Ops);
           Ops.append(WidenSize / InScalarSize - Ops.size(),
-                     DAG.getUNDEF(InVT.getVectorElementType()));
+                     DAG.getPOISON(InVT.getVectorElementType()));
 
           NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, Ops);
         }
@@ -6088,7 +6088,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
     if (WidenNumElts % NumInElts == 0) {
       // Add undef vectors to widen to correct length.
       unsigned NumConcat = WidenNumElts / NumInElts;
-      SDValue UndefVal = DAG.getUNDEF(InVT);
+      SDValue UndefVal = DAG.getPOISON(InVT);
       SmallVector<SDValue, 16> Ops(NumConcat);
       for (unsigned i=0; i < NumOperands; ++i)
         Ops[i] = N->getOperand(i);
@@ -6146,7 +6146,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
     for (unsigned j = 0; j < NumInElts; ++j)
       Ops[Idx++] = DAG.getExtractVectorElt(dl, EltVT, InOp, j);
   }
-  SDValue UndefVal = DAG.getUNDEF(EltVT);
+  SDValue UndefVal = DAG.getPOISON(EltVT);
   for (; Idx < WidenNumElts; ++Idx)
     Ops[Idx] = UndefVal;
   return DAG.getBuildVector(WidenVT, dl, Ops);
@@ -6213,7 +6213,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
         Parts.push_back(
             DAG.getExtractSubvector(dl, PartVT, InOp, IdxVal + I * GCD));
       for (; I < WidenNumElts / GCD; ++I)
-        Parts.push_back(DAG.getUNDEF(PartVT));
+        Parts.push_back(DAG.getPOISON(PartVT));
 
       return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts);
     }
@@ -6229,7 +6229,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
   for (i = 0; i < VTNumElts; ++i)
     Ops[i] = DAG.getExtractVectorElt(dl, EltVT, InOp, IdxVal + i);
 
-  SDValue UndefVal = DAG.getUNDEF(EltVT);
+  SDValue UndefVal = DAG.getPOISON(EltVT);
   for (; i < WidenNumElts; ++i)
     Ops[i] = UndefVal;
   return DAG.getBuildVector(WidenVT, dl, Ops);
@@ -6903,7 +6903,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_REVERSE(SDNode *N) {
       Parts.push_back(
           DAG.getExtractSubvector(dl, PartVT, ReverseVal, IdxVal + i * GCD));
     for (; i < WidenNumElts / GCD; ++i)
-      Parts.push_back(DAG.getUNDEF(PartVT));
+      Parts.push_back(DAG.getPOISON(PartVT));
 
     return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts);
   }
@@ -6992,7 +6992,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_STRICT_FSETCC(SDNode *N) {
   EVT TmpEltVT = LHS.getValueType().getVectorElementType();
 
   // Fully unroll and reassemble.
-  SmallVector<SDValue, 8> Scalars(WidenNumElts, DAG.getUNDEF(EltVT));
+  SmallVector<SDValue, 8> Scalars(WidenNumElts, DAG.getPOISON(EltVT));
   SmallVector<SDValue, 8> Chains(NumElts);
   for (unsigned i = 0; i != NumElts; ++i) {
     SDValue LHSElem = DAG.getExtractVectorElt(dl, TmpEltVT, LHS, i);
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
index f6251ff66299e..8fc27248abac3 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
@@ -612,13 +612,6 @@ define <vscale x 14 x i8> @extract_nxv14i8_nxv28i8_14(<vscale x 28 x i8> %in) {
 ; CHECK-NEXT:    uunpkhi z3.d, z3.s
 ; CHECK-NEXT:    uzp1 z1.s, z1.s, z3.s
 ; CHECK-NEXT:    uzp1 z1.h, z2.h, z1.h
-; CHECK-NEXT:    uzp1 z1.b, z0.b, z1.b
-; CHECK-NEXT:    uunpkhi z1.h, z1.b
-; CHECK-NEXT:    uunpkhi z2.s, z1.h
-; CHECK-NEXT:    uunpklo z1.s, z1.h
-; CHECK-NEXT:    uunpklo z2.d, z2.s
-; CHECK-NEXT:    uzp1 z2.s, z2.s, z0.s
-; CHECK-NEXT:    uzp1 z1.h, z1.h, z2.h
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z1.b
 ; CHECK-NEXT:    ret
   %res = call <vscale x 14 x i8> @llvm.vector.extract.nxv14i8.nxv28i8(<vscale x 28 x i8> %in, i64 14)
diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
index 71c3069a406fe..08ca1d153248e 100644
--- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
@@ -5286,16 +5286,16 @@ entry:
 define <3 x i32> @constrained_vector_fptosi_v3i32_v3f32(<3 x float> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_fptosi_v3i32_v3f32:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    xxsldwi 0, 34, 34, 3
-; PC64LE-NEXT:    xxswapd 1, 34
+; PC64LE-NEXT:    xxswapd 0, 34
+; PC64LE-NEXT:    xxsldwi 1, 34, 34, 3
 ; PC64LE-NEXT:    xscvspdpn 0, 0
 ; PC64LE-NEXT:    xscvspdpn 1, 1
 ; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
 ; PC64LE-NEXT:    xscvdpsxws 0, 0
 ; PC64LE-NEXT:    xscvdpsxws 1, 1
 ; PC64LE-NEXT:    mffprwz 3, 0
-; PC64LE-NEXT:    mtfprwz 0, 3
-; PC64LE-NEXT:    mffprwz 3, 1
+; PC64LE-NEXT:    mffprwz 4, 1
+; PC64LE-NEXT:    mtfprwz 0, 4
 ; PC64LE-NEXT:    mtfprwz 1, 3
 ; PC64LE-NEXT:    addis 3, 2, .LCPI97_0@toc@ha
 ; PC64LE-NEXT:    addi 3, 3, .LCPI97_0@toc@l
@@ -5311,25 +5311,25 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f32(<3 x float> %x) #0 {
 ;
 ; PC64LE9-LABEL: constrained_vector_fptosi_v3i32_v3f32:
 ; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 3
-; PC64LE9-NEXT:    xxswapd 1, 34
+; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 1
 ; PC64LE9-NEXT:    xscvspdpn 0, 0
-; PC64LE9-NEXT:    xscvspdpn 1, 1
 ; PC64LE9-NEXT:    xscvdpsxws 0, 0
-; PC64LE9-NEXT:    xscvdpsxws 1, 1
 ; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    mtfprwz 0, 3
-; PC64LE9-NEXT:    mffprwz 3, 1
-; PC64LE9-NEXT:    mtfprwz 1, 3
-; PC64LE9-NEXT:    addis 3, 2, .LCPI97_0@toc@ha
-; PC64LE9-NEXT:    xxmrghw 35, 1, 0
-; PC64LE9-NEXT:    xxsldwi 1, 34, 34, 1
-; PC64LE9-NEXT:    addi 3, 3, .LCPI97_0@toc@l
-; PC64LE9-NEXT:    lxv 0, 0(3)
-; PC64LE9-NEXT:    xscvspdpn 1, 1
-; PC64LE9-NEXT:    xscvdpsxws 1, 1
-; PC64LE9-NEXT:    mffprwz 3, 1
+; PC64LE9-NEXT:    xxswapd 0, 34
+; PC64LE9-NEXT:    xscvspdpn 0, 0
+; PC64LE9-NEXT:    xscvdpsxws 0, 0
+; PC64LE9-NEXT:    mffprwz 4, 0
+; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 3
 ; PC64LE9-NEXT:    mtvsrwz 34, 3
+; PC64LE9-NEXT:    mtfprwz 1, 4
+; PC64LE9-NEXT:    addis 4, 2, .LCPI97_0@toc@ha
+; PC64LE9-NEXT:    xscvspdpn 0, 0
+; PC64LE9-NEXT:    addi 4, 4, .LCPI97_0@toc@l
+; PC64LE9-NEXT:    xscvdpsxws 0, 0
+; PC64LE9-NEXT:    mffprwz 5, 0
+; PC64LE9-NEXT:    mtfprwz 0, 5
+; PC64LE9-NEXT:    xxmrghw 35, 1, 0
+; PC64LE9-NEXT:    lxv 0, 0(4)
 ; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    blr
 entry:
@@ -5558,11 +5558,11 @@ entry:
 define <3 x i32> @constrained_vector_fptosi_v3i32_v3f64(<3 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_fptosi_v3i32_v3f64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    xscvdpsxws 0, 1
-; PC64LE-NEXT:    xscvdpsxws 1, 2
+; PC64LE-NEXT:    xscvdpsxws 0, 2
+; PC64LE-NEXT:    xscvdpsxws 1, 1
 ; PC64LE-NEXT:    mffprwz 3, 0
-; PC64LE-NEXT:    mtfprwz 0, 3
-; PC64LE-NEXT:    mffprwz 3, 1
+; PC64LE-NEXT:    mffprwz 4, 1
+; PC64LE-NEXT:    mtfprwz 0, 4
 ; PC64LE-NEXT:    mtfprwz 1, 3
 ; PC64LE-NEXT:    addis 3, 2, .LCPI105_0@toc@ha
 ; PC64LE-NEXT:    addi 3, 3, .LCPI105_0@toc@l
@@ -5577,19 +5577,19 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f64(<3 x double> %x) #0 {
 ;
 ; PC64LE9-LABEL: constrained_vector_fptosi_v3i32_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    xscvdpsxws 0, 1
-; PC64LE9-NEXT:    xscvdpsxws 1, 2
+; PC64LE9-NEXT:    xscvdpsxws 0, 3
 ; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    mtfprwz 0, 3
-; PC64LE9-NEXT:    mffprwz 3, 1
-; PC64LE9-NEXT:    mtfprwz 1, 3
-; PC64LE9-NEXT:    addis 3, 2, .LCPI105_0@toc@ha
-; PC64LE9-NEXT:    xxmrghw 35, 1, 0
-; PC64LE9-NEXT:    xscvdpsxws 1, 3
-; PC64LE9-NEXT:    addi 3, 3, .LCPI105_0@toc@l
-; PC64LE9-NEXT:    lxv 0, 0(3)
-; PC64LE9-NEXT:    mffprwz 3, 1
+; PC64LE9-NEXT:    xscvdpsxws 0, 2
 ; PC64LE9-NEXT:    mtvsrwz 34, 3
+; PC64LE9-NEXT:    mffprwz 4, 0
+; PC64LE9-NEXT:    xscvdpsxws 0, 1
+; PC64LE9-NEXT:    mtfprwz 1, 4
+; PC64LE9-NEXT:    addis 4, 2, .LCPI105_0@toc@ha
+; PC64LE9-NEXT:    addi 4, 4, .LCPI105_0@toc@l
+; PC64LE9-NEXT:    mffprwz 5, 0
+; PC64LE9-NEXT:    mtfprwz 0, 5
+; PC64LE9-NEXT:    xxmrghw 35, 1, 0
+; PC64LE9-NEXT:    lxv 0, 0(4)
 ; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    blr
 entry:
@@ -5783,16 +5783,16 @@ entry:
 define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32(<3 x float> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_fptoui_v3i32_v3f32:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    xxsldwi 0, 34, 34, 3
-; PC64LE-NEXT:    xxswapd 1, 34
+; PC64LE-NEXT:    xxswapd 0, 34
+; PC64LE-NEXT:    xxsldwi 1, 34, 34, 3
 ; PC64LE-NEXT:    xscvspdpn 0, 0
 ; PC64LE-NEXT:    xscvspdpn 1, 1
 ; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
 ; PC64LE-NEXT:    xscvdpuxws 0, 0
 ; PC64LE-NEXT:    xscvdpuxws 1, 1
 ; PC64LE-NEXT:    mffprwz 3, 0
-; PC64LE-NEXT:    mtfprwz 0, 3
-; PC64LE-NEXT:    mffprwz 3, 1
+; PC64LE-NEXT:    mffprwz 4, 1
+; PC64LE-NEXT:    mtfprwz 0, 4
 ; PC64LE-NEXT:    mtfprwz 1, 3
 ; PC64LE-NEXT:    addis 3, 2, .LCPI113_0@toc@ha
 ; PC64LE-NEXT:    addi 3, 3, .LCPI113_0@toc@l
@@ -5808,25 +5808,25 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32(<3 x float> %x) #0 {
 ;
 ; PC64LE9-LABEL: constrained_vector_fptoui_v3i32_v3f32:
 ; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 3
-; PC64LE9-NEXT:    xxswapd 1, 34
+; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 1
 ; PC64LE9-NEXT:    xscvspdpn 0, 0
-; PC64LE9-NEXT:    xscvspdpn 1, 1
 ; PC64LE9-NEXT:    xscvdpuxws 0, 0
-; PC64LE9-NEXT:    xscvdpuxws 1, 1
 ; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    mtfprwz 0, 3
-; PC64LE9-NEXT:    mffprwz 3, 1
-; PC64LE9-NEXT:    mtfprwz 1, 3
-; PC64LE9-NEXT:    addis 3, 2, .LCPI113_0@toc@ha
-; PC64LE9-NEXT:    xxmrghw 35, 1, 0
-; PC64LE9-NEXT:    xxsldwi 1, 34, 34, 1
-; PC64LE9-NEXT:    addi 3, 3, .LCPI113_0@toc@l
-; PC64LE9-NEXT:    lxv 0, 0(3)
-; PC64LE9-NEXT:    xscvspdpn 1, 1
-; PC64LE9-NEXT:    xscvdpuxws 1, 1
-; PC64LE9-NEXT:    mffprwz 3, 1
+; PC64LE9-NEXT:    xxswapd 0, 34
+; PC64LE9-NEXT:    xscvspdpn 0, 0
+; PC64LE9-NEXT:    xscvdpuxws 0, 0
+; PC64LE9-NEXT:    mffprwz 4, 0
+; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 3
 ; PC64LE9-NEXT:    mtvsrwz 34, 3
+; PC64LE9-NEXT:    mtfprwz 1, 4
+; PC64LE9-NEXT:    addis 4, 2, .LCPI113_0@toc@ha
+; PC64LE9-NEXT:    xscvspdpn 0, 0
+; PC64LE9-NEXT:    addi 4, 4, .LCPI113_0@toc@l
+; PC64LE9-NEXT:    xscvdpuxws 0, 0
+; PC64LE9-NEXT:    mffprwz 5, 0
+; PC64LE9-NEXT:    mtfprwz 0, 5
+; PC64LE9-NEXT:    xxmrghw 35, 1, 0
+; PC64LE9-NEXT:    lxv 0, 0(4)
 ; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    blr
 entry:
@@ -6054,11 +6054,11 @@ entry:
 define <3 x i32> @constrained_vector_fptoui_v3i32_v3f64(<3 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_fptoui_v3i32_v3f64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    xscvdpuxws 0, 1
-; PC64LE-NEXT:    xscvdpuxws 1, 2
+; PC64LE-NEXT:    xscvdpuxws 0, 2
+; PC64LE-NEXT:    xscvdpuxws 1, 1
 ; PC64LE-NEXT:    mffprwz 3, 0
-; PC64LE-NEXT:    mtfprwz 0, 3
-; PC64LE-NEXT:    mffprwz 3, 1
+; PC64LE-NEXT:    mffprwz 4, 1
+; PC64LE-NEXT:    mtfprwz 0, 4
 ; PC64LE-NEXT:    mtfprwz 1, 3
 ; PC64LE-NEXT:    addis 3, 2, .LCPI121_0@toc@ha
 ; PC64LE-NEXT:    addi 3, 3, .LCPI121_0@toc@l
@@ -6073,19 +6073,19 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f64(<3 x double> %x) #0 {
 ;
 ; PC64LE9-LABEL: constrained_vector_fptoui_v3i32_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    xscvdpuxws 0, 1
-; PC64LE9-NEXT:    xscvdpuxws 1, 2
+; PC64LE9-NEXT:    xscvdpuxws 0, 3
 ; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    mtfprwz 0, 3
-; PC64LE9-NEXT:    mffprwz 3, 1
-; PC64LE9-NEXT:    mtfprwz 1, 3
-; PC64LE9-NEXT:    addis 3, 2, .LCPI121_0@toc@ha
-; PC64LE9-NEXT:    xxmrghw 35, 1, 0
-; PC64LE9-NEXT:    xscvdpuxws 1, 3
-; PC64LE9-NEXT:    addi 3, 3, .LCPI121_0@toc@l
-; PC64LE9-NEXT:    lxv 0, 0(3)
-; PC64LE9-NEXT:    mffprwz 3, 1
+; PC64LE9-NEXT:    xscvdpuxws 0, 2
 ; PC64LE9-NEXT:    mtvsrwz 34, 3
+; PC64LE9-NEXT:    mffprwz 4, 0
+; PC64LE9-NEXT:    xscvdpuxws 0, 1
+; PC64LE9-NEXT:    mtfprwz 1, 4
+; PC64LE9-NEXT:    addis 4, 2, .LCPI121_0@toc@ha
+; PC64LE9-NEXT:    addi 4, 4, .LCPI121_0@toc@l
+; PC64LE9-NEXT:    mffprwz 5, 0
+; PC64LE9-NEXT:    mtfprwz 0, 5
+; PC64LE9-NEXT:    xxmrghw 35, 1, 0
+; PC64LE9-NEXT:    lxv 0, 0(4)
 ; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    blr
 entry:
@@ -6269,33 +6269,33 @@ entry:
 define <3 x float> @constrained_vector_fptrunc_v3f64(<3 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_fptrunc_v3f64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    xsrsp 0, 1
-; PC64LE-NEXT:    xsrsp 1, 2
+; PC64LE-NEXT:    xsrsp 0, 3
+; PC64LE-NEXT:    xsrsp 2, 2
 ; PC64LE-NEXT:    addis 3, 2, .LCPI129_0@toc@ha
 ; PC64LE-NEXT:    addi 3, 3, .LCPI129_0@toc@l
-; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xsrsp 1, 1
 ; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xxmrghw 34, 1, 0
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    xxswapd 35, 0
-; PC64LE-NEXT:    xsrsp 0, 3
+; PC64LE-NEXT:    xscvdpspn 2, 2
 ; PC64LE-NEXT:    xscvdpspn 36, 0
+; PC64LE-NEXT:    xxmrghw 34, 2, 1
+; PC64LE-NEXT:    lxvd2x 1, 0, 3
+; PC64LE-NEXT:    xxswapd 35, 1
 ; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fptrunc_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    xsrsp 0, 1
-; PC64LE9-NEXT:    xsrsp 1, 2
+; PC64LE9-NEXT:    xsrsp 0, 3
+; PC64LE9-NEXT:    xsrsp 2, 2
+; PC64LE9-NEXT:    xsrsp 1, 1
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI129_0@toc@ha
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI129_0@toc@l
-; PC64LE9-NEXT:    xscvdpspn 0, 0
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xxmrghw 35, 1, 0
-; PC64LE9-NEXT:    xsrsp 1, 3
-; PC64LE9-NEXT:    lxv 0, 0(3)
-; PC64LE9-NEXT:    xscvdpspn 34, 1
-; PC64LE9-NEXT:    xxperm 34, 35, 0
+; PC64LE9-NEXT:    xscvdpspn 2, 2
+; PC64LE9-NEXT:    xscvdpspn 34, 0
+; PC64LE9-NEXT:    xxmrghw 35, 2, 1
+; PC64LE9-NEXT:    lxv 1, 0(3)
+; PC64LE9-NEXT:    xxperm 34, 35, 1
 ; PC64LE9-NEXT:    blr
 entry:
   %result = call <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(
@@ -7142,8 +7142,8 @@ entry:
 define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_sitofp_v3f32_v3i32:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    xxswapd 0, 34
-; PC64LE-NEXT:    xxsldwi 1, 34, 34, 1
+; PC64LE-NEXT:    xxsldwi 0, 34, 34, 1
+; PC64LE-NEXT:    xxswapd 1, 34
 ; PC64LE-NEXT:    mffprwz 3, 0
 ; PC64LE-NEXT:    mtfprwa 0, 3
 ; PC64LE-NEXT:    mffprwz 3, 1
@@ -7154,7 +7154,7 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
 ; PC64LE-NEXT:    xscvsxdsp 1, 1
 ; PC64LE-NEXT:    xscvdpspn 0, 0
 ; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xxmrghw 35, 1, 0
+; PC64LE-NEXT:    xxmrghw 35, 0, 1
 ; PC64LE-NEXT:    lxvd2x 0, 0, 3
 ; PC64LE-NEXT:    mfvsrwz 3, 34
 ; PC64LE-NEXT:    xxswapd 36, 0
@@ -7166,24 +7166,24 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
 ;
 ; PC64LE9-LABEL: constrained_vector_sitofp_v3f32_v3i32:
 ; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    li 3, 0
+; PC64LE9-NEXT:    li 3, 4
 ; PC64LE9-NEXT:    vextuwrx 3, 3, 2
 ; PC64LE9-NEXT:    mtfprwa 0, 3
-; PC64LE9-NEXT:    li 3, 4
+; PC64LE9-NEXT:    li 3, 0
 ; PC64LE9-NEXT:    vextuwrx 3, 3, 2
 ; PC64LE9-NEXT:    xscvsxdsp 0, 0
 ; PC64LE9-NEXT:    mtfprwa 1, 3
-; PC64LE9-NEXT:    addis 3, 2, .LCPI161_0@toc@ha
+; PC64LE9-NEXT:    mfvsrwz 3, 34
 ; PC64LE9-NEXT:    xscvsxdsp 1, 1
-; PC64LE9-NEXT:    addi 3, 3, .LCPI161_0@toc@l
+; PC64LE9-NEXT:    mtfprwa 2, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI161_0@toc@ha
+; PC64LE9-NEXT:    xscvsxdsp 2, 2
 ; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    addi 3, 3, .LCPI161_0@toc@l
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xxmrghw 35, 1, 0
+; PC64LE9-NEXT:    xscvdpspn 34, 2
+; PC64LE9-NEXT:    xxmrghw 35, 0, 1
 ; PC64LE9-NEXT:    lxv 0, 0(3)
-; PC64LE9-NEXT:    mfvsrwz 3, 34
-; PC64LE9-NEXT:    mtfprwa 1, 3
-; PC64LE9-NEXT:    xscvsxdsp 1, 1
-; PC64LE9-NEXT:    xscvdpspn 34, 1
 ; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    blr
 entry:
@@ -7225,15 +7225,15 @@ entry:
 define <3 x float> @constrained_vector_sitofp_v3f32_v3i64(<3 x i64> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_sitofp_v3f32_v3i64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mtfprd 0, 3
-; PC64LE-NEXT:    mtfprd 1, 4
+; PC64LE-NEXT:    mtfprd 0, 4
+; PC64LE-NEXT:    mtfprd 1, 3
 ; PC64LE-NEXT:    addis 3, 2, .LCPI163_0@toc@ha
 ; PC64LE-NEXT:    addi 3, 3, .LCPI163_0@toc@l
 ; PC64LE-NEXT:    xscvsxdsp 0, 0
 ; PC64LE-NEXT:    xscvsxdsp 1, 1
-; PC64LE-NEXT:    xscvdpspn 0, 0
 ; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xxmrghw 34, 1, 0
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxmrghw 34, 0, 1
 ; PC64LE-NEXT:    lxvd2x 0, 0, 3
 ; PC64LE-NEXT:    xxswapd 35, 0
 ; PC64LE-NEXT:    mtfprd 0, 5
@@ -7244,20 +7244,20 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i64(<3 x i64> %x) #0 {
 ;
 ; PC64LE9-LABEL: constrained_vector_sitofp_v3f32_v3i64:
 ; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mtfprd 0, 3
 ; PC64LE9-NEXT:    mtfprd 1, 4
+; PC64LE9-NEXT:    mtfprd 2, 3
+; PC64LE9-NEXT:    mtfprd 0, 5
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI163_0@toc@ha
-; PC64LE9-NEXT:    xscvsxdsp 0, 0
 ; PC64LE9-NEXT:    xscvsxdsp 1, 1
+; PC64LE9-NEXT:    xscvsxdsp 2, 2
+; PC64LE9-NEXT:    xscvsxdsp 0, 0
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI163_0@toc@l
-; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xscvdpspn 2, 2
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xxmrghw 35, 1, 0
-; PC64LE9-NEXT:    mtfprd 1, 5
-; PC64LE9-NEXT:    lxv 0, 0(3)
-; PC64LE9-NEXT:    xscvsxdsp 1, 1
-; PC64LE9-NEXT:  ...
[truncated]

@llvmbot
Copy link
Member

llvmbot commented Nov 16, 2025

@llvm/pr-subscribers-backend-x86

Author: Matt Arsenault (arsenm)

Changes

Patch is 76.41 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/168290.diff

6 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (+12-12)
  • (modified) llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll (-7)
  • (modified) llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll (+133-133)
  • (modified) llvm/test/CodeGen/X86/half.ll (+64-69)
  • (modified) llvm/test/CodeGen/X86/matrix-multiply.ll (+38-36)
  • (modified) llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll (+216-218)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index ef53ee6df9f06..10d5f7a9b4f65 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -5654,7 +5654,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
       // Widen the input and call convert on the widened input vector.
       unsigned NumConcat =
           WidenEC.getKnownMinValue() / InVTEC.getKnownMinValue();
-      SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));
+      SmallVector<SDValue, 16> Ops(NumConcat, DAG.getPOISON(InVT));
       Ops[0] = InOp;
       SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);
       if (N->getNumOperands() == 1)
@@ -5673,7 +5673,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
 
   // Otherwise unroll into some nasty scalar code and rebuild the vector.
   EVT EltVT = WidenVT.getVectorElementType();
-  SmallVector<SDValue, 16> Ops(WidenEC.getFixedValue(), DAG.getUNDEF(EltVT));
+  SmallVector<SDValue, 16> Ops(WidenEC.getFixedValue(), DAG.getPOISON(EltVT));
   // Use the original element count so we don't do more scalar opts than
   // necessary.
   unsigned MinElts = N->getValueType(0).getVectorNumElements();
@@ -5756,7 +5756,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) {
   // Otherwise unroll into some nasty scalar code and rebuild the vector.
   EVT EltVT = WidenVT.getVectorElementType();
   std::array<EVT, 2> EltVTs = {{EltVT, MVT::Other}};
-  SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
+  SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getPOISON(EltVT));
   SmallVector<SDValue, 32> OpChains;
   // Use the original element count so we don't do more scalar opts than
   // necessary.
@@ -5819,7 +5819,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) {
   }
 
   while (Ops.size() != WidenNumElts)
-    Ops.push_back(DAG.getUNDEF(WidenSVT));
+    Ops.push_back(DAG.getPOISON(WidenSVT));
 
   return DAG.getBuildVector(WidenVT, DL, Ops);
 }
@@ -6026,7 +6026,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
         // input and then widening it. To avoid this, we widen the input only if
         // it results in a legal type.
         if (WidenSize % InSize == 0) {
-          SmallVector<SDValue, 16> Ops(NewNumParts, DAG.getUNDEF(InVT));
+          SmallVector<SDValue, 16> Ops(NewNumParts, DAG.getPOISON(InVT));
           Ops[0] = InOp;
 
           NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops);
@@ -6034,7 +6034,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
           SmallVector<SDValue, 16> Ops;
           DAG.ExtractVectorElements(InOp, Ops);
           Ops.append(WidenSize / InScalarSize - Ops.size(),
-                     DAG.getUNDEF(InVT.getVectorElementType()));
+                     DAG.getPOISON(InVT.getVectorElementType()));
 
           NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, Ops);
         }
@@ -6088,7 +6088,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
     if (WidenNumElts % NumInElts == 0) {
       // Add undef vectors to widen to correct length.
       unsigned NumConcat = WidenNumElts / NumInElts;
-      SDValue UndefVal = DAG.getUNDEF(InVT);
+      SDValue UndefVal = DAG.getPOISON(InVT);
       SmallVector<SDValue, 16> Ops(NumConcat);
       for (unsigned i=0; i < NumOperands; ++i)
         Ops[i] = N->getOperand(i);
@@ -6146,7 +6146,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
     for (unsigned j = 0; j < NumInElts; ++j)
       Ops[Idx++] = DAG.getExtractVectorElt(dl, EltVT, InOp, j);
   }
-  SDValue UndefVal = DAG.getUNDEF(EltVT);
+  SDValue UndefVal = DAG.getPOISON(EltVT);
   for (; Idx < WidenNumElts; ++Idx)
     Ops[Idx] = UndefVal;
   return DAG.getBuildVector(WidenVT, dl, Ops);
@@ -6213,7 +6213,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
         Parts.push_back(
             DAG.getExtractSubvector(dl, PartVT, InOp, IdxVal + I * GCD));
       for (; I < WidenNumElts / GCD; ++I)
-        Parts.push_back(DAG.getUNDEF(PartVT));
+        Parts.push_back(DAG.getPOISON(PartVT));
 
       return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts);
     }
@@ -6229,7 +6229,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
   for (i = 0; i < VTNumElts; ++i)
     Ops[i] = DAG.getExtractVectorElt(dl, EltVT, InOp, IdxVal + i);
 
-  SDValue UndefVal = DAG.getUNDEF(EltVT);
+  SDValue UndefVal = DAG.getPOISON(EltVT);
   for (; i < WidenNumElts; ++i)
     Ops[i] = UndefVal;
   return DAG.getBuildVector(WidenVT, dl, Ops);
@@ -6903,7 +6903,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_REVERSE(SDNode *N) {
       Parts.push_back(
           DAG.getExtractSubvector(dl, PartVT, ReverseVal, IdxVal + i * GCD));
     for (; i < WidenNumElts / GCD; ++i)
-      Parts.push_back(DAG.getUNDEF(PartVT));
+      Parts.push_back(DAG.getPOISON(PartVT));
 
     return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts);
   }
@@ -6992,7 +6992,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_STRICT_FSETCC(SDNode *N) {
   EVT TmpEltVT = LHS.getValueType().getVectorElementType();
 
   // Fully unroll and reassemble.
-  SmallVector<SDValue, 8> Scalars(WidenNumElts, DAG.getUNDEF(EltVT));
+  SmallVector<SDValue, 8> Scalars(WidenNumElts, DAG.getPOISON(EltVT));
   SmallVector<SDValue, 8> Chains(NumElts);
   for (unsigned i = 0; i != NumElts; ++i) {
     SDValue LHSElem = DAG.getExtractVectorElt(dl, TmpEltVT, LHS, i);
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
index f6251ff66299e..8fc27248abac3 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
@@ -612,13 +612,6 @@ define <vscale x 14 x i8> @extract_nxv14i8_nxv28i8_14(<vscale x 28 x i8> %in) {
 ; CHECK-NEXT:    uunpkhi z3.d, z3.s
 ; CHECK-NEXT:    uzp1 z1.s, z1.s, z3.s
 ; CHECK-NEXT:    uzp1 z1.h, z2.h, z1.h
-; CHECK-NEXT:    uzp1 z1.b, z0.b, z1.b
-; CHECK-NEXT:    uunpkhi z1.h, z1.b
-; CHECK-NEXT:    uunpkhi z2.s, z1.h
-; CHECK-NEXT:    uunpklo z1.s, z1.h
-; CHECK-NEXT:    uunpklo z2.d, z2.s
-; CHECK-NEXT:    uzp1 z2.s, z2.s, z0.s
-; CHECK-NEXT:    uzp1 z1.h, z1.h, z2.h
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z1.b
 ; CHECK-NEXT:    ret
   %res = call <vscale x 14 x i8> @llvm.vector.extract.nxv14i8.nxv28i8(<vscale x 28 x i8> %in, i64 14)
diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
index 71c3069a406fe..08ca1d153248e 100644
--- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
@@ -5286,16 +5286,16 @@ entry:
 define <3 x i32> @constrained_vector_fptosi_v3i32_v3f32(<3 x float> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_fptosi_v3i32_v3f32:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    xxsldwi 0, 34, 34, 3
-; PC64LE-NEXT:    xxswapd 1, 34
+; PC64LE-NEXT:    xxswapd 0, 34
+; PC64LE-NEXT:    xxsldwi 1, 34, 34, 3
 ; PC64LE-NEXT:    xscvspdpn 0, 0
 ; PC64LE-NEXT:    xscvspdpn 1, 1
 ; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
 ; PC64LE-NEXT:    xscvdpsxws 0, 0
 ; PC64LE-NEXT:    xscvdpsxws 1, 1
 ; PC64LE-NEXT:    mffprwz 3, 0
-; PC64LE-NEXT:    mtfprwz 0, 3
-; PC64LE-NEXT:    mffprwz 3, 1
+; PC64LE-NEXT:    mffprwz 4, 1
+; PC64LE-NEXT:    mtfprwz 0, 4
 ; PC64LE-NEXT:    mtfprwz 1, 3
 ; PC64LE-NEXT:    addis 3, 2, .LCPI97_0@toc@ha
 ; PC64LE-NEXT:    addi 3, 3, .LCPI97_0@toc@l
@@ -5311,25 +5311,25 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f32(<3 x float> %x) #0 {
 ;
 ; PC64LE9-LABEL: constrained_vector_fptosi_v3i32_v3f32:
 ; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 3
-; PC64LE9-NEXT:    xxswapd 1, 34
+; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 1
 ; PC64LE9-NEXT:    xscvspdpn 0, 0
-; PC64LE9-NEXT:    xscvspdpn 1, 1
 ; PC64LE9-NEXT:    xscvdpsxws 0, 0
-; PC64LE9-NEXT:    xscvdpsxws 1, 1
 ; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    mtfprwz 0, 3
-; PC64LE9-NEXT:    mffprwz 3, 1
-; PC64LE9-NEXT:    mtfprwz 1, 3
-; PC64LE9-NEXT:    addis 3, 2, .LCPI97_0@toc@ha
-; PC64LE9-NEXT:    xxmrghw 35, 1, 0
-; PC64LE9-NEXT:    xxsldwi 1, 34, 34, 1
-; PC64LE9-NEXT:    addi 3, 3, .LCPI97_0@toc@l
-; PC64LE9-NEXT:    lxv 0, 0(3)
-; PC64LE9-NEXT:    xscvspdpn 1, 1
-; PC64LE9-NEXT:    xscvdpsxws 1, 1
-; PC64LE9-NEXT:    mffprwz 3, 1
+; PC64LE9-NEXT:    xxswapd 0, 34
+; PC64LE9-NEXT:    xscvspdpn 0, 0
+; PC64LE9-NEXT:    xscvdpsxws 0, 0
+; PC64LE9-NEXT:    mffprwz 4, 0
+; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 3
 ; PC64LE9-NEXT:    mtvsrwz 34, 3
+; PC64LE9-NEXT:    mtfprwz 1, 4
+; PC64LE9-NEXT:    addis 4, 2, .LCPI97_0@toc@ha
+; PC64LE9-NEXT:    xscvspdpn 0, 0
+; PC64LE9-NEXT:    addi 4, 4, .LCPI97_0@toc@l
+; PC64LE9-NEXT:    xscvdpsxws 0, 0
+; PC64LE9-NEXT:    mffprwz 5, 0
+; PC64LE9-NEXT:    mtfprwz 0, 5
+; PC64LE9-NEXT:    xxmrghw 35, 1, 0
+; PC64LE9-NEXT:    lxv 0, 0(4)
 ; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    blr
 entry:
@@ -5558,11 +5558,11 @@ entry:
 define <3 x i32> @constrained_vector_fptosi_v3i32_v3f64(<3 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_fptosi_v3i32_v3f64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    xscvdpsxws 0, 1
-; PC64LE-NEXT:    xscvdpsxws 1, 2
+; PC64LE-NEXT:    xscvdpsxws 0, 2
+; PC64LE-NEXT:    xscvdpsxws 1, 1
 ; PC64LE-NEXT:    mffprwz 3, 0
-; PC64LE-NEXT:    mtfprwz 0, 3
-; PC64LE-NEXT:    mffprwz 3, 1
+; PC64LE-NEXT:    mffprwz 4, 1
+; PC64LE-NEXT:    mtfprwz 0, 4
 ; PC64LE-NEXT:    mtfprwz 1, 3
 ; PC64LE-NEXT:    addis 3, 2, .LCPI105_0@toc@ha
 ; PC64LE-NEXT:    addi 3, 3, .LCPI105_0@toc@l
@@ -5577,19 +5577,19 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f64(<3 x double> %x) #0 {
 ;
 ; PC64LE9-LABEL: constrained_vector_fptosi_v3i32_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    xscvdpsxws 0, 1
-; PC64LE9-NEXT:    xscvdpsxws 1, 2
+; PC64LE9-NEXT:    xscvdpsxws 0, 3
 ; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    mtfprwz 0, 3
-; PC64LE9-NEXT:    mffprwz 3, 1
-; PC64LE9-NEXT:    mtfprwz 1, 3
-; PC64LE9-NEXT:    addis 3, 2, .LCPI105_0@toc@ha
-; PC64LE9-NEXT:    xxmrghw 35, 1, 0
-; PC64LE9-NEXT:    xscvdpsxws 1, 3
-; PC64LE9-NEXT:    addi 3, 3, .LCPI105_0@toc@l
-; PC64LE9-NEXT:    lxv 0, 0(3)
-; PC64LE9-NEXT:    mffprwz 3, 1
+; PC64LE9-NEXT:    xscvdpsxws 0, 2
 ; PC64LE9-NEXT:    mtvsrwz 34, 3
+; PC64LE9-NEXT:    mffprwz 4, 0
+; PC64LE9-NEXT:    xscvdpsxws 0, 1
+; PC64LE9-NEXT:    mtfprwz 1, 4
+; PC64LE9-NEXT:    addis 4, 2, .LCPI105_0@toc@ha
+; PC64LE9-NEXT:    addi 4, 4, .LCPI105_0@toc@l
+; PC64LE9-NEXT:    mffprwz 5, 0
+; PC64LE9-NEXT:    mtfprwz 0, 5
+; PC64LE9-NEXT:    xxmrghw 35, 1, 0
+; PC64LE9-NEXT:    lxv 0, 0(4)
 ; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    blr
 entry:
@@ -5783,16 +5783,16 @@ entry:
 define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32(<3 x float> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_fptoui_v3i32_v3f32:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    xxsldwi 0, 34, 34, 3
-; PC64LE-NEXT:    xxswapd 1, 34
+; PC64LE-NEXT:    xxswapd 0, 34
+; PC64LE-NEXT:    xxsldwi 1, 34, 34, 3
 ; PC64LE-NEXT:    xscvspdpn 0, 0
 ; PC64LE-NEXT:    xscvspdpn 1, 1
 ; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
 ; PC64LE-NEXT:    xscvdpuxws 0, 0
 ; PC64LE-NEXT:    xscvdpuxws 1, 1
 ; PC64LE-NEXT:    mffprwz 3, 0
-; PC64LE-NEXT:    mtfprwz 0, 3
-; PC64LE-NEXT:    mffprwz 3, 1
+; PC64LE-NEXT:    mffprwz 4, 1
+; PC64LE-NEXT:    mtfprwz 0, 4
 ; PC64LE-NEXT:    mtfprwz 1, 3
 ; PC64LE-NEXT:    addis 3, 2, .LCPI113_0@toc@ha
 ; PC64LE-NEXT:    addi 3, 3, .LCPI113_0@toc@l
@@ -5808,25 +5808,25 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32(<3 x float> %x) #0 {
 ;
 ; PC64LE9-LABEL: constrained_vector_fptoui_v3i32_v3f32:
 ; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 3
-; PC64LE9-NEXT:    xxswapd 1, 34
+; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 1
 ; PC64LE9-NEXT:    xscvspdpn 0, 0
-; PC64LE9-NEXT:    xscvspdpn 1, 1
 ; PC64LE9-NEXT:    xscvdpuxws 0, 0
-; PC64LE9-NEXT:    xscvdpuxws 1, 1
 ; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    mtfprwz 0, 3
-; PC64LE9-NEXT:    mffprwz 3, 1
-; PC64LE9-NEXT:    mtfprwz 1, 3
-; PC64LE9-NEXT:    addis 3, 2, .LCPI113_0@toc@ha
-; PC64LE9-NEXT:    xxmrghw 35, 1, 0
-; PC64LE9-NEXT:    xxsldwi 1, 34, 34, 1
-; PC64LE9-NEXT:    addi 3, 3, .LCPI113_0@toc@l
-; PC64LE9-NEXT:    lxv 0, 0(3)
-; PC64LE9-NEXT:    xscvspdpn 1, 1
-; PC64LE9-NEXT:    xscvdpuxws 1, 1
-; PC64LE9-NEXT:    mffprwz 3, 1
+; PC64LE9-NEXT:    xxswapd 0, 34
+; PC64LE9-NEXT:    xscvspdpn 0, 0
+; PC64LE9-NEXT:    xscvdpuxws 0, 0
+; PC64LE9-NEXT:    mffprwz 4, 0
+; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 3
 ; PC64LE9-NEXT:    mtvsrwz 34, 3
+; PC64LE9-NEXT:    mtfprwz 1, 4
+; PC64LE9-NEXT:    addis 4, 2, .LCPI113_0@toc@ha
+; PC64LE9-NEXT:    xscvspdpn 0, 0
+; PC64LE9-NEXT:    addi 4, 4, .LCPI113_0@toc@l
+; PC64LE9-NEXT:    xscvdpuxws 0, 0
+; PC64LE9-NEXT:    mffprwz 5, 0
+; PC64LE9-NEXT:    mtfprwz 0, 5
+; PC64LE9-NEXT:    xxmrghw 35, 1, 0
+; PC64LE9-NEXT:    lxv 0, 0(4)
 ; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    blr
 entry:
@@ -6054,11 +6054,11 @@ entry:
 define <3 x i32> @constrained_vector_fptoui_v3i32_v3f64(<3 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_fptoui_v3i32_v3f64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    xscvdpuxws 0, 1
-; PC64LE-NEXT:    xscvdpuxws 1, 2
+; PC64LE-NEXT:    xscvdpuxws 0, 2
+; PC64LE-NEXT:    xscvdpuxws 1, 1
 ; PC64LE-NEXT:    mffprwz 3, 0
-; PC64LE-NEXT:    mtfprwz 0, 3
-; PC64LE-NEXT:    mffprwz 3, 1
+; PC64LE-NEXT:    mffprwz 4, 1
+; PC64LE-NEXT:    mtfprwz 0, 4
 ; PC64LE-NEXT:    mtfprwz 1, 3
 ; PC64LE-NEXT:    addis 3, 2, .LCPI121_0@toc@ha
 ; PC64LE-NEXT:    addi 3, 3, .LCPI121_0@toc@l
@@ -6073,19 +6073,19 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f64(<3 x double> %x) #0 {
 ;
 ; PC64LE9-LABEL: constrained_vector_fptoui_v3i32_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    xscvdpuxws 0, 1
-; PC64LE9-NEXT:    xscvdpuxws 1, 2
+; PC64LE9-NEXT:    xscvdpuxws 0, 3
 ; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    mtfprwz 0, 3
-; PC64LE9-NEXT:    mffprwz 3, 1
-; PC64LE9-NEXT:    mtfprwz 1, 3
-; PC64LE9-NEXT:    addis 3, 2, .LCPI121_0@toc@ha
-; PC64LE9-NEXT:    xxmrghw 35, 1, 0
-; PC64LE9-NEXT:    xscvdpuxws 1, 3
-; PC64LE9-NEXT:    addi 3, 3, .LCPI121_0@toc@l
-; PC64LE9-NEXT:    lxv 0, 0(3)
-; PC64LE9-NEXT:    mffprwz 3, 1
+; PC64LE9-NEXT:    xscvdpuxws 0, 2
 ; PC64LE9-NEXT:    mtvsrwz 34, 3
+; PC64LE9-NEXT:    mffprwz 4, 0
+; PC64LE9-NEXT:    xscvdpuxws 0, 1
+; PC64LE9-NEXT:    mtfprwz 1, 4
+; PC64LE9-NEXT:    addis 4, 2, .LCPI121_0@toc@ha
+; PC64LE9-NEXT:    addi 4, 4, .LCPI121_0@toc@l
+; PC64LE9-NEXT:    mffprwz 5, 0
+; PC64LE9-NEXT:    mtfprwz 0, 5
+; PC64LE9-NEXT:    xxmrghw 35, 1, 0
+; PC64LE9-NEXT:    lxv 0, 0(4)
 ; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    blr
 entry:
@@ -6269,33 +6269,33 @@ entry:
 define <3 x float> @constrained_vector_fptrunc_v3f64(<3 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_fptrunc_v3f64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    xsrsp 0, 1
-; PC64LE-NEXT:    xsrsp 1, 2
+; PC64LE-NEXT:    xsrsp 0, 3
+; PC64LE-NEXT:    xsrsp 2, 2
 ; PC64LE-NEXT:    addis 3, 2, .LCPI129_0@toc@ha
 ; PC64LE-NEXT:    addi 3, 3, .LCPI129_0@toc@l
-; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xsrsp 1, 1
 ; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xxmrghw 34, 1, 0
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    xxswapd 35, 0
-; PC64LE-NEXT:    xsrsp 0, 3
+; PC64LE-NEXT:    xscvdpspn 2, 2
 ; PC64LE-NEXT:    xscvdpspn 36, 0
+; PC64LE-NEXT:    xxmrghw 34, 2, 1
+; PC64LE-NEXT:    lxvd2x 1, 0, 3
+; PC64LE-NEXT:    xxswapd 35, 1
 ; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fptrunc_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    xsrsp 0, 1
-; PC64LE9-NEXT:    xsrsp 1, 2
+; PC64LE9-NEXT:    xsrsp 0, 3
+; PC64LE9-NEXT:    xsrsp 2, 2
+; PC64LE9-NEXT:    xsrsp 1, 1
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI129_0@toc@ha
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI129_0@toc@l
-; PC64LE9-NEXT:    xscvdpspn 0, 0
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xxmrghw 35, 1, 0
-; PC64LE9-NEXT:    xsrsp 1, 3
-; PC64LE9-NEXT:    lxv 0, 0(3)
-; PC64LE9-NEXT:    xscvdpspn 34, 1
-; PC64LE9-NEXT:    xxperm 34, 35, 0
+; PC64LE9-NEXT:    xscvdpspn 2, 2
+; PC64LE9-NEXT:    xscvdpspn 34, 0
+; PC64LE9-NEXT:    xxmrghw 35, 2, 1
+; PC64LE9-NEXT:    lxv 1, 0(3)
+; PC64LE9-NEXT:    xxperm 34, 35, 1
 ; PC64LE9-NEXT:    blr
 entry:
   %result = call <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(
@@ -7142,8 +7142,8 @@ entry:
 define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_sitofp_v3f32_v3i32:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    xxswapd 0, 34
-; PC64LE-NEXT:    xxsldwi 1, 34, 34, 1
+; PC64LE-NEXT:    xxsldwi 0, 34, 34, 1
+; PC64LE-NEXT:    xxswapd 1, 34
 ; PC64LE-NEXT:    mffprwz 3, 0
 ; PC64LE-NEXT:    mtfprwa 0, 3
 ; PC64LE-NEXT:    mffprwz 3, 1
@@ -7154,7 +7154,7 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
 ; PC64LE-NEXT:    xscvsxdsp 1, 1
 ; PC64LE-NEXT:    xscvdpspn 0, 0
 ; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xxmrghw 35, 1, 0
+; PC64LE-NEXT:    xxmrghw 35, 0, 1
 ; PC64LE-NEXT:    lxvd2x 0, 0, 3
 ; PC64LE-NEXT:    mfvsrwz 3, 34
 ; PC64LE-NEXT:    xxswapd 36, 0
@@ -7166,24 +7166,24 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
 ;
 ; PC64LE9-LABEL: constrained_vector_sitofp_v3f32_v3i32:
 ; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    li 3, 0
+; PC64LE9-NEXT:    li 3, 4
 ; PC64LE9-NEXT:    vextuwrx 3, 3, 2
 ; PC64LE9-NEXT:    mtfprwa 0, 3
-; PC64LE9-NEXT:    li 3, 4
+; PC64LE9-NEXT:    li 3, 0
 ; PC64LE9-NEXT:    vextuwrx 3, 3, 2
 ; PC64LE9-NEXT:    xscvsxdsp 0, 0
 ; PC64LE9-NEXT:    mtfprwa 1, 3
-; PC64LE9-NEXT:    addis 3, 2, .LCPI161_0@toc@ha
+; PC64LE9-NEXT:    mfvsrwz 3, 34
 ; PC64LE9-NEXT:    xscvsxdsp 1, 1
-; PC64LE9-NEXT:    addi 3, 3, .LCPI161_0@toc@l
+; PC64LE9-NEXT:    mtfprwa 2, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI161_0@toc@ha
+; PC64LE9-NEXT:    xscvsxdsp 2, 2
 ; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    addi 3, 3, .LCPI161_0@toc@l
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xxmrghw 35, 1, 0
+; PC64LE9-NEXT:    xscvdpspn 34, 2
+; PC64LE9-NEXT:    xxmrghw 35, 0, 1
 ; PC64LE9-NEXT:    lxv 0, 0(3)
-; PC64LE9-NEXT:    mfvsrwz 3, 34
-; PC64LE9-NEXT:    mtfprwa 1, 3
-; PC64LE9-NEXT:    xscvsxdsp 1, 1
-; PC64LE9-NEXT:    xscvdpspn 34, 1
 ; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    blr
 entry:
@@ -7225,15 +7225,15 @@ entry:
 define <3 x float> @constrained_vector_sitofp_v3f32_v3i64(<3 x i64> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_sitofp_v3f32_v3i64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mtfprd 0, 3
-; PC64LE-NEXT:    mtfprd 1, 4
+; PC64LE-NEXT:    mtfprd 0, 4
+; PC64LE-NEXT:    mtfprd 1, 3
 ; PC64LE-NEXT:    addis 3, 2, .LCPI163_0@toc@ha
 ; PC64LE-NEXT:    addi 3, 3, .LCPI163_0@toc@l
 ; PC64LE-NEXT:    xscvsxdsp 0, 0
 ; PC64LE-NEXT:    xscvsxdsp 1, 1
-; PC64LE-NEXT:    xscvdpspn 0, 0
 ; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xxmrghw 34, 1, 0
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxmrghw 34, 0, 1
 ; PC64LE-NEXT:    lxvd2x 0, 0, 3
 ; PC64LE-NEXT:    xxswapd 35, 0
 ; PC64LE-NEXT:    mtfprd 0, 5
@@ -7244,20 +7244,20 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i64(<3 x i64> %x) #0 {
 ;
 ; PC64LE9-LABEL: constrained_vector_sitofp_v3f32_v3i64:
 ; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mtfprd 0, 3
 ; PC64LE9-NEXT:    mtfprd 1, 4
+; PC64LE9-NEXT:    mtfprd 2, 3
+; PC64LE9-NEXT:    mtfprd 0, 5
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI163_0@toc@ha
-; PC64LE9-NEXT:    xscvsxdsp 0, 0
 ; PC64LE9-NEXT:    xscvsxdsp 1, 1
+; PC64LE9-NEXT:    xscvsxdsp 2, 2
+; PC64LE9-NEXT:    xscvsxdsp 0, 0
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI163_0@toc@l
-; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xscvdpspn 2, 2
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xxmrghw 35, 1, 0
-; PC64LE9-NEXT:    mtfprd 1, 5
-; PC64LE9-NEXT:    lxv 0, 0(3)
-; PC64LE9-NEXT:    xscvsxdsp 1, 1
-; PC64LE9-NEXT:  ...
[truncated]

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants