diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index ef53ee6df9f06..10d5f7a9b4f65 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -5654,7 +5654,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { // Widen the input and call convert on the widened input vector. unsigned NumConcat = WidenEC.getKnownMinValue() / InVTEC.getKnownMinValue(); - SmallVector Ops(NumConcat, DAG.getUNDEF(InVT)); + SmallVector Ops(NumConcat, DAG.getPOISON(InVT)); Ops[0] = InOp; SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops); if (N->getNumOperands() == 1) @@ -5673,7 +5673,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { // Otherwise unroll into some nasty scalar code and rebuild the vector. EVT EltVT = WidenVT.getVectorElementType(); - SmallVector Ops(WidenEC.getFixedValue(), DAG.getUNDEF(EltVT)); + SmallVector Ops(WidenEC.getFixedValue(), DAG.getPOISON(EltVT)); // Use the original element count so we don't do more scalar opts than // necessary. unsigned MinElts = N->getValueType(0).getVectorNumElements(); @@ -5756,7 +5756,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) { // Otherwise unroll into some nasty scalar code and rebuild the vector. EVT EltVT = WidenVT.getVectorElementType(); std::array EltVTs = {{EltVT, MVT::Other}}; - SmallVector Ops(WidenNumElts, DAG.getUNDEF(EltVT)); + SmallVector Ops(WidenNumElts, DAG.getPOISON(EltVT)); SmallVector OpChains; // Use the original element count so we don't do more scalar opts than // necessary. @@ -5819,7 +5819,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) { } while (Ops.size() != WidenNumElts) - Ops.push_back(DAG.getUNDEF(WidenSVT)); + Ops.push_back(DAG.getPOISON(WidenSVT)); return DAG.getBuildVector(WidenVT, DL, Ops); } @@ -6026,7 +6026,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { // input and then widening it. To avoid this, we widen the input only if // it results in a legal type. if (WidenSize % InSize == 0) { - SmallVector Ops(NewNumParts, DAG.getUNDEF(InVT)); + SmallVector Ops(NewNumParts, DAG.getPOISON(InVT)); Ops[0] = InOp; NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops); @@ -6034,7 +6034,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { SmallVector Ops; DAG.ExtractVectorElements(InOp, Ops); Ops.append(WidenSize / InScalarSize - Ops.size(), - DAG.getUNDEF(InVT.getVectorElementType())); + DAG.getPOISON(InVT.getVectorElementType())); NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, Ops); } @@ -6088,7 +6088,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { if (WidenNumElts % NumInElts == 0) { // Add undef vectors to widen to correct length. unsigned NumConcat = WidenNumElts / NumInElts; - SDValue UndefVal = DAG.getUNDEF(InVT); + SDValue UndefVal = DAG.getPOISON(InVT); SmallVector Ops(NumConcat); for (unsigned i=0; i < NumOperands; ++i) Ops[i] = N->getOperand(i); @@ -6146,7 +6146,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { for (unsigned j = 0; j < NumInElts; ++j) Ops[Idx++] = DAG.getExtractVectorElt(dl, EltVT, InOp, j); } - SDValue UndefVal = DAG.getUNDEF(EltVT); + SDValue UndefVal = DAG.getPOISON(EltVT); for (; Idx < WidenNumElts; ++Idx) Ops[Idx] = UndefVal; return DAG.getBuildVector(WidenVT, dl, Ops); @@ -6213,7 +6213,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { Parts.push_back( DAG.getExtractSubvector(dl, PartVT, InOp, IdxVal + I * GCD)); for (; I < WidenNumElts / GCD; ++I) - Parts.push_back(DAG.getUNDEF(PartVT)); + Parts.push_back(DAG.getPOISON(PartVT)); return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts); } @@ -6229,7 +6229,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { for (i = 0; i < VTNumElts; ++i) Ops[i] = DAG.getExtractVectorElt(dl, EltVT, InOp, IdxVal + i); - SDValue UndefVal = DAG.getUNDEF(EltVT); + SDValue UndefVal = DAG.getPOISON(EltVT); for (; i < WidenNumElts; ++i) Ops[i] = UndefVal; return DAG.getBuildVector(WidenVT, dl, Ops); @@ -6903,7 +6903,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_REVERSE(SDNode *N) { Parts.push_back( DAG.getExtractSubvector(dl, PartVT, ReverseVal, IdxVal + i * GCD)); for (; i < WidenNumElts / GCD; ++i) - Parts.push_back(DAG.getUNDEF(PartVT)); + Parts.push_back(DAG.getPOISON(PartVT)); return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts); } @@ -6992,7 +6992,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_STRICT_FSETCC(SDNode *N) { EVT TmpEltVT = LHS.getValueType().getVectorElementType(); // Fully unroll and reassemble. - SmallVector Scalars(WidenNumElts, DAG.getUNDEF(EltVT)); + SmallVector Scalars(WidenNumElts, DAG.getPOISON(EltVT)); SmallVector Chains(NumElts); for (unsigned i = 0; i != NumElts; ++i) { SDValue LHSElem = DAG.getExtractVectorElt(dl, TmpEltVT, LHS, i); diff --git a/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll index f6251ff66299e..8fc27248abac3 100644 --- a/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll @@ -612,13 +612,6 @@ define @extract_nxv14i8_nxv28i8_14( %in) { ; CHECK-NEXT: uunpkhi z3.d, z3.s ; CHECK-NEXT: uzp1 z1.s, z1.s, z3.s ; CHECK-NEXT: uzp1 z1.h, z2.h, z1.h -; CHECK-NEXT: uzp1 z1.b, z0.b, z1.b -; CHECK-NEXT: uunpkhi z1.h, z1.b -; CHECK-NEXT: uunpkhi z2.s, z1.h -; CHECK-NEXT: uunpklo z1.s, z1.h -; CHECK-NEXT: uunpklo z2.d, z2.s -; CHECK-NEXT: uzp1 z2.s, z2.s, z0.s -; CHECK-NEXT: uzp1 z1.h, z1.h, z2.h ; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b ; CHECK-NEXT: ret %res = call @llvm.vector.extract.nxv14i8.nxv28i8( %in, i64 14) diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll index 71c3069a406fe..08ca1d153248e 100644 --- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll @@ -5286,16 +5286,16 @@ entry: define <3 x i32> @constrained_vector_fptosi_v3i32_v3f32(<3 x float> %x) #0 { ; PC64LE-LABEL: constrained_vector_fptosi_v3i32_v3f32: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: xxsldwi 0, 34, 34, 3 -; PC64LE-NEXT: xxswapd 1, 34 +; PC64LE-NEXT: xxswapd 0, 34 +; PC64LE-NEXT: xxsldwi 1, 34, 34, 3 ; PC64LE-NEXT: xscvspdpn 0, 0 ; PC64LE-NEXT: xscvspdpn 1, 1 ; PC64LE-NEXT: xxsldwi 2, 34, 34, 1 ; PC64LE-NEXT: xscvdpsxws 0, 0 ; PC64LE-NEXT: xscvdpsxws 1, 1 ; PC64LE-NEXT: mffprwz 3, 0 -; PC64LE-NEXT: mtfprwz 0, 3 -; PC64LE-NEXT: mffprwz 3, 1 +; PC64LE-NEXT: mffprwz 4, 1 +; PC64LE-NEXT: mtfprwz 0, 4 ; PC64LE-NEXT: mtfprwz 1, 3 ; PC64LE-NEXT: addis 3, 2, .LCPI97_0@toc@ha ; PC64LE-NEXT: addi 3, 3, .LCPI97_0@toc@l @@ -5311,25 +5311,25 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f32(<3 x float> %x) #0 { ; ; PC64LE9-LABEL: constrained_vector_fptosi_v3i32_v3f32: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: xxsldwi 0, 34, 34, 3 -; PC64LE9-NEXT: xxswapd 1, 34 +; PC64LE9-NEXT: xxsldwi 0, 34, 34, 1 ; PC64LE9-NEXT: xscvspdpn 0, 0 -; PC64LE9-NEXT: xscvspdpn 1, 1 ; PC64LE9-NEXT: xscvdpsxws 0, 0 -; PC64LE9-NEXT: xscvdpsxws 1, 1 ; PC64LE9-NEXT: mffprwz 3, 0 -; PC64LE9-NEXT: mtfprwz 0, 3 -; PC64LE9-NEXT: mffprwz 3, 1 -; PC64LE9-NEXT: mtfprwz 1, 3 -; PC64LE9-NEXT: addis 3, 2, .LCPI97_0@toc@ha -; PC64LE9-NEXT: xxmrghw 35, 1, 0 -; PC64LE9-NEXT: xxsldwi 1, 34, 34, 1 -; PC64LE9-NEXT: addi 3, 3, .LCPI97_0@toc@l -; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: xscvspdpn 1, 1 -; PC64LE9-NEXT: xscvdpsxws 1, 1 -; PC64LE9-NEXT: mffprwz 3, 1 +; PC64LE9-NEXT: xxswapd 0, 34 +; PC64LE9-NEXT: xscvspdpn 0, 0 +; PC64LE9-NEXT: xscvdpsxws 0, 0 +; PC64LE9-NEXT: mffprwz 4, 0 +; PC64LE9-NEXT: xxsldwi 0, 34, 34, 3 ; PC64LE9-NEXT: mtvsrwz 34, 3 +; PC64LE9-NEXT: mtfprwz 1, 4 +; PC64LE9-NEXT: addis 4, 2, .LCPI97_0@toc@ha +; PC64LE9-NEXT: xscvspdpn 0, 0 +; PC64LE9-NEXT: addi 4, 4, .LCPI97_0@toc@l +; PC64LE9-NEXT: xscvdpsxws 0, 0 +; PC64LE9-NEXT: mffprwz 5, 0 +; PC64LE9-NEXT: mtfprwz 0, 5 +; PC64LE9-NEXT: xxmrghw 35, 1, 0 +; PC64LE9-NEXT: lxv 0, 0(4) ; PC64LE9-NEXT: xxperm 34, 35, 0 ; PC64LE9-NEXT: blr entry: @@ -5558,11 +5558,11 @@ entry: define <3 x i32> @constrained_vector_fptosi_v3i32_v3f64(<3 x double> %x) #0 { ; PC64LE-LABEL: constrained_vector_fptosi_v3i32_v3f64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: xscvdpsxws 0, 1 -; PC64LE-NEXT: xscvdpsxws 1, 2 +; PC64LE-NEXT: xscvdpsxws 0, 2 +; PC64LE-NEXT: xscvdpsxws 1, 1 ; PC64LE-NEXT: mffprwz 3, 0 -; PC64LE-NEXT: mtfprwz 0, 3 -; PC64LE-NEXT: mffprwz 3, 1 +; PC64LE-NEXT: mffprwz 4, 1 +; PC64LE-NEXT: mtfprwz 0, 4 ; PC64LE-NEXT: mtfprwz 1, 3 ; PC64LE-NEXT: addis 3, 2, .LCPI105_0@toc@ha ; PC64LE-NEXT: addi 3, 3, .LCPI105_0@toc@l @@ -5577,19 +5577,19 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f64(<3 x double> %x) #0 { ; ; PC64LE9-LABEL: constrained_vector_fptosi_v3i32_v3f64: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: xscvdpsxws 0, 1 -; PC64LE9-NEXT: xscvdpsxws 1, 2 +; PC64LE9-NEXT: xscvdpsxws 0, 3 ; PC64LE9-NEXT: mffprwz 3, 0 -; PC64LE9-NEXT: mtfprwz 0, 3 -; PC64LE9-NEXT: mffprwz 3, 1 -; PC64LE9-NEXT: mtfprwz 1, 3 -; PC64LE9-NEXT: addis 3, 2, .LCPI105_0@toc@ha -; PC64LE9-NEXT: xxmrghw 35, 1, 0 -; PC64LE9-NEXT: xscvdpsxws 1, 3 -; PC64LE9-NEXT: addi 3, 3, .LCPI105_0@toc@l -; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: mffprwz 3, 1 +; PC64LE9-NEXT: xscvdpsxws 0, 2 ; PC64LE9-NEXT: mtvsrwz 34, 3 +; PC64LE9-NEXT: mffprwz 4, 0 +; PC64LE9-NEXT: xscvdpsxws 0, 1 +; PC64LE9-NEXT: mtfprwz 1, 4 +; PC64LE9-NEXT: addis 4, 2, .LCPI105_0@toc@ha +; PC64LE9-NEXT: addi 4, 4, .LCPI105_0@toc@l +; PC64LE9-NEXT: mffprwz 5, 0 +; PC64LE9-NEXT: mtfprwz 0, 5 +; PC64LE9-NEXT: xxmrghw 35, 1, 0 +; PC64LE9-NEXT: lxv 0, 0(4) ; PC64LE9-NEXT: xxperm 34, 35, 0 ; PC64LE9-NEXT: blr entry: @@ -5783,16 +5783,16 @@ entry: define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32(<3 x float> %x) #0 { ; PC64LE-LABEL: constrained_vector_fptoui_v3i32_v3f32: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: xxsldwi 0, 34, 34, 3 -; PC64LE-NEXT: xxswapd 1, 34 +; PC64LE-NEXT: xxswapd 0, 34 +; PC64LE-NEXT: xxsldwi 1, 34, 34, 3 ; PC64LE-NEXT: xscvspdpn 0, 0 ; PC64LE-NEXT: xscvspdpn 1, 1 ; PC64LE-NEXT: xxsldwi 2, 34, 34, 1 ; PC64LE-NEXT: xscvdpuxws 0, 0 ; PC64LE-NEXT: xscvdpuxws 1, 1 ; PC64LE-NEXT: mffprwz 3, 0 -; PC64LE-NEXT: mtfprwz 0, 3 -; PC64LE-NEXT: mffprwz 3, 1 +; PC64LE-NEXT: mffprwz 4, 1 +; PC64LE-NEXT: mtfprwz 0, 4 ; PC64LE-NEXT: mtfprwz 1, 3 ; PC64LE-NEXT: addis 3, 2, .LCPI113_0@toc@ha ; PC64LE-NEXT: addi 3, 3, .LCPI113_0@toc@l @@ -5808,25 +5808,25 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32(<3 x float> %x) #0 { ; ; PC64LE9-LABEL: constrained_vector_fptoui_v3i32_v3f32: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: xxsldwi 0, 34, 34, 3 -; PC64LE9-NEXT: xxswapd 1, 34 +; PC64LE9-NEXT: xxsldwi 0, 34, 34, 1 ; PC64LE9-NEXT: xscvspdpn 0, 0 -; PC64LE9-NEXT: xscvspdpn 1, 1 ; PC64LE9-NEXT: xscvdpuxws 0, 0 -; PC64LE9-NEXT: xscvdpuxws 1, 1 ; PC64LE9-NEXT: mffprwz 3, 0 -; PC64LE9-NEXT: mtfprwz 0, 3 -; PC64LE9-NEXT: mffprwz 3, 1 -; PC64LE9-NEXT: mtfprwz 1, 3 -; PC64LE9-NEXT: addis 3, 2, .LCPI113_0@toc@ha -; PC64LE9-NEXT: xxmrghw 35, 1, 0 -; PC64LE9-NEXT: xxsldwi 1, 34, 34, 1 -; PC64LE9-NEXT: addi 3, 3, .LCPI113_0@toc@l -; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: xscvspdpn 1, 1 -; PC64LE9-NEXT: xscvdpuxws 1, 1 -; PC64LE9-NEXT: mffprwz 3, 1 +; PC64LE9-NEXT: xxswapd 0, 34 +; PC64LE9-NEXT: xscvspdpn 0, 0 +; PC64LE9-NEXT: xscvdpuxws 0, 0 +; PC64LE9-NEXT: mffprwz 4, 0 +; PC64LE9-NEXT: xxsldwi 0, 34, 34, 3 ; PC64LE9-NEXT: mtvsrwz 34, 3 +; PC64LE9-NEXT: mtfprwz 1, 4 +; PC64LE9-NEXT: addis 4, 2, .LCPI113_0@toc@ha +; PC64LE9-NEXT: xscvspdpn 0, 0 +; PC64LE9-NEXT: addi 4, 4, .LCPI113_0@toc@l +; PC64LE9-NEXT: xscvdpuxws 0, 0 +; PC64LE9-NEXT: mffprwz 5, 0 +; PC64LE9-NEXT: mtfprwz 0, 5 +; PC64LE9-NEXT: xxmrghw 35, 1, 0 +; PC64LE9-NEXT: lxv 0, 0(4) ; PC64LE9-NEXT: xxperm 34, 35, 0 ; PC64LE9-NEXT: blr entry: @@ -6054,11 +6054,11 @@ entry: define <3 x i32> @constrained_vector_fptoui_v3i32_v3f64(<3 x double> %x) #0 { ; PC64LE-LABEL: constrained_vector_fptoui_v3i32_v3f64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: xscvdpuxws 0, 1 -; PC64LE-NEXT: xscvdpuxws 1, 2 +; PC64LE-NEXT: xscvdpuxws 0, 2 +; PC64LE-NEXT: xscvdpuxws 1, 1 ; PC64LE-NEXT: mffprwz 3, 0 -; PC64LE-NEXT: mtfprwz 0, 3 -; PC64LE-NEXT: mffprwz 3, 1 +; PC64LE-NEXT: mffprwz 4, 1 +; PC64LE-NEXT: mtfprwz 0, 4 ; PC64LE-NEXT: mtfprwz 1, 3 ; PC64LE-NEXT: addis 3, 2, .LCPI121_0@toc@ha ; PC64LE-NEXT: addi 3, 3, .LCPI121_0@toc@l @@ -6073,19 +6073,19 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f64(<3 x double> %x) #0 { ; ; PC64LE9-LABEL: constrained_vector_fptoui_v3i32_v3f64: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: xscvdpuxws 0, 1 -; PC64LE9-NEXT: xscvdpuxws 1, 2 +; PC64LE9-NEXT: xscvdpuxws 0, 3 ; PC64LE9-NEXT: mffprwz 3, 0 -; PC64LE9-NEXT: mtfprwz 0, 3 -; PC64LE9-NEXT: mffprwz 3, 1 -; PC64LE9-NEXT: mtfprwz 1, 3 -; PC64LE9-NEXT: addis 3, 2, .LCPI121_0@toc@ha -; PC64LE9-NEXT: xxmrghw 35, 1, 0 -; PC64LE9-NEXT: xscvdpuxws 1, 3 -; PC64LE9-NEXT: addi 3, 3, .LCPI121_0@toc@l -; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: mffprwz 3, 1 +; PC64LE9-NEXT: xscvdpuxws 0, 2 ; PC64LE9-NEXT: mtvsrwz 34, 3 +; PC64LE9-NEXT: mffprwz 4, 0 +; PC64LE9-NEXT: xscvdpuxws 0, 1 +; PC64LE9-NEXT: mtfprwz 1, 4 +; PC64LE9-NEXT: addis 4, 2, .LCPI121_0@toc@ha +; PC64LE9-NEXT: addi 4, 4, .LCPI121_0@toc@l +; PC64LE9-NEXT: mffprwz 5, 0 +; PC64LE9-NEXT: mtfprwz 0, 5 +; PC64LE9-NEXT: xxmrghw 35, 1, 0 +; PC64LE9-NEXT: lxv 0, 0(4) ; PC64LE9-NEXT: xxperm 34, 35, 0 ; PC64LE9-NEXT: blr entry: @@ -6269,33 +6269,33 @@ entry: define <3 x float> @constrained_vector_fptrunc_v3f64(<3 x double> %x) #0 { ; PC64LE-LABEL: constrained_vector_fptrunc_v3f64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: xsrsp 0, 1 -; PC64LE-NEXT: xsrsp 1, 2 +; PC64LE-NEXT: xsrsp 0, 3 +; PC64LE-NEXT: xsrsp 2, 2 ; PC64LE-NEXT: addis 3, 2, .LCPI129_0@toc@ha ; PC64LE-NEXT: addi 3, 3, .LCPI129_0@toc@l -; PC64LE-NEXT: xscvdpspn 0, 0 +; PC64LE-NEXT: xsrsp 1, 1 ; PC64LE-NEXT: xscvdpspn 1, 1 -; PC64LE-NEXT: xxmrghw 34, 1, 0 -; PC64LE-NEXT: lxvd2x 0, 0, 3 -; PC64LE-NEXT: xxswapd 35, 0 -; PC64LE-NEXT: xsrsp 0, 3 +; PC64LE-NEXT: xscvdpspn 2, 2 ; PC64LE-NEXT: xscvdpspn 36, 0 +; PC64LE-NEXT: xxmrghw 34, 2, 1 +; PC64LE-NEXT: lxvd2x 1, 0, 3 +; PC64LE-NEXT: xxswapd 35, 1 ; PC64LE-NEXT: vperm 2, 4, 2, 3 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_fptrunc_v3f64: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: xsrsp 0, 1 -; PC64LE9-NEXT: xsrsp 1, 2 +; PC64LE9-NEXT: xsrsp 0, 3 +; PC64LE9-NEXT: xsrsp 2, 2 +; PC64LE9-NEXT: xsrsp 1, 1 ; PC64LE9-NEXT: addis 3, 2, .LCPI129_0@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI129_0@toc@l -; PC64LE9-NEXT: xscvdpspn 0, 0 ; PC64LE9-NEXT: xscvdpspn 1, 1 -; PC64LE9-NEXT: xxmrghw 35, 1, 0 -; PC64LE9-NEXT: xsrsp 1, 3 -; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: xscvdpspn 34, 1 -; PC64LE9-NEXT: xxperm 34, 35, 0 +; PC64LE9-NEXT: xscvdpspn 2, 2 +; PC64LE9-NEXT: xscvdpspn 34, 0 +; PC64LE9-NEXT: xxmrghw 35, 2, 1 +; PC64LE9-NEXT: lxv 1, 0(3) +; PC64LE9-NEXT: xxperm 34, 35, 1 ; PC64LE9-NEXT: blr entry: %result = call <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64( @@ -7142,8 +7142,8 @@ entry: define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 { ; PC64LE-LABEL: constrained_vector_sitofp_v3f32_v3i32: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: xxswapd 0, 34 -; PC64LE-NEXT: xxsldwi 1, 34, 34, 1 +; PC64LE-NEXT: xxsldwi 0, 34, 34, 1 +; PC64LE-NEXT: xxswapd 1, 34 ; PC64LE-NEXT: mffprwz 3, 0 ; PC64LE-NEXT: mtfprwa 0, 3 ; PC64LE-NEXT: mffprwz 3, 1 @@ -7154,7 +7154,7 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 { ; PC64LE-NEXT: xscvsxdsp 1, 1 ; PC64LE-NEXT: xscvdpspn 0, 0 ; PC64LE-NEXT: xscvdpspn 1, 1 -; PC64LE-NEXT: xxmrghw 35, 1, 0 +; PC64LE-NEXT: xxmrghw 35, 0, 1 ; PC64LE-NEXT: lxvd2x 0, 0, 3 ; PC64LE-NEXT: mfvsrwz 3, 34 ; PC64LE-NEXT: xxswapd 36, 0 @@ -7166,24 +7166,24 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 { ; ; PC64LE9-LABEL: constrained_vector_sitofp_v3f32_v3i32: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: li 3, 0 +; PC64LE9-NEXT: li 3, 4 ; PC64LE9-NEXT: vextuwrx 3, 3, 2 ; PC64LE9-NEXT: mtfprwa 0, 3 -; PC64LE9-NEXT: li 3, 4 +; PC64LE9-NEXT: li 3, 0 ; PC64LE9-NEXT: vextuwrx 3, 3, 2 ; PC64LE9-NEXT: xscvsxdsp 0, 0 ; PC64LE9-NEXT: mtfprwa 1, 3 -; PC64LE9-NEXT: addis 3, 2, .LCPI161_0@toc@ha +; PC64LE9-NEXT: mfvsrwz 3, 34 ; PC64LE9-NEXT: xscvsxdsp 1, 1 -; PC64LE9-NEXT: addi 3, 3, .LCPI161_0@toc@l +; PC64LE9-NEXT: mtfprwa 2, 3 +; PC64LE9-NEXT: addis 3, 2, .LCPI161_0@toc@ha +; PC64LE9-NEXT: xscvsxdsp 2, 2 ; PC64LE9-NEXT: xscvdpspn 0, 0 +; PC64LE9-NEXT: addi 3, 3, .LCPI161_0@toc@l ; PC64LE9-NEXT: xscvdpspn 1, 1 -; PC64LE9-NEXT: xxmrghw 35, 1, 0 +; PC64LE9-NEXT: xscvdpspn 34, 2 +; PC64LE9-NEXT: xxmrghw 35, 0, 1 ; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: mfvsrwz 3, 34 -; PC64LE9-NEXT: mtfprwa 1, 3 -; PC64LE9-NEXT: xscvsxdsp 1, 1 -; PC64LE9-NEXT: xscvdpspn 34, 1 ; PC64LE9-NEXT: xxperm 34, 35, 0 ; PC64LE9-NEXT: blr entry: @@ -7225,15 +7225,15 @@ entry: define <3 x float> @constrained_vector_sitofp_v3f32_v3i64(<3 x i64> %x) #0 { ; PC64LE-LABEL: constrained_vector_sitofp_v3f32_v3i64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: mtfprd 0, 3 -; PC64LE-NEXT: mtfprd 1, 4 +; PC64LE-NEXT: mtfprd 0, 4 +; PC64LE-NEXT: mtfprd 1, 3 ; PC64LE-NEXT: addis 3, 2, .LCPI163_0@toc@ha ; PC64LE-NEXT: addi 3, 3, .LCPI163_0@toc@l ; PC64LE-NEXT: xscvsxdsp 0, 0 ; PC64LE-NEXT: xscvsxdsp 1, 1 -; PC64LE-NEXT: xscvdpspn 0, 0 ; PC64LE-NEXT: xscvdpspn 1, 1 -; PC64LE-NEXT: xxmrghw 34, 1, 0 +; PC64LE-NEXT: xscvdpspn 0, 0 +; PC64LE-NEXT: xxmrghw 34, 0, 1 ; PC64LE-NEXT: lxvd2x 0, 0, 3 ; PC64LE-NEXT: xxswapd 35, 0 ; PC64LE-NEXT: mtfprd 0, 5 @@ -7244,20 +7244,20 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i64(<3 x i64> %x) #0 { ; ; PC64LE9-LABEL: constrained_vector_sitofp_v3f32_v3i64: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: mtfprd 0, 3 ; PC64LE9-NEXT: mtfprd 1, 4 +; PC64LE9-NEXT: mtfprd 2, 3 +; PC64LE9-NEXT: mtfprd 0, 5 ; PC64LE9-NEXT: addis 3, 2, .LCPI163_0@toc@ha -; PC64LE9-NEXT: xscvsxdsp 0, 0 ; PC64LE9-NEXT: xscvsxdsp 1, 1 +; PC64LE9-NEXT: xscvsxdsp 2, 2 +; PC64LE9-NEXT: xscvsxdsp 0, 0 ; PC64LE9-NEXT: addi 3, 3, .LCPI163_0@toc@l -; PC64LE9-NEXT: xscvdpspn 0, 0 +; PC64LE9-NEXT: xscvdpspn 2, 2 ; PC64LE9-NEXT: xscvdpspn 1, 1 -; PC64LE9-NEXT: xxmrghw 35, 1, 0 -; PC64LE9-NEXT: mtfprd 1, 5 -; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: xscvsxdsp 1, 1 -; PC64LE9-NEXT: xscvdpspn 34, 1 -; PC64LE9-NEXT: xxperm 34, 35, 0 +; PC64LE9-NEXT: xscvdpspn 34, 0 +; PC64LE9-NEXT: xxmrghw 35, 1, 2 +; PC64LE9-NEXT: lxv 1, 0(3) +; PC64LE9-NEXT: xxperm 34, 35, 1 ; PC64LE9-NEXT: blr entry: %result = call <3 x float> @@ -7709,8 +7709,8 @@ entry: define <3 x float> @constrained_vector_uitofp_v3f32_v3i32(<3 x i32> %x) #0 { ; PC64LE-LABEL: constrained_vector_uitofp_v3f32_v3i32: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: xxswapd 0, 34 -; PC64LE-NEXT: xxsldwi 1, 34, 34, 1 +; PC64LE-NEXT: xxsldwi 0, 34, 34, 1 +; PC64LE-NEXT: xxswapd 1, 34 ; PC64LE-NEXT: mffprwz 3, 0 ; PC64LE-NEXT: mtfprwz 0, 3 ; PC64LE-NEXT: mffprwz 3, 1 @@ -7721,7 +7721,7 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i32(<3 x i32> %x) #0 { ; PC64LE-NEXT: xscvuxdsp 1, 1 ; PC64LE-NEXT: xscvdpspn 0, 0 ; PC64LE-NEXT: xscvdpspn 1, 1 -; PC64LE-NEXT: xxmrghw 35, 1, 0 +; PC64LE-NEXT: xxmrghw 35, 0, 1 ; PC64LE-NEXT: lxvd2x 0, 0, 3 ; PC64LE-NEXT: mfvsrwz 3, 34 ; PC64LE-NEXT: xxswapd 36, 0 @@ -7733,24 +7733,24 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i32(<3 x i32> %x) #0 { ; ; PC64LE9-LABEL: constrained_vector_uitofp_v3f32_v3i32: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: li 3, 0 +; PC64LE9-NEXT: li 3, 4 ; PC64LE9-NEXT: vextuwrx 3, 3, 2 ; PC64LE9-NEXT: mtfprwz 0, 3 -; PC64LE9-NEXT: li 3, 4 +; PC64LE9-NEXT: li 3, 0 ; PC64LE9-NEXT: vextuwrx 3, 3, 2 ; PC64LE9-NEXT: xscvuxdsp 0, 0 ; PC64LE9-NEXT: mtfprwz 1, 3 -; PC64LE9-NEXT: addis 3, 2, .LCPI179_0@toc@ha +; PC64LE9-NEXT: mfvsrwz 3, 34 ; PC64LE9-NEXT: xscvuxdsp 1, 1 -; PC64LE9-NEXT: addi 3, 3, .LCPI179_0@toc@l +; PC64LE9-NEXT: mtfprwz 2, 3 +; PC64LE9-NEXT: addis 3, 2, .LCPI179_0@toc@ha +; PC64LE9-NEXT: xscvuxdsp 2, 2 ; PC64LE9-NEXT: xscvdpspn 0, 0 +; PC64LE9-NEXT: addi 3, 3, .LCPI179_0@toc@l ; PC64LE9-NEXT: xscvdpspn 1, 1 -; PC64LE9-NEXT: xxmrghw 35, 1, 0 +; PC64LE9-NEXT: xscvdpspn 34, 2 +; PC64LE9-NEXT: xxmrghw 35, 0, 1 ; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: mfvsrwz 3, 34 -; PC64LE9-NEXT: mtfprwz 1, 3 -; PC64LE9-NEXT: xscvuxdsp 1, 1 -; PC64LE9-NEXT: xscvdpspn 34, 1 ; PC64LE9-NEXT: xxperm 34, 35, 0 ; PC64LE9-NEXT: blr entry: @@ -7792,15 +7792,15 @@ entry: define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 { ; PC64LE-LABEL: constrained_vector_uitofp_v3f32_v3i64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: mtfprd 0, 3 -; PC64LE-NEXT: mtfprd 1, 4 +; PC64LE-NEXT: mtfprd 0, 4 +; PC64LE-NEXT: mtfprd 1, 3 ; PC64LE-NEXT: addis 3, 2, .LCPI181_0@toc@ha ; PC64LE-NEXT: addi 3, 3, .LCPI181_0@toc@l ; PC64LE-NEXT: xscvuxdsp 0, 0 ; PC64LE-NEXT: xscvuxdsp 1, 1 -; PC64LE-NEXT: xscvdpspn 0, 0 ; PC64LE-NEXT: xscvdpspn 1, 1 -; PC64LE-NEXT: xxmrghw 34, 1, 0 +; PC64LE-NEXT: xscvdpspn 0, 0 +; PC64LE-NEXT: xxmrghw 34, 0, 1 ; PC64LE-NEXT: lxvd2x 0, 0, 3 ; PC64LE-NEXT: xxswapd 35, 0 ; PC64LE-NEXT: mtfprd 0, 5 @@ -7811,20 +7811,20 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 { ; ; PC64LE9-LABEL: constrained_vector_uitofp_v3f32_v3i64: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: mtfprd 0, 3 ; PC64LE9-NEXT: mtfprd 1, 4 +; PC64LE9-NEXT: mtfprd 2, 3 +; PC64LE9-NEXT: mtfprd 0, 5 ; PC64LE9-NEXT: addis 3, 2, .LCPI181_0@toc@ha -; PC64LE9-NEXT: xscvuxdsp 0, 0 ; PC64LE9-NEXT: xscvuxdsp 1, 1 +; PC64LE9-NEXT: xscvuxdsp 2, 2 +; PC64LE9-NEXT: xscvuxdsp 0, 0 ; PC64LE9-NEXT: addi 3, 3, .LCPI181_0@toc@l -; PC64LE9-NEXT: xscvdpspn 0, 0 +; PC64LE9-NEXT: xscvdpspn 2, 2 ; PC64LE9-NEXT: xscvdpspn 1, 1 -; PC64LE9-NEXT: xxmrghw 35, 1, 0 -; PC64LE9-NEXT: mtfprd 1, 5 -; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: xscvuxdsp 1, 1 -; PC64LE9-NEXT: xscvdpspn 34, 1 -; PC64LE9-NEXT: xxperm 34, 35, 0 +; PC64LE9-NEXT: xscvdpspn 34, 0 +; PC64LE9-NEXT: xxmrghw 35, 1, 2 +; PC64LE9-NEXT: lxv 1, 0(3) +; PC64LE9-NEXT: xxperm 34, 35, 1 ; PC64LE9-NEXT: blr entry: %result = call <3 x float> diff --git a/llvm/test/CodeGen/X86/half.ll b/llvm/test/CodeGen/X86/half.ll index 38b4dc7d5bd4a..b6a4a12eb0fac 100644 --- a/llvm/test/CodeGen/X86/half.ll +++ b/llvm/test/CodeGen/X86/half.ll @@ -1990,38 +1990,38 @@ define <8 x half> @maxnum_v8f16(<8 x half> %0, <8 x half> %1) #0 { define void @pr63114() { ; CHECK-LIBCALL-LABEL: pr63114: ; CHECK-LIBCALL: # %bb.0: -; CHECK-LIBCALL-NEXT: movups (%rax), %xmm0 -; CHECK-LIBCALL-NEXT: movaps %xmm0, %xmm1 -; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3] -; CHECK-LIBCALL-NEXT: movaps %xmm0, %xmm3 -; CHECK-LIBCALL-NEXT: psrld $16, %xmm3 -; CHECK-LIBCALL-NEXT: movaps %xmm0, %xmm2 -; CHECK-LIBCALL-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] -; CHECK-LIBCALL-NEXT: movaps %xmm0, %xmm5 -; CHECK-LIBCALL-NEXT: psrldq {{.*#+}} xmm5 = xmm5[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; CHECK-LIBCALL-NEXT: movaps %xmm0, %xmm4 -; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm0[1,1] -; CHECK-LIBCALL-NEXT: movaps %xmm0, %xmm6 -; CHECK-LIBCALL-NEXT: psrldq {{.*#+}} xmm6 = xmm6[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; CHECK-LIBCALL-NEXT: movaps %xmm0, %xmm7 -; CHECK-LIBCALL-NEXT: psrlq $48, %xmm7 -; CHECK-LIBCALL-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm8 -; CHECK-LIBCALL-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm8[0],xmm7[1],xmm8[1],xmm7[2],xmm8[2],xmm7[3],xmm8[3] -; CHECK-LIBCALL-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm8[0],xmm0[1],xmm8[1],xmm0[2],xmm8[2],xmm0[3],xmm8[3] -; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm7[0,0] -; CHECK-LIBCALL-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm8[0],xmm6[1],xmm8[1],xmm6[2],xmm8[2],xmm6[3],xmm8[3] -; CHECK-LIBCALL-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm8[0],xmm4[1],xmm8[1],xmm4[2],xmm8[2],xmm4[3],xmm8[3] -; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,0],xmm6[0,0] -; CHECK-LIBCALL-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm8[0],xmm5[1],xmm8[1],xmm5[2],xmm8[2],xmm5[3],xmm8[3] -; CHECK-LIBCALL-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm8[0],xmm2[1],xmm8[1],xmm2[2],xmm8[2],xmm2[3],xmm8[3] -; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm5[0,0] -; CHECK-LIBCALL-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3] -; CHECK-LIBCALL-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm8[0],xmm1[1],xmm8[1],xmm1[2],xmm8[2],xmm1[3],xmm8[3] -; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm3[0,0] -; CHECK-LIBCALL-NEXT: movups %xmm1, 32 -; CHECK-LIBCALL-NEXT: movups %xmm2, 48 -; CHECK-LIBCALL-NEXT: movups %xmm4, 0 -; CHECK-LIBCALL-NEXT: movups %xmm0, 16 +; CHECK-LIBCALL-NEXT: movdqu (%rax), %xmm4 +; CHECK-LIBCALL-NEXT: pshuflw {{.*#+}} xmm0 = xmm4[0,1,3,3,4,5,6,7] +; CHECK-LIBCALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,1] +; CHECK-LIBCALL-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,0,65535,65535,65535,65535] +; CHECK-LIBCALL-NEXT: pand %xmm1, %xmm0 +; CHECK-LIBCALL-NEXT: movq {{.*#+}} xmm2 = [0,0,0,15360,0,0,0,0] +; CHECK-LIBCALL-NEXT: por %xmm2, %xmm0 +; CHECK-LIBCALL-NEXT: movdqa {{.*#+}} xmm3 = [65535,65535,65535,65535,65535,65535,65535,0] +; CHECK-LIBCALL-NEXT: pand %xmm3, %xmm0 +; CHECK-LIBCALL-NEXT: movdqa {{.*#+}} xmm5 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,60] +; CHECK-LIBCALL-NEXT: por %xmm5, %xmm0 +; CHECK-LIBCALL-NEXT: pshufhw {{.*#+}} xmm6 = xmm4[0,1,2,3,4,5,7,7] +; CHECK-LIBCALL-NEXT: pshufd {{.*#+}} xmm6 = xmm6[0,2,2,3] +; CHECK-LIBCALL-NEXT: pand %xmm1, %xmm6 +; CHECK-LIBCALL-NEXT: por %xmm2, %xmm6 +; CHECK-LIBCALL-NEXT: pand %xmm3, %xmm6 +; CHECK-LIBCALL-NEXT: por %xmm5, %xmm6 +; CHECK-LIBCALL-NEXT: pshufhw {{.*#+}} xmm7 = xmm4[0,1,2,3,5,5,5,5] +; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,3,0,3] +; CHECK-LIBCALL-NEXT: pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,5,5,5,5] +; CHECK-LIBCALL-NEXT: pand %xmm1, %xmm4 +; CHECK-LIBCALL-NEXT: por %xmm2, %xmm4 +; CHECK-LIBCALL-NEXT: pand %xmm3, %xmm4 +; CHECK-LIBCALL-NEXT: por %xmm5, %xmm4 +; CHECK-LIBCALL-NEXT: pand %xmm1, %xmm7 +; CHECK-LIBCALL-NEXT: por %xmm2, %xmm7 +; CHECK-LIBCALL-NEXT: pand %xmm3, %xmm7 +; CHECK-LIBCALL-NEXT: por %xmm5, %xmm7 +; CHECK-LIBCALL-NEXT: movdqu %xmm7, 0 +; CHECK-LIBCALL-NEXT: movdqu %xmm4, 32 +; CHECK-LIBCALL-NEXT: movdqu %xmm6, 48 +; CHECK-LIBCALL-NEXT: movdqu %xmm0, 16 ; CHECK-LIBCALL-NEXT: retq ; ; BWON-F16C-LABEL: pr63114: @@ -2055,43 +2055,38 @@ define void @pr63114() { ; ; CHECK-I686-LABEL: pr63114: ; CHECK-I686: # %bb.0: -; CHECK-I686-NEXT: subl $28, %esp -; CHECK-I686-NEXT: .cfi_def_cfa_offset 32 -; CHECK-I686-NEXT: movdqu (%eax), %xmm0 -; CHECK-I686-NEXT: movdqa %xmm0, %xmm3 -; CHECK-I686-NEXT: movdqa %xmm0, (%esp) # 16-byte Spill -; CHECK-I686-NEXT: movdqa %xmm0, %xmm2 -; CHECK-I686-NEXT: movdqa %xmm0, %xmm5 -; CHECK-I686-NEXT: movdqa %xmm0, %xmm4 -; CHECK-I686-NEXT: movdqa %xmm0, %xmm7 -; CHECK-I686-NEXT: psrlq $48, %xmm7 -; CHECK-I686-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}, %xmm6 -; CHECK-I686-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm6[0],xmm7[1],xmm6[1],xmm7[2],xmm6[2],xmm7[3],xmm6[3] -; CHECK-I686-NEXT: movdqa %xmm0, %xmm1 -; CHECK-I686-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3] -; CHECK-I686-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm7[0,0] -; CHECK-I686-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1,1,1] -; CHECK-I686-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; CHECK-I686-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3] -; CHECK-I686-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1],xmm4[2],xmm6[2],xmm4[3],xmm6[3] -; CHECK-I686-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,0],xmm1[0,0] -; CHECK-I686-NEXT: punpckhqdq {{.*#+}} xmm2 = xmm2[1,1] -; CHECK-I686-NEXT: psrldq {{.*#+}} xmm5 = xmm5[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; CHECK-I686-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3] -; CHECK-I686-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3] -; CHECK-I686-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm5[0,0] -; CHECK-I686-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,3,3,3] -; CHECK-I686-NEXT: movdqa (%esp), %xmm1 # 16-byte Reload -; CHECK-I686-NEXT: psrld $16, %xmm1 -; CHECK-I686-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3] -; CHECK-I686-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm6[0],xmm3[1],xmm6[1],xmm3[2],xmm6[2],xmm3[3],xmm6[3] -; CHECK-I686-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,0],xmm1[0,0] -; CHECK-I686-NEXT: movups %xmm3, 32 -; CHECK-I686-NEXT: movups %xmm2, 48 -; CHECK-I686-NEXT: movups %xmm4, 0 -; CHECK-I686-NEXT: movups %xmm0, 16 -; CHECK-I686-NEXT: addl $28, %esp -; CHECK-I686-NEXT: .cfi_def_cfa_offset 4 +; CHECK-I686-NEXT: movdqu (%eax), %xmm6 +; CHECK-I686-NEXT: pshuflw {{.*#+}} xmm0 = xmm6[0,1,3,3,4,5,6,7] +; CHECK-I686-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,1] +; CHECK-I686-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,0,65535,65535,65535,65535] +; CHECK-I686-NEXT: pand %xmm1, %xmm0 +; CHECK-I686-NEXT: movq {{.*#+}} xmm2 = [0,0,0,15360,0,0,0,0] +; CHECK-I686-NEXT: por %xmm2, %xmm0 +; CHECK-I686-NEXT: movdqa {{.*#+}} xmm3 = [65535,65535,65535,65535,65535,65535,65535,0] +; CHECK-I686-NEXT: pand %xmm3, %xmm0 +; CHECK-I686-NEXT: movdqa {{.*#+}} xmm4 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,60] +; CHECK-I686-NEXT: por %xmm4, %xmm0 +; CHECK-I686-NEXT: pshufhw {{.*#+}} xmm5 = xmm6[0,1,2,3,4,5,7,7] +; CHECK-I686-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,2,2,3] +; CHECK-I686-NEXT: pand %xmm1, %xmm5 +; CHECK-I686-NEXT: por %xmm2, %xmm5 +; CHECK-I686-NEXT: pand %xmm3, %xmm5 +; CHECK-I686-NEXT: por %xmm4, %xmm5 +; CHECK-I686-NEXT: pshufhw {{.*#+}} xmm7 = xmm6[0,1,2,3,5,5,5,5] +; CHECK-I686-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,3,0,3] +; CHECK-I686-NEXT: pshufhw {{.*#+}} xmm6 = xmm6[0,1,2,3,5,5,5,5] +; CHECK-I686-NEXT: pand %xmm1, %xmm6 +; CHECK-I686-NEXT: por %xmm2, %xmm6 +; CHECK-I686-NEXT: pand %xmm3, %xmm6 +; CHECK-I686-NEXT: por %xmm4, %xmm6 +; CHECK-I686-NEXT: pand %xmm1, %xmm7 +; CHECK-I686-NEXT: por %xmm2, %xmm7 +; CHECK-I686-NEXT: pand %xmm3, %xmm7 +; CHECK-I686-NEXT: por %xmm4, %xmm7 +; CHECK-I686-NEXT: movdqu %xmm7, 0 +; CHECK-I686-NEXT: movdqu %xmm6, 32 +; CHECK-I686-NEXT: movdqu %xmm5, 48 +; CHECK-I686-NEXT: movdqu %xmm0, 16 ; CHECK-I686-NEXT: retl %1 = load <24 x half>, ptr poison, align 2 %2 = shufflevector <24 x half> %1, <24 x half> poison, <8 x i32> diff --git a/llvm/test/CodeGen/X86/matrix-multiply.ll b/llvm/test/CodeGen/X86/matrix-multiply.ll index 388d8528a2b80..f38b769fe4987 100644 --- a/llvm/test/CodeGen/X86/matrix-multiply.ll +++ b/llvm/test/CodeGen/X86/matrix-multiply.ll @@ -368,46 +368,47 @@ define <9 x float> @test_mul3x3_f32(<9 x float> %a0, <9 x float> %a1) nounwind { ; AVX512F-NEXT: vaddps %xmm4, %xmm9, %xmm9 ; AVX512F-NEXT: vshufpd {{.*#+}} xmm4 = xmm0[1,0] ; AVX512F-NEXT: vmulss %xmm1, %xmm4, %xmm10 -; AVX512F-NEXT: vmovshdup {{.*#+}} xmm5 = xmm5[1,1,3,3] -; AVX512F-NEXT: vmulss %xmm6, %xmm5, %xmm6 -; AVX512F-NEXT: vaddss %xmm6, %xmm10, %xmm6 -; AVX512F-NEXT: vextractf32x4 $2, %zmm0, %xmm10 -; AVX512F-NEXT: vmulss %xmm8, %xmm10, %xmm8 -; AVX512F-NEXT: vaddss %xmm6, %xmm8, %xmm6 -; AVX512F-NEXT: vinsertps {{.*#+}} xmm6 = xmm9[0,1],xmm6[0],xmm9[3] +; AVX512F-NEXT: vmovshdup {{.*#+}} xmm11 = xmm5[1,1,3,3] +; AVX512F-NEXT: vmulss %xmm6, %xmm11, %xmm5 +; AVX512F-NEXT: vaddss %xmm5, %xmm10, %xmm5 +; AVX512F-NEXT: vextractf32x4 $2, %zmm0, %xmm6 +; AVX512F-NEXT: vmulss %xmm6, %xmm8, %xmm8 +; AVX512F-NEXT: vaddss %xmm5, %xmm8, %xmm5 +; AVX512F-NEXT: vinsertps {{.*#+}} xmm5 = xmm9[0,1],xmm5[0],xmm9[3] ; AVX512F-NEXT: vmulps %xmm7, %xmm0, %xmm8 ; AVX512F-NEXT: vextractf128 $1, %ymm1, %xmm9 -; AVX512F-NEXT: vmovsldup {{.*#+}} xmm11 = xmm9[0,0,2,2] -; AVX512F-NEXT: vmulps %xmm2, %xmm11, %xmm11 -; AVX512F-NEXT: vaddps %xmm11, %xmm8, %xmm8 -; AVX512F-NEXT: vmovshdup {{.*#+}} xmm11 = xmm9[1,1,3,3] -; AVX512F-NEXT: vmulps %xmm3, %xmm11, %xmm12 +; AVX512F-NEXT: vmovsldup {{.*#+}} xmm10 = xmm9[0,0,2,2] +; AVX512F-NEXT: vmulps %xmm2, %xmm10, %xmm10 +; AVX512F-NEXT: vaddps %xmm10, %xmm8, %xmm8 +; AVX512F-NEXT: vmovshdup {{.*#+}} xmm10 = xmm9[1,1,3,3] +; AVX512F-NEXT: vmulps %xmm3, %xmm10, %xmm12 ; AVX512F-NEXT: vaddps %xmm12, %xmm8, %xmm8 ; AVX512F-NEXT: vmulss %xmm7, %xmm4, %xmm7 -; AVX512F-NEXT: vmulss %xmm5, %xmm9, %xmm12 +; AVX512F-NEXT: vmulss %xmm9, %xmm11, %xmm12 ; AVX512F-NEXT: vaddss %xmm7, %xmm12, %xmm7 -; AVX512F-NEXT: vmulss %xmm11, %xmm10, %xmm11 -; AVX512F-NEXT: vaddss %xmm7, %xmm11, %xmm7 -; AVX512F-NEXT: vinsertps {{.*#+}} xmm7 = xmm8[0,1],xmm7[0],xmm8[3] -; AVX512F-NEXT: vshufps {{.*#+}} xmm8 = xmm9[3,3,3,3] -; AVX512F-NEXT: vshufpd {{.*#+}} xmm11 = xmm9[1,0] +; AVX512F-NEXT: vmulss %xmm6, %xmm10, %xmm10 +; AVX512F-NEXT: vaddss %xmm7, %xmm10, %xmm7 +; AVX512F-NEXT: vshufps {{.*#+}} xmm10 = xmm9[3,3,3,3] +; AVX512F-NEXT: vshufpd {{.*#+}} xmm12 = xmm9[1,0] ; AVX512F-NEXT: vshufps {{.*#+}} xmm9 = xmm9[2,2,2,2] ; AVX512F-NEXT: vmulps %xmm0, %xmm9, %xmm0 -; AVX512F-NEXT: vmulps %xmm2, %xmm8, %xmm2 +; AVX512F-NEXT: vmulps %xmm2, %xmm10, %xmm2 ; AVX512F-NEXT: vaddps %xmm2, %xmm0, %xmm0 ; AVX512F-NEXT: vextractf32x4 $2, %zmm1, %xmm1 ; AVX512F-NEXT: vbroadcastss %xmm1, %xmm2 ; AVX512F-NEXT: vmulps %xmm2, %xmm3, %xmm2 ; AVX512F-NEXT: vaddps %xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vmulss %xmm4, %xmm11, %xmm2 -; AVX512F-NEXT: vmulss %xmm5, %xmm8, %xmm3 +; AVX512F-NEXT: vmulss %xmm4, %xmm12, %xmm2 +; AVX512F-NEXT: vmulss %xmm10, %xmm11, %xmm3 ; AVX512F-NEXT: vaddss %xmm3, %xmm2, %xmm2 -; AVX512F-NEXT: vmulss %xmm1, %xmm10, %xmm1 +; AVX512F-NEXT: vmulss %xmm1, %xmm6, %xmm1 ; AVX512F-NEXT: vaddss %xmm1, %xmm2, %xmm1 -; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0,1],xmm1[0],xmm0[3] -; AVX512F-NEXT: vinsertf32x4 $1, %xmm7, %zmm6, %zmm2 -; AVX512F-NEXT: vpmovsxbd {{.*#+}} zmm0 = [0,1,2,4,5,6,16,17,18,0,0,0,0,0,0,0] -; AVX512F-NEXT: vpermi2ps %zmm1, %zmm2, %zmm0 +; AVX512F-NEXT: vmovshdup {{.*#+}} xmm2 = xmm8[1,1,3,3] +; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm7[0],xmm2[2,3] +; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm5[0,1,2],xmm8[0] +; AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 +; AVX512F-NEXT: vinsertf32x4 $2, %xmm1, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: test_mul3x3_f32: @@ -447,26 +448,27 @@ define <9 x float> @test_mul3x3_f32(<9 x float> %a0, <9 x float> %a1) nounwind { ; AVX512VL-NEXT: vaddss %xmm7, %xmm12, %xmm7 ; AVX512VL-NEXT: vmulss %xmm11, %xmm10, %xmm11 ; AVX512VL-NEXT: vaddss %xmm7, %xmm11, %xmm7 -; AVX512VL-NEXT: vinsertps {{.*#+}} xmm5 = xmm5[0,1],xmm7[0],xmm5[3] -; AVX512VL-NEXT: vshufps {{.*#+}} xmm7 = xmm8[3,3,3,3] -; AVX512VL-NEXT: vshufpd {{.*#+}} xmm11 = xmm8[1,0] +; AVX512VL-NEXT: vshufps {{.*#+}} xmm11 = xmm8[3,3,3,3] +; AVX512VL-NEXT: vshufpd {{.*#+}} xmm12 = xmm8[1,0] ; AVX512VL-NEXT: vshufps {{.*#+}} xmm8 = xmm8[2,2,2,2] ; AVX512VL-NEXT: vmulps %xmm0, %xmm8, %xmm0 -; AVX512VL-NEXT: vmulps %xmm7, %xmm2, %xmm2 +; AVX512VL-NEXT: vmulps %xmm2, %xmm11, %xmm2 ; AVX512VL-NEXT: vaddps %xmm2, %xmm0, %xmm0 ; AVX512VL-NEXT: vextractf32x4 $2, %zmm1, %xmm1 ; AVX512VL-NEXT: vbroadcastss %xmm1, %xmm2 ; AVX512VL-NEXT: vmulps %xmm2, %xmm6, %xmm2 ; AVX512VL-NEXT: vaddps %xmm2, %xmm0, %xmm0 -; AVX512VL-NEXT: vmulss %xmm11, %xmm9, %xmm2 -; AVX512VL-NEXT: vmulss %xmm7, %xmm4, %xmm4 +; AVX512VL-NEXT: vmulss %xmm12, %xmm9, %xmm2 +; AVX512VL-NEXT: vmulss %xmm4, %xmm11, %xmm4 ; AVX512VL-NEXT: vaddss %xmm4, %xmm2, %xmm2 ; AVX512VL-NEXT: vmulss %xmm1, %xmm10, %xmm1 ; AVX512VL-NEXT: vaddss %xmm1, %xmm2, %xmm1 -; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0,1],xmm1[0],xmm0[3] -; AVX512VL-NEXT: vinsertf32x4 $1, %xmm5, %zmm3, %zmm2 -; AVX512VL-NEXT: vpmovsxbd {{.*#+}} zmm0 = [0,1,2,4,5,6,16,17,18,0,0,0,0,0,0,0] -; AVX512VL-NEXT: vpermi2ps %zmm1, %zmm2, %zmm0 +; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm5[1,1,3,3] +; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm7[0],xmm2[2,3] +; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0,1,2],xmm5[0] +; AVX512VL-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 +; AVX512VL-NEXT: vinsertf32x4 $2, %xmm1, %zmm0, %zmm0 ; AVX512VL-NEXT: retq entry: %block = shufflevector <9 x float> %a0, <9 x float> poison, <2 x i32> diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll index 4a5b4277c3cca..88d3ad181d766 100644 --- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll @@ -4143,11 +4143,11 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f32() #0 { ; CHECK-LABEL: constrained_vector_fptosi_v3i32_v3f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax -; CHECK-NEXT: movd %eax, %xmm1 -; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax -; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ecx +; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %edx +; CHECK-NEXT: movd %edx, %xmm1 +; CHECK-NEXT: movd %ecx, %xmm0 ; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax ; CHECK-NEXT: movd %eax, %xmm1 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: retq @@ -4155,10 +4155,10 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f32() #0 { ; AVX-LABEL: constrained_vector_fptosi_v3i32_v3f32: ; AVX: # %bb.0: # %entry ; AVX-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax -; AVX-NEXT: vmovd %eax, %xmm0 -; AVX-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax -; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 -; AVX-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax +; AVX-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ecx +; AVX-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %edx +; AVX-NEXT: vmovd %edx, %xmm0 +; AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 ; AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 ; AVX-NEXT: retq entry: @@ -4256,11 +4256,11 @@ define <3 x i64> @constrained_vector_fptosi_v3i64_v3f32() #0 { ; AVX1-LABEL: constrained_vector_fptosi_v3i64_v3f32: ; AVX1: # %bb.0: # %entry ; AVX1-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax -; AVX1-NEXT: vmovq %rax, %xmm0 -; AVX1-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax -; AVX1-NEXT: vmovq %rax, %xmm1 +; AVX1-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx +; AVX1-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rdx +; AVX1-NEXT: vmovq %rdx, %xmm0 +; AVX1-NEXT: vmovq %rcx, %xmm1 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX1-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax ; AVX1-NEXT: vmovq %rax, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq @@ -4268,11 +4268,11 @@ define <3 x i64> @constrained_vector_fptosi_v3i64_v3f32() #0 { ; AVX512-LABEL: constrained_vector_fptosi_v3i64_v3f32: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax -; AVX512-NEXT: vmovq %rax, %xmm0 -; AVX512-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax -; AVX512-NEXT: vmovq %rax, %xmm1 +; AVX512-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx +; AVX512-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rdx +; AVX512-NEXT: vmovq %rdx, %xmm0 +; AVX512-NEXT: vmovq %rcx, %xmm1 ; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX512-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax ; AVX512-NEXT: vmovq %rax, %xmm1 ; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; AVX512-NEXT: retq @@ -4382,11 +4382,11 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f64() #0 { ; CHECK-LABEL: constrained_vector_fptosi_v3i32_v3f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax -; CHECK-NEXT: movd %eax, %xmm1 -; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax -; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ecx +; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %edx +; CHECK-NEXT: movd %edx, %xmm1 +; CHECK-NEXT: movd %ecx, %xmm0 ; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax ; CHECK-NEXT: movd %eax, %xmm1 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: retq @@ -4394,10 +4394,10 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f64() #0 { ; AVX-LABEL: constrained_vector_fptosi_v3i32_v3f64: ; AVX: # %bb.0: # %entry ; AVX-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax -; AVX-NEXT: vmovd %eax, %xmm0 -; AVX-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax -; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 -; AVX-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax +; AVX-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ecx +; AVX-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %edx +; AVX-NEXT: vmovd %edx, %xmm0 +; AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 ; AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 ; AVX-NEXT: retq entry: @@ -4498,11 +4498,11 @@ define <3 x i64> @constrained_vector_fptosi_v3i64_v3f64() #0 { ; AVX1-LABEL: constrained_vector_fptosi_v3i64_v3f64: ; AVX1: # %bb.0: # %entry ; AVX1-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax -; AVX1-NEXT: vmovq %rax, %xmm0 -; AVX1-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax -; AVX1-NEXT: vmovq %rax, %xmm1 +; AVX1-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx +; AVX1-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rdx +; AVX1-NEXT: vmovq %rdx, %xmm0 +; AVX1-NEXT: vmovq %rcx, %xmm1 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX1-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax ; AVX1-NEXT: vmovq %rax, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq @@ -4510,11 +4510,11 @@ define <3 x i64> @constrained_vector_fptosi_v3i64_v3f64() #0 { ; AVX512-LABEL: constrained_vector_fptosi_v3i64_v3f64: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax -; AVX512-NEXT: vmovq %rax, %xmm0 -; AVX512-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax -; AVX512-NEXT: vmovq %rax, %xmm1 +; AVX512-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx +; AVX512-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rdx +; AVX512-NEXT: vmovq %rdx, %xmm0 +; AVX512-NEXT: vmovq %rcx, %xmm1 ; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX512-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax ; AVX512-NEXT: vmovq %rax, %xmm1 ; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; AVX512-NEXT: retq @@ -4645,11 +4645,11 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32() #0 { ; CHECK-LABEL: constrained_vector_fptoui_v3i32_v3f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax -; CHECK-NEXT: movd %eax, %xmm1 -; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax -; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx +; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rdx +; CHECK-NEXT: movd %edx, %xmm1 +; CHECK-NEXT: movd %ecx, %xmm0 ; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax ; CHECK-NEXT: movd %eax, %xmm1 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: retq @@ -4658,19 +4658,19 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32() #0 { ; AVX1: # %bb.0: # %entry ; AVX1-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax ; AVX1-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx -; AVX1-NEXT: vmovd %ecx, %xmm0 -; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 -; AVX1-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax +; AVX1-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rdx +; AVX1-NEXT: vmovd %edx, %xmm0 +; AVX1-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 ; AVX1-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 ; AVX1-NEXT: retq ; ; AVX512-LABEL: constrained_vector_fptoui_v3i32_v3f32: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax -; AVX512-NEXT: vmovd %eax, %xmm0 -; AVX512-NEXT: vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax -; AVX512-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 -; AVX512-NEXT: vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax +; AVX512-NEXT: vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ecx +; AVX512-NEXT: vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %edx +; AVX512-NEXT: vmovd %edx, %xmm0 +; AVX512-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 ; AVX512-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 ; AVX512-NEXT: retq entry: @@ -4911,7 +4911,7 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32() #0 { ; ; AVX1-LABEL: constrained_vector_fptoui_v3i64_v3f32: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vmovss {{.*#+}} xmm2 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX1-NEXT: vmovss {{.*#+}} xmm2 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] ; AVX1-NEXT: vmovss {{.*#+}} xmm0 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] ; AVX1-NEXT: vcomiss %xmm2, %xmm0 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 @@ -4921,51 +4921,51 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32() #0 { ; AVX1-NEXT: vmovaps %xmm0, %xmm3 ; AVX1-NEXT: .LBB123_2: # %entry ; AVX1-NEXT: vsubss %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vcvttss2si %xmm2, %rax -; AVX1-NEXT: setbe %cl -; AVX1-NEXT: movzbl %cl, %ecx -; AVX1-NEXT: shlq $63, %rcx -; AVX1-NEXT: xorq %rax, %rcx -; AVX1-NEXT: vmovq %rcx, %xmm2 -; AVX1-NEXT: vmovss {{.*#+}} xmm3 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] -; AVX1-NEXT: vcomiss %xmm3, %xmm0 -; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: vcvttss2si %xmm2, %rcx +; AVX1-NEXT: setbe %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: shlq $63, %rax +; AVX1-NEXT: xorq %rcx, %rax +; AVX1-NEXT: vmovss {{.*#+}} xmm2 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX1-NEXT: vcomiss %xmm2, %xmm0 +; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; AVX1-NEXT: ja .LBB123_4 ; AVX1-NEXT: # %bb.3: # %entry -; AVX1-NEXT: vmovaps %xmm0, %xmm4 +; AVX1-NEXT: vmovaps %xmm0, %xmm3 ; AVX1-NEXT: .LBB123_4: # %entry -; AVX1-NEXT: vsubss %xmm4, %xmm3, %xmm3 -; AVX1-NEXT: vcvttss2si %xmm3, %rax +; AVX1-NEXT: vsubss %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vcvttss2si %xmm2, %rdx ; AVX1-NEXT: setbe %cl ; AVX1-NEXT: movzbl %cl, %ecx ; AVX1-NEXT: shlq $63, %rcx -; AVX1-NEXT: xorq %rax, %rcx -; AVX1-NEXT: vmovq %rcx, %xmm3 -; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] -; AVX1-NEXT: vmovss {{.*#+}} xmm3 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] -; AVX1-NEXT: vcomiss %xmm3, %xmm0 +; AVX1-NEXT: xorq %rdx, %rcx +; AVX1-NEXT: vmovss {{.*#+}} xmm2 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX1-NEXT: vcomiss %xmm2, %xmm0 ; AVX1-NEXT: ja .LBB123_6 ; AVX1-NEXT: # %bb.5: # %entry ; AVX1-NEXT: vmovaps %xmm0, %xmm1 ; AVX1-NEXT: .LBB123_6: # %entry -; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm0 -; AVX1-NEXT: vcvttss2si %xmm0, %rax -; AVX1-NEXT: setbe %cl -; AVX1-NEXT: movzbl %cl, %ecx -; AVX1-NEXT: shlq $63, %rcx -; AVX1-NEXT: xorq %rax, %rcx -; AVX1-NEXT: vmovq %rcx, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 +; AVX1-NEXT: vsubss %xmm1, %xmm2, %xmm0 +; AVX1-NEXT: vcvttss2si %xmm0, %rdx +; AVX1-NEXT: setbe %sil +; AVX1-NEXT: movzbl %sil, %esi +; AVX1-NEXT: shlq $63, %rsi +; AVX1-NEXT: xorq %rdx, %rsi +; AVX1-NEXT: vmovq %rsi, %xmm0 +; AVX1-NEXT: vmovq %rcx, %xmm1 +; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX1-NEXT: vmovq %rax, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX512-LABEL: constrained_vector_fptoui_v3i64_v3f32: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax -; AVX512-NEXT: vmovq %rax, %xmm0 -; AVX512-NEXT: vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax -; AVX512-NEXT: vmovq %rax, %xmm1 +; AVX512-NEXT: vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx +; AVX512-NEXT: vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rdx +; AVX512-NEXT: vmovq %rdx, %xmm0 +; AVX512-NEXT: vmovq %rcx, %xmm1 ; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX512-NEXT: vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax ; AVX512-NEXT: vmovq %rax, %xmm1 ; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; AVX512-NEXT: retq @@ -5194,11 +5194,11 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f64() #0 { ; CHECK-LABEL: constrained_vector_fptoui_v3i32_v3f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax -; CHECK-NEXT: movd %eax, %xmm1 -; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax -; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx +; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rdx +; CHECK-NEXT: movd %edx, %xmm1 +; CHECK-NEXT: movd %ecx, %xmm0 ; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax ; CHECK-NEXT: movd %eax, %xmm1 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: retq @@ -5207,19 +5207,19 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f64() #0 { ; AVX1: # %bb.0: # %entry ; AVX1-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax ; AVX1-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx -; AVX1-NEXT: vmovd %ecx, %xmm0 -; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 -; AVX1-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax +; AVX1-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rdx +; AVX1-NEXT: vmovd %edx, %xmm0 +; AVX1-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 ; AVX1-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 ; AVX1-NEXT: retq ; ; AVX512-LABEL: constrained_vector_fptoui_v3i32_v3f64: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax -; AVX512-NEXT: vmovd %eax, %xmm0 -; AVX512-NEXT: vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax -; AVX512-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 -; AVX512-NEXT: vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax +; AVX512-NEXT: vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ecx +; AVX512-NEXT: vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %edx +; AVX512-NEXT: vmovd %edx, %xmm0 +; AVX512-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 ; AVX512-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 ; AVX512-NEXT: retq entry: @@ -5466,7 +5466,7 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64() #0 { ; ; AVX1-LABEL: constrained_vector_fptoui_v3i64_v3f64: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = [4.2200000000000003E+1,0.0E+0] +; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = [4.2299999999999997E+1,0.0E+0] ; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = [9.2233720368547758E+18,0.0E+0] ; AVX1-NEXT: vcomisd %xmm2, %xmm0 ; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 @@ -5476,51 +5476,51 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64() #0 { ; AVX1-NEXT: vmovapd %xmm0, %xmm3 ; AVX1-NEXT: .LBB131_2: # %entry ; AVX1-NEXT: vsubsd %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vcvttsd2si %xmm2, %rax -; AVX1-NEXT: setbe %cl -; AVX1-NEXT: movzbl %cl, %ecx -; AVX1-NEXT: shlq $63, %rcx -; AVX1-NEXT: xorq %rax, %rcx -; AVX1-NEXT: vmovq %rcx, %xmm2 -; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = [4.2100000000000001E+1,0.0E+0] -; AVX1-NEXT: vcomisd %xmm3, %xmm0 -; AVX1-NEXT: vxorpd %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: vcvttsd2si %xmm2, %rcx +; AVX1-NEXT: setbe %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: shlq $63, %rax +; AVX1-NEXT: xorq %rcx, %rax +; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = [4.2100000000000001E+1,0.0E+0] +; AVX1-NEXT: vcomisd %xmm2, %xmm0 +; AVX1-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; AVX1-NEXT: ja .LBB131_4 ; AVX1-NEXT: # %bb.3: # %entry -; AVX1-NEXT: vmovapd %xmm0, %xmm4 +; AVX1-NEXT: vmovapd %xmm0, %xmm3 ; AVX1-NEXT: .LBB131_4: # %entry -; AVX1-NEXT: vsubsd %xmm4, %xmm3, %xmm3 -; AVX1-NEXT: vcvttsd2si %xmm3, %rax +; AVX1-NEXT: vsubsd %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vcvttsd2si %xmm2, %rdx ; AVX1-NEXT: setbe %cl ; AVX1-NEXT: movzbl %cl, %ecx ; AVX1-NEXT: shlq $63, %rcx -; AVX1-NEXT: xorq %rax, %rcx -; AVX1-NEXT: vmovq %rcx, %xmm3 -; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] -; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = [4.2299999999999997E+1,0.0E+0] -; AVX1-NEXT: vcomisd %xmm3, %xmm0 +; AVX1-NEXT: xorq %rdx, %rcx +; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = [4.2200000000000003E+1,0.0E+0] +; AVX1-NEXT: vcomisd %xmm2, %xmm0 ; AVX1-NEXT: ja .LBB131_6 ; AVX1-NEXT: # %bb.5: # %entry ; AVX1-NEXT: vmovapd %xmm0, %xmm1 ; AVX1-NEXT: .LBB131_6: # %entry -; AVX1-NEXT: vsubsd %xmm1, %xmm3, %xmm0 -; AVX1-NEXT: vcvttsd2si %xmm0, %rax -; AVX1-NEXT: setbe %cl -; AVX1-NEXT: movzbl %cl, %ecx -; AVX1-NEXT: shlq $63, %rcx -; AVX1-NEXT: xorq %rax, %rcx -; AVX1-NEXT: vmovq %rcx, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 +; AVX1-NEXT: vsubsd %xmm1, %xmm2, %xmm0 +; AVX1-NEXT: vcvttsd2si %xmm0, %rdx +; AVX1-NEXT: setbe %sil +; AVX1-NEXT: movzbl %sil, %esi +; AVX1-NEXT: shlq $63, %rsi +; AVX1-NEXT: xorq %rdx, %rsi +; AVX1-NEXT: vmovq %rsi, %xmm0 +; AVX1-NEXT: vmovq %rcx, %xmm1 +; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX1-NEXT: vmovq %rax, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX512-LABEL: constrained_vector_fptoui_v3i64_v3f64: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax -; AVX512-NEXT: vmovq %rax, %xmm0 -; AVX512-NEXT: vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax -; AVX512-NEXT: vmovq %rax, %xmm1 +; AVX512-NEXT: vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx +; AVX512-NEXT: vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rdx +; AVX512-NEXT: vmovq %rdx, %xmm0 +; AVX512-NEXT: vmovq %rcx, %xmm1 ; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX512-NEXT: vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax ; AVX512-NEXT: vmovq %rax, %xmm1 ; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; AVX512-NEXT: retq @@ -5731,26 +5731,26 @@ entry: define <3 x float> @constrained_vector_fptrunc_v3f64() #0 { ; CHECK-LABEL: constrained_vector_fptrunc_v3f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] ; CHECK-NEXT: cvtsd2ss %xmm0, %xmm1 ; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] ; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0 -; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; CHECK-NEXT: movsd {{.*#+}} xmm1 = [4.2299999999999997E+1,0.0E+0] -; CHECK-NEXT: cvtsd2ss %xmm1, %xmm1 +; CHECK-NEXT: movsd {{.*#+}} xmm2 = [4.2200000000000003E+1,0.0E+0] +; CHECK-NEXT: cvtsd2ss %xmm2, %xmm2 +; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fptrunc_v3f64: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] ; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [4.2100000000000001E+1,0.0E+0] ; AVX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] -; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [4.2299999999999997E+1,0.0E+0] -; AVX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] +; AVX-NEXT: vmovsd {{.*#+}} xmm2 = [4.2200000000000003E+1,0.0E+0] +; AVX-NEXT: vcvtsd2ss %xmm2, %xmm2, %xmm2 +; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3] +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] ; AVX-NEXT: retq entry: %result = call <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64( @@ -5834,14 +5834,14 @@ define <3 x double> @constrained_vector_fpext_v3f32() #0 { ; ; AVX-LABEL: constrained_vector_fpext_v3f32: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] ; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vmovss {{.*#+}} xmm1 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] ; AVX-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX-NEXT: vmovss {{.*#+}} xmm1 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] -; AVX-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX-NEXT: vmovss {{.*#+}} xmm2 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 +; AVX-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX-NEXT: retq entry: %result = call <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32( @@ -6702,14 +6702,14 @@ define <3 x double> @constrained_vector_sitofp_v3f64_v3i32(<3 x i32> %x) #0 { ; ; AVX-LABEL: constrained_vector_sitofp_v3f64_v3i32: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vextractps $1, %xmm0, %eax +; AVX-NEXT: vextractps $2, %xmm0, %eax ; AVX-NEXT: vcvtsi2sd %eax, %xmm15, %xmm1 ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: vcvtsi2sd %eax, %xmm15, %xmm2 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; AVX-NEXT: vpextrd $2, %xmm0, %eax +; AVX-NEXT: vpextrd $1, %xmm0, %eax ; AVX-NEXT: vcvtsi2sd %eax, %xmm15, %xmm0 -; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX-NEXT: retq entry: %result = call <3 x double> @@ -6722,31 +6722,31 @@ entry: define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 { ; CHECK-LABEL: constrained_vector_sitofp_v3f32_v3i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movd %xmm0, %eax +; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; CHECK-NEXT: movd %xmm1, %eax +; CHECK-NEXT: xorps %xmm1, %xmm1 ; CHECK-NEXT: cvtsi2ss %eax, %xmm1 ; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] ; CHECK-NEXT: movd %xmm2, %eax ; CHECK-NEXT: xorps %xmm2, %xmm2 ; CHECK-NEXT: cvtsi2ss %eax, %xmm2 -; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] ; CHECK-NEXT: movd %xmm0, %eax ; CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: cvtsi2ss %eax, %xmm0 -; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] -; CHECK-NEXT: movaps %xmm1, %xmm0 +; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_sitofp_v3f32_v3i32: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vextractps $1, %xmm0, %eax +; AVX-NEXT: vextractps $2, %xmm0, %eax ; AVX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm1 ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm2 -; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] -; AVX-NEXT: vpextrd $2, %xmm0, %eax +; AVX-NEXT: vpextrd $1, %xmm0, %eax ; AVX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0 -; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[2,3] +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] ; AVX-NEXT: retq entry: %result = call <3 x float> @@ -6769,28 +6769,28 @@ define <3 x double> @constrained_vector_sitofp_v3f64_v3i64(<3 x i64> %x) #0 { ; ; AVX1-LABEL: constrained_vector_sitofp_v3f64_v3i64: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vpextrq $1, %xmm0, %rax +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vmovq %xmm1, %rax ; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1 ; AVX1-NEXT: vmovq %xmm0, %rax ; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2 -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vmovq %xmm0, %rax +; AVX1-NEXT: vpextrq $1, %xmm0, %rax ; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX512-LABEL: constrained_vector_sitofp_v3f64_v3i64: ; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vpextrq $1, %xmm0, %rax +; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512-NEXT: vmovq %xmm1, %rax ; AVX512-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1 ; AVX512-NEXT: vmovq %xmm0, %rax ; AVX512-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2 -; AVX512-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0 -; AVX512-NEXT: vmovq %xmm0, %rax +; AVX512-NEXT: vpextrq $1, %xmm0, %rax ; AVX512-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0 -; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX512-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX512-NEXT: retq entry: %result = call <3 x double> @@ -6803,39 +6803,38 @@ entry: define <3 x float> @constrained_vector_sitofp_v3f32_v3i64(<3 x i64> %x) #0 { ; CHECK-LABEL: constrained_vector_sitofp_v3f32_v3i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cvtsi2ss %rsi, %xmm1 -; CHECK-NEXT: cvtsi2ss %rdi, %xmm0 -; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; CHECK-NEXT: xorps %xmm1, %xmm1 ; CHECK-NEXT: cvtsi2ss %rdx, %xmm1 +; CHECK-NEXT: cvtsi2ss %rdi, %xmm0 +; CHECK-NEXT: cvtsi2ss %rsi, %xmm2 +; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: retq ; ; AVX1-LABEL: constrained_vector_sitofp_v3f32_v3i64: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vpextrq $1, %xmm0, %rax +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vmovq %xmm1, %rax ; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1 ; AVX1-NEXT: vmovq %xmm0, %rax ; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2 -; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vmovq %xmm0, %rax +; AVX1-NEXT: vpextrq $1, %xmm0, %rax ; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0 -; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] +; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[2,3] +; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX512-LABEL: constrained_vector_sitofp_v3f32_v3i64: ; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vpextrq $1, %xmm0, %rax +; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512-NEXT: vmovq %xmm1, %rax ; AVX512-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1 ; AVX512-NEXT: vmovq %xmm0, %rax ; AVX512-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2 -; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] -; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0 -; AVX512-NEXT: vmovq %xmm0, %rax +; AVX512-NEXT: vpextrq $1, %xmm0, %rax ; AVX512-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0 -; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] +; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[2,3] +; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq entry: @@ -7415,26 +7414,26 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i32(<3 x i32> %x) #0 { ; ; AVX1-LABEL: constrained_vector_uitofp_v3f64_v3i32: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vextractps $1, %xmm0, %eax +; AVX1-NEXT: vextractps $2, %xmm0, %eax ; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1 ; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2 -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; AVX1-NEXT: vpextrd $2, %xmm0, %eax +; AVX1-NEXT: vpextrd $1, %xmm0, %eax ; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX512-LABEL: constrained_vector_uitofp_v3f64_v3i32: ; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vextractps $1, %xmm0, %eax +; AVX512-NEXT: vextractps $2, %xmm0, %eax ; AVX512-NEXT: vcvtusi2sd %eax, %xmm15, %xmm1 ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: vcvtusi2sd %eax, %xmm15, %xmm2 -; AVX512-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; AVX512-NEXT: vpextrd $2, %xmm0, %eax +; AVX512-NEXT: vpextrd $1, %xmm0, %eax ; AVX512-NEXT: vcvtusi2sd %eax, %xmm15, %xmm0 -; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX512-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX512-NEXT: retq entry: %result = call <3 x double> @@ -7447,43 +7446,43 @@ entry: define <3 x float> @constrained_vector_uitofp_v3f32_v3i32(<3 x i32> %x) #0 { ; CHECK-LABEL: constrained_vector_uitofp_v3f32_v3i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movd %xmm0, %eax +; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; CHECK-NEXT: movd %xmm1, %eax +; CHECK-NEXT: xorps %xmm1, %xmm1 ; CHECK-NEXT: cvtsi2ss %rax, %xmm1 ; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] ; CHECK-NEXT: movd %xmm2, %eax ; CHECK-NEXT: xorps %xmm2, %xmm2 ; CHECK-NEXT: cvtsi2ss %rax, %xmm2 -; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] ; CHECK-NEXT: movd %xmm0, %eax ; CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: cvtsi2ss %rax, %xmm0 -; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] -; CHECK-NEXT: movaps %xmm1, %xmm0 +; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: retq ; ; AVX1-LABEL: constrained_vector_uitofp_v3f32_v3i32: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vextractps $1, %xmm0, %eax +; AVX1-NEXT: vextractps $2, %xmm0, %eax ; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1 ; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2 -; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] -; AVX1-NEXT: vpextrd $2, %xmm0, %eax +; AVX1-NEXT: vpextrd $1, %xmm0, %eax ; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0 -; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] +; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[2,3] +; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] ; AVX1-NEXT: retq ; ; AVX512-LABEL: constrained_vector_uitofp_v3f32_v3i32: ; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vextractps $1, %xmm0, %eax +; AVX512-NEXT: vextractps $2, %xmm0, %eax ; AVX512-NEXT: vcvtusi2ss %eax, %xmm15, %xmm1 ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: vcvtusi2ss %eax, %xmm15, %xmm2 -; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] -; AVX512-NEXT: vpextrd $2, %xmm0, %eax +; AVX512-NEXT: vpextrd $1, %xmm0, %eax ; AVX512-NEXT: vcvtusi2ss %eax, %xmm15, %xmm0 -; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] +; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[2,3] +; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] ; AVX512-NEXT: retq entry: %result = call <3 x float> @@ -7539,7 +7538,8 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 { ; ; AVX1-LABEL: constrained_vector_uitofp_v3f64_v3i64: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vpextrq $1, %xmm0, %rax +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vmovq %xmm1, %rax ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: movl %eax, %edx @@ -7565,9 +7565,7 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 { ; AVX1-NEXT: # %bb.3: ; AVX1-NEXT: vaddsd %xmm2, %xmm2, %xmm2 ; AVX1-NEXT: .LBB183_4: # %entry -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vmovq %xmm0, %rax +; AVX1-NEXT: vpextrq $1, %xmm0, %rax ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: movl %eax, %edx @@ -7580,20 +7578,21 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 { ; AVX1-NEXT: # %bb.5: ; AVX1-NEXT: vaddsd %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: .LBB183_6: # %entry -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX512-LABEL: constrained_vector_uitofp_v3f64_v3i64: ; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vpextrq $1, %xmm0, %rax +; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512-NEXT: vmovq %xmm1, %rax ; AVX512-NEXT: vcvtusi2sd %rax, %xmm15, %xmm1 ; AVX512-NEXT: vmovq %xmm0, %rax ; AVX512-NEXT: vcvtusi2sd %rax, %xmm15, %xmm2 -; AVX512-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0 -; AVX512-NEXT: vmovq %xmm0, %rax +; AVX512-NEXT: vpextrq $1, %xmm0, %rax ; AVX512-NEXT: vcvtusi2sd %rax, %xmm15, %xmm0 -; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX512-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX512-NEXT: retq entry: %result = call <3 x double> @@ -7606,13 +7605,13 @@ entry: define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 { ; CHECK-LABEL: constrained_vector_uitofp_v3f32_v3i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: movq %rdx, %rax ; CHECK-NEXT: shrq %rax -; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: movl %edx, %ecx ; CHECK-NEXT: andl $1, %ecx ; CHECK-NEXT: orq %rax, %rcx -; CHECK-NEXT: testq %rsi, %rsi -; CHECK-NEXT: cmovnsq %rsi, %rcx +; CHECK-NEXT: testq %rdx, %rdx +; CHECK-NEXT: cmovnsq %rdx, %rcx ; CHECK-NEXT: cvtsi2ss %rcx, %xmm1 ; CHECK-NEXT: jns .LBB184_2 ; CHECK-NEXT: # %bb.1: @@ -7630,26 +7629,26 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 { ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: addss %xmm0, %xmm0 ; CHECK-NEXT: .LBB184_4: # %entry -; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: shrq %rax -; CHECK-NEXT: movl %edx, %ecx +; CHECK-NEXT: movl %esi, %ecx ; CHECK-NEXT: andl $1, %ecx ; CHECK-NEXT: orq %rax, %rcx -; CHECK-NEXT: testq %rdx, %rdx -; CHECK-NEXT: cmovnsq %rdx, %rcx -; CHECK-NEXT: xorps %xmm1, %xmm1 -; CHECK-NEXT: cvtsi2ss %rcx, %xmm1 +; CHECK-NEXT: testq %rsi, %rsi +; CHECK-NEXT: cmovnsq %rsi, %rcx +; CHECK-NEXT: cvtsi2ss %rcx, %xmm2 ; CHECK-NEXT: jns .LBB184_6 ; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: addss %xmm1, %xmm1 +; CHECK-NEXT: addss %xmm2, %xmm2 ; CHECK-NEXT: .LBB184_6: # %entry +; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: retq ; ; AVX1-LABEL: constrained_vector_uitofp_v3f32_v3i64: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vpextrq $1, %xmm0, %rax +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vmovq %xmm1, %rax ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: movl %eax, %edx @@ -7675,9 +7674,7 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 { ; AVX1-NEXT: # %bb.3: ; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2 ; AVX1-NEXT: .LBB184_4: # %entry -; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vmovq %xmm0, %rax +; AVX1-NEXT: vpextrq $1, %xmm0, %rax ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: movl %eax, %edx @@ -7690,21 +7687,22 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 { ; AVX1-NEXT: # %bb.5: ; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: .LBB184_6: # %entry -; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] +; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[2,3] +; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX512-LABEL: constrained_vector_uitofp_v3f32_v3i64: ; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vpextrq $1, %xmm0, %rax +; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512-NEXT: vmovq %xmm1, %rax ; AVX512-NEXT: vcvtusi2ss %rax, %xmm15, %xmm1 ; AVX512-NEXT: vmovq %xmm0, %rax ; AVX512-NEXT: vcvtusi2ss %rax, %xmm15, %xmm2 -; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] -; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0 -; AVX512-NEXT: vmovq %xmm0, %rax +; AVX512-NEXT: vpextrq $1, %xmm0, %rax ; AVX512-NEXT: vcvtusi2ss %rax, %xmm15, %xmm0 -; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] +; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[2,3] +; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq entry: