[ISEL] Canonicalise constant splats to RHS.
SelectionDAG::getNode() canonicalises constants to the RHS if the
operation is commutative, but it doesn't do so for constant splat
vectors. Doing this early enables certain folds on vector types and
simplifies the code required for target DAGCombines that run before
type legalization.
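
As an illustration, here is a minimal sketch (not code from this patch;
tryFoldMulBySplatPow2 is a hypothetical helper) of the kind of target
combine this simplifies: once getNode() has moved the constant splat to
the RHS, the fold only has to inspect operand 1.

    // Sketch of a target DAGCombine enabled by the canonicalisation.
    #include "llvm/CodeGen/SelectionDAG.h"
    using namespace llvm;

    static SDValue tryFoldMulBySplatPow2(SDNode *N, SelectionDAG &DAG) {
      SDValue X = N->getOperand(0);
      SDValue C = N->getOperand(1); // the constant splat, if any, is now here
      APInt SplatVal;
      if (ISD::isConstantSplatVector(C.getNode(), SplatVal) &&
          SplatVal.isPowerOf2()) {
        SDLoc DL(N);
        EVT VT = N->getValueType(0);
        // mul X, splat(2^K) --> shl X, splat(K)
        return DAG.getNode(ISD::SHL, DL, VT, X,
                           DAG.getConstant(SplatVal.logBase2(), DL, VT));
      }
      return SDValue(); // no fold
    }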

Somewhat to my surprise, DAGCombine doesn't seem to traverse the
DAG in a post-order DFS, so at the time a custom fold is attempted on a
node whose input is a MUL, DAGCombiner::visitMUL hasn't yet reordered
the constant splat to the RHS.
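
Concretely, without this canonicalisation such a combine had to
normalise the operand order itself, along these lines (again a hedged
sketch; moveSplatToRHS is a hypothetical helper):

    // Pre-patch workaround: visitMUL may not have run on the node yet, so
    // the constant splat could still be on the LHS; check both orders.
    static void moveSplatToRHS(SDValue &X, SDValue &C) {
      APInt SplatVal;
      if (ISD::isConstantSplatVector(X.getNode(), SplatVal) &&
          !ISD::isConstantSplatVector(C.getNode(), SplatVal))
        std::swap(X, C); // put the splat constant on the RHS by hand
    }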

This patch leads to a few improvements, but also a few minor regressions,
which I traced back to D46492. When I tried reverting that change to see
whether it is still necessary, I ran into some segfaults; I'm not sure
whether there is a latent bug there.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D117794
sdesmalen-arm committed Jan 24, 2022
1 parent 7ccacaf commit 4f8fdf7
Showing 11 changed files with 75 additions and 76 deletions.
19 changes: 8 additions & 11 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5610,22 +5610,19 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
   assert(N1.getOpcode() != ISD::DELETED_NODE &&
          N2.getOpcode() != ISD::DELETED_NODE &&
          "Operand is DELETED_NODE!");
-  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
-  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
-  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
-  ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
-
   // Canonicalize constant to RHS if commutative.
   if (TLI->isCommutativeBinOp(Opcode)) {
-    if (N1C && !N2C) {
-      std::swap(N1C, N2C);
-      std::swap(N1, N2);
-    } else if (N1CFP && !N2CFP) {
-      std::swap(N1CFP, N2CFP);
+    bool IsN1C = isConstantIntBuildVectorOrConstantInt(N1);
+    bool IsN2C = isConstantIntBuildVectorOrConstantInt(N2);
+    bool IsN1CFP = isConstantFPBuildVectorOrConstantFP(N1);
+    bool IsN2CFP = isConstantFPBuildVectorOrConstantFP(N2);
+    if ((IsN1C && !IsN2C) || (IsN1CFP && !IsN2CFP))
       std::swap(N1, N2);
-    }
   }
+
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);

   switch (Opcode) {
   default: break;
   case ISD::TokenFactor:
@@ -126,7 +126,8 @@ define <4 x i32> @out_constant_mone_vary(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
 define <4 x i32> @in_constant_mone_vary(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
 ; CHECK-LABEL: in_constant_mone_vary:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: orr v0.16b, v1.16b, v2.16b
+; CHECK-NEXT: bic v0.16b, v2.16b, v1.16b
+; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
 %n0 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %y ; %x
 %n1 = and <4 x i32> %n0, %mask
@@ -152,8 +153,9 @@ define <4 x i32> @out_constant_mone_vary_invmask(<4 x i32> %x, <4 x i32> %y, <4
 define <4 x i32> @in_constant_mone_vary_invmask(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
 ; CHECK-LABEL: in_constant_mone_vary_invmask:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: and v0.16b, v1.16b, v2.16b
-; CHECK-NEXT: orn v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: mvn v0.16b, v1.16b
+; CHECK-NEXT: bic v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
 %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
 %n0 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %y ; %x
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/PowerPC/combine-fneg.ll
@@ -13,10 +13,10 @@ define <4 x double> @fneg_fdiv_splat(double %a0, <4 x double> %a1) {
 ; CHECK-NEXT: xvredp 2, 0
 ; CHECK-NEXT: xxswapd 1, 1
 ; CHECK-NEXT: xxlor 3, 1, 1
-; CHECK-NEXT: xvmaddadp 3, 0, 2
-; CHECK-NEXT: xvnmsubadp 2, 2, 3
-; CHECK-NEXT: xvmaddadp 1, 0, 2
-; CHECK-NEXT: xvmsubadp 2, 2, 1
+; CHECK-NEXT: xvnmsubadp 3, 0, 2
+; CHECK-NEXT: xvmaddadp 2, 2, 3
+; CHECK-NEXT: xvnmsubadp 1, 0, 2
+; CHECK-NEXT: xvnmaddadp 2, 2, 1
 ; CHECK-NEXT: xvmuldp 34, 34, 2
 ; CHECK-NEXT: xvmuldp 35, 35, 2
 ; CHECK-NEXT: blr
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll
@@ -36,9 +36,9 @@ define <4 x float> @repeated_fp_divisor(float %a, <4 x float> %b) {
 ; CHECK-NEXT: lvx 4, 0, 3
 ; CHECK-NEXT: xxspltw 0, 0, 0
 ; CHECK-NEXT: xvresp 1, 0
-; CHECK-NEXT: xvnmsubasp 35, 0, 1
+; CHECK-NEXT: xvmaddasp 35, 0, 1
 ; CHECK-NEXT: xvmulsp 0, 34, 36
-; CHECK-NEXT: xvmaddasp 1, 1, 35
+; CHECK-NEXT: xvnmsubasp 1, 1, 35
 ; CHECK-NEXT: xvmulsp 34, 0, 1
 ; CHECK-NEXT: blr
 %ins = insertelement <4 x float> undef, float %a, i32 0
17 changes: 7 additions & 10 deletions llvm/test/CodeGen/X86/dpbusd_const.ll
@@ -68,8 +68,7 @@ define i32 @mul_4xi4_cz(<4 x i4> %a, i32 %c) {
 ; AVXVNNI-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; AVXVNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; AVXVNNI-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; AVXVNNI-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,127,0,0,0,0,0,0,0,0,0,0,0,0]
-; AVXVNNI-NEXT: {vex} vpdpbusd %xmm0, %xmm2, %xmm1
+; AVXVNNI-NEXT: {vex} vpdpbusd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
 ; AVXVNNI-NEXT: vmovd %xmm1, %eax
 ; AVXVNNI-NEXT: addl %edi, %eax
 ; AVXVNNI-NEXT: retq
@@ -80,10 +79,9 @@ define i32 @mul_4xi4_cz(<4 x i4> %a, i32 %c) {
 ; AVX512VNNI-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; AVX512VNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; AVX512VNNI-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; AVX512VNNI-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,127,0,0,0,0,0,0,0,0,0,0,0,0]
-; AVX512VNNI-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX512VNNI-NEXT: vpdpbusd %zmm0, %zmm1, %zmm2
-; AVX512VNNI-NEXT: vmovd %xmm2, %eax
+; AVX512VNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VNNI-NEXT: vpdpbusd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
+; AVX512VNNI-NEXT: vmovd %xmm1, %eax
 ; AVX512VNNI-NEXT: addl %edi, %eax
 ; AVX512VNNI-NEXT: vzeroupper
 ; AVX512VNNI-NEXT: retq
@@ -92,10 +90,9 @@ define i32 @mul_4xi4_cz(<4 x i4> %a, i32 %c) {
 ; AVX512VLVNNI: # %bb.0: # %entry
 ; AVX512VLVNNI-NEXT: vpmovdb %xmm0, %xmm0
 ; AVX512VLVNNI-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512VLVNNI-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,127,0,0,0,0,0,0,0,0,0,0,0,0]
-; AVX512VLVNNI-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX512VLVNNI-NEXT: vpdpbusd %xmm0, %xmm1, %xmm2
-; AVX512VLVNNI-NEXT: vmovd %xmm2, %eax
+; AVX512VLVNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VLVNNI-NEXT: vpdpbusd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX512VLVNNI-NEXT: vmovd %xmm1, %eax
 ; AVX512VLVNNI-NEXT: addl %edi, %eax
 ; AVX512VLVNNI-NEXT: retq
 entry:
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/extractelement-fp.ll
@@ -1070,7 +1070,7 @@ define float @round_v4f32(<4 x float> %x) nounwind {
 ; X64-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
 ; X64-NEXT: vandps %xmm1, %xmm0, %xmm1
 ; X64-NEXT: vbroadcastss {{.*#+}} xmm2 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
-; X64-NEXT: vorps %xmm1, %xmm2, %xmm1
+; X64-NEXT: vorps %xmm2, %xmm1, %xmm1
 ; X64-NEXT: vaddss %xmm1, %xmm0, %xmm0
 ; X64-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
 ; X64-NEXT: retq
@@ -1081,7 +1081,7 @@ define float @round_v4f32(<4 x float> %x) nounwind {
 ; X86-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
 ; X86-NEXT: vandps %xmm1, %xmm0, %xmm1
 ; X86-NEXT: vbroadcastss {{.*#+}} xmm2 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
-; X86-NEXT: vorps %xmm1, %xmm2, %xmm1
+; X86-NEXT: vorps %xmm2, %xmm1, %xmm1
 ; X86-NEXT: vaddss %xmm1, %xmm0, %xmm0
 ; X86-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
 ; X86-NEXT: vmovss %xmm0, (%esp)
@@ -1099,7 +1099,7 @@ define double @round_v4f64(<4 x double> %x) nounwind {
 ; X64-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
 ; X64-NEXT: vmovddup {{.*#+}} xmm2 = [4.9999999999999994E-1,4.9999999999999994E-1]
 ; X64-NEXT: # xmm2 = mem[0,0]
-; X64-NEXT: vorpd %xmm1, %xmm2, %xmm1
+; X64-NEXT: vorpd %xmm2, %xmm1, %xmm1
 ; X64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
 ; X64-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
 ; X64-NEXT: vzeroupper
@@ -1114,7 +1114,7 @@ define double @round_v4f64(<4 x double> %x) nounwind {
 ; X86-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1
 ; X86-NEXT: vmovddup {{.*#+}} xmm2 = [4.9999999999999994E-1,4.9999999999999994E-1]
 ; X86-NEXT: # xmm2 = mem[0,0]
-; X86-NEXT: vorpd %xmm1, %xmm2, %xmm1
+; X86-NEXT: vorpd %xmm2, %xmm1, %xmm1
 ; X86-NEXT: vaddsd %xmm1, %xmm0, %xmm0
 ; X86-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
 ; X86-NEXT: vmovsd %xmm0, (%esp)
14 changes: 7 additions & 7 deletions llvm/test/CodeGen/X86/fp-round.ll
@@ -41,7 +41,7 @@ define half @round_f16(half %h) {
 ; AVX1-NEXT: callq ___extendhfsf2
 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
-; AVX1-NEXT: vorps %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vorps %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vaddss %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT: callq ___truncsfhf2
@@ -94,7 +94,7 @@ define float @round_f32(float %x) {
 ; AVX1: ## %bb.0:
 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
-; AVX1-NEXT: vorps %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vorps %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vaddss %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT: retq
@@ -130,7 +130,7 @@ define double @round_f64(double %x) {
 ; AVX1-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
 ; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [4.9999999999999994E-1,4.9999999999999994E-1]
 ; AVX1-NEXT: ## xmm2 = mem[0,0]
-; AVX1-NEXT: vorpd %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vorpd %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vaddsd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT: retq
@@ -521,11 +521,11 @@ define <16 x float> @round_v16f32(<16 x float> %x) {
 ; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm3
 ; AVX1-NEXT: vmovaps {{.*#+}} ymm4 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
-; AVX1-NEXT: vorps %ymm3, %ymm4, %ymm3
+; AVX1-NEXT: vorps %ymm4, %ymm3, %ymm3
 ; AVX1-NEXT: vaddps %ymm3, %ymm0, %ymm0
 ; AVX1-NEXT: vroundps $11, %ymm0, %ymm0
 ; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm2
-; AVX1-NEXT: vorps %ymm2, %ymm4, %ymm2
+; AVX1-NEXT: vorps %ymm4, %ymm2, %ymm2
 ; AVX1-NEXT: vaddps %ymm2, %ymm1, %ymm1
 ; AVX1-NEXT: vroundps $11, %ymm1, %ymm1
 ; AVX1-NEXT: retq
@@ -620,11 +620,11 @@ define <8 x double> @round_v8f64(<8 x double> %x) {
 ; AVX1-NEXT: vmovapd {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
 ; AVX1-NEXT: vandpd %ymm2, %ymm0, %ymm3
 ; AVX1-NEXT: vmovapd {{.*#+}} ymm4 = [4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1]
-; AVX1-NEXT: vorpd %ymm3, %ymm4, %ymm3
+; AVX1-NEXT: vorpd %ymm4, %ymm3, %ymm3
 ; AVX1-NEXT: vaddpd %ymm3, %ymm0, %ymm0
 ; AVX1-NEXT: vroundpd $11, %ymm0, %ymm0
 ; AVX1-NEXT: vandpd %ymm2, %ymm1, %ymm2
-; AVX1-NEXT: vorpd %ymm2, %ymm4, %ymm2
+; AVX1-NEXT: vorpd %ymm4, %ymm2, %ymm2
 ; AVX1-NEXT: vaddpd %ymm2, %ymm1, %ymm1
 ; AVX1-NEXT: vroundpd $11, %ymm1, %ymm1
 ; AVX1-NEXT: retq
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/fp128-cast.ll
@@ -1326,7 +1326,7 @@ define fp128 @TestTruncCopysign(fp128 %x, i32 %n) nounwind {
 ; X64-AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT: vmovddup {{.*#+}} xmm1 = [+Inf,+Inf]
 ; X64-AVX-NEXT: # xmm1 = mem[0,0]
-; X64-AVX-NEXT: vorps %xmm0, %xmm1, %xmm0
+; X64-AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
 ; X64-AVX-NEXT: callq __extenddftf2@PLT
 ; X64-AVX-NEXT: addq $8, %rsp
 ; X64-AVX-NEXT: .LBB26_2: # %cleanup
@@ -465,9 +465,9 @@ define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1]
 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
-; X86-SSE2-NEXT: pmuludq %xmm2, %xmm1
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
 ; X86-SSE2-NEXT: pmuludq %xmm2, %xmm3
+; X86-SSE2-NEXT: pmuludq %xmm1, %xmm2
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; X86-SSE2-NEXT: pand %xmm1, %xmm0
@@ -491,9 +491,9 @@ define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X64-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
 ; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1]
 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
-; X64-SSE2-NEXT: pmuludq %xmm2, %xmm1
-; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
 ; X64-SSE2-NEXT: pmuludq %xmm2, %xmm3
+; X64-SSE2-NEXT: pmuludq %xmm1, %xmm2
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
 ; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; X64-SSE2-NEXT: pand %xmm1, %xmm0
@@ -611,9 +611,9 @@ define <4 x i1> @vec_4xi32_nonsplat_undef1_eq(<4 x i32> %x, <4 x i32> %y) nounwi
 ; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1]
 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
-; X86-SSE2-NEXT: pmuludq %xmm2, %xmm1
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
 ; X86-SSE2-NEXT: pmuludq %xmm2, %xmm3
+; X86-SSE2-NEXT: pmuludq %xmm1, %xmm2
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; X86-SSE2-NEXT: pand %xmm1, %xmm0
@@ -637,9 +637,9 @@ define <4 x i1> @vec_4xi32_nonsplat_undef1_eq(<4 x i32> %x, <4 x i32> %y) nounwi
 ; X64-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
 ; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1]
 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
-; X64-SSE2-NEXT: pmuludq %xmm2, %xmm1
-; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
 ; X64-SSE2-NEXT: pmuludq %xmm2, %xmm3
+; X64-SSE2-NEXT: pmuludq %xmm1, %xmm2
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
 ; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; X64-SSE2-NEXT: pand %xmm1, %xmm0
8 changes: 3 additions & 5 deletions llvm/test/CodeGen/X86/pr43509.ll
@@ -4,12 +4,10 @@
 define <8 x i8> @foo(<8 x float> %arg) {
 ; CHECK-LABEL: foo:
 ; CHECK: # %bb.0: # %bb
-; CHECK-NEXT: vcmpgtps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
-; CHECK-NEXT: vpmovm2b %k0, %xmm1
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmpltps %ymm2, %ymm0, %k1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vcmpltps %ymm1, %ymm0, %k1
+; CHECK-NEXT: vcmpgtps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
 ; CHECK-NEXT: vmovdqu8 {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1} {z}
-; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
 bb:
@@ -336,23 +336,26 @@ define <4 x i32> @in_constant_mone_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32
 ; CHECK-SSE1-LABEL: in_constant_mone_vary:
 ; CHECK-SSE1: # %bb.0:
 ; CHECK-SSE1-NEXT: movq %rdi, %rax
-; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
+; CHECK-SSE1-NEXT: movaps (%rdx), %xmm0
 ; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1
-; CHECK-SSE1-NEXT: andnps (%rdx), %xmm1
-; CHECK-SSE1-NEXT: orps %xmm0, %xmm1
+; CHECK-SSE1-NEXT: andnps (%rcx), %xmm1
+; CHECK-SSE1-NEXT: xorps %xmm0, %xmm1
 ; CHECK-SSE1-NEXT: movaps %xmm1, (%rdi)
 ; CHECK-SSE1-NEXT: retq
 ;
 ; CHECK-SSE2-LABEL: in_constant_mone_vary:
 ; CHECK-SSE2: # %bb.0:
-; CHECK-SSE2-NEXT: movaps (%rsi), %xmm0
-; CHECK-SSE2-NEXT: orps (%rdx), %xmm0
+; CHECK-SSE2-NEXT: movaps (%rsi), %xmm1
+; CHECK-SSE2-NEXT: movaps %xmm1, %xmm0
+; CHECK-SSE2-NEXT: andnps (%rdx), %xmm0
+; CHECK-SSE2-NEXT: xorps %xmm1, %xmm0
 ; CHECK-SSE2-NEXT: retq
 ;
 ; CHECK-XOP-LABEL: in_constant_mone_vary:
 ; CHECK-XOP: # %bb.0:
 ; CHECK-XOP-NEXT: vmovaps (%rsi), %xmm0
-; CHECK-XOP-NEXT: vorps (%rdx), %xmm0, %xmm0
+; CHECK-XOP-NEXT: vandnps (%rdx), %xmm0, %xmm1
+; CHECK-XOP-NEXT: vxorps %xmm0, %xmm1, %xmm0
 ; CHECK-XOP-NEXT: retq
 %x = load <4 x i32>, <4 x i32> *%px, align 16
 %y = load <4 x i32>, <4 x i32> *%py, align 16
@@ -408,30 +411,32 @@ define <4 x i32> @in_constant_mone_vary_invmask(<4 x i32> *%px, <4 x i32> *%py,
 ; CHECK-SSE1-LABEL: in_constant_mone_vary_invmask:
 ; CHECK-SSE1: # %bb.0:
 ; CHECK-SSE1-NEXT: movq %rdi, %rax
-; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
-; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
-; CHECK-SSE1-NEXT: xorps %xmm0, %xmm1
-; CHECK-SSE1-NEXT: andps (%rdx), %xmm0
-; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
-; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
+; CHECK-SSE1-NEXT: movaps (%rdx), %xmm0
+; CHECK-SSE1-NEXT: movaps (%rcx), %xmm1
+; CHECK-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE1-NEXT: movaps %xmm0, %xmm2
+; CHECK-SSE1-NEXT: andnps %xmm1, %xmm2
+; CHECK-SSE1-NEXT: xorps %xmm0, %xmm2
+; CHECK-SSE1-NEXT: movaps %xmm2, (%rdi)
 ; CHECK-SSE1-NEXT: retq
 ;
 ; CHECK-SSE2-LABEL: in_constant_mone_vary_invmask:
 ; CHECK-SSE2: # %bb.0:
-; CHECK-SSE2-NEXT: movdqa (%rdx), %xmm0
-; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
-; CHECK-SSE2-NEXT: pxor %xmm0, %xmm1
-; CHECK-SSE2-NEXT: pand (%rsi), %xmm0
-; CHECK-SSE2-NEXT: por %xmm1, %xmm0
+; CHECK-SSE2-NEXT: movdqa (%rsi), %xmm1
+; CHECK-SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; CHECK-SSE2-NEXT: pxor (%rdx), %xmm2
+; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm0
+; CHECK-SSE2-NEXT: pandn %xmm2, %xmm0
+; CHECK-SSE2-NEXT: pxor %xmm1, %xmm0
 ; CHECK-SSE2-NEXT: retq
 ;
 ; CHECK-XOP-LABEL: in_constant_mone_vary_invmask:
 ; CHECK-XOP: # %bb.0:
-; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0
+; CHECK-XOP-NEXT: vmovdqa (%rsi), %xmm0
 ; CHECK-XOP-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; CHECK-XOP-NEXT: vpxor %xmm1, %xmm0, %xmm1
-; CHECK-XOP-NEXT: vpand (%rsi), %xmm0, %xmm0
-; CHECK-XOP-NEXT: vpor %xmm0, %xmm1, %xmm0
+; CHECK-XOP-NEXT: vpxor (%rdx), %xmm1, %xmm1
+; CHECK-XOP-NEXT: vpandn %xmm1, %xmm0, %xmm1
+; CHECK-XOP-NEXT: vpxor %xmm0, %xmm1, %xmm0
 ; CHECK-XOP-NEXT: retq
 %x = load <4 x i32>, <4 x i32> *%px, align 16
 %y = load <4 x i32>, <4 x i32> *%py, align 16
