diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 9f222268fc127..9945e1d449430 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5704,8 +5704,7 @@ SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) { SDValue Y = N1.getOperand(0); EVT XVT = X.getValueType(); SDLoc DL(N); - if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND || - HandOpcode == ISD::SIGN_EXTEND) { + if (ISD::isExtOpcode(HandOpcode) || ISD::isExtVecInRegOpcode(HandOpcode)) { // If both operands have other uses, this transform would create extra // instructions without eliminating anything. if (!N0.hasOneUse() && !N1.hasOneUse()) @@ -5720,8 +5719,9 @@ SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) { return SDValue(); // Avoid infinite looping with PromoteIntBinOp. // TODO: Should we apply desirable/legal constraints to all opcodes? - if (HandOpcode == ISD::ANY_EXTEND && LegalTypes && - !TLI.isTypeDesirableForOp(LogicOpcode, XVT)) + if ((HandOpcode == ISD::ANY_EXTEND || + HandOpcode == ISD::ANY_EXTEND_VECTOR_INREG) && + LegalTypes && !TLI.isTypeDesirableForOp(LogicOpcode, XVT)) return SDValue(); // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y) SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y); diff --git a/llvm/test/CodeGen/X86/bitcast-and-setcc-128.ll b/llvm/test/CodeGen/X86/bitcast-and-setcc-128.ll index 3e64af6216963..d0653bcfb29b6 100644 --- a/llvm/test/CodeGen/X86/bitcast-and-setcc-128.ll +++ b/llvm/test/CodeGen/X86/bitcast-and-setcc-128.ll @@ -181,26 +181,21 @@ define i2 @v2i8(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) { ; SSE2-LABEL: v2i8: ; SSE2: # %bb.0: ; SSE2-NEXT: pcmpgtb %xmm1, %xmm0 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] ; SSE2-NEXT: pcmpgtb %xmm3, %xmm2 +; SSE2-NEXT: pand %xmm0, %xmm2 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,1,1,4,5,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] -; SSE2-NEXT: pand %xmm0, %xmm1 -; SSE2-NEXT: movmskpd %xmm1, %eax +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,0,1,1,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE2-NEXT: movmskpd %xmm0, %eax ; SSE2-NEXT: # kill: def $al killed $al killed $eax ; SSE2-NEXT: retq ; ; SSSE3-LABEL: v2i8: ; SSSE3: # %bb.0: ; SSSE3-NEXT: pcmpgtb %xmm1, %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1] -; SSSE3-NEXT: pshufb %xmm1, %xmm0 ; SSSE3-NEXT: pcmpgtb %xmm3, %xmm2 -; SSSE3-NEXT: pshufb %xmm1, %xmm2 ; SSSE3-NEXT: pand %xmm0, %xmm2 +; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,u,u,0,u,u,u,u,u,u,u,1] ; SSSE3-NEXT: movmskpd %xmm2, %eax ; SSSE3-NEXT: # kill: def $al killed $al killed $eax ; SSSE3-NEXT: retq @@ -208,10 +203,9 @@ define i2 @v2i8(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) { ; AVX12-LABEL: v2i8: ; AVX12: # %bb.0: ; AVX12-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 -; AVX12-NEXT: vpmovsxbq %xmm0, %xmm0 ; AVX12-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm1 -; AVX12-NEXT: vpmovsxbq %xmm1, %xmm1 ; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX12-NEXT: vpmovsxbq %xmm0, %xmm0 ; AVX12-NEXT: vmovmskpd %xmm0, %eax ; AVX12-NEXT: # kill: def $al killed $al killed $eax ; AVX12-NEXT: retq @@ -249,23 +243,20 @@ define i2 @v2i16(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x i16> %d) { ; SSE2-SSSE3-LABEL: v2i16: ; SSE2-SSSE3: # %bb.0: ; SSE2-SSSE3-NEXT: pcmpgtw %xmm1, %xmm0 -; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7] -; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] ; SSE2-SSSE3-NEXT: pcmpgtw %xmm3, %xmm2 -; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,1,1,4,5,6,7] -; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] -; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1 -; SSE2-SSSE3-NEXT: movmskpd %xmm1, %eax +; SSE2-SSSE3-NEXT: pand %xmm0, %xmm2 +; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,0,1,1,4,5,6,7] +; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE2-SSSE3-NEXT: movmskpd %xmm0, %eax ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax ; SSE2-SSSE3-NEXT: retq ; ; AVX12-LABEL: v2i16: ; AVX12: # %bb.0: ; AVX12-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 -; AVX12-NEXT: vpmovsxwq %xmm0, %xmm0 ; AVX12-NEXT: vpcmpgtw %xmm3, %xmm2, %xmm1 -; AVX12-NEXT: vpmovsxwq %xmm1, %xmm1 ; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX12-NEXT: vpmovsxwq %xmm0, %xmm0 ; AVX12-NEXT: vmovmskpd %xmm0, %eax ; AVX12-NEXT: # kill: def $al killed $al killed $eax ; AVX12-NEXT: retq @@ -303,21 +294,19 @@ define i2 @v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32> %d) { ; SSE2-SSSE3-LABEL: v2i32: ; SSE2-SSSE3: # %bb.0: ; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] ; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm2 -; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,0,1,1] -; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1 -; SSE2-SSSE3-NEXT: movmskpd %xmm1, %eax +; SSE2-SSSE3-NEXT: pand %xmm0, %xmm2 +; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,1,1] +; SSE2-SSSE3-NEXT: movmskpd %xmm0, %eax ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax ; SSE2-SSSE3-NEXT: retq ; ; AVX12-LABEL: v2i32: ; AVX12: # %bb.0: ; AVX12-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; AVX12-NEXT: vpmovsxdq %xmm0, %xmm0 ; AVX12-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm1 -; AVX12-NEXT: vpmovsxdq %xmm1, %xmm1 ; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX12-NEXT: vpmovsxdq %xmm0, %xmm0 ; AVX12-NEXT: vmovmskpd %xmm0, %eax ; AVX12-NEXT: # kill: def $al killed $al killed $eax ; AVX12-NEXT: retq @@ -447,12 +436,10 @@ define i4 @v4i8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> %d) { ; SSE2-SSSE3-LABEL: v4i8: ; SSE2-SSSE3: # %bb.0: ; SSE2-SSSE3-NEXT: pcmpgtb %xmm1, %xmm0 -; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; SSE2-SSSE3-NEXT: pcmpgtb %xmm3, %xmm2 +; SSE2-SSSE3-NEXT: pand %xmm0, %xmm2 ; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3] -; SSE2-SSSE3-NEXT: pand %xmm0, %xmm2 ; SSE2-SSSE3-NEXT: movmskps %xmm2, %eax ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax ; SSE2-SSSE3-NEXT: retq @@ -460,10 +447,9 @@ define i4 @v4i8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> %d) { ; AVX12-LABEL: v4i8: ; AVX12: # %bb.0: ; AVX12-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 -; AVX12-NEXT: vpmovsxbd %xmm0, %xmm0 ; AVX12-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm1 -; AVX12-NEXT: vpmovsxbd %xmm1, %xmm1 ; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX12-NEXT: vpmovsxbd %xmm0, %xmm0 ; AVX12-NEXT: vmovmskps %xmm0, %eax ; AVX12-NEXT: # kill: def $al killed $al killed $eax ; AVX12-NEXT: retq @@ -501,10 +487,9 @@ define i4 @v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) { ; SSE2-SSSE3-LABEL: v4i16: ; SSE2-SSSE3: # %bb.0: ; SSE2-SSSE3-NEXT: pcmpgtw %xmm1, %xmm0 -; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; SSE2-SSSE3-NEXT: pcmpgtw %xmm3, %xmm2 -; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3] ; SSE2-SSSE3-NEXT: pand %xmm0, %xmm2 +; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3] ; SSE2-SSSE3-NEXT: movmskps %xmm2, %eax ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax ; SSE2-SSSE3-NEXT: retq @@ -512,10 +497,9 @@ define i4 @v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) { ; AVX12-LABEL: v4i16: ; AVX12: # %bb.0: ; AVX12-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 -; AVX12-NEXT: vpmovsxwd %xmm0, %xmm0 ; AVX12-NEXT: vpcmpgtw %xmm3, %xmm2, %xmm1 -; AVX12-NEXT: vpmovsxwd %xmm1, %xmm1 ; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX12-NEXT: vpmovsxwd %xmm0, %xmm0 ; AVX12-NEXT: vmovmskps %xmm0, %eax ; AVX12-NEXT: # kill: def $al killed $al killed $eax ; AVX12-NEXT: retq @@ -553,10 +537,9 @@ define i8 @v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) { ; SSE2-SSSE3-LABEL: v8i8: ; SSE2-SSSE3: # %bb.0: ; SSE2-SSSE3-NEXT: pcmpgtb %xmm1, %xmm0 -; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSE2-SSSE3-NEXT: pcmpgtb %xmm3, %xmm2 -; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSE2-SSSE3-NEXT: pand %xmm0, %xmm2 +; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSE2-SSSE3-NEXT: packsswb %xmm2, %xmm2 ; SSE2-SSSE3-NEXT: pmovmskb %xmm2, %eax ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax @@ -565,10 +548,9 @@ define i8 @v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) { ; AVX12-LABEL: v8i8: ; AVX12: # %bb.0: ; AVX12-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 -; AVX12-NEXT: vpmovsxbw %xmm0, %xmm0 ; AVX12-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm1 -; AVX12-NEXT: vpmovsxbw %xmm1, %xmm1 ; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX12-NEXT: vpmovsxbw %xmm0, %xmm0 ; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; AVX12-NEXT: vpmovmskb %xmm0, %eax ; AVX12-NEXT: # kill: def $al killed $al killed $eax diff --git a/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll b/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll index 0db3e47ce8974..92d4830452a8d 100644 --- a/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll +++ b/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll @@ -939,30 +939,27 @@ define <4 x i1> @eq_or_to_abs_vec4x16(<4 x i16> %x) { ; AVX2-LABEL: eq_or_to_abs_vec4x16: ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 -; AVX2-NEXT: vpmovsxwd %xmm1, %xmm1 ; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX2-NEXT: vpmovsxwd %xmm0, %xmm0 ; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpmovsxwd %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; SSE41-LABEL: eq_or_to_abs_vec4x16: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u> ; SSE41-NEXT: pcmpeqw %xmm0, %xmm1 -; SSE41-NEXT: pmovsxwd %xmm1, %xmm1 ; SSE41-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE41-NEXT: pmovsxwd %xmm0, %xmm0 ; SSE41-NEXT: por %xmm1, %xmm0 +; SSE41-NEXT: pmovsxwd %xmm0, %xmm0 ; SSE41-NEXT: retq ; ; SSE2-LABEL: eq_or_to_abs_vec4x16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u> ; SSE2-NEXT: pcmpeqw %xmm0, %xmm1 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] ; SSE2-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; SSE2-NEXT: por %xmm1, %xmm0 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; SSE2-NEXT: retq %cmp1 = icmp eq <4 x i16> %x, %cmp2 = icmp eq <4 x i16> %x, @@ -1035,12 +1032,10 @@ define <4 x i1> @ne_and_to_abs_vec4x8(<4 x i8> %x) { ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; AVX2-NEXT: vpmovsxbd %xmm1, %xmm1 ; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpmovsxbd %xmm0, %xmm0 -; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; ; SSE41-LABEL: ne_and_to_abs_vec4x8: @@ -1048,26 +1043,22 @@ define <4 x i1> @ne_and_to_abs_vec4x8(<4 x i8> %x) { ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u,u,u,u,u,u,u,u,u> ; SSE41-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE41-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE41-NEXT: pxor %xmm2, %xmm1 -; SSE41-NEXT: pmovsxbd %xmm1, %xmm1 ; SSE41-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: pxor %xmm2, %xmm0 ; SSE41-NEXT: pmovsxbd %xmm0, %xmm0 -; SSE41-NEXT: pand %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; SSE2-LABEL: ne_and_to_abs_vec4x8: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u,u,u,u,u,u,u,u,u> ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] -; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 ; SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] -; SSE2-NEXT: por %xmm1, %xmm0 -; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE2-NEXT: pxor %xmm1, %xmm0 ; SSE2-NEXT: retq %cmp1 = icmp ne <4 x i8> %x, %cmp2 = icmp ne <4 x i8> %x, diff --git a/llvm/test/CodeGen/X86/vector-ext-logic.ll b/llvm/test/CodeGen/X86/vector-ext-logic.ll index cfbc83d5e84e6..722f8c7b20363 100644 --- a/llvm/test/CodeGen/X86/vector-ext-logic.ll +++ b/llvm/test/CodeGen/X86/vector-ext-logic.ll @@ -155,17 +155,15 @@ define <8 x i16> @zext_and_v8i16(<8 x i8> %x, <8 x i8> %y) { define <8 x i16> @zext_or_v8i16(<8 x i8> %x, <8 x i8> %y) { ; SSE2-LABEL: zext_or_v8i16: ; SSE2: # %bb.0: -; SSE2-NEXT: pxor %xmm2, %xmm2 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] ; SSE2-NEXT: por %xmm1, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] ; SSE2-NEXT: retq ; ; AVX2-LABEL: zext_or_v8i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX2-NEXT: retq %xz = zext <8 x i8> %x to <8 x i16> %yz = zext <8 x i8> %y to <8 x i16> @@ -176,17 +174,15 @@ define <8 x i16> @zext_or_v8i16(<8 x i8> %x, <8 x i8> %y) { define <8 x i16> @zext_xor_v8i16(<8 x i8> %x, <8 x i8> %y) { ; SSE2-LABEL: zext_xor_v8i16: ; SSE2: # %bb.0: -; SSE2-NEXT: pxor %xmm2, %xmm2 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] ; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] ; SSE2-NEXT: retq ; ; AVX2-LABEL: zext_xor_v8i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX2-NEXT: retq %xz = zext <8 x i8> %x to <8 x i16> %yz = zext <8 x i8> %y to <8 x i16> @@ -197,17 +193,15 @@ define <8 x i16> @zext_xor_v8i16(<8 x i8> %x, <8 x i8> %y) { define <8 x i16> @sext_and_v8i16(<8 x i8> %x, <8 x i8> %y) { ; SSE2-LABEL: sext_and_v8i16: ; SSE2: # %bb.0: -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: pand %xmm1, %xmm0 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSE2-NEXT: psraw $8, %xmm0 ; SSE2-NEXT: retq ; ; AVX2-LABEL: sext_and_v8i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vpmovsxbw %xmm0, %xmm0 -; AVX2-NEXT: vpmovsxbw %xmm1, %xmm1 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpmovsxbw %xmm0, %xmm0 ; AVX2-NEXT: retq %xs = sext <8 x i8> %x to <8 x i16> %ys = sext <8 x i8> %y to <8 x i16> @@ -218,17 +212,15 @@ define <8 x i16> @sext_and_v8i16(<8 x i8> %x, <8 x i8> %y) { define <8 x i16> @sext_or_v8i16(<8 x i8> %x, <8 x i8> %y) { ; SSE2-LABEL: sext_or_v8i16: ; SSE2: # %bb.0: -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; SSE2-NEXT: por %xmm2, %xmm0 +; SSE2-NEXT: por %xmm1, %xmm0 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSE2-NEXT: psraw $8, %xmm0 ; SSE2-NEXT: retq ; ; AVX2-LABEL: sext_or_v8i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vpmovsxbw %xmm0, %xmm0 -; AVX2-NEXT: vpmovsxbw %xmm1, %xmm1 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpmovsxbw %xmm0, %xmm0 ; AVX2-NEXT: retq %xs = sext <8 x i8> %x to <8 x i16> %ys = sext <8 x i8> %y to <8 x i16> @@ -239,17 +231,15 @@ define <8 x i16> @sext_or_v8i16(<8 x i8> %x, <8 x i8> %y) { define <8 x i16> @sext_xor_v8i16(<8 x i8> %x, <8 x i8> %y) { ; SSE2-LABEL: sext_xor_v8i16: ; SSE2: # %bb.0: -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSE2-NEXT: psraw $8, %xmm0 ; SSE2-NEXT: retq ; ; AVX2-LABEL: sext_xor_v8i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vpmovsxbw %xmm0, %xmm0 -; AVX2-NEXT: vpmovsxbw %xmm1, %xmm1 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpmovsxbw %xmm0, %xmm0 ; AVX2-NEXT: retq %xs = sext <8 x i8> %x to <8 x i16> %ys = sext <8 x i8> %y to <8 x i16>