Expand Up
@@ -290,10 +290,9 @@ define i8 @xor_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: movl %edx, %ecx
; CHECK-NEXT: shrb %cl , %dil
; CHECK-NEXT: xorl %esi , %edi
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: shrb %cl, %sil
; CHECK-NEXT: xorb %sil, %al
; CHECK-NEXT: shrb %cl, %dil
; CHECK-NEXT: xorb %dil, %al
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
Expand All
@@ -309,11 +308,10 @@ define i32 @xor_lshr_commute1(i32 %x0, i32 %x1, i32 %y, i32 %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: movl %edx, %ecx
; CHECK-NEXT: shrl %cl , %edi
; CHECK-NEXT: xorl %esi , %edi
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: shrl %cl, %esi
; CHECK-NEXT: xorl %edi, %esi
; CHECK-NEXT: xorl %esi, %eax
; CHECK-NEXT: shrl %cl, %edi
; CHECK-NEXT: xorl %edi, %eax
; CHECK-NEXT: retq
%sh1 = lshr i32 %x0 , %y
%sh2 = lshr i32 %x1 , %y
Expand All
@@ -325,17 +323,13 @@ define i32 @xor_lshr_commute1(i32 %x0, i32 %x1, i32 %y, i32 %z) {
define <8 x i16 > @xor_lshr_commute2 (<8 x i16 > %x0 , <8 x i16 > %x1 , <8 x i16 > %y , <8 x i16 > %z ) {
; CHECK-LABEL: xor_lshr_commute2:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; CHECK-NEXT: vpsrlvd %ymm2, %ymm0, %ymm0
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm4
; CHECK-NEXT: vpackusdw %xmm4, %xmm0, %xmm0
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; CHECK-NEXT: vpsrlvd %ymm2, %ymm1, %ymm1
; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm2
; CHECK-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpxor %xmm1, %xmm3, %xmm1
; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpxor %xmm3, %xmm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%sh1 = lshr <8 x i16 > %x0 , %y
Expand All
@@ -348,10 +342,9 @@ define <8 x i16> @xor_lshr_commute2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y,
define <2 x i64 > @xor_lshr_commute3 (<2 x i64 > %x0 , <2 x i64 > %x1 , <2 x i64 > %y , <2 x i64 > %z ) {
; CHECK-LABEL: xor_lshr_commute3:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsrlvq %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vpsrlvq %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpxor %xmm0, %xmm3, %xmm0
; CHECK-NEXT: vpsrlvq %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vpxor %xmm3, %xmm0, %xmm0
; CHECK-NEXT: retq
%sh1 = lshr <2 x i64 > %x0 , %y
%sh2 = lshr <2 x i64 > %x1 , %y
Expand All
@@ -365,13 +358,11 @@ define i16 @xor_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: movl %ecx, %r8d
; CHECK-NEXT: movl %edx, %ecx
; CHECK-NEXT: movswl %si, %eax
; CHECK-NEXT: movswl %di, %edx
; CHECK-NEXT: sarl %cl, %edx
; CHECK-NEXT: xorl %esi, %edi
; CHECK-NEXT: movswl %di, %eax
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: sarl %cl, %eax
; CHECK-NEXT: xorl %r8d, %eax
; CHECK-NEXT: xorl %edx, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%sh1 = ashr i16 %x0 , %y
Expand All
@@ -386,11 +377,10 @@ define i64 @xor_ashr_commute1(i64 %x0, i64 %x1, i64 %y, i64 %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rcx, %rax
; CHECK-NEXT: movq %rdx, %rcx
; CHECK-NEXT: sarq %cl , %rdi
; CHECK-NEXT: xorq %rsi , %rdi
; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx
; CHECK-NEXT: sarq %cl, %rsi
; CHECK-NEXT: xorq %rdi, %rsi
; CHECK-NEXT: xorq %rsi, %rax
; CHECK-NEXT: sarq %cl, %rdi
; CHECK-NEXT: xorq %rdi, %rax
; CHECK-NEXT: retq
%sh1 = ashr i64 %x0 , %y
%sh2 = ashr i64 %x1 , %y
Expand All
@@ -402,10 +392,9 @@ define i64 @xor_ashr_commute1(i64 %x0, i64 %x1, i64 %y, i64 %z) {
define <4 x i32 > @xor_ashr_commute2 (<4 x i32 > %x0 , <4 x i32 > %x1 , <4 x i32 > %y , <4 x i32 > %z ) {
; CHECK-LABEL: xor_ashr_commute2:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsravd %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vpsravd %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpxor %xmm1, %xmm3, %xmm1
; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpsravd %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vpxor %xmm3, %xmm0, %xmm0
; CHECK-NEXT: retq
%sh1 = ashr <4 x i32 > %x0 , %y
%sh2 = ashr <4 x i32 > %x1 , %y
Expand All
@@ -417,49 +406,32 @@ define <4 x i32> @xor_ashr_commute2(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %y,
define <16 x i8 > @xor_ashr_commute3 (<16 x i8 > %x0 , <16 x i8 > %x1 , <16 x i8 > %y , <16 x i8 > %z ) {
; CHECK-LABEL: xor_ashr_commute3:
; CHECK: # %bb.0:
; CHECK-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; CHECK-NEXT: vpsraw $4, %xmm4, %xmm5
; CHECK-NEXT: vpsllw $5, %xmm2, %xmm2
; CHECK-NEXT: vpunpckhbw {{.*#+}} xmm6 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; CHECK-NEXT: vpblendvb %xmm6, %xmm5, %xmm4, %xmm4
; CHECK-NEXT: vpsraw $2, %xmm4, %xmm5
; CHECK-NEXT: vpaddw %xmm6, %xmm6, %xmm7
; CHECK-NEXT: vpblendvb %xmm7, %xmm5, %xmm4, %xmm4
; CHECK-NEXT: vpsraw $1, %xmm4, %xmm5
; CHECK-NEXT: vpaddw %xmm7, %xmm7, %xmm8
; CHECK-NEXT: vpblendvb %xmm8, %xmm5, %xmm4, %xmm4
; CHECK-NEXT: vpsrlw $8, %xmm4, %xmm9
; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; CHECK-NEXT: vpsraw $4, %xmm0, %xmm5
; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; CHECK-NEXT: vpblendvb %xmm2, %xmm5, %xmm0, %xmm0
; CHECK-NEXT: vpsraw $2, %xmm0, %xmm5
; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm4
; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm0, %xmm0
; CHECK-NEXT: vpsraw $1, %xmm0, %xmm5
; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm10
; CHECK-NEXT: vpblendvb %xmm10, %xmm5, %xmm0, %xmm0
; CHECK-NEXT: vpsrlw $8, %xmm0, %xmm0
; CHECK-NEXT: vpackuswb %xmm9, %xmm0, %xmm9
; CHECK-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; CHECK-NEXT: vpsraw $4, %xmm5, %xmm0
; CHECK-NEXT: vpblendvb %xmm6, %xmm0, %xmm5, %xmm0
; CHECK-NEXT: vpsraw $2, %xmm0, %xmm5
; CHECK-NEXT: vpblendvb %xmm7, %xmm5, %xmm0, %xmm0
; CHECK-NEXT: vpsraw $1, %xmm0, %xmm5
; CHECK-NEXT: vpblendvb %xmm8, %xmm5, %xmm0, %xmm0
; CHECK-NEXT: vpsrlw $8, %xmm0, %xmm0
; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; CHECK-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; CHECK-NEXT: vpsraw $4, %xmm1, %xmm5
; CHECK-NEXT: vpblendvb %xmm2, %xmm5, %xmm1, %xmm1
; CHECK-NEXT: vpsraw $2, %xmm1, %xmm2
; CHECK-NEXT: vpblendvb %xmm4, %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpsraw $1, %xmm1, %xmm2
; CHECK-NEXT: vpblendvb %xmm10, %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
; CHECK-NEXT: vpsraw $2, %xmm1, %xmm5
; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
; CHECK-NEXT: vpsraw $1, %xmm1, %xmm5
; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
; CHECK-NEXT: vpsrlw $8, %xmm1, %xmm1
; CHECK-NEXT: vpackuswb %xmm0, %xmm1, %xmm0
; CHECK-NEXT: vpxor %xmm0, %xmm9, %xmm0
; CHECK-NEXT: vpxor %xmm0, %xmm3, %xmm0
; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; CHECK-NEXT: vpsraw $4, %xmm0, %xmm4
; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
; CHECK-NEXT: vpsraw $2, %xmm0, %xmm4
; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
; CHECK-NEXT: vpsraw $1, %xmm0, %xmm4
; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
; CHECK-NEXT: vpsrlw $8, %xmm0, %xmm0
; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpxor %xmm3, %xmm0, %xmm0
; CHECK-NEXT: retq
%sh1 = ashr <16 x i8 > %x0 , %y
%sh2 = ashr <16 x i8 > %x1 , %y
Expand All
@@ -473,10 +445,9 @@ define i32 @xor_shl_commute0(i32 %x0, i32 %x1, i32 %y, i32 %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: movl %edx, %ecx
; CHECK-NEXT: shll %cl , %edi
; CHECK-NEXT: xorl %esi , %edi
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: shll %cl, %esi
; CHECK-NEXT: xorl %esi, %eax
; CHECK-NEXT: shll %cl, %edi
; CHECK-NEXT: xorl %edi, %eax
; CHECK-NEXT: retq
%sh1 = shl i32 %x0 , %y
Expand All
@@ -491,11 +462,10 @@ define i8 @xor_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: movl %edx, %ecx
; CHECK-NEXT: shlb %cl , %dil
; CHECK-NEXT: xorl %esi , %edi
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: shlb %cl, %sil
; CHECK-NEXT: xorb %dil, %sil
; CHECK-NEXT: xorb %sil, %al
; CHECK-NEXT: shlb %cl, %dil
; CHECK-NEXT: xorb %dil, %al
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%sh1 = shl i8 %x0 , %y
Expand All
@@ -508,10 +478,9 @@ define i8 @xor_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
define <2 x i64 > @xor_shl_commute2 (<2 x i64 > %x0 , <2 x i64 > %x1 , <2 x i64 > %y , <2 x i64 > %z ) {
; CHECK-LABEL: xor_shl_commute2:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vpsllvq %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpxor %xmm1, %xmm3, %xmm1
; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vpxor %xmm3, %xmm0, %xmm0
; CHECK-NEXT: retq
%sh1 = shl <2 x i64 > %x0 , %y
%sh2 = shl <2 x i64 > %x1 , %y
Expand All
@@ -523,18 +492,13 @@ define <2 x i64> @xor_shl_commute2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <
define <8 x i16 > @xor_shl_commute3 (<8 x i16 > %x0 , <8 x i16 > %x1 , <8 x i16 > %y , <8 x i16 > %z ) {
; CHECK-LABEL: xor_shl_commute3:
; CHECK: # %bb.0:
; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; CHECK-NEXT: vpsllvd %ymm2, %ymm0, %ymm0
; CHECK-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; CHECK-NEXT: vpshufb %ymm4, %ymm0, %ymm0
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; CHECK-NEXT: vpsllvd %ymm2, %ymm1, %ymm1
; CHECK-NEXT: vpshufb %ymm4, %ymm1, %ymm1
; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpxor %xmm0, %xmm3, %xmm0
; CHECK-NEXT: vpxor %xmm3, %xmm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%sh1 = shl <8 x i16 > %x0 , %y
Expand Down
Expand Up
@@ -610,10 +574,9 @@ define i8 @and_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: movl %edx, %ecx
; CHECK-NEXT: shrb %cl , %dil
; CHECK-NEXT: andl %esi , %edi
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: shrb %cl, %sil
; CHECK-NEXT: andb %sil, %al
; CHECK-NEXT: shrb %cl, %dil
; CHECK-NEXT: andb %dil, %al
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
Expand All
@@ -629,11 +592,10 @@ define i32 @and_lshr_commute1(i32 %x0, i32 %x1, i32 %y, i32 %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: movl %edx, %ecx
; CHECK-NEXT: shrl %cl , %edi
; CHECK-NEXT: andl %esi , %edi
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: shrl %cl, %esi
; CHECK-NEXT: andl %edi, %esi
; CHECK-NEXT: andl %esi, %eax
; CHECK-NEXT: shrl %cl, %edi
; CHECK-NEXT: andl %edi, %eax
; CHECK-NEXT: retq
%sh1 = lshr i32 %x0 , %y
%sh2 = lshr i32 %x1 , %y
Expand All
@@ -645,17 +607,13 @@ define i32 @and_lshr_commute1(i32 %x0, i32 %x1, i32 %y, i32 %z) {
define <8 x i16 > @and_lshr_commute2 (<8 x i16 > %x0 , <8 x i16 > %x1 , <8 x i16 > %y , <8 x i16 > %z ) {
; CHECK-LABEL: and_lshr_commute2:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; CHECK-NEXT: vpsrlvd %ymm2, %ymm0, %ymm0
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm4
; CHECK-NEXT: vpackusdw %xmm4, %xmm0, %xmm0
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; CHECK-NEXT: vpsrlvd %ymm2, %ymm1, %ymm1
; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm2
; CHECK-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpand %xmm1, %xmm3, %xmm1
; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpand %xmm3, %xmm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%sh1 = lshr <8 x i16 > %x0 , %y
Expand All
@@ -668,10 +626,9 @@ define <8 x i16> @and_lshr_commute2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y,
define <2 x i64 > @and_lshr_commute3 (<2 x i64 > %x0 , <2 x i64 > %x1 , <2 x i64 > %y , <2 x i64 > %z ) {
; CHECK-LABEL: and_lshr_commute3:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsrlvq %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vpsrlvq %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpand %xmm0, %xmm3, %xmm0
; CHECK-NEXT: vpsrlvq %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vpand %xmm3, %xmm0, %xmm0
; CHECK-NEXT: retq
%sh1 = lshr <2 x i64 > %x0 , %y
%sh2 = lshr <2 x i64 > %x1 , %y
Expand All
@@ -685,13 +642,11 @@ define i16 @and_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: movl %ecx, %r8d
; CHECK-NEXT: movl %edx, %ecx
; CHECK-NEXT: movswl %si, %eax
; CHECK-NEXT: movswl %di, %edx
; CHECK-NEXT: sarl %cl, %edx
; CHECK-NEXT: andl %esi, %edi
; CHECK-NEXT: movswl %di, %eax
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: sarl %cl, %eax
; CHECK-NEXT: andl %r8d, %eax
; CHECK-NEXT: andl %edx, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%sh1 = ashr i16 %x0 , %y
Expand All
@@ -706,11 +661,10 @@ define i64 @and_ashr_commute1(i64 %x0, i64 %x1, i64 %y, i64 %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rcx, %rax
; CHECK-NEXT: movq %rdx, %rcx
; CHECK-NEXT: sarq %cl , %rdi
; CHECK-NEXT: andq %rsi , %rdi
; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx
; CHECK-NEXT: sarq %cl, %rsi
; CHECK-NEXT: andq %rdi, %rsi
; CHECK-NEXT: andq %rsi, %rax
; CHECK-NEXT: sarq %cl, %rdi
; CHECK-NEXT: andq %rdi, %rax
; CHECK-NEXT: retq
%sh1 = ashr i64 %x0 , %y
%sh2 = ashr i64 %x1 , %y
Expand All
@@ -722,10 +676,9 @@ define i64 @and_ashr_commute1(i64 %x0, i64 %x1, i64 %y, i64 %z) {
define <4 x i32 > @and_ashr_commute2 (<4 x i32 > %x0 , <4 x i32 > %x1 , <4 x i32 > %y , <4 x i32 > %z ) {
; CHECK-LABEL: and_ashr_commute2:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsravd %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vpsravd %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpand %xmm1, %xmm3, %xmm1
; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpsravd %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vpand %xmm3, %xmm0, %xmm0
; CHECK-NEXT: retq
%sh1 = ashr <4 x i32 > %x0 , %y
%sh2 = ashr <4 x i32 > %x1 , %y
Expand All
@@ -737,49 +690,32 @@ define <4 x i32> @and_ashr_commute2(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %y,
define <16 x i8 > @and_ashr_commute3 (<16 x i8 > %x0 , <16 x i8 > %x1 , <16 x i8 > %y , <16 x i8 > %z ) {
; CHECK-LABEL: and_ashr_commute3:
; CHECK: # %bb.0:
; CHECK-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; CHECK-NEXT: vpsraw $4, %xmm4, %xmm5
; CHECK-NEXT: vpsllw $5, %xmm2, %xmm2
; CHECK-NEXT: vpunpckhbw {{.*#+}} xmm6 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; CHECK-NEXT: vpblendvb %xmm6, %xmm5, %xmm4, %xmm4
; CHECK-NEXT: vpsraw $2, %xmm4, %xmm5
; CHECK-NEXT: vpaddw %xmm6, %xmm6, %xmm7
; CHECK-NEXT: vpblendvb %xmm7, %xmm5, %xmm4, %xmm4
; CHECK-NEXT: vpsraw $1, %xmm4, %xmm5
; CHECK-NEXT: vpaddw %xmm7, %xmm7, %xmm8
; CHECK-NEXT: vpblendvb %xmm8, %xmm5, %xmm4, %xmm4
; CHECK-NEXT: vpsrlw $8, %xmm4, %xmm9
; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; CHECK-NEXT: vpsraw $4, %xmm0, %xmm5
; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; CHECK-NEXT: vpblendvb %xmm2, %xmm5, %xmm0, %xmm0
; CHECK-NEXT: vpsraw $2, %xmm0, %xmm5
; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm4
; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm0, %xmm0
; CHECK-NEXT: vpsraw $1, %xmm0, %xmm5
; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm10
; CHECK-NEXT: vpblendvb %xmm10, %xmm5, %xmm0, %xmm0
; CHECK-NEXT: vpsrlw $8, %xmm0, %xmm0
; CHECK-NEXT: vpackuswb %xmm9, %xmm0, %xmm9
; CHECK-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; CHECK-NEXT: vpsraw $4, %xmm5, %xmm0
; CHECK-NEXT: vpblendvb %xmm6, %xmm0, %xmm5, %xmm0
; CHECK-NEXT: vpsraw $2, %xmm0, %xmm5
; CHECK-NEXT: vpblendvb %xmm7, %xmm5, %xmm0, %xmm0
; CHECK-NEXT: vpsraw $1, %xmm0, %xmm5
; CHECK-NEXT: vpblendvb %xmm8, %xmm5, %xmm0, %xmm0
; CHECK-NEXT: vpsrlw $8, %xmm0, %xmm0
; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; CHECK-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; CHECK-NEXT: vpsraw $4, %xmm1, %xmm5
; CHECK-NEXT: vpblendvb %xmm2, %xmm5, %xmm1, %xmm1
; CHECK-NEXT: vpsraw $2, %xmm1, %xmm2
; CHECK-NEXT: vpblendvb %xmm4, %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpsraw $1, %xmm1, %xmm2
; CHECK-NEXT: vpblendvb %xmm10, %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
; CHECK-NEXT: vpsraw $2, %xmm1, %xmm5
; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
; CHECK-NEXT: vpsraw $1, %xmm1, %xmm5
; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
; CHECK-NEXT: vpsrlw $8, %xmm1, %xmm1
; CHECK-NEXT: vpackuswb %xmm0, %xmm1, %xmm0
; CHECK-NEXT: vpand %xmm0, %xmm9, %xmm0
; CHECK-NEXT: vpand %xmm0, %xmm3, %xmm0
; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; CHECK-NEXT: vpsraw $4, %xmm0, %xmm4
; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
; CHECK-NEXT: vpsraw $2, %xmm0, %xmm4
; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
; CHECK-NEXT: vpsraw $1, %xmm0, %xmm4
; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
; CHECK-NEXT: vpsrlw $8, %xmm0, %xmm0
; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpand %xmm3, %xmm0, %xmm0
; CHECK-NEXT: retq
%sh1 = ashr <16 x i8 > %x0 , %y
%sh2 = ashr <16 x i8 > %x1 , %y
Expand All
@@ -793,10 +729,9 @@ define i32 @and_shl_commute0(i32 %x0, i32 %x1, i32 %y, i32 %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: movl %edx, %ecx
; CHECK-NEXT: shll %cl , %edi
; CHECK-NEXT: andl %esi , %edi
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: shll %cl, %esi
; CHECK-NEXT: andl %esi, %eax
; CHECK-NEXT: shll %cl, %edi
; CHECK-NEXT: andl %edi, %eax
; CHECK-NEXT: retq
%sh1 = shl i32 %x0 , %y
Expand All
@@ -811,11 +746,10 @@ define i8 @and_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: movl %edx, %ecx
; CHECK-NEXT: shlb %cl , %dil
; CHECK-NEXT: andl %esi , %edi
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: shlb %cl, %sil
; CHECK-NEXT: andb %dil, %sil
; CHECK-NEXT: andb %sil, %al
; CHECK-NEXT: shlb %cl, %dil
; CHECK-NEXT: andb %dil, %al
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%sh1 = shl i8 %x0 , %y
Expand All
@@ -828,10 +762,9 @@ define i8 @and_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
define <2 x i64 > @and_shl_commute2 (<2 x i64 > %x0 , <2 x i64 > %x1 , <2 x i64 > %y , <2 x i64 > %z ) {
; CHECK-LABEL: and_shl_commute2:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vpsllvq %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpand %xmm1, %xmm3, %xmm1
; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vpand %xmm3, %xmm0, %xmm0
; CHECK-NEXT: retq
%sh1 = shl <2 x i64 > %x0 , %y
%sh2 = shl <2 x i64 > %x1 , %y
Expand All
@@ -843,18 +776,13 @@ define <2 x i64> @and_shl_commute2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <
define <8 x i16 > @and_shl_commute3 (<8 x i16 > %x0 , <8 x i16 > %x1 , <8 x i16 > %y , <8 x i16 > %z ) {
; CHECK-LABEL: and_shl_commute3:
; CHECK: # %bb.0:
; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; CHECK-NEXT: vpsllvd %ymm2, %ymm0, %ymm0
; CHECK-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; CHECK-NEXT: vpshufb %ymm4, %ymm0, %ymm0
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; CHECK-NEXT: vpsllvd %ymm2, %ymm1, %ymm1
; CHECK-NEXT: vpshufb %ymm4, %ymm1, %ymm1
; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpand %xmm0, %xmm3, %xmm0
; CHECK-NEXT: vpand %xmm3, %xmm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%sh1 = shl <8 x i16 > %x0 , %y
Expand Down