llvm/test/CodeGen/X86/setcc-non-simple-type.ll (44 additions, 34 deletions)
@@ -46,47 +46,52 @@ define void @failing(ptr %0, ptr %1) nounwind {
; CHECK-NEXT: movq 24(%rsi), %rcx
; CHECK-NEXT: movq 32(%rsi), %rdx
; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0]
; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [1,1]
; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [2,2]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_1: # %vector.ph
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB0_2 Depth 2
; CHECK-NEXT: xorpd %xmm3, %xmm3
; CHECK-NEXT: movq $-1024, %rdi # imm = 0xFC00
; CHECK-NEXT: movq $-1024, %rsi # imm = 0xFC00
; CHECK-NEXT: movdqa %xmm0, %xmm4
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_2: # %vector.body
; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: cmpq 1024(%rdx,%rdi), %rsi
; CHECK-NEXT: movq %rcx, %r8
; CHECK-NEXT: sbbq 1032(%rdx,%rdi), %r8
; CHECK-NEXT: setge %r8b
; CHECK-NEXT: movzbl %r8b, %r8d
; CHECK-NEXT: andl $1, %r8d
; CHECK-NEXT: movdqu 1024(%rdx,%rsi), %xmm5
; CHECK-NEXT: movdqu 1040(%rdx,%rsi), %xmm6
; CHECK-NEXT: movq %xmm5, %rdi
; CHECK-NEXT: movq %xmm6, %r8
; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,2,3]
; CHECK-NEXT: movq %xmm5, %r9
; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm6[2,3,2,3]
; CHECK-NEXT: movq %xmm5, %r10
; CHECK-NEXT: negq %r8
; CHECK-NEXT: movq %r8, %xmm5
; CHECK-NEXT: cmpq 1040(%rdx,%rdi), %rsi
; CHECK-NEXT: movq %rcx, %r8
; CHECK-NEXT: sbbq 1048(%rdx,%rdi), %r8
; CHECK-NEXT: sbbq %r10, %r8
; CHECK-NEXT: setge %r8b
; CHECK-NEXT: movzbl %r8b, %r8d
; CHECK-NEXT: andl $1, %r8d
; CHECK-NEXT: negq %r8
; CHECK-NEXT: movq %r8, %xmm6
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm6[0]
; CHECK-NEXT: movdqa %xmm1, %xmm6
; CHECK-NEXT: psllq %xmm4, %xmm6
; CHECK-NEXT: movq %r8, %xmm5
; CHECK-NEXT: negq %rdi
; CHECK-NEXT: movq %rcx, %rdi
; CHECK-NEXT: sbbq %r9, %rdi
; CHECK-NEXT: setge %dil
; CHECK-NEXT: movzbl %dil, %edi
; CHECK-NEXT: negq %rdi
; CHECK-NEXT: movq %rdi, %xmm6
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm5[0]
; CHECK-NEXT: movdqa %xmm1, %xmm5
; CHECK-NEXT: psllq %xmm4, %xmm5
; CHECK-NEXT: pshufd {{.*#+}} xmm7 = xmm4[2,3,2,3]
; CHECK-NEXT: movdqa %xmm1, %xmm8
; CHECK-NEXT: psllq %xmm7, %xmm8
; CHECK-NEXT: movsd {{.*#+}} xmm8 = xmm6[0],xmm8[1]
; CHECK-NEXT: andpd %xmm5, %xmm8
; CHECK-NEXT: movsd {{.*#+}} xmm8 = xmm5[0],xmm8[1]
; CHECK-NEXT: andpd %xmm6, %xmm8
; CHECK-NEXT: orpd %xmm8, %xmm3
; CHECK-NEXT: paddq %xmm2, %xmm4
; CHECK-NEXT: addq $32, %rdi
; CHECK-NEXT: addq $32, %rsi
; CHECK-NEXT: jne .LBB0_2
; CHECK-NEXT: # %bb.3: # %middle.block
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
@@ -101,42 +106,47 @@ define void @failing(ptr %0, ptr %1) nounwind {
; CHECK-AVX2-NEXT: movq 24(%rsi), %rcx
; CHECK-AVX2-NEXT: movq 32(%rsi), %rdx
; CHECK-AVX2-NEXT: vpmovsxbq {{.*#+}} xmm0 = [0,1]
; CHECK-AVX2-NEXT: xorl %esi, %esi
; CHECK-AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [1,1]
; CHECK-AVX2-NEXT: vpmovsxbq {{.*#+}} xmm2 = [2,2]
; CHECK-AVX2-NEXT: .p2align 4, 0x90
; CHECK-AVX2-NEXT: .LBB0_1: # %vector.ph
; CHECK-AVX2-NEXT: # =>This Loop Header: Depth=1
; CHECK-AVX2-NEXT: # Child Loop BB0_2 Depth 2
; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
; CHECK-AVX2-NEXT: movq $-1024, %rdi # imm = 0xFC00
; CHECK-AVX2-NEXT: movq $-1024, %rsi # imm = 0xFC00
; CHECK-AVX2-NEXT: vmovdqa %xmm0, %xmm4
; CHECK-AVX2-NEXT: .p2align 4, 0x90
; CHECK-AVX2-NEXT: .LBB0_2: # %vector.body
; CHECK-AVX2-NEXT: # Parent Loop BB0_1 Depth=1
; CHECK-AVX2-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-AVX2-NEXT: cmpq 1024(%rdx,%rdi), %rsi
; CHECK-AVX2-NEXT: movq %rcx, %r8
; CHECK-AVX2-NEXT: sbbq 1032(%rdx,%rdi), %r8
; CHECK-AVX2-NEXT: vmovdqu 1024(%rdx,%rsi), %xmm5
; CHECK-AVX2-NEXT: vmovdqu 1040(%rdx,%rsi), %xmm6
; CHECK-AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm7 = xmm5[0],xmm6[0]
; CHECK-AVX2-NEXT: vpunpckhqdq {{.*#+}} xmm5 = xmm5[1],xmm6[1]
; CHECK-AVX2-NEXT: vmovq %xmm5, %rdi
; CHECK-AVX2-NEXT: vpextrq $1, %xmm5, %r8
; CHECK-AVX2-NEXT: vmovq %xmm7, %r9
; CHECK-AVX2-NEXT: vpextrq $1, %xmm7, %r10
; CHECK-AVX2-NEXT: negq %r10
; CHECK-AVX2-NEXT: movq %rcx, %r10
; CHECK-AVX2-NEXT: sbbq %r8, %r10
; CHECK-AVX2-NEXT: setge %r8b
; CHECK-AVX2-NEXT: movzbl %r8b, %r8d
; CHECK-AVX2-NEXT: andl $1, %r8d
; CHECK-AVX2-NEXT: negq %r8
; CHECK-AVX2-NEXT: vmovq %r8, %xmm5
; CHECK-AVX2-NEXT: cmpq 1040(%rdx,%rdi), %rsi
; CHECK-AVX2-NEXT: negq %r9
; CHECK-AVX2-NEXT: movq %rcx, %r8
; CHECK-AVX2-NEXT: sbbq 1048(%rdx,%rdi), %r8
; CHECK-AVX2-NEXT: setge %r8b
; CHECK-AVX2-NEXT: movzbl %r8b, %r8d
; CHECK-AVX2-NEXT: andl $1, %r8d
; CHECK-AVX2-NEXT: negq %r8
; CHECK-AVX2-NEXT: vmovq %r8, %xmm6
; CHECK-AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm6[0]
; CHECK-AVX2-NEXT: sbbq %rdi, %r8
; CHECK-AVX2-NEXT: setge %dil
; CHECK-AVX2-NEXT: movzbl %dil, %edi
; CHECK-AVX2-NEXT: negq %rdi
; CHECK-AVX2-NEXT: vmovq %rdi, %xmm6
; CHECK-AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm6[0],xmm5[0]
; CHECK-AVX2-NEXT: vpsllvq %xmm4, %xmm1, %xmm6
; CHECK-AVX2-NEXT: vpand %xmm6, %xmm5, %xmm5
; CHECK-AVX2-NEXT: vpor %xmm3, %xmm5, %xmm3
; CHECK-AVX2-NEXT: vpaddq %xmm2, %xmm4, %xmm4
; CHECK-AVX2-NEXT: addq $32, %rdi
; CHECK-AVX2-NEXT: addq $32, %rsi
; CHECK-AVX2-NEXT: jne .LBB0_2
; CHECK-AVX2-NEXT: # %bb.3: # %middle.block
; CHECK-AVX2-NEXT: # in Loop: Header=BB0_1 Depth=1
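A note on the hunk above: each cmpq/sbbq/setge triple in the checked assembly computes one lane of a signed comparison between values wider than a register — cmpq subtracts the low i64 halves to set the borrow, sbbq propagates it through the high halves, and setge reads the signed result. A minimal scalar sketch of that lowering (the function name is hypothetical; the test's actual IR loop is not shown in this diff):

define i1 @sge_i128(i128 %a, i128 %b) {
  ; icmp sge on an illegal ("non-simple") wide type is expanded on
  ; x86-64 into cmpq (low half), sbbq (high half), setge
  %c = icmp sge i128 %a, %b
  ret i1 %c
}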
llvm/test/CodeGen/X86/vec_saddo.ll (5 additions, 9 deletions)
@@ -1045,16 +1045,12 @@ define <4 x i32> @saddo_v4i1(<4 x i1> %a0, <4 x i1> %a1, ptr %p2) nounwind {
;
; AVX512-LABEL: saddo_v4i1:
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm2
; AVX512-NEXT: vpslld $31, %xmm2, %xmm2
; AVX512-NEXT: vptestmd %xmm2, %xmm2, %k0
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512-NEXT: vptestmd %xmm0, %xmm0, %k0
; AVX512-NEXT: vpslld $31, %xmm1, %xmm0
; AVX512-NEXT: vptestmd %xmm0, %xmm0, %k1
; AVX512-NEXT: kxorw %k1, %k0, %k2
; AVX512-NEXT: kandw %k1, %k0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: kshiftlw $12, %k2, %k0
; AVX512-NEXT: kshiftrw $12, %k0, %k0
; AVX512-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: movb %al, (%rdi)
; AVX512-NEXT: retq
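The vec_saddo hunk above, and the vec_ssubo/vec_uaddo/vec_usubo hunks that follow, all exercise the same shape of IR: an arithmetic-with-overflow intrinsic on <4 x i1>, storing the sums and sign-extending the overflow bits to <4 x i32>. A sketch of the saddo case, reconstructed from the signature in the hunk header (the exact body is an assumption):

declare { <4 x i1>, <4 x i1> } @llvm.sadd.with.overflow.v4i1(<4 x i1>, <4 x i1>)

define <4 x i32> @saddo_v4i1(<4 x i1> %a0, <4 x i1> %a1, ptr %p2) nounwind {
  %t = call { <4 x i1>, <4 x i1> } @llvm.sadd.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1)
  %val  = extractvalue { <4 x i1>, <4 x i1> } %t, 0   ; the i1 sums
  %obit = extractvalue { <4 x i1>, <4 x i1> } %t, 1   ; the overflow bits
  %res = sext <4 x i1> %obit to <4 x i32>
  store <4 x i1> %val, ptr %p2
  ret <4 x i32> %res
}

Note that signed i1 addition (values 0 and -1) overflows exactly when both operands are -1, which is consistent with the new AVX512 sequence starting from a plain vpand of the two inputs.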
llvm/test/CodeGen/X86/vec_ssubo.ll (5 additions, 9 deletions)
@@ -1062,16 +1062,12 @@ define <4 x i32> @ssubo_v4i1(<4 x i1> %a0, <4 x i1> %a1, ptr %p2) nounwind {
;
; AVX512-LABEL: ssubo_v4i1:
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm2
; AVX512-NEXT: vpslld $31, %xmm2, %xmm2
; AVX512-NEXT: vptestmd %xmm2, %xmm2, %k0
; AVX512-NEXT: vpandn %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512-NEXT: vptestmd %xmm0, %xmm0, %k0
; AVX512-NEXT: vpslld $31, %xmm1, %xmm1
; AVX512-NEXT: vptestmd %xmm1, %xmm1, %k1
; AVX512-NEXT: kxorw %k1, %k0, %k0
; AVX512-NEXT: vptestnmd %xmm0, %xmm0, %k1 {%k1}
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: kshiftlw $12, %k0, %k0
; AVX512-NEXT: kshiftrw $12, %k0, %k0
; AVX512-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: movb %al, (%rdi)
; AVX512-NEXT: retq
llvm/test/CodeGen/X86/vec_uaddo.ll (5 additions, 9 deletions)
@@ -1098,16 +1098,12 @@ define <4 x i32> @uaddo_v4i1(<4 x i1> %a0, <4 x i1> %a1, ptr %p2) nounwind {
;
; AVX512-LABEL: uaddo_v4i1:
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm2
; AVX512-NEXT: vpslld $31, %xmm2, %xmm2
; AVX512-NEXT: vptestmd %xmm2, %xmm2, %k0
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512-NEXT: vptestmd %xmm0, %xmm0, %k0
; AVX512-NEXT: vpslld $31, %xmm1, %xmm0
; AVX512-NEXT: vptestmd %xmm0, %xmm0, %k1
; AVX512-NEXT: kxorw %k1, %k0, %k2
; AVX512-NEXT: kandw %k1, %k0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: kshiftlw $12, %k2, %k0
; AVX512-NEXT: kshiftrw $12, %k0, %k0
; AVX512-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: movb %al, (%rdi)
; AVX512-NEXT: retq
llvm/test/CodeGen/X86/vec_usubo.ll (5 additions, 9 deletions)
@@ -1145,16 +1145,12 @@ define <4 x i32> @usubo_v4i1(<4 x i1> %a0, <4 x i1> %a1, ptr %p2) nounwind {
;
; AVX512-LABEL: usubo_v4i1:
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm2
; AVX512-NEXT: vpslld $31, %xmm2, %xmm2
; AVX512-NEXT: vptestmd %xmm2, %xmm2, %k0
; AVX512-NEXT: vpandn %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512-NEXT: vptestmd %xmm0, %xmm0, %k0
; AVX512-NEXT: vpslld $31, %xmm1, %xmm1
; AVX512-NEXT: vptestmd %xmm1, %xmm1, %k1
; AVX512-NEXT: kxorw %k1, %k0, %k0
; AVX512-NEXT: vptestnmd %xmm0, %xmm0, %k1 {%k1}
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: kshiftlw $12, %k0, %k0
; AVX512-NEXT: kshiftrw $12, %k0, %k0
; AVX512-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: movb %al, (%rdi)
; AVX512-NEXT: retq
llvm/test/CodeGen/X86/vector-bo-select.ll (5 additions, 5 deletions)
@@ -3137,11 +3137,11 @@ define <8 x i64> @mul_v8i64_cast_cond(i8 noundef zeroext %pb, <8 x i64> noundef
; AVX512-LABEL: mul_v8i64_cast_cond:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovw %edi, %k1
; AVX512-NEXT: vpsrlq $32, %zmm1, %zmm2
; AVX512-NEXT: vpmuludq %zmm2, %zmm0, %zmm2
; AVX512-NEXT: vpsrlq $32, %zmm0, %zmm3
; AVX512-NEXT: vpmuludq %zmm1, %zmm3, %zmm3
; AVX512-NEXT: vpaddq %zmm3, %zmm2, %zmm2
; AVX512-NEXT: vpsrlq $32, %zmm0, %zmm2
; AVX512-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
; AVX512-NEXT: vpsrlq $32, %zmm1, %zmm3
; AVX512-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
; AVX512-NEXT: vpaddq %zmm2, %zmm3, %zmm2
; AVX512-NEXT: vpsllq $32, %zmm2, %zmm2
; AVX512-NEXT: vpmuludq %zmm1, %zmm0, %zmm1
; AVX512-NEXT: vpaddq %zmm2, %zmm1, %zmm0 {%k1}
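The mul_v8i64_cast_cond hunk only swaps the order in which the two 32x32->64 cross products are formed; the value is unchanged because, without a native 64-bit vector multiply, x*y is expanded per lane as lo(x)*lo(y) + ((lo(x)*hi(y) + hi(x)*lo(y)) << 32), and addition commutes. A sketch of the IR under test, assumed from the truncated signature in the hunk header (the mask bitcast and select are assumptions based on the masked vpaddq in the checked output):

define <8 x i64> @mul_v8i64_cast_cond(i8 noundef zeroext %pb, <8 x i64> noundef %x, <8 x i64> noundef %y) {
  %b = bitcast i8 %pb to <8 x i1>        ; %pb reinterpreted as a per-lane mask
  %m = mul <8 x i64> %x, %y
  %r = select <8 x i1> %b, <8 x i64> %m, <8 x i64> %x
  ret <8 x i64> %r
}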
llvm/test/CodeGen/X86/vector-fshr-128.ll (43 additions, 43 deletions)
@@ -58,12 +58,12 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
; SSE41-NEXT: psrlq %xmm4, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: pandn %xmm3, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3]
; SSE41-NEXT: paddq %xmm0, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm3
; SSE41-NEXT: psllq %xmm1, %xmm3
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psllq %xmm2, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; SSE41-NEXT: psllq %xmm2, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT: por %xmm5, %xmm0
; SSE41-NEXT: retq
;
@@ -76,11 +76,11 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
; AVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm5[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[2,3,2,3]
; AVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vpsllq %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
@@ -158,13 +158,13 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
; XOPAVX1-LABEL: var_funnnel_v2i64:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
; XOPAVX1-NEXT: vpsubq %xmm4, %xmm5, %xmm4
; XOPAVX1-NEXT: vpshlq %xmm4, %xmm1, %xmm1
; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0
; XOPAVX1-NEXT: vpshlq %xmm2, %xmm0, %xmm0
; XOPAVX1-NEXT: vpshlq %xmm4, %xmm0, %xmm0
; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOPAVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT: vpshlq %xmm2, %xmm1, %xmm1
; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: retq
;
@@ -366,13 +366,13 @@ define <4 x i32> @var_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %amt)
; XOPAVX1-LABEL: var_funnnel_v4i32:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [31,31,31,31]
; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
; XOPAVX1-NEXT: vpsubd %xmm4, %xmm5, %xmm4
; XOPAVX1-NEXT: vpshld %xmm4, %xmm1, %xmm1
; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; XOPAVX1-NEXT: vpshld %xmm2, %xmm0, %xmm0
; XOPAVX1-NEXT: vpshld %xmm4, %xmm0, %xmm0
; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOPAVX1-NEXT: vpsubd %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT: vpshld %xmm2, %xmm1, %xmm1
; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: retq
;
@@ -646,26 +646,26 @@ define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %amt)
; XOPAVX1-LABEL: var_funnnel_v8i16:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
; XOPAVX1-NEXT: vpsubw %xmm4, %xmm5, %xmm4
; XOPAVX1-NEXT: vpshlw %xmm4, %xmm1, %xmm1
; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vpaddw %xmm0, %xmm0, %xmm0
; XOPAVX1-NEXT: vpshlw %xmm2, %xmm0, %xmm0
; XOPAVX1-NEXT: vpshlw %xmm4, %xmm0, %xmm0
; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOPAVX1-NEXT: vpsubw %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT: vpshlw %xmm2, %xmm1, %xmm1
; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: var_funnnel_v8i16:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpbroadcastw {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
; XOPAVX2-NEXT: vpxor %xmm5, %xmm5, %xmm5
; XOPAVX2-NEXT: vpsubw %xmm4, %xmm5, %xmm4
; XOPAVX2-NEXT: vpshlw %xmm4, %xmm1, %xmm1
; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOPAVX2-NEXT: vpaddw %xmm0, %xmm0, %xmm0
; XOPAVX2-NEXT: vpshlw %xmm2, %xmm0, %xmm0
; XOPAVX2-NEXT: vpshlw %xmm4, %xmm0, %xmm0
; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOPAVX2-NEXT: vpsubw %xmm2, %xmm3, %xmm2
; XOPAVX2-NEXT: vpshlw %xmm2, %xmm1, %xmm1
; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: retq
;
@@ -995,26 +995,26 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %amt)
; XOPAVX1-LABEL: var_funnnel_v16i8:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
; XOPAVX1-NEXT: vpsubb %xmm4, %xmm5, %xmm4
; XOPAVX1-NEXT: vpshlb %xmm4, %xmm1, %xmm1
; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vpaddb %xmm0, %xmm0, %xmm0
; XOPAVX1-NEXT: vpshlb %xmm2, %xmm0, %xmm0
; XOPAVX1-NEXT: vpshlb %xmm4, %xmm0, %xmm0
; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOPAVX1-NEXT: vpsubb %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT: vpshlb %xmm2, %xmm1, %xmm1
; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: var_funnnel_v16i8:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpbroadcastb {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
; XOPAVX2-NEXT: vpxor %xmm5, %xmm5, %xmm5
; XOPAVX2-NEXT: vpsubb %xmm4, %xmm5, %xmm4
; XOPAVX2-NEXT: vpshlb %xmm4, %xmm1, %xmm1
; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOPAVX2-NEXT: vpaddb %xmm0, %xmm0, %xmm0
; XOPAVX2-NEXT: vpshlb %xmm2, %xmm0, %xmm0
; XOPAVX2-NEXT: vpshlb %xmm4, %xmm0, %xmm0
; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOPAVX2-NEXT: vpsubb %xmm2, %xmm3, %xmm2
; XOPAVX2-NEXT: vpshlb %xmm2, %xmm1, %xmm1
; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: retq
;
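All the var_funnnel hunks above, and the 256-bit and sub-128 variants below, lean on the same rewrite of funnel-shift-right semantics: for element width w, fshr(x, y, s) = (y lshr (s & (w-1))) | ((x shl 1) shl (~s & (w-1))). That is why each version masks one shift amount with pand and the other with pandn after doubling x via padd; the diffs merely reorder which of the two halves is materialized first. The pattern under test, taking the v2i64 signature from the hunk header (the body is a likely but assumed reconstruction):

declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)

define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt) nounwind {
  ; per lane: the low 64 bits of concat(x, y) >> (amt mod 64)
  %res = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
  ret <2 x i64> %res
}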
llvm/test/CodeGen/X86/vector-fshr-256.ll (14 additions, 14 deletions)
@@ -486,22 +486,22 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %
; XOPAVX2-LABEL: var_funnnel_v16i16:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpbroadcastw {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; XOPAVX2-NEXT: vpand %ymm3, %ymm2, %ymm4
; XOPAVX2-NEXT: vpandn %ymm3, %ymm2, %ymm4
; XOPAVX2-NEXT: vextracti128 $1, %ymm4, %xmm5
; XOPAVX2-NEXT: vpxor %xmm6, %xmm6, %xmm6
; XOPAVX2-NEXT: vpsubw %xmm5, %xmm6, %xmm5
; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm7
; XOPAVX2-NEXT: vpshlw %xmm5, %xmm7, %xmm5
; XOPAVX2-NEXT: vpsubw %xmm4, %xmm6, %xmm4
; XOPAVX2-NEXT: vpshlw %xmm4, %xmm1, %xmm1
; XOPAVX2-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm1
; XOPAVX2-NEXT: vpandn %ymm3, %ymm2, %ymm2
; XOPAVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; XOPAVX2-NEXT: vpaddw %ymm0, %ymm0, %ymm0
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm4
; XOPAVX2-NEXT: vpshlw %xmm3, %xmm4, %xmm3
; XOPAVX2-NEXT: vpshlw %xmm2, %xmm0, %xmm0
; XOPAVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm6
; XOPAVX2-NEXT: vpshlw %xmm5, %xmm6, %xmm5
; XOPAVX2-NEXT: vpshlw %xmm4, %xmm0, %xmm0
; XOPAVX2-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0
; XOPAVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
; XOPAVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; XOPAVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; XOPAVX2-NEXT: vpsubw %xmm3, %xmm4, %xmm3
; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm5
; XOPAVX2-NEXT: vpshlw %xmm3, %xmm5, %xmm3
; XOPAVX2-NEXT: vpsubw %xmm2, %xmm4, %xmm2
; XOPAVX2-NEXT: vpshlw %xmm2, %xmm1, %xmm1
; XOPAVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
%res = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %amt)
llvm/test/CodeGen/X86/vector-fshr-sub128.ll (6 additions, 6 deletions)
@@ -185,13 +185,13 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %amt)
; XOPAVX1-LABEL: var_funnnel_v2i32:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [31,31,31,31]
; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
; XOPAVX1-NEXT: vpsubd %xmm4, %xmm5, %xmm4
; XOPAVX1-NEXT: vpshld %xmm4, %xmm1, %xmm1
; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; XOPAVX1-NEXT: vpshld %xmm2, %xmm0, %xmm0
; XOPAVX1-NEXT: vpshld %xmm4, %xmm0, %xmm0
; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOPAVX1-NEXT: vpsubd %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT: vpshld %xmm2, %xmm1, %xmm1
; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: retq
;
llvm/test/CodeGen/X86/vector-shift-shl-128.ll (6 additions, 6 deletions)
@@ -927,9 +927,9 @@ define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) nounwind {
; SSE2-LABEL: constant_shift_v2i64:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psllq $7, %xmm1
; SSE2-NEXT: paddq %xmm0, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: paddq %xmm0, %xmm1
; SSE2-NEXT: psllq $7, %xmm0
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: constant_shift_v2i64:
@@ -975,9 +975,9 @@ define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) nounwind {
; X86-SSE-LABEL: constant_shift_v2i64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movdqa %xmm0, %xmm1
; X86-SSE-NEXT: psllq $7, %xmm1
; X86-SSE-NEXT: paddq %xmm0, %xmm0
; X86-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; X86-SSE-NEXT: paddq %xmm0, %xmm1
; X86-SSE-NEXT: psllq $7, %xmm0
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X86-SSE-NEXT: retl
%shift = shl <2 x i64> %a, <i64 1, i64 7>
ret <2 x i64> %shift
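Finally, the constant_shift_v2i64 hunks: the IR shifts the two lanes by different constants (shl <2 x i64> %a, <i64 1, i64 7>, shown above), so SSE2 computes each lane with a uniform shift and blends them; both the old and new orderings rely on the shift-by-one lane being an add. A minimal sketch of that equivalence (hypothetical helper name):

define <2 x i64> @shl_by_one(<2 x i64> %a) {
  ; shl <2 x i64> %a, <i64 1, i64 1> produces the same value as %a + %a,
  ; which is why the backend emits paddq for the shift-by-1 lane
  %r = add <2 x i64> %a, %a
  ret <2 x i64> %r
}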