44 changes: 32 additions & 12 deletions llvm/test/CodeGen/X86/avx2-vector-shifts.ll
@@ -381,6 +381,7 @@ define <4 x i32> @srl_trunc_and_v4i64(<4 x i32> %x, <4 x i64> %y) nounwind {
; X32-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; X32-NEXT: vzeroupper
; X32-NEXT: retl
; X32-NEXT: ## -- End function
;
; X64-LABEL: srl_trunc_and_v4i64:
; X64: ## BB#0:
@@ -391,6 +392,7 @@ define <4 x i32> @srl_trunc_and_v4i64(<4 x i32> %x, <4 x i64> %y) nounwind {
; X64-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; X64-NEXT: vzeroupper
; X64-NEXT: retq
; X64-NEXT: ## -- End function
%and = and <4 x i64> %y, <i64 8, i64 8, i64 8, i64 8>
%trunc = trunc <4 x i64> %and to <4 x i32>
%sra = lshr <4 x i32> %x, %trunc
@@ -412,6 +414,7 @@ define <8 x i16> @shl_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; X32-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X32-NEXT: vzeroupper
; X32-NEXT: retl
; X32-NEXT: ## -- End function
;
; X64-LABEL: shl_8i16:
; X64: ## BB#0:
@@ -423,6 +426,7 @@ define <8 x i16> @shl_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; X64-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X64-NEXT: vzeroupper
; X64-NEXT: retq
; X64-NEXT: ## -- End function
%shl = shl <8 x i16> %r, %a
ret <8 x i16> %shl
}
@@ -434,27 +438,29 @@ define <16 x i16> @shl_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; X32-NEXT: vpsllvd %ymm3, %ymm4, %ymm3
; X32-NEXT: vpsrld $16, %ymm3, %ymm3
; X32-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; X32-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
; X32-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; X32-NEXT: vpsrld $16, %ymm3, %ymm1
; X32-NEXT: vpsrld $16, %ymm0, %ymm0
; X32-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
; X32-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
; X32-NEXT: ## -- End function
;
; X64-LABEL: shl_16i16:
; X64: ## BB#0:
; X64-NEXT: vpxor %ymm2, %ymm2, %ymm2
; X64-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; X64-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; X64-NEXT: vpsllvd %ymm3, %ymm4, %ymm3
; X64-NEXT: vpsrld $16, %ymm3, %ymm3
; X64-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; X64-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
; X64-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; X64-NEXT: vpsrld $16, %ymm3, %ymm1
; X64-NEXT: vpsrld $16, %ymm0, %ymm0
; X64-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
; X64-NEXT: ## -- End function
%shl = shl <16 x i16> %r, %a
ret <16 x i16> %shl
}
@@ -474,6 +480,7 @@ define <32 x i8> @shl_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
; X32-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; X32-NEXT: retl
; X32-NEXT: ## -- End function
;
; X64-LABEL: shl_32i8:
; X64: ## BB#0:
@@ -489,6 +496,7 @@ define <32 x i8> @shl_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
; X64-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; X64-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; X64-NEXT: retq
; X64-NEXT: ## -- End function
%shl = shl <32 x i8> %r, %a
ret <32 x i8> %shl
}
@@ -504,6 +512,7 @@ define <8 x i16> @ashr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; X32-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X32-NEXT: vzeroupper
; X32-NEXT: retl
; X32-NEXT: ## -- End function
;
; X64-LABEL: ashr_8i16:
; X64: ## BB#0:
@@ -515,6 +524,7 @@ define <8 x i16> @ashr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; X64-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X64-NEXT: vzeroupper
; X64-NEXT: retq
; X64-NEXT: ## -- End function
%ashr = ashr <8 x i16> %r, %a
ret <8 x i16> %ashr
}
@@ -526,27 +536,29 @@ define <16 x i16> @ashr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; X32-NEXT: vpsravd %ymm3, %ymm4, %ymm3
; X32-NEXT: vpsrld $16, %ymm3, %ymm3
; X32-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; X32-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
; X32-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; X32-NEXT: vpsrld $16, %ymm3, %ymm1
; X32-NEXT: vpsrld $16, %ymm0, %ymm0
; X32-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
; X32-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
; X32-NEXT: ## -- End function
;
; X64-LABEL: ashr_16i16:
; X64: ## BB#0:
; X64-NEXT: vpxor %ymm2, %ymm2, %ymm2
; X64-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; X64-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; X64-NEXT: vpsravd %ymm3, %ymm4, %ymm3
; X64-NEXT: vpsrld $16, %ymm3, %ymm3
; X64-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; X64-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
; X64-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; X64-NEXT: vpsrld $16, %ymm3, %ymm1
; X64-NEXT: vpsrld $16, %ymm0, %ymm0
; X64-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
; X64-NEXT: ## -- End function
%ashr = ashr <16 x i16> %r, %a
ret <16 x i16> %ashr
}
@@ -579,6 +591,7 @@ define <32 x i8> @ashr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
; X32-NEXT: vpsrlw $8, %ymm0, %ymm0
; X32-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
; X32-NEXT: retl
; X32-NEXT: ## -- End function
;
; X64-LABEL: ashr_32i8:
; X64: ## BB#0:
@@ -607,6 +620,7 @@ define <32 x i8> @ashr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
; X64-NEXT: vpsrlw $8, %ymm0, %ymm0
; X64-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
; X64-NEXT: retq
; X64-NEXT: ## -- End function
%ashr = ashr <32 x i8> %r, %a
ret <32 x i8> %ashr
}
@@ -622,6 +636,7 @@ define <8 x i16> @lshr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; X32-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X32-NEXT: vzeroupper
; X32-NEXT: retl
; X32-NEXT: ## -- End function
;
; X64-LABEL: lshr_8i16:
; X64: ## BB#0:
@@ -633,6 +648,7 @@ define <8 x i16> @lshr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; X64-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X64-NEXT: vzeroupper
; X64-NEXT: retq
; X64-NEXT: ## -- End function
%lshr = lshr <8 x i16> %r, %a
ret <8 x i16> %lshr
}
@@ -644,27 +660,29 @@ define <16 x i16> @lshr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; X32-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3
; X32-NEXT: vpsrld $16, %ymm3, %ymm3
; X32-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; X32-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
; X32-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; X32-NEXT: vpsrld $16, %ymm3, %ymm1
; X32-NEXT: vpsrld $16, %ymm0, %ymm0
; X32-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
; X32-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
; X32-NEXT: ## -- End function
;
; X64-LABEL: lshr_16i16:
; X64: ## BB#0:
; X64-NEXT: vpxor %ymm2, %ymm2, %ymm2
; X64-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; X64-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; X64-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3
; X64-NEXT: vpsrld $16, %ymm3, %ymm3
; X64-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; X64-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
; X64-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; X64-NEXT: vpsrld $16, %ymm3, %ymm1
; X64-NEXT: vpsrld $16, %ymm0, %ymm0
; X64-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
; X64-NEXT: ## -- End function
%lshr = lshr <16 x i16> %r, %a
ret <16 x i16> %lshr
}
@@ -685,6 +703,7 @@ define <32 x i8> @lshr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
; X32-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; X32-NEXT: retl
; X32-NEXT: ## -- End function
;
; X64-LABEL: lshr_32i8:
; X64: ## BB#0:
@@ -701,6 +720,7 @@ define <32 x i8> @lshr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
; X64-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; X64-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; X64-NEXT: retq
; X64-NEXT: ## -- End function
%lshr = lshr <32 x i8> %r, %a
ret <32 x i8> %lshr
}
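A note on the recurring change in this file: each function's CHECK block now also matches the "## -- End function" comment that the assembly printer emits after a function body ("##" is the assembler comment prefix for the Darwin triple these tests target). A minimal sketch of the pattern, an assumed illustration rather than a line copied from this patch:

; X64-LABEL: example:
; X64:       ## BB#0:
; X64-NEXT:    retq
; X64-NEXT:  ## -- End function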
4 changes: 3 additions & 1 deletion llvm/test/CodeGen/X86/avx512-cmp.ll
@@ -14,6 +14,7 @@ define double @test1(double %a, double %b) nounwind {
; ALL-NEXT: LBB0_2: ## %l2
; ALL-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; ALL-NEXT: retq
; ALL-NEXT: ## -- End function
%tobool = fcmp une double %a, %b
br i1 %tobool, label %l1, label %l2

@@ -36,6 +37,7 @@ define float @test2(float %a, float %b) nounwind {
; ALL-NEXT: LBB1_2: ## %l2
; ALL-NEXT: vaddss %xmm1, %xmm0, %xmm0
; ALL-NEXT: retq
; ALL-NEXT: ## -- End function
%tobool = fcmp olt float %a, %b
br i1 %tobool, label %l1, label %l2

@@ -124,11 +126,11 @@ entry:
define i32 @test8(i32 %a1, i32 %a2, i32 %a3) {
; ALL-LABEL: test8:
; ALL: ## BB#0:
; ALL-NEXT: notl %edi
; ALL-NEXT: xorl $-2147483648, %esi ## imm = 0x80000000
; ALL-NEXT: testl %edx, %edx
; ALL-NEXT: movl $1, %eax
; ALL-NEXT: cmovel %eax, %edx
; ALL-NEXT: notl %edi
; ALL-NEXT: orl %edi, %esi
; ALL-NEXT: cmovnel %edx, %eax
; ALL-NEXT: retq
54 changes: 41 additions & 13 deletions llvm/test/CodeGen/X86/avx512-cvt.ll
@@ -1545,19 +1545,19 @@ define <4 x double> @uitofp_4i1_double(<4 x i32> %a) {
}

define <2 x float> @uitofp_2i1_float(<2 x i32> %a) {
; NOVL-LABEL: uitofp_2i1_float:
; NOVL: # BB#0:
; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; NOVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NOVL-NEXT: vpextrb $8, %xmm0, %eax
; NOVL-NEXT: andl $1, %eax
; NOVL-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm1
; NOVL-NEXT: vpextrb $0, %xmm0, %eax
; NOVL-NEXT: andl $1, %eax
; NOVL-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
; NOVL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; NOVL-NEXT: retq
; KNL-LABEL: uitofp_2i1_float:
; KNL: # BB#0:
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; KNL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; KNL-NEXT: vpextrb $8, %xmm0, %eax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: vpextrb $0, %xmm0, %ecx
; KNL-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
; KNL-NEXT: andl $1, %ecx
; KNL-NEXT: vcvtsi2ssl %ecx, %xmm2, %xmm1
; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; KNL-NEXT: retq
;
; VL-LABEL: uitofp_2i1_float:
; VL: # BB#0:
@@ -1567,6 +1567,34 @@ define <2 x float> @uitofp_2i1_float(<2 x i32> %a) {
; VL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
; VL-NEXT: vcvtudq2ps %xmm0, %xmm0
; VL-NEXT: retq
;
; AVX512DQ-LABEL: uitofp_2i1_float:
; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512DQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX512DQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512DQ-NEXT: vpextrb $8, %xmm0, %eax
; AVX512DQ-NEXT: andl $1, %eax
; AVX512DQ-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm1
; AVX512DQ-NEXT: vpextrb $0, %xmm0, %eax
; AVX512DQ-NEXT: andl $1, %eax
; AVX512DQ-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: uitofp_2i1_float:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX512BW-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax
; AVX512BW-NEXT: andl $1, %eax
; AVX512BW-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm1
; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
; AVX512BW-NEXT: andl $1, %eax
; AVX512BW-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
; AVX512BW-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512BW-NEXT: retq
%mask = icmp ult <2 x i32> %a, zeroinitializer
%1 = uitofp <2 x i1> %mask to <2 x float>
ret <2 x float> %1
166 changes: 93 additions & 73 deletions llvm/test/CodeGen/X86/avx512-insert-extract.ll

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
@@ -2880,7 +2880,6 @@ declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8
define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
; CHECK-LABEL: test_mask_vextractf32x4:
; CHECK: ## BB#0:
; CHECK-NEXT: vextractf32x4 $2, %zmm1, %xmm1
; CHECK-NEXT: kmovw %edi, %k0
; CHECK-NEXT: kshiftlw $12, %k0, %k1
; CHECK-NEXT: kshiftrw $15, %k1, %k1
@@ -2898,6 +2897,7 @@ define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8
; CHECK-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; CHECK-NEXT: kmovw %k1, %eax
; CHECK-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; CHECK-NEXT: vextractf32x4 $2, %zmm1, %xmm1
; CHECK-NEXT: vpslld $31, %xmm2, %xmm2
; CHECK-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
@@ -2941,7 +2941,6 @@ declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i32, <4 x i
define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
; CHECK-LABEL: test_maskz_vextracti32x4:
; CHECK: ## BB#0:
; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm0
; CHECK-NEXT: kmovw %edi, %k0
; CHECK-NEXT: kshiftlw $12, %k0, %k1
; CHECK-NEXT: kshiftrw $15, %k1, %k1
@@ -2959,6 +2958,7 @@ define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
; CHECK-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; CHECK-NEXT: kmovw %k1, %eax
; CHECK-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm0
; CHECK-NEXT: vpslld $31, %xmm1, %xmm1
; CHECK-NEXT: vpsrad $31, %xmm1, %xmm1
; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0
646 changes: 305 additions & 341 deletions llvm/test/CodeGen/X86/avx512-mask-op.ll

Large diffs are not rendered by default.

97 changes: 75 additions & 22 deletions llvm/test/CodeGen/X86/avx512-vec-cmp.ll

Large diffs are not rendered by default.

1,480 changes: 738 additions & 742 deletions llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll

Large diffs are not rendered by default.

128 changes: 64 additions & 64 deletions llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
@@ -2695,32 +2695,32 @@ declare <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16>, <16 x i16>, <32
define <8 x i32> @test_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: test_cmp_b_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
; CHECK-NEXT: kmovd %k0, %r8d ## encoding: [0xc5,0x7b,0x93,0xc0]
; CHECK-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 ## encoding: [0x62,0xf1,0x75,0x28,0x64,0xc0]
; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
; CHECK-NEXT: vpcmpleb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x02]
; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04]
; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpcmpleb %ymm0, %ymm1, %k0 ## encoding: [0x62,0xf3,0x75,0x28,0x3f,0xc0,0x02]
; CHECK-NEXT: kmovd %k0, %edi ## encoding: [0xc5,0xfb,0x93,0xf8]
; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vmovd %esi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
; CHECK-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01]
; CHECK-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x02]
; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
; CHECK-NEXT: vmovd %eax, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0]
; CHECK-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x22,0xd1,0x01]
; CHECK-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x22,0xd2,0x02]
; CHECK-NEXT: kxnord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x46,0xc0]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x03]
; CHECK-NEXT: vmovd %ecx, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
; CHECK-NEXT: vmovd %r8d, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd0]
; CHECK-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
; CHECK-NEXT: ## xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-NEXT: vmovd %edx, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2]
; CHECK-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
; CHECK-NEXT: ## xmm1 = xmm1[0],xmm2[0]
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
; CHECK-NEXT: vpinsrd $3, %eax, %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x22,0xd0,0x03]
; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 ## encoding: [0x62,0xf1,0x75,0x28,0x64,0xc0]
; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
; CHECK-NEXT: vpcmpleb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x02]
; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vmovd %eax, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc8]
; CHECK-NEXT: vpunpckldq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x62,0xc0]
; CHECK-NEXT: ## xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: vmovd %edx, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xca]
; CHECK-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6c,0xc1]
; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xc2,0x01]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1)
%vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
Expand Down Expand Up @@ -2750,23 +2750,23 @@ define <8 x i32> @test_mask_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) {
; CHECK-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x64,0xc0]
; CHECK-NEXT: kmovd %k0, %r9d ## encoding: [0xc5,0x7b,0x93,0xc8]
; CHECK-NEXT: vpcmpleb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x02]
; CHECK-NEXT: kmovd %k0, %r10d ## encoding: [0xc5,0x7b,0x93,0xd0]
; CHECK-NEXT: kxord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x47,0xc0]
; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0]
; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0]
; CHECK-NEXT: vpcmpleb %ymm0, %ymm1, %k0 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x3f,0xc0,0x02]
; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1]
; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
; CHECK-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; CHECK-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x01]
; CHECK-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02]
; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
; CHECK-NEXT: vmovd %esi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
; CHECK-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01]
; CHECK-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x02]
; CHECK-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
; CHECK-NEXT: vmovd %r8d, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xc8]
; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01]
; CHECK-NEXT: vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02]
; CHECK-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03]
; CHECK-NEXT: vpinsrd $2, %edx, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xca,0x02]
; CHECK-NEXT: kxord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x47,0xc0]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xc8,0x03]
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
@@ -2793,32 +2793,32 @@ declare i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8>, <32 x i8>, i32, i32) noun
define <8 x i32> @test_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: test_ucmp_b_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
; CHECK-NEXT: kmovd %k0, %r8d ## encoding: [0xc5,0x7b,0x93,0xc0]
; CHECK-NEXT: vpcmpltub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x01]
; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
; CHECK-NEXT: vpcmpleub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x02]
; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04]
; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x05]
; CHECK-NEXT: kmovd %k0, %edi ## encoding: [0xc5,0xfb,0x93,0xf8]
; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
; CHECK-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x06]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vmovd %esi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
; CHECK-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01]
; CHECK-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x02]
; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
; CHECK-NEXT: vmovd %eax, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0]
; CHECK-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x22,0xd1,0x01]
; CHECK-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x22,0xd2,0x02]
; CHECK-NEXT: kxnord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x46,0xc0]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x03]
; CHECK-NEXT: vmovd %ecx, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
; CHECK-NEXT: vmovd %r8d, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd0]
; CHECK-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
; CHECK-NEXT: ## xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-NEXT: vmovd %edx, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2]
; CHECK-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
; CHECK-NEXT: ## xmm1 = xmm1[0],xmm2[0]
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
; CHECK-NEXT: vpinsrd $3, %eax, %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x22,0xd0,0x03]
; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpcmpltub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x01]
; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
; CHECK-NEXT: vpcmpleub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x02]
; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vmovd %eax, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc8]
; CHECK-NEXT: vpunpckldq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x62,0xc0]
; CHECK-NEXT: ## xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: vmovd %edx, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xca]
; CHECK-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6c,0xc1]
; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xc2,0x01]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1)
%vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
Expand Down Expand Up @@ -2848,23 +2848,23 @@ define <8 x i32> @test_mask_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask)
; CHECK-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x01]
; CHECK-NEXT: kmovd %k0, %r9d ## encoding: [0xc5,0x7b,0x93,0xc8]
; CHECK-NEXT: vpcmpleub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x02]
; CHECK-NEXT: kmovd %k0, %r10d ## encoding: [0xc5,0x7b,0x93,0xd0]
; CHECK-NEXT: kxord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x47,0xc0]
; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0]
; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0]
; CHECK-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x05]
; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x06]
; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
; CHECK-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; CHECK-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x01]
; CHECK-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02]
; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
; CHECK-NEXT: vmovd %esi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
; CHECK-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01]
; CHECK-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x02]
; CHECK-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
; CHECK-NEXT: vmovd %r8d, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xc8]
; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01]
; CHECK-NEXT: vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02]
; CHECK-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03]
; CHECK-NEXT: vpinsrd $2, %edx, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xca,0x02]
; CHECK-NEXT: kxord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x47,0xc0]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xc8,0x03]
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll
@@ -453,10 +453,10 @@ define i32 @v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <32 x i8> %d) {
; SSE2-SSSE3-NEXT: pcmpgtb %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pcmpgtb %xmm3, %xmm1
; SSE2-SSSE3-NEXT: pcmpgtb %xmm6, %xmm4
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm4
; SSE2-SSSE3-NEXT: pcmpgtb %xmm7, %xmm5
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm5
; SSE2-SSSE3-NEXT: movdqa %xmm5, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm4
; SSE2-SSSE3-NEXT: movdqa %xmm4, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: andb $1, %al
48 changes: 25 additions & 23 deletions llvm/test/CodeGen/X86/extractelement-legalization-store-ordering.ll
@@ -1,3 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple i386-apple-darwin -mcpu=yonah | FileCheck %s

target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
@@ -6,31 +7,32 @@ target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
; into loads, off the stack or a previous store.
; Be very explicit about the ordering/stack offsets.

; CHECK-LABEL: test_extractelement_legalization_storereuse:
; CHECK: # BB#0
; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: movl 16(%esp), %eax
; CHECK-NEXT: movl 24(%esp), %ecx
; CHECK-NEXT: movl 20(%esp), %edx
; CHECK-NEXT: paddd (%edx), %xmm0
; CHECK-NEXT: movdqa %xmm0, (%edx)
; CHECK-NEXT: movl (%edx), %esi
; CHECK-NEXT: movl 4(%edx), %edi
; CHECK-NEXT: shll $4, %ecx
; CHECK-NEXT: movl 8(%edx), %ebx
; CHECK-NEXT: movl 12(%edx), %edx
; CHECK-NEXT: movl %esi, 12(%eax,%ecx)
; CHECK-NEXT: movl %edi, (%eax,%ecx)
; CHECK-NEXT: movl %ebx, 8(%eax,%ecx)
; CHECK-NEXT: movl %edx, 4(%eax,%ecx)
; CHECK-NEXT: popl %esi
; CHECK-NEXT: popl %edi
; CHECK-NEXT: popl %ebx
; CHECK-NEXT: retl

define void @test_extractelement_legalization_storereuse(<4 x i32> %a, i32* nocapture %x, i32* nocapture readonly %y, i32 %i) #0 {
; CHECK-LABEL: _test_extractelement_legalization_storereuse: ## @test_extractelement_legalization_storereuse
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: paddd (%ecx), %xmm0
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: movdqa %xmm0, (%ecx)
; CHECK-NEXT: movl (%ecx), %esi
; CHECK-NEXT: movl 4(%ecx), %edi
; CHECK-NEXT: shll $4, %edx
; CHECK-NEXT: movl 8(%ecx), %ebx
; CHECK-NEXT: movl 12(%ecx), %ecx
; CHECK-NEXT: movl %esi, 12(%eax,%edx)
; CHECK-NEXT: movl %edi, (%eax,%edx)
; CHECK-NEXT: movl %ebx, 8(%eax,%edx)
; CHECK-NEXT: movl %ecx, 4(%eax,%edx)
; CHECK-NEXT: popl %esi
; CHECK-NEXT: popl %edi
; CHECK-NEXT: popl %ebx
; CHECK-NEXT: retl
; CHECK-NEXT: ## -- End function
entry:
%0 = bitcast i32* %y to <4 x i32>*
%1 = load <4 x i32>, <4 x i32>* %0, align 16
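Aside: the NOTE added at the top of this test records that its CHECK lines are now autogenerated. Assertions in this style are refreshed by rerunning the script over the test. A rough sketch, in which the build directory and the --llc-binary flag are assumptions rather than details taken from this patch:

  # Rewrite the test's CHECK lines in place using a freshly built llc.
  python llvm/utils/update_llc_test_checks.py \
      --llc-binary build/bin/llc \
      llvm/test/CodeGen/X86/extractelement-legalization-store-ordering.ll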
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/fp128-i128.ll
@@ -50,8 +50,8 @@ define void @TestUnionLD1(fp128 %s, i64 %n) #0 {
; CHECK-NEXT: andq %rdi, %rcx
; CHECK-NEXT: movabsq $-281474976710656, %rdx # imm = 0xFFFF000000000000
; CHECK-NEXT: andq -{{[0-9]+}}(%rsp), %rdx
; CHECK-NEXT: orq %rcx, %rdx
; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: orq %rcx, %rdx
; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
; CHECK-NEXT: jmp foo # TAILCALL
17 changes: 8 additions & 9 deletions llvm/test/CodeGen/X86/gather-addresses.ll
@@ -16,11 +16,10 @@
; LIN: sarq $32, %r[[REG2]]
; LIN: movslq %e[[REG4]], %r[[REG3:.+]]
; LIN: sarq $32, %r[[REG4]]
; LIN: movsd (%rdi,%r[[REG1]],8), %xmm0
; LIN: movhpd (%rdi,%r[[REG2]],8), %xmm0
; LIN: movsd (%rdi,%r[[REG3]],8), %xmm1
; LIN: movhpd (%rdi,%r[[REG4]],8), %xmm1

; LIN: movsd (%rdi,%rsi,8), %xmm1
; LIN: movhpd (%rdi,%rax,8), %xmm1
; LIN: movdqa (%rsi), %xmm0
; LIN: movq %rdi, %xmm1
; WIN: movdqa (%rdx), %xmm0
; WIN: pand (%r8), %xmm0
; WIN: pextrq $1, %xmm0, %r[[REG4:.+]]
@@ -29,10 +28,10 @@
; WIN: sarq $32, %r[[REG2]]
; WIN: movslq %e[[REG4]], %r[[REG3:.+]]
; WIN: sarq $32, %r[[REG4]]
; WIN: movsd (%rcx,%r[[REG1]],8), %xmm0
; WIN: movhpd (%rcx,%r[[REG2]],8), %xmm0
; WIN: movsd (%rcx,%r[[REG3]],8), %xmm1
; WIN: movhpd (%rcx,%r[[REG4]],8), %xmm1
; WIN: movsd (%rcx,%r9,8), %xmm1
; WIN: movhpd (%rcx,%rax,8), %xmm1
; WIN: movdqa (%rdx), %xmm0
; WIN: movq %rdx, %xmm1

define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
%a = load <4 x i32>, <4 x i32>* %i
1,045 changes: 834 additions & 211 deletions llvm/test/CodeGen/X86/half.ll

Large diffs are not rendered by default.

34 changes: 17 additions & 17 deletions llvm/test/CodeGen/X86/illegal-bitfield-loadstore.ll
@@ -112,23 +112,23 @@ define void @i56_and_or(i56* %a) {
define void @i56_insert_bit(i56* %a, i1 zeroext %bit) {
; CHECK-LABEL: i56_insert_bit:
; CHECK: # BB#0:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: movzwl 4(%rdi), %ecx
; CHECK-NEXT: movzbl 6(%rdi), %edx
; CHECK-NEXT: movl (%rdi), %esi
; CHECK-NEXT: movb %dl, 6(%rdi)
; CHECK-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<kill> %RDX<def>
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: orl %ecx, %edx
; CHECK-NEXT: shlq $32, %rdx
; CHECK-NEXT: orq %rdx, %rsi
; CHECK-NEXT: shlq $13, %rax
; CHECK-NEXT: movabsq $72057594037919743, %rcx # imm = 0xFFFFFFFFFFDFFF
; CHECK-NEXT: andq %rsi, %rcx
; CHECK-NEXT: orq %rax, %rcx
; CHECK-NEXT: movl %ecx, (%rdi)
; CHECK-NEXT: shrq $32, %rcx
; CHECK-NEXT: movw %cx, 4(%rdi)
; CHECK-NEXT: movzwl 4(%rdi), %eax
; CHECK-NEXT: movzbl 6(%rdi), %ecx
; CHECK-NEXT: movl (%rdi), %edx
; CHECK-NEXT: movb %cl, 6(%rdi)
; CHECK-NEXT: movzbl %sil, %esi
; CHECK-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<kill> %RCX<def>
; CHECK-NEXT: shll $16, %ecx
; CHECK-NEXT: orl %eax, %ecx
; CHECK-NEXT: shlq $32, %rcx
; CHECK-NEXT: orq %rcx, %rdx
; CHECK-NEXT: shlq $13, %rsi
; CHECK-NEXT: movabsq $72057594037919743, %rax # imm = 0xFFFFFFFFFFDFFF
; CHECK-NEXT: andq %rdx, %rax
; CHECK-NEXT: orq %rsi, %rax
; CHECK-NEXT: movl %eax, (%rdi)
; CHECK-NEXT: shrq $32, %rax
; CHECK-NEXT: movw %ax, 4(%rdi)
; CHECK-NEXT: retq
%extbit = zext i1 %bit to i56
%b = load i56, i56* %a, align 1
206 changes: 103 additions & 103 deletions llvm/test/CodeGen/X86/mul-constant-i32.ll

Large diffs are not rendered by default.

132 changes: 66 additions & 66 deletions llvm/test/CodeGen/X86/mul-constant-i64.ll

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/pr32329.ll
@@ -59,8 +59,8 @@ define void @foo() local_unnamed_addr {
; X86-NEXT: cmovnel %ecx, %esi
; X86-NEXT: cmpl %edx, %edi
; X86-NEXT: movl %ebp, var_50+4
; X86-NEXT: movl %esi, var_50
; X86-NEXT: setge var_205
; X86-NEXT: movl %esi, var_50
; X86-NEXT: imull %eax, %ebx
; X86-NEXT: movb %bl, var_218
; X86-NEXT: popl %esi
224 changes: 112 additions & 112 deletions llvm/test/CodeGen/X86/recip-fastmath.ll

Large diffs are not rendered by default.

442 changes: 221 additions & 221 deletions llvm/test/CodeGen/X86/recip-fastmath2.ll

Large diffs are not rendered by default.

466 changes: 233 additions & 233 deletions llvm/test/CodeGen/X86/sse-schedule.ll

Large diffs are not rendered by default.

1,108 changes: 554 additions & 554 deletions llvm/test/CodeGen/X86/sse2-schedule.ll

Large diffs are not rendered by default.

88 changes: 44 additions & 44 deletions llvm/test/CodeGen/X86/sse3-schedule.ll
@@ -31,14 +31,14 @@ define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; SANDY-LABEL: test_addsubpd:
; SANDY: # BB#0:
; SANDY-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_addsubpd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_addsubpd:
; BTVER2: # BB#0:
@@ -74,14 +74,14 @@ define <4 x float> @test_addsubps(<4 x float> %a0, <4 x float> %a1, <4 x float>
; SANDY-LABEL: test_addsubps:
; SANDY: # BB#0:
; SANDY-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_addsubps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_addsubps:
; BTVER2: # BB#0:
@@ -116,15 +116,15 @@ define <2 x double> @test_haddpd(<2 x double> %a0, <2 x double> %a1, <2 x double
;
; SANDY-LABEL: test_haddpd:
; SANDY: # BB#0:
; SANDY-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; SANDY-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_haddpd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_haddpd:
; BTVER2: # BB#0:
@@ -159,15 +159,15 @@ define <4 x float> @test_haddps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%
;
; SANDY-LABEL: test_haddps:
; SANDY: # BB#0:
; SANDY-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; SANDY-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_haddps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_haddps:
; BTVER2: # BB#0:
@@ -202,15 +202,15 @@ define <2 x double> @test_hsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double
;
; SANDY-LABEL: test_hsubpd:
; SANDY: # BB#0:
; SANDY-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; SANDY-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_hsubpd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_hsubpd:
; BTVER2: # BB#0:
@@ -245,15 +245,15 @@ define <4 x float> @test_hsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%
;
; SANDY-LABEL: test_hsubps:
; SANDY: # BB#0:
; SANDY-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; SANDY-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_hsubps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_hsubps:
; BTVER2: # BB#0:
@@ -287,13 +287,13 @@ define <16 x i8> @test_lddqu(i8* %a0) {
;
; SANDY-LABEL: test_lddqu:
; SANDY: # BB#0:
; SANDY-NEXT: vlddqu (%rdi), %xmm0 # sched: [4:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_lddqu:
; HASWELL: # BB#0:
; HASWELL-NEXT: vlddqu (%rdi), %xmm0 # sched: [4:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vlddqu (%rdi), %xmm0 # sched: [?:5.000000e-01]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_lddqu:
; BTVER2: # BB#0:
@@ -330,16 +330,16 @@ define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) {
; SANDY-LABEL: test_movddup:
; SANDY: # BB#0:
; SANDY-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
; SANDY-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [4:0.50]
; SANDY-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:0.50]
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_movddup:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
; HASWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [4:0.50]
; HASWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [?:5.000000e-01]
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movddup:
; BTVER2: # BB#0:
@@ -380,16 +380,16 @@ define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) {
; SANDY-LABEL: test_movshdup:
; SANDY: # BB#0:
; SANDY-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
; SANDY-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [4:0.50]
; SANDY-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_movshdup:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
; HASWELL-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [4:0.50]
; HASWELL-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [?:5.000000e-01]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movshdup:
; BTVER2: # BB#0:
@@ -430,16 +430,16 @@ define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) {
; SANDY-LABEL: test_movsldup:
; SANDY: # BB#0:
; SANDY-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
; SANDY-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [4:0.50]
; SANDY-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_movsldup:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
; HASWELL-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [4:0.50]
; HASWELL-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [?:5.000000e-01]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movsldup:
; BTVER2: # BB#0:
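A reading note for this and the following *-schedule.ll diffs: the trailing "# sched: [N:M]" comment summarizes the target's scheduling model for an instruction, where N is the latency in cycles and M is the reciprocal throughput, so [7:1.00] means the result is ready after roughly 7 cycles and one such instruction can issue per cycle, while [4:0.50] allows two per cycle. A few updated lines print "[?:5.000000e-01]"; reading the "?" as an unknown latency next to a raw floating-point throughput is an inference from the output, not something this patch states. An assumed illustration of the notation, not a line from the patch:

; latency 7 cycles, reciprocal throughput 1.00 (at most one per cycle):
; HASWELL-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]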
432 changes: 216 additions & 216 deletions llvm/test/CodeGen/X86/sse41-schedule.ll

Large diffs are not rendered by default.

70 changes: 35 additions & 35 deletions llvm/test/CodeGen/X86/sse42-schedule.ll
@@ -26,16 +26,16 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1, i8 *%a2) {
; SANDY-LABEL: crc32_32_8:
; SANDY: # BB#0:
; SANDY-NEXT: crc32b %sil, %edi # sched: [3:1.00]
; SANDY-NEXT: crc32b (%rdx), %edi # sched: [7:1.00]
; SANDY-NEXT: crc32b (%rdx), %edi # sched: [8:1.00]
; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: crc32_32_8:
; HASWELL: # BB#0:
; HASWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00]
; HASWELL-NEXT: crc32b (%rdx), %edi # sched: [7:1.00]
; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: crc32_32_8:
; BTVER2: # BB#0:
@@ -68,16 +68,16 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1, i16 *%a2) {
; SANDY-LABEL: crc32_32_16:
; SANDY: # BB#0:
; SANDY-NEXT: crc32w %si, %edi # sched: [3:1.00]
; SANDY-NEXT: crc32w (%rdx), %edi # sched: [7:1.00]
; SANDY-NEXT: crc32w (%rdx), %edi # sched: [8:1.00]
; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: crc32_32_16:
; HASWELL: # BB#0:
; HASWELL-NEXT: crc32w %si, %edi # sched: [3:1.00]
; HASWELL-NEXT: crc32w (%rdx), %edi # sched: [7:1.00]
; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: crc32_32_16:
; BTVER2: # BB#0:
@@ -112,14 +112,14 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1, i32 *%a2) {
; SANDY-NEXT: crc32l %esi, %edi # sched: [3:1.00]
; SANDY-NEXT: crc32l (%rdx), %edi # sched: [7:1.00]
; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: crc32_32_32:
; HASWELL: # BB#0:
; HASWELL-NEXT: crc32l %esi, %edi # sched: [3:1.00]
; HASWELL-NEXT: crc32l (%rdx), %edi # sched: [7:1.00]
; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: crc32_32_32:
; BTVER2: # BB#0:
@@ -152,16 +152,16 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1, i8 *%a2) nounwind {
; SANDY-LABEL: crc32_64_8:
; SANDY: # BB#0:
; SANDY-NEXT: crc32b %sil, %edi # sched: [3:1.00]
; SANDY-NEXT: crc32b (%rdx), %edi # sched: [7:1.00]
; SANDY-NEXT: crc32b (%rdx), %edi # sched: [8:1.00]
; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: crc32_64_8:
; HASWELL: # BB#0:
; HASWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00]
; HASWELL-NEXT: crc32b (%rdx), %edi # sched: [7:1.00]
; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: crc32_64_8:
; BTVER2: # BB#0:
@@ -196,14 +196,14 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1, i64 *%a2) {
; SANDY-NEXT: crc32q %rsi, %rdi # sched: [3:1.00]
; SANDY-NEXT: crc32q (%rdx), %rdi # sched: [7:1.00]
; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: crc32_64_64:
; HASWELL: # BB#0:
; HASWELL-NEXT: crc32q %rsi, %rdi # sched: [3:1.00]
; HASWELL-NEXT: crc32q (%rdx), %rdi # sched: [7:1.00]
; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: crc32_64_64:
; BTVER2: # BB#0:
@@ -256,20 +256,20 @@ define i32 @test_pcmpestri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SANDY-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [4:2.33]
; SANDY-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; SANDY-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pcmpestri:
; HASWELL: # BB#0:
; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25]
; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25]
; HASWELL-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [11:3.00]
; HASWELL-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00]
; HASWELL-NEXT: movl %ecx, %esi # sched: [1:0.25]
; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25]
; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25]
; HASWELL-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [11:3.00]
; HASWELL-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [18:4.00]
; HASWELL-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; HASWELL-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pcmpestri:
; BTVER2: # BB#0:
@@ -320,17 +320,17 @@ define <16 x i8> @test_pcmpestrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SANDY-NEXT: movl $7, %eax # sched: [1:0.33]
; SANDY-NEXT: movl $7, %edx # sched: [1:0.33]
; SANDY-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [11:2.33]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pcmpestrm:
; HASWELL: # BB#0:
; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25]
; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25]
; HASWELL-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [10:4.00]
; HASWELL-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00]
; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25]
; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25]
; HASWELL-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [10:3.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [19:4.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pcmpestrm:
; BTVER2: # BB#0:
@@ -369,12 +369,12 @@ define i32 @test_pcmpistri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
;
; SANDY-LABEL: test_pcmpistri:
; SANDY: # BB#0:
; SANDY-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00]
; SANDY-NEXT: movl %ecx, %eax # sched: [1:0.33]
; SANDY-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00]
; SANDY-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; SANDY-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pcmpistri:
; HASWELL: # BB#0:
@@ -383,7 +383,7 @@ define i32 @test_pcmpistri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; HASWELL-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [11:3.00]
; HASWELL-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; HASWELL-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pcmpistri:
; BTVER2: # BB#0:
@@ -416,15 +416,15 @@ define <16 x i8> @test_pcmpistrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
;
; SANDY-LABEL: test_pcmpistrm:
; SANDY: # BB#0:
; SANDY-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
; SANDY-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pcmpistrm:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [10:3.00]
; HASWELL-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [10:3.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
; HASWELL-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [11:3.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pcmpistrm:
; BTVER2: # BB#0:
@@ -453,15 +453,15 @@ define <2 x i64> @test_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
;
; SANDY-LABEL: test_pcmpgtq:
; SANDY: # BB#0:
; SANDY-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pcmpgtq:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pcmpgtq:
; BTVER2: # BB#0:
134 changes: 67 additions & 67 deletions llvm/test/CodeGen/X86/ssse3-schedule.ll
@@ -35,16 +35,16 @@ define <16 x i8> @test_pabsb(<16 x i8> %a0, <16 x i8> *%a1) {
; SANDY-LABEL: test_pabsb:
; SANDY: # BB#0:
; SANDY-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpabsb (%rdi), %xmm1 # sched: [5:0.50]
; SANDY-NEXT: vpabsb (%rdi), %xmm1 # sched: [7:0.50]
; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pabsb:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpabsb (%rdi), %xmm1 # sched: [5:0.50]
; HASWELL-NEXT: vpabsb (%rdi), %xmm1 # sched: [1:0.50]
; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pabsb:
; BTVER2: # BB#0:
@@ -86,16 +86,16 @@ define <4 x i32> @test_pabsd(<4 x i32> %a0, <4 x i32> *%a1) {
; SANDY-LABEL: test_pabsd:
; SANDY: # BB#0:
; SANDY-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpabsd (%rdi), %xmm1 # sched: [5:0.50]
; SANDY-NEXT: vpabsd (%rdi), %xmm1 # sched: [7:0.50]
; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pabsd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpabsd (%rdi), %xmm1 # sched: [5:0.50]
; HASWELL-NEXT: vpabsd (%rdi), %xmm1 # sched: [1:0.50]
; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pabsd:
; BTVER2: # BB#0:
@@ -136,12 +136,12 @@ define <8 x i16> @test_pabsw(<8 x i16> %a0, <8 x i16> *%a1) {
; SANDY-LABEL: test_pabsw:
; SANDY: # BB#0:
; SANDY-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pabsw:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pabsw:
; BTVER2: # BB#0:
@@ -182,14 +182,14 @@ define <8 x i16> @test_palignr(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SANDY-LABEL: test_palignr:
; SANDY: # BB#0:
; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50]
; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_palignr:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
; HASWELL-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_palignr:
; BTVER2: # BB#0:
@@ -223,15 +223,15 @@ define <4 x i32> @test_phaddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
;
; SANDY-LABEL: test_phaddd:
; SANDY: # BB#0:
; SANDY-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
; SANDY-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_phaddd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_phaddd:
; BTVER2: # BB#0:
@@ -274,15 +274,15 @@ define <8 x i16> @test_phaddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
;
; SANDY-LABEL: test_phaddsw:
; SANDY: # BB#0:
; SANDY-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
; SANDY-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_phaddsw:
; HASWELL: # BB#0:
; HASWELL-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_phaddsw:
; BTVER2: # BB#0:
@@ -317,15 +317,15 @@ define <8 x i16> @test_phaddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
;
; SANDY-LABEL: test_phaddw:
; SANDY: # BB#0:
; SANDY-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
; SANDY-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_phaddw:
; HASWELL: # BB#0:
; HASWELL-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_phaddw:
; BTVER2: # BB#0:
@@ -360,15 +360,15 @@ define <4 x i32> @test_phsubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
;
; SANDY-LABEL: test_phsubd:
; SANDY: # BB#0:
; SANDY-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
; SANDY-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_phsubd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_phsubd:
; BTVER2: # BB#0:
@@ -411,15 +411,15 @@ define <8 x i16> @test_phsubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
;
; SANDY-LABEL: test_phsubsw:
; SANDY: # BB#0:
; SANDY-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
; SANDY-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_phsubsw:
; HASWELL: # BB#0:
; HASWELL-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_phsubsw:
; BTVER2: # BB#0:
@@ -454,15 +454,15 @@ define <8 x i16> @test_phsubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
;
; SANDY-LABEL: test_phsubw:
; SANDY: # BB#0:
; SANDY-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
; SANDY-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_phsubw:
; HASWELL: # BB#0:
; HASWELL-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_phsubw:
; BTVER2: # BB#0:
@@ -497,15 +497,15 @@ define <8 x i16> @test_pmaddubsw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
;
; SANDY-LABEL: test_pmaddubsw:
; SANDY: # BB#0:
; SANDY-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmaddubsw:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pmaddubsw:
; BTVER2: # BB#0:
@@ -538,13 +538,13 @@ define <8 x i16> @test_pmulhrsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
;
; SANDY-LABEL: test_pmulhrsw:
; SANDY: # BB#0:
; SANDY-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmulhrsw:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pmulhrsw:
; BTVER2: # BB#0:
@@ -579,14 +579,14 @@ define <16 x i8> @test_pshufb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SANDY-LABEL: test_pshufb:
; SANDY: # BB#0:
; SANDY-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pshufb:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pshufb:
; BTVER2: # BB#0:
@@ -630,14 +630,14 @@ define <16 x i8> @test_psignb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SANDY-LABEL: test_psignb:
; SANDY: # BB#0:
; SANDY-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psignb:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_psignb:
; BTVER2: # BB#0:
@@ -681,14 +681,14 @@ define <4 x i32> @test_psignd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SANDY-LABEL: test_psignd:
; SANDY: # BB#0:
; SANDY-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psignd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_psignd:
; BTVER2: # BB#0:
@@ -732,14 +732,14 @@ define <8 x i16> @test_psignw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SANDY-LABEL: test_psignw:
; SANDY: # BB#0:
; SANDY-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
; SANDY-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psignw:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_psignw:
; BTVER2: # BB#0:
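Reviewer note: every ssse3-schedule test above follows one shape: exercise the register form, rerun the op with a folded load, and OR the two results so neither is dead. A sketch of that pattern (the function name is assumed; the intrinsic is the same one test_pabsb already calls):

define <16 x i8> @pabsb_pattern_sketch(<16 x i8> %a0, <16 x i8> *%a1) {
  %reg = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0)  ; reg-reg form: vpabsb %xmm0
  %ld = load <16 x i8>, <16 x i8>* %a1, align 16
  %mem = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %ld)  ; load folds into vpabsb (%rdi)
  %res = or <16 x i8> %reg, %mem                                   ; vpor keeps both results live
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>)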
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
@@ -201,14 +201,14 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-NEXT: vpsraw $2, %ymm0, %ymm5
; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm8
; AVX512DQ-NEXT: vpblendvb %ymm8, %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; AVX512DQ-NEXT: vpsraw $4, %ymm5, %ymm9
; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm9, %ymm5, %ymm3
; AVX512DQ-NEXT: vpsraw $1, %ymm0, %ymm5
; AVX512DQ-NEXT: vpaddw %ymm8, %ymm8, %ymm9
; AVX512DQ-NEXT: vpblendvb %ymm9, %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512DQ-NEXT: vpackuswb %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; AVX512DQ-NEXT: vpsraw $4, %ymm4, %ymm5
; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm3
; AVX512DQ-NEXT: vpsraw $2, %ymm3, %ymm4
; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT: vpsraw $1, %ymm3, %ymm4
@@ -328,14 +328,14 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-NEXT: vpsraw $2, %ymm0, %ymm5
; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm8
; AVX512DQ-NEXT: vpblendvb %ymm8, %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; AVX512DQ-NEXT: vpsraw $4, %ymm5, %ymm9
; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm9, %ymm5, %ymm3
; AVX512DQ-NEXT: vpsraw $1, %ymm0, %ymm5
; AVX512DQ-NEXT: vpaddw %ymm8, %ymm8, %ymm9
; AVX512DQ-NEXT: vpblendvb %ymm9, %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512DQ-NEXT: vpackuswb %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; AVX512DQ-NEXT: vpsraw $4, %ymm4, %ymm5
; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm3
; AVX512DQ-NEXT: vpsraw $2, %ymm3, %ymm4
; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT: vpsraw $1, %ymm3, %ymm4
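Reviewer note: the AVX512DQ blocks above are the byte-wise arithmetic-shift lowering for targets without AVX512BW (no native 512-bit byte shifts), built from 16-bit unpacks, vpsraw steps, and vpblendvb selects; the hunks only renumber registers. The IR behind the splatvar case is just a splat feeding an ashr, sketched here with an assumed function name:

define <64 x i8> @splatvar_ashr_sketch(<64 x i8> %a, <64 x i8> %b) nounwind {
  ; Broadcast lane 0 of %b across all 64 bytes.
  %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
  ; Per-byte arithmetic shift right; expanded as in the checks above.
  %shift = ashr <64 x i8> %a, %splat
  ret <64 x i8> %shift
}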
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll
@@ -68,13 +68,13 @@ define <32 x i16> @shuffle_v32i16_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_1
; KNL-NEXT: vpshufb {{.*#+}} xmm4 = xmm1[8,9,12,13,12,13,10,11,0,1,4,5,4,5,0,1]
; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,1,0,3]
; KNL-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[0,3,2,2,4,5,6,7]
; KNL-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm1
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm5
; KNL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,10,11,8,9,14,15,4,5,2,3,2,3,6,7]
; KNL-NEXT: vpshufb {{.*#+}} xmm5 = xmm5[6,7,2,3,4,5,6,7,2,3,2,3,0,1,14,15]
; KNL-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0
; KNL-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; KNL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[6,7,2,3,4,5,6,7,2,3,2,3,0,1,14,15]
; KNL-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm5
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm5[1],ymm0[2],ymm5[3],ymm0[4],ymm5[5],ymm0[6],ymm5[7],ymm0[8],ymm5[9],ymm0[10],ymm5[11],ymm0[12],ymm5[13],ymm0[14],ymm5[15]
; KNL-NEXT: vextracti128 $1, %ymm3, %xmm3
; KNL-NEXT: vpbroadcastw %xmm3, %ymm3
; KNL-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0]