48 changes: 24 additions & 24 deletions llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16-fma.ll

Large diffs are not rendered by default.

24 changes: 12 additions & 12 deletions llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16.ll
@@ -50,7 +50,7 @@ define <32 x half> @stack_fold_addph_zmm_kz(<32 x half> %a0, <32 x half> %a1, i3

define half @stack_fold_addsh(half %a0, half %a1) {
;CHECK-LABEL: stack_fold_addsh
-;CHECK: vaddsh {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 2-byte Folded Reload
+;CHECK: vaddsh {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = fadd half %a0, %a1
ret half %2
@@ -107,7 +107,7 @@ define <32 x half> @stack_fold_cmpph_mask_commuted(<32 x half> %a0, <32 x half>

define half @stack_fold_divsh(half %a0, half %a1) {
;CHECK-LABEL: stack_fold_divsh
-;CHECK: vdivsh {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 2-byte Folded Reload
+;CHECK: vdivsh {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = fdiv half %a0, %a1
ret half %2
@@ -390,7 +390,7 @@ define <32 x half> @stack_fold_maxph_zmm_commutable_kz_commuted(<32 x half> %a0,

define half @stack_fold_maxsh(half %a0, half %a1) #0 {
;CHECK-LABEL: stack_fold_maxsh:
-;CHECK: vmaxsh {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 2-byte Folded Reload
+;CHECK: vmaxsh {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = fcmp ogt half %a0, %a1
%3 = select i1 %2, half %a0, half %a1
@@ -399,7 +399,7 @@ define half @stack_fold_maxsh(half %a0, half %a1) #0 {

define half @stack_fold_maxsh_commuted(half %a0, half %a1) #0 {
;CHECK-LABEL: stack_fold_maxsh_commuted:
-;CHECK-NOT: vmaxsh {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 2-byte Folded Reload
+;CHECK-NOT: vmaxsh {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = fcmp ogt half %a1, %a0
%3 = select i1 %2, half %a1, half %a0
@@ -408,7 +408,7 @@ define half @stack_fold_maxsh_commuted(half %a0, half %a1) #0 {

define half @stack_fold_maxsh_commutable(half %a0, half %a1) #1 {
;CHECK-LABEL: stack_fold_maxsh_commutable:
-;CHECK: vmaxsh {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 2-byte Folded Reload
+;CHECK: vmaxsh {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = fcmp ogt half %a0, %a1
%3 = select i1 %2, half %a0, half %a1
@@ -417,7 +417,7 @@ define half @stack_fold_maxsh_commutable(half %a0, half %a1) #1 {

define half @stack_fold_maxsh_commutable_commuted(half %a0, half %a1) #1 {
;CHECK-LABEL: stack_fold_maxsh_commutable_commuted:
-;CHECK: vmaxsh {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 2-byte Folded Reload
+;CHECK: vmaxsh {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = fcmp ogt half %a1, %a0
%3 = select i1 %2, half %a1, half %a0
@@ -569,7 +569,7 @@ define <32 x half> @stack_fold_minph_zmm_commutable_kz_commuted(<32 x half> %a0,

define half @stack_fold_minsh(half %a0, half %a1) #0 {
;CHECK-LABEL: stack_fold_minsh:
-;CHECK: vminsh {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 2-byte Folded Reload
+;CHECK: vminsh {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = fcmp olt half %a0, %a1
%3 = select i1 %2, half %a0, half %a1
@@ -578,7 +578,7 @@ define half @stack_fold_minsh(half %a0, half %a1) #0 {

define half @stack_fold_minsh_commuted(half %a0, half %a1) #0 {
;CHECK-LABEL: stack_fold_minsh_commuted:
-;CHECK-NOT: vminsh {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 2-byte Folded Reload
+;CHECK-NOT: vminsh {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = fcmp olt half %a1, %a0
%3 = select i1 %2, half %a1, half %a0
@@ -587,7 +587,7 @@ define half @stack_fold_minsh_commuted(half %a0, half %a1) #0 {

define half @stack_fold_minsh_commutable(half %a0, half %a1) #1 {
;CHECK-LABEL: stack_fold_minsh_commutable:
-;CHECK: vminsh {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 2-byte Folded Reload
+;CHECK: vminsh {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = fcmp olt half %a0, %a1
%3 = select i1 %2, half %a0, half %a1
@@ -596,7 +596,7 @@ define half @stack_fold_minsh_commutable(half %a0, half %a1) #1 {

define half @stack_fold_minsh_commutable_commuted(half %a0, half %a1) #1 {
;CHECK-LABEL: stack_fold_minsh_commutable_commuted:
-;CHECK: vminsh {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 2-byte Folded Reload
+;CHECK: vminsh {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = fcmp olt half %a1, %a0
%3 = select i1 %2, half %a1, half %a0
@@ -671,7 +671,7 @@ define <32 x half> @stack_fold_mulph_zmm_kz(<32 x half> %a0, <32 x half> %a1, i3

define half @stack_fold_mulsh(half %a0, half %a1) {
;CHECK-LABEL: stack_fold_mulsh
-;CHECK-NOT: vmulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 2-byte Folded Reload
+;CHECK-NOT: vmulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = fmul half %a0, %a1
ret half %2
@@ -972,7 +972,7 @@ define <32 x half> @stack_fold_subph_zmm(<32 x half> %a0, <32 x half> %a1) {

define half @stack_fold_subsh(half %a0, half %a1) {
;CHECK-LABEL: stack_fold_subsh
-;CHECK: vsubsh {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 2-byte Folded Reload
+;CHECK: vsubsh {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = fsub half %a0, %a1
ret half %2
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir
@@ -340,7 +340,7 @@ body: |
; CHECK: CMP64rr [[NOT64r2]], [[COPY6]], implicit-def $eflags
; CHECK: undef %102.sub_32bit:gr64_with_sub_8bit = MOV32ri 0
; CHECK: [[CMOV64rr:%[0-9]+]]:gr64 = CMOV64rr [[CMOV64rr]], %102, 4, implicit killed $eflags
-; CHECK: INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4390921 /* reguse:GR64 */, %102, 4390921 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags
+; CHECK: INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4456457 /* reguse:GR64 */, %102, 4456457 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags
; CHECK: LCMPXCHG32 undef %67:gr64, 1, $noreg, 0, $noreg, [[COPY5]], implicit-def dead $eax, implicit-def dead $eflags, implicit undef $eax :: (load store acquire monotonic (s32) on `i32 addrspace(1)* undef`, addrspace 1)
; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
; CHECK: $rdi = COPY [[COPY4]]
@@ -456,7 +456,7 @@ body: |
%63:gr64 = NOT64r %63
CMP64rr %63, %31, implicit-def $eflags
%63:gr64 = CMOV64rr %63, %53, 4, implicit killed $eflags
-INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4390921 /* reguse:GR64 */, %53, 4390921 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags
+INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4456457 /* reguse:GR64 */, %53, 4456457 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags
LCMPXCHG32 undef %67:gr64, 1, $noreg, 0, $noreg, %65, implicit-def dead $eax, implicit-def dead $eflags, implicit undef $eax :: (load store acquire monotonic (s32) on `i32 addrspace(1)* undef`, addrspace 1)
ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
$rdi = COPY %64
56 changes: 27 additions & 29 deletions llvm/test/CodeGen/X86/vec_fp_to_int.ll
@@ -2101,58 +2101,56 @@ define <8 x i32> @fptoui_8f32_to_8i32_const(<8 x float> %a) {
define <4 x i32> @fptosi_2f16_to_4i32(<2 x half> %a) nounwind {
; SSE-LABEL: fptosi_2f16_to_4i32:
; SSE: # %bb.0:
-; SSE-NEXT: pushq %rbp
; SSE-NEXT: pushq %rbx
-; SSE-NEXT: pushq %rax
-; SSE-NEXT: movl %esi, %ebx
-; SSE-NEXT: movzwl %di, %edi
-; SSE-NEXT: callq __gnu_h2f_ieee@PLT
-; SSE-NEXT: cvttss2si %xmm0, %ebp
-; SSE-NEXT: movzwl %bx, %edi
-; SSE-NEXT: callq __gnu_h2f_ieee@PLT
+; SSE-NEXT: subq $16, %rsp
+; SSE-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; SSE-NEXT: callq __extendhfsf2@PLT
+; SSE-NEXT: cvttss2si %xmm0, %ebx
+; SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT: callq __extendhfsf2@PLT
; SSE-NEXT: cvttss2si %xmm0, %eax
; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: movd %ebp, %xmm1
+; SSE-NEXT: movd %ebx, %xmm1
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
-; SSE-NEXT: addq $8, %rsp
+; SSE-NEXT: addq $16, %rsp
; SSE-NEXT: popq %rbx
-; SSE-NEXT: popq %rbp
; SSE-NEXT: retq
;
; VEX-LABEL: fptosi_2f16_to_4i32:
; VEX: # %bb.0:
-; VEX-NEXT: pushq %rbp
; VEX-NEXT: pushq %rbx
-; VEX-NEXT: pushq %rax
-; VEX-NEXT: movl %esi, %ebx
-; VEX-NEXT: movzwl %di, %edi
-; VEX-NEXT: callq __gnu_h2f_ieee@PLT
-; VEX-NEXT: vcvttss2si %xmm0, %ebp
-; VEX-NEXT: movzwl %bx, %edi
-; VEX-NEXT: callq __gnu_h2f_ieee@PLT
+; VEX-NEXT: subq $16, %rsp
+; VEX-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; VEX-NEXT: callq __extendhfsf2@PLT
+; VEX-NEXT: vcvttss2si %xmm0, %ebx
+; VEX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; VEX-NEXT: # xmm0 = mem[0],zero,zero,zero
+; VEX-NEXT: callq __extendhfsf2@PLT
; VEX-NEXT: vcvttss2si %xmm0, %eax
; VEX-NEXT: vmovd %eax, %xmm0
-; VEX-NEXT: vmovd %ebp, %xmm1
+; VEX-NEXT: vmovd %ebx, %xmm1
; VEX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; VEX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
-; VEX-NEXT: addq $8, %rsp
+; VEX-NEXT: addq $16, %rsp
; VEX-NEXT: popq %rbx
-; VEX-NEXT: popq %rbp
; VEX-NEXT: retq
;
; AVX512-LABEL: fptosi_2f16_to_4i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: movzwl %di, %eax
-; AVX512-NEXT: vmovd %eax, %xmm0
-; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX512-NEXT: vcvttss2si %xmm0, %eax
-; AVX512-NEXT: movzwl %si, %ecx
+; AVX512-NEXT: vpextrw $0, %xmm1, %eax
+; AVX512-NEXT: vpextrw $0, %xmm0, %ecx
+; AVX512-NEXT: movzwl %cx, %ecx
; AVX512-NEXT: vmovd %ecx, %xmm0
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX512-NEXT: vcvttss2si %xmm0, %ecx
-; AVX512-NEXT: vmovd %ecx, %xmm0
-; AVX512-NEXT: vmovd %eax, %xmm1
+; AVX512-NEXT: movzwl %ax, %eax
+; AVX512-NEXT: vmovd %eax, %xmm0
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vcvttss2si %xmm0, %eax
+; AVX512-NEXT: vmovd %eax, %xmm0
+; AVX512-NEXT: vmovd %ecx, %xmm1
; AVX512-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; AVX512-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512-NEXT: retq
1,039 changes: 410 additions & 629 deletions llvm/test/CodeGen/X86/vector-half-conversions.ll

Large diffs are not rendered by default.

75 changes: 32 additions & 43 deletions llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
@@ -368,69 +368,58 @@ define half @test_v2f16(<2 x half> %a0) nounwind {
; SSE-LABEL: test_v2f16:
; SSE: # %bb.0:
; SSE-NEXT: pushq %rbp
-; SSE-NEXT: pushq %r14
; SSE-NEXT: pushq %rbx
-; SSE-NEXT: subq $16, %rsp
-; SSE-NEXT: movl %esi, %ebx
-; SSE-NEXT: movl %edi, %r14d
-; SSE-NEXT: movzwl %bx, %ebp
-; SSE-NEXT: movl %ebp, %edi
-; SSE-NEXT: callq __gnu_h2f_ieee@PLT
-; SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; SSE-NEXT: movzwl %r14w, %edi
-; SSE-NEXT: callq __gnu_h2f_ieee@PLT
+; SSE-NEXT: subq $40, %rsp
+; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: pextrw $0, %xmm1, %ebx
+; SSE-NEXT: pextrw $0, %xmm0, %ebp
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: callq __extendhfsf2@PLT
+; SSE-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: callq __extendhfsf2@PLT
; SSE-NEXT: ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
-; SSE-NEXT: movw %bp, {{[0-9]+}}(%rsp)
-; SSE-NEXT: cmoval %r14d, %ebx
-; SSE-NEXT: movw %bx, (%rsp)
-; SSE-NEXT: movl (%rsp), %eax
-; SSE-NEXT: # kill: def $ax killed $ax killed $eax
-; SSE-NEXT: addq $16, %rsp
+; SSE-NEXT: cmoval %ebp, %ebx
+; SSE-NEXT: pinsrw $0, %ebx, %xmm0
+; SSE-NEXT: addq $40, %rsp
; SSE-NEXT: popq %rbx
-; SSE-NEXT: popq %r14
; SSE-NEXT: popq %rbp
; SSE-NEXT: retq
;
; AVX-LABEL: test_v2f16:
; AVX: # %bb.0:
; AVX-NEXT: pushq %rbp
-; AVX-NEXT: pushq %r14
; AVX-NEXT: pushq %rbx
-; AVX-NEXT: subq $16, %rsp
-; AVX-NEXT: movl %esi, %ebx
-; AVX-NEXT: movl %edi, %r14d
-; AVX-NEXT: movzwl %bx, %ebp
-; AVX-NEXT: movl %ebp, %edi
-; AVX-NEXT: callq __gnu_h2f_ieee@PLT
-; AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; AVX-NEXT: movzwl %r14w, %edi
-; AVX-NEXT: callq __gnu_h2f_ieee@PLT
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vpextrw $0, %xmm1, %ebx
+; AVX-NEXT: vpextrw $0, %xmm0, %ebp
+; AVX-NEXT: vmovdqa %xmm1, %xmm0
+; AVX-NEXT: callq __extendhfsf2@PLT
+; AVX-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
+; AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX-NEXT: callq __extendhfsf2@PLT
; AVX-NEXT: vucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
-; AVX-NEXT: movw %bp, {{[0-9]+}}(%rsp)
-; AVX-NEXT: cmoval %r14d, %ebx
-; AVX-NEXT: movw %bx, (%rsp)
-; AVX-NEXT: movl (%rsp), %eax
-; AVX-NEXT: # kill: def $ax killed $ax killed $eax
-; AVX-NEXT: addq $16, %rsp
+; AVX-NEXT: cmoval %ebp, %ebx
+; AVX-NEXT: vpinsrw $0, %ebx, %xmm0, %xmm0
+; AVX-NEXT: addq $40, %rsp
; AVX-NEXT: popq %rbx
-; AVX-NEXT: popq %r14
; AVX-NEXT: popq %rbp
; AVX-NEXT: retq
;
; AVX512BW-LABEL: test_v2f16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: movzwl %si, %eax
-; AVX512BW-NEXT: vmovd %eax, %xmm0
+; AVX512BW-NEXT: vpextrw $0, %xmm0, %eax
+; AVX512BW-NEXT: vpextrw $0, %xmm1, %ecx
+; AVX512BW-NEXT: movzwl %cx, %ecx
+; AVX512BW-NEXT: vmovd %ecx, %xmm0
; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX512BW-NEXT: movzwl %di, %ecx
-; AVX512BW-NEXT: vmovd %ecx, %xmm1
+; AVX512BW-NEXT: movzwl %ax, %eax
+; AVX512BW-NEXT: vmovd %eax, %xmm1
; AVX512BW-NEXT: vcvtph2ps %xmm1, %xmm1
; AVX512BW-NEXT: vucomiss %xmm0, %xmm1
-; AVX512BW-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
-; AVX512BW-NEXT: cmoval %edi, %esi
-; AVX512BW-NEXT: movw %si, -{{[0-9]+}}(%rsp)
-; AVX512BW-NEXT: movl -{{[0-9]+}}(%rsp), %eax
-; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512BW-NEXT: cmoval %eax, %ecx
+; AVX512BW-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; AVX512FP16-LABEL: test_v2f16:
75 changes: 32 additions & 43 deletions llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
@@ -367,69 +367,58 @@ define half @test_v2f16(<2 x half> %a0) nounwind {
; SSE-LABEL: test_v2f16:
; SSE: # %bb.0:
; SSE-NEXT: pushq %rbp
-; SSE-NEXT: pushq %r14
; SSE-NEXT: pushq %rbx
-; SSE-NEXT: subq $16, %rsp
-; SSE-NEXT: movl %esi, %ebx
-; SSE-NEXT: movl %edi, %r14d
-; SSE-NEXT: movzwl %bx, %ebp
-; SSE-NEXT: movl %ebp, %edi
-; SSE-NEXT: callq __gnu_h2f_ieee@PLT
-; SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; SSE-NEXT: movzwl %r14w, %edi
-; SSE-NEXT: callq __gnu_h2f_ieee@PLT
+; SSE-NEXT: subq $40, %rsp
+; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: pextrw $0, %xmm1, %ebx
+; SSE-NEXT: pextrw $0, %xmm0, %ebp
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: callq __extendhfsf2@PLT
+; SSE-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: callq __extendhfsf2@PLT
; SSE-NEXT: ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
-; SSE-NEXT: movw %bp, {{[0-9]+}}(%rsp)
-; SSE-NEXT: cmovbl %r14d, %ebx
-; SSE-NEXT: movw %bx, (%rsp)
-; SSE-NEXT: movl (%rsp), %eax
-; SSE-NEXT: # kill: def $ax killed $ax killed $eax
-; SSE-NEXT: addq $16, %rsp
+; SSE-NEXT: cmovbl %ebp, %ebx
+; SSE-NEXT: pinsrw $0, %ebx, %xmm0
+; SSE-NEXT: addq $40, %rsp
; SSE-NEXT: popq %rbx
-; SSE-NEXT: popq %r14
; SSE-NEXT: popq %rbp
; SSE-NEXT: retq
;
; AVX-LABEL: test_v2f16:
; AVX: # %bb.0:
; AVX-NEXT: pushq %rbp
-; AVX-NEXT: pushq %r14
; AVX-NEXT: pushq %rbx
-; AVX-NEXT: subq $16, %rsp
-; AVX-NEXT: movl %esi, %ebx
-; AVX-NEXT: movl %edi, %r14d
-; AVX-NEXT: movzwl %bx, %ebp
-; AVX-NEXT: movl %ebp, %edi
-; AVX-NEXT: callq __gnu_h2f_ieee@PLT
-; AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; AVX-NEXT: movzwl %r14w, %edi
-; AVX-NEXT: callq __gnu_h2f_ieee@PLT
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vpextrw $0, %xmm1, %ebx
+; AVX-NEXT: vpextrw $0, %xmm0, %ebp
+; AVX-NEXT: vmovdqa %xmm1, %xmm0
+; AVX-NEXT: callq __extendhfsf2@PLT
+; AVX-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
+; AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX-NEXT: callq __extendhfsf2@PLT
; AVX-NEXT: vucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
-; AVX-NEXT: movw %bp, {{[0-9]+}}(%rsp)
-; AVX-NEXT: cmovbl %r14d, %ebx
-; AVX-NEXT: movw %bx, (%rsp)
-; AVX-NEXT: movl (%rsp), %eax
-; AVX-NEXT: # kill: def $ax killed $ax killed $eax
-; AVX-NEXT: addq $16, %rsp
+; AVX-NEXT: cmovbl %ebp, %ebx
+; AVX-NEXT: vpinsrw $0, %ebx, %xmm0, %xmm0
+; AVX-NEXT: addq $40, %rsp
; AVX-NEXT: popq %rbx
-; AVX-NEXT: popq %r14
; AVX-NEXT: popq %rbp
; AVX-NEXT: retq
;
; AVX512BW-LABEL: test_v2f16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: movzwl %si, %eax
-; AVX512BW-NEXT: vmovd %eax, %xmm0
+; AVX512BW-NEXT: vpextrw $0, %xmm0, %eax
+; AVX512BW-NEXT: vpextrw $0, %xmm1, %ecx
+; AVX512BW-NEXT: movzwl %cx, %ecx
+; AVX512BW-NEXT: vmovd %ecx, %xmm0
; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX512BW-NEXT: movzwl %di, %ecx
-; AVX512BW-NEXT: vmovd %ecx, %xmm1
+; AVX512BW-NEXT: movzwl %ax, %eax
+; AVX512BW-NEXT: vmovd %eax, %xmm1
; AVX512BW-NEXT: vcvtph2ps %xmm1, %xmm1
; AVX512BW-NEXT: vucomiss %xmm0, %xmm1
-; AVX512BW-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
-; AVX512BW-NEXT: cmovbl %edi, %esi
-; AVX512BW-NEXT: movw %si, -{{[0-9]+}}(%rsp)
-; AVX512BW-NEXT: movl -{{[0-9]+}}(%rsp), %eax
-; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512BW-NEXT: cmovbl %eax, %ecx
+; AVX512BW-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; AVX512FP16-LABEL: test_v2f16:
10 changes: 5 additions & 5 deletions llvm/test/MC/X86/x86_64-asm-match.s
@@ -5,16 +5,16 @@
// CHECK: Trying to match opcode MMX_PSHUFBrr
// CHECK: Matching formal operand class MCK_VR64 against actual operand at index 1 (Memory: ModeSize=64,BaseReg=rip,Scale=1,Disp=CPI1_0): Opcode result: multiple operand mismatches, ignoring this opcode
// CHECK: Trying to match opcode PSHUFBrr
-// CHECK: Matching formal operand class MCK_FR32 against actual operand at index 1 (Memory: ModeSize=64,BaseReg=rip,Scale=1,Disp=CPI1_0): Opcode result: multiple operand mismatches, ignoring this opcode
+// CHECK: Matching formal operand class MCK_FR16 against actual operand at index 1 (Memory: ModeSize=64,BaseReg=rip,Scale=1,Disp=CPI1_0): Opcode result: multiple operand mismatches, ignoring this opcode
// CHECK: Trying to match opcode PSHUFBrm
// CHECK: Matching formal operand class MCK_Mem128 against actual operand at index 1 (Memory: ModeSize=64,BaseReg=rip,Scale=1,Disp=CPI1_0): match success using generic matcher
-// CHECK: Matching formal operand class MCK_FR32 against actual operand at index 2 (Reg:xmm1): match success using generic matcher
+// CHECK: Matching formal operand class MCK_FR16 against actual operand at index 2 (Reg:xmm1): match success using generic matcher
// CHECK: Matching formal operand class InvalidMatchClass against actual operand at index 3: actual operand index out of range Opcode result: complete match, selecting this opcode
// CHECK: AsmMatcher: found 2 encodings with mnemonic 'sha1rnds4'
// CHECK: Trying to match opcode SHA1RNDS4rri
// CHECK: Matching formal operand class MCK_ImmUnsignedi8 against actual operand at index 1 (Imm:1): match success using generic matcher
-// CHECK: Matching formal operand class MCK_FR32 against actual operand at index 2 (Reg:xmm1): match success using generic matcher
-// CHECK: Matching formal operand class MCK_FR32 against actual operand at index 3 (Reg:xmm2): match success using generic matcher
+// CHECK: Matching formal operand class MCK_FR16 against actual operand at index 2 (Reg:xmm1): match success using generic matcher
+// CHECK: Matching formal operand class MCK_FR16 against actual operand at index 3 (Reg:xmm2): match success using generic matcher
// CHECK: Matching formal operand class InvalidMatchClass against actual operand at index 4: actual operand index out of range Opcode result: complete match, selecting this opcode
// CHECK: AsmMatcher: found 4 encodings with mnemonic 'pinsrw'
// CHECK: Trying to match opcode MMX_PINSRWrr
@@ -24,7 +24,7 @@
// CHECK: Trying to match opcode PINSRWrr
// CHECK: Matching formal operand class MCK_ImmUnsignedi8 against actual operand at index 1 (Imm:3): match success using generic matcher
// CHECK: Matching formal operand class MCK_GR32orGR64 against actual operand at index 2 (Reg:ecx): match success using generic matcher
-// CHECK: Matching formal operand class MCK_FR32 against actual operand at index 3 (Reg:xmm5): match success using generic matcher
+// CHECK: Matching formal operand class MCK_FR16 against actual operand at index 3 (Reg:xmm5): match success using generic matcher
// CHECK: Matching formal operand class InvalidMatchClass against actual operand at index 4: actual operand index out of range Opcode result: complete match, selecting this opcode
// CHECK: AsmMatcher: found 2 encodings with mnemonic 'crc32l'
// CHECK: Trying to match opcode CRC32r32r32