229 changes: 229 additions & 0 deletions llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 | FileCheck %s

declare half @llvm.minimum.f16(half, half)
declare half @llvm.maximum.f16(half, half)
declare <8 x half> @llvm.minimum.v8f16(<8 x half>, <8 x half>)
declare <8 x half> @llvm.maximum.v8f16(<8 x half>, <8 x half>)

; Scalar half llvm.minimum with no fast-math flags and no function attributes.
; The expected code compares the bit pattern of %x against 0x8000 (sign bit
; only) to build a mask that conditionally swaps the two operands before
; vminsh, then uses an unordered compare to blend in a constant-pool value
; (presumably a NaN; the constant itself is not visible in this test).
define half @test_fminimum(half %x, half %y) {
; CHECK-LABEL: test_fminimum:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovw %xmm0, %eax
; CHECK-NEXT: movzwl %ax, %eax
; CHECK-NEXT: cmpl $32768, %eax # imm = 0x8000
; CHECK-NEXT: sete %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: vmovaps %xmm0, %xmm2
; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vcmpunordsh %xmm1, %xmm0, %k2
; CHECK-NEXT: vmovsh %xmm0, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vminsh %xmm1, %xmm2, %xmm0
; CHECK-NEXT: vmovsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k2}
; CHECK-NEXT: retq
%z = call half @llvm.minimum.f16(half %x, half %y)
ret half %z
}

; <8 x half> llvm.minimum under the function attributes "no-nans-fp-math" and
; "no-signed-zeros-fp-math". The expected code handles the vector one lane at a
; time: each lane pair is shifted/shuffled into element 0, combined with a
; scalar vminsh (eight in total), and the results are re-packed with the
; vpunpckl* sequence. No zero or NaN fix-up instructions are expected.
define <8 x half> @test_fminimum_scalarize(<8 x half> %x, <8 x half> %y) "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" {
; CHECK-LABEL: test_fminimum_scalarize:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsrldq {{.*#+}} xmm2 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: vminsh %xmm2, %xmm3, %xmm2
; CHECK-NEXT: vshufps {{.*#+}} xmm3 = xmm1[3,3,3,3]
; CHECK-NEXT: vshufps {{.*#+}} xmm4 = xmm0[3,3,3,3]
; CHECK-NEXT: vminsh %xmm3, %xmm4, %xmm3
; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; CHECK-NEXT: vpsrldq {{.*#+}} xmm3 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: vpsrldq {{.*#+}} xmm4 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: vminsh %xmm3, %xmm4, %xmm3
; CHECK-NEXT: vshufpd {{.*#+}} xmm4 = xmm1[1,0]
; CHECK-NEXT: vshufpd {{.*#+}} xmm5 = xmm0[1,0]
; CHECK-NEXT: vminsh %xmm4, %xmm5, %xmm4
; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
; CHECK-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; CHECK-NEXT: vpsrlq $48, %xmm1, %xmm3
; CHECK-NEXT: vpsrlq $48, %xmm0, %xmm4
; CHECK-NEXT: vminsh %xmm3, %xmm4, %xmm3
; CHECK-NEXT: vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
; CHECK-NEXT: vmovshdup {{.*#+}} xmm5 = xmm0[1,1,3,3]
; CHECK-NEXT: vminsh %xmm4, %xmm5, %xmm4
; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
; CHECK-NEXT: vminsh %xmm1, %xmm0, %xmm4
; CHECK-NEXT: vpsrld $16, %xmm1, %xmm1
; CHECK-NEXT: vpsrld $16, %xmm0, %xmm0
; CHECK-NEXT: vminsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
; CHECK-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; CHECK-NEXT: retq
%r = call <8 x half> @llvm.minimum.v8f16(<8 x half> %x, <8 x half> %y)
ret <8 x half> %r
}

; Scalar half llvm.minimum where the enclosing function carries the
; "no-nans-fp-math" attribute. The expected code has no unordered compare and
; no constant-pool load: a vfpclasssh $5 on %y (xmm1) produces the mask that
; drives the conditional operand swap ahead of a single vminsh.
define half @test_fminimum_nnan(half %x, half %y) "no-nans-fp-math"="true" {
; CHECK-LABEL: test_fminimum_nnan:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclasssh $5, %xmm1, %k1
; CHECK-NEXT: vmovaps %xmm0, %xmm2
; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovsh %xmm0, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vminsh %xmm2, %xmm1, %xmm0
; CHECK-NEXT: retq
%1 = tail call half @llvm.minimum.f16(half %x, half %y)
ret half %1
}

; Scalar half llvm.minimum whose first operand is the constant -0.0; the %x
; parameter is intentionally unused. The expected code folds the constant into
; a memory operand of vminsh, and only checks %y against itself for the
; unordered case before blending in the constant-pool value loaded into xmm2.
define half @test_fminimum_zero(half %x, half %y) {
; CHECK-LABEL: test_fminimum_zero:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; CHECK-NEXT: vcmpunordsh %xmm1, %xmm1, %k1
; CHECK-NEXT: vminsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
; CHECK-NEXT: vmovsh %xmm2, %xmm0, %xmm0 {%k1}
; CHECK-NEXT: retq
%1 = tail call half @llvm.minimum.f16(half -0.0, half %y)
ret half %1
}

; Scalar half llvm.minimum with the nsz fast-math flag on the call itself (the
; function has no attributes). The expected code has no operand-swap sequence:
; just vminsh plus an unordered compare that blends in a constant-pool value.
define half @test_fminimum_nsz(half %x, half %y) {
; CHECK-LABEL: test_fminimum_nsz:
; CHECK: # %bb.0:
; CHECK-NEXT: vcmpunordsh %xmm1, %xmm0, %k1
; CHECK-NEXT: vminsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vmovsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k1}
; CHECK-NEXT: retq
%1 = tail call nsz half @llvm.minimum.f16(half %x, half %y)
ret half %1
}

; Scalar half llvm.minimum of %x and the result of `fdiv nnan %y, %x`.
; The expected code contains a single classification (vfpclasssh $5 on xmm0)
; feeding the masked operand swap, and no separate vcmpunordsh / constant-pool
; blend after the vminsh.
define half @test_fminimum_combine_cmps(half %x, half %y) {
; CHECK-LABEL: test_fminimum_combine_cmps:
; CHECK: # %bb.0:
; CHECK-NEXT: vdivsh %xmm0, %xmm1, %xmm1
; CHECK-NEXT: vfpclasssh $5, %xmm0, %k1
; CHECK-NEXT: vmovaps %xmm1, %xmm2
; CHECK-NEXT: vmovsh %xmm0, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k1}
; CHECK-NEXT: vminsh %xmm2, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = fdiv nnan half %y, %x
%2 = tail call half @llvm.minimum.f16(half %x, half %1)
ret half %2
}

; Scalar half llvm.maximum with no fast-math flags and no function attributes.
; Mirrors test_fminimum, except the mask for the conditional operand swap comes
; from testing the bit pattern of %x for all-zero (testw) and the combining
; instruction is vmaxsh. An unordered compare then blends in a constant-pool
; value (presumably a NaN; the constant itself is not visible in this test).
define half @test_fmaximum(half %x, half %y) {
; CHECK-LABEL: test_fmaximum:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovw %xmm0, %eax
; CHECK-NEXT: testw %ax, %ax
; CHECK-NEXT: sete %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: vmovaps %xmm0, %xmm2
; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vcmpunordsh %xmm1, %xmm0, %k2
; CHECK-NEXT: vmovsh %xmm0, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmaxsh %xmm1, %xmm2, %xmm0
; CHECK-NEXT: vmovsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k2}
; CHECK-NEXT: retq
%r = call half @llvm.maximum.f16(half %x, half %y)
ret half %r
}

; <8 x half> llvm.maximum under the function attributes "no-nans-fp-math" and
; "no-signed-zeros-fp-math". Same shape as test_fminimum_scalarize: each lane
; pair is moved into element 0, combined with a scalar vmaxsh (eight in
; total), and the results are re-packed with the vpunpckl* sequence.
define <8 x half> @test_fmaximum_scalarize(<8 x half> %x, <8 x half> %y) "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" {
; CHECK-LABEL: test_fmaximum_scalarize:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsrldq {{.*#+}} xmm2 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: vmaxsh %xmm2, %xmm3, %xmm2
; CHECK-NEXT: vshufps {{.*#+}} xmm3 = xmm1[3,3,3,3]
; CHECK-NEXT: vshufps {{.*#+}} xmm4 = xmm0[3,3,3,3]
; CHECK-NEXT: vmaxsh %xmm3, %xmm4, %xmm3
; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; CHECK-NEXT: vpsrldq {{.*#+}} xmm3 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: vpsrldq {{.*#+}} xmm4 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: vmaxsh %xmm3, %xmm4, %xmm3
; CHECK-NEXT: vshufpd {{.*#+}} xmm4 = xmm1[1,0]
; CHECK-NEXT: vshufpd {{.*#+}} xmm5 = xmm0[1,0]
; CHECK-NEXT: vmaxsh %xmm4, %xmm5, %xmm4
; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
; CHECK-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; CHECK-NEXT: vpsrlq $48, %xmm1, %xmm3
; CHECK-NEXT: vpsrlq $48, %xmm0, %xmm4
; CHECK-NEXT: vmaxsh %xmm3, %xmm4, %xmm3
; CHECK-NEXT: vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
; CHECK-NEXT: vmovshdup {{.*#+}} xmm5 = xmm0[1,1,3,3]
; CHECK-NEXT: vmaxsh %xmm4, %xmm5, %xmm4
; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
; CHECK-NEXT: vmaxsh %xmm1, %xmm0, %xmm4
; CHECK-NEXT: vpsrld $16, %xmm1, %xmm1
; CHECK-NEXT: vpsrld $16, %xmm0, %xmm0
; CHECK-NEXT: vmaxsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
; CHECK-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; CHECK-NEXT: retq
%r = call <8 x half> @llvm.maximum.v8f16(<8 x half> %x, <8 x half> %y)
ret <8 x half> %r
}

; Scalar half llvm.maximum whose operands are both produced by nnan-flagged
; arithmetic (fadd nnan, fsub nnan); neither the call nor the function carries
; any flag or attribute. The expected code has no unordered compare: a
; vfpclasssh $3 on the fsub result (xmm0) drives the masked operand swap that
; precedes a single vmaxsh.
define half @test_fmaximum_nnan(half %x, half %y) {
; CHECK-LABEL: test_fmaximum_nnan:
; CHECK: # %bb.0:
; CHECK-NEXT: vaddsh %xmm1, %xmm0, %xmm2
; CHECK-NEXT: vsubsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vfpclasssh $3, %xmm0, %k1
; CHECK-NEXT: vmovaps %xmm2, %xmm1
; CHECK-NEXT: vmovsh %xmm0, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovsh %xmm2, %xmm0, %xmm0 {%k1}
; CHECK-NEXT: vmaxsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = fadd nnan half %x, %y
%2 = fsub nnan half %x, %y
%3 = tail call half @llvm.maximum.f16(half %1, half %2)
ret half %3
}

; Scalar half llvm.maximum whose first operand is the constant 0.0; the %x
; parameter is intentionally unused. The expected code materialises zero with
; vxorps, applies vmaxsh against %y, and only checks %y against itself for the
; unordered case before blending in the constant-pool value loaded into xmm2.
define half @test_fmaximum_zero(half %x, half %y) {
; CHECK-LABEL: test_fmaximum_zero:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vmaxsh %xmm0, %xmm1, %xmm0
; CHECK-NEXT: vmovsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; CHECK-NEXT: vcmpunordsh %xmm1, %xmm1, %k1
; CHECK-NEXT: vmovsh %xmm2, %xmm0, %xmm0 {%k1}
; CHECK-NEXT: retq
%1 = tail call half @llvm.maximum.f16(half 0.0, half %y)
ret half %1
}

; Scalar half llvm.maximum where no-signed-zeros comes from the function
; attribute "no-signed-zeros-fp-math" rather than from a flag on the call
; (compare test_fminimum_nsz, which uses the call-site nsz flag). The expected
; code is a plain vmaxsh plus an unordered compare that blends in a
; constant-pool value; there is no operand-swap sequence.
define half @test_fmaximum_nsz(half %x, half %y) "no-signed-zeros-fp-math"="true" {
; CHECK-LABEL: test_fmaximum_nsz:
; CHECK: # %bb.0:
; CHECK-NEXT: vcmpunordsh %xmm1, %xmm0, %k1
; CHECK-NEXT: vmaxsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vmovsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k1}
; CHECK-NEXT: retq
%1 = tail call half @llvm.maximum.f16(half %x, half %y)
ret half %1
}

; Scalar half llvm.maximum of %x and the result of `fdiv nnan %y, %x`.
; The expected code contains a single classification (vfpclasssh $3 on xmm0)
; feeding the masked operand swap, and no separate vcmpunordsh / constant-pool
; blend after the vmaxsh.
define half @test_fmaximum_combine_cmps(half %x, half %y) {
; CHECK-LABEL: test_fmaximum_combine_cmps:
; CHECK: # %bb.0:
; CHECK-NEXT: vdivsh %xmm0, %xmm1, %xmm1
; CHECK-NEXT: vfpclasssh $3, %xmm0, %k1
; CHECK-NEXT: vmovaps %xmm1, %xmm2
; CHECK-NEXT: vmovsh %xmm0, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k1}
; CHECK-NEXT: vmaxsh %xmm2, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = fdiv nnan half %y, %x
%2 = tail call half @llvm.maximum.f16(half %x, half %1)
ret half %2
}
44 changes: 34 additions & 10 deletions llvm/test/CodeGen/X86/extract-fp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -105,17 +105,41 @@ define double @ext_minnum_v2f64(<2 x double> %x) nounwind {
ret double %r
}

;define double @ext_maximum_v4f64(<2 x double> %x) nounwind {
; %v = call <2 x double> @llvm.maximum.v2f64(<2 x double> %x, <2 x double> <double 42.0, double 43.0>)
; %r = extractelement <2 x double> %v, i32 1
; ret double %r
;}
; Extracts element 1 of llvm.maximum.v2f64(%x, <42.0, 43.0>). The expected code
; moves the high element of %x into lane 0 first (unpckhpd) and then performs a
; scalar maxsd, followed by an unordered compare and and/andn/or blend with a
; value loaded from memory.
; NOTE(review): the function name says v4f64 but every type here is
; <2 x double> - confirm whether the name is intentional before renaming.
define double @ext_maximum_v4f64(<2 x double> %x) nounwind {
; CHECK-LABEL: ext_maximum_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: maxsd %xmm0, %xmm1
; CHECK-NEXT: cmpunordsd %xmm0, %xmm0
; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
; CHECK-NEXT: andpd %xmm0, %xmm2
; CHECK-NEXT: andnpd %xmm1, %xmm0
; CHECK-NEXT: orpd %xmm2, %xmm0
; CHECK-NEXT: retq
%v = call <2 x double> @llvm.maximum.v2f64(<2 x double> %x, <2 x double> <double 42.0, double 43.0>)
%r = extractelement <2 x double> %v, i32 1
ret double %r
}

;define float @ext_minimum_v4f32(<4 x float> %x) nounwind {
; %v = call <4 x float> @llvm.minimum.v4f32(<4 x float> %x, <4 x float> <float 0.0, float 1.0, float 2.0, float 42.0>)
; %r = extractelement <4 x float> %v, i32 1
; ret float %r
;}
; Extracts element 1 of llvm.minimum.v4f32(%x, <0.0, 1.0, 2.0, 42.0>). The
; expected code splats element 1 of %x (shufps) and then performs a scalar
; minss, followed by an unordered compare and and/andn/or blend with a value
; loaded from memory.
define float @ext_minimum_v4f32(<4 x float> %x) nounwind {
; CHECK-LABEL: ext_minimum_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: minss %xmm0, %xmm1
; CHECK-NEXT: cmpunordss %xmm0, %xmm0
; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT: andps %xmm0, %xmm2
; CHECK-NEXT: andnps %xmm1, %xmm0
; CHECK-NEXT: orps %xmm2, %xmm0
; CHECK-NEXT: retq
%v = call <4 x float> @llvm.minimum.v4f32(<4 x float> %x, <4 x float> <float 0.0, float 1.0, float 2.0, float 42.0>)
%r = extractelement <4 x float> %v, i32 1
ret float %r
}

declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>)
declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>)
declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>)
216 changes: 196 additions & 20 deletions llvm/test/CodeGen/X86/extractelement-fp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -672,29 +672,205 @@ define double @fminnum_v4f64(<4 x double> %x, <4 x double> %y) nounwind {
ret double %r
}

;define float @fmaximum_v4f32(<4 x float> %x, <4 x float> %y) nounwind {
; %v = call <4 x float> @llvm.maximum.v4f32(<4 x float> %x, <4 x float> %y)
; %r = extractelement <4 x float> %v, i32 0
; ret float %r
;}
; Extracts element 0 of llvm.maximum.v4f32(%x, %y); checked under both the X64
; and X86 prefixes. The expected code is scalar: it branches on whether the low
; 32 bits of %x are all zero to pick the operand order for vmaxss, then blends
; in NaN via vcmpunordss/vblendvps. The X86 variant additionally spills the
; result to the stack and reloads it with flds for the return.
define float @fmaximum_v4f32(<4 x float> %x, <4 x float> %y) nounwind {
; X64-LABEL: fmaximum_v4f32:
; X64: # %bb.0:
; X64-NEXT: vmovd %xmm0, %eax
; X64-NEXT: testl %eax, %eax
; X64-NEXT: je .LBB30_1
; X64-NEXT: # %bb.2:
; X64-NEXT: vmovdqa %xmm1, %xmm2
; X64-NEXT: vmovdqa %xmm0, %xmm3
; X64-NEXT: jmp .LBB30_3
; X64-NEXT: .LBB30_1:
; X64-NEXT: vmovdqa %xmm0, %xmm2
; X64-NEXT: vmovdqa %xmm1, %xmm3
; X64-NEXT: .LBB30_3:
; X64-NEXT: vmaxss %xmm2, %xmm3, %xmm2
; X64-NEXT: vcmpunordss %xmm1, %xmm0, %xmm0
; X64-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; X64-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; X64-NEXT: retq
;
; X86-LABEL: fmaximum_v4f32:
; X86: # %bb.0:
; X86-NEXT: vmovd %xmm0, %eax
; X86-NEXT: testl %eax, %eax
; X86-NEXT: je .LBB30_1
; X86-NEXT: # %bb.2:
; X86-NEXT: vmovdqa %xmm1, %xmm2
; X86-NEXT: vmovdqa %xmm0, %xmm3
; X86-NEXT: jmp .LBB30_3
; X86-NEXT: .LBB30_1:
; X86-NEXT: vmovdqa %xmm0, %xmm2
; X86-NEXT: vmovdqa %xmm1, %xmm3
; X86-NEXT: .LBB30_3:
; X86-NEXT: pushl %eax
; X86-NEXT: vmaxss %xmm2, %xmm3, %xmm2
; X86-NEXT: vcmpunordss %xmm1, %xmm0, %xmm0
; X86-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; X86-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; X86-NEXT: vmovss %xmm0, (%esp)
; X86-NEXT: flds (%esp)
; X86-NEXT: popl %eax
; X86-NEXT: retl
%v = call <4 x float> @llvm.maximum.v4f32(<4 x float> %x, <4 x float> %y)
%r = extractelement <4 x float> %v, i32 0
ret float %r
}

;define double @fmaximum_v4f64(<4 x double> %x, <4 x double> %y) nounwind {
; %v = call <4 x double> @llvm.maximum.v4f64(<4 x double> %x, <4 x double> %y)
; %r = extractelement <4 x double> %v, i32 0
; ret double %r
;}
; Extracts element 0 of llvm.maximum.v4f64(%x, %y); checked under both the X64
; and X86 prefixes. The expected code is scalar: it branches on whether the low
; 64 bits of %x are all zero (one testq under X64; vpextrd/vmovd/orl of the two
; 32-bit halves under X86) to pick the operand order for vmaxsd, then blends in
; a constant-pool value via vcmpunordsd/vblendvpd. A vzeroupper precedes the
; return, and the X86 variant reloads the result with fldl from an 8-byte
; aligned stack slot.
define double @fmaximum_v4f64(<4 x double> %x, <4 x double> %y) nounwind {
; X64-LABEL: fmaximum_v4f64:
; X64: # %bb.0:
; X64-NEXT: vmovq %xmm0, %rax
; X64-NEXT: testq %rax, %rax
; X64-NEXT: je .LBB31_1
; X64-NEXT: # %bb.2:
; X64-NEXT: vmovdqa %xmm1, %xmm2
; X64-NEXT: vmovdqa %xmm0, %xmm3
; X64-NEXT: jmp .LBB31_3
; X64-NEXT: .LBB31_1:
; X64-NEXT: vmovdqa %xmm0, %xmm2
; X64-NEXT: vmovdqa %xmm1, %xmm3
; X64-NEXT: .LBB31_3:
; X64-NEXT: vmaxsd %xmm2, %xmm3, %xmm2
; X64-NEXT: vcmpunordsd %xmm1, %xmm0, %xmm0
; X64-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
; X64-NEXT: vzeroupper
; X64-NEXT: retq
;
; X86-LABEL: fmaximum_v4f64:
; X86: # %bb.0:
; X86-NEXT: vpextrd $1, %xmm0, %eax
; X86-NEXT: vmovd %xmm0, %ecx
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: je .LBB31_1
; X86-NEXT: # %bb.2:
; X86-NEXT: vmovdqa %xmm1, %xmm2
; X86-NEXT: vmovdqa %xmm0, %xmm3
; X86-NEXT: jmp .LBB31_3
; X86-NEXT: .LBB31_1:
; X86-NEXT: vmovdqa %xmm0, %xmm2
; X86-NEXT: vmovdqa %xmm1, %xmm3
; X86-NEXT: .LBB31_3:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
; X86-NEXT: vmaxsd %xmm2, %xmm3, %xmm2
; X86-NEXT: vcmpunordsd %xmm1, %xmm0, %xmm0
; X86-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2, %xmm0
; X86-NEXT: vmovlpd %xmm0, (%esp)
; X86-NEXT: fldl (%esp)
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: vzeroupper
; X86-NEXT: retl
%v = call <4 x double> @llvm.maximum.v4f64(<4 x double> %x, <4 x double> %y)
%r = extractelement <4 x double> %v, i32 0
ret double %r
}

;define float @fminimum_v4f32(<4 x float> %x, <4 x float> %y) nounwind {
; %v = call <4 x float> @llvm.minimum.v4f32(<4 x float> %x, <4 x float> %y)
; %r = extractelement <4 x float> %v, i32 0
; ret float %r
;}
; Extracts element 0 of llvm.minimum.v4f32(%x, %y); checked under both the X64
; and X86 prefixes. The expected code is scalar: it branches on whether the low
; 32 bits of %x equal 0x80000000 (sign bit only) to pick the operand order for
; vminss, then blends in NaN via vcmpunordss/vblendvps. The X86 variant
; additionally spills the result to the stack and reloads it with flds.
define float @fminimum_v4f32(<4 x float> %x, <4 x float> %y) nounwind {
; X64-LABEL: fminimum_v4f32:
; X64: # %bb.0:
; X64-NEXT: vmovd %xmm0, %eax
; X64-NEXT: cmpl $-2147483648, %eax # imm = 0x80000000
; X64-NEXT: je .LBB32_1
; X64-NEXT: # %bb.2:
; X64-NEXT: vmovdqa %xmm1, %xmm2
; X64-NEXT: vmovdqa %xmm0, %xmm3
; X64-NEXT: jmp .LBB32_3
; X64-NEXT: .LBB32_1:
; X64-NEXT: vmovdqa %xmm0, %xmm2
; X64-NEXT: vmovdqa %xmm1, %xmm3
; X64-NEXT: .LBB32_3:
; X64-NEXT: vminss %xmm2, %xmm3, %xmm2
; X64-NEXT: vcmpunordss %xmm1, %xmm0, %xmm0
; X64-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; X64-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; X64-NEXT: retq
;
; X86-LABEL: fminimum_v4f32:
; X86: # %bb.0:
; X86-NEXT: vmovd %xmm0, %eax
; X86-NEXT: cmpl $-2147483648, %eax # imm = 0x80000000
; X86-NEXT: je .LBB32_1
; X86-NEXT: # %bb.2:
; X86-NEXT: vmovdqa %xmm1, %xmm2
; X86-NEXT: vmovdqa %xmm0, %xmm3
; X86-NEXT: jmp .LBB32_3
; X86-NEXT: .LBB32_1:
; X86-NEXT: vmovdqa %xmm0, %xmm2
; X86-NEXT: vmovdqa %xmm1, %xmm3
; X86-NEXT: .LBB32_3:
; X86-NEXT: pushl %eax
; X86-NEXT: vminss %xmm2, %xmm3, %xmm2
; X86-NEXT: vcmpunordss %xmm1, %xmm0, %xmm0
; X86-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; X86-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; X86-NEXT: vmovss %xmm0, (%esp)
; X86-NEXT: flds (%esp)
; X86-NEXT: popl %eax
; X86-NEXT: retl
%v = call <4 x float> @llvm.minimum.v4f32(<4 x float> %x, <4 x float> %y)
%r = extractelement <4 x float> %v, i32 0
ret float %r
}

;define double @fminimum_v4f64(<4 x double> %x, <4 x double> %y) nounwind {
; %v = call <4 x double> @llvm.minimum.v4f64(<4 x double> %x, <4 x double> %y)
; %r = extractelement <4 x double> %v, i32 0
; ret double %r
;}
; Extracts element 0 of llvm.minimum.v4f64(%x, %y); checked under both the X64
; and X86 prefixes. The expected code is scalar: it branches on whether the low
; 64 bits of %x equal 0x8000000000000000 (sign bit only) to pick the operand
; order for vminsd. Under X64 this is a movabsq/cmpq pair; under X86 the high
; 32-bit half has 0x80000000 added and is OR-ed with the low half so the result
; is zero only for that bit pattern. A constant-pool value is then blended in
; via vcmpunordsd/vblendvpd, and a vzeroupper precedes the return.
define double @fminimum_v4f64(<4 x double> %x, <4 x double> %y) nounwind {
; X64-LABEL: fminimum_v4f64:
; X64: # %bb.0:
; X64-NEXT: vmovq %xmm0, %rax
; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: je .LBB33_1
; X64-NEXT: # %bb.2:
; X64-NEXT: vmovdqa %xmm1, %xmm2
; X64-NEXT: vmovdqa %xmm0, %xmm3
; X64-NEXT: jmp .LBB33_3
; X64-NEXT: .LBB33_1:
; X64-NEXT: vmovdqa %xmm0, %xmm2
; X64-NEXT: vmovdqa %xmm1, %xmm3
; X64-NEXT: .LBB33_3:
; X64-NEXT: vminsd %xmm2, %xmm3, %xmm2
; X64-NEXT: vcmpunordsd %xmm1, %xmm0, %xmm0
; X64-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
; X64-NEXT: vzeroupper
; X64-NEXT: retq
;
; X86-LABEL: fminimum_v4f64:
; X86: # %bb.0:
; X86-NEXT: vmovd %xmm0, %eax
; X86-NEXT: vpextrd $1, %xmm0, %ecx
; X86-NEXT: addl $-2147483648, %ecx # imm = 0x80000000
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: je .LBB33_1
; X86-NEXT: # %bb.2:
; X86-NEXT: vmovdqa %xmm1, %xmm2
; X86-NEXT: vmovdqa %xmm0, %xmm3
; X86-NEXT: jmp .LBB33_3
; X86-NEXT: .LBB33_1:
; X86-NEXT: vmovdqa %xmm0, %xmm2
; X86-NEXT: vmovdqa %xmm1, %xmm3
; X86-NEXT: .LBB33_3:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
; X86-NEXT: vminsd %xmm2, %xmm3, %xmm2
; X86-NEXT: vcmpunordsd %xmm1, %xmm0, %xmm0
; X86-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2, %xmm0
; X86-NEXT: vmovlpd %xmm0, (%esp)
; X86-NEXT: fldl (%esp)
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: vzeroupper
; X86-NEXT: retl
%v = call <4 x double> @llvm.minimum.v4f64(<4 x double> %x, <4 x double> %y)
%r = extractelement <4 x double> %v, i32 0
ret double %r
}

define float @maxps_v4f32(<4 x float> %x, <4 x float> %y) nounwind {
; X64-LABEL: maxps_v4f32:
Expand Down
1,024 changes: 1,024 additions & 0 deletions llvm/test/CodeGen/X86/fminimum-fmaximum.ll

Large diffs are not rendered by default.

63 changes: 11 additions & 52 deletions llvm/test/CodeGen/X86/half.ll
Original file line number Diff line number Diff line change
Expand Up @@ -954,11 +954,10 @@ define half @PR40273(half) #0 {
ret half %3
}

define dso_local void @brcond(half %0) {
define void @brcond(half %0) #0 {
; CHECK-LIBCALL-LABEL: brcond:
; CHECK-LIBCALL: # %bb.0: # %entry
; CHECK-LIBCALL-NEXT: pushq %rax
; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 16
; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
; CHECK-LIBCALL-NEXT: xorps %xmm1, %xmm1
; CHECK-LIBCALL-NEXT: ucomiss %xmm1, %xmm0
Expand All @@ -968,7 +967,6 @@ define dso_local void @brcond(half %0) {
; CHECK-LIBCALL-NEXT: jne .LBB18_2
; CHECK-LIBCALL-NEXT: # %bb.1: # %if.then
; CHECK-LIBCALL-NEXT: popq %rax
; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 8
; CHECK-LIBCALL-NEXT: retq
; CHECK-LIBCALL-NEXT: .LBB18_2: # %if.end
;
Expand All @@ -991,7 +989,6 @@ define dso_local void @brcond(half %0) {
; CHECK-I686-LABEL: brcond:
; CHECK-I686: # %bb.0: # %entry
; CHECK-I686-NEXT: subl $12, %esp
; CHECK-I686-NEXT: .cfi_def_cfa_offset 16
; CHECK-I686-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
; CHECK-I686-NEXT: movw %ax, (%esp)
Expand All @@ -1006,7 +1003,6 @@ define dso_local void @brcond(half %0) {
; CHECK-I686-NEXT: jne .LBB18_2
; CHECK-I686-NEXT: # %bb.1: # %if.then
; CHECK-I686-NEXT: addl $12, %esp
; CHECK-I686-NEXT: .cfi_def_cfa_offset 4
; CHECK-I686-NEXT: retl
; CHECK-I686-NEXT: .LBB18_2: # %if.end
entry:
Expand All @@ -1020,16 +1016,14 @@ if.end: ; preds = %entry
unreachable
}

define half @test_sqrt(half %0) {
define half @test_sqrt(half %0) #0 {
; CHECK-LIBCALL-LABEL: test_sqrt:
; CHECK-LIBCALL: # %bb.0: # %entry
; CHECK-LIBCALL-NEXT: pushq %rax
; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 16
; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
; CHECK-LIBCALL-NEXT: sqrtss %xmm0, %xmm0
; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
; CHECK-LIBCALL-NEXT: popq %rax
; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 8
; CHECK-LIBCALL-NEXT: retq
;
; BWON-F16C-LABEL: test_sqrt:
Expand All @@ -1047,7 +1041,6 @@ define half @test_sqrt(half %0) {
; CHECK-I686-LABEL: test_sqrt:
; CHECK-I686: # %bb.0: # %entry
; CHECK-I686-NEXT: subl $12, %esp
; CHECK-I686-NEXT: .cfi_def_cfa_offset 16
; CHECK-I686-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
; CHECK-I686-NEXT: movw %ax, (%esp)
Expand All @@ -1058,7 +1051,6 @@ define half @test_sqrt(half %0) {
; CHECK-I686-NEXT: movss %xmm0, (%esp)
; CHECK-I686-NEXT: calll __truncsfhf2
; CHECK-I686-NEXT: addl $12, %esp
; CHECK-I686-NEXT: .cfi_def_cfa_offset 4
; CHECK-I686-NEXT: retl
entry:
%1 = call half @llvm.sqrt.f16(half %0)
Expand All @@ -1067,7 +1059,7 @@ entry:

declare half @llvm.sqrt.f16(half)

define void @main.158() local_unnamed_addr #0 {
define void @main.158() #0 {
; CHECK-LIBCALL-LABEL: main.158:
; CHECK-LIBCALL: # %bb.0: # %entry
; CHECK-LIBCALL-NEXT: pushq %rax
Expand Down Expand Up @@ -1143,23 +1135,14 @@ entry:
ret void
}

define void @main.45() local_unnamed_addr {
define void @main.45() #0 {
; CHECK-LIBCALL-LABEL: main.45:
; CHECK-LIBCALL: # %bb.0: # %entry
; CHECK-LIBCALL-NEXT: pushq %rbp
; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 16
; CHECK-LIBCALL-NEXT: pushq %r15
; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 24
; CHECK-LIBCALL-NEXT: pushq %r14
; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 32
; CHECK-LIBCALL-NEXT: pushq %rbx
; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 40
; CHECK-LIBCALL-NEXT: pushq %rax
; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 48
; CHECK-LIBCALL-NEXT: .cfi_offset %rbx, -40
; CHECK-LIBCALL-NEXT: .cfi_offset %r14, -32
; CHECK-LIBCALL-NEXT: .cfi_offset %r15, -24
; CHECK-LIBCALL-NEXT: .cfi_offset %rbp, -16
; CHECK-LIBCALL-NEXT: pinsrw $0, (%rax), %xmm0
; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax
; CHECK-LIBCALL-NEXT: movd %eax, %xmm1
Expand All @@ -1183,15 +1166,10 @@ define void @main.45() local_unnamed_addr {
; CHECK-LIBCALL-NEXT: movw %r15w, (%rax)
; CHECK-LIBCALL-NEXT: movw %bp, (%rax)
; CHECK-LIBCALL-NEXT: addq $8, %rsp
; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 40
; CHECK-LIBCALL-NEXT: popq %rbx
; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 32
; CHECK-LIBCALL-NEXT: popq %r14
; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 24
; CHECK-LIBCALL-NEXT: popq %r15
; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 16
; CHECK-LIBCALL-NEXT: popq %rbp
; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 8
; CHECK-LIBCALL-NEXT: retq
;
; BWON-F16C-LABEL: main.45:
Expand All @@ -1213,13 +1191,8 @@ define void @main.45() local_unnamed_addr {
; CHECK-I686-LABEL: main.45:
; CHECK-I686: # %bb.0: # %entry
; CHECK-I686-NEXT: pushl %edi
; CHECK-I686-NEXT: .cfi_def_cfa_offset 8
; CHECK-I686-NEXT: pushl %esi
; CHECK-I686-NEXT: .cfi_def_cfa_offset 12
; CHECK-I686-NEXT: subl $20, %esp
; CHECK-I686-NEXT: .cfi_def_cfa_offset 32
; CHECK-I686-NEXT: .cfi_offset %esi, -12
; CHECK-I686-NEXT: .cfi_offset %edi, -8
; CHECK-I686-NEXT: pinsrw $0, (%eax), %xmm0
; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
; CHECK-I686-NEXT: movd %eax, %xmm0
Expand All @@ -1238,11 +1211,8 @@ define void @main.45() local_unnamed_addr {
; CHECK-I686-NEXT: movw %di, (%eax)
; CHECK-I686-NEXT: movw %si, (%eax)
; CHECK-I686-NEXT: addl $20, %esp
; CHECK-I686-NEXT: .cfi_def_cfa_offset 12
; CHECK-I686-NEXT: popl %esi
; CHECK-I686-NEXT: .cfi_def_cfa_offset 8
; CHECK-I686-NEXT: popl %edi
; CHECK-I686-NEXT: .cfi_def_cfa_offset 4
; CHECK-I686-NEXT: retl
entry:
%0 = load half, ptr undef, align 8
Expand Down Expand Up @@ -1360,22 +1330,17 @@ declare half @llvm.minnum.f16(half, half)
define half @pr61271(half %0, half %1) #0 {
; CHECK-LIBCALL-LABEL: pr61271:
; CHECK-LIBCALL: # %bb.0:
; CHECK-LIBCALL-NEXT: subq $40, %rsp
; CHECK-LIBCALL-NEXT: pushq %rax
; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-LIBCALL-NEXT: movaps %xmm1, %xmm0
; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-LIBCALL-NEXT: movss %xmm0, (%rsp) # 4-byte Spill
; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
; CHECK-LIBCALL-NEXT: movaps %xmm0, %xmm1
; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; CHECK-LIBCALL-NEXT: cmpltss %xmm2, %xmm1
; CHECK-LIBCALL-NEXT: andps %xmm1, %xmm0
; CHECK-LIBCALL-NEXT: andnps %xmm2, %xmm1
; CHECK-LIBCALL-NEXT: orps %xmm1, %xmm0
; CHECK-LIBCALL-NEXT: minss (%rsp), %xmm0 # 4-byte Folded Reload
; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
; CHECK-LIBCALL-NEXT: addq $40, %rsp
; CHECK-LIBCALL-NEXT: popq %rax
; CHECK-LIBCALL-NEXT: retq
;
; BWON-F16C-LABEL: pr61271:
Expand All @@ -1388,8 +1353,7 @@ define half @pr61271(half %0, half %1) #0 {
; BWON-F16C-NEXT: movzwl %ax, %eax
; BWON-F16C-NEXT: vmovd %eax, %xmm1
; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1
; BWON-F16C-NEXT: vcmpltss %xmm0, %xmm1, %xmm2
; BWON-F16C-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0
; BWON-F16C-NEXT: vminss %xmm0, %xmm1, %xmm0
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT: vmovd %xmm0, %eax
; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
Expand All @@ -1411,13 +1375,8 @@ define half @pr61271(half %0, half %1) #0 {
; CHECK-I686-NEXT: calll __extendhfsf2
; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-I686-NEXT: movaps %xmm1, %xmm2
; CHECK-I686-NEXT: cmpltss %xmm0, %xmm2
; CHECK-I686-NEXT: andps %xmm2, %xmm1
; CHECK-I686-NEXT: andnps %xmm0, %xmm2
; CHECK-I686-NEXT: orps %xmm1, %xmm2
; CHECK-I686-NEXT: movss %xmm2, (%esp)
; CHECK-I686-NEXT: minss {{[0-9]+}}(%esp), %xmm0
; CHECK-I686-NEXT: movss %xmm0, (%esp)
; CHECK-I686-NEXT: calll __truncsfhf2
; CHECK-I686-NEXT: addl $44, %esp
; CHECK-I686-NEXT: retl
Expand Down