Skip to content

Commit

Permalink
[X86] Allow v2i32->v2f32 strict and non-strict uint_to_fp to be widened to v4i32->v4f32 under avx512.
Browse files Browse the repository at this point in the history

With avx512vl we get native v4i32->v4f32 uint_to_fp instructions. With
only avx512f we get v16i32->v16f32 instructions, which we can use to
emulate v4i32->v4f32 by widening the operand to 512 bits.
  • Loading branch information
topperc committed Dec 27, 2019
1 parent 931946b commit fca4736
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 25 deletions.
2 changes: 1 addition & 1 deletion llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -29012,7 +29012,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
if (SrcVT != MVT::v2i32)
return;

- if (IsSigned) {
+ if (IsSigned || Subtarget.hasAVX512()) {
if (!IsStrict)
return;

Expand Down
44 changes: 36 additions & 8 deletions llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll
Expand Up @@ -64,14 +64,42 @@ define <2 x float> @uitofp_v2i32_v2f32(<2 x i32> %x) #0 {
; SSE-NEXT: cvtpd2ps %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: uitofp_v2i32_v2f32:
; AVX: # %bb.0:
; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vsubpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vcvtpd2ps %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
; AVX1-LABEL: uitofp_v2i32_v2f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vcvtpd2ps %xmm0, %xmm0
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: uitofp_v2i32_v2f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: uitofp_v2i32_v2f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: vcvtudq2ps %xmm0, %xmm0
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: uitofp_v2i32_v2f32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: uitofp_v2i32_v2f32:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512DQVL-NEXT: vcvtudq2ps %xmm0, %xmm0
; AVX512DQVL-NEXT: ret{{[l|q]}}
%result = call <2 x float> @llvm.experimental.constrained.uitofp.v2f32.v2i32(<2 x i32> %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
Expand Down
42 changes: 34 additions & 8 deletions llvm/test/CodeGen/X86/vec_int_to_fp.ll
Expand Up @@ -52,14 +52,40 @@ define <2 x float> @uitofp_2i32_to_2f32(<2 x i32> %a) {
; SSE41-NEXT: cvtpd2ps %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: uitofp_2i32_to_2f32:
; AVX: # %bb.0:
; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vsubpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vcvtpd2ps %xmm0, %xmm0
; AVX-NEXT: retq
; VEX-LABEL: uitofp_2i32_to_2f32:
; VEX: # %bb.0:
; VEX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; VEX-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; VEX-NEXT: vpor %xmm1, %xmm0, %xmm0
; VEX-NEXT: vsubpd %xmm1, %xmm0, %xmm0
; VEX-NEXT: vcvtpd2ps %xmm0, %xmm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: uitofp_2i32_to_2f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: uitofp_2i32_to_2f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvtudq2ps %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: uitofp_2i32_to_2f32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: uitofp_2i32_to_2f32:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvtudq2ps %xmm0, %xmm0
; AVX512VLDQ-NEXT: retq
%cvt = uitofp <2 x i32> %a to <2 x float>
ret <2 x float> %cvt
}
Expand Down
24 changes: 16 additions & 8 deletions llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
Expand Up @@ -6828,14 +6828,22 @@ define <2 x float> @constrained_vector_uitofp_v2f32_v2i32(<2 x i32> %x) #0 {
; CHECK-NEXT: cvtpd2ps %xmm0, %xmm0
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_uitofp_v2f32_v2i32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vsubpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vcvtpd2ps %xmm0, %xmm0
; AVX-NEXT: retq
; AVX1-LABEL: constrained_vector_uitofp_v2f32_v2i32:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vcvtpd2ps %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_uitofp_v2f32_v2i32:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
entry:
%result = call <2 x float>
@llvm.experimental.constrained.uitofp.v2f32.v2i32(<2 x i32> %x,
Expand Down

0 comments on commit fca4736

Please sign in to comment.