Skip to content

Commit

Permalink
[X86][MMX] Add tests showing missed opportunities to use MMX sitofp c…
Browse files Browse the repository at this point in the history
…onversions

If we are transferring MMX registers to XMM for conversion we could use the MMX equivalents (CVTPI2PD + CVTPI2PS) without affecting rounding/exceptions etc.

llvm-svn: 297481
  • Loading branch information
RKSimon committed Mar 10, 2017
1 parent de8d659 commit ed655f0
Showing 1 changed file with 76 additions and 1 deletion.
77 changes: 76 additions & 1 deletion llvm/test/CodeGen/X86/mmx-cvt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X64

; FIXME: If we are transferring XMM conversion results to MMX registers we could use the MMX equivalents
; (CVTPD2PI/CVTTPD2PI + CVTPS2PI/CVTTPS2PI) with affecting rounding/expections etc.
; (CVTPD2PI/CVTTPD2PI + CVTPS2PI/CVTTPS2PI) without affecting rounding/exceptions etc.

define void @cvt_v2f64_v2i32(<2 x double>, <1 x i64>*) nounwind {
; X86-LABEL: cvt_v2f64_v2i32:
Expand Down Expand Up @@ -248,8 +248,83 @@ define void @fptosi_v2f32_v2i32(<4 x float>, <1 x i64>*) nounwind {
ret void
}

; FIXME: If we are transferring MMX registers to XMM for conversion we could use the MMX equivalents
; (CVTPI2PD + CVTPI2PS) without affecting rounding/exceptions etc.

; Test: signed v2i32 -> v2f64 conversion where the integer source is an MMX value.
; The IR loads an x86_mmx value through the pointer argument, adds it to itself
; with the MMX padd.d intrinsic, moves the 64 bits into the low half of a 128-bit
; vector, and converts the two low i32 elements with a generic sitofp.
; The checks below record the current codegen: the 32-bit target spills %mm0 to an
; 8-byte-aligned stack slot and reloads it with movsd, the 64-bit target uses
; movq2dq; both then convert with cvtdq2pd.
define <2 x double> @sitofp_v2i32_v2f64(<1 x i64>*) nounwind {
; X86-LABEL: sitofp_v2i32_v2f64:
; X86: # BB#0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movq (%eax), %mm0
; X86-NEXT: paddd %mm0, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: cvtdq2pd %xmm0, %xmm0
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: sitofp_v2i32_v2f64:
; X64: # BB#0:
; X64-NEXT: movq (%rdi), %mm0
; X64-NEXT: paddd %mm0, %mm0
; X64-NEXT: movq2dq %mm0, %xmm0
; X64-NEXT: cvtdq2pd %xmm0, %xmm0
; X64-NEXT: retq
; Reinterpret the <1 x i64>* argument as an x86_mmx pointer and load from it.
%2 = bitcast <1 x i64>* %0 to x86_mmx*
%3 = load x86_mmx, x86_mmx* %2, align 8
; NOTE(review): the padd.d presumably exists to force the value through an MMX
; register before the conversion - confirm against the commit intent.
%4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
; Move the 64-bit MMX result into element 0 of a <2 x i64>; element 1 stays undef.
%5 = bitcast x86_mmx %4 to i64
%6 = insertelement <2 x i64> undef, i64 %5, i32 0
; View the vector as <4 x i32> and keep only elements 0 and 1.
%7 = bitcast <2 x i64> %6 to <4 x i32>
%8 = shufflevector <4 x i32> %7, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
; Generic signed int-to-fp conversion of the two i32 elements.
%9 = sitofp <2 x i32> %8 to <2 x double>
ret <2 x double> %9
}

; Test: signed i32 -> float conversion where the integer source is an MMX value.
; The IR loads an x86_mmx value through the pointer argument, adds it to itself
; with the MMX padd.d intrinsic, places the 64 bits in the low half of a 128-bit
; vector whose high half is explicitly zero, and converts all four i32 elements
; with the SSE2 cvtdq2ps intrinsic (hence the <4 x float> return type).
; The checks below record the current codegen: the 32-bit target spills %mm0 to an
; 8-byte-aligned stack slot and reloads it with movsd, the 64-bit target moves
; %mm0 through %rax into %xmm0; both then convert with cvtdq2ps.
define <4 x float> @sitofp_v2i32_v2f32(<1 x i64>*) nounwind {
; X86-LABEL: sitofp_v2i32_v2f32:
; X86: # BB#0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movq (%eax), %mm0
; X86-NEXT: paddd %mm0, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: cvtdq2ps %xmm0, %xmm0
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: sitofp_v2i32_v2f32:
; X64: # BB#0:
; X64-NEXT: movq (%rdi), %mm0
; X64-NEXT: paddd %mm0, %mm0
; X64-NEXT: movd %mm0, %rax
; X64-NEXT: movd %rax, %xmm0
; X64-NEXT: cvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
; Reinterpret the <1 x i64>* argument as an x86_mmx pointer and load from it.
%2 = bitcast <1 x i64>* %0 to x86_mmx*
%3 = load x86_mmx, x86_mmx* %2, align 8
; NOTE(review): the padd.d presumably exists to force the value through an MMX
; register before the conversion - confirm against the commit intent.
%4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
; Build a <2 x i64> with the MMX result in element 0 and a zero in element 1.
%5 = bitcast x86_mmx %4 to i64
%6 = insertelement <2 x i64> undef, i64 %5, i32 0
%7 = insertelement <2 x i64> %6, i64 0, i32 1
; View the vector as <4 x i32> and convert every element with the SSE2 intrinsic.
%8 = bitcast <2 x i64> %7 to <4 x i32>
%9 = tail call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %8)
ret <4 x float> %9
}

; Intrinsic declarations for the tests in this file.
; padd.d and cvtdq2ps are called by the sitofp tests above; the cvt(t)pd2dq and
; cvt(t)ps2dq declarations are presumably used by the earlier fp-to-int tests,
; which are outside this hunk - verify against the full file.
declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>)
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>)
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>)
declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>)
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>)

0 comments on commit ed655f0

Please sign in to comment.