diff --git a/llvm/lib/Target/X86/README-SSE.txt b/llvm/lib/Target/X86/README-SSE.txt index d52840e5c4896..c0c1bb66ebaba 100644 --- a/llvm/lib/Target/X86/README-SSE.txt +++ b/llvm/lib/Target/X86/README-SSE.txt @@ -680,37 +680,6 @@ _t: shufps $132, %xmm2, %xmm0 movaps %xmm0, 0 -//===---------------------------------------------------------------------===// -rdar://5907648 - -This function: - -float foo(unsigned char x) { - return x; -} - -compiles to (x86-32): - -define float @foo(i8 zeroext %x) nounwind { - %tmp12 = uitofp i8 %x to float ; [#uses=1] - ret float %tmp12 -} - -compiles to: - -_foo: - subl $4, %esp - movzbl 8(%esp), %eax - cvtsi2ss %eax, %xmm0 - movss %xmm0, (%esp) - flds (%esp) - addl $4, %esp - ret - -We should be able to use: - cvtsi2ss 8($esp), %xmm0 -since we know the stack slot is already zext'd. - //===---------------------------------------------------------------------===// Consider using movlps instead of movsd to implement (scalar_to_vector (loadf64)) diff --git a/llvm/test/CodeGen/X86/int8-to-fp.ll b/llvm/test/CodeGen/X86/int8-to-fp.ll new file mode 100644 index 0000000000000..72c3e94832d91 --- /dev/null +++ b/llvm/test/CodeGen/X86/int8-to-fp.ll @@ -0,0 +1,25 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i386-apple-macosx -mattr=+sse2 | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse2 | FileCheck %s --check-prefix=X64 + +; We get this right for x86-64, but on x86-32 the code is less optimal.
+; See: https://github.com/llvm/llvm-project/issues/64174 +define noundef float @i8_to_fp(i8 noundef zeroext %0) { +; X86-LABEL: i8_to_fp: +; X86: ## %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cvtsi2ss %eax, %xmm0 +; X86-NEXT: movss %xmm0, (%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: retl +; +; X64-LABEL: i8_to_fp: +; X64: ## %bb.0: +; X64-NEXT: cvtsi2ss %edi, %xmm0 +; X64-NEXT: retq + %2 = uitofp i8 %0 to float + ret float %2 +}