-
Notifications
You must be signed in to change notification settings - Fork 15.1k
Description
This looks like a problem in the official build CI configuration.
I am reporting an ABI incompatibility issue with the __extendhfsf2 function in the compiler-rt library shipped with the official LLVM 21 win64 build.
When compiling C++ code that uses the _Float16 type, Clang correctly generates code that passes the _Float16 argument to __extendhfsf2 via the xmm0 register, following the modern ABI. (See #56854)
However, the implementation of __extendhfsf2 within the pre-built clang_rt.builtins-x86_64.lib included in the official release expects the argument to be passed in the ecx register, which is the legacy ABI. This mismatch leads to incorrect floating-point conversion results at runtime.
Debugging the call to __extendhfsf2 confirms that the caller places the _Float16 value in xmm0, but the callee reads from ecx, resulting in garbage data being converted.
I have confirmed that this issue does not occur if I build compiler-rt from the llvm-project source (any version starting from LLVM 18) myself, using the latest clang-cl on Windows with CMake. During my own CMake configuration, the check COMPILER_RT_HAS_x86_64_FLOAT16 reported Success, and the resulting self-built library works correctly.
; my wrapper function
; Caller side of the mismatch (debugger disassembly, Intel syntax).
; It places the 16-bit _Float16 value in the low word of xmm0 and then
; calls __extendhfsf2.
; Reserve 38h bytes of stack for this frame.
00007FF6E7845A90 sub rsp,38h
; Spill the incoming r8d and rdx values to the stack.
; NOTE(review): presumably these are the wrapper's other arguments - confirm.
00007FF6E7845A94 mov dword ptr [rsp+34h],r8d
00007FF6E7845A99 mov qword ptr [rsp+28h],rdx
; Extract the low 16-bit word of xmm0 and store it at [rsp+26h].
00007FF6E7845A9E pextrw eax,xmm0,0
00007FF6E7845AA3 mov word ptr [rsp+26h],ax
; Reload the 16-bit value of local x into the low word of xmm0.
; NOTE(review): x is presumably the slot written just above - confirm.
00007FF6E7845AA8 pinsrw xmm0,word ptr [x],0
; The argument is in xmm0 at this call; nothing here writes ecx.
00007FF6E7845AAF call __extendhfsf2 (07FF6E7851AD0h)
00007FF6E7845AB4 nop
; Release the frame and return.
00007FF6E7845AB5 add rsp,38h
00007FF6E7845AB9 ret
; beginning of __extendhfsf2
; Callee side of the mismatch (debugger disassembly, Intel syntax).
; Widens a 16-bit half-precision bit pattern to a 32-bit single-precision
; bit pattern using integer operations only.
; In:  ecx  = half-precision bits (this build reads ecx; xmm0 is never read)
; Out: xmm0 = single-precision bits (low dword)
; Register roles after the field split:
;   eax = original input bits (kept for the sign)
;   edx = 5-bit exponent field, later the 23-bit float mantissa
;   ecx = 10-bit mantissa field
;   r8d = float exponent field (before being shifted into place)
; The input is taken from ecx here, which is the ABI mismatch being reported.
00007FF6E7851AD0 mov eax,ecx
; edx = bits 10..14 of the input (exponent field).
00007FF6E7851AD2 mov edx,eax
00007FF6E7851AD4 shr edx,0Ah
00007FF6E7851AD7 and edx,1Fh
; ecx = bits 0..9 of the input (mantissa field).
00007FF6E7851ADA mov ecx,eax
00007FF6E7851ADC and ecx,3FFh
; Exponent field == 0: go to the zero/subnormal handling.
00007FF6E7851AE2 test edx,edx
00007FF6E7851AE4 je __extendhfsf2+29h (07FF6E7851AF9h)
; Exponent field != 1Fh: go to the normal-number handling.
00007FF6E7851AE6 cmp edx,1Fh
00007FF6E7851AE9 jne __extendhfsf2+4Dh (07FF6E7851B1Dh)
; Exponent field == 1Fh (infinity/NaN encoding): mantissa moves up by 13 bits
; (10-bit field -> 23-bit field) and the float exponent field is all ones.
00007FF6E7851AEB movzx edx,cx
00007FF6E7851AEE shl edx,0Dh
00007FF6E7851AF1 mov r8d,0FFh
00007FF6E7851AF7 jmp __extendhfsf2+62h (07FF6E7851B32h)
; Exponent field == 0: a zero mantissa means the value is zero.
00007FF6E7851AF9 test cx,cx
00007FF6E7851AFC je __extendhfsf2+5Dh (07FF6E7851B2Dh)
; Subnormal input: normalize it.
; bsr gives the index of the highest set bit; xor 1Fh turns that index
; into the leading-zero count of the 32-bit value (31 - index).
00007FF6E7851AFE movzx edx,cx
00007FF6E7851B01 bsr ecx,edx
00007FF6E7851B04 xor ecx,1Fh
; Float exponent field = 86h - leading_zero_count.
00007FF6E7851B07 mov r8d,86h
00007FF6E7851B0D sub r8d,ecx
; Shift count = leading_zero_count - 8 (adding 0F8h to cl subtracts 8),
; which moves the leading 1 of the mantissa to bit 23.
00007FF6E7851B10 add cl,0F8h
00007FF6E7851B13 shl edx,cl
; Clear bit 23: the leading 1 becomes the implicit bit of the float.
00007FF6E7851B15 xor edx,800000h
00007FF6E7851B1B jmp __extendhfsf2+62h (07FF6E7851B32h)
; Normal input: add 70h to the exponent field (rebias) and
; move the mantissa up by 13 bits.
00007FF6E7851B1D movzx r8d,dx
00007FF6E7851B21 add r8d,70h
00007FF6E7851B25 movzx edx,cx
00007FF6E7851B28 shl edx,0Dh
00007FF6E7851B2B jmp __extendhfsf2+62h (07FF6E7851B32h)
; Zero input: mantissa and exponent fields are both zero.
00007FF6E7851B2D xor edx,edx
00007FF6E7851B2F xor r8d,r8d
; Common tail: move the sign from bit 15 to bit 31 and keep only that bit.
00007FF6E7851B32 shl eax,10h
00007FF6E7851B35 and eax,80000000h
; Place the exponent field at bit 23, then merge sign | exponent | mantissa.
00007FF6E7851B3A shl r8d,17h
00007FF6E7851B3E or r8d,eax
00007FF6E7851B41 or r8d,edx
; Return the assembled 32-bit pattern in xmm0.
00007FF6E7851B44 movd xmm0,r8d
00007FF6E7851B49 ret