Skip to content

Commit

Permalink
[X86ISelLowering] avoid emitting libcalls to __mulodi4()
Browse files Browse the repository at this point in the history
Similar to D108842, D108844, and D108926.

__has_builtin(__builtin_mul_overflow) returns true for 32-bit x86 targets,
but Clang is deferring to compiler-rt when encountering long long types.
This breaks ARCH=i386 + CONFIG_BLK_DEV_NBD=y builds of the Linux kernel
that are using __builtin_mul_overflow with these types for these targets.

If the semantics of __has_builtin mean "the compiler resolves these,
always" then we shouldn't conditionally emit a libcall.

This will still need to be worked around in the Linux kernel in order to
continue to support these builds of the Linux kernel for this
target with older releases of clang.

Link: https://bugs.llvm.org/show_bug.cgi?id=28629
Link: https://bugs.llvm.org/show_bug.cgi?id=35922
Link: ClangBuiltLinux/linux#1438

Reviewed By: lebedev.ri, RKSimon

Differential Revision: https://reviews.llvm.org/D108928
  • Loading branch information
nickdesaulniers committed Sep 7, 2021
1 parent c9e9635 commit d0eeb64
Show file tree
Hide file tree
Showing 4 changed files with 395 additions and 115 deletions.
1 change: 1 addition & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -2148,6 +2148,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
setLibcallName(RTLIB::MUL_I128, nullptr);
setLibcallName(RTLIB::MULO_I64, nullptr);
setLibcallName(RTLIB::MULO_I128, nullptr);
}

Expand Down
19 changes: 19 additions & 0 deletions llvm/test/CodeGen/X86/overflow-intrinsic-optimizations.ll
@@ -0,0 +1,19 @@
; RUN: llc %s -mtriple=i386 -o - | FileCheck %s

; Regression test (PR28629 / PR35922): on 32-bit x86, i64
; llvm.smul.with.overflow must be expanded inline instead of being lowered
; to a libcall to compiler-rt's __mulodi4 — per the commit message, builds
; such as the Linux kernel (ARCH=i386) do not link compiler-rt, so any
; emitted call would fail to link.
define i1 @no__mulodi4(i32 %a, i64 %b, i32* %c) {
; CHECK-LABEL: no__mulodi4:
; CHECK-NOT: calll __mulodi4
entry:
; Widen the i32 operand, then do a 64x64 signed multiply with overflow check.
%0 = sext i32 %a to i64
%1 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %0, i64 %b)
; %2 = overflow bit, %3 = 64-bit product.
%2 = extractvalue { i64, i1 } %1, 1
%3 = extractvalue { i64, i1 } %1, 0
; Additionally detect whether the product fits in 32 bits:
; truncate, re-extend, and compare against the original product.
%4 = trunc i64 %3 to i32
%5 = sext i32 %4 to i64
%6 = icmp ne i64 %3, %5
; Report overflow if either the multiply overflowed i64 or the result
; does not fit in i32; store the truncated product through %c.
%7 = or i1 %2, %6
store i32 %4, i32* %c, align 4
ret i1 %7
}

declare { i64, i1 } @llvm.smul.with.overflow.i64(i64, i64)
92 changes: 65 additions & 27 deletions llvm/test/CodeGen/X86/smul_fix_sat.ll
Expand Up @@ -365,41 +365,79 @@ define i64 @func5(i64 %x, i64 %y) {
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_def_cfa_offset 24
; X86-NEXT: subl $12, %esp
; X86-NEXT: .cfi_def_cfa_offset 32
; X86-NEXT: .cfi_offset %esi, -20
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
; X86-NEXT: .cfi_offset %ebp, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl $0, (%esp)
; X86-NEXT: movl %esp, %edi
; X86-NEXT: movl %ecx, %ebx
; X86-NEXT: xorl %esi, %ebx
; X86-NEXT: sarl $31, %ebx
; X86-NEXT: movl %ebx, %ebp
; X86-NEXT: xorl $2147483647, %ebp # imm = 0x7FFFFFFF
; X86-NEXT: notl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: pushl %edx
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: pushl %ecx
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: calll __mulodi4
; X86-NEXT: addl $20, %esp
; X86-NEXT: .cfi_adjust_cfa_offset -20
; X86-NEXT: cmpl $0, (%esp)
; X86-NEXT: cmovnel %ebx, %eax
; X86-NEXT: cmovnel %ebp, %edx
; X86-NEXT: addl $4, %esp
; X86-NEXT: movl %eax, %edi
; X86-NEXT: imull %ebx, %edi
; X86-NEXT: mull %ebx
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: addl %edi, %edx
; X86-NEXT: movl %ebp, %edi
; X86-NEXT: imull %ebp, %ebx
; X86-NEXT: addl %edx, %ebx
; X86-NEXT: sarl $31, %edi
; X86-NEXT: movl %edi, %ebp
; X86-NEXT: imull %ecx, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %edi, %eax
; X86-NEXT: mull %esi
; X86-NEXT: addl %ebp, %edx
; X86-NEXT: imull %esi, %edi
; X86-NEXT: addl %edx, %edi
; X86-NEXT: addl (%esp), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl %ebx, %edi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: mull %esi
; X86-NEXT: movl %edx, %ebp
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull %esi
; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: addl %ebp, %ebx
; X86-NEXT: adcl $0, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: movl %edx, %ebp
; X86-NEXT: addl %ebx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl %esi, %ebp
; X86-NEXT: setb %bl
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: mull %edx
; X86-NEXT: addl %ebp, %eax
; X86-NEXT: movzbl %bl, %esi
; X86-NEXT: adcl %esi, %edx
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: adcl %edi, %edx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: movl %ebx, %edi
; X86-NEXT: sarl $31, %edi
; X86-NEXT: xorl %edi, %edx
; X86-NEXT: xorl %eax, %edi
; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: sarl $31, %ecx
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: xorl $2147483647, %esi # imm = 0x7FFFFFFF
; X86-NEXT: orl %edx, %edi
; X86-NEXT: notl %ecx
; X86-NEXT: movl (%esp), %eax # 4-byte Reload
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: cmovel %ebx, %esi
; X86-NEXT: movl %esi, %edx
; X86-NEXT: addl $12, %esp
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 16
Expand Down

0 comments on commit d0eeb64

Please sign in to comment.