Skip to content

Commit

Permalink
[X86ISelLowering] avoid emitting libcalls to __mulodi4()
Browse files Browse the repository at this point in the history
Similar to D108842, D108844, and D108926.

__has_builtin(__builtin_mul_overflow) returns true for 32-bit x86 targets,
but Clang is deferring to compiler-rt when encountering long long types.
This breaks ARCH=i386 + CONFIG_BLK_DEV_NBD=y builds of the Linux kernel
that are using __builtin_mul_overflow with these types for these targets.

If the semantics of __has_builtin mean "the compiler resolves these,
always" then we shouldn't conditionally emit a libcall.

This will still need to be worked around in the Linux kernel in order to
continue to support these builds of the Linux kernel for this
target with older releases of clang.

Link: https://bugs.llvm.org/show_bug.cgi?id=28629
Link: https://bugs.llvm.org/show_bug.cgi?id=35922
Link: ClangBuiltLinux/linux#1438

Reviewed By: lebedev.ri, RKSimon

Differential Revision: https://reviews.llvm.org/D108928
  • Loading branch information
nickdesaulniers committed Sep 7, 2021
1 parent c9e9635 commit d0eeb64
Show file tree
Hide file tree
Showing 4 changed files with 395 additions and 115 deletions.
1 change: 1 addition & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -2148,6 +2148,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
setLibcallName(RTLIB::MUL_I128, nullptr);
setLibcallName(RTLIB::MULO_I64, nullptr);
setLibcallName(RTLIB::MULO_I128, nullptr);
}

Expand Down
19 changes: 19 additions & 0 deletions llvm/test/CodeGen/X86/overflow-intrinsic-optimizations.ll
@@ -0,0 +1,19 @@
; RUN: llc %s -mtriple=i386 -o - | FileCheck %s

; Regression test (PR28629 / PR35922): on 32-bit x86, i64
; llvm.smul.with.overflow must be expanded inline instead of being lowered
; to a libcall to compiler-rt's __mulodi4 — per the commit message, builds
; such as the Linux kernel (ARCH=i386) do not link compiler-rt, so any
; emitted call would fail to link.
define i1 @no__mulodi4(i32 %a, i64 %b, i32* %c) {
; CHECK-LABEL: no__mulodi4:
; CHECK-NOT: calll __mulodi4
entry:
; Widen the i32 operand, then do a 64x64 signed multiply with overflow check.
%0 = sext i32 %a to i64
%1 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %0, i64 %b)
; %2 = overflow bit, %3 = 64-bit product.
%2 = extractvalue { i64, i1 } %1, 1
%3 = extractvalue { i64, i1 } %1, 0
; Additionally detect whether the product fits in 32 bits:
; truncate, re-extend, and compare against the original product.
%4 = trunc i64 %3 to i32
%5 = sext i32 %4 to i64
%6 = icmp ne i64 %3, %5
; Report overflow if either the multiply overflowed i64 or the result
; does not fit in i32; store the truncated product through %c.
%7 = or i1 %2, %6
store i32 %4, i32* %c, align 4
ret i1 %7
}

declare { i64, i1 } @llvm.smul.with.overflow.i64(i64, i64)
92 changes: 65 additions & 27 deletions llvm/test/CodeGen/X86/smul_fix_sat.ll
Expand Up @@ -365,41 +365,79 @@ define i64 @func5(i64 %x, i64 %y) {
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_def_cfa_offset 24
; X86-NEXT: subl $12, %esp
; X86-NEXT: .cfi_def_cfa_offset 32
; X86-NEXT: .cfi_offset %esi, -20
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
; X86-NEXT: .cfi_offset %ebp, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl $0, (%esp)
; X86-NEXT: movl %esp, %edi
; X86-NEXT: movl %ecx, %ebx
; X86-NEXT: xorl %esi, %ebx
; X86-NEXT: sarl $31, %ebx
; X86-NEXT: movl %ebx, %ebp
; X86-NEXT: xorl $2147483647, %ebp # imm = 0x7FFFFFFF
; X86-NEXT: notl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: pushl %edx
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: pushl %ecx
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: calll __mulodi4
; X86-NEXT: addl $20, %esp
; X86-NEXT: .cfi_adjust_cfa_offset -20
; X86-NEXT: cmpl $0, (%esp)
; X86-NEXT: cmovnel %ebx, %eax
; X86-NEXT: cmovnel %ebp, %edx
; X86-NEXT: addl $4, %esp
; X86-NEXT: movl %eax, %edi
; X86-NEXT: imull %ebx, %edi
; X86-NEXT: mull %ebx
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: addl %edi, %edx
; X86-NEXT: movl %ebp, %edi
; X86-NEXT: imull %ebp, %ebx
; X86-NEXT: addl %edx, %ebx
; X86-NEXT: sarl $31, %edi
; X86-NEXT: movl %edi, %ebp
; X86-NEXT: imull %ecx, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %edi, %eax
; X86-NEXT: mull %esi
; X86-NEXT: addl %ebp, %edx
; X86-NEXT: imull %esi, %edi
; X86-NEXT: addl %edx, %edi
; X86-NEXT: addl (%esp), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl %ebx, %edi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: mull %esi
; X86-NEXT: movl %edx, %ebp
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull %esi
; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: addl %ebp, %ebx
; X86-NEXT: adcl $0, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: movl %edx, %ebp
; X86-NEXT: addl %ebx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl %esi, %ebp
; X86-NEXT: setb %bl
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: mull %edx
; X86-NEXT: addl %ebp, %eax
; X86-NEXT: movzbl %bl, %esi
; X86-NEXT: adcl %esi, %edx
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: adcl %edi, %edx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: movl %ebx, %edi
; X86-NEXT: sarl $31, %edi
; X86-NEXT: xorl %edi, %edx
; X86-NEXT: xorl %eax, %edi
; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: sarl $31, %ecx
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: xorl $2147483647, %esi # imm = 0x7FFFFFFF
; X86-NEXT: orl %edx, %edi
; X86-NEXT: notl %ecx
; X86-NEXT: movl (%esp), %eax # 4-byte Reload
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: cmovel %ebx, %esi
; X86-NEXT: movl %esi, %edx
; X86-NEXT: addl $12, %esp
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 16
Expand Down

0 comments on commit d0eeb64

Please sign in to comment.