[ThinLTO] Don't mark calloc function dead #72673

eleviant · 2023-11-17T16:11:48Z

Dead store elimination pass may fold malloc + memset calls into a single call to calloc. If calloc is not preserved and is not being called directly it can be marked dead during thin link and result in link error.

llvmbot · 2023-11-17T16:12:20Z

@llvm/pr-subscribers-lld
@llvm/pr-subscribers-lld-elf
@llvm/pr-subscribers-llvm-ir
@llvm/pr-subscribers-llvm-binary-utilities

@llvm/pr-subscribers-lto

Author: None (eleviant)

Changes

Dead store elimination pass may fold malloc + memset calls into a single call to calloc. If calloc is not preserved and is not being called directly it can be marked dead during thin link and result in link error.

Full diff: https://github.com/llvm/llvm-project/pull/72673.diff

3 Files Affected:

(modified) llvm/lib/LTO/LTO.cpp (+7-1)
(added) llvm/test/ThinLTO/X86/Inputs/calloc.ll (+48)
(added) llvm/test/ThinLTO/X86/call-calloc.ll (+60)

diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index e111e09681178e2..8614f1c9bbcd680 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -606,6 +606,8 @@ void LTO::addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms,
   auto *ResE = Res.end();
   (void)ResE;
   const Triple TT(RegularLTO.CombinedModule->getTargetTriple());
+  TargetLibraryInfoImpl TLII(TT);
+  StringRef CallocName = TargetLibraryInfo(TLII).getName(LibFunc_calloc);
   for (const InputFile::Symbol &Sym : Syms) {
     assert(ResI != ResE);
     SymbolResolution Res = *ResI++;
@@ -644,7 +646,11 @@ void LTO::addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms,
     // FIXME: instead of this check, it would be desirable to compute GUIDs
     // based on mangled name, but this requires an access to the Target Triple
     // and would be relatively invasive on the codebase.
-    if (GlobalRes.IRName != Sym.getIRName()) {
+    if ((GlobalRes.IRName != Sym.getIRName()) ||
+        // The dead store elimination pass can fold malloc + memset calls into
+        // a single call to calloc. Prevent thin LTO from marking calloc a dead
+        // function otherwise we may face link errors.
+        (!CallocName.empty() && GlobalRes.IRName == CallocName)) {
       GlobalRes.Partition = GlobalResolution::External;
       GlobalRes.VisibleOutsideSummary = true;
     }
diff --git a/llvm/test/ThinLTO/X86/Inputs/calloc.ll b/llvm/test/ThinLTO/X86/Inputs/calloc.ll
new file mode 100644
index 000000000000000..a8a522a29cbd8a4
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/Inputs/calloc.ll
@@ -0,0 +1,48 @@
+; ModuleID = 'calloc.o'
+source_filename = "calloc.cpp"
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+@_ZZ6mallocE3buf = internal global [2000 x i8] zeroinitializer, align 1
+
+define hidden noalias ptr @malloc(i64 %a) local_unnamed_addr !type !7 !type !8 {
+entry:
+  tail call void asm sideeffect "dsb sy", "~{memory}"(), !srcloc !9
+  ret ptr @_ZZ6mallocE3buf
+}
+
+define hidden void @free(ptr nocapture noundef readnone %p) local_unnamed_addr !type !10 !type !11 {
+entry:
+  tail call void asm sideeffect "dsb nsh", "~{memory}"(), !srcloc !12
+  ret void
+}
+
+define hidden noalias nonnull ptr @calloc(i64 noundef %num, i64 noundef %size) local_unnamed_addr !type !13 !type !14 {
+entry:
+  %mul = mul i64 %size, %num
+  %call = tail call noalias ptr @malloc(i64 poison)
+  tail call void @llvm.memset.p0.i64(ptr nonnull align 1 @_ZZ6mallocE3buf, i8 0, i64 %mul, i1 false)
+  ret ptr @_ZZ6mallocE3buf
+}
+
+; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
+
+!llvm.module.flags = !{!0, !1, !2, !3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 4, !"CFI Canonical Jump Tables", i32 1}
+!2 = !{i32 8, !"PIC Level", i32 2}
+!3 = !{i32 7, !"uwtable", i32 2}
+!4 = !{i32 7, !"frame-pointer", i32 1}
+!5 = !{i32 1, !"EnableSplitLTOUnit", i32 1}
+!6 = !{!"clang version 18.0.0 (https://github.com/llvm/llvm-project.git a902ca66428164b4f11dedf1c551393add511271)"}
+!7 = !{i64 0, !"_ZTSFPvmE"}
+!8 = !{i64 0, !"_ZTSFPvmE.generalized"}
+!9 = !{i64 194}
+!10 = !{i64 0, !"_ZTSFvPvE"}
+!11 = !{i64 0, !"_ZTSFvPvE.generalized"}
+!12 = !{i64 288}
+!13 = !{i64 0, !"_ZTSFPvmmE"}
+!14 = !{i64 0, !"_ZTSFPvmmE.generalized"}
diff --git a/llvm/test/ThinLTO/X86/call-calloc.ll b/llvm/test/ThinLTO/X86/call-calloc.ll
new file mode 100644
index 000000000000000..41e002cecbcc2c7
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/call-calloc.ll
@@ -0,0 +1,60 @@
+; RUN: opt -module-summary %s -o %t1.bc
+; RUN: opt -module-summary %p/Inputs/calloc.ll -o %t2.bc
+; RUN: llvm-lto2 run -save-temps -o %t.out \
+; RUN:   %t1.bc -r=%t1.bc,_Z7DoStuffv,px -r=%t1.bc,_ZnwmRKSt9nothrow_t,px \
+; RUN:          -r=%t1.bc,__gxx_personality_v0, -r=%t1.bc,_ZSt7nothrow, -r=%t1.bc,malloc,l \
+; RUN:   %t2.bc -r=%t2.bc,malloc,pl -r=%t2.bc,free,pl -r=%t2.bc,calloc,pl
+; RUN: llvm-dis %t.out.2.5.precodegen.bc -o - | FileCheck %s
+
+; CHECK: define {{.*}} @calloc
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+%"struct.std::nothrow_t" = type { i8 }
+
+@_ZSt7nothrow = external global %"struct.std::nothrow_t", align 1
+
+declare !type !12 !type !13 noalias noundef ptr @malloc(i64 noundef) local_unnamed_addr
+
+define noalias noundef ptr @_ZnwmRKSt9nothrow_t(i64 noundef %size, ptr noundef nonnull align 1 dereferenceable(1) %0) local_unnamed_addr !type !10 !type !11 {
+entry:
+  %call = tail call noalias ptr @malloc(i64 noundef %size)
+  ret ptr %call
+}
+
+define void @_Z7DoStuffv() local_unnamed_addr personality ptr @__gxx_personality_v0 !type !7 !type !8 {
+entry:
+  %call = tail call noalias noundef dereferenceable_or_null(400) ptr @_ZnwmRKSt9nothrow_t(i64 noundef 400, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
+  %new.isnull = icmp ne ptr %call, null
+  tail call void @llvm.assume(i1 %new.isnull)
+  tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 dereferenceable(400) %call, i8 0, i64 400, i1 false)
+  tail call void asm sideeffect "str xzr, [$0]", "r,~{memory}"(ptr nonnull %call), !srcloc !9
+  ret void
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write)
+declare void @llvm.assume(i1 noundef)
+
+!llvm.module.flags = !{!0, !1, !2, !3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 4, !"CFI Canonical Jump Tables", i32 1}
+!2 = !{i32 8, !"PIC Level", i32 2}
+!3 = !{i32 7, !"uwtable", i32 2}
+!4 = !{i32 7, !"frame-pointer", i32 1}
+!5 = !{i32 1, !"EnableSplitLTOUnit", i32 1}
+!6 = !{!"clang version 18.0.0 (https://github.com/llvm/llvm-project.git a902ca66428164b4f11dedf1c551393add511271)"}
+!7 = !{i64 0, !"_ZTSFvvE"}
+!8 = !{i64 0, !"_ZTSFvvE.generalized"}
+!9 = !{i64 125}
+!10 = !{i64 0, !"_ZTSFPvmRKSt9nothrow_tE"}
+!11 = !{i64 0, !"_ZTSFPvmRKSt9nothrow_tE.generalized"}
+!12 = !{i64 0, !"_ZTSFPvmE"}
+!13 = !{i64 0, !"_ZTSFPvmE.generalized"}

minglotus-6 · 2023-11-17T17:13:05Z

llvm/test/ThinLTO/X86/Inputs/calloc.ll

nit: the test case could be simplified. Same for the other one.

Usually I would remove the unneeded parameter attributes and function attributes. In this test case, the type metadata doesn't seem to be relevant either.

minglotus-6 · 2023-11-17T17:24:35Z

llvm/lib/LTO/LTO.cpp

+        // The dead store elimination pass can fold malloc + memset calls into
+        // a single call to calloc. Prevent thin LTO from marking calloc a dead
+        // function otherwise we may face link errors.
+        (!CallocName.empty() && GlobalRes.IRName == CallocName)) {
      GlobalRes.Partition = GlobalResolution::External;


Dead store elimination pass may fold malloc + memset calls into a single call to calloc. If calloc is not preserved and is not being called directly it can be marked dead during thin link and result in link error.

Would this happen for other built-in libc calls? The current way of preserving it doesn't seem general enough as it handles calloc specially

The problem with calloc is that two libcalls are being folded into third libcall. When using thin LTO we can import function calling malloc (new operator) from one TU and later fold malloc followed by memset into calloc. I don't know about other related problems (there might be).

cachemeifyoucan · 2023-11-17T17:31:19Z

Right, I don't think it is correct to special case calloc or any function. In general, this kind of functions are handled by preserving all RuntimeLibcalls. I wonder if we should consider just adding all the allocation functions that backend understand into that list, then it can be handled automatically.

And yes, test needs to be simplified to drop all redundant IR that is not related to tests.

eleviant · 2023-11-17T18:04:41Z

Right, I don't think it is correct to special case calloc or any function. In general, this kind of functions are handled by preserving all RuntimeLibcalls

I don't think it's a good idea to treat all RuntimeLibcalls like this. The problem arises only when two (or more) libcalls are folded into one (or more) other libcalls. This happens in particular when we import operator new from another TU so DSE sees malloc followed by memset to zero. What are other cases possible? We don't have to take care of simple cases like optimizer generating memset/memcpy, because this happens before the thin link.

cachemeifyoucan · 2023-11-17T18:52:41Z

The problem arises only when two (or more) libcalls are folded into one (or more) other libcalls.

The problem here is all allocation functions are not in the RuntimeLibcalls list. All RuntimeLibcalls functions are preserved by LTO process (in general, unless for specific platform they are not available) because they can be produced by backend even if the bitcode object file don't reference them. There is no difference from this case, which calloc doesn't get referenced before optimization.

eleviant · 2023-11-20T09:31:36Z

Updated test cases

The problem arises only when two (or more) libcalls are folded into one (or more) other libcalls.

The problem here is all allocation functions are not in the RuntimeLibcalls list. All RuntimeLibcalls functions are preserved by LTO process (in general, unless for specific platform they are not available) because they can be produced by backend even if the bitcode object file don't reference them. There is no difference from this case, which calloc doesn't get referenced before optimization.

Well, I've done a brief investigation on this and looks like RuntimeLibcalls handling boils up to this snippet of code in lld

  if (!ctx.bitcodeFiles.empty())
    for (auto *s : lto::LTO::getRuntimeLibcallSymbols())
      handleLibcall(s);

The handleLibcall function check if IR symbol representing runtime library function is lazy, and adds its bitcode file to LTO input set if so. This doesn't really help much, because calloc (event if added to to RuntimeLibcalls set) is not a lazy symbol in my case. The problem is not that we don't add it but that we explicitly remove it in DCE. I'm starting to beleive that calloc is the unique case (I haven't found anything else of the sort).

teresajohnson · 2023-11-20T16:50:46Z

Updated test cases

The problem arises only when two (or more) libcalls are folded into one (or more) other libcalls.

The problem here is all allocation functions are not in the RuntimeLibcalls list. All RuntimeLibcalls functions are preserved by LTO process (in general, unless for specific platform they are not available) because they can be produced by backend even if the bitcode object file don't reference them. There is no difference from this case, which calloc doesn't get referenced before optimization.

Well, I've done a brief investigation on this and looks like RuntimeLibcalls handling boils up to this snippet of code in lld
  if (!ctx.bitcodeFiles.empty())
    for (auto *s : lto::LTO::getRuntimeLibcallSymbols())
      handleLibcall(s);
The handleLibcall function check if IR symbol representing runtime library function is lazy, and adds its bitcode file to LTO input set if so. This doesn't really help much, because calloc (event if added to to RuntimeLibcalls set) is not a lazy symbol in my case. The problem is not that we don't add it but that we explicitly remove it in DCE. I'm starting to beleive that calloc is the unique case (I haven't found anything else of the sort).

It isn't lld that is doing the handling that preserves these symbols, it is LTO, specifically llvm/lib/Object/IRSymtab.cpp. See the use of RuntimeLibcalls.def here and where it is used to preserve symbols later in the file:

llvm-project/llvm/lib/Object/IRSymtab.cpp

Lines 48 to 57 in c38ae74

    
           static const char *PreservedSymbols[] = { 
        
           #define HANDLE_LIBCALL(code, name) name, 
        
           #include "llvm/IR/RuntimeLibcalls.def" 
        
           #undef HANDLE_LIBCALL 
        
               // There are global variables, so put it here instead of in 
        
               // RuntimeLibcalls.def. 
        
               // TODO: Are there similar such variables? 
        
               "__ssp_canary_word", 
        
               "__stack_chk_guard", 
        
           };

teresajohnson · 2023-11-20T16:57:54Z

It isn't lld that is doing the handling that preserves these symbols, it is LTO, specifically llvm/lib/Object/IRSymtab.cpp. See the use of RuntimeLibcalls.def here and where it is used to preserve symbols later in the file.

(invoked from LTO.cpp via readIRSymtab, and the flags are later checked when adding symbols in addModuleToGlobalRes)

eleviant · 2023-11-21T11:27:23Z

(invoked from LTO.cpp via readIRSymtab, and the flags are later checked when adding symbols in addModuleToGlobalRes)

Thanks a lot, I somehow missed that. Updated the patch

teresajohnson

The source change lgtm but a couple questions on the test below.

teresajohnson · 2023-11-21T16:08:59Z

llvm/test/ThinLTO/X86/Inputs/calloc.ll

+  ret ptr @_ZZ6mallocE3buf
+}
+
+define hidden void @free(ptr %p) {


Is this def needed in the test?

Looks like a refactoring leftover

teresajohnson · 2023-11-21T16:09:54Z

llvm/test/ThinLTO/X86/Inputs/operator-new.ll

+
+declare ptr @malloc(i64)
+
+define ptr @_ZnwmRKSt9nothrow_t(i64 %size, ptr %0) {


Is this separate input file needed for the test? Why can't this function be in the main input file, or even have the call to malloc inlined into DoStuff to start with?

Originally it was in separate source file (as part of larger C++ program), but as we don't run optimizations on source IR this "operator new" won't be converted to calloc by opt, so we can safely move it to main input file. I'll update the patch.

teresajohnson · 2023-11-22T15:56:32Z

llvm/test/ThinLTO/X86/call-calloc.ll

+; RUN:   %t2.bc -r=%t2.bc,malloc,pl -r=%t2.bc,calloc,pl
+; RUN: llvm-dis %t.out.2.5.precodegen.bc -o - | FileCheck %s
+
+; CHECK: define {{.*}} @calloc


Actually, since you are only checking that the calloc def is not removed, can this test be simplified even further? I.e. you really only need a single source file with a def of calloc run through llvm-lto2, do you even need any of the other code?

Honestly, I'm not quite sure if such simplification is justified. May be it is better to also check that malloc/memset pair is folded to calloc, so we have both def and use?

Sure that seems fine too. My point was just that if we aren't checking anything from that main file, then there is no need to have it, as I don't believe it affects whether calloc is kept in the other file. And if the operator new isn't needed, but it is sufficient to just have a function that directly calls both malloc and memset in order to get the folding to calloc, then just have that and remove anything extraneous.

We want to test references from one TU to both malloc and calloc with a stub implementation of calloc. I think it is more genuine for calloc.ll to define malloc along with calloc (otherwise malloc is undefined while calloc is defined, making the test less like a real world scenario).

MaskRay

I think using PreservedSymbols is correct.

MaskRay · 2023-11-22T20:46:46Z

llvm/test/ThinLTO/X86/Inputs/calloc.ll

+entry:
+  %mul = mul i64 %size, %num
+  %call = tail call noalias ptr @malloc(i64 poison)
+  tail call void @llvm.memset.p0.i64(ptr nonnull align 1 @_ZZ6mallocE3buf, i8 0, i64 %mul, i1 false)


Call memset on %call and return %call, to make the stub implementation more authentic.

MaskRay · 2023-11-22T21:52:25Z

llvm/test/ThinLTO/X86/call-calloc.ll

@@ -0,0 +1,35 @@
+; RUN: opt -module-summary %s -o %t1.bc


Perhaps add a file-level comment that this is similar to builtin-nostrip.ll

MaskRay · 2023-11-22T21:55:28Z

llvm/test/ThinLTO/X86/call-calloc.ll

+; RUN:   %t2.bc -r=%t2.bc,malloc,pl -r=%t2.bc,calloc,pl
+; RUN: llvm-dis %t.out.2.5.precodegen.bc -o - | FileCheck %s
+
+; CHECK: define {{.*}} @calloc


We want to test references from one TU to both malloc and calloc with a stub implementation of calloc. I think it is more genuine for calloc.ll to define malloc along with calloc (otherwise malloc is undefined while calloc is defined, making the test less like a real world scenario).

MaskRay · 2023-11-22T21:59:34Z

llvm/lib/Object/IRSymtab.cpp

@@ -54,6 +54,10 @@ static const char *PreservedSymbols[] = {
    // TODO: Are there similar such variables?
    "__ssp_canary_word",
    "__stack_chk_guard",
+    // The dead store elimination pass can fold malloc + memset calls into
+    // a single call to calloc. Prevent thin LTO from marking calloc a dead


// Prevent thin LTO from marking calloc a dead function ...

I think this sentence can be removed, as it describes the problem that applies too all preserved symbols, which can use a comment applying to the whole variable.

This change affects regular LTO, so "thin LTO" is not accurate. This list also prevents internalization.

I think this comment is not addressed.

MaskRay · 2023-11-22T22:14:20Z

Updated test cases

The problem arises only when two (or more) libcalls are folded into one (or more) other libcalls.

The problem here is all allocation functions are not in the RuntimeLibcalls list. All RuntimeLibcalls functions are preserved by LTO process (in general, unless for specific platform they are not available) because they can be produced by backend even if the bitcode object file don't reference them. There is no difference from this case, which calloc doesn't get referenced before optimization.

Well, I've done a brief investigation on this and looks like RuntimeLibcalls handling boils up to this snippet of code in lld
  if (!ctx.bitcodeFiles.empty())
    for (auto *s : lto::LTO::getRuntimeLibcallSymbols())
      handleLibcall(s);
The handleLibcall function check if IR symbol representing runtime library function is lazy, and adds its bitcode file to LTO input set if so. This doesn't really help much, because calloc (event if added to to RuntimeLibcalls set) is not a lazy symbol in my case. The problem is not that we don't add it but that we explicitly remove it in DCE. I'm starting to beleive that calloc is the unique case (I haven't found anything else of the sort).
It isn't lld that is doing the handling that preserves these symbols, it is LTO, specifically llvm/lib/Object/IRSymtab.cpp. See the use of RuntimeLibcalls.def here and where it is used to preserve symbols later in the file:

For a complete fix, we should also add calloc to LTO.cpp:static const char *libcallRoutineNames[] to affect the following code

> >   if (!ctx.bitcodeFiles.empty())
> >     for (auto *s : lto::LTO::getRuntimeLibcallSymbols())
> >       handleLibcall(s);

Middle-end library function optimizations may reference a runtime library function that is not in the referencer's symbol table.
If the definition is provided by a lazy bitcode file, we will find that we need to extract the lazy bitcode file after LTO compilation.
However, the bitcode file did not participate the LTO compilation and our model does not allow repeated LTO compilation.
As a result, the runtime library function will be either undefined or defined as a symbol without an associated section.

To address this issue, we make two changes:

Change the linker to extract all runtime library functions defined in lazy bitcode files. We cannot fortell what runtime library functions will be referenced, so we conservatively retain all (https://reviews.llvm.org/D50017).
Set the VisibileToRegularObj bit for all runtime library functions in an IR symbol table. This prevents the symbol from being internalized or discarded.

The current patch handles the second point, but not the first. If we place malloc and calloc in different lazy bitcode files, we will find that calloc may not be correctly extracted => incorrect definition.

eleviant · 2023-11-23T17:05:23Z

Addressed review comments

github-actions · 2023-11-23T17:07:01Z

✅ With the latest revision this PR passed the C/C++ code formatter.

teresajohnson · 2023-11-27T14:50:11Z

llvm/test/ThinLTO/X86/call-calloc.ll

+
+@__p = dso_local local_unnamed_addr global ptr null, align 8
+
+define ptr @_ZnwmRKSt9nothrow_t(i64 %size, ptr %0) {


Following up from an earlier comment - is this separate function needed - can the test be simplified by just having DoStuff call malloc followed by memset? It would be nice to have the test as simple as possible for testing the specific issue.

teresajohnson · 2023-11-27T14:51:10Z

llvm/lib/Object/IRSymtab.cpp

@@ -54,6 +54,10 @@ static const char *PreservedSymbols[] = {
    // TODO: Are there similar such variables?
    "__ssp_canary_word",
    "__stack_chk_guard",
+    // The dead store elimination pass can fold malloc + memset calls into
+    // a single call to calloc. Prevent thin LTO from marking calloc a dead


I think this comment is not addressed.

teresajohnson · 2023-11-27T14:52:18Z

llvm/lib/LTO/LTO.cpp

@@ -1344,6 +1344,7 @@ static const char *libcallRoutineNames[] = {
 #define HANDLE_LIBCALL(code, name) name,
 #include "llvm/IR/RuntimeLibcalls.def"
 #undef HANDLE_LIBCALL
+    "calloc",


Can we just add calloc into RuntimeLibcalls.def?

eleviant · 2023-11-28T19:40:17Z

Addressed comments

teresajohnson

lgtm, @MaskRay do you have any other comments or concerns?

cachemeifyoucan

The implementation LGTM. Some comments about the test.

cachemeifyoucan · 2023-11-28T19:48:23Z

llvm/test/ThinLTO/X86/call-calloc.ll

@@ -0,0 +1,39 @@
+; REQUIRES: x86-registered-target


nit: this is not needed. X86 directory has a lit.local.cfg that covers this requirement.

cachemeifyoucan · 2023-11-28T19:48:48Z

llvm/test/ThinLTO/X86/Inputs/calloc.ll

+}
+
+define hidden ptr @calloc(i64 noundef %num, i64 noundef %size) {
+entry:


I will further cut down this test case since the implementation of calloc doesn't matter here.

cachemeifyoucan · 2023-11-28T19:54:34Z

llvm/test/ThinLTO/X86/call-calloc.ll

+
+; CALLOC-USE:      define void @_Z7DoStuffv()
+; CALLOC-USE-NEXT: entry:
+; CALLOC-USE-NEXT:   tail call {{.*}} ptr @calloc


I might even remove the use for calloc here since you just want to test that calloc is dead-stripped, and don't want to test that calloc optimization is happening. calloc should be preserved with or without optimization is happening or not.

That said, maybe just merge this test into builtin-nostrip.ll

A calloc definition can be useful in the test file if it is in a separate bitcode file linked using archive semantics. See lld/test/ELF/lto/libcall-archive-calloc.ll, which needs updates with this patch.

…et => calloc) libcall optimization Similar to https://reviews.llvm.org/D50017: malloc+memset references can be combined to a calloc reference, which is not explicit in the referencer's IR symbol table. If calloc is defined in a lazy bitcode file, we should extract the archive member to satisfy possible references from LTO generated object files; otherwise (current status, which will be fixed by #72673), `calloc` as a LazyObject symbol will be resolved by compileBitcodeFiles generated Undefined, leading to an incorrectly-extracted Defined symbol without section, which will lower to an SHN_ABS symbol at address 0.

MaskRay

There are some comments from @cachemeifyoucan that should be addressed. You can use lld/test/ELF/lto/libcall-archive-calloc.ll to simplify the test.

MaskRay · 2023-11-29T14:35:45Z

llvm/test/ThinLTO/X86/call-calloc.ll

+
+; CALLOC-USE:      define void @_Z7DoStuffv()
+; CALLOC-USE-NEXT: entry:
+; CALLOC-USE-NEXT:   tail call {{.*}} ptr @calloc


A calloc definition can be useful in the test file if it is in a separate bitcode file linked using archive semantics. See lld/test/ELF/lto/libcall-archive-calloc.ll, which needs updates with this patch.

llvm/test/ThinLTO/X86/call-calloc.ll

eleviant · 2023-11-29T17:57:00Z

Addressed comments

cachemeifyoucan

Thanks!

MaskRay · 2023-11-29T19:26:53Z

lld/test/ELF/lto/libcall-archive-calloc.ll needs an update as well.

Dead store elimination pass may fold malloc + memset calls into a single call to calloc. If calloc is not preserved and is not being called directly it can be marked dead during thin link and result in link error.

dyung · 2023-11-30T20:43:38Z

@eleviant this change seems to be causing a bunch of failures on Windows, can you take a look and revert if you need time to investigate? Thanks!

https://lab.llvm.org/buildbot/#/builders/216/builds/31173

eleviant · 2023-12-01T07:59:21Z

@eleviant this change seems to be causing a bunch of failures on Windows, can you take a look and revert if you need time to investigate? Thanks!

https://lab.llvm.org/buildbot/#/builders/216/builds/31173

I'll check but the first glance this one: https://lab.llvm.org/buildbot/#/builders/216/builds/31173/steps/7/logs/FAIL__LLVM__2009-02-12-DebugInfoVLA_ll is caused by an issue in ISel which seems to be out of scope of this patch. I also looked at few others: they seem to have some codegen issues, however none of them uses malloc/calloc

dyung · 2023-12-01T09:34:56Z

@eleviant this change seems to be causing a bunch of failures on Windows, can you take a look and revert if you need time to investigate? Thanks!
https://lab.llvm.org/buildbot/#/builders/216/builds/31173

I'll check but the first glance this one: https://lab.llvm.org/buildbot/#/builders/216/builds/31173/steps/7/logs/FAIL__LLVM__2009-02-12-DebugInfoVLA_ll is caused by an issue in ISel which seems to be out of scope of this patch. I also looked at few others: they seem to have some codegen issues, however none of them uses malloc/calloc

Yeah, I can't explain it other to say that I took the machine offline and built the change immediately preceding yours (306e13e) and the tests all passed. And then I built your change (28ad007) and I was able to reproduce the 288 test failures.

eleviant · 2023-12-01T10:53:06Z

Well, I've just checked on Windows 11:

c:\AS\llvm-project\stage1>git reflog
bc265bd66323 (HEAD -> main, origin/main) HEAD@{0}: pull: Fast-forward

c:\AS\llvm-project\stage1>python bin\llvm-lit.py -vvv ..\llvm\test\CodeGen\X86\2012-11-28-merge-store-alias.ll
llvm-lit.py: c:\AS\llvm-project\llvm\utils\lit\lit\llvm\config.py:46: note: using lit tools: C:\Program Files\Git\usr\bin
-- Testing: 1 tests, 1 workers --
PASS: LLVM :: CodeGen/X86/2012-11-28-merge-store-alias.ll (1 of 1)

Testing Time: 0.35s

Total Discovered Tests: 1
  Passed: 1 (100.00%)

c:\AS\llvm-project\stage1>python bin\llvm-lit.py -vvv ..\llvm\test\CodeGen\X86\2009-02-12-DebugInfoVLA.ll
llvm-lit.py: c:\AS\llvm-project\llvm\utils\lit\lit\llvm\config.py:46: note: using lit tools: C:\Program Files\Git\usr\bin
-- Testing: 1 tests, 1 workers --
PASS: LLVM :: CodeGen/X86/2009-02-12-DebugInfoVLA.ll (1 of 1)

Testing Time: 0.45s

Total Discovered Tests: 1
  Passed: 1 (100.00%)

Revision bc265bd66323 contains my changes and your tests pass on my Linux and Windows machines. Can you please try it?

dyung · 2023-12-01T11:27:08Z

Well, I've just checked on Windows 11:

c:\AS\llvm-project\stage1>git reflog
bc265bd66323 (HEAD -> main, origin/main) HEAD@{0}: pull: Fast-forward

c:\AS\llvm-project\stage1>python bin\llvm-lit.py -vvv ..\llvm\test\CodeGen\X86\2012-11-28-merge-store-alias.ll
llvm-lit.py: c:\AS\llvm-project\llvm\utils\lit\lit\llvm\config.py:46: note: using lit tools: C:\Program Files\Git\usr\bin
-- Testing: 1 tests, 1 workers --
PASS: LLVM :: CodeGen/X86/2012-11-28-merge-store-alias.ll (1 of 1)

Testing Time: 0.35s

Total Discovered Tests: 1
  Passed: 1 (100.00%)

c:\AS\llvm-project\stage1>python bin\llvm-lit.py -vvv ..\llvm\test\CodeGen\X86\2009-02-12-DebugInfoVLA.ll
llvm-lit.py: c:\AS\llvm-project\llvm\utils\lit\lit\llvm\config.py:46: note: using lit tools: C:\Program Files\Git\usr\bin
-- Testing: 1 tests, 1 workers --
PASS: LLVM :: CodeGen/X86/2009-02-12-DebugInfoVLA.ll (1 of 1)

Testing Time: 0.45s

Total Discovered Tests: 1
  Passed: 1 (100.00%)

Revision bc265bd66323 contains my changes and your tests pass on my Linux and Windows machines. Can you please try it?

Well this is odd. I just restarted the Windows build bot after taking it offline to do bisections, and now it seems to be green (it was previously showing 126 test failures for most of the day). So I guess we can just forget about these failures, it must have been some configuration issue with the bot or something. Thanks for looking into it for me though, appreciate it, and sorry for the noise!

eleviant · 2023-12-01T11:28:21Z

Never mind :)

eleviant requested review from MaskRay, cachemeifyoucan and teresajohnson November 17, 2023 16:11

llvmbot added the LTO Link time optimization (regular/full LTO or ThinLTO) label Nov 17, 2023

eleviant self-assigned this Nov 17, 2023

minglotus-6 reviewed Nov 17, 2023

View reviewed changes

eleviant force-pushed the dont-mark-calloc-dead branch from eb9ba8a to 75d6377 Compare November 20, 2023 09:22

eleviant force-pushed the dont-mark-calloc-dead branch from 75d6377 to 21a6ec4 Compare November 21, 2023 09:12

llvmbot added the llvm:binary-utilities label Nov 21, 2023

teresajohnson reviewed Nov 21, 2023

View reviewed changes

eleviant force-pushed the dont-mark-calloc-dead branch from 21a6ec4 to 410bd0c Compare November 22, 2023 10:03

teresajohnson reviewed Nov 22, 2023

View reviewed changes

MaskRay reviewed Nov 22, 2023

View reviewed changes

eleviant force-pushed the dont-mark-calloc-dead branch from 410bd0c to f1628d1 Compare November 23, 2023 17:04

eleviant force-pushed the dont-mark-calloc-dead branch from f1628d1 to 35d0a95 Compare November 23, 2023 17:10

teresajohnson reviewed Nov 27, 2023

View reviewed changes

eleviant force-pushed the dont-mark-calloc-dead branch from 35d0a95 to 08598a6 Compare November 28, 2023 19:40

llvmbot added the llvm:ir label Nov 28, 2023

teresajohnson approved these changes Nov 28, 2023

View reviewed changes

cachemeifyoucan reviewed Nov 28, 2023

View reviewed changes

MaskRay approved these changes Nov 29, 2023

View reviewed changes

eleviant force-pushed the dont-mark-calloc-dead branch from 08598a6 to 02ded58 Compare November 29, 2023 17:56

cachemeifyoucan approved these changes Nov 29, 2023

View reviewed changes

[ThinLTO] Don't mark calloc function dead

b5badb9

Dead store elimination pass may fold malloc + memset calls into a single call to calloc. If calloc is not preserved and is not being called directly it can be marked dead during thin link and result in link error.

eleviant force-pushed the dont-mark-calloc-dead branch from 02ded58 to b5badb9 Compare November 30, 2023 17:08

llvmbot added lld lld:ELF labels Nov 30, 2023

eleviant merged commit 28ad007 into llvm:main Nov 30, 2023
4 of 5 checks passed


		declare ptr @malloc(i64)

		define ptr @_ZnwmRKSt9nothrow_t(i64 %size, ptr %0) {


		@__p = dso_local local_unnamed_addr global ptr null, align 8

		define ptr @_ZnwmRKSt9nothrow_t(i64 %size, ptr %0) {

[ThinLTO] Don't mark calloc function dead #72673

[ThinLTO] Don't mark calloc function dead #72673

Conversation

eleviant commented Nov 17, 2023

llvmbot commented Nov 17, 2023 • edited

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

cachemeifyoucan commented Nov 17, 2023

eleviant commented Nov 17, 2023 • edited

cachemeifyoucan commented Nov 17, 2023

eleviant commented Nov 20, 2023 • edited

teresajohnson commented Nov 20, 2023

teresajohnson commented Nov 20, 2023

eleviant commented Nov 21, 2023

teresajohnson left a comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

MaskRay left a comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

MaskRay commented Nov 22, 2023 • edited

eleviant commented Nov 23, 2023

github-actions bot commented Nov 23, 2023 • edited

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

eleviant commented Nov 28, 2023

teresajohnson left a comment

Choose a reason for hiding this comment

cachemeifyoucan left a comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

MaskRay left a comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

eleviant commented Nov 29, 2023

cachemeifyoucan left a comment

Choose a reason for hiding this comment

MaskRay commented Nov 29, 2023

dyung commented Nov 30, 2023

eleviant commented Dec 1, 2023

dyung commented Dec 1, 2023

eleviant commented Dec 1, 2023

dyung commented Dec 1, 2023

eleviant commented Dec 1, 2023

llvmbot commented Nov 17, 2023 •

edited

eleviant commented Nov 17, 2023 •

edited

eleviant commented Nov 20, 2023 •

edited

MaskRay commented Nov 22, 2023 •

edited

github-actions bot commented Nov 23, 2023 •

edited