diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp index 6b979d6acb5139..31cea8fe2a2a38 100644 --- a/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -547,6 +547,17 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, if (T.isNVPTX()) { TLI.disableAllFunctions(); TLI.setAvailable(LibFunc_nvvm_reflect); + TLI.setAvailable(llvm::LibFunc_malloc); + TLI.setAvailable(llvm::LibFunc_free); + + // TODO: We could enable the following two according to [0] but we haven't + // done an evaluation wrt. the performance implications. + // [0] + // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#dynamic-global-memory-allocation-and-operations + // + // TLI.setAvailable(llvm::LibFunc_memcpy); + // TLI.setAvailable(llvm::LibFunc_memset); + } else { TLI.setUnavailable(LibFunc_nvvm_reflect); } diff --git a/llvm/test/CodeGen/NVPTX/libcall-fulfilled.ll b/llvm/test/CodeGen/NVPTX/libcall-fulfilled.ll index 9d6777d59c3e5a..1703dbdf3a8ae4 100644 --- a/llvm/test/CodeGen/NVPTX/libcall-fulfilled.ll +++ b/llvm/test/CodeGen/NVPTX/libcall-fulfilled.ll @@ -1,6 +1,9 @@ ; RUN: llc < %s -march=nvptx 2>&1 | FileCheck %s ; Allow to make libcalls that are defined in the current module +declare i8* @malloc(i64) +declare void @free(i8*) + ; Underlying libcall declaration ; CHECK: .visible .func (.param .align 16 .b8 func_retval0[16]) __umodti3 @@ -29,3 +32,14 @@ bb1: define i128 @__umodti3(i128, i128) { ret i128 0 } + +define void @malloc_then_free() { +; CHECK: call.uni (retval0), +; CHECK: malloc, +; CHECK: call.uni +; CHECK: free, + %a = call i8* @malloc(i64 4) + store i8 0, i8* %a + call void @free(i8* %a) + ret void +} diff --git a/llvm/test/Transforms/InstCombine/malloc_free_delete_nvptx.ll b/llvm/test/Transforms/InstCombine/malloc_free_delete_nvptx.ll new file mode 100644 index 00000000000000..39050440dc4269 --- /dev/null +++ 
b/llvm/test/Transforms/InstCombine/malloc_free_delete_nvptx.ll @@ -0,0 +1,34 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +target triple = "nvptx64" + +declare void @user(i8*) +declare i8* @malloc(i64) +declare void @free(i8*) + +; Ensure the nvptx backend states malloc & free are a thing so we can recognize +; and optimize them properly. In the test below the malloc-free chain is +; useless and we can remove it *if* we know about malloc & free. +define void @malloc_then_free_not_needed() { +; CHECK-LABEL: @malloc_then_free_not_needed( +; CHECK-NEXT: ret void +; + %a = call i8* @malloc(i64 4) + store i8 0, i8* %a + call void @free(i8* %a) + ret void +} + +define void @malloc_then_free_needed() { +; CHECK-LABEL: @malloc_then_free_needed( +; CHECK-NEXT: [[A:%.*]] = call dereferenceable_or_null(4) i8* @malloc(i64 4) +; CHECK-NEXT: call void @user(i8* [[A]]) +; CHECK-NEXT: call void @free(i8* [[A]]) +; CHECK-NEXT: ret void +; + %a = call i8* @malloc(i64 4) + call void @user(i8* %a) + call void @free(i8* %a) + ret void +}