diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp
index 6b979d6acb5139..31cea8fe2a2a38 100644
--- a/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -547,6 +547,17 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
   if (T.isNVPTX()) {
     TLI.disableAllFunctions();
     TLI.setAvailable(LibFunc_nvvm_reflect);
+    TLI.setAvailable(llvm::LibFunc_malloc);
+    TLI.setAvailable(llvm::LibFunc_free);
+
+    // TODO: According to [0] we could also enable the two functions below, but
+    //       we have not yet evaluated the performance implications.
+    // [0]
+    // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#dynamic-global-memory-allocation-and-operations
+    //
+    //    TLI.setAvailable(llvm::LibFunc_memcpy);
+    //    TLI.setAvailable(llvm::LibFunc_memset);
+
   } else {
     TLI.setUnavailable(LibFunc_nvvm_reflect);
   }
diff --git a/llvm/test/CodeGen/NVPTX/libcall-fulfilled.ll b/llvm/test/CodeGen/NVPTX/libcall-fulfilled.ll
index 9d6777d59c3e5a..1703dbdf3a8ae4 100644
--- a/llvm/test/CodeGen/NVPTX/libcall-fulfilled.ll
+++ b/llvm/test/CodeGen/NVPTX/libcall-fulfilled.ll
@@ -1,6 +1,9 @@
 ; RUN: llc < %s -march=nvptx 2>&1 | FileCheck %s
 ; Allow to make libcalls that are defined in the current module
 
+declare i8* @malloc(i64)
+declare void @free(i8*)
+
 ; Underlying libcall declaration
 ; CHECK: .visible .func  (.param .align 16 .b8 func_retval0[16]) __umodti3
 
@@ -29,3 +32,14 @@ bb1:
 define i128 @__umodti3(i128, i128) {
   ret i128 0
 }
+
+define void @malloc_then_free() {
+; CHECK:  call.uni (retval0),
+; CHECK:  malloc,
+; CHECK:  call.uni
+; CHECK:  free,
+  %a = call i8* @malloc(i64 4)
+  store i8 0, i8* %a
+  call void @free(i8* %a)
+  ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/malloc_free_delete_nvptx.ll b/llvm/test/Transforms/InstCombine/malloc_free_delete_nvptx.ll
new file mode 100644
index 00000000000000..39050440dc4269
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/malloc_free_delete_nvptx.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target triple = "nvptx64"
+
+declare void @user(i8*)
+declare i8* @malloc(i64)
+declare void @free(i8*)
+
+; Ensure malloc & free are reported as available for NVPTX so that we can
+; recognize and optimize them properly. In the test below the malloc-free chain
+; is useless and we can remove it *if* we know about malloc & free.
+define void @malloc_then_free_not_needed() {
+; CHECK-LABEL: @malloc_then_free_not_needed(
+; CHECK-NEXT:    ret void
+;
+  %a = call i8* @malloc(i64 4)
+  store i8 0, i8* %a
+  call void @free(i8* %a)
+  ret void
+}
+
+define void @malloc_then_free_needed() {
+; CHECK-LABEL: @malloc_then_free_needed(
+; CHECK-NEXT:    [[A:%.*]] = call dereferenceable_or_null(4) i8* @malloc(i64 4)
+; CHECK-NEXT:    call void @user(i8* [[A]])
+; CHECK-NEXT:    call void @free(i8* [[A]])
+; CHECK-NEXT:    ret void
+;
+  %a = call i8* @malloc(i64 4)
+  call void @user(i8* %a)
+  call void @free(i8* %a)
+  ret void
+}