From a7ce507b46b1ca19e5af21325784a2f7e7ef7c75 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 8 Oct 2025 14:13:21 -0500 Subject: [PATCH] [NVPTX] Fix global destructor handling not working with a single dtor Summary: Very small typo here caused it to not enter the main loop if the size was greater than. The inner loop is "less than" so the inverse should be "greater than or equal" but we were only doing "greater than". Did not notice this because the libc handling does its own implementation and the OpenMP tests only tested multiple destructors, and most people only ever use global constructors when they show up on the GPU. --- llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp | 4 ++-- llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp index bb8cec05f4d84..4e069398d540f 100644 --- a/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp @@ -88,7 +88,7 @@ static Function *createInitOrFiniKernelFunction(Module &M, bool IsCtor) { // reinterpret_cast(*start)(); // } // -// void call_init_array_callbacks() { +// void call_fini_array_callbacks() { // size_t fini_array_size = __fini_array_end - __fini_array_start; // for (size_t i = fini_array_size; i > 0; --i) // reinterpret_cast(__fini_array_start[i - 1])(); @@ -153,7 +153,7 @@ static void createInitOrFiniCalls(Function &F, bool IsCtor) { "start"); } IRB.CreateCondBr( - IRB.CreateCmp(IsCtor ? ICmpInst::ICMP_NE : ICmpInst::ICMP_UGT, BeginVal, + IRB.CreateCmp(IsCtor ? ICmpInst::ICMP_NE : ICmpInst::ICMP_UGE, BeginVal, EndVal), LoopBB, ExitBB); IRB.SetInsertPoint(LoopBB); diff --git a/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll b/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll index 02118fbf741bf..b503da4a0bfde 100644 --- a/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll +++ b/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll @@ -72,7 +72,7 @@ define internal void @bar() { ; CHECK-NEXT: [[OFFSET:%.*]] = ashr exact i64 [[TMP2]], 3 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr ptr, ptr addrspace(1) [[BEGIN]], i64 [[OFFSET]] ; CHECK-NEXT: [[START:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[TMP3]], i64 -1 -; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt ptr addrspace(1) [[START]], [[BEGIN]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp uge ptr addrspace(1) [[START]], [[BEGIN]] ; CHECK-NEXT: br i1 [[TMP4]], label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]] ; CHECK: while.entry: ; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ [[START]], [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]