From 4651eb7df79ec80a46662398924c2fe31ce469d1 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 19 Sep 2025 12:22:09 +0200 Subject: [PATCH 1/2] Add test --- .../RelLookupTableConverter/nvptx.ll | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 llvm/test/Transforms/RelLookupTableConverter/nvptx.ll diff --git a/llvm/test/Transforms/RelLookupTableConverter/nvptx.ll b/llvm/test/Transforms/RelLookupTableConverter/nvptx.ll new file mode 100644 index 0000000000000..ac4cde795f7e2 --- /dev/null +++ b/llvm/test/Transforms/RelLookupTableConverter/nvptx.ll @@ -0,0 +1,33 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5 +; RUN: opt < %s -passes=rel-lookup-table-converter -relocation-model=pic -S | FileCheck %s +; REQUIRES: nvptx-registered-target +target triple = "nvptx64-nvidia-cuda" + +@a1 = internal constant i32 0, align 4 +@b1 = internal constant i32 0, align 4 +@c1 = internal constant i32 0, align 4 +@d1 = internal constant i32 0, align 4 + +@switch.table = private unnamed_addr constant [3 x ptr] [ptr @a1, ptr @b1, ptr @c1], align 8 + +;. +; CHECK: @a1 = internal constant i32 0, align 4 +; CHECK: @b1 = internal constant i32 0, align 4 +; CHECK: @c1 = internal constant i32 0, align 4 +; CHECK: @d1 = internal constant i32 0, align 4 +; CHECK: @switch.table.rel = private unnamed_addr constant [3 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr @a1 to i64), i64 ptrtoint (ptr @switch.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @b1 to i64), i64 ptrtoint (ptr @switch.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @c1 to i64), i64 ptrtoint (ptr @switch.table.rel to i64)) to i32)], align 4 +;. 
+define ptr @internal_linkage(i32 %cond) { +; CHECK-LABEL: define ptr @internal_linkage( +; CHECK-SAME: i32 [[COND:%.*]]) { +; CHECK-NEXT: [[RELTABLE_SHIFT:%.*]] = shl i32 [[COND]], 2 +; CHECK-NEXT: [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i32(ptr @switch.table.rel, i32 [[RELTABLE_SHIFT]]) +; CHECK-NEXT: ret ptr [[RELTABLE_INTRINSIC]] +; + %switch.gep = getelementptr inbounds [3 x ptr], ptr @switch.table, i32 0, i32 %cond + %switch.load = load ptr, ptr %switch.gep, align 8 + ret ptr %switch.load +} +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: read) } +;. From 005378330fe1328ce9e7139cd1c9564b7a8bdf7f Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 19 Sep 2025 12:24:29 +0200 Subject: [PATCH 2/2] Disable relative lookup tables on nvptx --- llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h | 5 +++++ llvm/test/Transforms/RelLookupTableConverter/nvptx.ll | 11 +++++------ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h index b32d931bd3074..78eb751cf3c2e 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h @@ -190,6 +190,11 @@ class NVPTXTTIImpl final : public BasicTTIImplBase { void collectKernelLaunchBounds( const Function &F, SmallVectorImpl> &LB) const override; + + bool shouldBuildRelLookupTables() const override { + // NVPTX does not support the self-referential globals these tables use. 
+ return false; + } }; } // end namespace llvm diff --git a/llvm/test/Transforms/RelLookupTableConverter/nvptx.ll b/llvm/test/Transforms/RelLookupTableConverter/nvptx.ll index ac4cde795f7e2..70ebf220c369c 100644 --- a/llvm/test/Transforms/RelLookupTableConverter/nvptx.ll +++ b/llvm/test/Transforms/RelLookupTableConverter/nvptx.ll @@ -3,6 +3,8 @@ ; REQUIRES: nvptx-registered-target target triple = "nvptx64-nvidia-cuda" +; Do not produce a relative lookup table for the nvptx target. + @a1 = internal constant i32 0, align 4 @b1 = internal constant i32 0, align 4 @c1 = internal constant i32 0, align 4 @@ -15,19 +17,16 @@ target triple = "nvptx64-nvidia-cuda" ; CHECK: @b1 = internal constant i32 0, align 4 ; CHECK: @c1 = internal constant i32 0, align 4 ; CHECK: @d1 = internal constant i32 0, align 4 -; CHECK: @switch.table.rel = private unnamed_addr constant [3 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr @a1 to i64), i64 ptrtoint (ptr @switch.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @b1 to i64), i64 ptrtoint (ptr @switch.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @c1 to i64), i64 ptrtoint (ptr @switch.table.rel to i64)) to i32)], align 4 +; CHECK: @switch.table = private unnamed_addr constant [3 x ptr] [ptr @a1, ptr @b1, ptr @c1], align 8 ;. 
define ptr @internal_linkage(i32 %cond) { ; CHECK-LABEL: define ptr @internal_linkage( ; CHECK-SAME: i32 [[COND:%.*]]) { -; CHECK-NEXT: [[RELTABLE_SHIFT:%.*]] = shl i32 [[COND]], 2 -; CHECK-NEXT: [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i32(ptr @switch.table.rel, i32 [[RELTABLE_SHIFT]]) +; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [3 x ptr], ptr @switch.table, i32 0, i32 [[COND]] +; CHECK-NEXT: [[RELTABLE_INTRINSIC:%.*]] = load ptr, ptr [[SWITCH_GEP]], align 8 ; CHECK-NEXT: ret ptr [[RELTABLE_INTRINSIC]] ; %switch.gep = getelementptr inbounds [3 x ptr], ptr @switch.table, i32 0, i32 %cond %switch.load = load ptr, ptr %switch.gep, align 8 ret ptr %switch.load } -;. -; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: read) } -;.