Skip to content

Commit 25d4193

Browse files
committed
Add convergent attribute to funcs for GPU targets
On targets with convergent operations, the convergent attribute must be added to every function that executes a convergent operation. Following clang, we conservatively apply the attribute to all functions when compiling for such a target and rely on LLVM to remove it where it is not needed. This currently applies to the amdgpu and nvptx targets.
1 parent ad8f5bc commit 25d4193

File tree

5 files changed

+51
-1
lines changed

5 files changed

+51
-1
lines changed

compiler/rustc_codegen_llvm/src/allocator.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,15 @@ fn create_wrapper_function(
176176
None
177177
};
178178

179+
if tcx.sess.target.has_convergent {
180+
// Conservatively apply convergent to all functions
181+
attributes::apply_to_llfn(
182+
llfn,
183+
llvm::AttributePlace::Function,
184+
&[llvm::AttributeKind::Convergent.create_attr(cx.llcx)],
185+
);
186+
}
187+
179188
let llbb = unsafe { llvm::LLVMAppendBasicBlockInContext(cx.llcx, llfn, c"entry".as_ptr()) };
180189
let mut bx = SBuilder::build(&cx, llbb);
181190

compiler/rustc_codegen_llvm/src/declare.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
use std::borrow::Borrow;
1515

1616
use itertools::Itertools;
17-
use rustc_codegen_ssa::traits::TypeMembershipCodegenMethods;
17+
use rustc_codegen_ssa::traits::{MiscCodegenMethods, TypeMembershipCodegenMethods};
1818
use rustc_data_structures::fx::FxIndexSet;
1919
use rustc_middle::ty::{Instance, Ty};
2020
use rustc_sanitizers::{cfi, kcfi};
@@ -68,6 +68,15 @@ pub(crate) fn declare_raw_fn<'ll, 'tcx>(
6868
debug!("declare_raw_fn(name={:?}, ty={:?})", name, ty);
6969
let llfn = declare_simple_fn(cx, name, callconv, unnamed, visibility, ty);
7070

71+
if cx.sess().target.has_convergent {
72+
// Conservatively apply convergent to all functions
73+
attributes::apply_to_llfn(
74+
llfn,
75+
llvm::AttributePlace::Function,
76+
&[llvm::AttributeKind::Convergent.create_attr(cx.llcx)],
77+
);
78+
}
79+
7180
let mut attrs = SmallVec::<[_; 4]>::new();
7281

7382
if cx.tcx.sess.opts.cg.no_redzone.unwrap_or(cx.tcx.sess.target.disable_redzone) {

compiler/rustc_codegen_llvm/src/llvm/ffi.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,7 @@ pub(crate) enum AttributeKind {
292292
CapturesNone = 46,
293293
SanitizeRealtimeNonblocking = 47,
294294
SanitizeRealtimeBlocking = 48,
295+
Convergent = 49,
295296
}
296297

297298
/// LLVMIntPredicate

compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,7 @@ enum class LLVMRustAttributeKind {
328328
CapturesNone = 46,
329329
SanitizeRealtimeNonblocking = 47,
330330
SanitizeRealtimeBlocking = 48,
331+
Convergent = 49,
331332
};
332333

333334
static Attribute::AttrKind fromRust(LLVMRustAttributeKind Kind) {
@@ -428,6 +429,8 @@ static Attribute::AttrKind fromRust(LLVMRustAttributeKind Kind) {
428429
return Attribute::SanitizeRealtime;
429430
case LLVMRustAttributeKind::SanitizeRealtimeBlocking:
430431
return Attribute::SanitizeRealtimeBlocking;
432+
case LLVMRustAttributeKind::Convergent:
433+
return Attribute::Convergent;
431434
}
432435
report_fatal_error("bad LLVMRustAttributeKind");
433436
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// Checks that when compiling for GPU targets, the convergent attribute
2+
// is added to function declarations and definitions.
3+
4+
//@ add-minicore
5+
//@ revisions: amdgpu nvptx
6+
//@ [amdgpu] compile-flags: --crate-type=rlib --target=amdgcn-amd-amdhsa -Ctarget-cpu=gfx900
7+
//@ [amdgpu] needs-llvm-components: amdgpu
8+
//@ [nvptx] compile-flags: --crate-type=rlib --target=nvptx64-nvidia-cuda
9+
//@ [nvptx] needs-llvm-components: nvptx
10+
#![feature(no_core, lang_items, abi_gpu_kernel)]
11+
#![no_core]
12+
13+
extern crate minicore;
14+
use minicore::*;
15+
16+
extern "C" {
17+
fn ext();
18+
}
19+
20+
// CHECK: define {{.*}}_kernel void @fun(i32{{.*}}) unnamed_addr #[[ATTR:[0-9]+]] {
21+
// CHECK: declare void @ext() unnamed_addr #[[ATTR]]
22+
// CHECK: attributes #[[ATTR]] = {{.*}} convergent
23+
#[no_mangle]
24+
pub extern "gpu-kernel" fn fun(_: i32) {
25+
unsafe {
26+
ext();
27+
}
28+
}

0 commit comments

Comments
 (0)