Skip to content

Commit 29cfccd

Browse files
authored
Unrolled build for #147936
Rollup merge of #147936 - Sa4dUs:offload-intrinsic, r=ZuseZ4

Offload intrinsic

This PR implements the minimal mechanisms required to run a small subset of arbitrary offload kernels without relying on hardcoded names or metadata.

- `offload(kernel, (..args))`: an intrinsic that generates the necessary host-side LLVM-IR code.
- `rustc_offload_kernel`: a builtin attribute that marks device kernels to be handled appropriately.

Example usage (pseudocode):

```rust
fn kernel(x: *mut [f64; 128]) {
    core::intrinsics::offload(kernel_1, (x,))
}

#[cfg(target_os = "linux")]
extern "C" {
    pub fn kernel_1(array_b: *mut [f64; 128]);
}

#[cfg(not(target_os = "linux"))]
#[rustc_offload_kernel]
extern "gpu-kernel" fn kernel_1(x: *mut [f64; 128]) {
    unsafe { (*x)[0] = 21.0 };
}
```
2 parents c797096 + f39ec47 commit 29cfccd

File tree

23 files changed

+531
-180
lines changed

23 files changed

+531
-180
lines changed

compiler/rustc_codegen_llvm/messages.ftl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@ codegen_llvm_lto_bitcode_from_rlib = failed to get bitcode from object file for
1818
codegen_llvm_mismatch_data_layout =
1919
data-layout for target `{$rustc_target}`, `{$rustc_layout}`, differs from LLVM target's `{$llvm_target}` default layout, `{$llvm_layout}`
2020
21+
codegen_llvm_offload_without_enable = using the offload feature requires -Z offload=Enable
22+
codegen_llvm_offload_without_fat_lto = using the offload feature requires -C lto=fat
23+
2124
codegen_llvm_parse_bitcode = failed to parse bitcode for LTO module
2225
codegen_llvm_parse_bitcode_with_llvm_err = failed to parse bitcode for LTO module: {$llvm_err}
2326

compiler/rustc_codegen_llvm/src/attributes.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,14 @@ pub(crate) fn apply_to_callsite(callsite: &Value, idx: AttributePlace, attrs: &[
3030
}
3131
}
3232

33+
pub(crate) fn has_string_attr(llfn: &Value, name: &str) -> bool {
34+
llvm::HasStringAttribute(llfn, name)
35+
}
36+
37+
pub(crate) fn remove_string_attr_from_llfn(llfn: &Value, name: &str) {
38+
llvm::RemoveStringAttrFromFn(llfn, name);
39+
}
40+
3341
/// Get LLVM attribute for the provided inline heuristic.
3442
pub(crate) fn inline_attr<'ll, 'tcx>(
3543
cx: &SimpleCx<'ll>,
@@ -408,6 +416,10 @@ pub(crate) fn llfn_attrs_from_instance<'ll, 'tcx>(
408416
to_add.push(llvm::CreateAttrString(cx.llcx, "no-builtins"));
409417
}
410418

419+
if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::OFFLOAD_KERNEL) {
420+
to_add.push(llvm::CreateAttrString(cx.llcx, "offload-kernel"))
421+
}
422+
411423
if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::COLD) {
412424
to_add.push(AttributeKind::Cold.create_attr(cx.llcx));
413425
}

compiler/rustc_codegen_llvm/src/back/lto.rs

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ use crate::back::write::{
2626
};
2727
use crate::errors::{LlvmError, LtoBitcodeFromRlib};
2828
use crate::llvm::{self, build_string};
29-
use crate::{LlvmCodegenBackend, ModuleLlvm, SimpleCx};
29+
use crate::{LlvmCodegenBackend, ModuleLlvm};
3030

3131
/// We keep track of the computed LTO cache keys from the previous
3232
/// session to determine which CGUs we can reuse.
@@ -601,7 +601,6 @@ pub(crate) fn run_pass_manager(
601601
// We then run the llvm_optimize function a second time, to optimize the code which we generated
602602
// in the enzyme differentiation pass.
603603
let enable_ad = config.autodiff.contains(&config::AutoDiff::Enable);
604-
let enable_gpu = config.offload.contains(&config::Offload::Enable);
605604
let stage = if thin {
606605
write::AutodiffStage::PreAD
607606
} else {
@@ -616,13 +615,6 @@ pub(crate) fn run_pass_manager(
616615
write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage, stage);
617616
}
618617

619-
// Here we only handle the GPU host (=cpu) code.
620-
if enable_gpu && !thin && !cgcx.target_is_like_gpu {
621-
let cx =
622-
SimpleCx::new(module.module_llvm.llmod(), &module.module_llvm.llcx, cgcx.pointer_size);
623-
crate::builder::gpu_offload::handle_gpu_code(cgcx, &cx);
624-
}
625-
626618
if cfg!(feature = "llvm_enzyme") && enable_ad && !thin {
627619
let opt_stage = llvm::OptStage::FatLTO;
628620
let stage = write::AutodiffStage::PostAD;

compiler/rustc_codegen_llvm/src/back/write.rs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ use crate::errors::{
4343
use crate::llvm::diagnostic::OptimizationDiagnosticKind::*;
4444
use crate::llvm::{self, DiagnosticInfo};
4545
use crate::type_::llvm_type_ptr;
46-
use crate::{LlvmCodegenBackend, ModuleLlvm, SimpleCx, base, common, llvm_util};
46+
use crate::{LlvmCodegenBackend, ModuleLlvm, SimpleCx, attributes, base, common, llvm_util};
4747

4848
pub(crate) fn llvm_err<'a>(dcx: DiagCtxtHandle<'_>, err: LlvmError<'a>) -> ! {
4949
match llvm::last_error() {
@@ -712,11 +712,12 @@ pub(crate) unsafe fn llvm_optimize(
712712
SimpleCx::new(module.module_llvm.llmod(), module.module_llvm.llcx, cgcx.pointer_size);
713713
// Offload kernels are discovered through the `offload-kernel` string attribute instead of the
714714
// hardcoded names kernel_0 ... kernel_9; the attribute is removed from every function afterwards.
715-
for num in 0..9 {
716-
let name = format!("kernel_{num}");
717-
if let Some(kernel) = cx.get_function(&name) {
718-
handle_offload(&cx, kernel);
715+
for func in cx.get_functions() {
716+
let offload_kernel = "offload-kernel";
717+
if attributes::has_string_attr(func, offload_kernel) {
718+
handle_offload(&cx, func);
719719
}
720+
attributes::remove_string_attr_from_llfn(func, offload_kernel);
720721
}
721722
}
722723

0 commit comments

Comments
 (0)