From b117ff4912d067af5e0fc5f93a226d9796c226f8 Mon Sep 17 00:00:00 2001 From: Manuel Drehwald Date: Fri, 21 Nov 2025 23:00:50 -0800 Subject: [PATCH] wip --- compiler/rustc_codegen_llvm/src/back/write.rs | 29 ++- compiler/rustc_codegen_llvm/src/llvm/ffi.rs | 17 +- .../rustc_llvm/llvm-wrapper/RustWrapper.cpp | 165 +++++++++++++++++- src/doc/rustc-dev-guide/src/offload/usage.md | 19 +- 4 files changed, 210 insertions(+), 20 deletions(-) diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs index 56b4a4700fa8c..3edc4df78a113 100644 --- a/compiler/rustc_codegen_llvm/src/back/write.rs +++ b/compiler/rustc_codegen_llvm/src/back/write.rs @@ -773,11 +773,36 @@ pub(crate) unsafe fn llvm_optimize( }; if cgcx.target_is_like_gpu && config.offload.contains(&config::Offload::Enable) { + let lib_bc_c = CString::new("/p/lustre1/drehwald1/prog/offload/r/lib.bc").unwrap(); + let host_out_c = CString::new("/p/lustre1/drehwald1/prog/offload/r/host.out").unwrap(); + let out_obj_c = CString::new("/p/lustre1/drehwald1/prog/offload/r/host.o").unwrap(); + unsafe { - llvm::LLVMRustBundleImages(module.module_llvm.llmod(), module.module_llvm.tm.raw()); + llvm::LLVMRustBundleImages( + module.module_llvm.llmod(), + module.module_llvm.tm.raw(), + host_out_c.as_ptr(), + ); } - } + unsafe { + // 1) Bundle device module into offload image host.out (device TM) + let ok = llvm::LLVMRustBundleImages( + module.module_llvm.llmod(), + module.module_llvm.tm.raw(), + host_out_c.as_ptr(), + ); + assert!(ok, "LLVMRustBundleImages (device -> host.out) failed"); + // 2) Finalize host: lib.bc + host.out -> host.offload.o (host TM created in C++) + let ok = llvm::LLVMRustFinalizeOffload( + lib_bc_c.as_ptr(), + host_out_c.as_ptr(), + out_obj_c.as_ptr(), + ); + assert!(ok, "LLVMRustFinalizeOffload (host finalize) failed"); + } + dbg!("done"); + } result.into_result().unwrap_or_else(|()| llvm_err(dcx, LlvmError::RunLlvmPasses)) } diff --git 
a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs index 09978dc6f873d..34624ba2d1fea 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs @@ -1726,7 +1726,16 @@ mod Offload { use super::*; unsafe extern "C" { /// Processes the module and writes it in an offload compatible way into a "host.out" file. - pub(crate) fn LLVMRustBundleImages<'a>(M: &'a Module, TM: &'a TargetMachine) -> bool; + pub(crate) fn LLVMRustBundleImages<'a>( + M: &'a Module, + TM: &'a TargetMachine, + host_out: *const c_char, + ) -> bool; + pub(crate) fn LLVMRustFinalizeOffload( + lib_bc_path: *const c_char, + host_out_path: *const c_char, + out_obj_path: *const c_char, + ) -> bool; pub(crate) fn LLVMRustOffloadMapper<'a>(OldFn: &'a Value, NewFn: &'a Value); } } @@ -1740,7 +1749,11 @@ mod Offload_fallback { /// Processes the module and writes it in an offload compatible way into a "host.out" file. /// Marked as unsafe to match the real offload wrapper which is unsafe due to FFI. 
#[allow(unused_unsafe)] - pub(crate) unsafe fn LLVMRustBundleImages<'a>(_M: &'a Module, _TM: &'a TargetMachine) -> bool { + pub(crate) unsafe fn LLVMRustBundleImages<'a>( + _M: &'a Module, + _TM: &'a TargetMachine, + _host_out: *const c_char, + ) -> bool { unimplemented!("This rustc version was not built with LLVM Offload support!"); } #[allow(unused_unsafe)] diff --git a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp index 8d95b7f3aa407..4ca6ad214234a 100644 --- a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp +++ b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp @@ -45,6 +45,18 @@ #ifdef OFFLOAD #include "llvm/Object/OffloadBinary.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/Analysis/VectorUtils.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/MD5.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/xxhash.h" #endif // for raw `write` in the bad-alloc handler @@ -174,12 +186,12 @@ static Error writeFile(StringRef Filename, StringRef Data) { // --image=file=device.bc,triple=amdgcn-amd-amdhsa,arch=gfx90a,kind=openmp // The input module is the rust code compiled for a gpu target like amdgpu. 
// Based on clang/tools/clang-offload-packager/ClangOffloadPackager.cpp -extern "C" bool LLVMRustBundleImages(LLVMModuleRef M, TargetMachine &TM) { +extern "C" bool LLVMRustBundleImages(LLVMModuleRef M, TargetMachine &TM, const char *HostOutPath) { std::string Storage; llvm::raw_string_ostream OS1(Storage); llvm::WriteBitcodeToFile(*unwrap(M), OS1); OS1.flush(); - auto MB = llvm::MemoryBuffer::getMemBufferCopy(Storage, "module.bc"); + auto MB = llvm::MemoryBuffer::getMemBufferCopy(Storage, "device.bc"); SmallVector BinaryData; raw_svector_ostream OS2(BinaryData); @@ -188,19 +200,103 @@ extern "C" bool LLVMRustBundleImages(LLVMModuleRef M, TargetMachine &TM) { ImageBinary.TheImageKind = object::IMG_Bitcode; ImageBinary.Image = std::move(MB); ImageBinary.TheOffloadKind = object::OFK_OpenMP; - ImageBinary.StringData["triple"] = TM.getTargetTriple().str(); - ImageBinary.StringData["arch"] = TM.getTargetCPU(); + + + std::string TripleStr = TM.getTargetTriple().str(); + llvm::StringRef CPURef = TM.getTargetCPU(); + ImageBinary.StringData["triple"] = TripleStr; + ImageBinary.StringData["arch"] = CPURef; llvm::SmallString<0> Buffer = OffloadBinary::write(ImageBinary); if (Buffer.size() % OffloadBinary::getAlignment() != 0) // Offload binary has invalid size alignment return false; OS2 << Buffer; - if (Error E = writeFile("host.out", + if (Error E = writeFile(HostOutPath, StringRef(BinaryData.begin(), BinaryData.size()))) return false; return true; } +#include "llvm/Bitcode/BitcodeReader.h" +Expected> +loadHostModuleFromBitcode(LLVMContext &Ctx, StringRef LibBCPath) { + auto MBOrErr = MemoryBuffer::getFile(LibBCPath); + if (!MBOrErr) + return errorCodeToError(MBOrErr.getError()); + + MemoryBufferRef Ref = (*MBOrErr)->getMemBufferRef(); + return parseBitcodeFile(Ref, Ctx); +} + +extern "C" void embedBufferInModule(Module &M, MemoryBufferRef Buf) { + StringRef SectionName = ".llvm.offloading"; + Align Alignment = Align(8); + // Embed the memory buffer into the module. 
+ Constant *ModuleConstant = ConstantDataArray::get( + M.getContext(), ArrayRef(Buf.getBufferStart(), Buf.getBufferSize())); + GlobalVariable *GV = new GlobalVariable( + M, ModuleConstant->getType(), true, GlobalValue::PrivateLinkage, + ModuleConstant, "llvm.embedded.object"); + GV->setSection(SectionName); + GV->setAlignment(Alignment); + + LLVMContext &Ctx = M.getContext(); + NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.embedded.objects"); + Metadata *MDVals[] = {ConstantAsMetadata::get(GV), + MDString::get(Ctx, SectionName)}; + + MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); + GV->setMetadata(LLVMContext::MD_exclude, llvm::MDNode::get(Ctx, {})); + + appendToCompilerUsed(M, GV); +} + +Error embedHostOutIntoHostModule(Module &HostM, StringRef HostOutPath) { + llvm::errs() << "embedHostOutIntoHostModule step 1:\n"; + auto MBOrErr = MemoryBuffer::getFile(HostOutPath); + llvm::errs() << "embedHostOutIntoHostModule step 2:\n"; + if (!MBOrErr) + return errorCodeToError(MBOrErr.getError()); + + llvm::errs() << "embedHostOutIntoHostModule step 3:\n"; + MemoryBufferRef Buf = (*MBOrErr)->getMemBufferRef(); + llvm::errs() << "embedHostOutIntoHostModule step 4:\n"; + embedBufferInModule(HostM, Buf); + return Error::success(); +} + +#include "llvm/Support/TargetSelect.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/IR/LegacyPassManager.h" +//#include "llvm/Support/Host.h" +//#include "llvm/Support/TargetRegistry.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/CodeGen.h" // <-- new + +Error emitHostObjectWithTM(Module &HostM, + TargetMachine &TM, + StringRef OutObjPath) { + // Make sure module matches the TM + //HostM.setDataLayout(TM.createDataLayout()); + //HostM.setTargetTriple(TM.getTargetTriple().str()); + + legacy::PassManager PM; + std::error_code EC; + raw_fd_ostream OS(OutObjPath, EC, sys::fs::OF_None); + if (EC) + 
return errorCodeToError(EC); + + if (TM.addPassesToEmitFile(PM, OS, nullptr, llvm::CodeGenFileType::ObjectFile)) + return createStringError(inconvertibleErrorCode(), + "TargetMachine can't emit a file of this type"); + + PM.run(HostM); + return Error::success(); +} + extern "C" void LLVMRustOffloadMapper(LLVMValueRef OldFn, LLVMValueRef NewFn) { llvm::Function *oldFn = llvm::unwrap(OldFn); llvm::Function *newFn = llvm::unwrap(NewFn); @@ -222,6 +318,65 @@ extern "C" void LLVMRustOffloadMapper(LLVMValueRef OldFn, LLVMValueRef NewFn) { } #endif +// Create a host TargetMachine with HARDCODED triple/CPU +static std::unique_ptr createHostTargetMachine() { + static bool Initialized = false; + if (!Initialized) { + InitializeAllTargets(); + InitializeAllTargetMCs(); + InitializeAllAsmPrinters(); + InitializeAllAsmParsers(); + Initialized = true; + } + + // Hardcoded host triple + CPU (adapt if your CI/host differs) + std::string TripleStr = "x86_64-unknown-linux-gnu"; + std::string CPU = "x86-64"; // OK for X86 + + std::string Err; + const Target *T = TargetRegistry::lookupTarget(TripleStr, Err); + if (!T) { + // Could log Err here + return nullptr; + } + + TargetOptions Opts; + auto RM = std::optional(Reloc::PIC_); + + std::unique_ptr TM( + T->createTargetMachine(TripleStr, CPU, /*Features*/"", Opts, RM)); + + return TM; +} + +// Top-level entry: host finalize in second rustc invocation +// lib.bc (from first rustc) + host.out (from LLVMRustBundleImages) => host.offload.o +extern "C" bool LLVMRustFinalizeOffload(const char *LibBCPath, + const char *HostOutPath, + const char *OutObjPath) { + LLVMContext Ctx; + + // 1. Load host lib.bc + auto ModOrErr = loadHostModuleFromBitcode(Ctx, LibBCPath); + if (!ModOrErr) + return !errorToBool(ModOrErr.takeError()); + std::unique_ptr HostM = std::move(*ModOrErr); + + // 2. Embed host.out + if (Error E = embedHostOutIntoHostModule(*HostM, HostOutPath)) + return !errorToBool(std::move(E)); + + // 3. 
Create host TM and emit host object + auto HostTM = createHostTargetMachine(); + if (!HostTM) + return false; + + if (Error E = emitHostObjectWithTM(*HostM, *HostTM, OutObjPath)) + return !errorToBool(std::move(E)); + + return true; +} + extern "C" LLVMValueRef LLVMRustGetNamedValue(LLVMModuleRef M, const char *Name, size_t NameLen) { return wrap(unwrap(M)->getNamedValue(StringRef(Name, NameLen))); diff --git a/src/doc/rustc-dev-guide/src/offload/usage.md b/src/doc/rustc-dev-guide/src/offload/usage.md index 8350fb5777fba..fabbc03ea57cd 100644 --- a/src/doc/rustc-dev-guide/src/offload/usage.md +++ b/src/doc/rustc-dev-guide/src/offload/usage.md @@ -68,28 +68,25 @@ pub extern "gpu-kernel" fn kernel_1(x: *mut [f64; 256]) { ## Compile instructions It is important to use a clang compiler build on the same llvm as rustc. Just calling clang without the full path will likely use your system clang, which probably will be incompatible. So either substitute clang/lld invocations below with absolute path, or set your `PATH` accordingly. -First we generate the host (cpu) code. The first build is just to compile libc, take note of the hashed path. Then we call rustc directly to build our host code, while providing the libc artifact to rustc. +First we generate the host (cpu) code. ``` -cargo +offload build -r -v -rustc +offload --edition 2024 src/lib.rs -g --crate-type cdylib -C opt-level=3 -C panic=abort -C lto=fat -L dependency=/absolute_path_to/target/release/deps --extern libc=/absolute_path_to/target/release/deps/liblibc-.rlib --emit=llvm-bc,llvm-ir -Zoffload=Enable -Zunstable-options +RUSTFLAGS="--emit=llvm-bc -Zoffload=Enable -Zunstable-options" cargo +offload build -r ``` +You might afterwards need to copy your target/release/deps/.bc to lib.bc for now, before the next step. Now we generate the device code. Replace the target-cpu with the right code for your gpu. 
``` -RUSTFLAGS="-Ctarget-cpu=gfx90a --emit=llvm-bc,llvm-ir -Zoffload=Enable -Zunstable-options" cargo +offload build -Zunstable-options -r -v --target amdgcn-amd-amdhsa -Zbuild-std=core +RUSTFLAGS="-Ctarget-cpu=gfx90a -Zoffload=Enable -Zunstable-options" cargo +offload build -Zunstable-options -r --target amdgcn-amd-amdhsa -Zbuild-std=core ``` - +This call also does a lot of work and generates multiple intermediate files for llvm offload. +While we integrated most offload steps into rustc by now, one binary invocation still remains for now: ``` -"clang-21" "-cc1" "-triple" "x86_64-unknown-linux-gnu" "-S" "-save-temps=cwd" "-disable-free" "-clear-ast-before-backend" "-main-file-name" "lib.rs" "-mrelocation-model" "pic" "-pic-level" "2" "-pic-is-pie" "-mframe-pointer=all" "-fmath-errno" "-ffp-contract=on" "-fno-rounding-math" "-mconstructor-aliases" "-funwind-tables=2" "-target-cpu" "x86-64" "-tune-cpu" "generic" "-resource-dir" "//rust/build/x86_64-unknown-linux-gnu/llvm/lib/clang/21" "-ferror-limit" "19" "-fopenmp" "-fopenmp-offload-mandatory" "-fgnuc-version=4.2.1" "-fskip-odr-check-in-gmf" "-fembed-offload-object=host.out" "-fopenmp-targets=amdgcn-amd-amdhsa" "-faddrsig" "-D__GCC_HAVE_DWARF2_CFI_ASM=1" "-o" "host.s" "-x" "ir" "lib.bc" - -"clang-21" "-cc1as" "-triple" "x86_64-unknown-linux-gnu" "-filetype" "obj" "-main-file-name" "lib.rs" "-target-cpu" "x86-64" "-mrelocation-model" "pic" "-o" "host.o" "host.s" - "clang-linker-wrapper" "--should-extract=gfx90a" "--device-compiler=amdgcn-amd-amdhsa=-g" "--device-compiler=amdgcn-amd-amdhsa=-save-temps=cwd" "--device-linker=amdgcn-amd-amdhsa=-lompdevice" "--host-triple=x86_64-unknown-linux-gnu" "--save-temps" "--linker-path=/ABSOlUTE_PATH_TO/rust/build/x86_64-unknown-linux-gnu/lld/bin/ld.lld" "--hash-style=gnu" "--eh-frame-hdr" "-m" "elf_x86_64" "-pie" "-dynamic-linker" "/lib64/ld-linux-x86-64.so.2" "-o" "bare" "/lib/../lib64/Scrt1.o" "/lib/../lib64/crti.o" "/ABSOLUTE_PATH_TO/crtbeginS.o" 
"-L/ABSOLUTE_PATH_TO/rust/build/x86_64-unknown-linux-gnu/llvm/bin/../lib/x86_64-unknown-linux-gnu" "-L/ABSOLUTE_PATH_TO/rust/build/x86_64-unknown-linux-gnu/llvm/lib/clang/21/lib/x86_64-unknown-linux-gnu" "-L/lib/../lib64" "-L/usr/lib64" "-L/lib" "-L/usr/lib" "host.o" "-lstdc++" "-lm" "-lomp" "-lomptarget" "-L/ABSOLUTE_PATH_TO/rust/build/x86_64-unknown-linux-gnu/llvm/lib" "-lgcc_s" "-lgcc" "-lpthread" "-lc" "-lgcc_s" "-lgcc" "/ABSOLUTE_PATH_TO/crtendS.o" "/lib/../lib64/crtn.o" ``` -Especially for the last three commands I recommend to not fix the paths, but rather just re-generate them by copying a bare-mode openmp example and compiling it with your clang. By adding `-###` to your clang invocation, you can see the invidual steps. -You can ignore other steps, e.g. the invocation of a "clang-offload-packager". +You can try to find the paths to those files on your system. However, I recommend not fixing the paths by hand, but rather re-generating them by copying a bare-mode OpenMP example and compiling it with your clang. By adding `-###` to your clang invocation, you can see the individual steps. +It will show multiple steps; just look for the clang-linker-wrapper one. Make sure to still include the `host.o` file, and not whatever temporary file you got when compiling your C++ example with the following call. ``` myclang++ -fuse-ld=lld -O3 -fopenmp -fopenmp-offload-mandatory --offload-arch=gfx90a omp_bare.cpp -o main -### ```