diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp index 5b43272bfa62f..49f93451db7bb 100644 --- a/clang/lib/CodeGen/CGCUDANV.cpp +++ b/clang/lib/CodeGen/CGCUDANV.cpp @@ -760,10 +760,10 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { // to contain the fat binary but will be populated somewhere else, // e.g. by lld through link script. FatBinStr = new llvm::GlobalVariable( - CGM.getModule(), CGM.Int8Ty, - /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, nullptr, - "__hip_fatbin", nullptr, - llvm::GlobalVariable::NotThreadLocal); + CGM.getModule(), CGM.Int8Ty, + /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, nullptr, + "__hip_fatbin_" + CGM.getContext().getCUIDHash(), nullptr, + llvm::GlobalVariable::NotThreadLocal); cast(FatBinStr)->setSection(FatbinConstantName); } @@ -816,8 +816,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { // thread safety of the loaded program. Therefore we can assume sequential // execution of constructor functions here. if (IsHIP) { - auto Linkage = CudaGpuBinary ? llvm::GlobalValue::InternalLinkage : - llvm::GlobalValue::LinkOnceAnyLinkage; + auto Linkage = CudaGpuBinary ? llvm::GlobalValue::InternalLinkage + : llvm::GlobalValue::ExternalLinkage; llvm::BasicBlock *IfBlock = llvm::BasicBlock::Create(Context, "if", ModuleCtorFunc); llvm::BasicBlock *ExitBlock = @@ -826,11 +826,11 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { // of HIP ABI. GpuBinaryHandle = new llvm::GlobalVariable( TheModule, PtrTy, /*isConstant=*/false, Linkage, - /*Initializer=*/llvm::ConstantPointerNull::get(PtrTy), - "__hip_gpubin_handle"); - if (Linkage == llvm::GlobalValue::LinkOnceAnyLinkage) - GpuBinaryHandle->setComdat( - CGM.getModule().getOrInsertComdat(GpuBinaryHandle->getName())); + /*Initializer=*/ + CudaGpuBinary ? llvm::ConstantPointerNull::get(PtrTy) : nullptr, + CudaGpuBinary + ? "__hip_gpubin_handle" + : "__hip_gpubin_handle_" + CGM.getContext().getCUIDHash()); GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getAsAlign()); // Prevent the weak symbol in different shared libraries being merged. if (Linkage != llvm::GlobalValue::InternalLinkage) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 77fb3a62b356e..95e457bef28ed 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -915,7 +915,15 @@ void CodeGenModule::Release() { llvm::ConstantArray::get(ATy, UsedArray), "__clang_gpu_used_external"); addCompilerUsedGlobal(GV); } - + if (LangOpts.HIP) { + // Emit a unique ID so that host and device binaries from the same + // compilation unit can be associated. + auto *GV = new llvm::GlobalVariable( + getModule(), Int8Ty, false, llvm::GlobalValue::ExternalLinkage, + llvm::Constant::getNullValue(Int8Ty), + "__hip_cuid_" + getContext().getCUIDHash()); + addCompilerUsedGlobal(GV); + } emitLLVMUsed(); if (SanStats) SanStats->finish(); diff --git a/clang/lib/Driver/OffloadBundler.cpp b/clang/lib/Driver/OffloadBundler.cpp index b1091aca5616f..99a34d25cfcd5 100644 --- a/clang/lib/Driver/OffloadBundler.cpp +++ b/clang/lib/Driver/OffloadBundler.cpp @@ -588,8 +588,15 @@ class ObjectFileHandler final : public FileHandler { StringRef Content = *ContentOrErr; // Copy fat object contents to the output when extracting host bundle. - if (Content.size() == 1u && Content.front() == 0) - Content = StringRef(Input.getBufferStart(), Input.getBufferSize()); + std::string ModifiedContent; + if (Content.size() == 1u && Content.front() == 0) { + auto HostBundleOrErr = getHostBundle(); + if (!HostBundleOrErr) + return HostBundleOrErr.takeError(); + + ModifiedContent = std::move(*HostBundleOrErr); + Content = ModifiedContent; + } OS.write(Content.data(), Content.size()); return Error::success(); @@ -692,6 +699,35 @@ class ObjectFileHandler final : public FileHandler { } return Error::success(); } + + Expected getHostBundle() { + TempFileHandlerRAII TempFiles; + + auto ModifiedObjPathOrErr = TempFiles.Create(std::nullopt); + if (!ModifiedObjPathOrErr) + return ModifiedObjPathOrErr.takeError(); + StringRef ModifiedObjPath = *ModifiedObjPathOrErr; + + BumpPtrAllocator Alloc; + StringSaver SS{Alloc}; + SmallVector ObjcopyArgs{"llvm-objcopy"}; + + ObjcopyArgs.push_back("--regex"); + ObjcopyArgs.push_back("--remove-section=__CLANG_OFFLOAD_BUNDLE__.*"); + ObjcopyArgs.push_back("--"); + ObjcopyArgs.push_back(BundlerConfig.InputFileNames.front()); + ObjcopyArgs.push_back(ModifiedObjPath); + + if (Error Err = executeObjcopy(BundlerConfig.ObjcopyPath, ObjcopyArgs)) + return std::move(Err); + + auto BufOrErr = MemoryBuffer::getFile(ModifiedObjPath); + if (!BufOrErr) + return createStringError(BufOrErr.getError(), + "Failed to read back the modified object file"); + + return BufOrErr->get()->getBuffer().str(); + } }; /// Handler for text files. The bundled file will have the following format. diff --git a/clang/lib/Driver/ToolChains/HIPUtility.cpp b/clang/lib/Driver/ToolChains/HIPUtility.cpp index f692458b775de..fcecf2e1313bb 100644 --- a/clang/lib/Driver/ToolChains/HIPUtility.cpp +++ b/clang/lib/Driver/ToolChains/HIPUtility.cpp @@ -9,13 +9,24 @@ #include "HIPUtility.h" #include "CommonArgs.h" #include "clang/Driver/Compilation.h" +#include "clang/Driver/Options.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/MD5.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/TargetParser/Triple.h" +#include +#include +using namespace clang; using namespace clang::driver; using namespace clang::driver::tools; using namespace llvm::opt; +using llvm::dyn_cast; #if defined(_WIN32) || defined(_WIN64) #define NULL_FILE "nul" @@ -36,6 +47,169 @@ static std::string normalizeForBundler(const llvm::Triple &T, : T.normalize(); } +// Collect undefined __hip_fatbin* and __hip_gpubin_handle* symbols from all +// input object or archive files. +class HIPUndefinedFatBinSymbols { +public: + HIPUndefinedFatBinSymbols(const Compilation &C) + : C(C), DiagID(C.getDriver().getDiags().getCustomDiagID( + DiagnosticsEngine::Error, + "Error collecting HIP undefined fatbin symbols: %0")), + Quiet(C.getArgs().hasArg(options::OPT__HASH_HASH_HASH)), + Verbose(C.getArgs().hasArg(options::OPT_v)) { + populateSymbols(); + if (Verbose) { + for (auto Name : FatBinSymbols) + llvm::errs() << "Found undefined HIP fatbin symbol: " << Name << "\n"; + for (auto Name : GPUBinHandleSymbols) + llvm::errs() << "Found undefined HIP gpubin handle symbol: " << Name + << "\n"; + } + } + + const std::set &getFatBinSymbols() const { + return FatBinSymbols; + } + + const std::set &getGPUBinHandleSymbols() const { + return GPUBinHandleSymbols; + } + +private: + const Compilation &C; + unsigned DiagID; + bool Quiet; + bool Verbose; + std::set FatBinSymbols; + std::set GPUBinHandleSymbols; + std::set DefinedFatBinSymbols; + std::set DefinedGPUBinHandleSymbols; + const std::string FatBinPrefix = "__hip_fatbin"; + const std::string GPUBinHandlePrefix = "__hip_gpubin_handle"; + + void populateSymbols() { + std::deque WorkList; + std::set Visited; + + for (const auto &Action : C.getActions()) + WorkList.push_back(Action); + + while (!WorkList.empty()) { + const Action *CurrentAction = WorkList.front(); + WorkList.pop_front(); + + if (!CurrentAction || !Visited.insert(CurrentAction).second) + continue; + + if (const auto *IA = dyn_cast(CurrentAction)) { + std::string ID = IA->getId().str(); + if (!ID.empty()) { + ID = llvm::utohexstr(llvm::MD5Hash(ID), /*LowerCase=*/true); + FatBinSymbols.insert(Twine(FatBinPrefix + "_" + ID).str()); + GPUBinHandleSymbols.insert( + Twine(GPUBinHandlePrefix + "_" + ID).str()); + continue; + } + if (IA->getInputArg().getNumValues() == 0) + continue; + const char *Filename = IA->getInputArg().getValue(); + if (!Filename) + continue; + auto BufferOrErr = llvm::MemoryBuffer::getFile(Filename); + // Input action could be options to linker, therefore, ignore it + // if cannot read it. If it turns out to be a file that cannot be read, + // the error will be caught by the linker. + if (!BufferOrErr) + continue; + + processInput(BufferOrErr.get()->getMemBufferRef()); + } else + WorkList.insert(WorkList.end(), CurrentAction->getInputs().begin(), + CurrentAction->getInputs().end()); + } + } + + void processInput(const llvm::MemoryBufferRef &Buffer) { + // Try processing as object file first. + auto ObjFileOrErr = llvm::object::ObjectFile::createObjectFile(Buffer); + if (ObjFileOrErr) { + processSymbols(**ObjFileOrErr); + return; + } + + // Then try processing as archive files. + llvm::consumeError(ObjFileOrErr.takeError()); + auto ArchiveOrErr = llvm::object::Archive::create(Buffer); + if (ArchiveOrErr) { + llvm::Error Err = llvm::Error::success(); + llvm::object::Archive &Archive = *ArchiveOrErr.get(); + for (auto &Child : Archive.children(Err)) { + auto ChildBufOrErr = Child.getMemoryBufferRef(); + if (ChildBufOrErr) + processInput(*ChildBufOrErr); + else + errorHandler(ChildBufOrErr.takeError()); + } + + if (Err) + errorHandler(std::move(Err)); + return; + } + + // Ignore other files. + llvm::consumeError(ArchiveOrErr.takeError()); + } + + void processSymbols(const llvm::object::ObjectFile &Obj) { + for (const auto &Symbol : Obj.symbols()) { + auto FlagOrErr = Symbol.getFlags(); + if (!FlagOrErr) { + errorHandler(FlagOrErr.takeError()); + continue; + } + + auto NameOrErr = Symbol.getName(); + if (!NameOrErr) { + errorHandler(NameOrErr.takeError()); + continue; + } + llvm::StringRef Name = *NameOrErr; + + bool isUndefined = + FlagOrErr.get() & llvm::object::SymbolRef::SF_Undefined; + bool isFatBinSymbol = Name.starts_with(FatBinPrefix); + bool isGPUBinHandleSymbol = Name.starts_with(GPUBinHandlePrefix); + + // Handling for defined symbols + if (!isUndefined) { + if (isFatBinSymbol) { + DefinedFatBinSymbols.insert(Name.str()); + FatBinSymbols.erase(Name.str()); + } else if (isGPUBinHandleSymbol) { + DefinedGPUBinHandleSymbols.insert(Name.str()); + GPUBinHandleSymbols.erase(Name.str()); + } + continue; + } + + // Add undefined symbols if they are not in the defined sets + if (isFatBinSymbol && + DefinedFatBinSymbols.find(Name.str()) == DefinedFatBinSymbols.end()) + FatBinSymbols.insert(Name.str()); + else if (isGPUBinHandleSymbol && + DefinedGPUBinHandleSymbols.find(Name.str()) == + DefinedGPUBinHandleSymbols.end()) + GPUBinHandleSymbols.insert(Name.str()); + } + } + + void errorHandler(llvm::Error Err) { + if (Quiet) + return; + C.getDriver().Diag(DiagID) << llvm::toString(std::move(Err)); + } +}; + // Construct a clang-offload-bundler command to bundle code objects for // different devices into a HIP fat binary. void HIP::constructHIPFatbinCommand(Compilation &C, const JobAction &JA, @@ -130,26 +304,84 @@ void HIP::constructGenerateObjFileFromHIPFatBinary( auto HostTriple = C.getSingleOffloadToolChain()->getTriple(); + HIPUndefinedFatBinSymbols Symbols(C); + + std::string PrimaryHipFatbinSymbol; + std::string PrimaryGpuBinHandleSymbol; + bool FoundPrimaryHipFatbinSymbol = false; + bool FoundPrimaryGpuBinHandleSymbol = false; + + std::vector AliasHipFatbinSymbols; + std::vector AliasGpuBinHandleSymbols; + + // Iterate through symbols to find the primary ones and collect others for + // aliasing + for (const auto &Symbol : Symbols.getFatBinSymbols()) { + if (!FoundPrimaryHipFatbinSymbol) { + PrimaryHipFatbinSymbol = Symbol; + FoundPrimaryHipFatbinSymbol = true; + } else + AliasHipFatbinSymbols.push_back(Symbol); + } + + for (const auto &Symbol : Symbols.getGPUBinHandleSymbols()) { + if (!FoundPrimaryGpuBinHandleSymbol) { + PrimaryGpuBinHandleSymbol = Symbol; + FoundPrimaryGpuBinHandleSymbol = true; + } else + AliasGpuBinHandleSymbols.push_back(Symbol); + } + // Add MC directives to embed target binaries. We ensure that each // section and image is 16-byte aligned. This is not mandatory, but // increases the likelihood of data to be aligned with a cache block // in several main host machines. ObjStream << "# HIP Object Generator\n"; ObjStream << "# *** Automatically generated by Clang ***\n"; - if (HostTriple.isWindowsMSVCEnvironment()) { - ObjStream << " .section .hip_fatbin, \"dw\"\n"; - } else { - ObjStream << " .protected __hip_fatbin\n"; - ObjStream << " .type __hip_fatbin,@object\n"; - ObjStream << " .section .hip_fatbin,\"a\",@progbits\n"; + if (FoundPrimaryGpuBinHandleSymbol) { + // Define the first gpubin handle symbol + if (HostTriple.isWindowsMSVCEnvironment()) + ObjStream << " .section .hip_gpubin_handle,\"dw\"\n"; + else { + ObjStream << " .protected " << PrimaryGpuBinHandleSymbol << "\n"; + ObjStream << " .type " << PrimaryGpuBinHandleSymbol << ",@object\n"; + ObjStream << " .section .hip_gpubin_handle,\"aw\"\n"; + } + ObjStream << " .globl " << PrimaryGpuBinHandleSymbol << "\n"; + ObjStream << " .p2align 3\n"; // Align 8 + ObjStream << PrimaryGpuBinHandleSymbol << ":\n"; + ObjStream << " .zero 8\n"; // Size 8 + + // Generate alias directives for other gpubin handle symbols + for (const auto &AliasSymbol : AliasGpuBinHandleSymbols) { + ObjStream << " .globl " << AliasSymbol << "\n"; + ObjStream << " .set " << AliasSymbol << "," << PrimaryGpuBinHandleSymbol + << "\n"; + } + } + if (FoundPrimaryHipFatbinSymbol) { + // Define the first fatbin symbol + if (HostTriple.isWindowsMSVCEnvironment()) + ObjStream << " .section .hip_fatbin,\"dw\"\n"; + else { + ObjStream << " .protected " << PrimaryHipFatbinSymbol << "\n"; + ObjStream << " .type " << PrimaryHipFatbinSymbol << ",@object\n"; + ObjStream << " .section .hip_fatbin,\"a\",@progbits\n"; + } + ObjStream << " .globl " << PrimaryHipFatbinSymbol << "\n"; + ObjStream << " .p2align " << llvm::Log2(llvm::Align(HIPCodeObjectAlign)) + << "\n"; + // Generate alias directives for other fatbin symbols + for (const auto &AliasSymbol : AliasHipFatbinSymbols) { + ObjStream << " .globl " << AliasSymbol << "\n"; + ObjStream << " .set " << AliasSymbol << "," << PrimaryHipFatbinSymbol + << "\n"; + } + ObjStream << PrimaryHipFatbinSymbol << ":\n"; + ObjStream << " .incbin "; + llvm::sys::printArg(ObjStream, BundleFile, /*Quote=*/true); + ObjStream << "\n"; } - ObjStream << " .globl __hip_fatbin\n"; - ObjStream << " .p2align " << llvm::Log2(llvm::Align(HIPCodeObjectAlign)) - << "\n"; - ObjStream << "__hip_fatbin:\n"; - ObjStream << " .incbin "; - llvm::sys::printArg(ObjStream, BundleFile, /*Quote=*/true); - ObjStream << "\n"; if (HostTriple.isOSLinux() && HostTriple.isOSBinFormatELF()) ObjStream << " .section .note.GNU-stack, \"\", @progbits\n"; ObjStream.flush(); diff --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt index 6b5cb0a18457b..fcfca354f4a75 100644 --- a/clang/test/CMakeLists.txt +++ b/clang/test/CMakeLists.txt @@ -136,6 +136,7 @@ if( NOT CLANG_BUILT_STANDALONE ) llvm-strip llvm-symbolizer llvm-windres + obj2yaml opt split-file yaml2obj diff --git a/clang/test/CodeGenCUDA/device-stub.cu b/clang/test/CodeGenCUDA/device-stub.cu index d7a7b1bb9fe95..60304647bd4c5 100644 --- a/clang/test/CodeGenCUDA/device-stub.cu +++ b/clang/test/CodeGenCUDA/device-stub.cu @@ -50,21 +50,19 @@ // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \ // RUN: -fgpu-rdc -fcuda-include-gpubinary %t -o - -x hip \ // RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,LNX,RDC,HIP,HIPEF -// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - -x hip\ +// RUN: %clang_cc1 -cuid=123 -triple x86_64-linux-gnu -emit-llvm %s -o - -x hip\ // RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=ALL,LNX,NORDC,HIP,HIPNEF // RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -aux-triple amdgcn -emit-llvm %s \ // RUN: -fcuda-include-gpubinary %t -o - -x hip\ // RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,WIN -// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -aux-triple amdgcn -emit-llvm %s \ +// RUN: %clang_cc1 -cuid=123 -triple x86_64-pc-windows-msvc -aux-triple amdgcn -emit-llvm %s \ // RUN: -o - -x hip\ // RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,WIN,HIP,HIPNEF #include "Inputs/cuda.h" -// HIPNEF: $__hip_gpubin_handle = comdat any - #ifndef NOGLOBALS // NORDC-DAG: @device_var = internal global i32 // RDC-DAG: @device_var = global i32 @@ -161,7 +159,7 @@ __device__ void device_use() { // * constant unnamed string with GPU binary // CUDA: @[[FATBIN:.*]] = private constant{{.*}} c"GPU binary would be here.", // HIPEF: @[[FATBIN:.*]] = private constant{{.*}} c"GPU binary would be here.",{{.*}}align 4096 -// HIPNEF: @[[FATBIN:__hip_fatbin]] = external constant i8, section ".hip_fatbin" +// HIPNEF: @[[FATBIN:__hip_fatbin_[0-9a-f]+]] = external constant i8, section ".hip_fatbin" // CUDANORDC-SAME: section ".nv_fatbin", align 8 // CUDARDC-SAME: section "__nv_relfatbin", align 8 // * constant struct that wraps GPU binary @@ -177,7 +175,7 @@ __device__ void device_use() { // HIP-SAME: section ".hipFatBinSegment" // * variable to save GPU binary handle after initialization // CUDANORDC: @__[[PREFIX]]_gpubin_handle = internal global ptr null -// HIPNEF: @__[[PREFIX]]_gpubin_handle = linkonce hidden global ptr null +// HIPNEF: @__[[PREFIX]]_gpubin_handle_{{[0-9a-f]+}} = external hidden global ptr, align 8 // * constant unnamed string with NVModuleID // CUDARDC: [[MODULE_ID_GLOBAL:@.*]] = private constant // CUDARDC-SAME: c"[[MODULE_ID:.+]]\00", section "__nv_module_id", align 32 diff --git a/clang/test/CodeGenCUDA/host-used-device-var.cu b/clang/test/CodeGenCUDA/host-used-device-var.cu index 7cb31aff84264..5328660c9dc9d 100644 --- a/clang/test/CodeGenCUDA/host-used-device-var.cu +++ b/clang/test/CodeGenCUDA/host-used-device-var.cu @@ -1,9 +1,9 @@ // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \ // RUN: -std=c++17 -O3 -mllvm -amdgpu-internalize-symbols -emit-llvm -o - \ -// RUN: | FileCheck -check-prefix=DEV %s +// RUN: -cuid=123 | FileCheck -check-prefix=DEV %s // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x hip %s \ -// RUN: -std=c++17 -O3 -emit-llvm -o - | FileCheck -check-prefix=HOST %s +// RUN: -std=c++17 -O3 -emit-llvm -o - -cuid=123 | FileCheck -check-prefix=HOST %s // Negative tests. @@ -187,6 +187,7 @@ public: // DEV-SAME: {{^[^@]*}} @_ZL2u3 // DEV-SAME: {{^[^@]*}} @_ZZ4fun1vE11static_var1 // DEV-SAME: {{^[^@]*}} @_ZZZN21TestStaticVarInLambda3funEvENKUlPcE_clES0_E4var2 +// DEV-SAME: {{^[^@]*}} @__hip_cuid_{{[0-9a-f]+}} // DEV-SAME: {{^[^@]*}} @constexpr_var2b // DEV-SAME: {{^[^@]*}} @inline_var // DEV-SAME: {{^[^@]*}} @u1 diff --git a/clang/test/Driver/Inputs/hip.h b/clang/test/Driver/Inputs/hip.h new file mode 100644 index 0000000000000..5be772a7b3413 --- /dev/null +++ b/clang/test/Driver/Inputs/hip.h @@ -0,0 +1,25 @@ +/* Minimal declarations for HIP support. Testing purposes only. */ + +#define __constant__ __attribute__((constant)) +#define __device__ __attribute__((device)) +#define __global__ __attribute__((global)) +#define __host__ __attribute__((host)) +#define __shared__ __attribute__((shared)) +#define __managed__ __attribute__((managed)) + +struct dim3 { + unsigned x, y, z; + __host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), y(y), z(z) {} +}; + +typedef struct hipStream *hipStream_t; +typedef enum hipError {} hipError_t; +int hipConfigureCall(dim3 gridSize, dim3 blockSize, unsigned long long sharedSize = 0, + hipStream_t stream = 0); +extern "C" hipError_t __hipPushCallConfiguration(dim3 gridSize, dim3 blockSize, + unsigned long long sharedSize = 0, + hipStream_t stream = 0); +extern "C" hipError_t hipLaunchKernel(const void *func, dim3 gridDim, + dim3 blockDim, void **args, + unsigned long long sharedMem, + hipStream_t stream); diff --git a/clang/test/Driver/clang-offload-bundler.c b/clang/test/Driver/clang-offload-bundler.c index 7d0b6b27a60ae..9d8b81ee9806e 100644 --- a/clang/test/Driver/clang-offload-bundler.c +++ b/clang/test/Driver/clang-offload-bundler.c @@ -10,6 +10,7 @@ // RUN: %clang -O0 -target %itanium_abi_triple %s -c -emit-llvm -o %t.bc // RUN: %clang -O0 -target %itanium_abi_triple %s -S -o %t.s // RUN: %clang -O0 -target %itanium_abi_triple %s -c -o %t.o +// RUN: obj2yaml %t.o > %t.o.yaml // RUN: %clang -O0 -target %itanium_abi_triple %s -emit-ast -o %t.ast // @@ -305,11 +306,13 @@ // RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -input=%t.o -input=%t.tgt1 -input=%t.tgt2 -output=%t.bundle3.o // RUN: clang-offload-bundler -type=o -input=%t.bundle3.o -list | FileCheck -check-prefix=CKLST %s // RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -output=%t.res.o -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.bundle3.o -unbundle -// RUN: diff %t.bundle3.o %t.res.o +// RUN: obj2yaml %t.res.o > %t.res.o.yaml +// RUN: diff %t.o.yaml %t.res.o.yaml // RUN: diff %t.tgt1 %t.res.tgt1 // RUN: diff %t.tgt2 %t.res.tgt2 // RUN: clang-offload-bundler -type=o -targets=openmp-powerpc64le-ibm-linux-gnu,host-%itanium_abi_triple,openmp-x86_64-pc-linux-gnu -output=%t.res.tgt1 -output=%t.res.o -output=%t.res.tgt2 -input=%t.bundle3.o -unbundle -// RUN: diff %t.bundle3.o %t.res.o +// RUN: obj2yaml %t.res.o > %t.res.o.yaml +// RUN: diff %t.o.yaml %t.res.o.yaml // RUN: diff %t.tgt1 %t.res.tgt1 // RUN: diff %t.tgt2 %t.res.tgt2 // RUN: clang-offload-bundler -type=o -targets=openmp-powerpc64le-ibm-linux-gnu -output=%t.res.tgt1 -input=%t.bundle3.o -unbundle @@ -318,11 +321,13 @@ // Check if we can unbundle a file with no magic strings. // RUN: clang-offload-bundler -type=o -input=%t.o -list | FileCheck -check-prefix=CKLST2 --allow-empty %s // RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -output=%t.res.o -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.o -unbundle -allow-missing-bundles -// RUN: diff %t.o %t.res.o +// RUN: obj2yaml %t.res.o > %t.res.o.yaml +// RUN: diff %t.o.yaml %t.res.o.yaml // RUN: diff %t.empty %t.res.tgt1 // RUN: diff %t.empty %t.res.tgt2 // RUN: clang-offload-bundler -type=o -targets=openmp-powerpc64le-ibm-linux-gnu,host-%itanium_abi_triple,openmp-x86_64-pc-linux-gnu -output=%t.res.tgt1 -output=%t.res.o -output=%t.res.tgt2 -input=%t.o -unbundle -allow-missing-bundles -// RUN: diff %t.o %t.res.o +// RUN: obj2yaml %t.res.o > %t.res.o.yaml +// RUN: diff %t.o.yaml %t.res.o.yaml // RUN: diff %t.empty %t.res.tgt1 // RUN: diff %t.empty %t.res.tgt2 diff --git a/clang/test/Driver/hip-partial-link.hip b/clang/test/Driver/hip-partial-link.hip new file mode 100644 index 0000000000000..a1d31f9a65195 --- /dev/null +++ b/clang/test/Driver/hip-partial-link.hip @@ -0,0 +1,97 @@ +// REQUIRES: x86-registered-target, amdgpu-registered-target, lld, system-linux + +// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu \ +// RUN: --offload-arch=gfx906 -c -nostdinc -nogpuinc -nohipwrapperinc \ +// RUN: -nogpulib -fgpu-rdc -I%S/Inputs %s -o %t.1.o + +// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -DLIB \ +// RUN: --offload-arch=gfx906 -c -nostdinc -nogpuinc -nohipwrapperinc \ +// RUN: -nogpulib -fgpu-rdc -I%S/Inputs %s -o %t.2.o + +// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -DMAIN \ +// RUN: --offload-arch=gfx906 -c -nostdinc -nogpuinc -nohipwrapperinc \ +// RUN: -nogpulib -fgpu-rdc -I%S/Inputs %s -o %t.main.o + +// RUN: llvm-nm %t.1.o | FileCheck -check-prefix=OBJ1 %s +// OBJ1: B __hip_cuid_[[ID:[0-9a-f]+]] +// OBJ1: U __hip_fatbin_[[ID]] +// OBJ1: U __hip_gpubin_handle_[[ID]] + +// RUN: llvm-nm %t.2.o | FileCheck -check-prefix=OBJ2 %s +// OBJ2: B __hip_cuid_[[ID:[0-9a-f]+]] +// OBJ2: U __hip_fatbin_[[ID]] +// OBJ2: U __hip_gpubin_handle_[[ID]] + +// Link %t.1.o and %t.2.o by -r and then link with %t.main.o + +// RUN: %clang -v --target=x86_64-unknown-linux-gnu \ +// RUN: --hip-link -fgpu-rdc --offload-arch=gfx906 \ +// RUN: -r -fuse-ld=lld -nostdlib %t.1.o %t.2.o -o %t.lib.o \ +// RUN: 2>&1 | FileCheck -check-prefix=LD-R %s +// LD-R: Found undefined HIP fatbin symbol: __hip_fatbin_[[ID1:[0-9a-f]+]] +// LD-R: Found undefined HIP fatbin symbol: __hip_fatbin_[[ID2:[0-9a-f]+]] +// LD-R: Found undefined HIP gpubin handle symbol: __hip_gpubin_handle_[[ID1]] +// LD-R: Found undefined HIP gpubin handle symbol: __hip_gpubin_handle_[[ID2]] +// LD-R: "{{.*}}/clang-offload-bundler" {{.*}}-unbundle +// LD-R: "{{.*}}/lld" -flavor gnu -m elf64_amdgpu +// LD-R: "{{.*}}/clang-offload-bundler" +// LD-R: "{{.*}}/llvm-mc" -triple x86_64-unknown-linux-gnu +// LD-R: "{{.*}}/ld.lld" {{.*}} -r + +// RUN: llvm-nm %t.lib.o | FileCheck -check-prefix=OBJ %s +// OBJ: B __hip_cuid_[[ID1:[0-9a-f]+]] +// OBJ: B __hip_cuid_[[ID2:[0-9a-f]+]] +// OBJ: R __hip_fatbin_[[ID1]] +// OBJ: R __hip_fatbin_[[ID2]] +// OBJ: D __hip_gpubin_handle_[[ID1]] +// OBJ: D __hip_gpubin_handle_[[ID2]] + +// RUN: %clang -v --target=x86_64-unknown-linux-gnu \ +// RUN: --hip-link -no-hip-rt -fgpu-rdc --offload-arch=gfx906 \ +// RUN: -fuse-ld=lld -nostdlib -r %t.main.o %t.lib.o -o %t.final.o \ +// RUN: 2>&1 | FileCheck -check-prefix=LINK-O %s +// LINK-O-NOT: Found undefined HIP {{.*}}symbol + +// Generate a static lib with %t.1.o and %t.2.o then link with %t.main.o + +// RUN: %clang -v --target=x86_64-unknown-linux-gnu \ +// RUN: --hip-link -fgpu-rdc --offload-arch=gfx906 \ +// RUN: --emit-static-lib -fuse-ld=lld -nostdlib %t.1.o %t.2.o -o %t.a \ +// RUN: 2>&1 | FileCheck -check-prefix=STATIC %s +// STATIC: Found undefined HIP fatbin symbol: __hip_fatbin_[[ID1:[0-9a-f]+]] +// STATIC: Found undefined HIP fatbin symbol: __hip_fatbin_[[ID2:[0-9a-f]+]] +// STATIC: Found undefined HIP gpubin handle symbol: __hip_gpubin_handle_[[ID1]] +// STATIC: Found undefined HIP gpubin handle symbol: __hip_gpubin_handle_[[ID2]] +// STATIC: "{{.*}}/clang-offload-bundler" {{.*}}-unbundle +// STATIC: "{{.*}}/lld" -flavor gnu -m elf64_amdgpu +// STATIC: "{{.*}}/clang-offload-bundler" +// STATIC: "{{.*}}/llvm-mc" -triple x86_64-unknown-linux-gnu +// STATIC: "{{.*}}/llvm-ar" + +// RUN: %clang -v --target=x86_64-unknown-linux-gnu \ +// RUN: --hip-link -no-hip-rt -fgpu-rdc --offload-arch=gfx906 \ +// RUN: -fuse-ld=lld -nostdlib -r %t.main.o %t.a -o %t.final.o \ +// RUN: 2>&1 | FileCheck -check-prefix=LINK-A %s +// LINK-A-NOT: Found undefined HIP {{.*}}symbol + +#include "hip.h" + +#ifdef LIB +__device__ int x; +__device__ void libfun() { + x = 1; +} +#elif !defined(MAIN) +__device__ void libfun(); +__global__ void kern() { + libfun(); +} +void run() { + kern<<<1,1>>>(); +} +#else +extern void run(); +int main() { + run(); +} +#endif diff --git a/clang/test/Driver/hip-toolchain-rdc.hip b/clang/test/Driver/hip-toolchain-rdc.hip index 1827531f9cab7..d19d8ccd6cb29 100644 --- a/clang/test/Driver/hip-toolchain-rdc.hip +++ b/clang/test/Driver/hip-toolchain-rdc.hip @@ -1,7 +1,7 @@ // REQUIRES: x86-registered-target // REQUIRES: amdgpu-registered-target -// RUN: %clang -### --target=x86_64-linux-gnu \ +// RUN: %clang -### --target=x86_64-linux-gnu -v \ // RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \ // RUN: --hip-device-lib=lib1.bc --hip-device-lib=lib2.bc \ // RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \ @@ -12,7 +12,7 @@ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck -check-prefixes=CHECK,LNX %s -// RUN: %clang -### --target=x86_64-pc-windows-msvc \ +// RUN: %clang -### --target=x86_64-pc-windows-msvc -v \ // RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \ // RUN: --hip-device-lib=lib1.bc --hip-device-lib=lib2.bc \ // RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \ @@ -23,15 +23,31 @@ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck -check-prefixes=CHECK,MSVC %s -// check code object alignment in dumped llvm-mc input -// LNX: .protected __hip_fatbin -// LNX: .type __hip_fatbin,@object -// LNX: .section .hip_fatbin,"a",@progbits -// MSVC: .section .hip_fatbin, "dw" -// CHECK: .globl __hip_fatbin -// CHECK: .p2align 12 -// CHECK: __hip_fatbin: -// CHECK: .incbin "[[BUNDLE:.*hipfb]]" +// check HIP fatbin and gpubin handle symbols and code object alignment in dumped llvm-mc input +// CHECK: Found undefined HIP fatbin symbol: __hip_fatbin_[[ID1:[0-9a-f]+]] +// CHECK: Found undefined HIP fatbin symbol: __hip_fatbin_[[ID2:[0-9a-f]+]] +// CHECK: Found undefined HIP gpubin handle symbol: __hip_gpubin_handle_[[ID1]] +// CHECK: Found undefined HIP gpubin handle symbol: __hip_gpubin_handle_[[ID2]] +// LNX: .protected __hip_gpubin_handle_[[ID1]] +// LNX: .type __hip_gpubin_handle_[[ID1]] +// LNX-LABEL: .section .hip_gpubin_handle,"aw" +// MSVC-LABEL: .section .hip_gpubin_handle,"dw" +// CHECK: .globl __hip_gpubin_handle_[[ID1]] +// CHECK-NEXT: .p2align 3 +// CHECK-NEXT:__hip_gpubin_handle_[[ID1]]: +// CHECK-NEXT: .zero 8 +// CHECK-NEXT: .globl __hip_gpubin_handle_[[ID2]] +// CHECK-NEXT: .set __hip_gpubin_handle_[[ID2]],__hip_gpubin_handle_[[ID1]] +// LNX: .protected __hip_fatbin_[[ID1]] +// LNX: .type __hip_fatbin_[[ID1]],@object +// LNX-LABEL: .section .hip_fatbin,"a",@progbits +// MSVC-LABEL: .section .hip_fatbin,"dw" +// CHECK: .globl __hip_fatbin_[[ID1]] +// CHECK-NEXT: .p2align 12 +// CHECK-NEXT: .globl __hip_fatbin_[[ID2]] +// CHECK-NEXT: .set __hip_fatbin_[[ID2]],__hip_fatbin_[[ID1]] +// CHECK-NEXT: __hip_fatbin_[[ID1]]: +// CHECK-NEXT: .incbin "[[BUNDLE:.*hipfb]]" // LNX: .section .note.GNU-stack, "", @progbits // MSVC-NOT: .note.GNU-stack