From e9f9a6a546b1c5d34d4cb6c15e8ff29547d25b41 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Tue, 11 Nov 2025 10:52:34 -0500 Subject: [PATCH] [offload-arch] Fix amdgpu-arch crash on Windows with ROCm 7.1 The tool was crashing on Windows with ROCm 7.1 due to two issues: misuse of hipDeviceGet which should not be used (it worked before by accident but was undefined behavior), and ABI incompatibility from hipDeviceProp_t struct layout changes between HIP versions where the gcnArchName offset changed from 396 to 1160 bytes. The fix removes hipDeviceGet and queries properties directly by device index. It defines separate struct layouts for R0600 (HIP 6.x+) and R0000 (legacy) to handle the different memory layouts correctly. An automatic API fallback mechanism tries R0600, then R0000, then the unversioned API until one succeeds, ensuring compatibility across different HIP runtime versions. A new --hip-api-version option allows manually selecting the API version when needed. Additional improvements include enhanced error handling with hipGetErrorString, verbose logging throughout the detection process, and runtime version detection using hipRuntimeGetVersion when available. The versioned API functions provide stable ABI across HIP versions. Fixes: SWDEV-564272 --- clang/tools/offload-arch/AMDGPUArchByHIP.cpp | 195 ++++++++++++++++--- clang/tools/offload-arch/OffloadArch.cpp | 4 +- 2 files changed, 174 insertions(+), 25 deletions(-) diff --git a/clang/tools/offload-arch/AMDGPUArchByHIP.cpp b/clang/tools/offload-arch/AMDGPUArchByHIP.cpp index ff39a85d15628..d7f6d79b135df 100644 --- a/clang/tools/offload-arch/AMDGPUArchByHIP.cpp +++ b/clang/tools/offload-arch/AMDGPUArchByHIP.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Sequence.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/DynamicLibrary.h" @@ -32,22 +33,54 @@ using namespace llvm; -typedef struct { +// R0600 struct layout (HIP 6.x+) +typedef struct alignas(8) { + char padding[1160]; + char gcnArchName[256]; + char padding2[56]; +} hipDeviceProp_tR0600; + +// R0000 struct layout (legacy) +typedef struct alignas(8) { char padding[396]; char gcnArchName[256]; char padding2[1024]; -} hipDeviceProp_t; +} hipDeviceProp_tR0000; typedef enum { hipSuccess = 0, } hipError_t; typedef hipError_t (*hipGetDeviceCount_t)(int *); -typedef hipError_t (*hipDeviceGet_t)(int *, int); -typedef hipError_t (*hipGetDeviceProperties_t)(hipDeviceProp_t *, int); +typedef hipError_t (*hipGetDevicePropertiesR0600_t)(hipDeviceProp_tR0600 *, + int); +typedef hipError_t (*hipGetDevicePropertiesR0000_t)(hipDeviceProp_tR0000 *, + int); +typedef hipError_t (*hipGetDeviceProperties_t)(hipDeviceProp_tR0000 *, int); +typedef hipError_t (*hipRuntimeGetVersion_t)(int *); +typedef const char *(*hipGetErrorString_t)(hipError_t); extern cl::opt Verbose; +cl::OptionCategory AMDGPUArchByHIPCategory("amdgpu-arch (HIP) options"); + +enum class HipApiVersion { + Auto, // Automatic fallback (R0600 -> R0000 -> unversioned) + R0600, // Force R0600 API (HIP 6.x+) + R0000, // Force R0000 API (legacy HIP) + Unversioned // Force unversioned API (very old HIP) +}; + +static cl::opt HipApi( + "hip-api-version", cl::desc("Select HIP API version for device properties"), + cl::values(clEnumValN(HipApiVersion::Auto, "auto", + "Auto-detect (R0600 -> R0000 -> unversioned)"), + clEnumValN(HipApiVersion::R0600, "r0600", "Force R0600 API"), + clEnumValN(HipApiVersion::R0000, "r0000", "Force R0000 API"), + clEnumValN(HipApiVersion::Unversioned, "unversioned", + "Force unversioned API")), + cl::init(HipApiVersion::Auto), cl::cat(AMDGPUArchByHIPCategory)); + #ifdef _WIN32 static std::vector getSearchPaths() { std::vector Paths; @@ -177,6 +210,9 @@ int printGPUsByHIP() { return 1; } + if (Verbose) + outs() << "Successfully loaded HIP runtime library\n"; + #define DYNAMIC_INIT_HIP(SYMBOL) \ { \ void *SymbolPtr = DynlibHandle->getAddressOfSymbol(#SYMBOL); \ @@ -184,42 +220,153 @@ int printGPUsByHIP() { llvm::errs() << "Failed to find symbol " << #SYMBOL << '\n'; \ return 1; \ } \ + if (Verbose) \ + outs() << "Found symbol: " << #SYMBOL << '\n'; \ SYMBOL = reinterpret_cast(SymbolPtr); \ } hipGetDeviceCount_t hipGetDeviceCount; - hipDeviceGet_t hipDeviceGet; - hipGetDeviceProperties_t hipGetDeviceProperties; + hipRuntimeGetVersion_t hipRuntimeGetVersion = nullptr; + hipGetDevicePropertiesR0600_t hipGetDevicePropertiesR0600 = nullptr; + hipGetDevicePropertiesR0000_t hipGetDevicePropertiesR0000 = nullptr; + hipGetDeviceProperties_t hipGetDeviceProperties = nullptr; + hipGetErrorString_t hipGetErrorString = nullptr; DYNAMIC_INIT_HIP(hipGetDeviceCount); - DYNAMIC_INIT_HIP(hipDeviceGet); - DYNAMIC_INIT_HIP(hipGetDeviceProperties); #undef DYNAMIC_INIT_HIP - int deviceCount; - hipError_t err = hipGetDeviceCount(&deviceCount); - if (err != hipSuccess) { - llvm::errs() << "Failed to get device count\n"; + auto LoadSymbol = [&](const char *Name, auto &FuncPtr, + const char *Desc = "") { + void *Sym = DynlibHandle->getAddressOfSymbol(Name); + if (Sym) { + FuncPtr = reinterpret_cast(Sym); + if (Verbose) + outs() << "Found symbol: " << Name << (Desc[0] ? " " : "") << Desc + << '\n'; + return true; + } + return false; + }; + + LoadSymbol("hipGetErrorString", hipGetErrorString); + + if (LoadSymbol("hipRuntimeGetVersion", hipRuntimeGetVersion)) { + int RuntimeVersion = 0; + if (hipRuntimeGetVersion(&RuntimeVersion) == hipSuccess) { + int Major = RuntimeVersion / 10000000; + int Minor = (RuntimeVersion / 100000) % 100; + int Patch = RuntimeVersion % 100000; + if (Verbose) + outs() << "HIP Runtime Version: " << Major << "." << Minor << "." + << Patch << '\n'; + } + } + + LoadSymbol("hipGetDevicePropertiesR0600", hipGetDevicePropertiesR0600, + "(HIP 6.x+ API)"); + LoadSymbol("hipGetDevicePropertiesR0000", hipGetDevicePropertiesR0000, + "(legacy API)"); + if (!hipGetDevicePropertiesR0600 && !hipGetDevicePropertiesR0000) + LoadSymbol("hipGetDeviceProperties", hipGetDeviceProperties, + "(unversioned legacy API)"); + + int DeviceCount; + if (Verbose) + outs() << "Calling hipGetDeviceCount...\n"; + hipError_t Err = hipGetDeviceCount(&DeviceCount); + if (Err != hipSuccess) { + llvm::errs() << "Failed to get device count"; + if (hipGetErrorString) { + llvm::errs() << ": " << hipGetErrorString(Err); + } + llvm::errs() << " (error code: " << Err << ")\n"; return 1; } - for (int i = 0; i < deviceCount; ++i) { - int deviceId; - err = hipDeviceGet(&deviceId, i); - if (err != hipSuccess) { - llvm::errs() << "Failed to get device id for ordinal " << i << '\n'; - return 1; + if (Verbose) + outs() << "Found " << DeviceCount << " device(s)\n"; + + auto TryGetProperties = [&](auto *ApiFunc, auto *DummyProp, const char *Name, + int DeviceId) -> std::string { + if (!ApiFunc) + return ""; + + if (Verbose) + outs() << "Using " << Name << "...\n"; + + using PropType = std::remove_pointer_t; + PropType Prop; + hipError_t Err = ApiFunc(&Prop, DeviceId); + + if (Err == hipSuccess) { + if (Verbose) { + outs() << Name << " struct: sizeof = " << sizeof(PropType) + << " bytes, offsetof(gcnArchName) = " + << offsetof(PropType, gcnArchName) << " bytes\n"; + } + return Prop.gcnArchName; } - hipDeviceProp_t prop; - err = hipGetDeviceProperties(&prop, deviceId); - if (err != hipSuccess) { - llvm::errs() << "Failed to get device properties for device " << deviceId - << '\n'; + if (Verbose) + llvm::errs() << Name << " failed (error code: " << Err << ")\n"; + return ""; + }; + + for (auto I : llvm::seq(DeviceCount)) { + if (Verbose) + outs() << "Processing device " << I << "...\n"; + + std::string ArchName; + auto TryR0600 = [&](int Dev) -> bool { + if (!hipGetDevicePropertiesR0600) + return false; + ArchName = TryGetProperties(hipGetDevicePropertiesR0600, + (hipDeviceProp_tR0600 *)nullptr, + "R0600 API (HIP 6.x+)", Dev); + return !ArchName.empty(); + }; + auto TryR0000 = [&](int Dev) -> bool { + if (!hipGetDevicePropertiesR0000) + return false; + ArchName = TryGetProperties(hipGetDevicePropertiesR0000, + (hipDeviceProp_tR0000 *)nullptr, + "R0000 API (legacy HIP)", Dev); + return !ArchName.empty(); + }; + auto TryUnversioned = [&](int Dev) -> bool { + if (!hipGetDeviceProperties) + return false; + ArchName = TryGetProperties(hipGetDeviceProperties, + (hipDeviceProp_tR0000 *)nullptr, + "unversioned API (very old HIP)", Dev); + return !ArchName.empty(); + }; + + [[maybe_unused]] bool OK; + switch (HipApi) { + case HipApiVersion::Auto: + OK = TryR0600(I) || TryR0000(I) || TryUnversioned(I); + break; + case HipApiVersion::R0600: + OK = TryR0600(I); + break; + case HipApiVersion::R0000: + OK = TryR0000(I); + break; + case HipApiVersion::Unversioned: + OK = TryUnversioned(I); + } + + if (ArchName.empty()) { + llvm::errs() << "Failed to get device properties for device " << I + << " - no APIs available or all failed\n"; return 1; } - llvm::outs() << prop.gcnArchName << '\n'; + + if (Verbose) + outs() << "Device " << I << " arch name: "; + llvm::outs() << ArchName << '\n'; } return 0; diff --git a/clang/tools/offload-arch/OffloadArch.cpp b/clang/tools/offload-arch/OffloadArch.cpp index 3c5131eb7c06c..91493676918cb 100644 --- a/clang/tools/offload-arch/OffloadArch.cpp +++ b/clang/tools/offload-arch/OffloadArch.cpp @@ -17,6 +17,8 @@ static cl::opt Help("h", cl::desc("Alias for -help"), cl::Hidden); // Mark all our options with this category. static cl::OptionCategory OffloadArchCategory("offload-arch options"); +extern cl::OptionCategory AMDGPUArchByHIPCategory; + enum VendorName { all, amdgpu, @@ -62,7 +64,7 @@ const std::array>, 3> VendorTable{ {VendorName::intel, printIntel}}}; int main(int argc, char *argv[]) { - cl::HideUnrelatedOptions(OffloadArchCategory); + cl::HideUnrelatedOptions({&OffloadArchCategory, &AMDGPUArchByHIPCategory}); cl::SetVersionPrinter(PrintVersion); cl::ParseCommandLineOptions(