diff --git a/clang/tools/offload-arch/AMDGPUArchByHIP.cpp b/clang/tools/offload-arch/AMDGPUArchByHIP.cpp index ff39a85d15628..d7f6d79b135df 100644 --- a/clang/tools/offload-arch/AMDGPUArchByHIP.cpp +++ b/clang/tools/offload-arch/AMDGPUArchByHIP.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Sequence.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/DynamicLibrary.h" @@ -32,22 +33,54 @@ using namespace llvm; -typedef struct { +// R0600 struct layout (HIP 6.x+) +typedef struct alignas(8) { + char padding[1160]; + char gcnArchName[256]; + char padding2[56]; +} hipDeviceProp_tR0600; + +// R0000 struct layout (legacy) +typedef struct alignas(8) { char padding[396]; char gcnArchName[256]; char padding2[1024]; -} hipDeviceProp_t; +} hipDeviceProp_tR0000; typedef enum { hipSuccess = 0, } hipError_t; typedef hipError_t (*hipGetDeviceCount_t)(int *); -typedef hipError_t (*hipDeviceGet_t)(int *, int); -typedef hipError_t (*hipGetDeviceProperties_t)(hipDeviceProp_t *, int); +typedef hipError_t (*hipGetDevicePropertiesR0600_t)(hipDeviceProp_tR0600 *, + int); +typedef hipError_t (*hipGetDevicePropertiesR0000_t)(hipDeviceProp_tR0000 *, + int); +typedef hipError_t (*hipGetDeviceProperties_t)(hipDeviceProp_tR0000 *, int); +typedef hipError_t (*hipRuntimeGetVersion_t)(int *); +typedef const char *(*hipGetErrorString_t)(hipError_t); extern cl::opt Verbose; +cl::OptionCategory AMDGPUArchByHIPCategory("amdgpu-arch (HIP) options"); + +enum class HipApiVersion { + Auto, // Automatic fallback (R0600 -> R0000 -> unversioned) + R0600, // Force R0600 API (HIP 6.x+) + R0000, // Force R0000 API (legacy HIP) + Unversioned // Force unversioned API (very old HIP) +}; + +static cl::opt HipApi( + "hip-api-version", cl::desc("Select HIP API version for device properties"), + cl::values(clEnumValN(HipApiVersion::Auto, "auto", + "Auto-detect (R0600 -> R0000 -> unversioned)"), + clEnumValN(HipApiVersion::R0600, "r0600", "Force R0600 API"), + clEnumValN(HipApiVersion::R0000, "r0000", "Force R0000 API"), + clEnumValN(HipApiVersion::Unversioned, "unversioned", + "Force unversioned API")), + cl::init(HipApiVersion::Auto), cl::cat(AMDGPUArchByHIPCategory)); + #ifdef _WIN32 static std::vector getSearchPaths() { std::vector Paths; @@ -177,6 +210,9 @@ int printGPUsByHIP() { return 1; } + if (Verbose) + outs() << "Successfully loaded HIP runtime library\n"; + #define DYNAMIC_INIT_HIP(SYMBOL) \ { \ void *SymbolPtr = DynlibHandle->getAddressOfSymbol(#SYMBOL); \ @@ -184,42 +220,153 @@ int printGPUsByHIP() { llvm::errs() << "Failed to find symbol " << #SYMBOL << '\n'; \ return 1; \ } \ + if (Verbose) \ + outs() << "Found symbol: " << #SYMBOL << '\n'; \ SYMBOL = reinterpret_cast(SymbolPtr); \ } hipGetDeviceCount_t hipGetDeviceCount; - hipDeviceGet_t hipDeviceGet; - hipGetDeviceProperties_t hipGetDeviceProperties; + hipRuntimeGetVersion_t hipRuntimeGetVersion = nullptr; + hipGetDevicePropertiesR0600_t hipGetDevicePropertiesR0600 = nullptr; + hipGetDevicePropertiesR0000_t hipGetDevicePropertiesR0000 = nullptr; + hipGetDeviceProperties_t hipGetDeviceProperties = nullptr; + hipGetErrorString_t hipGetErrorString = nullptr; DYNAMIC_INIT_HIP(hipGetDeviceCount); - DYNAMIC_INIT_HIP(hipDeviceGet); - DYNAMIC_INIT_HIP(hipGetDeviceProperties); #undef DYNAMIC_INIT_HIP - int deviceCount; - hipError_t err = hipGetDeviceCount(&deviceCount); - if (err != hipSuccess) { - llvm::errs() << "Failed to get device count\n"; + auto LoadSymbol = [&](const char *Name, auto &FuncPtr, + const char *Desc = "") { + void *Sym = DynlibHandle->getAddressOfSymbol(Name); + if (Sym) { + FuncPtr = reinterpret_cast(Sym); + if (Verbose) + outs() << "Found symbol: " << Name << (Desc[0] ? " " : "") << Desc + << '\n'; + return true; + } + return false; + }; + + LoadSymbol("hipGetErrorString", hipGetErrorString); + + if (LoadSymbol("hipRuntimeGetVersion", hipRuntimeGetVersion)) { + int RuntimeVersion = 0; + if (hipRuntimeGetVersion(&RuntimeVersion) == hipSuccess) { + int Major = RuntimeVersion / 10000000; + int Minor = (RuntimeVersion / 100000) % 100; + int Patch = RuntimeVersion % 100000; + if (Verbose) + outs() << "HIP Runtime Version: " << Major << "." << Minor << "." + << Patch << '\n'; + } + } + + LoadSymbol("hipGetDevicePropertiesR0600", hipGetDevicePropertiesR0600, + "(HIP 6.x+ API)"); + LoadSymbol("hipGetDevicePropertiesR0000", hipGetDevicePropertiesR0000, + "(legacy API)"); + if (!hipGetDevicePropertiesR0600 && !hipGetDevicePropertiesR0000) + LoadSymbol("hipGetDeviceProperties", hipGetDeviceProperties, + "(unversioned legacy API)"); + + int DeviceCount; + if (Verbose) + outs() << "Calling hipGetDeviceCount...\n"; + hipError_t Err = hipGetDeviceCount(&DeviceCount); + if (Err != hipSuccess) { + llvm::errs() << "Failed to get device count"; + if (hipGetErrorString) { + llvm::errs() << ": " << hipGetErrorString(Err); + } + llvm::errs() << " (error code: " << Err << ")\n"; return 1; } - for (int i = 0; i < deviceCount; ++i) { - int deviceId; - err = hipDeviceGet(&deviceId, i); - if (err != hipSuccess) { - llvm::errs() << "Failed to get device id for ordinal " << i << '\n'; - return 1; + if (Verbose) + outs() << "Found " << DeviceCount << " device(s)\n"; + + auto TryGetProperties = [&](auto *ApiFunc, auto *DummyProp, const char *Name, + int DeviceId) -> std::string { + if (!ApiFunc) + return ""; + + if (Verbose) + outs() << "Using " << Name << "...\n"; + + using PropType = std::remove_pointer_t; + PropType Prop; + hipError_t Err = ApiFunc(&Prop, DeviceId); + + if (Err == hipSuccess) { + if (Verbose) { + outs() << Name << " struct: sizeof = " << sizeof(PropType) + << " bytes, offsetof(gcnArchName) = " + << offsetof(PropType, gcnArchName) << " bytes\n"; + } + return Prop.gcnArchName; } - hipDeviceProp_t prop; - err = hipGetDeviceProperties(&prop, deviceId); - if (err != hipSuccess) { - llvm::errs() << "Failed to get device properties for device " << deviceId - << '\n'; + if (Verbose) + llvm::errs() << Name << " failed (error code: " << Err << ")\n"; + return ""; + }; + + for (auto I : llvm::seq(DeviceCount)) { + if (Verbose) + outs() << "Processing device " << I << "...\n"; + + std::string ArchName; + auto TryR0600 = [&](int Dev) -> bool { + if (!hipGetDevicePropertiesR0600) + return false; + ArchName = TryGetProperties(hipGetDevicePropertiesR0600, + (hipDeviceProp_tR0600 *)nullptr, + "R0600 API (HIP 6.x+)", Dev); + return !ArchName.empty(); + }; + auto TryR0000 = [&](int Dev) -> bool { + if (!hipGetDevicePropertiesR0000) + return false; + ArchName = TryGetProperties(hipGetDevicePropertiesR0000, + (hipDeviceProp_tR0000 *)nullptr, + "R0000 API (legacy HIP)", Dev); + return !ArchName.empty(); + }; + auto TryUnversioned = [&](int Dev) -> bool { + if (!hipGetDeviceProperties) + return false; + ArchName = TryGetProperties(hipGetDeviceProperties, + (hipDeviceProp_tR0000 *)nullptr, + "unversioned API (very old HIP)", Dev); + return !ArchName.empty(); + }; + + [[maybe_unused]] bool OK; + switch (HipApi) { + case HipApiVersion::Auto: + OK = TryR0600(I) || TryR0000(I) || TryUnversioned(I); + break; + case HipApiVersion::R0600: + OK = TryR0600(I); + break; + case HipApiVersion::R0000: + OK = TryR0000(I); + break; + case HipApiVersion::Unversioned: + OK = TryUnversioned(I); + } + + if (ArchName.empty()) { + llvm::errs() << "Failed to get device properties for device " << I + << " - no APIs available or all failed\n"; return 1; } - llvm::outs() << prop.gcnArchName << '\n'; + + if (Verbose) + outs() << "Device " << I << " arch name: "; + llvm::outs() << ArchName << '\n'; } return 0; diff --git a/clang/tools/offload-arch/OffloadArch.cpp b/clang/tools/offload-arch/OffloadArch.cpp index 3c5131eb7c06c..91493676918cb 100644 --- a/clang/tools/offload-arch/OffloadArch.cpp +++ b/clang/tools/offload-arch/OffloadArch.cpp @@ -17,6 +17,8 @@ static cl::opt Help("h", cl::desc("Alias for -help"), cl::Hidden); // Mark all our options with this category. static cl::OptionCategory OffloadArchCategory("offload-arch options"); +extern cl::OptionCategory AMDGPUArchByHIPCategory; + enum VendorName { all, amdgpu, @@ -62,7 +64,7 @@ const std::array>, 3> VendorTable{ {VendorName::intel, printIntel}}}; int main(int argc, char *argv[]) { - cl::HideUnrelatedOptions(OffloadArchCategory); + cl::HideUnrelatedOptions({&OffloadArchCategory, &AMDGPUArchByHIPCategory}); cl::SetVersionPrinter(PrintVersion); cl::ParseCommandLineOptions(