diff --git a/offload/liboffload/API/Device.td b/offload/liboffload/API/Device.td
index 5b54c79d83f9d..e9c154818c4a1 100644
--- a/offload/liboffload/API/Device.td
+++ b/offload/liboffload/API/Device.td
@@ -29,6 +29,7 @@ def ol_device_info_t : Enum {
     TaggedEtor<"PLATFORM", "ol_platform_handle_t", "the platform associated with the device">,
     TaggedEtor<"NAME", "char[]", "Device name">,
     TaggedEtor<"PRODUCT_NAME", "char[]", "Device user-facing marketing name">,
+    TaggedEtor<"UID", "char[]", "Device UID">,
     TaggedEtor<"VENDOR", "char[]", "Device vendor">,
     TaggedEtor<"DRIVER_VERSION", "char[]", "Driver version">,
     TaggedEtor<"MAX_WORK_GROUP_SIZE", "uint32_t", "Maximum total work group size in work items">,
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 6d22faeb0e57e..84bc414396811 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -147,8 +147,8 @@ llvm::Error ol_platform_impl_t::init() {
     if (llvm::Error Err = Plugin->initDevice(Id))
       return Err;
 
-    auto Device = &Plugin->getDevice(Id);
-    auto Info = Device->obtainInfoImpl();
+    GenericDeviceTy *Device = &Plugin->getDevice(Id);
+    llvm::Expected<InfoTreeNode> Info = Device->obtainInfo();
     if (llvm::Error Err = Info.takeError())
       return Err;
     Devices.emplace_back(std::make_unique<ol_device_impl_t>(Id, Device, *this,
@@ -467,6 +467,7 @@ Error olGetDeviceInfoImplDetail(ol_device_handle_t Device,
   switch (PropName) {
   case OL_DEVICE_INFO_NAME:
   case OL_DEVICE_INFO_PRODUCT_NAME:
+  case OL_DEVICE_INFO_UID:
   case OL_DEVICE_INFO_VENDOR:
   case OL_DEVICE_INFO_DRIVER_VERSION: {
     // String values
@@ -544,6 +545,8 @@ Error olGetDeviceInfoImplDetailHost(ol_device_handle_t Device,
     return Info.writeString("Virtual Host Device");
   case OL_DEVICE_INFO_PRODUCT_NAME:
     return Info.writeString("Virtual Host Device");
+  case OL_DEVICE_INFO_UID:
+    return Info.writeString(GenericPluginTy::getHostDeviceUid());
   case OL_DEVICE_INFO_VENDOR:
     return Info.writeString("Liboffload");
   case OL_DEVICE_INFO_DRIVER_VERSION:
diff --git a/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h b/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h
index 29cfe78082dbb..ddfa65c76cf2d 100644
--- a/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h
+++ b/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h
@@ -72,6 +72,7 @@ typedef enum hsa_amd_agent_info_s {
   HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU = 0xA00A,
   HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU = 0xA00B,
   HSA_AMD_AGENT_INFO_COOPERATIVE_QUEUES = 0xA010,
+  HSA_AMD_AGENT_INFO_UUID = 0xA011,
   HSA_AMD_AGENT_INFO_TIMESTAMP_FREQUENCY = 0xA016,
 } hsa_amd_agent_info_t;
 
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 0b03ef534d273..928c6cd7569e3 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2083,6 +2083,20 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
       return Err;
     ComputeUnitKind = GPUName;
 
+    // From the ROCm HSA documentation:
+    // Query the UUID of the agent. The value is an Ascii string with a maximum
+    // of 21 chars including NUL. The string value consists of two parts: header
+    // and body. The header identifies the device type (GPU, CPU, DSP) while the
+    // body encodes the UUID as a 16 digit hex string.
+    //
+    // Agents that do not support UUID will return the string "GPU-XX" or
+    // "CPU-XX" or "DSP-XX" depending on their device type.
+    char UUID[24] = {0};
+    if (auto Err = getDeviceAttr(HSA_AMD_AGENT_INFO_UUID, UUID))
+      return Err;
+    if (!StringRef(UUID).ends_with("-XX"))
+      setDeviceUidFromVendorUid(UUID);
+
     // Get the wavefront size.
     uint32_t WavefrontSize = 0;
     if (auto Err = getDeviceAttr(HSA_AGENT_INFO_WAVEFRONT_SIZE, WavefrontSize))
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index f9bff9abd903c..2d5140988b02f 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -791,6 +791,9 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
   /// this id is not unique between different plugins; they may overlap.
   int32_t getDeviceId() const { return DeviceId; }
 
+  /// Get the unique identifier of the device.
+  const char *getDeviceUid() const { return DeviceUid.c_str(); }
+
   /// Set the context of the device if needed, before calling device-specific
   /// functions. Plugins may implement this function as a no-op if not needed.
   virtual Error setContext() = 0;
@@ -989,9 +992,12 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
   Error syncEvent(void *EventPtr);
   virtual Error syncEventImpl(void *EventPtr) = 0;
 
+  /// Obtain information about the device.
+  Expected<InfoTreeNode> obtainInfo();
+  virtual Expected<InfoTreeNode> obtainInfoImpl() = 0;
+
   /// Print information about the device.
   Error printInfo();
-  virtual Expected<InfoTreeNode> obtainInfoImpl() = 0;
 
   /// Return true if the device has work that is either queued or currently
   /// running
@@ -1204,6 +1210,14 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
   /// global device id and is not the device id visible to the OpenMP user.
   const int32_t DeviceId;
 
+  /// The unique identifier of the device.
+  /// Per default, the unique identifier of the device is set to the device id,
+  /// combined with the plugin name, since the offload device id may overlap
+  /// between different plugins.
+  std::string DeviceUid;
+  /// Construct the device UID from the vendor (U)UID.
+  void setDeviceUidFromVendorUid(StringRef VendorUid);
+
   /// The default grid values used for this device.
   llvm::omp::GV GridValues;
 
@@ -1280,6 +1294,9 @@ struct GenericPluginTy {
     return *Devices[DeviceId];
   }
 
+  const GenericDeviceTy &getDevice(int32_t DeviceId) const {
+    return const_cast<GenericPluginTy *>(this)->getDevice(DeviceId);
+  }
   /// Get the number of active devices.
   int32_t getNumDevices() const { return NumDevices; }
 
@@ -1290,6 +1307,14 @@ struct GenericPluginTy {
     return UserDeviceIds.at(DeviceId);
   }
 
+  /// Get the UID for the given device.
+  const char *getDeviceUid(int32_t DeviceId) const {
+    return getDevice(DeviceId).getDeviceUid();
+  }
+
+  /// Get the UID for the host device.
+  static constexpr const char *getHostDeviceUid() { return "HOST"; }
+
   /// Get the ELF code to recognize the binary image of this plugin.
   virtual uint16_t getMagicElfBits() const = 0;
 
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index 36d643b65922d..c7862d9088177 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -715,6 +715,9 @@ GenericDeviceTy::GenericDeviceTy(GenericPluginTy &Plugin, int32_t DeviceId,
       DeviceId(DeviceId), GridValues(OMPGridValues),
       PeerAccesses(NumDevices, PeerAccessState::PENDING), PeerAccessesLock(),
       PinnedAllocs(*this), RPCServer(nullptr) {
+  DeviceUid = std::string(Plugin.getName()) + "-" +
+              std::to_string(static_cast<uint64_t>(DeviceId));
+
 #ifdef OMPT_SUPPORT
   OmptInitialized.store(false);
   // Bind the callbacks to this device's member functions
@@ -1524,15 +1527,22 @@ Error GenericDeviceTy::enqueueHostCall(void (*Callback)(void *), void *UserData,
   return Err;
 }
 
+Expected<InfoTreeNode> GenericDeviceTy::obtainInfo() {
+  auto InfoOrErr = obtainInfoImpl();
+  if (InfoOrErr)
+    InfoOrErr->add("UID", getDeviceUid(), "", DeviceInfo::UID);
+  return InfoOrErr;
+}
+
 Error GenericDeviceTy::printInfo() {
-  auto Info = obtainInfoImpl();
+  auto InfoOrErr = obtainInfo();
 
   // Get the vendor-specific info entries describing the device properties.
-  if (auto Err = Info.takeError())
+  if (auto Err = InfoOrErr.takeError())
     return Err;
 
   // Print all info entries.
-  Info->print();
+  InfoOrErr->print();
 
   return Plugin::success();
 }
@@ -1603,6 +1613,10 @@ Expected<bool> GenericDeviceTy::isAccessiblePtr(const void *Ptr, size_t Size) {
   return isAccessiblePtrImpl(Ptr, Size);
 }
 
+void GenericDeviceTy::setDeviceUidFromVendorUid(StringRef VendorUid) {
+  DeviceUid = std::string(Plugin.getName()) + "-" + std::string(VendorUid);
+}
+
 Error GenericPluginTy::init() {
   if (Initialized)
     return Plugin::success();
diff --git a/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.cpp b/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.cpp
index f5b2d074a47e7..e7a1ca38b3c13 100644
--- a/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.cpp
+++ b/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.cpp
@@ -35,6 +35,7 @@ DLWRAP(cuFuncSetAttribute, 3)
 
 // Device info
 DLWRAP(cuDeviceGetName, 3)
+DLWRAP(cuDeviceGetUuid, 2)
 DLWRAP(cuDeviceTotalMem, 2)
 DLWRAP(cuDriverGetVersion, 1)
 
diff --git a/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.h b/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.h
index dec4e33508c62..a470d6df1079d 100644
--- a/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.h
+++ b/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.h
@@ -33,6 +33,9 @@ typedef struct CUfunc_st *CUfunction;
 typedef void (*CUhostFn)(void *userData);
 typedef struct CUstream_st *CUstream;
 typedef struct CUevent_st *CUevent;
+typedef struct CUuuid_st {
+  char bytes[16];
+} CUuuid;
 
 #define CU_DEVICE_INVALID ((CUdevice)(-2))
 
@@ -301,6 +304,7 @@ CUresult cuFuncSetAttribute(CUfunction, CUfunction_attribute, int);
 
 // Device info
 CUresult cuDeviceGetName(char *, int, CUdevice);
+CUresult cuDeviceGetUuid(CUuuid *, CUdevice);
 CUresult cuDeviceTotalMem(size_t *, CUdevice);
 CUresult cuDriverGetVersion(int *);
 
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index db94f7f2dd995..a9adcc397fb7b 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -25,6 +25,7 @@
 #include "PluginInterface.h"
 #include "Utils/ELF.h"
 
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
@@ -293,6 +294,15 @@ struct CUDADeviceTy : public GenericDeviceTy {
     if (auto Err = Plugin::check(Res, "error in cuDeviceGet: %s"))
       return Err;
 
+    CUuuid UUID = {0};
+    Res = cuDeviceGetUuid(&UUID, Device);
+    if (auto Err = Plugin::check(Res, "error in cuDeviceGetUuid: %s"))
+      return Err;
+    // The UUID is 16 raw bytes, not a C string: it may contain NUL bytes or
+    // none at all, so pass the length explicitly rather than via strlen.
+    setDeviceUidFromVendorUid(
+        toHex(StringRef(UUID.bytes, sizeof(UUID.bytes)), true));
+
     // Query the current flags of the primary context and set its flags if
     // it is inactive.
     unsigned int FormerPrimaryCtxFlags = 0;
diff --git a/offload/tools/deviceinfo/llvm-offload-device-info.cpp b/offload/tools/deviceinfo/llvm-offload-device-info.cpp
index 9b58d67f017ca..42ffb97d6d77c 100644
--- a/offload/tools/deviceinfo/llvm-offload-device-info.cpp
+++ b/offload/tools/deviceinfo/llvm-offload-device-info.cpp
@@ -176,6 +176,7 @@ ol_result_t printDevice(std::ostream &S, ol_device_handle_t D) {
       printDeviceValue<const char *>(S, D, OL_DEVICE_INFO_NAME, "Name"));
   OFFLOAD_ERR(printDeviceValue<const char *>(S, D, OL_DEVICE_INFO_PRODUCT_NAME,
                                              "Product Name"));
+  OFFLOAD_ERR(printDeviceValue<const char *>(S, D, OL_DEVICE_INFO_UID, "UID"));
   OFFLOAD_ERR(
       printDeviceValue<ol_device_type_t>(S, D, OL_DEVICE_INFO_TYPE, "Type"));
   OFFLOAD_ERR(printDeviceValue<const char *>(
diff --git a/offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp b/offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp
index 8cb0b8065c33e..30eafee026316 100644
--- a/offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp
+++ b/offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp
@@ -98,6 +98,16 @@ TEST_P(olGetDeviceInfoTest, SuccessProductName) {
   ASSERT_EQ(std::strlen(Name.data()), Size - 1);
 }
 
+TEST_P(olGetDeviceInfoTest, SuccessUID) {
+  size_t Size = 0;
+  ASSERT_SUCCESS(olGetDeviceInfoSize(Device, OL_DEVICE_INFO_UID, &Size));
+  ASSERT_GT(Size, 0ul);
+  std::vector<char> UID;
+  UID.resize(Size);
+  ASSERT_SUCCESS(olGetDeviceInfo(Device, OL_DEVICE_INFO_UID, Size, UID.data()));
+  ASSERT_EQ(std::strlen(UID.data()), Size - 1);
+}
+
 TEST_P(olGetDeviceInfoTest, HostProductName) {
   size_t Size = 0;
   ASSERT_SUCCESS(olGetDeviceInfoSize(Host, OL_DEVICE_INFO_PRODUCT_NAME, &Size));
@@ -109,6 +119,16 @@ TEST_P(olGetDeviceInfoTest, HostProductName) {
   ASSERT_EQ(std::strlen(Name.data()), Size - 1);
 }
 
+TEST_P(olGetDeviceInfoTest, HostUID) {
+  size_t Size = 0;
+  ASSERT_SUCCESS(olGetDeviceInfoSize(Host, OL_DEVICE_INFO_UID, &Size));
+  ASSERT_GT(Size, 0ul);
+  std::vector<char> UID;
+  UID.resize(Size);
+  ASSERT_SUCCESS(olGetDeviceInfo(Host, OL_DEVICE_INFO_UID, Size, UID.data()));
+  ASSERT_EQ(std::strlen(UID.data()), Size - 1);
+}
+
 TEST_P(olGetDeviceInfoTest, SuccessVendor) {
   size_t Size = 0;
   ASSERT_SUCCESS(olGetDeviceInfoSize(Device, OL_DEVICE_INFO_VENDOR, &Size));
diff --git a/offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp b/offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp
index c4a3c2d5e3c75..79a18c1d133dc 100644
--- a/offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp
+++ b/offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp
@@ -32,6 +32,7 @@ OL_DEVICE_INFO_SIZE_TEST_EQ(Platform, ol_platform_handle_t,
                             OL_DEVICE_INFO_PLATFORM);
 OL_DEVICE_INFO_SIZE_TEST_NONZERO(Name, OL_DEVICE_INFO_NAME);
 OL_DEVICE_INFO_SIZE_TEST_NONZERO(ProductName, OL_DEVICE_INFO_PRODUCT_NAME);
+OL_DEVICE_INFO_SIZE_TEST_NONZERO(UID, OL_DEVICE_INFO_UID);
 OL_DEVICE_INFO_SIZE_TEST_NONZERO(Vendor, OL_DEVICE_INFO_VENDOR);
 OL_DEVICE_INFO_SIZE_TEST_NONZERO(DriverVersion, OL_DEVICE_INFO_DRIVER_VERSION);
 OL_DEVICE_INFO_SIZE_TEST_EQ(MaxWorkGroupSize, uint32_t,