diff --git a/openmp/libomptarget/CMakeLists.txt b/openmp/libomptarget/CMakeLists.txt index b382137b70ee2..531198fae0169 100644 --- a/openmp/libomptarget/CMakeLists.txt +++ b/openmp/libomptarget/CMakeLists.txt @@ -145,8 +145,7 @@ add_subdirectory(DeviceRTL) add_subdirectory(tools) # Build target agnostic offloading library. -set(LIBOMPTARGET_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src) -add_subdirectory(${LIBOMPTARGET_SRC_DIR}) +add_subdirectory(src) # Add tests. add_subdirectory(test) diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp index fce7454bf2800..14461f14dd670 100644 --- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp @@ -2088,7 +2088,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { /// Allocate and construct an AMDGPU kernel. Expected constructKernel(const char *Name) override { // Allocate and construct the AMDGPU kernel. - AMDGPUKernelTy *AMDGPUKernel = Plugin::get().allocate(); + AMDGPUKernelTy *AMDGPUKernel = PluginTy::get().allocate(); if (!AMDGPUKernel) return Plugin::error("Failed to allocate memory for AMDGPU kernel"); @@ -2139,7 +2139,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { int32_t ImageId) override { // Allocate and initialize the image object. AMDGPUDeviceImageTy *AMDImage = - Plugin::get().allocate(); + PluginTy::get().allocate(); new (AMDImage) AMDGPUDeviceImageTy(ImageId, *this, TgtImage); // Load the HSA executable. @@ -2697,7 +2697,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { } Error setDeviceHeapSize(uint64_t Value) override { for (DeviceImageTy *Image : LoadedImages) - if (auto Err = setupDeviceMemoryPool(Plugin::get(), *Image, Value)) + if (auto Err = setupDeviceMemoryPool(PluginTy::get(), *Image, Value)) return Err; DeviceMemoryPoolSize = Value; return Plugin::success(); @@ -2737,7 +2737,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { return utils::iterateAgentMemoryPools( Agent, [&](hsa_amd_memory_pool_t HSAMemoryPool) { AMDGPUMemoryPoolTy *MemoryPool = - Plugin::get().allocate(); + PluginTy::get().allocate(); new (MemoryPool) AMDGPUMemoryPoolTy(HSAMemoryPool); AllMemoryPools.push_back(MemoryPool); return HSA_STATUS_SUCCESS; @@ -3115,6 +3115,17 @@ struct AMDGPUPluginTy final : public GenericPluginTy { return Plugin::check(Status, "Error in hsa_shut_down: %s"); } + /// Creates an AMDGPU device. + GenericDeviceTy *createDevice(int32_t DeviceId, int32_t NumDevices) override { + return new AMDGPUDeviceTy(DeviceId, NumDevices, getHostDevice(), + getKernelAgent(DeviceId)); + } + + /// Creates an AMDGPU global handler. + GenericGlobalHandlerTy *createGlobalHandler() override { + return new AMDGPUGlobalHandlerTy(); + } + Triple::ArchType getTripleArch() const override { return Triple::amdgcn; } /// Get the ELF code for recognizing the compatible image binary. @@ -3237,7 +3248,7 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice, // 56 bytes per allocation. uint32_t AllArgsSize = KernelArgsSize + ImplicitArgsSize; - AMDHostDeviceTy &HostDevice = Plugin::get().getHostDevice(); + AMDHostDeviceTy &HostDevice = PluginTy::get().getHostDevice(); AMDGPUMemoryManagerTy &ArgsMemoryManager = HostDevice.getArgsMemoryManager(); void *AllArgs = nullptr; @@ -3347,20 +3358,10 @@ Error AMDGPUKernelTy::printLaunchInfoDetails(GenericDeviceTy &GenericDevice, return Plugin::success(); } -GenericPluginTy *Plugin::createPlugin() { return new AMDGPUPluginTy(); } - -GenericDeviceTy *Plugin::createDevice(int32_t DeviceId, int32_t NumDevices) { - AMDGPUPluginTy &Plugin = get(); - return new AMDGPUDeviceTy(DeviceId, NumDevices, Plugin.getHostDevice(), - Plugin.getKernelAgent(DeviceId)); -} - -GenericGlobalHandlerTy *Plugin::createGlobalHandler() { - return new AMDGPUGlobalHandlerTy(); -} +GenericPluginTy *PluginTy::createPlugin() { return new AMDGPUPluginTy(); } template -Error Plugin::check(int32_t Code, const char *ErrFmt, ArgsTy... Args) { +static Error Plugin::check(int32_t Code, const char *ErrFmt, ArgsTy... Args) { hsa_status_t ResultCode = static_cast(Code); if (ResultCode == HSA_STATUS_SUCCESS || ResultCode == HSA_STATUS_INFO_BREAK) return Error::success(); @@ -3384,7 +3385,7 @@ void *AMDGPUMemoryManagerTy::allocate(size_t Size, void *HstPtr, } assert(Ptr && "Invalid pointer"); - auto &KernelAgents = Plugin::get().getKernelAgents(); + auto &KernelAgents = PluginTy::get().getKernelAgents(); // Allow all kernel agents to access the allocation. if (auto Err = MemoryPool->enableAccess(Ptr, Size, KernelAgents)) { @@ -3427,7 +3428,7 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) { } if (Alloc) { - auto &KernelAgents = Plugin::get().getKernelAgents(); + auto &KernelAgents = PluginTy::get().getKernelAgents(); // Inherently necessary for host or shared allocations // Also enabled for device memory to allow device to device memcpy diff --git a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h index b7be7b645ba33..1440f8f678d82 100644 --- a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h +++ b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h @@ -976,6 +976,13 @@ struct GenericPluginTy { Error deinit(); virtual Error deinitImpl() = 0; + /// Create a new device for the underlying plugin. + virtual GenericDeviceTy *createDevice(int32_t DeviceID, + int32_t NumDevices) = 0; + + /// Create a new global handler for the underlying plugin. + virtual GenericGlobalHandlerTy *createGlobalHandler() = 0; + /// Get the reference to the device with a certain device id. GenericDeviceTy &getDevice(int32_t DeviceId) { assert(isValidDeviceId(DeviceId) && "Invalid device id"); @@ -1085,29 +1092,53 @@ struct GenericPluginTy { RPCServerTy *RPCServer; }; +namespace Plugin { +/// Create a success error. This is the same as calling Error::success(), but +/// it is recommended to use this one for consistency with Plugin::error() and +/// Plugin::check(). +static Error success() { return Error::success(); } + +/// Create a string error. +template +static Error error(const char *ErrFmt, ArgsTy... Args) { + return createStringError(inconvertibleErrorCode(), ErrFmt, Args...); +} + +/// Check the plugin-specific error code and return an error or success +/// accordingly. In case of an error, create a string error with the error +/// description. The ErrFmt should follow the format: +/// "Error in []: %s" +/// The last format specifier "%s" is mandatory and will be used to place the +/// error code's description. Notice this function should be only called from +/// the plugin-specific code. +/// TODO: Refactor this, must be defined individually by each plugin. +template +static Error check(int32_t ErrorCode, const char *ErrFmt, ArgsTy... Args); +} // namespace Plugin + /// Class for simplifying the getter operation of the plugin. Anywhere on the /// code, the current plugin can be retrieved by Plugin::get(). The class also /// declares functions to create plugin-specific object instances. The check(), /// createPlugin(), createDevice() and createGlobalHandler() functions should be /// defined by each plugin implementation. -class Plugin { +class PluginTy { // Reference to the plugin instance. static GenericPluginTy *SpecificPlugin; - Plugin() { + PluginTy() { if (auto Err = init()) REPORT("Failed to initialize plugin: %s\n", toString(std::move(Err)).data()); } - ~Plugin() { + ~PluginTy() { if (auto Err = deinit()) REPORT("Failed to deinitialize plugin: %s\n", toString(std::move(Err)).data()); } - Plugin(const Plugin &) = delete; - void operator=(const Plugin &) = delete; + PluginTy(const PluginTy &) = delete; + void operator=(const PluginTy &) = delete; /// Create and intialize the plugin instance. static Error init() { @@ -1158,7 +1189,7 @@ class Plugin { // This static variable will initialize the underlying plugin instance in // case there was no previous explicit initialization. The initialization is // thread safe. - static Plugin Plugin; + static PluginTy Plugin; assert(SpecificPlugin && "Plugin is not active"); return *SpecificPlugin; @@ -1170,35 +1201,8 @@ class Plugin { /// Indicate whether the plugin is active. static bool isActive() { return SpecificPlugin != nullptr; } - /// Create a success error. This is the same as calling Error::success(), but - /// it is recommended to use this one for consistency with Plugin::error() and - /// Plugin::check(). - static Error success() { return Error::success(); } - - /// Create a string error. - template - static Error error(const char *ErrFmt, ArgsTy... Args) { - return createStringError(inconvertibleErrorCode(), ErrFmt, Args...); - } - - /// Check the plugin-specific error code and return an error or success - /// accordingly. In case of an error, create a string error with the error - /// description. The ErrFmt should follow the format: - /// "Error in []: %s" - /// The last format specifier "%s" is mandatory and will be used to place the - /// error code's description. Notice this function should be only called from - /// the plugin-specific code. - template - static Error check(int32_t ErrorCode, const char *ErrFmt, ArgsTy... Args); - /// Create a plugin instance. static GenericPluginTy *createPlugin(); - - /// Create a plugin-specific device. - static GenericDeviceTy *createDevice(int32_t DeviceId, int32_t NumDevices); - - /// Create a plugin-specific global handler. - static GenericGlobalHandlerTy *createGlobalHandler(); }; /// Auxiliary interface class for GenericDeviceResourceManagerTy. This class diff --git a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp index f39f913d85eec..4db787a2ea918 100644 --- a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp +++ b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp @@ -39,7 +39,7 @@ using namespace omp; using namespace target; using namespace plugin; -GenericPluginTy *Plugin::SpecificPlugin = nullptr; +GenericPluginTy *PluginTy::SpecificPlugin = nullptr; // TODO: Fix any thread safety issues for multi-threaded kernel recording. struct RecordReplayTy { @@ -438,7 +438,7 @@ Error GenericKernelTy::init(GenericDeviceTy &GenericDevice, // Retrieve kernel environment object for the kernel. GlobalTy KernelEnv(std::string(Name) + "_kernel_environment", sizeof(KernelEnvironment), &KernelEnvironment); - GenericGlobalHandlerTy &GHandler = Plugin::get().getGlobalHandler(); + GenericGlobalHandlerTy &GHandler = PluginTy::get().getGlobalHandler(); if (auto Err = GHandler.readGlobalFromImage(GenericDevice, *ImagePtr, KernelEnv)) { [[maybe_unused]] std::string ErrStr = toString(std::move(Err)); @@ -1488,7 +1488,7 @@ Error GenericPluginTy::init() { assert(Devices.size() == 0 && "Plugin already initialized"); Devices.resize(NumDevices, nullptr); - GlobalHandler = Plugin::createGlobalHandler(); + GlobalHandler = createGlobalHandler(); assert(GlobalHandler && "Invalid global handler"); RPCServer = new RPCServerTy(NumDevices); @@ -1522,7 +1522,7 @@ Error GenericPluginTy::initDevice(int32_t DeviceId) { assert(!Devices[DeviceId] && "Device already initialized"); // Create the device and save the reference. - GenericDeviceTy *Device = Plugin::createDevice(DeviceId, NumDevices); + GenericDeviceTy *Device = createDevice(DeviceId, NumDevices); assert(Device && "Invalid device"); // Save the device reference into the list. @@ -1581,7 +1581,7 @@ extern "C" { #endif int32_t __tgt_rtl_init_plugin() { - auto Err = Plugin::initIfNeeded(); + auto Err = PluginTy::initIfNeeded(); if (Err) { [[maybe_unused]] std::string ErrStr = toString(std::move(Err)); DP("Failed to init plugin: %s", ErrStr.c_str()); @@ -1592,7 +1592,7 @@ int32_t __tgt_rtl_init_plugin() { } int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) { - if (!Plugin::isActive()) + if (!PluginTy::isActive()) return false; StringRef Buffer(reinterpret_cast(Image->ImageStart), @@ -1609,13 +1609,13 @@ int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) { case file_magic::elf_executable: case file_magic::elf_shared_object: case file_magic::elf_core: { - auto MatchOrErr = Plugin::get().checkELFImage(Buffer); + auto MatchOrErr = PluginTy::get().checkELFImage(Buffer); if (Error Err = MatchOrErr.takeError()) return HandleError(std::move(Err)); return *MatchOrErr; } case file_magic::bitcode: { - auto MatchOrErr = Plugin::get().getJIT().checkBitcodeImage(Buffer); + auto MatchOrErr = PluginTy::get().getJIT().checkBitcodeImage(Buffer); if (Error Err = MatchOrErr.takeError()) return HandleError(std::move(Err)); return *MatchOrErr; @@ -1626,7 +1626,7 @@ int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) { } int32_t __tgt_rtl_init_device(int32_t DeviceId) { - auto Err = Plugin::get().initDevice(DeviceId); + auto Err = PluginTy::get().initDevice(DeviceId); if (Err) { REPORT("Failure to initialize device %d: %s\n", DeviceId, toString(std::move(Err)).data()); @@ -1636,23 +1636,25 @@ int32_t __tgt_rtl_init_device(int32_t DeviceId) { return OFFLOAD_SUCCESS; } -int32_t __tgt_rtl_number_of_devices() { return Plugin::get().getNumDevices(); } +int32_t __tgt_rtl_number_of_devices() { + return PluginTy::get().getNumDevices(); +} int64_t __tgt_rtl_init_requires(int64_t RequiresFlags) { - Plugin::get().setRequiresFlag(RequiresFlags); + PluginTy::get().setRequiresFlag(RequiresFlags); return OFFLOAD_SUCCESS; } int32_t __tgt_rtl_is_data_exchangable(int32_t SrcDeviceId, int32_t DstDeviceId) { - return Plugin::get().isDataExchangable(SrcDeviceId, DstDeviceId); + return PluginTy::get().isDataExchangable(SrcDeviceId, DstDeviceId); } int32_t __tgt_rtl_initialize_record_replay(int32_t DeviceId, int64_t MemorySize, void *VAddr, bool isRecord, bool SaveOutput, uint64_t &ReqPtrArgOffset) { - GenericPluginTy &Plugin = Plugin::get(); + GenericPluginTy &Plugin = PluginTy::get(); GenericDeviceTy &Device = Plugin.getDevice(DeviceId); RecordReplayTy::RRStatusTy Status = isRecord ? RecordReplayTy::RRStatusTy::RRRecording @@ -1674,7 +1676,7 @@ int32_t __tgt_rtl_initialize_record_replay(int32_t DeviceId, int64_t MemorySize, int32_t __tgt_rtl_load_binary(int32_t DeviceId, __tgt_device_image *TgtImage, __tgt_device_binary *Binary) { - GenericPluginTy &Plugin = Plugin::get(); + GenericPluginTy &Plugin = PluginTy::get(); GenericDeviceTy &Device = Plugin.getDevice(DeviceId); auto ImageOrErr = Device.loadBinary(Plugin, TgtImage); @@ -1695,7 +1697,7 @@ int32_t __tgt_rtl_load_binary(int32_t DeviceId, __tgt_device_image *TgtImage, void *__tgt_rtl_data_alloc(int32_t DeviceId, int64_t Size, void *HostPtr, int32_t Kind) { - auto AllocOrErr = Plugin::get().getDevice(DeviceId).dataAlloc( + auto AllocOrErr = PluginTy::get().getDevice(DeviceId).dataAlloc( Size, HostPtr, (TargetAllocTy)Kind); if (!AllocOrErr) { auto Err = AllocOrErr.takeError(); @@ -1709,8 +1711,8 @@ void *__tgt_rtl_data_alloc(int32_t DeviceId, int64_t Size, void *HostPtr, } int32_t __tgt_rtl_data_delete(int32_t DeviceId, void *TgtPtr, int32_t Kind) { - auto Err = - Plugin::get().getDevice(DeviceId).dataDelete(TgtPtr, (TargetAllocTy)Kind); + auto Err = PluginTy::get().getDevice(DeviceId).dataDelete( + TgtPtr, (TargetAllocTy)Kind); if (Err) { REPORT("Failure to deallocate device pointer %p: %s\n", TgtPtr, toString(std::move(Err)).data()); @@ -1722,7 +1724,7 @@ int32_t __tgt_rtl_data_delete(int32_t DeviceId, void *TgtPtr, int32_t Kind) { int32_t __tgt_rtl_data_lock(int32_t DeviceId, void *Ptr, int64_t Size, void **LockedPtr) { - auto LockedPtrOrErr = Plugin::get().getDevice(DeviceId).dataLock(Ptr, Size); + auto LockedPtrOrErr = PluginTy::get().getDevice(DeviceId).dataLock(Ptr, Size); if (!LockedPtrOrErr) { auto Err = LockedPtrOrErr.takeError(); REPORT("Failure to lock memory %p: %s\n", Ptr, @@ -1740,7 +1742,7 @@ int32_t __tgt_rtl_data_lock(int32_t DeviceId, void *Ptr, int64_t Size, } int32_t __tgt_rtl_data_unlock(int32_t DeviceId, void *Ptr) { - auto Err = Plugin::get().getDevice(DeviceId).dataUnlock(Ptr); + auto Err = PluginTy::get().getDevice(DeviceId).dataUnlock(Ptr); if (Err) { REPORT("Failure to unlock memory %p: %s\n", Ptr, toString(std::move(Err)).data()); @@ -1752,7 +1754,7 @@ int32_t __tgt_rtl_data_unlock(int32_t DeviceId, void *Ptr) { int32_t __tgt_rtl_data_notify_mapped(int32_t DeviceId, void *HstPtr, int64_t Size) { - auto Err = Plugin::get().getDevice(DeviceId).notifyDataMapped(HstPtr, Size); + auto Err = PluginTy::get().getDevice(DeviceId).notifyDataMapped(HstPtr, Size); if (Err) { REPORT("Failure to notify data mapped %p: %s\n", HstPtr, toString(std::move(Err)).data()); @@ -1763,7 +1765,7 @@ int32_t __tgt_rtl_data_notify_mapped(int32_t DeviceId, void *HstPtr, } int32_t __tgt_rtl_data_notify_unmapped(int32_t DeviceId, void *HstPtr) { - auto Err = Plugin::get().getDevice(DeviceId).notifyDataUnmapped(HstPtr); + auto Err = PluginTy::get().getDevice(DeviceId).notifyDataUnmapped(HstPtr); if (Err) { REPORT("Failure to notify data unmapped %p: %s\n", HstPtr, toString(std::move(Err)).data()); @@ -1782,8 +1784,8 @@ int32_t __tgt_rtl_data_submit(int32_t DeviceId, void *TgtPtr, void *HstPtr, int32_t __tgt_rtl_data_submit_async(int32_t DeviceId, void *TgtPtr, void *HstPtr, int64_t Size, __tgt_async_info *AsyncInfoPtr) { - auto Err = Plugin::get().getDevice(DeviceId).dataSubmit(TgtPtr, HstPtr, Size, - AsyncInfoPtr); + auto Err = PluginTy::get().getDevice(DeviceId).dataSubmit(TgtPtr, HstPtr, + Size, AsyncInfoPtr); if (Err) { REPORT("Failure to copy data from host to device. Pointers: host " "= " DPxMOD ", device = " DPxMOD ", size = %" PRId64 ": %s\n", @@ -1804,8 +1806,8 @@ int32_t __tgt_rtl_data_retrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr, int32_t __tgt_rtl_data_retrieve_async(int32_t DeviceId, void *HstPtr, void *TgtPtr, int64_t Size, __tgt_async_info *AsyncInfoPtr) { - auto Err = Plugin::get().getDevice(DeviceId).dataRetrieve(HstPtr, TgtPtr, - Size, AsyncInfoPtr); + auto Err = PluginTy::get().getDevice(DeviceId).dataRetrieve( + HstPtr, TgtPtr, Size, AsyncInfoPtr); if (Err) { REPORT("Faliure to copy data from device to host. Pointers: host " "= " DPxMOD ", device = " DPxMOD ", size = %" PRId64 ": %s\n", @@ -1829,8 +1831,8 @@ int32_t __tgt_rtl_data_exchange_async(int32_t SrcDeviceId, void *SrcPtr, int DstDeviceId, void *DstPtr, int64_t Size, __tgt_async_info *AsyncInfo) { - GenericDeviceTy &SrcDevice = Plugin::get().getDevice(SrcDeviceId); - GenericDeviceTy &DstDevice = Plugin::get().getDevice(DstDeviceId); + GenericDeviceTy &SrcDevice = PluginTy::get().getDevice(SrcDeviceId); + GenericDeviceTy &DstDevice = PluginTy::get().getDevice(DstDeviceId); auto Err = SrcDevice.dataExchange(SrcPtr, DstDevice, DstPtr, Size, AsyncInfo); if (Err) { REPORT("Failure to copy data from device (%d) to device (%d). Pointers: " @@ -1847,7 +1849,7 @@ int32_t __tgt_rtl_launch_kernel(int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets, KernelArgsTy *KernelArgs, __tgt_async_info *AsyncInfoPtr) { - auto Err = Plugin::get().getDevice(DeviceId).launchKernel( + auto Err = PluginTy::get().getDevice(DeviceId).launchKernel( TgtEntryPtr, TgtArgs, TgtOffsets, *KernelArgs, AsyncInfoPtr); if (Err) { REPORT("Failure to run target region " DPxMOD " in device %d: %s\n", @@ -1860,7 +1862,7 @@ int32_t __tgt_rtl_launch_kernel(int32_t DeviceId, void *TgtEntryPtr, int32_t __tgt_rtl_synchronize(int32_t DeviceId, __tgt_async_info *AsyncInfoPtr) { - auto Err = Plugin::get().getDevice(DeviceId).synchronize(AsyncInfoPtr); + auto Err = PluginTy::get().getDevice(DeviceId).synchronize(AsyncInfoPtr); if (Err) { REPORT("Failure to synchronize stream %p: %s\n", AsyncInfoPtr->Queue, toString(std::move(Err)).data()); @@ -1872,7 +1874,7 @@ int32_t __tgt_rtl_synchronize(int32_t DeviceId, int32_t __tgt_rtl_query_async(int32_t DeviceId, __tgt_async_info *AsyncInfoPtr) { - auto Err = Plugin::get().getDevice(DeviceId).queryAsync(AsyncInfoPtr); + auto Err = PluginTy::get().getDevice(DeviceId).queryAsync(AsyncInfoPtr); if (Err) { REPORT("Failure to query stream %p: %s\n", AsyncInfoPtr->Queue, toString(std::move(Err)).data()); @@ -1883,13 +1885,13 @@ int32_t __tgt_rtl_query_async(int32_t DeviceId, } void __tgt_rtl_print_device_info(int32_t DeviceId) { - if (auto Err = Plugin::get().getDevice(DeviceId).printInfo()) + if (auto Err = PluginTy::get().getDevice(DeviceId).printInfo()) REPORT("Failure to print device %d info: %s\n", DeviceId, toString(std::move(Err)).data()); } int32_t __tgt_rtl_create_event(int32_t DeviceId, void **EventPtr) { - auto Err = Plugin::get().getDevice(DeviceId).createEvent(EventPtr); + auto Err = PluginTy::get().getDevice(DeviceId).createEvent(EventPtr); if (Err) { REPORT("Failure to create event: %s\n", toString(std::move(Err)).data()); return OFFLOAD_FAIL; @@ -1901,7 +1903,7 @@ int32_t __tgt_rtl_create_event(int32_t DeviceId, void **EventPtr) { int32_t __tgt_rtl_record_event(int32_t DeviceId, void *EventPtr, __tgt_async_info *AsyncInfoPtr) { auto Err = - Plugin::get().getDevice(DeviceId).recordEvent(EventPtr, AsyncInfoPtr); + PluginTy::get().getDevice(DeviceId).recordEvent(EventPtr, AsyncInfoPtr); if (Err) { REPORT("Failure to record event %p: %s\n", EventPtr, toString(std::move(Err)).data()); @@ -1914,7 +1916,7 @@ int32_t __tgt_rtl_record_event(int32_t DeviceId, void *EventPtr, int32_t __tgt_rtl_wait_event(int32_t DeviceId, void *EventPtr, __tgt_async_info *AsyncInfoPtr) { auto Err = - Plugin::get().getDevice(DeviceId).waitEvent(EventPtr, AsyncInfoPtr); + PluginTy::get().getDevice(DeviceId).waitEvent(EventPtr, AsyncInfoPtr); if (Err) { REPORT("Failure to wait event %p: %s\n", EventPtr, toString(std::move(Err)).data()); @@ -1925,7 +1927,7 @@ int32_t __tgt_rtl_wait_event(int32_t DeviceId, void *EventPtr, } int32_t __tgt_rtl_sync_event(int32_t DeviceId, void *EventPtr) { - auto Err = Plugin::get().getDevice(DeviceId).syncEvent(EventPtr); + auto Err = PluginTy::get().getDevice(DeviceId).syncEvent(EventPtr); if (Err) { REPORT("Failure to synchronize event %p: %s\n", EventPtr, toString(std::move(Err)).data()); @@ -1936,7 +1938,7 @@ int32_t __tgt_rtl_sync_event(int32_t DeviceId, void *EventPtr) { } int32_t __tgt_rtl_destroy_event(int32_t DeviceId, void *EventPtr) { - auto Err = Plugin::get().getDevice(DeviceId).destroyEvent(EventPtr); + auto Err = PluginTy::get().getDevice(DeviceId).destroyEvent(EventPtr); if (Err) { REPORT("Failure to destroy event %p: %s\n", EventPtr, toString(std::move(Err)).data()); @@ -1955,7 +1957,7 @@ int32_t __tgt_rtl_init_async_info(int32_t DeviceId, __tgt_async_info **AsyncInfoPtr) { assert(AsyncInfoPtr && "Invalid async info"); - auto Err = Plugin::get().getDevice(DeviceId).initAsyncInfo(AsyncInfoPtr); + auto Err = PluginTy::get().getDevice(DeviceId).initAsyncInfo(AsyncInfoPtr); if (Err) { REPORT("Failure to initialize async info at " DPxMOD " on device %d: %s\n", DPxPTR(*AsyncInfoPtr), DeviceId, toString(std::move(Err)).data()); @@ -1970,7 +1972,7 @@ int32_t __tgt_rtl_init_device_info(int32_t DeviceId, const char **ErrStr) { *ErrStr = ""; - auto Err = Plugin::get().getDevice(DeviceId).initDeviceInfo(DeviceInfo); + auto Err = PluginTy::get().getDevice(DeviceId).initDeviceInfo(DeviceInfo); if (Err) { REPORT("Failure to initialize device info at " DPxMOD " on device %d: %s\n", DPxPTR(DeviceInfo), DeviceId, toString(std::move(Err)).data()); @@ -1981,7 +1983,7 @@ int32_t __tgt_rtl_init_device_info(int32_t DeviceId, } int32_t __tgt_rtl_set_device_offset(int32_t DeviceIdOffset) { - Plugin::get().setDeviceIdStartIndex(DeviceIdOffset); + PluginTy::get().setDeviceIdStartIndex(DeviceIdOffset); return OFFLOAD_SUCCESS; } @@ -1990,9 +1992,9 @@ int32_t __tgt_rtl_use_auto_zero_copy(int32_t DeviceId) { // Automatic zero-copy only applies to programs that did // not request unified_shared_memory and are deployed on an // APU with XNACK enabled. - if (Plugin::get().getRequiresFlags() & OMP_REQ_UNIFIED_SHARED_MEMORY) + if (PluginTy::get().getRequiresFlags() & OMP_REQ_UNIFIED_SHARED_MEMORY) return false; - return Plugin::get().getDevice(DeviceId).useAutoZeroCopy(); + return PluginTy::get().getDevice(DeviceId).useAutoZeroCopy(); } int32_t __tgt_rtl_get_global(__tgt_device_binary Binary, uint64_t Size, @@ -2000,7 +2002,7 @@ int32_t __tgt_rtl_get_global(__tgt_device_binary Binary, uint64_t Size, assert(Binary.handle && "Invalid device binary handle"); DeviceImageTy &Image = *reinterpret_cast(Binary.handle); - GenericPluginTy &Plugin = Plugin::get(); + GenericPluginTy &Plugin = PluginTy::get(); GenericDeviceTy &Device = Image.getDevice(); GlobalTy DeviceGlobal(Name, Size); diff --git a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp index b862bc7490925..d43c723c35028 100644 --- a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp @@ -471,7 +471,7 @@ struct CUDADeviceTy : public GenericDeviceTy { /// Allocate and construct a CUDA kernel. Expected constructKernel(const char *Name) override { // Allocate and construct the CUDA kernel. - CUDAKernelTy *CUDAKernel = Plugin::get().allocate(); + CUDAKernelTy *CUDAKernel = PluginTy::get().allocate(); if (!CUDAKernel) return Plugin::error("Failed to allocate memory for CUDA kernel"); @@ -529,7 +529,8 @@ struct CUDADeviceTy : public GenericDeviceTy { return std::move(Err); // Allocate and initialize the image object. - CUDADeviceImageTy *CUDAImage = Plugin::get().allocate(); + CUDADeviceImageTy *CUDAImage = + PluginTy::get().allocate(); new (CUDAImage) CUDADeviceImageTy(ImageId, *this, TgtImage); // Load the CUDA module. @@ -1371,6 +1372,16 @@ struct CUDAPluginTy final : public GenericPluginTy { /// Deinitialize the plugin. Error deinitImpl() override { return Plugin::success(); } + /// Creates a CUDA device to use for offloading. + GenericDeviceTy *createDevice(int32_t DeviceId, int32_t NumDevices) override { + return new CUDADeviceTy(DeviceId, NumDevices); + } + + /// Creates a CUDA global handler. + GenericGlobalHandlerTy *createGlobalHandler() override { + return new CUDAGlobalHandlerTy(); + } + /// Get the ELF code for recognizing the compatible image binary. uint16_t getMagicElfBits() const override { return ELF::EM_CUDA; } @@ -1484,18 +1495,10 @@ Error CUDADeviceTy::dataExchangeImpl(const void *SrcPtr, return Plugin::check(Res, "Error in cuMemcpyDtoDAsync: %s"); } -GenericPluginTy *Plugin::createPlugin() { return new CUDAPluginTy(); } - -GenericDeviceTy *Plugin::createDevice(int32_t DeviceId, int32_t NumDevices) { - return new CUDADeviceTy(DeviceId, NumDevices); -} - -GenericGlobalHandlerTy *Plugin::createGlobalHandler() { - return new CUDAGlobalHandlerTy(); -} +GenericPluginTy *PluginTy::createPlugin() { return new CUDAPluginTy(); } template -Error Plugin::check(int32_t Code, const char *ErrFmt, ArgsTy... Args) { +static Error Plugin::check(int32_t Code, const char *ErrFmt, ArgsTy... Args) { CUresult ResultCode = static_cast(Code); if (ResultCode == CUDA_SUCCESS) return Error::success(); diff --git a/openmp/libomptarget/plugins-nextgen/host/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/host/src/rtl.cpp index 1ef18814a26ac..ea8ed8d6a8569 100644 --- a/openmp/libomptarget/plugins-nextgen/host/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/host/src/rtl.cpp @@ -66,7 +66,7 @@ struct GenELF64KernelTy : public GenericKernelTy { GlobalTy Global(getName(), 0); // Get the metadata (address) of the kernel function. - GenericGlobalHandlerTy &GHandler = Plugin::get().getGlobalHandler(); + GenericGlobalHandlerTy &GHandler = PluginTy::get().getGlobalHandler(); if (auto Err = GHandler.getGlobalMetadataFromDevice(Device, Image, Global)) return Err; @@ -150,7 +150,7 @@ struct GenELF64DeviceTy : public GenericDeviceTy { Expected constructKernel(const char *Name) override { // Allocate and construct the kernel. GenELF64KernelTy *GenELF64Kernel = - Plugin::get().allocate(); + PluginTy::get().allocate(); if (!GenELF64Kernel) return Plugin::error("Failed to allocate memory for GenELF64 kernel"); @@ -167,7 +167,7 @@ struct GenELF64DeviceTy : public GenericDeviceTy { int32_t ImageId) override { // Allocate and initialize the image object. GenELF64DeviceImageTy *Image = - Plugin::get().allocate(); + PluginTy::get().allocate(); new (Image) GenELF64DeviceImageTy(ImageId, *this, TgtImage); // Create a temporary file. @@ -399,6 +399,16 @@ struct GenELF64PluginTy final : public GenericPluginTy { /// Deinitialize the plugin. Error deinitImpl() override { return Plugin::success(); } + /// Creates a generic ELF device. + GenericDeviceTy *createDevice(int32_t DeviceId, int32_t NumDevices) override { + return new GenELF64DeviceTy(DeviceId, NumDevices); + } + + /// Creates a generic global handler. + GenericGlobalHandlerTy *createGlobalHandler() override { + return new GenELF64GlobalHandlerTy(); + } + /// Get the ELF code to recognize the compatible binary images. uint16_t getMagicElfBits() const override { return ELF::TARGET_ELF_ID; } @@ -415,18 +425,10 @@ struct GenELF64PluginTy final : public GenericPluginTy { } }; -GenericPluginTy *Plugin::createPlugin() { return new GenELF64PluginTy(); } - -GenericDeviceTy *Plugin::createDevice(int32_t DeviceId, int32_t NumDevices) { - return new GenELF64DeviceTy(DeviceId, NumDevices); -} - -GenericGlobalHandlerTy *Plugin::createGlobalHandler() { - return new GenELF64GlobalHandlerTy(); -} +GenericPluginTy *PluginTy::createPlugin() { return new GenELF64PluginTy(); } template -Error Plugin::check(int32_t Code, const char *ErrMsg, ArgsTy... Args) { +static Error Plugin::check(int32_t Code, const char *ErrMsg, ArgsTy... Args) { if (Code == 0) return Error::success();