Skip to content

Commit

Permalink
Revert "[OpenMP][libomptarget] Enable automatic unified shared memory…
Browse files Browse the repository at this point in the history
… executi…" (#77371)

Reverts #75999

lit test is failing.
  • Loading branch information
carlobertolli committed Jan 8, 2024
1 parent 6684a09 commit ce41444
Show file tree
Hide file tree
Showing 12 changed files with 6 additions and 197 deletions.
3 changes: 0 additions & 3 deletions openmp/libomptarget/include/Shared/PluginAPI.h
Original file line number Diff line number Diff line change
Expand Up @@ -219,9 +219,6 @@ int32_t __tgt_rtl_initialize_record_replay(int32_t DeviceId, int64_t MemorySize,
void *VAddr, bool isRecord,
bool SaveOutput,
uint64_t &ReqPtrArgOffset);

// Returns true if the device \p DeviceId suggests to use auto zero-copy.
int32_t __tgt_rtl_use_auto_zero_copy(int32_t DeviceId);
}

#endif // OMPTARGET_SHARED_PLUGIN_API_H
1 change: 0 additions & 1 deletion openmp/libomptarget/include/Shared/PluginAPI.inc
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,3 @@ PLUGIN_API_HANDLE(data_notify_mapped, false);
PLUGIN_API_HANDLE(data_notify_unmapped, false);
PLUGIN_API_HANDLE(set_device_offset, false);
PLUGIN_API_HANDLE(initialize_record_replay, false);
PLUGIN_API_HANDLE(use_auto_zero_copy, false);
15 changes: 1 addition & 14 deletions openmp/libomptarget/include/Shared/Requirements.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,7 @@ enum OpenMPOffloadingRequiresDirFlags : int64_t {
/// unified_shared_memory clause.
OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
/// dynamic_allocators clause.
OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
/// Auto zero-copy extension:
/// when running on an APU, the GPU plugin may decide to
/// run in zero-copy even though the user did not program
/// their application with unified_shared_memory requirement.
OMPX_REQ_AUTO_ZERO_COPY = 0x020
OMP_REQ_DYNAMIC_ALLOCATORS = 0x010
};

class RequirementCollection {
Expand Down Expand Up @@ -70,14 +65,6 @@ class RequirementCollection {
return;
}

// Auto zero-copy is only valid when no other requirement has been set
// and it is computed at device initialization time, after the requirement
// flag has already been set to OMP_REQ_NONE.
if (SetFlags == OMP_REQ_NONE && NewFlags == OMPX_REQ_AUTO_ZERO_COPY) {
SetFlags = NewFlags;
return;
}

// If multiple compilation units are present enforce
// consistency across all of them for require clauses:
// - reverse_offload
Expand Down
3 changes: 0 additions & 3 deletions openmp/libomptarget/include/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,9 +164,6 @@ struct DeviceTy {
/// Print all offload entries to stderr.
void dumpOffloadEntries();

/// Ask the device whether the runtime should use auto zero-copy.
bool useAutoZeroCopy();

private:
/// Deinitialize the device (and plugin).
void deinit();
Expand Down
47 changes: 2 additions & 45 deletions openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1848,9 +1848,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
OMPX_StreamBusyWait("LIBOMPTARGET_AMDGPU_STREAM_BUSYWAIT", 2000000),
OMPX_UseMultipleSdmaEngines(
"LIBOMPTARGET_AMDGPU_USE_MULTIPLE_SDMA_ENGINES", false),
HSAXnackEnv("HSA_XNACK", false), AMDGPUStreamManager(*this, Agent),
AMDGPUEventManager(*this), AMDGPUSignalManager(*this), Agent(Agent),
HostDevice(HostDevice) {}
AMDGPUStreamManager(*this, Agent), AMDGPUEventManager(*this),
AMDGPUSignalManager(*this), Agent(Agent), HostDevice(HostDevice) {}

~AMDGPUDeviceTy() {}

Expand Down Expand Up @@ -1941,10 +1940,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
if (auto Err = AMDGPUSignalManager.init(OMPX_InitialNumSignals))
return Err;

// detect if device is an APU.
if (auto Err = checkIfAPU())
return Err;

return Plugin::success();
}

Expand Down Expand Up @@ -2636,14 +2631,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
return Plugin::success();
}

/// Returns true if auto zero-copy the best configuration for the current
/// arch.
bool useAutoZeroCopyImpl() override {
// XNACK can be enabled with with kernel boot parameter or with
// environment variable.
return (IsAPU && (HSAXnackEnv || utils::isXnackEnabledViaKernelParam()));
}

/// Getters and setters for stack and heap sizes.
Error getDeviceStackSize(uint64_t &Value) override {
Value = StackSize;
Expand Down Expand Up @@ -2741,30 +2728,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
return Err;
}

/// Detect if current architecture is an APU.
Error checkIfAPU() {
std::string StrGfxName(ComputeUnitKind);
std::transform(std::begin(StrGfxName), std::end(StrGfxName),
std::begin(StrGfxName),
[](char c) { return std::tolower(c); });
if (StrGfxName == "gfx940") {
IsAPU = true;
return Plugin::success();
}
if (StrGfxName == "gfx942") {
// can be MI300A or MI300X
uint32_t ChipID = 0;
if (auto Err = getDeviceAttr(HSA_AMD_AGENT_INFO_CHIP_ID, ChipID))
return Err;

if (!(ChipID & 0x1)) {
IsAPU = true;
return Plugin::success();
}
}
return Plugin::success();
}

/// Envar for controlling the number of HSA queues per device. High number of
/// queues may degrade performance.
UInt32Envar OMPX_NumQueues;
Expand Down Expand Up @@ -2801,9 +2764,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
/// Use ROCm 5.7 interface for multiple SDMA engines
BoolEnvar OMPX_UseMultipleSdmaEngines;

/// Value of HSA_XNACK environment variable.
BoolEnvar HSAXnackEnv;

/// Stream manager for AMDGPU streams.
AMDGPUStreamManagerTy AMDGPUStreamManager;

Expand Down Expand Up @@ -2834,9 +2794,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
/// The current size of the stack that will be used in cases where it could
/// not be statically determined.
uint64_t StackSize = 16 * 1024 /* 16 KB */;

/// Is the plugin associated with an APU?
bool IsAPU{false};
};

Error AMDGPUDeviceImageTy::loadExecutable(const AMDGPUDeviceTy &Device) {
Expand Down
28 changes: 0 additions & 28 deletions openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,34 +116,6 @@ inline bool isImageCompatibleWithEnv(StringRef ImageArch, uint32_t ImageFlags,
return true;
}

inline bool isXnackEnabledViaKernelParam() {

ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrError =
MemoryBuffer::getFileAsStream("/proc/cmdline");

if (std::error_code ErrorCode = FileOrError.getError()) {
FAILURE_MESSAGE("Cannot open /proc/cmdline : %s\n",
ErrorCode.message().c_str());
return false;
}

StringRef FileContent = (FileOrError.get())->getBuffer();

StringRef RefString("amdgpu.noretry=");
int SizeOfRefString = RefString.size();

size_t Pos = FileContent.find_insensitive(RefString);
// Is noretry defined?
if (Pos != StringRef::npos) {
bool NoRetryValue = FileContent[Pos + SizeOfRefString] - '0';
// is noretry set to 0
if (!NoRetryValue)
return true;
}

return false;
}

struct KernelMetaDataTy {
uint64_t KernelObject;
uint32_t GroupSegmentList;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -872,11 +872,6 @@ struct GenericDeviceTy : public DeviceAllocatorTy {

virtual Error getDeviceStackSize(uint64_t &V) = 0;

/// Returns true if current plugin architecture is an APU
/// and unified_shared_memory was not requested by the program.
bool useAutoZeroCopy();
virtual bool useAutoZeroCopyImpl() { return false; }

private:
/// Register offload entry for global variable.
Error registerGlobalOffloadEntry(DeviceImageTy &DeviceImage,
Expand Down
10 changes: 0 additions & 10 deletions openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1561,8 +1561,6 @@ Error GenericDeviceTy::syncEvent(void *EventPtr) {
return syncEventImpl(EventPtr);
}

bool GenericDeviceTy::useAutoZeroCopy() { return useAutoZeroCopyImpl(); }

Error GenericPluginTy::init() {
auto NumDevicesOrErr = initImpl();
if (!NumDevicesOrErr)
Expand Down Expand Up @@ -2075,14 +2073,6 @@ int32_t __tgt_rtl_set_device_offset(int32_t DeviceIdOffset) {
return OFFLOAD_SUCCESS;
}

int32_t __tgt_rtl_use_auto_zero_copy(int32_t DeviceId) {
// Automatic zero-copy only applies to programs that did
// not request unified_shared_memory and are deployed on an
// APU with XNACK enabled.
if (Plugin::get().getRequiresFlags() & OMP_REQ_UNIFIED_SHARED_MEMORY)
return false;
return Plugin::get().getDevice(DeviceId).useAutoZeroCopy();
}
#ifdef __cplusplus
}
#endif
12 changes: 3 additions & 9 deletions openmp/libomptarget/src/OpenMP/Mapping.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -252,20 +252,15 @@ TargetPointerResultTy MappingInfoTy::getTargetPointer(
MESSAGE("device mapping required by 'present' map type modifier does not "
"exist for host address " DPxMOD " (%" PRId64 " bytes)",
DPxPTR(HstPtrBegin), Size);
} else if ((PM->getRequirements() & OMP_REQ_UNIFIED_SHARED_MEMORY &&
!HasCloseModifier) ||
(PM->getRequirements() & OMPX_REQ_AUTO_ZERO_COPY)) {
} else if (PM->getRequirements() & OMP_REQ_UNIFIED_SHARED_MEMORY &&
!HasCloseModifier) {
// If unified shared memory is active, implicitly mapped variables that are
// not privatized use host address. Any explicitly mapped variables also use
// host address where correctness is not impeded. In all other cases maps
// are respected.
// In addition to the mapping rules above, the close map modifier forces the
// mapping of the variable to the device.
if (Size) {
INFO(OMP_INFOTYPE_MAPPING_CHANGED, Device.DeviceID,
"Return HstPtrBegin " DPxMOD " Size=%" PRId64 " for unified shared "
"memory\n",
DPxPTR((uintptr_t)HstPtrBegin), Size);
DP("Return HstPtrBegin " DPxMOD " Size=%" PRId64 " for unified shared "
"memory\n",
DPxPTR((uintptr_t)HstPtrBegin), Size);
Expand Down Expand Up @@ -420,8 +415,7 @@ TargetPointerResultTy MappingInfoTy::getTgtPtrBegin(
LR.TPR.getEntry()->dynRefCountToStr().c_str(), DynRefCountAction,
LR.TPR.getEntry()->holdRefCountToStr().c_str(), HoldRefCountAction);
LR.TPR.TargetPointer = (void *)TP;
} else if (PM->getRequirements() & OMP_REQ_UNIFIED_SHARED_MEMORY ||
PM->getRequirements() & OMPX_REQ_AUTO_ZERO_COPY) {
} else if (PM->getRequirements() & OMP_REQ_UNIFIED_SHARED_MEMORY) {
// If the value isn't found in the mapping and unified shared memory
// is on then it means we have stumbled upon a value which we need to
// use directly from the host.
Expand Down
14 changes: 0 additions & 14 deletions openmp/libomptarget/src/PluginManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,33 +144,19 @@ void PluginAdaptorTy::initDevices(PluginManager &PM) {

int32_t NumPD = getNumberOfPluginDevices();
ExclusiveDevicesAccessor->reserve(DeviceOffset + NumPD);
// Auto zero-copy is a per-device property. We need to ensure
// that all devices are suggesting to use it.
bool UseAutoZeroCopy = true;
if (NumPD == 0)
UseAutoZeroCopy = false;
for (int32_t PDevI = 0, UserDevId = DeviceOffset; PDevI < NumPD; PDevI++) {
auto Device = std::make_unique<DeviceTy>(this, UserDevId, PDevI);

if (auto Err = Device->init()) {
DP("Skip plugin known device %d: %s\n", PDevI,
toString(std::move(Err)).c_str());
continue;
}
UseAutoZeroCopy = UseAutoZeroCopy && Device->useAutoZeroCopy();

ExclusiveDevicesAccessor->push_back(std::move(Device));
++NumberOfUserDevices;
++UserDevId;
}

// Auto Zero-Copy can only be currently triggered when the system is an
// homogeneous APU architecture without attached discrete GPUs.
// If all devices suggest to use it, change requirment flags to trigger
// zero-copy behavior when mapping memory.
if (UseAutoZeroCopy)
PM.addRequirements(OMPX_REQ_AUTO_ZERO_COPY);

DP("Plugin adaptor " DPxMOD " has index %d, exposes %d out of %d devices!\n",
DPxPTR(LibraryHandler.get()), DeviceOffset, NumberOfUserDevices,
NumberOfPluginDevices);
Expand Down
6 changes: 0 additions & 6 deletions openmp/libomptarget/src/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -340,9 +340,3 @@ void DeviceTy::dumpOffloadEntries() {
fprintf(stderr, " %11s: %s\n", Kind, It.second->getNameAsCStr());
}
}

bool DeviceTy::useAutoZeroCopy() {
if (RTL->use_auto_zero_copy)
return RTL->use_auto_zero_copy(RTLDeviceID);
return false;
}
59 changes: 0 additions & 59 deletions openmp/libomptarget/test/mapping/auto_zero_copy.cpp

This file was deleted.

0 comments on commit ce41444

Please sign in to comment.