Skip to content

Commit 1e447d0

Browse files
committed
[OpenMP] Introduce an environment variable to disable atomic map clauses
Atomic handling of map clauses was introduced to comply with the OpenMP standard (see D104418). However, many apps won't need this feature which can be costly in certain situations. To allow for applications to opt-out we now introduce the `LIBOMPTARGET_MAP_FORCE_ATOMIC` environment flag that voids the atomicity guarantee of the standard for map clauses again, shifting the burden to the user. This patch also de-duplicates the code that introduces the events used to enforce atomicity as a cleanup. Differential Revision: https://reviews.llvm.org/D117627
1 parent eb675e9 commit 1e447d0

File tree

5 files changed

+89
-51
lines changed

5 files changed

+89
-51
lines changed

openmp/docs/design/Runtimes.rst

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -683,6 +683,7 @@ variables is defined below.
683683
* ``LIBOMPTARGET_HEAP_SIZE=<Num>``
684684
* ``LIBOMPTARGET_STACK_SIZE=<Num>``
685685
* ``LIBOMPTARGET_SHARED_MEMORY_SIZE=<Num>``
686+
* ``LIBOMPTARGET_MAP_FORCE_ATOMIC=[TRUE/FALSE] (default TRUE)``
686687

687688
LIBOMPTARGET_DEBUG
688689
""""""""""""""""""
@@ -1003,6 +1004,23 @@ runtime call.
10031004

10041005
Offloading
10051006

1007+
1008+
LIBOMPTARGET_MAP_FORCE_ATOMIC
1009+
"""""""""""""""""""""""""""""
1010+
1011+
The OpenMP standard guarantees that map clauses are atomic. However, the this
1012+
can have a drastic performance impact. Users that do not require atomic map
1013+
clauses can disable them to potentially recover lost performance. As a
1014+
consequence, users have to guarantee themselves that no two map clauses will
1015+
concurrently map the same memory. If the memory is already mapped and the
1016+
map clauses will only modify the reference counter from a non-zero count to
1017+
another non-zero count, concurrent map clauses are supported regardless of
1018+
this option. To disable forced atomic map clauses use "false"/"FALSE" as the
1019+
value of the ``LIBOMPTARGET_MAP_FORCE_ATOMIC`` environment variable.
1020+
The default behavior of LLVM 14 is to force atomic maps clauses, prior versions
1021+
of LLVM did not.
1022+
1023+
10061024
LLVM/OpenMP Target Host Runtime Plugins (``libomptarget.rtl.XXXX``)
10071025
-------------------------------------------------------------------
10081026

openmp/libomptarget/include/device.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,10 @@ struct HostDataToTargetTy {
126126
/// Get the event bound to this data map.
127127
void *getEvent() const { return States->Event; }
128128

129+
/// Add a new event, if necessary.
130+
/// Returns OFFLOAD_FAIL if something went wrong, OFFLOAD_SUCCESS otherwise.
131+
int addEventIfNecessary(DeviceTy &Device, AsyncInfoTy &AsyncInfo) const;
132+
129133
/// Set the event bound to this data map.
130134
void setEvent(void *Event) const { States->Event = Event; }
131135

@@ -192,6 +196,18 @@ struct HostDataToTargetTy {
192196
return ThisRefCount == 1;
193197
}
194198

199+
/// Helper to make sure the entry is locked in a scope.
200+
/// TODO: We should generalize this and use it for all our objects that use
201+
/// lock/unlock methods.
202+
struct LockGuard {
203+
const HostDataToTargetTy &Entry;
204+
205+
public:
206+
LockGuard(const HostDataToTargetTy &Entry) : Entry(Entry) { Entry.lock(); }
207+
~LockGuard() { Entry.unlock(); }
208+
};
209+
210+
private:
195211
void lock() const { States->UpdateMtx.lock(); }
196212

197213
void unlock() const { States->UpdateMtx.unlock(); }
@@ -396,6 +412,9 @@ extern bool device_is_ready(int device_num);
396412

397413
/// Struct for the data required to handle plugins
398414
struct PluginManager {
415+
PluginManager(bool UseEventsForAtomicTransfers)
416+
: UseEventsForAtomicTransfers(UseEventsForAtomicTransfers) {}
417+
399418
/// RTLs identified on the host
400419
RTLsTy RTLs;
401420

@@ -416,6 +435,10 @@ struct PluginManager {
416435
// Store target policy (disabled, mandatory, default)
417436
kmp_target_offload_kind_t TargetOffloadPolicy = tgt_default;
418437
std::mutex TargetOffloadMtx; ///< For TargetOffloadPolicy
438+
439+
/// Flag to indicate if we use events to ensure the atomicity of
440+
/// map clauses or not. Can be modified with an environment variable.
441+
const bool UseEventsForAtomicTransfers;
419442
};
420443

421444
extern PluginManager *PM;

openmp/libomptarget/src/device.cpp

Lines changed: 31 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,33 @@
1919
#include <cstdio>
2020
#include <string>
2121

22+
int HostDataToTargetTy::addEventIfNecessary(
23+
DeviceTy &Device, AsyncInfoTy &AsyncInfo) const {
24+
// First, check if the user disabled atomic map transfer/malloc/dealloc.
25+
if (!PM->UseEventsForAtomicTransfers)
26+
return OFFLOAD_SUCCESS;
27+
28+
void *Event = getEvent();
29+
bool NeedNewEvent = Event == nullptr;
30+
if (NeedNewEvent && Device.createEvent(&Event) != OFFLOAD_SUCCESS) {
31+
REPORT("Failed to create event\n");
32+
return OFFLOAD_FAIL;
33+
}
34+
35+
// We cannot assume the event should not be nullptr because we don't
36+
// know if the target support event. But if a target doesn't,
37+
// recordEvent should always return success.
38+
if (Device.recordEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) {
39+
REPORT("Failed to set dependence on event " DPxMOD "\n", DPxPTR(Event));
40+
return OFFLOAD_FAIL;
41+
}
42+
43+
if (NeedNewEvent)
44+
setEvent(Event);
45+
46+
return OFFLOAD_SUCCESS;
47+
}
48+
2249
DeviceTy::DeviceTy(RTLInfoTy *RTL)
2350
: DeviceID(-1), RTL(RTL), RTLDeviceID(-1), IsInit(false), InitFlag(),
2451
HasPendingGlobals(false), HostDataToTargetMap(), PendingCtorsDtors(),
@@ -259,7 +286,7 @@ DeviceTy::getTargetPointer(void *HstPtrBegin, void *HstPtrBase, int64_t Size,
259286
if (TargetPointer && !IsHostPtr && HasFlagTo && (IsNew || HasFlagAlways)) {
260287
// Lock the entry before releasing the mapping table lock such that another
261288
// thread that could issue data movement will get the right result.
262-
Entry->lock();
289+
HostDataToTargetTy::LockGuard LG(*Entry);
263290
// Release the mapping table lock right after the entry is locked.
264291
DataMapMtx.unlock();
265292

@@ -268,38 +295,16 @@ DeviceTy::getTargetPointer(void *HstPtrBegin, void *HstPtrBase, int64_t Size,
268295

269296
int Ret = submitData(TargetPointer, HstPtrBegin, Size, AsyncInfo);
270297
if (Ret != OFFLOAD_SUCCESS) {
271-
Entry->unlock();
272298
REPORT("Copying data to device failed.\n");
273299
// We will also return nullptr if the data movement fails because that
274300
// pointer points to a corrupted memory region so it doesn't make any
275301
// sense to continue to use it.
276302
TargetPointer = nullptr;
277-
}
278-
279-
void *Event = Entry->getEvent();
280-
bool NeedNewEvent = Event == nullptr;
281-
if (NeedNewEvent && createEvent(&Event) != OFFLOAD_SUCCESS) {
282-
Entry->unlock();
283-
REPORT("Failed to create event\n");
284-
return {{false /* IsNewEntry */, false /* IsHostPointer */},
285-
{} /* MapTableEntry */,
286-
nullptr /* TargetPointer */};
287-
}
288-
// We cannot assume the event should not be nullptr because we don't
289-
// know if the target support event. But if a target doesn't,
290-
// recordEvent should always return success.
291-
Ret = recordEvent(Event, AsyncInfo);
292-
if (Ret != OFFLOAD_SUCCESS) {
293-
Entry->unlock();
294-
REPORT("Failed to set dependence on event " DPxMOD "\n", DPxPTR(Event));
303+
} else if (Entry->addEventIfNecessary(*this, AsyncInfo) !=
304+
OFFLOAD_SUCCESS)
295305
return {{false /* IsNewEntry */, false /* IsHostPointer */},
296306
{} /* MapTableEntry */,
297307
nullptr /* TargetPointer */};
298-
}
299-
if (NeedNewEvent)
300-
Entry->setEvent(Event);
301-
// We're done with the entry. Release the entry.
302-
Entry->unlock();
303308
} else {
304309
// Release the mapping table lock directly.
305310
DataMapMtx.unlock();
@@ -308,11 +313,10 @@ DeviceTy::getTargetPointer(void *HstPtrBegin, void *HstPtrBase, int64_t Size,
308313
// Note: Entry might be nullptr because of zero length array section.
309314
if (Entry != HostDataToTargetListTy::iterator() && !IsHostPtr &&
310315
!HasPresentModifier) {
311-
Entry->lock();
316+
HostDataToTargetTy::LockGuard LG(*Entry);
312317
void *Event = Entry->getEvent();
313318
if (Event) {
314319
int Ret = waitEvent(Event, AsyncInfo);
315-
Entry->unlock();
316320
if (Ret != OFFLOAD_SUCCESS) {
317321
// If it fails to wait for the event, we need to return nullptr in
318322
// case of any data race.
@@ -321,8 +325,6 @@ DeviceTy::getTargetPointer(void *HstPtrBegin, void *HstPtrBase, int64_t Size,
321325
{} /* MapTableEntry */,
322326
nullptr /* TargetPointer */};
323327
}
324-
} else {
325-
Entry->unlock();
326328
}
327329
}
328330
}

openmp/libomptarget/src/omptarget.cpp

Lines changed: 3 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -572,7 +572,7 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
572572
}
573573

574574
if (UpdateDevPtr) {
575-
Pointer_TPR.MapTableEntry->lock();
575+
HostDataToTargetTy::LockGuard LG(*Pointer_TPR.MapTableEntry);
576576
Device.ShadowMtx.unlock();
577577

578578
DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n",
@@ -584,30 +584,12 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
584584
int Ret = Device.submitData(PointerTgtPtrBegin, &TgtPtrBase,
585585
sizeof(void *), AsyncInfo);
586586
if (Ret != OFFLOAD_SUCCESS) {
587-
Pointer_TPR.MapTableEntry->unlock();
588587
REPORT("Copying data to device failed.\n");
589588
return OFFLOAD_FAIL;
590589
}
591-
void *Event = Pointer_TPR.MapTableEntry->getEvent();
592-
bool NeedNewEvent = Event == nullptr;
593-
if (NeedNewEvent && Device.createEvent(&Event) != OFFLOAD_SUCCESS) {
594-
Pointer_TPR.MapTableEntry->unlock();
595-
REPORT("Failed to create event.\n");
590+
if (Pointer_TPR.MapTableEntry->addEventIfNecessary(Device, AsyncInfo) !=
591+
OFFLOAD_SUCCESS)
596592
return OFFLOAD_FAIL;
597-
}
598-
// We cannot assume the event should not be nullptr because we don't
599-
// know if the target support event. But if a target doesn't,
600-
// recordEvent should always return success.
601-
Ret = Device.recordEvent(Event, AsyncInfo);
602-
if (Ret != OFFLOAD_SUCCESS) {
603-
Pointer_TPR.MapTableEntry->unlock();
604-
REPORT("Failed to set dependence on event " DPxMOD "\n",
605-
DPxPTR(Event));
606-
return OFFLOAD_FAIL;
607-
}
608-
if (NeedNewEvent)
609-
Pointer_TPR.MapTableEntry->setEvent(Event);
610-
Pointer_TPR.MapTableEntry->unlock();
611593
} else
612594
Device.ShadowMtx.unlock();
613595
}

openmp/libomptarget/src/rtl.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,20 @@ static char *ProfileTraceFile = nullptr;
4040

4141
__attribute__((constructor(101))) void init() {
4242
DP("Init target library!\n");
43-
PM = new PluginManager();
43+
44+
bool UseEventsForAtomicTransfers = true;
45+
if (const char *ForceAtomicMap = getenv("LIBOMPTARGET_MAP_FORCE_ATOMIC")) {
46+
std::string ForceAtomicMapStr(ForceAtomicMap);
47+
if (ForceAtomicMapStr == "false" || ForceAtomicMapStr == "FALSE")
48+
UseEventsForAtomicTransfers = false;
49+
else if (ForceAtomicMapStr != "true" && ForceAtomicMapStr != "TRUE")
50+
fprintf(stderr,
51+
"Warning: 'LIBOMPTARGET_MAP_FORCE_ATOMIC' accepts only "
52+
"'true'/'TRUE' or 'false'/'FALSE' as options, '%s' ignored\n",
53+
ForceAtomicMap);
54+
}
55+
56+
PM = new PluginManager(UseEventsForAtomicTransfers);
4457

4558
#ifdef OMPTARGET_PROFILE_ENABLED
4659
ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");

0 commit comments

Comments
 (0)