Skip to content

Commit

Permalink
[OpenMP][FIX] Runtime args are not kernel args
Browse files Browse the repository at this point in the history
Clang passes `KernelArgs.NumArgs` to the runtime but not all are kernel
arguments. This ensures we fallback to the old logic. In a follow up we
should introduce a new `KernelArgs.NumKernelArgs` field and set it in
the runtime.
  • Loading branch information
jdoerfert committed Jan 21, 2023
1 parent 366fc10 commit 3820d0e
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 7 deletions.
2 changes: 1 addition & 1 deletion openmp/libomptarget/include/omptarget.h
Expand Up @@ -137,7 +137,7 @@ static_assert(sizeof(KernelArgsTy().Flags) == sizeof(uint64_t),
"Invalid struct size");
static_assert(sizeof(KernelArgsTy) == (8 * sizeof(int32_t) + 3 * sizeof(int64_t) + 4 * sizeof(void**) + 2 * sizeof(int64_t*)),
"Invalid struct size");
constexpr KernelArgsTy CTorDTorKernelArgs = {1, 0, nullptr, nullptr,
inline KernelArgsTy CTorDTorKernelArgs = {1, 0, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr,
0, {0,0}, {1, 0, 0}, {1, 0, 0}, 0};

Expand Down
18 changes: 13 additions & 5 deletions openmp/libomptarget/src/omptarget.cpp
Expand Up @@ -1619,7 +1619,7 @@ static int processDataAfter(ident_t *Loc, int64_t DeviceId, void *HostPtr,
/// returns 0 if it was able to transfer the execution to a target and an
/// integer different from zero otherwise.
int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
const KernelArgsTy &KernelArgs, AsyncInfoTy &AsyncInfo) {
KernelArgsTy &KernelArgs, AsyncInfoTy &AsyncInfo) {
int32_t DeviceId = Device.DeviceID;
TableMap *TM = getTableMap(HostPtr);
// No map for this host pointer found!
Expand Down Expand Up @@ -1655,10 +1655,11 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,

PrivateArgumentManagerTy PrivateArgumentManager(Device, AsyncInfo);

int NumClangLaunchArgs = KernelArgs.NumArgs;
int Ret = OFFLOAD_SUCCESS;
if (KernelArgs.NumArgs) {
if (NumClangLaunchArgs) {
// Process data, such as data mapping, before launching the kernel
Ret = processDataBefore(Loc, DeviceId, HostPtr, KernelArgs.NumArgs,
Ret = processDataBefore(Loc, DeviceId, HostPtr, NumClangLaunchArgs,
KernelArgs.ArgBasePtrs, KernelArgs.ArgPtrs,
KernelArgs.ArgSizes, KernelArgs.ArgTypes,
KernelArgs.ArgNames, KernelArgs.ArgMappers, TgtArgs,
Expand All @@ -1667,6 +1668,12 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
REPORT("Failed to process data before launching the kernel.\n");
return OFFLOAD_FAIL;
}

// Clang might pass more values via the ArgPtrs to the runtime that we pass
// on to the kernel.
// TOOD: Next time we adjust the KernelArgsTy we should introduce a new
// NumKernelArgs field.
KernelArgs.NumArgs = TgtArgs.size();
}

// Launch device execution.
Expand All @@ -1675,6 +1682,7 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
TargetTable->EntriesBegin[TM->Index].name, DPxPTR(TgtEntryPtr), TM->Index);

{
assert(KernelArgs.NumArgs == TgtArgs.size() && "Argument count mismatch!");
TIMESCOPE_WITH_NAME_AND_IDENT("Initiate Kernel Launch", Loc);
Ret = Device.launchKernel(TgtEntryPtr, TgtArgs.data(), TgtOffsets.data(),
KernelArgs, AsyncInfo);
Expand All @@ -1685,10 +1693,10 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
return OFFLOAD_FAIL;
}

if (KernelArgs.NumArgs) {
if (NumClangLaunchArgs) {
// Transfer data back and deallocate target memory for (first-)private
// variables
Ret = processDataAfter(Loc, DeviceId, HostPtr, KernelArgs.NumArgs,
Ret = processDataAfter(Loc, DeviceId, HostPtr, NumClangLaunchArgs,
KernelArgs.ArgBasePtrs, KernelArgs.ArgPtrs,
KernelArgs.ArgSizes, KernelArgs.ArgTypes,
KernelArgs.ArgNames, KernelArgs.ArgMappers,
Expand Down
2 changes: 1 addition & 1 deletion openmp/libomptarget/src/private.h
Expand Up @@ -39,7 +39,7 @@ extern int targetDataUpdate(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
bool FromMapper = false);

extern int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
const KernelArgsTy &KernelArgs, AsyncInfoTy &AsyncInfo);
KernelArgsTy &KernelArgs, AsyncInfoTy &AsyncInfo);

extern int target_replay(ident_t *Loc, DeviceTy &Device, void *HostPtr,
void *DeviceMemory, int64_t DeviceMemorySize,
Expand Down

0 comments on commit 3820d0e

Please sign in to comment.