-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[OFFLOAD] Restore interop functionality #161429
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Implement them as wrappers to the new API
@llvm/pr-subscribers-offload @llvm/pr-subscribers-backend-amdgpu Author: Alex Duran (adurang) ChangesThis implements two pieces to restore the interop functionality (that I broke) when the 6.0 interfaces were added:
Full diff: https://github.com/llvm/llvm-project/pull/161429.diff 4 Files Affected:
diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp
index eb5425ecbf062..7aa3a6b019627 100644
--- a/offload/libomptarget/OpenMP/InteropAPI.cpp
+++ b/offload/libomptarget/OpenMP/InteropAPI.cpp
@@ -275,7 +275,7 @@ omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType,
return Interop;
}
-int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop,
+int __tgt_interop_use60(ident_t *LocRef, omp_interop_val_t *Interop,
interop_ctx_t *Ctx, dep_pack_t *Deps) {
bool Nowait = Ctx->flags.nowait;
DP("Call to %s with interop " DPxMOD ", nowait %" PRId32 "\n", __func__,
@@ -359,6 +359,36 @@ EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop,
return omp_irc_success;
}
+// Backwards compatibility wrappers
+void __tgt_interop_init(ident_t *LocRef, int32_t Gtid,
+ omp_interop_val_t *&InteropPtr,
+ kmp_interop_type_t InteropType, int32_t DeviceId,
+ int32_t Ndeps, kmp_depend_info_t *DepList,
+ int32_t HaveNowait) {
+ interop_ctx_t Ctx = {0, {false, (bool)HaveNowait, 0}, Gtid};
+ dep_pack_t Deps = {Ndeps, 0, DepList, nullptr};
+ InteropPtr = __tgt_interop_get(LocRef, InteropType == 2 ? 1 : 0, DeviceId, 0,
+ nullptr, &Ctx, Ndeps ? &Deps : nullptr);
+}
+
+void __tgt_interop_use(ident_t *LocRef, int32_t Gtid,
+ omp_interop_val_t *&InteropPtr, int32_t DeviceId,
+ int32_t Ndeps, kmp_depend_info_t *DepList,
+ int32_t HaveNowait) {
+ interop_ctx_t Ctx = {0, {false, (bool)HaveNowait, 0}, Gtid};
+ dep_pack_t Deps = {Ndeps, 0, DepList, nullptr};
+ __tgt_interop_use60(LocRef, InteropPtr, &Ctx, Ndeps ? &Deps : nullptr);
+}
+
+void __tgt_interop_destroy(ident_t *LocRef, int32_t Gtid,
+ omp_interop_val_t *&InteropPtr, int32_t DeviceId,
+ int32_t Ndeps, kmp_depend_info_t *DepList,
+ int32_t HaveNowait) {
+ interop_ctx_t Ctx = {0, {false, (bool)HaveNowait, 0}, Gtid};
+ dep_pack_t Deps = {Ndeps, 0, DepList, nullptr};
+ __tgt_interop_release(LocRef, InteropPtr, &Ctx, Ndeps ? &Deps : nullptr);
+}
+
} // extern "C"
llvm::Expected<DeviceTy &> omp_interop_val_t::getDevice() const {
diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports
index 8e2db6ba8bba4..1374bfea81511 100644
--- a/offload/libomptarget/exports
+++ b/offload/libomptarget/exports
@@ -68,8 +68,11 @@ VERS1.0 {
omp_get_interop_int;
omp_get_interop_name;
omp_get_interop_type_desc;
- __tgt_interop_get;
+ __tgt_interop_init;
__tgt_interop_use;
+ __tgt_interop_destroy;
+ __tgt_interop_get;
+ __tgt_interop_use60;
__tgt_interop_release;
__tgt_target_sync;
__llvmPushCallConfiguration;
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 7b834ee346e5d..086220cb0057d 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2712,6 +2712,38 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
return Plugin::success();
}
+ interop_spec_t selectInteropPreference(int32_t InteropType,
+ int32_t NumPrefers,
+ interop_spec_t *Prefers) override {
+ // TODO: update once targetsync is supported
+ if (InteropType != kmp_interop_type_target)
+ return interop_spec_t{tgt_fr_hip, {false, 0}, 0};
+ return interop_spec_t{tgt_fr_none, {false, 0}, 0};
+ }
+
+ Expected<omp_interop_val_t *>
+ createInterop(int32_t InteropType, interop_spec_t &InteropSpec) override {
+ auto *Ret = new omp_interop_val_t(
+ DeviceId, static_cast<kmp_interop_type_t>(InteropType));
+ Ret->fr_id = tgt_fr_hip;
+ Ret->vendor_id = omp_vendor_amd;
+
+ // TODO: implement targetsync support
+
+ Ret->device_info.Platform = nullptr;
+ Ret->device_info.Device = reinterpret_cast<void *>(Agent.handle);
+ Ret->device_info.Context = nullptr;
+
+ return Ret;
+ }
+
+ Error releaseInterop(omp_interop_val_t *Interop) override {
+ if (!Interop)
+ return Plugin::success();
+ delete Interop;
+ return Plugin::success();
+ }
+
Error enqueueHostCallImpl(void (*Callback)(void *), void *UserData,
AsyncInfoWrapperTy &AsyncInfo) override {
AMDGPUStreamTy *Stream = nullptr;
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index b30c651223cad..74fc50c6bbcda 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -917,6 +917,53 @@ struct CUDADeviceTy : public GenericDeviceTy {
return Plugin::success();
}
+ interop_spec_t selectInteropPreference(int32_t InteropType,
+ int32_t NumPrefers,
+ interop_spec_t *Prefers) override {
+ return interop_spec_t{tgt_fr_cuda, {true, 0}, 0};
+ }
+
+ Expected<omp_interop_val_t *>
+ createInterop(int32_t InteropType, interop_spec_t &InteropSpec) override {
+ auto *Ret = new omp_interop_val_t(
+ DeviceId, static_cast<kmp_interop_type_t>(InteropType));
+ Ret->fr_id = tgt_fr_cuda;
+ Ret->vendor_id = omp_vendor_nvidia;
+
+ if (InteropType == kmp_interop_type_target ||
+ InteropType == kmp_interop_type_targetsync) {
+ Ret->device_info.Platform = nullptr;
+ Ret->device_info.Device = reinterpret_cast<void *>(Device);
+ Ret->device_info.Context = Context;
+ }
+
+ if (InteropType == kmp_interop_type_targetsync) {
+ Ret->async_info = new __tgt_async_info();
+ if (auto Err = setContext())
+ return Err;
+ CUstream Stream;
+ if (auto Err = CUDAStreamManager.getResource(
+ *reinterpret_cast<CUstream *>(&Stream)))
+ return Err;
+
+ Ret->async_info->Queue = Stream;
+ }
+ return Ret;
+ }
+
+ Error releaseInterop(omp_interop_val_t *Interop) override {
+ if (!Interop)
+ return Plugin::success();
+
+ if (Interop->async_info) {
+ // TODO: release the stream back to the pool?
+ delete Interop->async_info;
+ }
+ delete Interop;
+
+ return Plugin::success();
+ }
+
Error enqueueHostCallImpl(void (*Callback)(void *), void *UserData,
AsyncInfoWrapperTy &AsyncInfo) override {
if (auto Err = setContext())
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
@jplehr verified that the same functionality (and his other tests) work as before for AMD. (Although I'd note that for a full interop implementation more things are needed) @jhuber6 @kevinsala @mjklemm @dreachem @CatherineMoore @jdoerfert |
As Alex said, I ran this through local testing on AMD. It resulted in the same behavior as we were observing originally. |
I'm OK with committing this patch and withdrawing the revert patch. Thanks for fixing this, Alex. |
I found a NVIDIA machine so I'll be checking soon. |
It seems that machine hasn't the environment to build llvm properly. I'm trying to fix that, but might take a while. If someone else has a way to quickly test it, it would be appreciated. |
I was finally able to run it on NVIDIA GPU. The test failed because the NVIDIA support for "interop use" is not implemented:
We have 3 options here:
I personally think 1 is ok, because in the past the test was passing not because the implementation was doing the right thing (it wasn't) but because the test doesn't really check anything. But if others thing it's best to do 2 or 3, that's ok with me too. Also, I had to the disable the return of the Stream to the manager that @kevinsala suggested as it causes a double free at the end of the program. It seems to be kept somewhere else and released at deinit so there's no leak. |
I re-tested this PR against the little test that I put up in #161434 on AMD and that test works. From my end, it is OK to go in as-is and we need to implement the missing functionality in the plugins. |
Agreed that the patch is OK as is. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'll accept from my end.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
This implements two pieces to restore the interop functionality (that I broke) when the 6.0 interfaces were added: * A set of wrappers that support the old interfaces on top of the new ones * The same level of interop support for the CUDA amd AMD plugins
This implements two pieces to restore the interop functionality (that I broke) when the 6.0 interfaces were added: