
Conversation

adurang (Contributor) commented Sep 30, 2025

This implements two pieces to restore the interop functionality (which I broke when the 6.0 interfaces were added):

  • A set of wrappers that support the old interfaces on top of the new ones
  • The same level of interop support for the CUDA and AMD plugins

llvmbot (Member) commented Sep 30, 2025

@llvm/pr-subscribers-offload

@llvm/pr-subscribers-backend-amdgpu

Author: Alex Duran (adurang)

Changes

This implements two pieces to restore the interop functionality (which I broke when the 6.0 interfaces were added):

  • A set of wrappers that support the old interfaces on top of the new ones
  • The same level of interop support for the CUDA and AMD plugins

Full diff: https://github.com/llvm/llvm-project/pull/161429.diff

4 Files Affected:

  • (modified) offload/libomptarget/OpenMP/InteropAPI.cpp (+31-1)
  • (modified) offload/libomptarget/exports (+4-1)
  • (modified) offload/plugins-nextgen/amdgpu/src/rtl.cpp (+32)
  • (modified) offload/plugins-nextgen/cuda/src/rtl.cpp (+47)
diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp
index eb5425ecbf062..7aa3a6b019627 100644
--- a/offload/libomptarget/OpenMP/InteropAPI.cpp
+++ b/offload/libomptarget/OpenMP/InteropAPI.cpp
@@ -275,7 +275,7 @@ omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType,
   return Interop;
 }
 
-int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop,
+int __tgt_interop_use60(ident_t *LocRef, omp_interop_val_t *Interop,
                       interop_ctx_t *Ctx, dep_pack_t *Deps) {
   bool Nowait = Ctx->flags.nowait;
   DP("Call to %s with interop " DPxMOD ", nowait %" PRId32 "\n", __func__,
@@ -359,6 +359,36 @@ EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop,
   return omp_irc_success;
 }
 
+// Backwards compatibility wrappers
+void __tgt_interop_init(ident_t *LocRef, int32_t Gtid,
+                        omp_interop_val_t *&InteropPtr,
+                        kmp_interop_type_t InteropType, int32_t DeviceId,
+                        int32_t Ndeps, kmp_depend_info_t *DepList,
+                        int32_t HaveNowait) {
+  interop_ctx_t Ctx = {0, {false, (bool)HaveNowait, 0}, Gtid};
+  dep_pack_t Deps = {Ndeps, 0, DepList, nullptr};
+  InteropPtr = __tgt_interop_get(LocRef, InteropType == 2 ? 1 : 0, DeviceId, 0,
+                                 nullptr, &Ctx, Ndeps ? &Deps : nullptr);
+}
+
+void __tgt_interop_use(ident_t *LocRef, int32_t Gtid,
+                       omp_interop_val_t *&InteropPtr, int32_t DeviceId,
+                       int32_t Ndeps, kmp_depend_info_t *DepList,
+                       int32_t HaveNowait) {
+  interop_ctx_t Ctx = {0, {false, (bool)HaveNowait, 0}, Gtid};
+  dep_pack_t Deps = {Ndeps, 0, DepList, nullptr};
+  __tgt_interop_use60(LocRef, InteropPtr, &Ctx, Ndeps ? &Deps : nullptr);
+}
+
+void __tgt_interop_destroy(ident_t *LocRef, int32_t Gtid,
+                           omp_interop_val_t *&InteropPtr, int32_t DeviceId,
+                           int32_t Ndeps, kmp_depend_info_t *DepList,
+                           int32_t HaveNowait) {
+  interop_ctx_t Ctx = {0, {false, (bool)HaveNowait, 0}, Gtid};
+  dep_pack_t Deps = {Ndeps, 0, DepList, nullptr};
+  __tgt_interop_release(LocRef, InteropPtr, &Ctx, Ndeps ? &Deps : nullptr);
+}
+
 } // extern "C"
 
 llvm::Expected<DeviceTy &> omp_interop_val_t::getDevice() const {
diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports
index 8e2db6ba8bba4..1374bfea81511 100644
--- a/offload/libomptarget/exports
+++ b/offload/libomptarget/exports
@@ -68,8 +68,11 @@ VERS1.0 {
     omp_get_interop_int;
     omp_get_interop_name;
     omp_get_interop_type_desc;
-    __tgt_interop_get;
+    __tgt_interop_init;
     __tgt_interop_use;
+    __tgt_interop_destroy;
+    __tgt_interop_get;
+    __tgt_interop_use60;
     __tgt_interop_release;
     __tgt_target_sync;
     __llvmPushCallConfiguration;
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 7b834ee346e5d..086220cb0057d 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2712,6 +2712,38 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
     return Plugin::success();
   }
 
+  interop_spec_t selectInteropPreference(int32_t InteropType,
+                                         int32_t NumPrefers,
+                                         interop_spec_t *Prefers) override {
+    // TODO: update once targetsync is supported
+    if (InteropType != kmp_interop_type_target)
+      return interop_spec_t{tgt_fr_hip, {false, 0}, 0};
+    return interop_spec_t{tgt_fr_none, {false, 0}, 0};
+  }
+
+  Expected<omp_interop_val_t *>
+  createInterop(int32_t InteropType, interop_spec_t &InteropSpec) override {
+    auto *Ret = new omp_interop_val_t(
+        DeviceId, static_cast<kmp_interop_type_t>(InteropType));
+    Ret->fr_id = tgt_fr_hip;
+    Ret->vendor_id = omp_vendor_amd;
+
+    // TODO: implement targetsync support
+
+    Ret->device_info.Platform = nullptr;
+    Ret->device_info.Device = reinterpret_cast<void *>(Agent.handle);
+    Ret->device_info.Context = nullptr;
+
+    return Ret;
+  }
+
+  Error releaseInterop(omp_interop_val_t *Interop) override {
+    if (!Interop)
+      return Plugin::success();
+    delete Interop;
+    return Plugin::success();
+  }
+
   Error enqueueHostCallImpl(void (*Callback)(void *), void *UserData,
                             AsyncInfoWrapperTy &AsyncInfo) override {
     AMDGPUStreamTy *Stream = nullptr;
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index b30c651223cad..74fc50c6bbcda 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -917,6 +917,53 @@ struct CUDADeviceTy : public GenericDeviceTy {
     return Plugin::success();
   }
 
+  interop_spec_t selectInteropPreference(int32_t InteropType,
+                                         int32_t NumPrefers,
+                                         interop_spec_t *Prefers) override {
+    return interop_spec_t{tgt_fr_cuda, {true, 0}, 0};
+  }
+
+  Expected<omp_interop_val_t *>
+  createInterop(int32_t InteropType, interop_spec_t &InteropSpec) override {
+    auto *Ret = new omp_interop_val_t(
+        DeviceId, static_cast<kmp_interop_type_t>(InteropType));
+    Ret->fr_id = tgt_fr_cuda;
+    Ret->vendor_id = omp_vendor_nvidia;
+
+    if (InteropType == kmp_interop_type_target ||
+        InteropType == kmp_interop_type_targetsync) {
+      Ret->device_info.Platform = nullptr;
+      Ret->device_info.Device = reinterpret_cast<void *>(Device);
+      Ret->device_info.Context = Context;
+    }
+
+    if (InteropType == kmp_interop_type_targetsync) {
+      Ret->async_info = new __tgt_async_info();
+      if (auto Err = setContext())
+        return Err;
+      CUstream Stream;
+      if (auto Err = CUDAStreamManager.getResource(
+              *reinterpret_cast<CUstream *>(&Stream)))
+        return Err;
+
+      Ret->async_info->Queue = Stream;
+    }
+    return Ret;
+  }
+
+  Error releaseInterop(omp_interop_val_t *Interop) override {
+    if (!Interop)
+      return Plugin::success();
+
+    if (Interop->async_info) {
+      // TODO: release the stream back to the pool?
+      delete Interop->async_info;
+    }
+    delete Interop;
+
+    return Plugin::success();
+  }
+
   Error enqueueHostCallImpl(void (*Callback)(void *), void *UserData,
                             AsyncInfoWrapperTy &AsyncInfo) override {
     if (auto Err = setContext())


github-actions bot commented Sep 30, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

adurang (Contributor, Author) commented Sep 30, 2025

@jplehr verified that the same functionality (and his other tests) works as before on AMD. (Although I'd note that more is needed for a full interop implementation.)

@jhuber6 @kevinsala @mjklemm @dreachem @CatherineMoore @jdoerfert

jplehr (Contributor) commented Sep 30, 2025

As Alex said, I ran this through local testing on AMD. It resulted in the same behavior we were observing originally.
I did not test this on NVIDIA. Is this something you can help with, @jhuber6? This is regarding the interop.c test.

CatherineMoore (Contributor) commented

I'm OK with committing this patch and withdrawing the revert patch. Thanks for fixing this, Alex.

adurang (Contributor, Author) commented Sep 30, 2025

I found a NVIDIA machine so I'll be checking soon.

adurang (Contributor, Author) commented Oct 1, 2025

> I found a NVIDIA machine so I'll be checking soon.

It seems that machine doesn't have the environment to build LLVM properly. I'm trying to fix that, but it might take a while. If someone else has a way to test this quickly, it would be appreciated.

adurang (Contributor, Author) commented Oct 1, 2025

I was finally able to run it on NVIDIA GPU. The test failed because the NVIDIA support for "interop use" is not implemented:

"PluginInterface" error: Failure to synchronize interop object 0x00005e885181dcf0: "generic error code for features unsupported by the device/backend" sync_barrier not supported
omptarget fatal error 0: Interop sync barrier failed for 0x5e885181dcf0 object

We have 3 options here:

  1. Leave it as is. The RTL is now really trying to implement the OpenMP semantics for "interop use" instead of just returning "success", and the plugin still doesn't support them (nor did it in the past). So while the test would still be failing, that is an accurate reflection of the state of things.
  2. I can add a simple implementation to the plugin that returns "success" even if it doesn't really do anything. (I tried this and the test passes with it.)
  3. Someone who has an idea of what should go there (probably something like cudaHostSynchronize) lets me know and I add it (or opens a PR against this one).

I personally think 1 is OK, because in the past the test was passing not because the implementation was doing the right thing (it wasn't) but because the test doesn't really check anything. But if others think it's best to do 2 or 3, that's OK with me too.

Also, I had to disable the return of the Stream to the manager that @kevinsala suggested, as it causes a double free at the end of the program. The stream seems to be kept somewhere else and released at deinit, so there's no leak.

jplehr (Contributor) commented Oct 2, 2025

I re-tested this PR against the little test that I put up in #161434 on AMD and that test works.

From my end, it is OK to go in as-is and we need to implement the missing functionality in the plugins.
In case someone has a strong opinion otherwise, I won't fight for that course of action.

CatherineMoore (Contributor) commented

> I re-tested this PR against the little test that I put up in #161434 on AMD and that test works.
>
> From my end, it is OK to go in as-is and we need to implement the missing functionality in the plugins. In case someone has a strong opinion otherwise, I won't fight for that course of action.

Agreed that the patch is OK as is.

jplehr (Contributor) left a comment

I'll accept from my end.

kevinsala (Contributor) left a comment

LGTM

@jplehr jplehr merged commit 902fe02 into llvm:main Oct 2, 2025
9 checks passed
@adurang adurang deleted the interop_wrappers branch October 2, 2025 20:26
mahesh-attarde pushed a commit to mahesh-attarde/llvm-project that referenced this pull request Oct 3, 2025
This implements two pieces to restore the interop functionality (which I broke when the 6.0 interfaces were added):

* A set of wrappers that support the old interfaces on top of the new ones
* The same level of interop support for the CUDA and AMD plugins

6 participants