Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[OpenMP][AMDGPU] Add interop support for OpenMP AMD GPU plugin #88000

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from

Conversation

mhalk
Copy link
Contributor

@mhalk mhalk commented Apr 8, 2024

Add interop related functionalities for OpenMP AMD GPU plugin, including get async queue, get device reference and get backend runtime's ref ID.

Originally authored here: https://reviews.llvm.org/D137607

Add interop related functionalities for OpenMP AMD GPU plugin, including get
async queue, get device reference and get backend runtime's ref ID.

Originally authored here: https://reviews.llvm.org/D137607

Co-authored-by: JP Lehr <JanPatrick.Lehr@amd.com>
Co-authored-by: Michael Halkenhaeuser <MichaelGerald.Halkenhauser@amd.com>
@mhalk mhalk added openmp openmp:libomptarget OpenMP offload runtime labels Apr 8, 2024
@llvmbot
Copy link
Collaborator

llvmbot commented Apr 8, 2024

@llvm/pr-subscribers-openmp

Author: Michael Halkenhäuser (mhalk)

Changes

Add interop related functionalities for OpenMP AMD GPU plugin, including get async queue, get device reference and get backend runtime's ref ID.

Originally authored here: https://reviews.llvm.org/D137607


Patch is 20.60 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/88000.diff

13 Files Affected:

  • (modified) openmp/libomptarget/include/OpenMP/InteropAPI.h (+102-5)
  • (modified) openmp/libomptarget/include/OpenMP/omp.h (-106)
  • (modified) openmp/libomptarget/include/Shared/PluginAPI.h (+4)
  • (modified) openmp/libomptarget/include/Shared/PluginAPI.inc (+1)
  • (modified) openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp (+11)
  • (modified) openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h (+5-1)
  • (modified) openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp (+15)
  • (modified) openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp (+11)
  • (modified) openmp/libomptarget/src/OpenMP/InteropAPI.cpp (+26-5)
  • (added) openmp/libomptarget/test/api/omp_interop_amdgpu.c (+92)
  • (modified) openmp/runtime/src/include/omp.h.var (+2-1)
  • (modified) openmp/runtime/src/include/omp_lib.h.var (+3-1)
  • (modified) openmp/runtime/src/kmp_ftn_entry.h (+2-1)
diff --git a/openmp/libomptarget/include/OpenMP/InteropAPI.h b/openmp/libomptarget/include/OpenMP/InteropAPI.h
index 71c78760a32265..f686ea9bd85fa0 100644
--- a/openmp/libomptarget/include/OpenMP/InteropAPI.h
+++ b/openmp/libomptarget/include/OpenMP/InteropAPI.h
@@ -11,12 +11,72 @@
 #ifndef OMPTARGET_OPENMP_INTEROP_API_H
 #define OMPTARGET_OPENMP_INTEROP_API_H
 
-#include "omp.h"
+#define omp_interop_none 0
 
+#include "omp.h"
 #include "omptarget.h"
 
 extern "C" {
 
+/// TODO: Include the `omp.h` of the current build
+/* OpenMP 5.1 interop */
+typedef intptr_t omp_intptr_t;
+
+/* 0..omp_get_num_interop_properties()-1 are reserved for implementation-defined
+ * properties */
+typedef enum omp_interop_property {
+  omp_ipr_fr_id = -1,
+  omp_ipr_fr_name = -2,
+  omp_ipr_vendor = -3,
+  omp_ipr_vendor_name = -4,
+  omp_ipr_device_num = -5,
+  omp_ipr_platform = -6,
+  omp_ipr_device = -7,
+  omp_ipr_device_context = -8,
+  omp_ipr_targetsync = -9,
+  omp_ipr_first = -9
+} omp_interop_property_t;
+
+typedef enum omp_interop_rc {
+  omp_irc_no_value = 1,
+  omp_irc_success = 0,
+  omp_irc_empty = -1,
+  omp_irc_out_of_range = -2,
+  omp_irc_type_int = -3,
+  omp_irc_type_ptr = -4,
+  omp_irc_type_str = -5,
+  omp_irc_other = -6
+} omp_interop_rc_t;
+
+typedef enum omp_interop_fr {
+  omp_ifr_cuda = 1,
+  omp_ifr_cuda_driver = 2,
+  omp_ifr_opencl = 3,
+  omp_ifr_sycl = 4,
+  omp_ifr_hip = 5,
+  omp_ifr_level_zero = 6,
+  omp_ifr_amdhsa = 7,
+  omp_ifr_last = 8
+} omp_interop_fr_t;
+
+typedef enum omp_interop_backend_type_t {
+  // reserve 0
+  omp_interop_backend_type_cuda = 1,
+  omp_interop_backend_type_amdhsa = 7,
+  omp_interop_backend_type_invalid = 8
+} omp_interop_backend_type_t;
+
+typedef enum omp_foreign_runtime_ids {
+  invalid = 0,
+  cuda = 1,
+  cuda_driver = 2,
+  opencl = 3,
+  sycl = 4,
+  hip = 5,
+  level_zero = 6,
+  amdhsa = 7
+} omp_foreign_runtime_ids_t;
+
 typedef enum kmp_interop_type_t {
   kmp_interop_type_unknown = -1,
   kmp_interop_type_platform,
@@ -24,20 +84,57 @@ typedef enum kmp_interop_type_t {
   kmp_interop_type_tasksync,
 } kmp_interop_type_t;
 
+typedef void *omp_interop_t;
+
 /// The interop value type, aka. the interop object.
 typedef struct omp_interop_val_t {
   /// Device and interop-type are determined at construction time and fix.
-  omp_interop_val_t(intptr_t device_id, kmp_interop_type_t interop_type)
-      : interop_type(interop_type), device_id(device_id) {}
+  omp_interop_val_t(intptr_t device_id, kmp_interop_type_t interop_type,
+                    omp_foreign_runtime_ids_t vendor_id,
+                    intptr_t backend_type_id)
+      : interop_type(interop_type), device_id(device_id), vendor_id(vendor_id),
+        backend_type_id(backend_type_id) {}
   const char *err_str = nullptr;
   __tgt_async_info *async_info = nullptr;
   __tgt_device_info device_info;
   const kmp_interop_type_t interop_type;
   const intptr_t device_id;
-  const omp_foreign_runtime_ids_t vendor_id = cuda;
-  const intptr_t backend_type_id = omp_interop_backend_type_cuda_1;
+  omp_foreign_runtime_ids_t vendor_id;
+  intptr_t backend_type_id;
 } omp_interop_val_t;
 
+/// Retrieves the number of implementation-defined properties available for an
+/// omp_interop_t object.
+int __KAI_KMPC_CONVENTION omp_get_num_interop_properties(const omp_interop_t);
+
+/// Retrieves an integer property from an omp_interop_t object.
+omp_intptr_t __KAI_KMPC_CONVENTION omp_get_interop_int(const omp_interop_t,
+                                                       omp_interop_property_t,
+                                                       int *);
+
+/// Retrieves a pointer property from an omp_interop_t object.
+void *__KAI_KMPC_CONVENTION omp_get_interop_ptr(const omp_interop_t,
+                                                omp_interop_property_t, int *);
+
+/// Retrieve a string property from an omp_interop_t object.
+const char *__KAI_KMPC_CONVENTION omp_get_interop_str(const omp_interop_t,
+                                                      omp_interop_property_t,
+                                                      int *);
+
+/// Retrieve a property name from an omp_interop_t object.
+const char *__KAI_KMPC_CONVENTION omp_get_interop_name(const omp_interop_t,
+                                                       omp_interop_property_t);
+
+/// Retrieve a description of the type of a property associated with an
+/// omp_interop_t object.
+const char *__KAI_KMPC_CONVENTION
+omp_get_interop_type_desc(const omp_interop_t, omp_interop_property_t);
+
+/// Retrieve a description of the return code associated with an omp_interop_t
+/// object.
+extern const char *__KAI_KMPC_CONVENTION
+omp_get_interop_rc_desc(const omp_interop_t, omp_interop_rc_t);
+
 } // extern "C"
 
 #endif // OMPTARGET_OPENMP_INTEROP_API_H
diff --git a/openmp/libomptarget/include/OpenMP/omp.h b/openmp/libomptarget/include/OpenMP/omp.h
index b44c6aff1b289c..d360b5ef3b1641 100644
--- a/openmp/libomptarget/include/OpenMP/omp.h
+++ b/openmp/libomptarget/include/OpenMP/omp.h
@@ -44,112 +44,6 @@ int omp_get_default_device(void) __attribute__((weak));
 
 ///}
 
-/// InteropAPI
-///
-///{
-
-/// TODO: Include the `omp.h` of the current build
-/* OpenMP 5.1 interop */
-typedef intptr_t omp_intptr_t;
-
-/* 0..omp_get_num_interop_properties()-1 are reserved for implementation-defined
- * properties */
-typedef enum omp_interop_property {
-  omp_ipr_fr_id = -1,
-  omp_ipr_fr_name = -2,
-  omp_ipr_vendor = -3,
-  omp_ipr_vendor_name = -4,
-  omp_ipr_device_num = -5,
-  omp_ipr_platform = -6,
-  omp_ipr_device = -7,
-  omp_ipr_device_context = -8,
-  omp_ipr_targetsync = -9,
-  omp_ipr_first = -9
-} omp_interop_property_t;
-
-#define omp_interop_none 0
-
-typedef enum omp_interop_rc {
-  omp_irc_no_value = 1,
-  omp_irc_success = 0,
-  omp_irc_empty = -1,
-  omp_irc_out_of_range = -2,
-  omp_irc_type_int = -3,
-  omp_irc_type_ptr = -4,
-  omp_irc_type_str = -5,
-  omp_irc_other = -6
-} omp_interop_rc_t;
-
-typedef enum omp_interop_fr {
-  omp_ifr_cuda = 1,
-  omp_ifr_cuda_driver = 2,
-  omp_ifr_opencl = 3,
-  omp_ifr_sycl = 4,
-  omp_ifr_hip = 5,
-  omp_ifr_level_zero = 6,
-  omp_ifr_last = 7
-} omp_interop_fr_t;
-
-typedef void *omp_interop_t;
-
-/*!
- * The `omp_get_num_interop_properties` routine retrieves the number of
- * implementation-defined properties available for an `omp_interop_t` object.
- */
-int __KAI_KMPC_CONVENTION omp_get_num_interop_properties(const omp_interop_t);
-/*!
- * The `omp_get_interop_int` routine retrieves an integer property from an
- * `omp_interop_t` object.
- */
-omp_intptr_t __KAI_KMPC_CONVENTION
-omp_get_interop_int(const omp_interop_t, omp_interop_property_t, int *);
-/*!
- * The `omp_get_interop_ptr` routine retrieves a pointer property from an
- * `omp_interop_t` object.
- */
-void *__KAI_KMPC_CONVENTION omp_get_interop_ptr(const omp_interop_t,
-                                                omp_interop_property_t, int *);
-/*!
- * The `omp_get_interop_str` routine retrieves a string property from an
- * `omp_interop_t` object.
- */
-const char *__KAI_KMPC_CONVENTION
-omp_get_interop_str(const omp_interop_t, omp_interop_property_t, int *);
-/*!
- * The `omp_get_interop_name` routine retrieves a property name from an
- * `omp_interop_t` object.
- */
-const char *__KAI_KMPC_CONVENTION omp_get_interop_name(const omp_interop_t,
-                                                       omp_interop_property_t);
-/*!
- * The `omp_get_interop_type_desc` routine retrieves a description of the type
- * of a property associated with an `omp_interop_t` object.
- */
-const char *__KAI_KMPC_CONVENTION
-omp_get_interop_type_desc(const omp_interop_t, omp_interop_property_t);
-/*!
- * The `omp_get_interop_rc_desc` routine retrieves a description of the return
- * code associated with an `omp_interop_t` object.
- */
-extern const char *__KAI_KMPC_CONVENTION
-omp_get_interop_rc_desc(const omp_interop_t, omp_interop_rc_t);
-
-typedef enum omp_interop_backend_type_t {
-  // reserve 0
-  omp_interop_backend_type_cuda_1 = 1,
-} omp_interop_backend_type_t;
-
-typedef enum omp_foreign_runtime_ids {
-  cuda = 1,
-  cuda_driver = 2,
-  opencl = 3,
-  sycl = 4,
-  hip = 5,
-  level_zero = 6,
-} omp_foreign_runtime_ids_t;
-
-///} InteropAPI
-
 } // extern "C"
 
 #endif // OMPTARGET_OPENMP_OMP_H
diff --git a/openmp/libomptarget/include/Shared/PluginAPI.h b/openmp/libomptarget/include/Shared/PluginAPI.h
index ecf669c774f142..c80b9d1693c10e 100644
--- a/openmp/libomptarget/include/Shared/PluginAPI.h
+++ b/openmp/libomptarget/include/Shared/PluginAPI.h
@@ -17,6 +17,7 @@
 #include <cstddef>
 #include <cstdint>
 
+#include "OpenMP/InteropAPI.h"
 #include "Shared/APITypes.h"
 
 extern "C" {
@@ -165,6 +166,9 @@ void __tgt_rtl_set_info_flag(uint32_t);
 // Print the device information
 void __tgt_rtl_print_device_info(int32_t ID);
 
+// Set the runtime related information for interop object
+int32_t __tgt_rtl_set_interop_info(omp_interop_val_t *InteropPtr);
+
 // Event related interfaces. It is expected to use the interfaces in the
 // following way:
 // 1) Create an event on the target device (__tgt_rtl_create_event).
diff --git a/openmp/libomptarget/include/Shared/PluginAPI.inc b/openmp/libomptarget/include/Shared/PluginAPI.inc
index e445da6852f7b4..c11341d969a6db 100644
--- a/openmp/libomptarget/include/Shared/PluginAPI.inc
+++ b/openmp/libomptarget/include/Shared/PluginAPI.inc
@@ -35,6 +35,7 @@ PLUGIN_API_HANDLE(synchronize);
 PLUGIN_API_HANDLE(query_async);
 PLUGIN_API_HANDLE(set_info_flag);
 PLUGIN_API_HANDLE(print_device_info);
+PLUGIN_API_HANDLE(set_interop_info);
 PLUGIN_API_HANDLE(create_event);
 PLUGIN_API_HANDLE(record_event);
 PLUGIN_API_HANDLE(wait_event);
diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
index a0fdde951b74a7..654cce3bb39c04 100644
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2772,6 +2772,17 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
 
   bool useMultipleSdmaEngines() const { return OMPX_UseMultipleSdmaEngines; }
 
+  virtual Error setInteropInfo(omp_interop_val_t *InterOpPtr) override {
+    InterOpPtr->vendor_id = amdhsa;
+    InterOpPtr->backend_type_id = omp_interop_backend_type_amdhsa;
+
+    __tgt_device_info *DevInfo = &InterOpPtr->device_info;
+    DevInfo->Context = nullptr;
+    DevInfo->Device = &Agent;
+
+    return Plugin::success();
+  }
+
 private:
   using AMDGPUEventRef = AMDGPUResourceRef<AMDGPUEventTy>;
   using AMDGPUEventManagerTy = GenericDeviceResourceManagerTy<AMDGPUEventRef>;
diff --git a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
index 79e8464bfda5c1..84159920a57309 100644
--- a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
+++ b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
@@ -19,6 +19,7 @@
 #include <shared_mutex>
 #include <vector>
 
+#include "OpenMP/InteropAPI.h"
 #include "Shared/Debug.h"
 #include "Shared/Environment.h"
 #include "Shared/EnvironmentVar.h"
@@ -850,6 +851,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
     return 0;
   }
 
+  virtual Error setInteropInfo(omp_interop_val_t *InterOpPtr) {
+    return Error::success();
+  }
+
   virtual Error getDeviceStackSize(uint64_t &V) = 0;
 
   /// Returns true if current plugin architecture is an APU
@@ -1059,7 +1064,6 @@ struct GenericPluginTy {
   /// we could not move this function into GenericDeviceTy.
   virtual Expected<bool> isELFCompatible(StringRef Image) const = 0;
 
-protected:
   /// Indicate whether a device id is valid.
   bool isValidDeviceId(int32_t DeviceId) const {
     return (DeviceId >= 0 && DeviceId < getNumDevices());
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
index b5f3c45c835fdb..febcd8ecb756c6 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
@@ -2040,6 +2040,21 @@ int32_t __tgt_rtl_init_plugin() {
   return OFFLOAD_SUCCESS;
 }
 
+int32_t __tgt_rtl_set_interop_info(omp_interop_val_t *InterOpPtr) {
+  assert(InterOpPtr && "Interop object is allocated");
+  int32_t DevId = InterOpPtr->device_id;
+
+  assert(PluginTy::get().isValidDeviceId(DevId) && "Device Id is valid");
+  if (auto Err = PluginTy::get().getDevice(DevId).setInteropInfo(InterOpPtr)) {
+    REPORT("Failure to determine the OpenMP interop object info for Device Id "
+           "%i\n",
+           DevId);
+    return OFFLOAD_FAIL;
+  }
+
+  return OFFLOAD_SUCCESS;
+}
+
 int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) {
   if (!PluginTy::isActive())
     return false;
diff --git a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
index fc74c6aa23fddd..ad074a88035b5c 100644
--- a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
@@ -1143,6 +1143,17 @@ struct CUDADeviceTy : public GenericDeviceTy {
   /// Returns the clock frequency for the given NVPTX device.
   uint64_t getClockFrequency() const override { return 1000000000; }
 
+  virtual Error setInteropInfo(omp_interop_val_t *InterOpPtr) override {
+    InterOpPtr->vendor_id = cuda;
+    InterOpPtr->backend_type_id = omp_interop_backend_type_cuda;
+
+    __tgt_device_info *DevInfo = &InterOpPtr->device_info;
+    DevInfo->Context = Context;
+    DevInfo->Device = Device;
+
+    return Plugin::success();
+  }
+
 private:
   using CUDAStreamManagerTy = GenericDeviceResourceManagerTy<CUDAStreamRef>;
   using CUDAEventManagerTy = GenericDeviceResourceManagerTy<CUDAEventRef>;
diff --git a/openmp/libomptarget/src/OpenMP/InteropAPI.cpp b/openmp/libomptarget/src/OpenMP/InteropAPI.cpp
index 1a995cde7816e1..1db2addb25119c 100644
--- a/openmp/libomptarget/src/OpenMP/InteropAPI.cpp
+++ b/openmp/libomptarget/src/OpenMP/InteropAPI.cpp
@@ -70,8 +70,21 @@ const char *getVendorIdToStr(const omp_foreign_runtime_ids_t VendorId) {
     return ("hip");
   case level_zero:
     return ("level_zero");
+  case amdhsa:
+    return ("amdhsa");
+  default:
+    return ("unknown");
+  }
+}
+
+const char *getBackendIdToStr(intptr_t BackendId) {
+  switch (BackendId) {
+  case omp_interop_backend_type_cuda:
+    return "cuda backend";
+  case omp_interop_backend_type_amdhsa:
+    return "amdhsa backend";
   }
-  return ("unknown");
+  return "unknown backend";
 }
 
 template <typename PropertyTy>
@@ -105,6 +118,8 @@ const char *getProperty<const char *>(omp_interop_val_t &InteropVal,
                : "device+context";
   case omp_ipr_vendor_name:
     return getVendorIdToStr(InteropVal.vendor_id);
+  case omp_ipr_fr_name:
+    return getBackendIdToStr(InteropVal.backend_type_id);
   default:
     getTypeMismatch(Property, Err);
     return nullptr;
@@ -221,8 +236,11 @@ void __tgt_interop_init(ident_t *LocRef, int32_t Gtid,
                          NoaliasDepList);
   }
 
-  InteropPtr = new omp_interop_val_t(DeviceId, InteropType);
+  // Create interop value object
+  InteropPtr = new omp_interop_val_t(DeviceId, InteropType, invalid,
+                                     omp_interop_backend_type_invalid);
 
+  // Get an intitialized and ready device, or error
   auto DeviceOrErr = PM->getDevice(DeviceId);
   if (!DeviceOrErr) {
     InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError());
@@ -230,12 +248,15 @@ void __tgt_interop_init(ident_t *LocRef, int32_t Gtid,
   }
 
   DeviceTy &Device = *DeviceOrErr;
-  if (!Device.RTL || !Device.RTL->init_device_info ||
-      Device.RTL->init_device_info(DeviceId, &(InteropPtr)->device_info,
-                                   &(InteropPtr)->err_str)) {
+  if (!Device.RTL || !Device.RTL->set_interop_info) {
     delete InteropPtr;
     InteropPtr = omp_interop_none;
+    return;
   }
+
+  // Retrieve the target specific interop value object
+  Device.RTL->set_interop_info(InteropPtr);
+
   if (InteropType == kmp_interop_type_tasksync) {
     if (!Device.RTL || !Device.RTL->init_async_info ||
         Device.RTL->init_async_info(DeviceId, &(InteropPtr)->async_info)) {
diff --git a/openmp/libomptarget/test/api/omp_interop_amdgpu.c b/openmp/libomptarget/test/api/omp_interop_amdgpu.c
new file mode 100644
index 00000000000000..c66df93d44dc94
--- /dev/null
+++ b/openmp/libomptarget/test/api/omp_interop_amdgpu.c
@@ -0,0 +1,92 @@
+// RUN: %libomptarget-compile-amdgcn-amd-amdhsa -O1
+// RUN: %libomptarget-run-amdgcn-amd-amdhsa | %fcheck-amdgcn-amd-amdhsa
+// REQUIRES: amdgcn-amd-amdhsa
+
+#include <omp.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define N 16384
+
+void vectorSet(int n, double s, double *x) {
+  for (int i = 0; i < n; ++i)
+    x[i] = s * (i + 1);
+}
+
+void vectorCopy(int n, double *x, double *y) {
+  for (int i = 0; i < n; ++i)
+    y[i] = x[i];
+}
+
+void vectorScale(int n, double s, double *x) {
+  for (int i = 0; i < n; ++i)
+    x[i] = s * x[i];
+}
+
+int main() {
+  const double ScaleFactor = 2.0;
+  double x[N], y[N];
+  omp_interop_t SyncObj = omp_interop_none;
+  int DeviceNum = omp_get_default_device();
+
+  // clang-format off
+  #pragma omp target nowait depend(out : x [0:N])                                \
+          map(from : x [0:N]) device(DeviceNum)
+  // clang-format on
+  vectorSet(N, 1.0, x);
+
+#pragma omp task depend(out : y [0:N])
+  vectorSet(N, -1.0, y);
+
+  // Get SyncObject for synchronization
+  // clang-format off
+  #pragma omp interop init(targetsync : SyncObj) device(DeviceNum)               \
+          depend(in : x [0:N]) depend(inout : y [0:N])
+  // clang-format on
+
+  int ForeignContextId = (int)omp_get_interop_int(SyncObj, omp_ipr_fr_id, NULL);
+  char *ForeignContextName =
+      (char *)omp_get_interop_str(SyncObj, omp_ipr_fr_name, NULL);
+
+  if (SyncObj != omp_interop_none && ForeignContextId == omp_ifr_amdhsa) {
+    printf("OpenMP working with %s runtime to execute async memcpy.\n",
+           ForeignContextName);
+    int Status;
+    omp_get_interop_ptr(SyncObj, omp_ipr_targetsync, &Status);
+
+    if (Status != omp_irc_success) {
+      fprintf(stderr, "ERROR: Failed to get %s stream, rt error = %d.\n",
+              ForeignContextName, Status);
+      if (Status == omp_irc_no_value)
+        fprintf(stderr, "Parameters valid, but no meaningful value available.");
+      exit(1);
+    }
+
+    vectorCopy(N, x, y);
+  } else {
+    // Execute as OpenMP offload
+    printf("Notice: Offloading myCopy to perform memcpy.\n");
+    // clang-format off
+  #pragma omp target depend(in : x [0:N]) depend(inout : y [0:N]) nowait         \
+          map(to : x [0:N]) map(tofrom : y [0:N]) device(DeviceNum)
+    // clang-format on
+    vectorCopy(N, x, y);
+  }
+
+  // This also ensures foreign tasks complete
+#pragma omp interop destroy(SyncObj) nowait depend(out : y [0:N])
+
+#pragma omp target depend(inout : x [0:N])
+  vectorScale(N, ScaleFactor, x);
+
+#pragma omp taskwait
+
+  printf("(1 : 16384) %f:%f\n", y[0], y[N - 1]);
+  printf("(2 : 32768) %f:%f\n", x[0], x[N - 1]);
+
+  return 0;
+}
+
+// ToDo: Add meaningful checks; the following is a placeholder.
+
+// CHECK: OpenMP working with amdhsa backend runtime to execute async memcpy
diff --git a/openmp/runtime/src/include/omp.h.var b/openmp/runtime/src/include/omp.h.var
index eb3ab7778606a3..7c60764e447163 100644
--- a/openmp/runtime/src/include/omp.h.var
+++ b/openmp/runtime/src/include/omp.h.var
@@ -192,7 +192,8 @@
         omp_ifr_sycl = 4,
         omp_ifr_hip = 5,
         omp_ifr_level_zero = 6,
-        omp_ifr_last = 7
+        omp_ifr_amdhsa = 7,
+        omp_ifr_last = 8
     } omp_interop_fr_t;
 
     typedef void * omp_interop_t;
diff --git a/openmp/runtime/src/include/omp_lib.h.var b/openmp/runtime/src/include/omp_lib.h.var
index a709a2f298f8c8..a076890b1207b2 100644
--- a/openmp/runtime/src/include/omp_lib.h.var
+++ b/openmp/runtime/src/include/omp_lib.h.var
@@ -261,8 +261,10 @@
       parameter(omp_ifr_hip=5)
       integer(kind=omp_interop_fr_kind)omp_ifr_level_zero
       parameter(omp_ifr_level_zero=6)
+      integer(kind=omp_interop_fr_kind)omp_ifr_amdhsa
+      parameter(omp_ifr_amdhsa=7)
       integer(kind=omp_interop_fr_kind)omp_ifr...
[truncated]

@mhalk
Copy link
Contributor Author

mhalk commented Apr 8, 2024

Marked this one as WIP / draft since I would like to gather some feedback w.r.t. the testcase / testing the current interop capabilities in general.

Copy link

github-actions bot commented Apr 8, 2024

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:
git-clang-format --diff 8ccf1c117b0dc08f7e9c24fe98f45ebe32e95cd1 f2a4e524a2be3384d940a4ae86510a8a18e22ea0 -- openmp/libomptarget/test/api/omp_interop_amdgpu.c openmp/libomptarget/include/OpenMP/InteropAPI.h openmp/libomptarget/include/OpenMP/omp.h openmp/libomptarget/include/Shared/PluginAPI.h openmp/libomptarget/include/Shared/PluginAPI.inc openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp openmp/libomptarget/src/OpenMP/InteropAPI.cpp openmp/runtime/src/kmp_ftn_entry.h
View the diff from clang-format here.
diff --git a/openmp/libomptarget/test/api/omp_interop_amdgpu.c b/openmp/libomptarget/test/api/omp_interop_amdgpu.c
index c66df93d44..26b9fbf5ce 100644
--- a/openmp/libomptarget/test/api/omp_interop_amdgpu.c
+++ b/openmp/libomptarget/test/api/omp_interop_amdgpu.c
@@ -35,11 +35,11 @@ int main() {
   // clang-format on
   vectorSet(N, 1.0, x);
 
-#pragma omp task depend(out : y [0:N])
+#pragma omp task depend(out : y[0 : N])
   vectorSet(N, -1.0, y);
 
-  // Get SyncObject for synchronization
-  // clang-format off
+// Get SyncObject for synchronization
+// clang-format off
   #pragma omp interop init(targetsync : SyncObj) device(DeviceNum)               \
           depend(in : x [0:N]) depend(inout : y [0:N])
   // clang-format on
@@ -74,9 +74,9 @@ int main() {
   }
 
   // This also ensures foreign tasks complete
-#pragma omp interop destroy(SyncObj) nowait depend(out : y [0:N])
+#pragma omp interop destroy(SyncObj) nowait depend(out : y[0 : N])
 
-#pragma omp target depend(inout : x [0:N])
+#pragma omp target depend(inout : x[0 : N])
   vectorScale(N, ScaleFactor, x);
 
 #pragma omp taskwait

omp_ifr_sycl = 4,
omp_ifr_hip = 5,
omp_ifr_level_zero = 6,
omp_ifr_amdhsa = 7,
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not use omp_ifr_hip which is specified by OpenMP spec.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry for the high latency!
Basically, I rebased the former phabricator review, which was accepted back then.

From what I could gather: there are plans to add amdhsa/ROCr interoperability with OpenMP.
So, this PR may be seen as a precursor -- albeit I have no ETA on the actual amdhsa interoperability.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
openmp:libomptarget OpenMP offload runtime openmp
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

4 participants