From be9fa9dee50138f0283a3354ce76069036b1330c Mon Sep 17 00:00:00 2001
From: Fabian Mora <fmora.dev@gmail.com>
Date: Thu, 16 Nov 2023 11:34:28 -0500
Subject: [PATCH] [flang][NVPTX] Add initial support to the NVPTX target
 (#71992)

This patch adds initial support for the NVPTX target, enabling `flang` to
produce OpenMP offload code for NVPTX targets.
---
 flang/lib/Frontend/FrontendActions.cpp        | 41 ++++++++++++++++++
 flang/lib/Optimizer/CodeGen/Target.cpp        | 30 +++++++++++++
 flang/test/Driver/omp-driver-offload.f90      | 42 +++++++++++++++++++
 flang/test/Fir/target-rewrite-boxchar.fir     |  1 +
 flang/test/Lower/OpenMP/FIR/omp-is-gpu.f90    |  4 +-
 .../Lower/OpenMP/FIR/target_cpu_features.f90  |  5 ++-
 flang/test/Lower/OpenMP/omp-is-gpu.f90        |  4 +-
 .../test/Lower/OpenMP/target_cpu_features.f90 |  4 +-
 .../basic-target-region-1D-array-section.f90  |  3 +-
 .../basic-target-region-3D-array-section.f90  |  3 +-
 .../fortran/basic-target-region-3D-array.f90  |  3 +-
 .../fortran/basic-target-region-array.f90     |  3 +-
 .../fortran/basic_target_region.f90           |  3 +-
 .../declare-target-array-in-target-region.f90 |  3 +-
 ...double-target-call-with-declare-target.f90 |  3 +-
 15 files changed, 134 insertions(+), 18 deletions(-)
diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
index 54f2f37b1ac74..1e4a2f3035f1f 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -175,6 +175,45 @@ getExplicitAndImplicitAMDGPUTargetFeatures(CompilerInstance &ci,
   return llvm::join(featuresVec, ",");
 }
 
+// Get feature string which represents combined explicit target features
+// for NVPTX and the target features specified by the user.
+// TODO: Have a more robust target conf like `clang/lib/Basic/Targets/NVPTX.cpp`
+static std::string
+getExplicitAndImplicitNVPTXTargetFeatures(CompilerInstance &ci,
+                                          const TargetOptions &targetOpts,
+                                          const llvm::Triple triple) {
+  llvm::StringRef cpu = targetOpts.cpu;
+  llvm::StringMap<bool> implicitFeaturesMap;
+  std::string errorMsg;
+  bool ptxVer = false;
+
+  // Add target features specified by the user
+  for (auto &userFeature : targetOpts.featuresAsWritten) {
+    llvm::StringRef userKeyString(llvm::StringRef(userFeature).drop_front(1));
+    implicitFeaturesMap[userKeyString.str()] = (userFeature[0] == '+');
+    // Check if the user provided a PTX version
+    if (userKeyString.startswith("ptx"))
+      ptxVer = true;
+  }
+
+  // Set the default PTX version to `ptx61` if none was provided.
+  // TODO: set the default PTX version based on the chip.
+  if (!ptxVer)
+    implicitFeaturesMap["ptx61"] = true;
+
+  // Set the compute capability.
+  implicitFeaturesMap[cpu.str()] = true;
+
+  llvm::SmallVector<std::string> featuresVec;
+  for (auto &implicitFeatureItem : implicitFeaturesMap) {
+    featuresVec.push_back((llvm::Twine(implicitFeatureItem.second ? "+" : "-") +
+                           implicitFeatureItem.first().str())
+                              .str());
+  }
+  llvm::sort(featuresVec);
+  return llvm::join(featuresVec, ",");
+}
+
 // Produces the string which represents target feature
 static std::string getTargetFeatures(CompilerInstance &ci) {
   const TargetOptions &targetOpts = ci.getInvocation().getTargetOpts();
@@ -188,6 +227,8 @@ static std::string getTargetFeatures(CompilerInstance &ci) {
   // them to the target features specified by the user
   if (triple.isAMDGPU()) {
     return getExplicitAndImplicitAMDGPUTargetFeatures(ci, targetOpts, triple);
+  } else if (triple.isNVPTX()) {
+    return getExplicitAndImplicitNVPTXTargetFeatures(ci, targetOpts, triple);
   }
   return llvm::join(targetOpts.featuresAsWritten.begin(),
                     targetOpts.featuresAsWritten.end(), ",");
diff --git a/flang/lib/Optimizer/CodeGen/Target.cpp b/flang/lib/Optimizer/CodeGen/Target.cpp
index 83e7fa9b440be..bb893277cb4d2 100644
--- a/flang/lib/Optimizer/CodeGen/Target.cpp
+++ b/flang/lib/Optimizer/CodeGen/Target.cpp
@@ -621,6 +621,33 @@ struct TargetAMDGPU : public GenericTarget<TargetAMDGPU> {
 };
 } // namespace
 
+//===----------------------------------------------------------------------===//
+// NVPTX target specifics.
+//===----------------------------------------------------------------------===//
+
+namespace {
+struct TargetNVPTX : public GenericTarget<TargetNVPTX> {
+  using GenericTarget::GenericTarget;
+
+  // Default size (in bits) of the index type for strings.
+  static constexpr int defaultWidth = 64;
+
+  CodeGenSpecifics::Marshalling
+  complexArgumentType(mlir::Location loc, mlir::Type eleTy) const override {
+    CodeGenSpecifics::Marshalling marshal;
+    TODO(loc, "handle complex argument types");
+    return marshal;
+  }
+
+  CodeGenSpecifics::Marshalling
+  complexReturnType(mlir::Location loc, mlir::Type eleTy) const override {
+    CodeGenSpecifics::Marshalling marshal;
+    TODO(loc, "handle complex return types");
+    return marshal;
+  }
+};
+} // namespace
+
 //===----------------------------------------------------------------------===//
 // LoongArch64 linux target specifics.
 //===----------------------------------------------------------------------===//
@@ -708,6 +735,9 @@ fir::CodeGenSpecifics::get(mlir::MLIRContext *ctx, llvm::Triple &&trp,
   case llvm::Triple::ArchType::amdgcn:
     return std::make_unique<TargetAMDGPU>(ctx, std::move(trp),
                                           std::move(kindMap));
+  case llvm::Triple::ArchType::nvptx64:
+    return std::make_unique<TargetNVPTX>(ctx, std::move(trp),
+                                         std::move(kindMap));
   case llvm::Triple::ArchType::loongarch64:
     return std::make_unique<TargetLoongArch64>(ctx, std::move(trp),
                                                std::move(kindMap));
diff --git a/flang/test/Driver/omp-driver-offload.f90 b/flang/test/Driver/omp-driver-offload.f90
index bfdc3f6f4d472..ad50723b0e3a7 100644
--- a/flang/test/Driver/omp-driver-offload.f90
+++ b/flang/test/Driver/omp-driver-offload.f90
@@ -67,6 +67,11 @@
 ! RUN: -fopenmp-targets=amdgcn-amd-amdhsa \
 ! RUN: -fopenmp-assume-threads-oversubscription \
 ! RUN: | FileCheck %s --check-prefixes=CHECK-THREADS-OVS
+! RUN: %flang -### %s -o %t 2>&1 \
+! RUN: -fopenmp --offload-arch=sm_70 \
+! RUN: -fopenmp-targets=nvptx64-nvidia-cuda \
+! RUN: -fopenmp-assume-threads-oversubscription \
+! RUN: | FileCheck %s --check-prefixes=CHECK-THREADS-OVS
 ! CHECK-THREADS-OVS: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-target-device" "-fopenmp-assume-threads-oversubscription" {{.*}}.f90"
 
 ! RUN: %flang -### %s -o %t 2>&1 \
@@ -74,6 +79,11 @@
 ! RUN: -fopenmp-targets=amdgcn-amd-amdhsa \
 ! RUN: -fopenmp-assume-teams-oversubscription  \
 ! RUN: | FileCheck %s --check-prefixes=CHECK-TEAMS-OVS
+! RUN: %flang -### %s -o %t 2>&1 \
+! RUN: -fopenmp --offload-arch=sm_70 \
+! RUN: -fopenmp-targets=nvptx64-nvidia-cuda \
+! RUN: -fopenmp-assume-teams-oversubscription  \
+! RUN: | FileCheck %s --check-prefixes=CHECK-TEAMS-OVS
 ! CHECK-TEAMS-OVS: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-target-device" "-fopenmp-assume-teams-oversubscription" {{.*}}.f90"
 
 ! RUN: %flang -### %s -o %t 2>&1 \
@@ -81,6 +91,11 @@
 ! RUN: -fopenmp-targets=amdgcn-amd-amdhsa \
 ! RUN: -fopenmp-assume-no-nested-parallelism  \
 ! RUN: | FileCheck %s --check-prefixes=CHECK-NEST-PAR
+! RUN: %flang -### %s -o %t 2>&1 \
+! RUN: -fopenmp --offload-arch=sm_70 \
+! RUN: -fopenmp-targets=nvptx64-nvidia-cuda \
+! RUN: -fopenmp-assume-no-nested-parallelism  \
+! RUN: | FileCheck %s --check-prefixes=CHECK-NEST-PAR
 ! CHECK-NEST-PAR: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-target-device" "-fopenmp-assume-no-nested-parallelism" {{.*}}.f90"
 
 ! RUN: %flang -### %s -o %t 2>&1 \
@@ -88,6 +103,11 @@
 ! RUN: -fopenmp-targets=amdgcn-amd-amdhsa \
 ! RUN: -fopenmp-assume-no-thread-state \
 ! RUN: | FileCheck %s --check-prefixes=CHECK-THREAD-STATE
+! RUN: %flang -### %s -o %t 2>&1 \
+! RUN: -fopenmp --offload-arch=sm_70 \
+! RUN: -fopenmp-targets=nvptx64-nvidia-cuda \
+! RUN: -fopenmp-assume-no-thread-state \
+! RUN: | FileCheck %s --check-prefixes=CHECK-THREAD-STATE
 ! CHECK-THREAD-STATE: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-target-device" "-fopenmp-assume-no-thread-state" {{.*}}.f90"
 
 ! RUN: %flang -### %s -o %t 2>&1 \
@@ -95,6 +115,11 @@
 ! RUN: -fopenmp-targets=amdgcn-amd-amdhsa \
 ! RUN: -fopenmp-target-debug \
 ! RUN: | FileCheck %s --check-prefixes=CHECK-TARGET-DEBUG
+! RUN: %flang -### %s -o %t 2>&1 \
+! RUN: -fopenmp --offload-arch=sm_70 \
+! RUN: -fopenmp-targets=nvptx64-nvidia-cuda \
+! RUN: -fopenmp-target-debug \
+! RUN: | FileCheck %s --check-prefixes=CHECK-TARGET-DEBUG
 ! CHECK-TARGET-DEBUG: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-target-device" "-fopenmp-target-debug" {{.*}}.f90"
 
 ! RUN: %flang -### %s -o %t 2>&1 \
@@ -102,6 +127,11 @@
 ! RUN: -fopenmp-targets=amdgcn-amd-amdhsa \
 ! RUN: -fopenmp-target-debug \
 ! RUN: | FileCheck %s --check-prefixes=CHECK-TARGET-DEBUG
+! RUN: %flang -### %s -o %t 2>&1 \
+! RUN: -fopenmp --offload-arch=sm_70 \
+! RUN: -fopenmp-targets=nvptx64-nvidia-cuda \
+! RUN: -fopenmp-target-debug \
+! RUN: | FileCheck %s --check-prefixes=CHECK-TARGET-DEBUG
 ! CHECK-TARGET-DEBUG-EQ: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-target-device" "-fopenmp-target-debug=111" {{.*}}.f90"
 
 ! RUN: %flang -S -### %s -o %t 2>&1 \
@@ -111,6 +141,13 @@
 ! RUN: -fopenmp-assume-teams-oversubscription -fopenmp-assume-no-nested-parallelism \
 ! RUN: -fopenmp-assume-no-thread-state \
 ! RUN: | FileCheck %s --check-prefixes=CHECK-RTL-ALL
+! RUN: %flang -S -### %s -o %t 2>&1 \
+! RUN: -fopenmp --offload-arch=sm_70 \
+! RUN: -fopenmp-targets=nvptx64-nvidia-cuda \
+! RUN: -fopenmp-target-debug -fopenmp-assume-threads-oversubscription \
+! RUN: -fopenmp-assume-teams-oversubscription -fopenmp-assume-no-nested-parallelism \
+! RUN: -fopenmp-assume-no-thread-state \
+! RUN: | FileCheck %s --check-prefixes=CHECK-RTL-ALL
 ! CHECK-RTL-ALL: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-target-device" "-fopenmp-target-debug" "-fopenmp-assume-teams-oversubscription"
 ! CHECK-RTL-ALL: "-fopenmp-assume-threads-oversubscription" "-fopenmp-assume-no-thread-state" "-fopenmp-assume-no-nested-parallelism"
 ! CHECK-RTL-ALL: {{.*}}.f90"
@@ -120,6 +157,11 @@
 ! RUN: -fopenmp-targets=amdgcn-amd-amdhsa \
 ! RUN: -fopenmp-version=45 \
 ! RUN: | FileCheck %s --check-prefixes=CHECK-OPENMP-VERSION
+! RUN: %flang -### %s -o %t 2>&1 \
+! RUN: -fopenmp --offload-arch=sm_70 \
+! RUN: -fopenmp-targets=nvptx64-nvidia-cuda \
+! RUN: -fopenmp-version=45 \
+! RUN: | FileCheck %s --check-prefixes=CHECK-OPENMP-VERSION
 ! CHECK-OPENMP-VERSION: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" "-fopenmp-version=45" {{.*}}.f90"
 
 ! Test diagnostic error when host IR file is non-existent 
diff --git a/flang/test/Fir/target-rewrite-boxchar.fir b/flang/test/Fir/target-rewrite-boxchar.fir
index e66fa60416303..b87cb35b46eb6 100644
--- a/flang/test/Fir/target-rewrite-boxchar.fir
+++ b/flang/test/Fir/target-rewrite-boxchar.fir
@@ -3,6 +3,7 @@
 // RUN: fir-opt --target-rewrite="target=aarch64-unknown-linux-gnu" %s | FileCheck %s --check-prefix=INT64
 // RUN: fir-opt --target-rewrite="target=powerpc64le-unknown-linux-gnu" %s | FileCheck %s --check-prefix=INT64
 // RUN: fir-opt --target-rewrite="target=amdgcn-amd-amdhsa" %s | FileCheck %s --check-prefix=INT64
+// RUN: fir-opt --target-rewrite="target=nvptx64-nvidia-cuda" %s | FileCheck %s --check-prefix=INT64
 // RUN: fir-opt --target-rewrite="target=loongarch64-unknown-linux-gnu" %s | FileCheck %s --check-prefix=INT64
 
 // Test that we rewrite the signatures and bodies of functions that take boxchar
diff --git a/flang/test/Lower/OpenMP/FIR/omp-is-gpu.f90 b/flang/test/Lower/OpenMP/FIR/omp-is-gpu.f90
index b702fc2c5a7e2..ac8d249748015 100644
--- a/flang/test/Lower/OpenMP/FIR/omp-is-gpu.f90
+++ b/flang/test/Lower/OpenMP/FIR/omp-is-gpu.f90
@@ -1,9 +1,11 @@
-!REQUIRES: amdgpu-registered-target
+!REQUIRES: amdgpu-registered-target, nvptx-registered-target
 
 !RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-fir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s
+!RUN: %flang_fc1 -triple nvptx64-nvidia-cuda -emit-fir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s
 !RUN: bbc -fopenmp -fopenmp-is-target-device -fopenmp-is-gpu -emit-fir -o - %s | FileCheck %s
 
 !RUN: not %flang_fc1 -triple amdgcn-amd-amdhsa -emit-fir -fopenmp %s -o - 2>&1 | FileCheck %s --check-prefix=FLANG-ERROR
+!RUN: not %flang_fc1 -triple nvptx64-nvidia-cuda -emit-fir -fopenmp %s -o - 2>&1 | FileCheck %s --check-prefix=FLANG-ERROR
 !RUN: not bbc -fopenmp -fopenmp-is-gpu -emit-fir %s -o - 2>&1 | FileCheck %s --check-prefix=BBC-ERROR
 
 !CHECK: module attributes {{{.*}}omp.is_gpu = true
diff --git a/flang/test/Lower/OpenMP/FIR/target_cpu_features.f90 b/flang/test/Lower/OpenMP/FIR/target_cpu_features.f90
index c6159342c023a..179b71b3f0cfa 100644
--- a/flang/test/Lower/OpenMP/FIR/target_cpu_features.f90
+++ b/flang/test/Lower/OpenMP/FIR/target_cpu_features.f90
@@ -1,5 +1,7 @@
-!REQUIRES: amdgpu-registered-target
+!REQUIRES: amdgpu-registered-target, nvptx-registered-target
 !RUN: %flang_fc1 -emit-fir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s
+!RUN: %flang_fc1 -emit-hlfir -triple nvptx64-nvidia-cuda -target-cpu sm_80 -fopenmp -fopenmp-is-target-device %s -o - | FileCheck --check-prefix=NVPTX %s
+
 
 !===============================================================================
 ! Target_Enter Simple
@@ -10,6 +12,7 @@
 !CHECK-SAME: +dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,
 !CHECK-SAME: +gfx8-insts,+gfx9-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,
 !CHECK-SAME: +wavefrontsize64">
+!NVPTX: omp.target = #omp.target<target_cpu = "sm_80", target_features = "+ptx61,+sm_80">
 !CHECK-LABEL: func.func @_QPomp_target_simple()
 subroutine omp_target_simple
   ! Directive needed to prevent subroutine from being filtered out when
diff --git a/flang/test/Lower/OpenMP/omp-is-gpu.f90 b/flang/test/Lower/OpenMP/omp-is-gpu.f90
index 12d0e4e869fba..3e6daeb522d77 100644
--- a/flang/test/Lower/OpenMP/omp-is-gpu.f90
+++ b/flang/test/Lower/OpenMP/omp-is-gpu.f90
@@ -1,9 +1,11 @@
-!REQUIRES: amdgpu-registered-target
+!REQUIRES: amdgpu-registered-target, nvptx-registered-target
 
 !RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-hlfir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s
+!RUN: %flang_fc1 -triple nvptx64-nvidia-cuda -emit-hlfir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s
 !RUN: bbc -fopenmp -fopenmp-is-target-device -fopenmp-is-gpu -emit-hlfir -o - %s | FileCheck %s
 
 !RUN: not %flang_fc1 -triple amdgcn-amd-amdhsa -emit-hlfir -fopenmp %s -o - 2>&1 | FileCheck %s --check-prefix=FLANG-ERROR
+!RUN: not %flang_fc1 -triple nvptx64-nvidia-cuda -emit-hlfir -fopenmp %s -o - 2>&1 | FileCheck %s --check-prefix=FLANG-ERROR
 !RUN: not bbc -fopenmp -fopenmp-is-gpu -emit-hlfir %s -o - 2>&1 | FileCheck %s --check-prefix=BBC-ERROR
 
 !CHECK: module attributes {{{.*}}omp.is_gpu = true
diff --git a/flang/test/Lower/OpenMP/target_cpu_features.f90 b/flang/test/Lower/OpenMP/target_cpu_features.f90
index 46fb14efad5c0..ea1e5e38fca88 100644
--- a/flang/test/Lower/OpenMP/target_cpu_features.f90
+++ b/flang/test/Lower/OpenMP/target_cpu_features.f90
@@ -1,5 +1,6 @@
-!REQUIRES: amdgpu-registered-target
+!REQUIRES: amdgpu-registered-target, nvptx-registered-target
 !RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s
+!RUN: %flang_fc1 -emit-hlfir -triple nvptx64-nvidia-cuda -target-cpu sm_80 -fopenmp -fopenmp-is-target-device %s -o - | FileCheck --check-prefix=NVPTX %s
 
 !===============================================================================
 ! Target_Enter Simple
@@ -10,6 +11,7 @@
 !CHECK-SAME: +dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,
 !CHECK-SAME: +gfx8-insts,+gfx9-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,
 !CHECK-SAME: +wavefrontsize64">
+!NVPTX: omp.target = #omp.target<target_cpu = "sm_80", target_features = "+ptx61,+sm_80">
 !CHECK-LABEL: func.func @_QPomp_target_simple()
 subroutine omp_target_simple
   ! Directive needed to prevent subroutine from being filtered out when
diff --git a/openmp/libomptarget/test/offloading/fortran/basic-target-region-1D-array-section.f90 b/openmp/libomptarget/test/offloading/fortran/basic-target-region-1D-array-section.f90
index 58f5379e330ec..993b91d4eb623 100644
--- a/openmp/libomptarget/test/offloading/fortran/basic-target-region-1D-array-section.f90
+++ b/openmp/libomptarget/test/offloading/fortran/basic-target-region-1D-array-section.f90
@@ -1,7 +1,6 @@
 ! Basic offloading test of arrays with provided lower 
 ! and upper bounds as specified by OpenMP's sectioning
-! REQUIRES: flang, amdgcn-amd-amdhsa
-! UNSUPPORTED: nvptx64-nvidia-cuda
+! REQUIRES: flang, amdgcn-amd-amdhsa, nvptx64-nvidia-cuda
 ! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
 ! UNSUPPORTED: aarch64-unknown-linux-gnu
 ! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
diff --git a/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array-section.f90 b/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array-section.f90
index e3df7983e6b5c..669d3674926f6 100644
--- a/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array-section.f90
+++ b/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array-section.f90
@@ -1,7 +1,6 @@
 ! Basic offloading test of a regular array explicitly
 ! passed within a target region
-! REQUIRES: flang, amdgcn-amd-amdhsa
-! UNSUPPORTED: nvptx64-nvidia-cuda
+! REQUIRES: flang, amdgcn-amd-amdhsa, nvptx64-nvidia-cuda
 ! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
 ! UNSUPPORTED: aarch64-unknown-linux-gnu
 ! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
diff --git a/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array.f90 b/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array.f90
index abc2763d4a30c..c87d6ee24aed3 100644
--- a/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array.f90
+++ b/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array.f90
@@ -1,7 +1,6 @@
 ! Basic offloading test of a regular array explicitly
 ! passed within a target region
-! REQUIRES: flang, amdgcn-amd-amdhsa
-! UNSUPPORTED: nvptx64-nvidia-cuda
+! REQUIRES: flang, amdgcn-amd-amdhsa, nvptx64-nvidia-cuda
 ! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
 ! UNSUPPORTED: aarch64-unknown-linux-gnu
 ! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
diff --git a/openmp/libomptarget/test/offloading/fortran/basic-target-region-array.f90 b/openmp/libomptarget/test/offloading/fortran/basic-target-region-array.f90
index d3c799ff3334f..9b10e4c7650d0 100644
--- a/openmp/libomptarget/test/offloading/fortran/basic-target-region-array.f90
+++ b/openmp/libomptarget/test/offloading/fortran/basic-target-region-array.f90
@@ -1,7 +1,6 @@
 ! Basic offloading test of a regular array explicitly
 ! passed within a target region
-! REQUIRES: flang, amdgcn-amd-amdhsa
-! UNSUPPORTED: nvptx64-nvidia-cuda
+! REQUIRES: flang, amdgcn-amd-amdhsa, nvptx64-nvidia-cuda
 ! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
 ! UNSUPPORTED: aarch64-unknown-linux-gnu
 ! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
diff --git a/openmp/libomptarget/test/offloading/fortran/basic_target_region.f90 b/openmp/libomptarget/test/offloading/fortran/basic_target_region.f90
index 295452b0698a6..6423ac765670d 100644
--- a/openmp/libomptarget/test/offloading/fortran/basic_target_region.f90
+++ b/openmp/libomptarget/test/offloading/fortran/basic_target_region.f90
@@ -1,6 +1,5 @@
 ! Basic offloading test with a target region
-! REQUIRES: flang, amdgcn-amd-amdhsa
-! UNSUPPORTED: nvptx64-nvidia-cuda
+! REQUIRES: flang, amdgcn-amd-amdhsa, nvptx64-nvidia-cuda
 ! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
 ! UNSUPPORTED: aarch64-unknown-linux-gnu
 ! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
diff --git a/openmp/libomptarget/test/offloading/fortran/declare-target-array-in-target-region.f90 b/openmp/libomptarget/test/offloading/fortran/declare-target-array-in-target-region.f90
index f5e3ae00653a9..d2e59d93a0209 100644
--- a/openmp/libomptarget/test/offloading/fortran/declare-target-array-in-target-region.f90
+++ b/openmp/libomptarget/test/offloading/fortran/declare-target-array-in-target-region.f90
@@ -1,8 +1,7 @@
 ! Offloading test with a target region mapping a declare target
 ! Fortran array writing some values to it and checking the host
 ! correctly receives the updates made on the device.
-! REQUIRES: flang, amdgcn-amd-amdhsa
-! UNSUPPORTED: nvptx64-nvidia-cuda
+! REQUIRES: flang, amdgcn-amd-amdhsa, nvptx64-nvidia-cuda
 ! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
 ! UNSUPPORTED: aarch64-unknown-linux-gnu
 ! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
diff --git a/openmp/libomptarget/test/offloading/fortran/double-target-call-with-declare-target.f90 b/openmp/libomptarget/test/offloading/fortran/double-target-call-with-declare-target.f90
index b4c793ca06cf7..884acb275a0eb 100644
--- a/openmp/libomptarget/test/offloading/fortran/double-target-call-with-declare-target.f90
+++ b/openmp/libomptarget/test/offloading/fortran/double-target-call-with-declare-target.f90
@@ -2,8 +2,7 @@
 ! declare target Fortran array and writing some values to 
 ! it before checking the host correctly receives the 
 ! correct updates made on the device.
-! REQUIRES: flang, amdgcn-amd-amdhsa
-! UNSUPPORTED: nvptx64-nvidia-cuda
+! REQUIRES: flang, amdgcn-amd-amdhsa, nvptx64-nvidia-cuda
 ! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
 ! UNSUPPORTED: aarch64-unknown-linux-gnu
 ! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO