Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[flang][cuda] Implicitly load cudadevice module in device/global subprogram #92038

Merged
merged 7 commits into from
May 17, 2024

Conversation

clementval
Copy link
Contributor

This is a re-worked version of #91668. It adds the cudadevice module and set the device attributes on its functions/subroutines so there is no need for special case in semantic check.
cudadevice module is implicitly USE'd in global/device subprogram.

@llvmbot llvmbot added flang Flang issues not falling into any other category flang:semantics labels May 13, 2024
@llvmbot
Copy link
Collaborator

llvmbot commented May 13, 2024

@llvm/pr-subscribers-flang-semantics

Author: Valentin Clement (バレンタイン クレメン) (clementval)

Changes

This is a re-worked version of #91668. It adds the cudadevice module and set the device attributes on its functions/subroutines so there is no need for special case in semantic check.
cudadevice module is implicitly USE'd in global/device subprogram.


Full diff: https://github.com/llvm/llvm-project/pull/92038.diff

7 Files Affected:

  • (modified) flang/include/flang/Semantics/semantics.h (+4-1)
  • (modified) flang/lib/Semantics/resolve-names.cpp (+20)
  • (modified) flang/lib/Semantics/semantics.cpp (+8)
  • (added) flang/module/cudadevice.f90 (+74)
  • (added) flang/test/Semantics/cuf-device-procedures01.cuf (+35)
  • (added) flang/test/Semantics/cuf-device-procedures02.cuf (+17)
  • (modified) flang/tools/f18/CMakeLists.txt (+7-4)
diff --git a/flang/include/flang/Semantics/semantics.h b/flang/include/flang/Semantics/semantics.h
index e6ba71d53e92b..167e613816394 100644
--- a/flang/include/flang/Semantics/semantics.h
+++ b/flang/include/flang/Semantics/semantics.h
@@ -215,8 +215,10 @@ class SemanticsContext {
   void UseFortranBuiltinsModule();
   const Scope *GetBuiltinsScope() const { return builtinsScope_; }
 
-  void UsePPCBuiltinTypesModule();
   const Scope &GetCUDABuiltinsScope();
+  const Scope &GetCUDADeviceScope();
+
+  void UsePPCBuiltinTypesModule();
   void UsePPCBuiltinsModule();
   Scope *GetPPCBuiltinTypesScope() { return ppcBuiltinTypesScope_; }
   const Scope *GetPPCBuiltinsScope() const { return ppcBuiltinsScope_; }
@@ -292,6 +294,7 @@ class SemanticsContext {
   const Scope *builtinsScope_{nullptr}; // module __Fortran_builtins
   Scope *ppcBuiltinTypesScope_{nullptr}; // module __Fortran_PPC_types
   std::optional<const Scope *> cudaBuiltinsScope_; // module __CUDA_builtins
+  std::optional<const Scope *> cudaDeviceScope_; // module cudadevice
   const Scope *ppcBuiltinsScope_{nullptr}; // module __ppc_intrinsics
   std::list<parser::Program> modFileParseTrees_;
   std::unique_ptr<CommonBlockMap> commonBlockMap_;
diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index e2875081b732c..1ade71805b70d 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -3797,6 +3797,26 @@ bool SubprogramVisitor::Pre(const parser::PrefixSpec::Attributes &attrs) {
         subp->set_cudaSubprogramAttrs(attr);
       }
     }
+    if (auto attrs{subp->cudaSubprogramAttrs()}) {
+      if (*attrs == common::CUDASubprogramAttrs::Global ||
+          *attrs == common::CUDASubprogramAttrs::Device) {
+        const Scope &scope{currScope()};
+        const Scope *mod{FindModuleContaining(scope)};
+        if (mod && mod->GetName().value() == "cudadevice") {
+          return false;
+        }
+        // Implicitly USE the cudadevice module by copying its symbols in the
+        // current scope.
+        const Scope &cudaDeviceScope{context().GetCUDADeviceScope()};
+        for (auto sym : cudaDeviceScope.GetSymbols()) {
+          if (!currScope().FindSymbol(sym->name())) {
+            auto &localSymbol{MakeSymbol(sym->name())};
+            localSymbol.set_details(UseDetails{sym->name(), *sym});
+            localSymbol.flags() = sym->flags();
+          }
+        }
+      }
+    }
   }
   return false;
 }
diff --git a/flang/lib/Semantics/semantics.cpp b/flang/lib/Semantics/semantics.cpp
index 6ccd915c4dcbf..d51cc62d804e8 100644
--- a/flang/lib/Semantics/semantics.cpp
+++ b/flang/lib/Semantics/semantics.cpp
@@ -543,6 +543,14 @@ const Scope &SemanticsContext::GetCUDABuiltinsScope() {
   return **cudaBuiltinsScope_;
 }
 
+const Scope &SemanticsContext::GetCUDADeviceScope() {
+  if (!cudaDeviceScope_) {
+    cudaDeviceScope_ = GetBuiltinModule("cudadevice");
+    CHECK(cudaDeviceScope_.value() != nullptr);
+  }
+  return **cudaDeviceScope_;
+}
+
 void SemanticsContext::UsePPCBuiltinsModule() {
   if (ppcBuiltinsScope_ == nullptr) {
     ppcBuiltinsScope_ = GetBuiltinModule("__ppc_intrinsics");
diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90
new file mode 100644
index 0000000000000..f34820dd10792
--- /dev/null
+++ b/flang/module/cudadevice.f90
@@ -0,0 +1,74 @@
+!===-- module/cudedevice.f90 -----------------------------------------------===!
+!
+! Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+! See https://llvm.org/LICENSE.txt for license information.
+! SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+!
+!===------------------------------------------------------------------------===!
+
+! CUDA Fortran procedures available in device subprogram
+
+module cudadevice
+implicit none
+
+  ! Set PRIVATE by default to explicitly only export what is meant
+  ! to be exported by this MODULE.
+  private
+
+  ! Synchronization Functions
+
+  interface
+    attributes(device) subroutine syncthreads()
+    end subroutine
+  end interface
+  public :: syncthreads
+
+  interface
+    attributes(device) integer function syncthreads_and(value)
+      integer :: value
+    end function
+  end interface
+  public :: syncthreads_and
+
+  interface
+    attributes(device) integer function syncthreads_count(value)
+      integer :: value
+    end function
+  end interface
+  public :: syncthreads_count
+
+  interface
+    attributes(device) integer function syncthreads_or(value)
+      integer :: value
+    end function
+  end interface
+  public :: syncthreads_or
+
+  interface
+    attributes(device) subroutine syncwarp(mask)
+      integer :: mask
+    end subroutine
+  end interface
+  public :: syncwarp
+
+  ! Memory Fences
+
+  interface
+    attributes(device) subroutine threadfence()
+    end subroutine
+  end interface
+  public :: threadfence
+
+  interface
+    attributes(device) subroutine threadfence_block()
+    end subroutine
+  end interface
+  public :: threadfence_block
+
+  interface
+    attributes(device) subroutine threadfence_system()
+    end subroutine
+  end interface
+  public :: threadfence_system
+
+end module
diff --git a/flang/test/Semantics/cuf-device-procedures01.cuf b/flang/test/Semantics/cuf-device-procedures01.cuf
new file mode 100644
index 0000000000000..b9918d8a4ae4c
--- /dev/null
+++ b/flang/test/Semantics/cuf-device-procedures01.cuf
@@ -0,0 +1,35 @@
+! RUN: %flang_fc1 -fdebug-dump-symbols %s | FileCheck %s
+
+! Test CUDA Fortran intrinsic can pass semantic
+
+attributes(global) subroutine devsub()
+  implicit none
+  integer :: ret
+
+  ! 3.6.4. Synchronization Functions
+  call syncthreads()
+  call syncwarp(1)
+  call threadfence()
+  call threadfence_block()
+  call threadfence_system()
+  ret = syncthreads_and(1)
+  ret = syncthreads_count(1)
+  ret = syncthreads_or(1)
+end
+
+! CHECK-LABEL: Subprogram scope: devsub
+! CHECK: syncthreads (Subroutine): Use from syncthreads in cudadevice
+! CHECK: syncthreads_and (Function): Use from syncthreads_and in cudadevice
+! CHECK: syncthreads_count (Function): Use from syncthreads_count in cudadevice
+! CHECK: syncthreads_or (Function): Use from syncthreads_or in cudadevice
+! CHECK: syncwarp (Subroutine): Use from syncwarp in cudadevice
+! CHECK: threadfence (Subroutine): Use from threadfence in cudadevice
+! CHECK: threadfence_block (Subroutine): Use from threadfence_block in cudadevice
+! CHECK: threadfence_system (Subroutine): Use from threadfence_system in cudadevice
+
+subroutine host()
+  call syncthreads()
+end subroutine
+
+! CHECK-LABEL: Subprogram scope: host
+! CHECK: syncthreads, EXTERNAL: HostAssoc{{$}}
diff --git a/flang/test/Semantics/cuf-device-procedures02.cuf b/flang/test/Semantics/cuf-device-procedures02.cuf
new file mode 100644
index 0000000000000..c93fc4033b8f0
--- /dev/null
+++ b/flang/test/Semantics/cuf-device-procedures02.cuf
@@ -0,0 +1,17 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1
+
+module dev
+  integer, device :: syncthreads
+
+contains
+
+  attributes(device) subroutine sub1()
+    syncthreads = 1 ! syncthreads not overwritten by cudadevice
+  end subroutine
+
+  attributes(global) subroutine sub2()
+!ERROR: 'threadfence' is use-associated from module 'cudadevice' and cannot be re-declared
+    integer :: threadfence
+  end subroutine
+end module
+
diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt
index 64815a1f5da62..73046b9d538ea 100644
--- a/flang/tools/f18/CMakeLists.txt
+++ b/flang/tools/f18/CMakeLists.txt
@@ -12,6 +12,7 @@ set(MODULES
   "__ppc_intrinsics"
   "mma"
   "__cuda_builtins"
+  "cudadevice"
   "ieee_arithmetic"
   "ieee_exceptions"
   "ieee_features"
@@ -26,11 +27,14 @@ set(MODULES
 if (NOT CMAKE_CROSSCOMPILING)
   foreach(filename ${MODULES})
     set(depends "")
+    set(opts "")
     if(${filename} STREQUAL "__fortran_builtins" OR
        ${filename} STREQUAL "__ppc_types")
     elseif(${filename} STREQUAL "__ppc_intrinsics" OR
            ${filename} STREQUAL "mma")
       set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__ppc_types.mod)
+    elseif(${filename} STREQUAL "cudadevice")
+      set(opts "-xcuda")
     else()
       set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_builtins.mod)
       if(NOT ${filename} STREQUAL "__fortran_type_info")
@@ -43,9 +47,8 @@ if (NOT CMAKE_CROSSCOMPILING)
     endif()
 
     # The module contains PPC vector types that needs the PPC target.
-    set(opts "")
-      if(${filename} STREQUAL "__ppc_intrinsics" OR
-         ${filename} STREQUAL "mma")
+    if(${filename} STREQUAL "__ppc_intrinsics" OR
+       ${filename} STREQUAL "mma")
       if (PowerPC IN_LIST LLVM_TARGETS_TO_BUILD)
         set(opts "--target=ppc64le")
       else()
@@ -58,7 +61,7 @@ if (NOT CMAKE_CROSSCOMPILING)
     # TODO: We may need to flag this with conditional, in case Flang is built w/o OpenMP support
     add_custom_command(OUTPUT ${base}.mod
       COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR}
-      COMMAND flang-new -cpp -fsyntax-only ${opts} -module-dir ${FLANG_INTRINSIC_MODULES_DIR}
+      COMMAND flang-new -fc1 -cpp -fsyntax-only ${opts} -module-dir ${FLANG_INTRINSIC_MODULES_DIR}
         ${FLANG_SOURCE_DIR}/module/${filename}.f90
       DEPENDS flang-new ${FLANG_SOURCE_DIR}/module/${filename}.f90 ${FLANG_SOURCE_DIR}/module/__fortran_builtins.f90 ${depends}
     )

flang/lib/Semantics/resolve-names.cpp Outdated Show resolved Hide resolved
@clementval clementval merged commit 6d50a79 into llvm:main May 17, 2024
3 of 4 checks passed
@clementval clementval deleted the cuda_device_func2 branch May 17, 2024 20:58
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
flang:semantics flang Flang issues not falling into any other category
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

3 participants