Skip to content

Conversation

@clementval
Copy link
Contributor

No description provided.

@clementval requested a review from wangzpgi on October 22, 2025 20:40
@llvmbot added the labels "flang" (Flang issues not falling into any other category) and "flang:fir-hlfir" on Oct 22, 2025
@llvmbot
Copy link
Member

llvmbot commented Oct 22, 2025

@llvm/pr-subscribers-flang-fir-hlfir

Author: Valentin Clement (バレンタイン クレメン) (clementval)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/164706.diff

3 Files Affected:

  • (modified) flang/lib/Optimizer/Builder/IntrinsicCall.cpp (+12-3)
  • (modified) flang/module/cudadevice.f90 (+21-12)
  • (modified) flang/test/Lower/CUDA/cuda-device-proc.cuf (+22-4)
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 29eedfb0ce9cd..d2a36d4bdcc86 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -989,9 +989,18 @@ static constexpr IntrinsicHandler handlers[]{
        {"mask", asBox, handleDynamicOptional}}},
      /*isElemental=*/false},
     {"syncthreads", &I::genSyncThreads, {}, /*isElemental=*/false},
-    {"syncthreads_and", &I::genSyncThreadsAnd, {}, /*isElemental=*/false},
-    {"syncthreads_count", &I::genSyncThreadsCount, {}, /*isElemental=*/false},
-    {"syncthreads_or", &I::genSyncThreadsOr, {}, /*isElemental=*/false},
+    {"syncthreads_and_i4", &I::genSyncThreadsAnd, {}, /*isElemental=*/false},
+    {"syncthreads_and_l4", &I::genSyncThreadsAnd, {}, /*isElemental=*/false},
+    {"syncthreads_count_i4",
+     &I::genSyncThreadsCount,
+     {},
+     /*isElemental=*/false},
+    {"syncthreads_count_l4",
+     &I::genSyncThreadsCount,
+     {},
+     /*isElemental=*/false},
+    {"syncthreads_or_i4", &I::genSyncThreadsOr, {}, /*isElemental=*/false},
+    {"syncthreads_or_l4", &I::genSyncThreadsOr, {}, /*isElemental=*/false},
     {"syncwarp", &I::genSyncWarp, {}, /*isElemental=*/false},
     {"system",
      &I::genSystem,
diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90
index 22df9cdf410d5..5182950cbffea 100644
--- a/flang/module/cudadevice.f90
+++ b/flang/module/cudadevice.f90
@@ -21,23 +21,32 @@ module cudadevice
     procedure :: syncthreads
   end interface
 
-  interface
-    attributes(device) integer function syncthreads_and(value)
-      integer, value :: value
+  interface syncthreads_and
+    attributes(device) integer function syncthreads_and_i4(value)
+      integer(4), value :: value
     end function
-  end interface
+    attributes(device) integer function syncthreads_and_l4(value)
+      logical(4), value :: value
+    end function
+  end interface syncthreads_and
 
-  interface
-    attributes(device) integer function syncthreads_count(value)
-      integer, value :: value
+  interface syncthreads_count
+    attributes(device) integer function syncthreads_count_i4(value)
+      integer(4), value :: value
     end function
-  end interface
+    attributes(device) integer function syncthreads_count_l4(value)
+      logical(4), value :: value
+    end function
+  end interface syncthreads_count
 
-  interface
-    attributes(device) integer function syncthreads_or(value)
-      integer, value :: value
+  interface syncthreads_or
+    attributes(device) integer function syncthreads_or_i4(value)
+      integer(4), value :: value
     end function
-  end interface
+    attributes(device) integer function syncthreads_or_l4(value)
+      logical(4), value :: value
+    end function
+  end interface syncthreads_or
 
   interface
     attributes(device) subroutine syncwarp(mask)
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index 29c348c5260a5..55bb587dcf681 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -12,17 +12,23 @@ attributes(global) subroutine devsub()
   integer(8) :: al
   integer(8) :: time
   integer :: smalltime
-  integer(4) :: res
+  integer(4) :: res, offset
   integer(8) :: resl
 
+  integer :: tid
+  tid = threadIdx%x
+
   call syncthreads()
   call syncwarp(1)
   call threadfence()
   call threadfence_block()
   call threadfence_system()
   ret = syncthreads_and(1)
+  res = syncthreads_and(tid > offset)
   ret = syncthreads_count(1)
+  ret = syncthreads_count(tid > offset)
   ret = syncthreads_or(1)
+  ret = syncthreads_or(tid > offset)
 
   ai = atomicadd(ai, 1_4)
   al = atomicadd(al, 1_8)
@@ -100,9 +106,21 @@ end
 ! CHECK: fir.call @llvm.nvvm.membar.gl() fastmath<contract> : () -> ()
 ! CHECK: fir.call @llvm.nvvm.membar.cta() fastmath<contract> : () -> ()
 ! CHECK: fir.call @llvm.nvvm.membar.sys() fastmath<contract> : () -> ()
-! CHECK: %{{.*}} = fir.call @llvm.nvvm.barrier0.and(%c1_i32_0) fastmath<contract> : (i32) -> i32
-! CHECK: %{{.*}} = fir.call @llvm.nvvm.barrier0.popc(%c1_i32_1) fastmath<contract> : (i32) -> i32
-! CHECK: %{{.*}} = fir.call @llvm.nvvm.barrier0.or(%c1_i32_2) fastmath<contract> : (i32) -> i32
+! CHECK: %{{.*}} = fir.call @llvm.nvvm.barrier0.and(%c1{{.*}}) fastmath<contract> : (i32) -> i32
+! CHECK: %[[A:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: %[[B:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: %[[CMP:.*]] = arith.cmpi sgt, %[[A]], %[[B]] : i32
+! CHECK: %{{.*}} = fir.call @llvm.nvvm.barrier0.and(%[[CMP]])
+! CHECK: %{{.*}} = fir.call @llvm.nvvm.barrier0.popc(%c1{{.*}}) fastmath<contract> : (i32) -> i32
+! CHECK: %[[A:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: %[[B:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: %[[CMP:.*]] = arith.cmpi sgt, %[[A]], %[[B]] : i32
+! CHECK: %{{.*}} = fir.call @llvm.nvvm.barrier0.popc(%[[CMP]]) fastmath<contract> : (i1) -> i32
+! CHECK: %{{.*}} = fir.call @llvm.nvvm.barrier0.or(%c1{{.*}}) fastmath<contract> : (i32) -> i32
+! CHECK: %[[A:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: %[[B:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: %[[CMP:.*]] = arith.cmpi sgt, %[[A]], %[[B]] : i32
+! CHECK: %{{.*}} = fir.call @llvm.nvvm.barrier0.or(%[[CMP]]) fastmath<contract> : (i1) -> i32
 ! CHECK: %{{.*}} = llvm.atomicrmw add  %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
 ! CHECK: %{{.*}} = llvm.atomicrmw add  %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i64
 ! CHECK: %{{.*}} = llvm.atomicrmw fadd %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f32

@clementval enabled auto-merge (squash) on October 22, 2025 21:11
@clementval merged commit 07ed101 into llvm:main on Oct 22, 2025
13 checks passed
mikolaj-pirog pushed a commit to mikolaj-pirog/llvm-project that referenced this pull request Oct 23, 2025
dvbuka pushed a commit to dvbuka/llvm-project that referenced this pull request Oct 27, 2025
Lukacma pushed a commit to Lukacma/llvm-project that referenced this pull request Oct 29, 2025
aokblast pushed a commit to aokblast/llvm-project that referenced this pull request Oct 30, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

flang:fir-hlfir, flang (Flang issues not falling into any other category)

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants