From 7d00ddd8971ba4ce98b70a1dd0f390cdc78909a9 Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Thu, 13 Nov 2025 13:35:19 -0800 Subject: [PATCH] [mlir][NVVM] Make sure barrier reduction attr can roundtrip --- flang/test/Lower/CUDA/cuda-device-proc.cuf | 18 +++++++-------- mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 2 +- mlir/test/Target/LLVMIR/nvvm/barrier.mlir | 25 +++++++++++++-------- 3 files changed, 26 insertions(+), 19 deletions(-) diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf index ef15bf8d7726d..69fefcf972065 100644 --- a/flang/test/Lower/CUDA/cuda-device-proc.cuf +++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf @@ -103,24 +103,24 @@ end ! CHECK-LABEL: func.func @_QPdevsub() attributes {cuf.proc_attr = #cuf.cuda_proc} ! CHECK: nvvm.barrier0 ! CHECK: nvvm.bar.warp.sync %c1{{.*}} : i32 -! CHECK: %{{.*}} = nvvm.barrier %c1{{.*}} -> i32 +! CHECK: %{{.*}} = nvvm.barrier #nvvm.reduction %c1{{.*}} -> i32 ! CHECK: %[[A:.*]] = fir.load %{{.*}} : !fir.ref ! CHECK: %[[B:.*]] = fir.load %{{.*}} : !fir.ref ! CHECK: %[[CMP:.*]] = arith.cmpi sgt, %[[A]], %[[B]] : i32 ! CHECK: %[[CONV:.*]] = fir.convert %[[CMP]] : (i1) -> i32 -! CHECK: %{{.*}} = nvvm.barrier %[[CONV]] -> i32 -! CHECK: %{{.*}} = nvvm.barrier %c1{{.*}} -> i32 +! CHECK: %{{.*}} = nvvm.barrier #nvvm.reduction %[[CONV]] -> i32 +! CHECK: %{{.*}} = nvvm.barrier #nvvm.reduction %c1{{.*}} -> i32 ! CHECK: %[[A:.*]] = fir.load %{{.*}} : !fir.ref ! CHECK: %[[B:.*]] = fir.load %{{.*}} : !fir.ref ! CHECK: %[[CMP:.*]] = arith.cmpi sgt, %[[A]], %[[B]] : i32 ! CHECK: %[[CONV:.*]] = fir.convert %[[CMP]] : (i1) -> i32 -! CHECK: %{{.*}} = nvvm.barrier %[[CONV]] -> i32 -! CHECK: %{{.*}} = nvvm.barrier %c1{{.*}} -> i32 +! CHECK: %{{.*}} = nvvm.barrier #nvvm.reduction %[[CONV]] -> i32 +! CHECK: %{{.*}} = nvvm.barrier #nvvm.reduction %c1{{.*}} -> i32 ! CHECK: %[[A:.*]] = fir.load %{{.*}} : !fir.ref ! CHECK: %[[B:.*]] = fir.load %{{.*}} : !fir.ref ! 
CHECK: %[[CMP:.*]] = arith.cmpi sgt, %[[A]], %[[B]] : i32 ! CHECK: %[[CONV:.*]] = fir.convert %[[CMP]] : (i1) -> i32 -! CHECK: %{{.*}} = nvvm.barrier %[[CONV]] -> i32 +! CHECK: %{{.*}} = nvvm.barrier #nvvm.reduction %[[CONV]] -> i32 ! CHECK: %{{.*}} = llvm.atomicrmw add %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32 ! CHECK: %{{.*}} = llvm.atomicrmw add %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i64 ! CHECK: %{{.*}} = llvm.atomicrmw fadd %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f32 @@ -214,9 +214,9 @@ end ! CHECK: cuf.kernel ! CHECK: nvvm.barrier0 ! CHECK: nvvm.bar.warp.sync %c1{{.*}} : i32 -! CHECK: nvvm.barrier %c1{{.*}} -> i32 -! CHECK: nvvm.barrier %c1{{.*}} -> i32 -! CHECK: nvvm.barrier %c1{{.*}} -> i32 +! CHECK: nvvm.barrier #nvvm.reduction %c1{{.*}} -> i32 +! CHECK: nvvm.barrier #nvvm.reduction %c1{{.*}} -> i32 +! CHECK: nvvm.barrier #nvvm.reduction %c1{{.*}} -> i32 attributes(device) subroutine testMatch() integer :: a, ipred, mask, v32 diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 1c30d754a1792..995ade5c9b033 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -994,7 +994,7 @@ def NVVM_BarrierOp : NVVM_Op<"barrier", [AttrSizedOperandSegments]> { let assemblyFormat = "(`id` `=` $barrierId^)? (`number_of_threads` `=` $numberOfThreads^)? " - "($reductionOp^ $reductionPredicate)? (`->` type($res)^)? attr-dict"; + "(qualified($reductionOp)^ $reductionPredicate)? (`->` type($res)^)? 
attr-dict"; let builders = [OpBuilder<(ins), [{ return build($_builder, $_state, TypeRange{}, Value{}, Value{}, {}, Value{}); diff --git a/mlir/test/Target/LLVMIR/nvvm/barrier.mlir b/mlir/test/Target/LLVMIR/nvvm/barrier.mlir index d89f93101c1fc..1887f230bc952 100644 --- a/mlir/test/Target/LLVMIR/nvvm/barrier.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/barrier.mlir @@ -1,19 +1,26 @@ -// RUN: mlir-translate -mlir-to-llvmir %s -split-input-file --verify-diagnostics | FileCheck %s +// RUN: mlir-translate -mlir-to-llvmir %s -split-input-file --verify-diagnostics | FileCheck %s --check-prefix=LLVM +// RUN: mlir-opt %s -split-input-file | mlir-opt | FileCheck %s -// CHECK-LABEL: @llvm_nvvm_barrier( -// CHECK-SAME: i32 %[[barId:.*]], i32 %[[numThreads:.*]], i32 %[[redOperand:.*]]) +// LLVM-LABEL: @llvm_nvvm_barrier( +// LLVM-SAME: i32 %[[barId:.*]], i32 %[[numThreads:.*]], i32 %[[redOperand:.*]]) llvm.func @llvm_nvvm_barrier(%barID : i32, %numberOfThreads : i32, %redOperand : i32) { - // CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 0) + // LLVM: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 0) + // CHECK: nvvm.barrier nvvm.barrier - // CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 %[[barId]]) + // LLVM: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 %[[barId]]) + // CHECK: nvvm.barrier id = %{{.*}} nvvm.barrier id = %barID - // CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.count(i32 %[[barId]], i32 %[[numThreads]]) + // LLVM: call void @llvm.nvvm.barrier.cta.sync.aligned.count(i32 %[[barId]], i32 %[[numThreads]]) + // CHECK: nvvm.barrier id = %{{.*}} number_of_threads = %{{.*}} nvvm.barrier id = %barID number_of_threads = %numberOfThreads - // CHECK: %{{.*}} = call i32 @llvm.nvvm.barrier0.and(i32 %[[redOperand]]) + // LLVM: %{{.*}} = call i32 @llvm.nvvm.barrier0.and(i32 %[[redOperand]]) + // CHECK: %{{.*}} = nvvm.barrier #nvvm.reduction<and> %{{.*}} -> i32 %0 = nvvm.barrier #nvvm.reduction<and> %redOperand -> i32 - // CHECK: %{{.*}} = 
call i32 @llvm.nvvm.barrier0.or(i32 %[[redOperand]]) + // LLVM: %{{.*}} = call i32 @llvm.nvvm.barrier0.or(i32 %[[redOperand]]) + // CHECK: %{{.*}} = nvvm.barrier #nvvm.reduction<or> %{{.*}} -> i32 %1 = nvvm.barrier #nvvm.reduction<or> %redOperand -> i32 - // CHECK: %{{.*}} = call i32 @llvm.nvvm.barrier0.popc(i32 %[[redOperand]]) + // LLVM: %{{.*}} = call i32 @llvm.nvvm.barrier0.popc(i32 %[[redOperand]]) + // CHECK: %{{.*}} = nvvm.barrier #nvvm.reduction<popc> %{{.*}} -> i32 %2 = nvvm.barrier #nvvm.reduction<popc> %redOperand -> i32 llvm.return