Summary: CUDAIntrinsicLibrary::genSyncWarp no longer declares a function named
"llvm.nvvm.bar.warp.sync" and emits a fir.call to it; it now creates an
mlir::NVVM::SyncWarpOp directly from fir::getBase(args[0]). The two FileCheck
expectations in cuda-device-proc.cuf that matched the old fir.call are updated
to match `nvvm.bar.warp.sync %c1{{.*}} : i32`.

NOTE(review): this patch was received with every record collapsed onto one
line and with angle-bracket arguments missing (ArrayRef/SmallVector element
types, the fastmath flag, the cuda_proc attribute value). Line structure was
rebuilt from the hunk counts; the angle-bracket arguments were filled in by
inference. Confirm both against the tree before applying.

diff --git a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
index 6312e61f5e62a..4c0d266428632 100644
--- a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
@@ -1122,13 +1122,7 @@ CUDAIntrinsicLibrary::genSyncThreadsOr(mlir::Type resultType,
 void CUDAIntrinsicLibrary::genSyncWarp(
     llvm::ArrayRef<fir::ExtendedValue> args) {
   assert(args.size() == 1);
-  constexpr llvm::StringLiteral funcName = "llvm.nvvm.bar.warp.sync";
-  mlir::Value mask = fir::getBase(args[0]);
-  mlir::FunctionType funcType =
-      mlir::FunctionType::get(builder.getContext(), {mask.getType()}, {});
-  auto funcOp = builder.createFunction(loc, funcName, funcType);
-  llvm::SmallVector<mlir::Value> argsList{mask};
-  fir::CallOp::create(builder, loc, funcOp, argsList);
+  mlir::NVVM::SyncWarpOp::create(builder, loc, fir::getBase(args[0]));
 }
 
 // THIS_GRID
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index 2d2c801b48f4d..9f8f74a0c7b5e 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -105,7 +105,7 @@ end
 
 ! CHECK-LABEL: func.func @_QPdevsub() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
 ! CHECK: nvvm.barrier0
-! CHECK: fir.call @llvm.nvvm.bar.warp.sync(%c1{{.*}}) fastmath<contract> : (i32) -> ()
+! CHECK: nvvm.bar.warp.sync %c1{{.*}} : i32
 ! CHECK: fir.call @llvm.nvvm.membar.gl() fastmath<contract> : () -> ()
 ! CHECK: fir.call @llvm.nvvm.membar.cta() fastmath<contract> : () -> ()
 ! CHECK: fir.call @llvm.nvvm.membar.sys() fastmath<contract> : () -> ()
@@ -219,7 +219,7 @@ end
 ! CHECK-LABEL: func.func @_QPhost1()
 ! CHECK: cuf.kernel
 ! CHECK: nvvm.barrier0
-! CHECK: fir.call @llvm.nvvm.bar.warp.sync(%c1{{.*}}) fastmath<contract> : (i32) -> ()
+! CHECK: nvvm.bar.warp.sync %c1{{.*}} : i32
 ! CHECK: fir.call @llvm.nvvm.barrier0.and(%c1{{.*}}) fastmath<contract> : (i32) -> i32
 ! CHECK: fir.call @llvm.nvvm.barrier0.popc(%c1{{.*}}) fastmath<contract> : (i32) -> i32
 ! CHECK: fir.call @llvm.nvvm.barrier0.or(%c1{{.*}}) fastmath<contract> : (i32) -> i32