diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index 15ea84565dd75..dbfe9a69f794f 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -9455,7 +9455,7 @@ void IntrinsicLibrary::genTMABulkS2G(llvm::ArrayRef args) { builder, loc, dst, src, fir::getBase(args[2]), {}, {}); mlir::NVVM::InlinePtxOp::create(builder, loc, mlir::TypeRange{}, {}, {}, - "cp.async.bulk.commit_group", {}); + "cp.async.bulk.commit_group;", {}); mlir::NVVM::CpAsyncBulkWaitGroupOp::create(builder, loc, builder.getI32IntegerAttr(0), {}); } @@ -9471,7 +9471,7 @@ static void genTMABulkStore(fir::FirOpBuilder &builder, mlir::Location loc, mlir::NVVM::CpAsyncBulkSharedCTAToGlobalOp::create(builder, loc, dst, src, size, {}, {}); mlir::NVVM::InlinePtxOp::create(builder, loc, mlir::TypeRange{}, {}, {}, - "cp.async.bulk.commit_group", {}); + "cp.async.bulk.commit_group;", {}); mlir::NVVM::CpAsyncBulkWaitGroupOp::create(builder, loc, builder.getI32IntegerAttr(0), {}); } diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf index 09b4302446ee7..bb626baab77dc 100644 --- a/flang/test/Lower/CUDA/cuda-device-proc.cuf +++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf @@ -490,7 +490,7 @@ end subroutine ! CHECK-LABEL: func.func @_QPtest_bulk_s2g ! CHECL: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3> -! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group" +! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group;" ! CHECK: nvvm.cp.async.bulk.wait_group 0 attributes(device) subroutine testAtomicCasLoop(aa, n) @@ -671,7 +671,7 @@ end subroutine ! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_c4 ! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3> -! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group" +! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group;" ! CHECK: nvvm.cp.async.bulk.wait_group 0 attributes(global) subroutine test_tma_bulk_store_c8(c, n) @@ -684,7 +684,7 @@ end subroutine ! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_c8 ! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3> -! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group" +! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group;" ! CHECK: nvvm.cp.async.bulk.wait_group 0 attributes(global) subroutine test_tma_bulk_store_i4(c, n) @@ -697,7 +697,7 @@ end subroutine ! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_i4 ! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3> -! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group" +! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group;" ! CHECK: nvvm.cp.async.bulk.wait_group 0 attributes(global) subroutine test_tma_bulk_store_i8(c, n) @@ -710,7 +710,7 @@ end subroutine ! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_i8 ! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3> -! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group" +! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group;" ! CHECK: nvvm.cp.async.bulk.wait_group 0 @@ -724,7 +724,7 @@ end subroutine ! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_r2 ! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3> -! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group" +! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group;" ! CHECK: nvvm.cp.async.bulk.wait_group 0 attributes(global) subroutine test_tma_bulk_store_r4(c, n) @@ -737,7 +737,7 @@ end subroutine ! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_r4 ! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3> -! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group" +! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group;" ! CHECK: nvvm.cp.async.bulk.wait_group 0 attributes(global) subroutine test_tma_bulk_store_r8(c, n) @@ -750,5 +750,5 @@ end subroutine ! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_r8 ! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3> -! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group" +! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group;" ! CHECK: nvvm.cp.async.bulk.wait_group 0