diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 308bae21d98e9..53371e8991b90 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -211,6 +211,27 @@ def SharedSpaceAttr : EnumAttr { let assemblyFormat = "`<` $value `>`"; } +// Attrs describing the Memory Ordering Semantics +def MemOrderKindWeak : I32EnumAttrCase<"WEAK", 0, "weak">; +def MemOrderKindRelaxed : I32EnumAttrCase<"RELAXED", 1, "relaxed">; +def MemOrderKindAcquire : I32EnumAttrCase<"ACQUIRE", 2, "acquire">; +def MemOrderKindRelease : I32EnumAttrCase<"RELEASE", 3, "release">; +def MemOrderKindAcqRel : I32EnumAttrCase<"ACQ_REL", 4, "acq_rel">; +def MemOrderKindSC : I32EnumAttrCase<"SC", 5, "sc">; +def MemOrderKindMMIO : I32EnumAttrCase<"MMIO", 6, "mmio">; +def MemOrderKindVolatile : I32EnumAttrCase<"VOLATILE", 7, "volatile">; + +def MemOrderKind : I32EnumAttr<"MemOrderKind", "NVVM Memory Ordering kind", + [MemOrderKindWeak, MemOrderKindRelaxed, MemOrderKindAcquire, + MemOrderKindRelease, MemOrderKindAcqRel, MemOrderKindSC, + MemOrderKindMMIO, MemOrderKindVolatile]> { + let genSpecializedAttr = 0; + let cppNamespace = "::mlir::NVVM"; +} +def MemOrderKindAttr : EnumAttr { + let assemblyFormat = "`<` $value `>`"; +} + //===----------------------------------------------------------------------===// // NVVM intrinsic operations //===----------------------------------------------------------------------===// @@ -1372,6 +1393,27 @@ def NVVM_ClusterWaitOp : NVVM_Op<"cluster.wait", [NVVMRequiresSM<90>]> { let assemblyFormat = "attr-dict"; } +//===----------------------------------------------------------------------===// +// NVVM Membar/Fence +//===----------------------------------------------------------------------===// + +def NVVM_MembarOp : NVVM_Op<"memory.barrier">, + Arguments<(ins MemScopeKindAttr:$scope)> { + let summary = "Memory barrier operation"; + let description = [{ + `membar` 
operation guarantees that prior memory accesses requested by this + thread are performed at the specified `scope`, before later memory + operations requested by this thread following the membar instruction. + + [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-membar) + }]; + + let assemblyFormat = "$scope attr-dict"; + let llvmBuilder = [{ + createIntrinsicCall(builder, getMembarIntrinsicID($scope)); + }]; +} + def NVVM_FenceScClusterOp : NVVM_Op<"fence.sc.cluster"> { string llvmBuilder = [{ createIntrinsicCall(builder, llvm::Intrinsic::nvvm_fence_sc_cluster); @@ -1379,6 +1421,38 @@ def NVVM_FenceScClusterOp : NVVM_Op<"fence.sc.cluster"> { let assemblyFormat = "attr-dict"; } +def NVVM_FenceSyncRestrictOp : NVVM_Op<"fence.sync_restrict">, + Arguments<(ins MemOrderKindAttr:$order)> { + let summary = "Uni-directional thread fence operation"; + let description = [{ + The `nvvm.fence.sync_restrict` Op restricts the class of memory + operations for which the fence instruction provides the memory ordering guarantees. + `sync_restrict` restricts `acquire` memory semantics to `shared_cluster` and + `release` memory semantics to `shared_cta` with cluster scope. 
+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar) + }]; + + let assemblyFormat = "attr-dict"; + let llvmBuilder = [{ + createIntrinsicCall(builder, getFenceSyncRestrictID($order)); + }]; + + let hasVerifier = 1; +} + +def NVVM_FenceMbarrierInitOp : NVVM_Op<"fence.mbarrier.init"> { + let description = [{ + Fence operation that applies on the prior nvvm.mbarrier.init + + [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar) + }]; + + let assemblyFormat = "attr-dict"; + let llvmBuilder = [{ + createIntrinsicCall(builder, llvm::Intrinsic::nvvm_fence_mbarrier_init_release_cluster); + }]; +} + def ProxyAlias : I32EnumAttrCase<"alias", 0, "alias">; def ProxyAsync : I32EnumAttrCase<"async", 1, "async">; def ProxyAsyncGlobal : I32EnumAttrCase<"async_global", 2, "async.global">; @@ -1392,10 +1466,15 @@ def ProxyKind : I32EnumAttr<"ProxyKind", "Proxy kind", } def ProxyKindAttr : EnumAttr { + let description = [{ + ProxyKind attribute represents a memory proxy which is an abstract label + applied to a method of memory access. When two memory operations use distinct + methods of memory access, they are said to be different proxies. 
+ }]; let assemblyFormat = "`<` $value `>`"; } -def NVVM_FenceProxyOp : NVVM_PTXBuilder_Op<"fence.proxy">, +def NVVM_FenceProxyOp : NVVM_Op<"fence.proxy">, Arguments<(ins ProxyKindAttr:$kind, OptionalAttr:$space)> { let description = [{ @@ -1406,16 +1485,11 @@ def NVVM_FenceProxyOp : NVVM_PTXBuilder_Op<"fence.proxy">, }]; let assemblyFormat = "attr-dict"; - let extraClassDefinition = [{ - std::string $cppClass::getPtx() { - std::string ptx = "fence.proxy."; - ptx += stringifyProxyKind(getKind()); - if(getKind() == NVVM::ProxyKind::async_shared) - { ptx += "::"; ptx += stringifySharedSpace(getSpace().value()); } - ptx += ";"; - return ptx; - } + + let llvmBuilder = [{ + createIntrinsicCall(builder, getFenceProxyID($kind, $space)); }]; + let hasVerifier = 1; } @@ -1452,23 +1526,6 @@ def NVVM_FenceProxyAcquireOp : NVVM_Op<"fence.proxy.acquire">, let hasVerifier = 1; } -def NVVM_MembarOp : NVVM_Op<"memory.barrier">, - Arguments<(ins MemScopeKindAttr:$scope)> { - let summary = "Memory barrier operation"; - let description = [{ - `membar` operation guarantees that prior memory accesses requested by this - thread are performed at the specified `scope`, before later memory - operations requested by this thread following the membar instruction. 
- - [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-membar) - }]; - - let assemblyFormat = "$scope attr-dict"; - let llvmBuilder = [{ - createIntrinsicCall(builder, getMembarIntrinsicID($scope), {}); - }]; -} - def NVVM_FenceProxyReleaseOp : NVVM_Op<"fence.proxy.release">, Arguments<(ins MemScopeKindAttr:$scope, DefaultValuedAttr, let hasVerifier = 1; } +def NVVM_FenceProxySyncRestrictOp : NVVM_Op<"fence.proxy.sync_restrict">, + Arguments<(ins MemOrderKindAttr:$order, + DefaultValuedAttr:$fromProxy, + DefaultValuedAttr:$toProxy)> { + let summary = "Uni-directional proxy fence operation with sync_restrict"; + let description = [{ + The `nvvm.fence.proxy.sync_restrict` Op is used to establish + ordering between prior and subsequent memory accesses performed across proxies. Currently, + the ordering is only supported between async and generic proxies. `sync_restrict` + restricts `acquire` memory semantics to `shared_cluster` and `release` memory + semantics to `shared_cta` with cluster scope. 
+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar) + }]; + + let assemblyFormat = "attr-dict"; + let llvmBuilder = [{ + createIntrinsicCall(builder, getFenceProxySyncRestrictID($order)); + }]; + + let hasVerifier = 1; +} + def SetMaxRegisterActionIncrease : I32EnumAttrCase<"increase", 0>; def SetMaxRegisterActionDecrease : I32EnumAttrCase<"decrease", 1>; def SetMaxRegisterAction : I32EnumAttr<"SetMaxRegisterAction", "NVVM set max register action", @@ -1517,22 +1596,6 @@ def NVVM_SetMaxRegisterOp : NVVM_Op<"setmaxregister"> { }]; } -def NVVM_FenceMbarrierInitOp : NVVM_PTXBuilder_Op<"fence.mbarrier.init"> { - let arguments = (ins ); - let description = [{ - Fence operation that applies on the prior nvvm.mbarrier.init - - [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar) - }]; - - let assemblyFormat = "attr-dict"; - let extraClassDefinition = [{ - std::string $cppClass::getPtx() { - return std::string("fence.mbarrier_init.release.cluster;"); - } - }]; -} - def ShflKindBfly : I32EnumAttrCase<"bfly", 0>; def ShflKindUp : I32EnumAttrCase<"up", 1>; def ShflKindDown : I32EnumAttrCase<"down", 2>; diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp index cb0d70361aec9..5ce56e6399e31 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp @@ -2098,6 +2098,13 @@ bool NVVM::WgmmaMmaAsyncOp::getAsmValues( return true; // Has manual mapping } +LogicalResult NVVM::FenceSyncRestrictOp::verify() { + if (getOrder() != NVVM::MemOrderKind::ACQUIRE && + getOrder() != NVVM::MemOrderKind::RELEASE) + return emitOpError("only acquire and release semantics are supported"); + return success(); +} + LogicalResult NVVM::FenceProxyOp::verify() { if (getKind() == 
NVVM::ProxyKind::TENSORMAP) return emitOpError() << "tensormap proxy is not a supported proxy kind"; @@ -2120,7 +2127,6 @@ LogicalResult NVVM::FenceProxyAcquireOp::verify() { if (getToProxy() != NVVM::ProxyKind::TENSORMAP) return emitOpError("uni-directional proxies only support tensormap " "for to_proxy attribute"); - return success(); } @@ -2132,7 +2138,19 @@ LogicalResult NVVM::FenceProxyReleaseOp::verify() { if (getToProxy() != NVVM::ProxyKind::TENSORMAP) return emitOpError("uni-directional proxies only support tensormap " "for to_proxy attribute"); + return success(); +} + +LogicalResult NVVM::FenceProxySyncRestrictOp::verify() { + if (getOrder() != NVVM::MemOrderKind::ACQUIRE && + getOrder() != NVVM::MemOrderKind::RELEASE) + return emitOpError("only acquire and release semantics are supported"); + + if (getFromProxy() != NVVM::ProxyKind::GENERIC) + return emitOpError("only generic is support for from_proxy attribute"); + if (getToProxy() != NVVM::ProxyKind::async) + return emitOpError("only async is supported for to_proxy attribute"); return success(); } diff --git a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp index cecff51e637a5..b7427a559fb79 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp @@ -411,6 +411,41 @@ getTcgen05StIntrinsicID(mlir::NVVM::Tcgen05LdStShape shape, uint32_t num) { llvm_unreachable("unhandled tcgen05.st lowering"); } +static llvm::Intrinsic::ID getFenceSyncRestrictID(NVVM::MemOrderKind order) { + return order == NVVM::MemOrderKind::ACQUIRE + ? 
llvm::Intrinsic:: + nvvm_fence_acquire_sync_restrict_space_cluster_scope_cluster + : llvm::Intrinsic:: + nvvm_fence_release_sync_restrict_space_cta_scope_cluster; +} + +static llvm::Intrinsic::ID +getFenceProxyID(NVVM::ProxyKind kind, std::optional space) { + switch (kind) { + case NVVM::ProxyKind::alias: + return llvm::Intrinsic::nvvm_fence_proxy_alias; + case NVVM::ProxyKind::async: + return llvm::Intrinsic::nvvm_fence_proxy_async; + case NVVM::ProxyKind::async_global: + return llvm::Intrinsic::nvvm_fence_proxy_async_global; + case NVVM::ProxyKind::async_shared: + return *space == NVVM::SharedSpace::shared_cta + ? llvm::Intrinsic::nvvm_fence_proxy_async_shared_cta + : llvm::Intrinsic::nvvm_fence_proxy_async_shared_cluster; + default: + llvm_unreachable("unsupported proxy kind"); + } +} + +static llvm::Intrinsic::ID +getFenceProxySyncRestrictID(NVVM::MemOrderKind order) { + return order == NVVM::MemOrderKind::ACQUIRE + ? llvm::Intrinsic:: + nvvm_fence_proxy_async_generic_acquire_sync_restrict_space_cluster_scope_cluster + : llvm::Intrinsic:: + nvvm_fence_proxy_async_generic_release_sync_restrict_space_cta_scope_cluster; +} + namespace { /// Implementation of the dialect interface that converts operations belonging /// to the NVVM dialect to LLVM IR. 
diff --git a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir index fbf8d9efb3bc7..8fb36ace2c463 100644 --- a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir +++ b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir @@ -584,29 +584,6 @@ func.func @cp_async_bulk_wait_group() { // ----- -func.func @fence_mbarrier_init() { - //CHECK: llvm.inline_asm has_side_effects asm_dialect = att "fence.mbarrier_init.release.cluster;" - nvvm.fence.mbarrier.init - func.return -} -// ----- - -func.func @fence_proxy() { - //CHECK: llvm.inline_asm has_side_effects asm_dialect = att "fence.proxy.alias;", "" : () -> () - nvvm.fence.proxy { kind = #nvvm.proxy_kind} - //CHECK: llvm.inline_asm has_side_effects asm_dialect = att "fence.proxy.async;", "" : () -> () - nvvm.fence.proxy { kind = #nvvm.proxy_kind} - //CHECK: llvm.inline_asm has_side_effects asm_dialect = att "fence.proxy.async.global;", "" : () -> () - nvvm.fence.proxy { kind = #nvvm.proxy_kind} - //CHECK: llvm.inline_asm has_side_effects asm_dialect = att "fence.proxy.async.shared::cta;", "" : () -> () - nvvm.fence.proxy { kind = #nvvm.proxy_kind, space = #nvvm.shared_space} - //CHECK: llvm.inline_asm has_side_effects asm_dialect = att "fence.proxy.async.shared::cluster;", "" : () -> () - nvvm.fence.proxy { kind = #nvvm.proxy_kind, space = #nvvm.shared_space} - func.return -} - -// ----- - // CHECK-LABEL: @llvm_nvvm_barrier_arrive // CHECK-SAME: (%[[barId:.*]]: i32, %[[numberOfThreads:.*]]: i32) llvm.func @llvm_nvvm_barrier_arrive(%barID : i32, %numberOfThreads : i32) { diff --git a/mlir/test/Dialect/LLVMIR/nvvm.mlir b/mlir/test/Dialect/LLVMIR/nvvm.mlir index 6f67a50c1a946..579f0ac3ccad1 100644 --- a/mlir/test/Dialect/LLVMIR/nvvm.mlir +++ b/mlir/test/Dialect/LLVMIR/nvvm.mlir @@ -92,13 +92,6 @@ func.func @llvm_nvvm_cluster_wait() { llvm.return } -// CHECK-LABEL: @llvm_nvvm_fence_sc_cluster -func.func @llvm_nvvm_fence_sc_cluster() { - // CHECK: nvvm.fence.sc.cluster - 
nvvm.fence.sc.cluster - llvm.return -} - // CHECK-LABEL: @nvvm_shfl func.func @nvvm_shfl( %arg0 : i32, %arg1 : i32, %arg2 : i32, diff --git a/mlir/test/Target/LLVMIR/nvvm/fence-invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/fence-invalid.mlir new file mode 100644 index 0000000000000..22578b5581da4 --- /dev/null +++ b/mlir/test/Target/LLVMIR/nvvm/fence-invalid.mlir @@ -0,0 +1,89 @@ +// RUN: mlir-translate --mlir-to-llvmir -verify-diagnostics -split-input-file %s + +llvm.func @fence_sync_restrict() { + // expected-error @below {{only acquire and release semantics are supported}} + nvvm.fence.sync_restrict {order = #nvvm.mem_order} + llvm.return +} + +// ----- + +llvm.func @fence_sync_restrict() { + // expected-error @below {{only acquire and release semantics are supported}} + nvvm.fence.sync_restrict {order = #nvvm.mem_order} + llvm.return +} + +// ----- + +llvm.func @fence_proxy() { + // expected-error @below {{tensormap proxy is not a supported proxy kind}} + nvvm.fence.proxy {kind = #nvvm.proxy_kind} + llvm.return +} + +// ----- + +llvm.func @fence_proxy() { + // expected-error @below {{generic proxy not a supported proxy kind}} + nvvm.fence.proxy {kind = #nvvm.proxy_kind} + llvm.return +} + +// ----- + +llvm.func @fence_proxy() { + // expected-error @below {{async_shared fence requires space attribute}} + nvvm.fence.proxy {kind = #nvvm.proxy_kind} + llvm.return +} + +// ----- + +llvm.func @fence_proxy() { + // expected-error @below {{only async_shared fence can have space attribute}} + nvvm.fence.proxy {kind = #nvvm.proxy_kind, space = #nvvm.shared_space} + llvm.return +} + +// ----- + +llvm.func @fence_proxy_release() { + // expected-error @below {{uni-directional proxies only support generic for from_proxy attribute}} + nvvm.fence.proxy.release #nvvm.mem_scope from_proxy = #nvvm.proxy_kind to_proxy = #nvvm.proxy_kind + llvm.return +} + +// ----- + +llvm.func @fence_proxy_release() { + // expected-error @below {{uni-directional proxies only support tensormap for 
to_proxy attribute}} + nvvm.fence.proxy.release #nvvm.mem_scope from_proxy = #nvvm.proxy_kind to_proxy = #nvvm.proxy_kind + llvm.return +} + +// ----- + +llvm.func @fence_proxy_sync_restrict() { + // expected-error @below {{only acquire and release semantics are supported}} + nvvm.fence.proxy.sync_restrict {order = #nvvm.mem_order} + llvm.return +} + +// ----- + +llvm.func @fence_proxy_sync_restrict() { + // expected-error @below {{only async is supported for to_proxy attribute}} + nvvm.fence.proxy.sync_restrict {order = #nvvm.mem_order, toProxy = #nvvm.proxy_kind, + fromProxy = #nvvm.proxy_kind} + llvm.return +} + +// ----- + +llvm.func @fence_proxy_sync_restrict() { + // expected-error @below {{only generic is support for from_proxy attribute}} + nvvm.fence.proxy.sync_restrict {order = #nvvm.mem_order, toProxy = #nvvm.proxy_kind, + fromProxy = #nvvm.proxy_kind} + llvm.return +} diff --git a/mlir/test/Target/LLVMIR/nvvm/fence.mlir b/mlir/test/Target/LLVMIR/nvvm/fence.mlir new file mode 100644 index 0000000000000..0ab4cb74b8f54 --- /dev/null +++ b/mlir/test/Target/LLVMIR/nvvm/fence.mlir @@ -0,0 +1,85 @@ +// RUN: mlir-translate --mlir-to-llvmir %s | FileCheck %s + +// CHECK-LABEL: @llvm_nvvm_fence_sc_cluster +llvm.func @llvm_nvvm_fence_sc_cluster() { + // CHECK: call void @llvm.nvvm.fence.sc.cluster() + nvvm.fence.sc.cluster + llvm.return +} + +// CHECK-LABEL: @nvvm_fence_sync_restrict +llvm.func @nvvm_fence_sync_restrict() { + // CHECK: call void @llvm.nvvm.fence.acquire.sync_restrict.space.cluster.scope.cluster() + nvvm.fence.sync_restrict {order = #nvvm.mem_order} + // CHECK: call void @llvm.nvvm.fence.release.sync_restrict.space.cta.scope.cluster() + nvvm.fence.sync_restrict {order = #nvvm.mem_order} + llvm.return +} + +// CHECK-LABEL: @fence_mbarrier_init +llvm.func @fence_mbarrier_init() { + // CHECK: call void @llvm.nvvm.fence.mbarrier_init.release.cluster() + nvvm.fence.mbarrier.init + llvm.return +} + +// CHECK-LABEL: @nvvm_fence_proxy +llvm.func @nvvm_fence_proxy() { + // 
CHECK: call void @llvm.nvvm.fence.proxy.alias() + nvvm.fence.proxy {kind = #nvvm.proxy_kind} + + // CHECK: call void @llvm.nvvm.fence.proxy.async() + nvvm.fence.proxy {kind = #nvvm.proxy_kind} + + // CHECK: call void @llvm.nvvm.fence.proxy.async.global() + nvvm.fence.proxy {kind = #nvvm.proxy_kind} + + // CHECK: call void @llvm.nvvm.fence.proxy.async.shared_cta() + nvvm.fence.proxy {kind = #nvvm.proxy_kind, space = #nvvm.shared_space} + + // CHECK: call void @llvm.nvvm.fence.proxy.async.shared_cluster() + nvvm.fence.proxy {kind = #nvvm.proxy_kind, space = #nvvm.shared_space} + llvm.return +} + +// CHECK-LABEL: @nvvm_fence_proxy_sync_restrict +llvm.func @nvvm_fence_proxy_sync_restrict() { + // CHECK: call void @llvm.nvvm.fence.proxy.async_generic.acquire.sync_restrict.space.cluster.scope.cluster() + nvvm.fence.proxy.sync_restrict {order = #nvvm.mem_order} + // CHECK: call void @llvm.nvvm.fence.proxy.async_generic.release.sync_restrict.space.cta.scope.cluster() + nvvm.fence.proxy.sync_restrict {order = #nvvm.mem_order} + llvm.return +} + +// CHECK-LABEL: @nvvm_fence_proxy_tensormap_generic_release +llvm.func @nvvm_fence_proxy_tensormap_generic_release() { + // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.release.cta() + nvvm.fence.proxy.release #nvvm.mem_scope + + // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.release.cluster() + nvvm.fence.proxy.release #nvvm.mem_scope + + // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.release.gpu() + nvvm.fence.proxy.release #nvvm.mem_scope + + // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.release.sys() + nvvm.fence.proxy.release #nvvm.mem_scope + llvm.return +} + +// CHECK-LABEL: @nvvm_fence_proxy_tensormap_generic_acquire +llvm.func @nvvm_fence_proxy_tensormap_generic_acquire(%addr : !llvm.ptr) { + %c128 = llvm.mlir.constant(128) : i32 + // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.acquire.cta(ptr {{%[0-9]+}}, i32 128) + nvvm.fence.proxy.acquire 
#nvvm.mem_scope %addr, %c128 + + // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.acquire.cluster(ptr {{%[0-9]+}}, i32 128) + nvvm.fence.proxy.acquire #nvvm.mem_scope %addr, %c128 + + // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.acquire.gpu(ptr {{%[0-9]+}}, i32 128) + nvvm.fence.proxy.acquire #nvvm.mem_scope %addr, %c128 + + // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.acquire.sys(ptr {{%[0-9]+}}, i32 128) + nvvm.fence.proxy.acquire #nvvm.mem_scope %addr, %c128 + llvm.return +} diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir index 5cba5c4fceefd..c4a69097692cb 100644 --- a/mlir/test/Target/LLVMIR/nvvmir.mlir +++ b/mlir/test/Target/LLVMIR/nvvmir.mlir @@ -699,42 +699,6 @@ llvm.func @kernel_func(%arg0: !llvm.ptr {llvm.byval = i32, nvvm.grid_constant}, llvm.return } - -// ----- -// CHECK-LABEL: @nvvm_fence_proxy_tensormap_generic_release -llvm.func @nvvm_fence_proxy_tensormap_generic_release() { - %c128 = llvm.mlir.constant(128) : i32 - // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.release.cta() - nvvm.fence.proxy.release #nvvm.mem_scope - - // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.release.cluster() - nvvm.fence.proxy.release #nvvm.mem_scope - - // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.release.gpu() - nvvm.fence.proxy.release #nvvm.mem_scope - - // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.release.sys() - nvvm.fence.proxy.release #nvvm.mem_scope - llvm.return -} - -// ----- -// CHECK-LABEL: @nvvm_fence_proxy_tensormap_generic_acquire -llvm.func @nvvm_fence_proxy_tensormap_generic_acquire(%addr : !llvm.ptr) { - %c128 = llvm.mlir.constant(128) : i32 - // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.acquire.cta(ptr {{%[0-9]+}}, i32 128) - nvvm.fence.proxy.acquire #nvvm.mem_scope %addr, %c128 - - // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.acquire.cluster(ptr {{%[0-9]+}}, i32 128) - 
nvvm.fence.proxy.acquire #nvvm.mem_scope %addr, %c128 - - // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.acquire.gpu(ptr {{%[0-9]+}}, i32 128) - nvvm.fence.proxy.acquire #nvvm.mem_scope %addr, %c128 - - // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.acquire.sys(ptr {{%[0-9]+}}, i32 128) - nvvm.fence.proxy.acquire #nvvm.mem_scope %addr, %c128 - llvm.return -} // ----- // CHECK-LABEL: @nvvm_exit